(c) Хабр
И решил таки опять написать для проверки производительности
VB.NET [Structure]
- Код: Выделить всё
' Stopwatch shared by the benchmark functions; each one resets it before timing its own loop.
Dim Time As New System.Diagnostics.Stopwatch

''' <summary>
''' Benchmark for the structure-based matrix: assigns the sixteen fields _11.._44 of
''' <paramref name="oM"/> from row-by-column sums of products of the fields of
''' <paramref name="M1"/> and <paramref name="M2"/>, repeating the whole computation
''' 10,000,000 times while the shared stopwatch is running.
''' </summary>
''' <param name="oM">Receives the product; every field is overwritten on each pass.</param>
''' <param name="M1">Left operand (only read).</param>
''' <param name="M2">Right operand (only read).</param>
''' <returns>Elapsed time of the loop in seconds, derived from whole milliseconds.</returns>
Function sMultiply(ByRef oM As sMatrix, ByRef M1 As sMatrix, ByRef M2 As sMatrix) As Single
    Time.Reset()
    Time.Start()

    For pass As Integer = 0 To 9999999
        ' Row 1 of the result.
        oM._11 = M1._11 * M2._11 + M1._12 * M2._21 + M1._13 * M2._31 + M1._14 * M2._41
        oM._12 = M1._11 * M2._12 + M1._12 * M2._22 + M1._13 * M2._32 + M1._14 * M2._42
        oM._13 = M1._11 * M2._13 + M1._12 * M2._23 + M1._13 * M2._33 + M1._14 * M2._43
        oM._14 = M1._11 * M2._14 + M1._12 * M2._24 + M1._13 * M2._34 + M1._14 * M2._44

        ' Row 2 of the result.
        oM._21 = M1._21 * M2._11 + M1._22 * M2._21 + M1._23 * M2._31 + M1._24 * M2._41
        oM._22 = M1._21 * M2._12 + M1._22 * M2._22 + M1._23 * M2._32 + M1._24 * M2._42
        oM._23 = M1._21 * M2._13 + M1._22 * M2._23 + M1._23 * M2._33 + M1._24 * M2._43
        oM._24 = M1._21 * M2._14 + M1._22 * M2._24 + M1._23 * M2._34 + M1._24 * M2._44

        ' Row 3 of the result.
        oM._31 = M1._31 * M2._11 + M1._32 * M2._21 + M1._33 * M2._31 + M1._34 * M2._41
        oM._32 = M1._31 * M2._12 + M1._32 * M2._22 + M1._33 * M2._32 + M1._34 * M2._42
        oM._33 = M1._31 * M2._13 + M1._32 * M2._23 + M1._33 * M2._33 + M1._34 * M2._43
        oM._34 = M1._31 * M2._14 + M1._32 * M2._24 + M1._33 * M2._34 + M1._34 * M2._44

        ' Row 4 of the result.
        oM._41 = M1._41 * M2._11 + M1._42 * M2._21 + M1._43 * M2._31 + M1._44 * M2._41
        oM._42 = M1._41 * M2._12 + M1._42 * M2._22 + M1._43 * M2._32 + M1._44 * M2._42
        oM._43 = M1._41 * M2._13 + M1._42 * M2._23 + M1._43 * M2._33 + M1._44 * M2._43
        oM._44 = M1._41 * M2._14 + M1._42 * M2._24 + M1._43 * M2._34 + M1._44 * M2._44
    Next pass

    Time.Stop()

    ' Milliseconds -> seconds; the division yields a Double that is narrowed to Single.
    Return CSng(Time.ElapsedMilliseconds / 1000)
End Function
VB.NET [Array]
- Код: Выделить всё
''' <summary>
''' Benchmark for the array-based matrix: fills <paramref name="oM"/> with
''' oM(r, c) = M1(r, 0) * M2(0, c) + M1(r, 1) * M2(1, c) + M1(r, 2) * M2(2, c) + M1(r, 3) * M2(3, c)
''' for r, c in 0..3, repeating the whole computation 10,000,000 times while the
''' shared stopwatch <c>Time</c> is running.
''' </summary>
''' <param name="oM">Receives the product; indices 0..3 in both dimensions are written.</param>
''' <param name="M1">Left operand (only read); indices 0..3 in both dimensions are accessed.</param>
''' <param name="M2">Right operand (only read); indices 0..3 in both dimensions are accessed.</param>
''' <returns>Elapsed time of the loop in seconds, derived from whole milliseconds.</returns>
Function aMultiply(ByRef oM(,) As Single, ByRef M1(,) As Single, ByRef M2(,) As Single) As Single
    ' The former locals j, k and z were never used and have been removed.
    Dim i As Integer = 0I

    Time.Reset()
    Time.Start()

    For i = 0I To 9999999
        ' Elements are produced one result column at a time (second index fixed, first
        ' index varying). The statement order is kept exactly as in the original
        ' because it is part of what is being measured.

        ' Result column 0.
        oM(0, 0) = M1(0, 0) * M2(0, 0) + M1(0, 1) * M2(1, 0) + M1(0, 2) * M2(2, 0) + M1(0, 3) * M2(3, 0)
        oM(1, 0) = M1(1, 0) * M2(0, 0) + M1(1, 1) * M2(1, 0) + M1(1, 2) * M2(2, 0) + M1(1, 3) * M2(3, 0)
        oM(2, 0) = M1(2, 0) * M2(0, 0) + M1(2, 1) * M2(1, 0) + M1(2, 2) * M2(2, 0) + M1(2, 3) * M2(3, 0)
        oM(3, 0) = M1(3, 0) * M2(0, 0) + M1(3, 1) * M2(1, 0) + M1(3, 2) * M2(2, 0) + M1(3, 3) * M2(3, 0)

        ' Result column 1.
        oM(0, 1) = M1(0, 0) * M2(0, 1) + M1(0, 1) * M2(1, 1) + M1(0, 2) * M2(2, 1) + M1(0, 3) * M2(3, 1)
        oM(1, 1) = M1(1, 0) * M2(0, 1) + M1(1, 1) * M2(1, 1) + M1(1, 2) * M2(2, 1) + M1(1, 3) * M2(3, 1)
        oM(2, 1) = M1(2, 0) * M2(0, 1) + M1(2, 1) * M2(1, 1) + M1(2, 2) * M2(2, 1) + M1(2, 3) * M2(3, 1)
        oM(3, 1) = M1(3, 0) * M2(0, 1) + M1(3, 1) * M2(1, 1) + M1(3, 2) * M2(2, 1) + M1(3, 3) * M2(3, 1)

        ' Result column 2.
        oM(0, 2) = M1(0, 0) * M2(0, 2) + M1(0, 1) * M2(1, 2) + M1(0, 2) * M2(2, 2) + M1(0, 3) * M2(3, 2)
        oM(1, 2) = M1(1, 0) * M2(0, 2) + M1(1, 1) * M2(1, 2) + M1(1, 2) * M2(2, 2) + M1(1, 3) * M2(3, 2)
        oM(2, 2) = M1(2, 0) * M2(0, 2) + M1(2, 1) * M2(1, 2) + M1(2, 2) * M2(2, 2) + M1(2, 3) * M2(3, 2)
        oM(3, 2) = M1(3, 0) * M2(0, 2) + M1(3, 1) * M2(1, 2) + M1(3, 2) * M2(2, 2) + M1(3, 3) * M2(3, 2)

        ' Result column 3.
        oM(0, 3) = M1(0, 0) * M2(0, 3) + M1(0, 1) * M2(1, 3) + M1(0, 2) * M2(2, 3) + M1(0, 3) * M2(3, 3)
        oM(1, 3) = M1(1, 0) * M2(0, 3) + M1(1, 1) * M2(1, 3) + M1(1, 2) * M2(2, 3) + M1(1, 3) * M2(3, 3)
        oM(2, 3) = M1(2, 0) * M2(0, 3) + M1(2, 1) * M2(1, 3) + M1(2, 2) * M2(2, 3) + M1(2, 3) * M2(3, 3)
        oM(3, 3) = M1(3, 0) * M2(0, 3) + M1(3, 1) * M2(1, 3) + M1(3, 2) * M2(2, 3) + M1(3, 3) * M2(3, 3)
    Next

    Time.Stop()

    ' Milliseconds -> seconds (millisecond resolution).
    Return Time.ElapsedMilliseconds / 1000
End Function
C++
- Код: Выделить всё
// Benchmark for the struct-based matrix.
//
// Assigns the sixteen fields _11.._44 of *oM from row-by-column sums of
// products of the fields of *M1 and *M2, repeating the whole computation
// 10,000,000 times between two QueryPerformanceCounter readings.
//
// Returns the elapsed time in seconds (counter ticks divided by the counter
// frequency), or -1 if the first counter reading fails.
double sMultiply(__inout sMatrix *oM, __in sMatrix *M1, __in sMatrix *M2)
{
    __int64 startTicks = 0;
    if (QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&startTicks)) == 0)
    {
        // No usable performance counter; GetLastError() would give the reason.
        return -1.0;
    }

    for (int pass = 0; pass < 10000000; ++pass)
    {
        // Row 1 of the result.
        oM->_11 = M1->_11 * M2->_11 + M1->_12 * M2->_21 + M1->_13 * M2->_31 + M1->_14 * M2->_41;
        oM->_12 = M1->_11 * M2->_12 + M1->_12 * M2->_22 + M1->_13 * M2->_32 + M1->_14 * M2->_42;
        oM->_13 = M1->_11 * M2->_13 + M1->_12 * M2->_23 + M1->_13 * M2->_33 + M1->_14 * M2->_43;
        oM->_14 = M1->_11 * M2->_14 + M1->_12 * M2->_24 + M1->_13 * M2->_34 + M1->_14 * M2->_44;

        // Row 2 of the result.
        oM->_21 = M1->_21 * M2->_11 + M1->_22 * M2->_21 + M1->_23 * M2->_31 + M1->_24 * M2->_41;
        oM->_22 = M1->_21 * M2->_12 + M1->_22 * M2->_22 + M1->_23 * M2->_32 + M1->_24 * M2->_42;
        oM->_23 = M1->_21 * M2->_13 + M1->_22 * M2->_23 + M1->_23 * M2->_33 + M1->_24 * M2->_43;
        oM->_24 = M1->_21 * M2->_14 + M1->_22 * M2->_24 + M1->_23 * M2->_34 + M1->_24 * M2->_44;

        // Row 3 of the result.
        oM->_31 = M1->_31 * M2->_11 + M1->_32 * M2->_21 + M1->_33 * M2->_31 + M1->_34 * M2->_41;
        oM->_32 = M1->_31 * M2->_12 + M1->_32 * M2->_22 + M1->_33 * M2->_32 + M1->_34 * M2->_42;
        oM->_33 = M1->_31 * M2->_13 + M1->_32 * M2->_23 + M1->_33 * M2->_33 + M1->_34 * M2->_43;
        oM->_34 = M1->_31 * M2->_14 + M1->_32 * M2->_24 + M1->_33 * M2->_34 + M1->_34 * M2->_44;

        // Row 4 of the result.
        oM->_41 = M1->_41 * M2->_11 + M1->_42 * M2->_21 + M1->_43 * M2->_31 + M1->_44 * M2->_41;
        oM->_42 = M1->_41 * M2->_12 + M1->_42 * M2->_22 + M1->_43 * M2->_32 + M1->_44 * M2->_42;
        oM->_43 = M1->_41 * M2->_13 + M1->_42 * M2->_23 + M1->_43 * M2->_33 + M1->_44 * M2->_43;
        oM->_44 = M1->_41 * M2->_14 + M1->_42 * M2->_24 + M1->_43 * M2->_34 + M1->_44 * M2->_44;
    }

    // The frequency is queried only after the second reading, so that call is
    // not part of the measured interval.
    __int64 stopTicks = 0;
    __int64 ticksPerSecond = 0;
    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&stopTicks));
    QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&ticksPerSecond));

    const double elapsedTicks = static_cast<double>(stopTicks - startTicks);
    return elapsedTicks / static_cast<double>(ticksPerSecond);
}
SSE + C++
- Код: Выделить всё
// Computes one result row: broadcasts each of the four lanes of `row` in turn
// and accumulates lane0*rhs->m1 + lane1*rhs->m2 + lane2*rhs->m3 + lane3*rhs->m4,
// adding the partial products left to right.
static inline __m128 ssRowTimesMatrix(__m128 row, const Matrix_SSE *rhs)
{
    __m128 sum = _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(0,0,0,0)), rhs->m1);
    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(1,1,1,1)), rhs->m2));
    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(2,2,2,2)), rhs->m3));
    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(3,3,3,3)), rhs->m4));
    return sum;
}

// Benchmark for the SSE matrix.
//
// Writes the four rows m1..m4 of *M0, each computed from the matching row of
// *M1 and all four rows of *M2, repeating the whole computation 10,000,000
// times between two QueryPerformanceCounter readings.
//
// Returns the elapsed time in seconds (counter ticks divided by the counter
// frequency), or -1 if the first counter reading fails.
double ssMultiply(__out Matrix_SSE *M0, __in Matrix_SSE *M1, __in Matrix_SSE *M2)
{
    __int64 startTicks = 0;
    if (QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&startTicks)) == 0)
    {
        // No usable performance counter; GetLastError() would give the reason.
        return -1.0;
    }

    for (int pass = 0; pass < 10000000; ++pass)
    {
        // Rows are stored one at a time, in order, after their inputs are read.
        M0->m1 = ssRowTimesMatrix(M1->m1, M2);
        M0->m2 = ssRowTimesMatrix(M1->m2, M2);
        M0->m3 = ssRowTimesMatrix(M1->m3, M2);
        M0->m4 = ssRowTimesMatrix(M1->m4, M2);
    }

    // The frequency is queried only after the second reading, so that call is
    // not part of the measured interval.
    __int64 stopTicks = 0;
    __int64 ticksPerSecond = 0;
    QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&stopTicks));
    QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&ticksPerSecond));

    const double elapsedTicks = static_cast<double>(stopTicks - startTicks);
    return elapsedTicks / static_cast<double>(ticksPerSecond);
}
Запустив у себя на компьютере, получил:
Для испытаний использовал 2 цикла:
1) внешний, вызывающий функцию, — 10 итераций
2) внутренний, в самой функции, — 10000000 итераций
VB.NET [Structure]
- Код: Выделить всё
Matrix as structure: 1,476
Matrix as structure: 1,458
Matrix as structure: 1,46
Matrix as structure: 1,464
Matrix as structure: 1,498
Matrix as structure: 1,458
Matrix as structure: 1,46
Matrix as structure: 1,458
Matrix as structure: 1,461
Matrix as structure: 1,461
VB.NET [Array]
- Код: Выделить всё
Matrix as array: 21,965
Matrix as array: 22,938
Matrix as array: 23,269
Matrix as array: 25,202
Matrix as array: 25,504
Matrix as array: 24,498
Matrix as array: 23,505
Matrix as array: 26,127
Matrix as array: 24,335
Matrix as array: 25,502
C++
- Код: Выделить всё
Matrix as struct: 1.60665
Matrix as struct: 1.70774
Matrix as struct: 1.91144
Matrix as struct: 1.96475
Matrix as struct: 1.72743
Matrix as struct: 1.73313
Matrix as struct: 1.84587
Matrix as struct: 1.84521
Matrix as struct: 1.90631
Matrix as struct: 1.88942
SSE + C++
- Код: Выделить всё
Matrix as array: 1.72086
Matrix as array: 1.741
Matrix as array: 1.65637
Matrix as array: 1.67555
Matrix as array: 1.79351
Matrix as array: 1.83651
Matrix as array: 1.71052
Matrix as array: 1.6825
Matrix as array: 1.69106
Matrix as array: 1.69123
Тестировалось на процессоре Pentium D 2,8 ГГц (2 ядра)
Компилировалось только в VS2010
1) Судя по тестам, VB.NET догоняет и даже обгоняет умножение на SSE. Значит ли это, что при знании всех тонкостей программирования .NET-язык сравним по скорости с нативными языками, а проигрыш в производительности — враки?
2) Почему, работая со структурой и с массивом (VB.NET), мы получаем такую большую разницу во времени?
Причём если поменять последовательность вычисления элементов с
- Код: Выделить всё
oM(0,1)
oM(0,2)
oM(0,3)
...
на
- Код: Выделить всё
oM(0,1)
oM(1,1)
oM(2,1)
...
мы получаем выигрыш в несколько секунд (~7)