voidnormalSqrt(float *a, int N){ for (int i = 0; i != N; ++i) a[i] = sqrt(a[i]); }
/**
 * Replace each of the first N elements of `a` with its square root using
 * SSE intrinsics, four floats per vector iteration.
 *
 * The vector loop uses the aligned load/store intrinsics, so `a` must be
 * 16-byte aligned. Any trailing N % 4 elements are handled one at a time
 * with the scalar SSE square root so that every requested element is
 * processed.
 *
 * @param a  16-byte-aligned array holding at least N floats; modified in place.
 * @param N  Number of elements to process. Zero or negative values
 *           leave the array untouched.
 */
void SSESqrt(float *a, int N)
{
    int i = 0;

    /* Vector part: runs while at least four elements remain. */
    for (; N - i >= 4; i += 4) {
        _mm_store_ps(a + i, _mm_sqrt_ps(_mm_load_ps(a + i)));
    }

    /* Scalar tail: the 0..3 elements that do not fill a whole vector. */
    for (; i < N; ++i) {
        _mm_store_ss(a + i, _mm_sqrt_ss(_mm_load_ss(a + i)));
    }
}
voidOISqrt(float *a, int N){ for (int i = 0; i < N; i += 4) { a[i] = sqrt(a[i]); a[i+1] = sqrt(a[i+1]); a[i+2] = sqrt(a[i+2]); a[i+3] = sqrt(a[i+3]); } }
voidOISqrt2(float *a, int N){ for (int i = 0; i < N; i += 2) { a[i] = sqrt(a[i]); a[i+1] = sqrt(a[i+1]); } }
/*
 * AVXSqrt: replaces elements of `a` with their square roots in place, eight
 * floats per iteration, using 256-bit AVX intrinsics.
 *
 *   a  - array of floats, advanced by 8 each iteration; accessed with the
 *        aligned _mm256_load_ps / _mm256_store_ps intrinsics, which require
 *        32-byte alignment.
 *   N  - element count; the loop runs N / 8 iterations, so the trailing
 *        N % 8 elements are left unchanged.
 *
 * NOTE(review): the return type and name are fused ("voidAVXSqrt", and
 * likewise "intmain" later on this line) while the caller uses AVXSqrt --
 * this looks like a lost space; confirm against the original file.
 * NOTE(review): the `i != N / 8` condition assumes N >= 0.
 *
 * The rest of this line opens main(): it seeds rand(), sets N = 64000000 and
 * requests a 32-byte-aligned buffer of N floats via posix_memalign (the
 * return value is not checked here). main() continues on the following lines.
 */
voidAVXSqrt(float *a, int N){ for (int i = 0; i != N / 8; ++i, a += 8) { _mm256_store_ps(a, _mm256_sqrt_ps(_mm256_load_ps(a))); } } intmain(int argv, char **argc){ srand(time(NULL)); int N = 64000000; float *a; posix_memalign((void**)&a, 32, N * sizeof(float));
auto t = getCurrentTime(); for (int i = 0; i != N; ++i) a[i] = rand(); echoDiff("Rand generate", getDiffTime(t));
t = getCurrentTime(); normalSqrt(a, N); echoDiff("Normal", getDiffTime(t));
t = getCurrentTime(); SSESqrt(a, N); echoDiff("SSE", getDiffTime(t));
t = getCurrentTime(); OISqrt(a, N); echoDiff("OI4", getDiffTime(t)); t = getCurrentTime(); OISqrt2(a, N); echoDiff("OI2", getDiffTime(t));
t = getCurrentTime(); AVXSqrt(a, N); echoDiff("AVX", getDiffTime(t));