diff options
author | Chris Robinson <[email protected]> | 2015-10-17 09:59:26 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2015-10-17 09:59:26 -0700 |
commit | c4ba9d21dce745cafa3b54060a84c15a0ce7535f (patch) | |
tree | 0d5a5f5369bee4e332d8bd7a9b88e3f6ceaaeb20 | |
parent | d9e9a7b05fa2c863ab6a83064c3acf442a14c2ea (diff) |
Use the correct array indices for SSE register components

SSE uses reverse ordering, such that component 0 is the last in memory.
_mm_load_* and _mm_loadu_*, and the corresponding stores, do not change the
memory ordering.
-rw-r--r-- | Alc/mixer_sse2.c | 2 | ||||
-rw-r--r-- | Alc/mixer_sse3.c | 8 | ||||
-rw-r--r-- | Alc/mixer_sse41.c | 50 |
3 files changed, 30 insertions, 30 deletions
diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c index 6759f795..1787c196 100644 --- a/Alc/mixer_sse2.c +++ b/Alc/mixer_sse2.c @@ -63,7 +63,7 @@ const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint incr _mm_store_ps(pos_.f, _mm_castsi128_ps(pos4)); } - pos = pos_.i[0]; + pos = pos_.i[3]; frac = _mm_cvtsi128_si32(frac4); for(;i < numsamples;i++) diff --git a/Alc/mixer_sse3.c b/Alc/mixer_sse3.c index dbf963ff..acb0338a 100644 --- a/Alc/mixer_sse3.c +++ b/Alc/mixer_sse3.c @@ -78,8 +78,8 @@ const ALfloat *Resample_fir4_32_SSE3(const ALfloat *src, ALuint frac, ALuint inc _mm_store_ps(frac_.f, _mm_castsi128_ps(frac4)); } - pos = pos_.i[0]; - frac = frac_.i[0]; + pos = pos_.i[3]; + frac = frac_.i[3]; for(;i < numsamples;i++) { @@ -143,8 +143,8 @@ const ALfloat *Resample_fir8_32_SSE3(const ALfloat *src, ALuint frac, ALuint inc _mm_store_ps(frac_.f, _mm_castsi128_ps(frac4)); } - pos = pos_.i[0]; - frac = frac_.i[0]; + pos = pos_.i[3]; + frac = frac_.i[3]; for(;i < numsamples;i++) { diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c index 8fd5c4b5..85ec8705 100644 --- a/Alc/mixer_sse41.c +++ b/Alc/mixer_sse41.c @@ -61,13 +61,13 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); frac4 = _mm_and_si128(frac4, fracMask4); - pos_.i[0] = _mm_extract_epi32(pos4, 0); - pos_.i[1] = _mm_extract_epi32(pos4, 1); - pos_.i[2] = _mm_extract_epi32(pos4, 2); - pos_.i[3] = _mm_extract_epi32(pos4, 3); + pos_.i[0] = _mm_extract_epi32(pos4, 3); + pos_.i[1] = _mm_extract_epi32(pos4, 2); + pos_.i[2] = _mm_extract_epi32(pos4, 1); + pos_.i[3] = _mm_extract_epi32(pos4, 0); } - pos = pos_.i[0]; + pos = pos_.i[3]; frac = _mm_cvtsi128_si32(frac4); for(;i < numsamples;i++) @@ -124,18 +124,18 @@ const ALfloat *Resample_fir4_32_SSE41(const ALfloat *src, ALuint frac, ALuint in pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); frac4 = _mm_and_si128(frac4, fracMask4); - 
pos_.i[0] = _mm_extract_epi32(pos4, 0); - pos_.i[1] = _mm_extract_epi32(pos4, 1); - pos_.i[2] = _mm_extract_epi32(pos4, 2); - pos_.i[3] = _mm_extract_epi32(pos4, 3); - frac_.i[0] = _mm_extract_epi32(frac4, 0); - frac_.i[1] = _mm_extract_epi32(frac4, 1); - frac_.i[2] = _mm_extract_epi32(frac4, 2); - frac_.i[3] = _mm_extract_epi32(frac4, 3); + pos_.i[0] = _mm_extract_epi32(pos4, 3); + pos_.i[1] = _mm_extract_epi32(pos4, 2); + pos_.i[2] = _mm_extract_epi32(pos4, 1); + pos_.i[3] = _mm_extract_epi32(pos4, 0); + frac_.i[0] = _mm_extract_epi32(frac4, 3); + frac_.i[1] = _mm_extract_epi32(frac4, 2); + frac_.i[2] = _mm_extract_epi32(frac4, 1); + frac_.i[3] = _mm_extract_epi32(frac4, 0); } - pos = pos_.i[0]; - frac = frac_.i[0]; + pos = pos_.i[3]; + frac = frac_.i[3]; for(;i < numsamples;i++) { @@ -195,18 +195,18 @@ const ALfloat *Resample_fir8_32_SSE41(const ALfloat *src, ALuint frac, ALuint in pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); frac4 = _mm_and_si128(frac4, fracMask4); - pos_.i[0] = _mm_extract_epi32(pos4, 0); - pos_.i[1] = _mm_extract_epi32(pos4, 1); - pos_.i[2] = _mm_extract_epi32(pos4, 2); - pos_.i[3] = _mm_extract_epi32(pos4, 3); - frac_.i[0] = _mm_extract_epi32(frac4, 0); - frac_.i[1] = _mm_extract_epi32(frac4, 1); - frac_.i[2] = _mm_extract_epi32(frac4, 2); - frac_.i[3] = _mm_extract_epi32(frac4, 3); + pos_.i[0] = _mm_extract_epi32(pos4, 3); + pos_.i[1] = _mm_extract_epi32(pos4, 2); + pos_.i[2] = _mm_extract_epi32(pos4, 1); + pos_.i[3] = _mm_extract_epi32(pos4, 0); + frac_.i[0] = _mm_extract_epi32(frac4, 3); + frac_.i[1] = _mm_extract_epi32(frac4, 2); + frac_.i[2] = _mm_extract_epi32(frac4, 1); + frac_.i[3] = _mm_extract_epi32(frac4, 0); } - pos = pos_.i[0]; - frac = frac_.i[0]; + pos = pos_.i[3]; + frac = frac_.i[3]; for(;i < numsamples;i++) { |