aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2015-10-17 09:59:26 -0700
committer: Chris Robinson <[email protected]> 2015-10-17 09:59:26 -0700
commit: c4ba9d21dce745cafa3b54060a84c15a0ce7535f (patch)
tree: 0d5a5f5369bee4e332d8bd7a9b88e3f6ceaaeb20
parent: d9e9a7b05fa2c863ab6a83064c3acf442a14c2ea (diff)
Use the correct array indices for SSE register components
SSE uses reverse ordering, such that component 0 is the last in memory. _mm_load_* and _mm_loadu_*, and the corresponding stores, do not change the memory ordering.
-rw-r--r-- Alc/mixer_sse2.c 2
-rw-r--r-- Alc/mixer_sse3.c 8
-rw-r--r-- Alc/mixer_sse41.c 50
3 files changed, 30 insertions, 30 deletions
diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c
index 6759f795..1787c196 100644
--- a/Alc/mixer_sse2.c
+++ b/Alc/mixer_sse2.c
@@ -63,7 +63,7 @@ const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint incr
_mm_store_ps(pos_.f, _mm_castsi128_ps(pos4));
}
- pos = pos_.i[0];
+ pos = pos_.i[3];
frac = _mm_cvtsi128_si32(frac4);
for(;i < numsamples;i++)
diff --git a/Alc/mixer_sse3.c b/Alc/mixer_sse3.c
index dbf963ff..acb0338a 100644
--- a/Alc/mixer_sse3.c
+++ b/Alc/mixer_sse3.c
@@ -78,8 +78,8 @@ const ALfloat *Resample_fir4_32_SSE3(const ALfloat *src, ALuint frac, ALuint inc
_mm_store_ps(frac_.f, _mm_castsi128_ps(frac4));
}
- pos = pos_.i[0];
- frac = frac_.i[0];
+ pos = pos_.i[3];
+ frac = frac_.i[3];
for(;i < numsamples;i++)
{
@@ -143,8 +143,8 @@ const ALfloat *Resample_fir8_32_SSE3(const ALfloat *src, ALuint frac, ALuint inc
_mm_store_ps(frac_.f, _mm_castsi128_ps(frac4));
}
- pos = pos_.i[0];
- frac = frac_.i[0];
+ pos = pos_.i[3];
+ frac = frac_.i[3];
for(;i < numsamples;i++)
{
diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c
index 8fd5c4b5..85ec8705 100644
--- a/Alc/mixer_sse41.c
+++ b/Alc/mixer_sse41.c
@@ -61,13 +61,13 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc
pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS));
frac4 = _mm_and_si128(frac4, fracMask4);
- pos_.i[0] = _mm_extract_epi32(pos4, 0);
- pos_.i[1] = _mm_extract_epi32(pos4, 1);
- pos_.i[2] = _mm_extract_epi32(pos4, 2);
- pos_.i[3] = _mm_extract_epi32(pos4, 3);
+ pos_.i[0] = _mm_extract_epi32(pos4, 3);
+ pos_.i[1] = _mm_extract_epi32(pos4, 2);
+ pos_.i[2] = _mm_extract_epi32(pos4, 1);
+ pos_.i[3] = _mm_extract_epi32(pos4, 0);
}
- pos = pos_.i[0];
+ pos = pos_.i[3];
frac = _mm_cvtsi128_si32(frac4);
for(;i < numsamples;i++)
@@ -124,18 +124,18 @@ const ALfloat *Resample_fir4_32_SSE41(const ALfloat *src, ALuint frac, ALuint in
pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS));
frac4 = _mm_and_si128(frac4, fracMask4);
- pos_.i[0] = _mm_extract_epi32(pos4, 0);
- pos_.i[1] = _mm_extract_epi32(pos4, 1);
- pos_.i[2] = _mm_extract_epi32(pos4, 2);
- pos_.i[3] = _mm_extract_epi32(pos4, 3);
- frac_.i[0] = _mm_extract_epi32(frac4, 0);
- frac_.i[1] = _mm_extract_epi32(frac4, 1);
- frac_.i[2] = _mm_extract_epi32(frac4, 2);
- frac_.i[3] = _mm_extract_epi32(frac4, 3);
+ pos_.i[0] = _mm_extract_epi32(pos4, 3);
+ pos_.i[1] = _mm_extract_epi32(pos4, 2);
+ pos_.i[2] = _mm_extract_epi32(pos4, 1);
+ pos_.i[3] = _mm_extract_epi32(pos4, 0);
+ frac_.i[0] = _mm_extract_epi32(frac4, 3);
+ frac_.i[1] = _mm_extract_epi32(frac4, 2);
+ frac_.i[2] = _mm_extract_epi32(frac4, 1);
+ frac_.i[3] = _mm_extract_epi32(frac4, 0);
}
- pos = pos_.i[0];
- frac = frac_.i[0];
+ pos = pos_.i[3];
+ frac = frac_.i[3];
for(;i < numsamples;i++)
{
@@ -195,18 +195,18 @@ const ALfloat *Resample_fir8_32_SSE41(const ALfloat *src, ALuint frac, ALuint in
pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS));
frac4 = _mm_and_si128(frac4, fracMask4);
- pos_.i[0] = _mm_extract_epi32(pos4, 0);
- pos_.i[1] = _mm_extract_epi32(pos4, 1);
- pos_.i[2] = _mm_extract_epi32(pos4, 2);
- pos_.i[3] = _mm_extract_epi32(pos4, 3);
- frac_.i[0] = _mm_extract_epi32(frac4, 0);
- frac_.i[1] = _mm_extract_epi32(frac4, 1);
- frac_.i[2] = _mm_extract_epi32(frac4, 2);
- frac_.i[3] = _mm_extract_epi32(frac4, 3);
+ pos_.i[0] = _mm_extract_epi32(pos4, 3);
+ pos_.i[1] = _mm_extract_epi32(pos4, 2);
+ pos_.i[2] = _mm_extract_epi32(pos4, 1);
+ pos_.i[3] = _mm_extract_epi32(pos4, 0);
+ frac_.i[0] = _mm_extract_epi32(frac4, 3);
+ frac_.i[1] = _mm_extract_epi32(frac4, 2);
+ frac_.i[2] = _mm_extract_epi32(frac4, 1);
+ frac_.i[3] = _mm_extract_epi32(frac4, 0);
}
- pos = pos_.i[0];
- frac = frac_.i[0];
+ pos = pos_.i[3];
+ frac = frac_.i[3];
for(;i < numsamples;i++)
{