aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2014-12-15 17:13:31 -0800
committer Chris Robinson <[email protected]> 2014-12-15 17:13:31 -0800
commit 1b7c5540687d31d75b54ae85b3b7b4c2b5bc99ef (patch)
tree cd28dfe6ed042c3582577fa17bf3708b8bb7e5b3
parent a0e7ad493b5f6290e9e664f6ff8e130d10804bec (diff)
Multiply samples with the cubic coeffs before transposing
This avoids having to transpose the cubic coefficients.
-rw-r--r-- Alc/mixer_sse2.c 13
-rw-r--r-- Alc/mixer_sse41.c 13
2 files changed, 14 insertions, 12 deletions
diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c
index 1c71b458..fbc2b629 100644
--- a/Alc/mixer_sse2.c
+++ b/Alc/mixer_sse2.c
@@ -106,14 +106,15 @@ const ALfloat *Resample_cubic32_SSE2(const ALfloat *src, ALuint frac, ALuint inc
__m128 k3 = _mm_load_ps(CubicLUT[frac_.i[3]]);
__m128 out;
+ val0 = _mm_mul_ps(val0, k0);
+ val1 = _mm_mul_ps(val1, k1);
+ val2 = _mm_mul_ps(val2, k2);
+ val3 = _mm_mul_ps(val3, k3);
_MM_TRANSPOSE4_PS(val0, val1, val2, val3);
- _MM_TRANSPOSE4_PS(k0, k1, k2, k3);
+ out = _mm_add_ps(val0, val1);
+ out = _mm_add_ps(out, val2);
+ out = _mm_add_ps(out, val3);
- /* k0*val0 + k1*val1 + k2*val2 + k3*val3 */
- out = _mm_mul_ps(k0, val0);
- out = _mm_add_ps(out, _mm_mul_ps(k1, val1));
- out = _mm_add_ps(out, _mm_mul_ps(k2, val2));
- out = _mm_add_ps(out, _mm_mul_ps(k3, val3));
_mm_store_ps(&dst[i], out);
frac4 = _mm_add_epi32(frac4, increment4);
diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c
index 9ea4379a..36f06255 100644
--- a/Alc/mixer_sse41.c
+++ b/Alc/mixer_sse41.c
@@ -110,14 +110,15 @@ const ALfloat *Resample_cubic32_SSE41(const ALfloat *src, ALuint frac, ALuint in
__m128 k3 = _mm_load_ps(CubicLUT[frac_.i[3]]);
__m128 out;
+ val0 = _mm_mul_ps(val0, k0);
+ val1 = _mm_mul_ps(val1, k1);
+ val2 = _mm_mul_ps(val2, k2);
+ val3 = _mm_mul_ps(val3, k3);
_MM_TRANSPOSE4_PS(val0, val1, val2, val3);
- _MM_TRANSPOSE4_PS(k0, k1, k2, k3);
+ out = _mm_add_ps(val0, val1);
+ out = _mm_add_ps(out, val2);
+ out = _mm_add_ps(out, val3);
- /* k0*val0 + k1*val1 + k2*val2 + k3*val3 */
- out = _mm_mul_ps(k0, val0);
- out = _mm_add_ps(out, _mm_mul_ps(k1, val1));
- out = _mm_add_ps(out, _mm_mul_ps(k2, val2));
- out = _mm_add_ps(out, _mm_mul_ps(k3, val3));
_mm_store_ps(&dst[i], out);
frac4 = _mm_add_epi32(frac4, increment4);