diff options
author | Chris Robinson <[email protected]> | 2014-12-15 17:13:31 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2014-12-15 17:13:31 -0800 |
commit | 1b7c5540687d31d75b54ae85b3b7b4c2b5bc99ef (patch) | |
tree | cd28dfe6ed042c3582577fa17bf3708b8bb7e5b3 /Alc | |
parent | a0e7ad493b5f6290e9e664f6ff8e130d10804bec (diff) |
Multiply samples with the cubic coeffs before transposing
This avoids having to transpose the cubic coefficients.
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/mixer_sse2.c | 13 |
-rw-r--r-- | Alc/mixer_sse41.c | 13 |
2 files changed, 14 insertions, 12 deletions
```diff
diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c
index 1c71b458..fbc2b629 100644
--- a/Alc/mixer_sse2.c
+++ b/Alc/mixer_sse2.c
@@ -106,14 +106,15 @@ const ALfloat *Resample_cubic32_SSE2(const ALfloat *src, ALuint frac, ALuint inc
         __m128 k3 = _mm_load_ps(CubicLUT[frac_.i[3]]);
         __m128 out;

+        val0 = _mm_mul_ps(val0, k0);
+        val1 = _mm_mul_ps(val1, k1);
+        val2 = _mm_mul_ps(val2, k2);
+        val3 = _mm_mul_ps(val3, k3);
         _MM_TRANSPOSE4_PS(val0, val1, val2, val3);
-        _MM_TRANSPOSE4_PS(k0, k1, k2, k3);
+        out = _mm_add_ps(val0, val1);
+        out = _mm_add_ps(out, val2);
+        out = _mm_add_ps(out, val3);

-        /* k0*val0 + k1*val1 + k2*val2 + k3*val3 */
-        out = _mm_mul_ps(k0, val0);
-        out = _mm_add_ps(out, _mm_mul_ps(k1, val1));
-        out = _mm_add_ps(out, _mm_mul_ps(k2, val2));
-        out = _mm_add_ps(out, _mm_mul_ps(k3, val3));
         _mm_store_ps(&dst[i], out);

         frac4 = _mm_add_epi32(frac4, increment4);
diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c
index 9ea4379a..36f06255 100644
--- a/Alc/mixer_sse41.c
+++ b/Alc/mixer_sse41.c
@@ -110,14 +110,15 @@ const ALfloat *Resample_cubic32_SSE41(const ALfloat *src, ALuint frac, ALuint in
         __m128 k3 = _mm_load_ps(CubicLUT[frac_.i[3]]);
         __m128 out;

+        val0 = _mm_mul_ps(val0, k0);
+        val1 = _mm_mul_ps(val1, k1);
+        val2 = _mm_mul_ps(val2, k2);
+        val3 = _mm_mul_ps(val3, k3);
         _MM_TRANSPOSE4_PS(val0, val1, val2, val3);
-        _MM_TRANSPOSE4_PS(k0, k1, k2, k3);
+        out = _mm_add_ps(val0, val1);
+        out = _mm_add_ps(out, val2);
+        out = _mm_add_ps(out, val3);

-        /* k0*val0 + k1*val1 + k2*val2 + k3*val3 */
-        out = _mm_mul_ps(k0, val0);
-        out = _mm_add_ps(out, _mm_mul_ps(k1, val1));
-        out = _mm_add_ps(out, _mm_mul_ps(k2, val2));
-        out = _mm_add_ps(out, _mm_mul_ps(k3, val3));
         _mm_store_ps(&dst[i], out);

         frac4 = _mm_add_epi32(frac4, increment4);
```