aboutsummaryrefslogtreecommitdiffstats
path: root/Alc/mixer_sse.c
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2013-11-10 05:52:22 -0800
committer Chris Robinson <[email protected]> 2013-11-10 05:52:22 -0800
commit 25b9c3d0c15e959d544f5d0ac7ea507ea5f6d69f (patch)
tree 7ad82f7947d40f3926377eb67b76d5183cf6d1b3 /Alc/mixer_sse.c
parent 4386ee32ce24a19bcb0577322d5b12a9db7c69f2 (diff)
Apply HRTF coefficient stepping separately
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r-- Alc/mixer_sse.c 60
1 files changed, 7 insertions, 53 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 56a4bdae..719ebd23 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -21,65 +21,19 @@
#include "mixer_defs.h"
-static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
- const ALuint IrSize,
+static inline void ApplyCoeffsStep(const ALuint IrSize,
ALfloat (*restrict Coeffs)[2],
- const ALfloat (*restrict CoeffStep)[2],
- ALfloat left, ALfloat right)
+ const ALfloat (*restrict CoeffStep)[2])
{
- const __m128 lrlr = { left, right, left, right };
- __m128 coeffs, deltas, imp0, imp1;
- __m128 vals = _mm_setzero_ps();
+ __m128 coeffs, deltas;
ALuint i;
- if((Offset&1))
+ for(i = 0;i < IrSize;i += 2)
{
- const ALuint o0 = Offset&HRIR_MASK;
- const ALuint o1 = (Offset+IrSize-1)&HRIR_MASK;
-
- coeffs = _mm_load_ps(&Coeffs[0][0]);
- deltas = _mm_load_ps(&CoeffStep[0][0]);
- vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
- imp0 = _mm_mul_ps(lrlr, coeffs);
+ coeffs = _mm_load_ps(&Coeffs[i][0]);
+ deltas = _mm_load_ps(&CoeffStep[i][0]);
coeffs = _mm_add_ps(coeffs, deltas);
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Coeffs[0][0], coeffs);
- _mm_storel_pi((__m64*)&Values[o0][0], vals);
- for(i = 1;i < IrSize-1;i += 2)
- {
- const ALuint o2 = (Offset+i)&HRIR_MASK;
-
- coeffs = _mm_load_ps(&Coeffs[i+1][0]);
- deltas = _mm_load_ps(&CoeffStep[i+1][0]);
- vals = _mm_load_ps(&Values[o2][0]);
- imp1 = _mm_mul_ps(lrlr, coeffs);
- coeffs = _mm_add_ps(coeffs, deltas);
- imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Coeffs[i+1][0], coeffs);
- _mm_store_ps(&Values[o2][0], vals);
- imp0 = imp1;
- }
- vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]);
- imp0 = _mm_movehl_ps(imp0, imp0);
- vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi((__m64*)&Values[o1][0], vals);
- }
- else
- {
- for(i = 0;i < IrSize;i += 2)
- {
- const ALuint o = (Offset + i)&HRIR_MASK;
-
- coeffs = _mm_load_ps(&Coeffs[i][0]);
- deltas = _mm_load_ps(&CoeffStep[i][0]);
- vals = _mm_load_ps(&Values[o][0]);
- imp0 = _mm_mul_ps(lrlr, coeffs);
- coeffs = _mm_add_ps(coeffs, deltas);
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Coeffs[i][0], coeffs);
- _mm_store_ps(&Values[o][0], vals);
- }
+ _mm_store_ps(&Coeffs[i][0], coeffs);
}
}