aboutsummaryrefslogtreecommitdiffstats
path: root/Alc/mixer_sse.c
diff options
context:
space:
mode:
authorChris Robinson <[email protected]>2012-08-15 08:19:04 -0700
committerChris Robinson <[email protected]>2012-08-15 08:19:04 -0700
commit0bca771a88f83e77cd6a5eb04757125b4fa5dc6c (patch)
tree5d38eb665129c0b708e1b61634922b54a96d05fb /Alc/mixer_sse.c
parentf4ff63e2715b28e8ed1fd54e48e3ee6077f1689d (diff)
Apply HRTF mixer coefficients with stepping using SSE
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r--Alc/mixer_sse.c34
1 files changed, 32 insertions, 2 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 2da2b6b8..bbca54ec 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -10,18 +10,48 @@
#include "alu.h"
+static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+ ALfloat (*RESTRICT Coeffs)[2],
+ ALfloat (*RESTRICT CoeffStep)[2],
+ ALfloat left, ALfloat right)
+{ /* Adds Coeffs scaled by (left, right) into Values at masked offsets, then advances Coeffs by CoeffStep in place. */
+ const __m128 lrlr = { left, right, left, right }; /* gains repeated so one vector scales two (left, right) pairs */
+ __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f }; /* both halves are overwritten by the loadl/loadh below before being used */
+ __m128 coeffs, coeffstep;
+ ALuint c;
+ for(c = 0;c < HRIR_LENGTH;c += 2) /* two coefficient rows per pass; NOTE(review): assumes HRIR_LENGTH is even -- confirm */
+ {
+ const ALuint o0 = (Offset++)&HRIR_MASK;
+ const ALuint o1 = (Offset++)&HRIR_MASK; /* masked separately from o0, so the two Values rows need not be adjacent */
+ /* Load rows c and c+1 of Coeffs as one vector, then gather Values[o0] into the low half and Values[o1] into the high half. */
+ coeffs = _mm_load_ps(&Coeffs[c][0]); /* aligned load; NOTE(review): assumes Coeffs is 16-byte aligned -- confirm at callers */
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+ vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
+ /* Accumulate coefficient * gain and scatter the two halves back to the rows they were read from. */
+ vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));
+ _mm_storel_pi((__m64*)&Values[o0][0], vals);
+ _mm_storeh_pi((__m64*)&Values[o1][0], vals);
+ /* Step the coefficients: Coeffs[c], Coeffs[c+1] += CoeffStep[c], CoeffStep[c+1], written back to the caller's array. */
+ coeffstep = _mm_load_ps(&CoeffStep[c][0]); /* aligned load; NOTE(review): assumes CoeffStep is 16-byte aligned -- confirm */
+ coeffs = _mm_add_ps(coeffs, coeffstep);
+ _mm_store_ps(&Coeffs[c][0], coeffs);
+ }
+}
+
static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
ALfloat (*RESTRICT Coeffs)[2],
ALfloat left, ALfloat right)
{
const __m128 lrlr = { left, right, left, right };
+ __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
+ __m128 coeffs;
ALuint c;
for(c = 0;c < HRIR_LENGTH;c += 2)
{
const ALuint o0 = (Offset++)&HRIR_MASK;
const ALuint o1 = (Offset++)&HRIR_MASK;
- __m128 coeffs = _mm_load_ps(&Coeffs[c][0]);
- __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
+
+ coeffs = _mm_load_ps(&Coeffs[c][0]);
vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);