about summary refs log tree commit diff stats
path: root/Alc
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2012-08-14 03:53:07 -0700
committer Chris Robinson <[email protected]> 2012-08-14 03:53:07 -0700
commit e9a20cb985c5686fd2777540dbbd2a13c9525ee0 (patch)
tree 568b287aff7b9dad019ea3e2048b1f5a1dc0e79c /Alc
parent cfc52055697a124edd32c9f057d34857a49e7e14 (diff)
Add an SSE-enhanced HRTF mixer using intrinsics
Not as good as it could be, but it's something
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/ALc.c 4
-rw-r--r-- Alc/mixer.c 48
2 files changed, 51 insertions, 1 deletions
diff --git a/Alc/ALc.c b/Alc/ALc.c
index e664d25a..14a74a92 100644
--- a/Alc/ALc.c
+++ b/Alc/ALc.c
@@ -812,7 +812,9 @@ static void alc_initconfig(void)
continue;
len = (next ? ((size_t)(next-str)) : strlen(str));
- if(strncasecmp(str, "neon", len) == 0)
+ if(strncasecmp(str, "sse", len) == 0)
+ capfilter &= ~CPU_CAP_SSE;
+ else if(strncasecmp(str, "neon", len) == 0)
capfilter &= ~CPU_CAP_NEON;
else
WARN("Invalid CPU extension \"%s\"\n", str);
diff --git a/Alc/mixer.c b/Alc/mixer.c
index e7fada8f..f5d9bb09 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -25,6 +25,9 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#ifdef HAVE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif
#ifdef HAVE_ARM_NEON_H
#include <arm_neon.h>
#endif
@@ -221,6 +224,39 @@ DECL_TEMPLATE(point32, C)
DECL_TEMPLATE(lerp32, C)
DECL_TEMPLATE(cubic32, C)
+#ifdef HAVE_XMMINTRIN_H
+
+/* SSE version of the HRTF coefficient accumulation step.
+ *
+ * For every tap c in [0, HRIR_LENGTH), adds Coeffs[c][0]*left to
+ * Values[o][0] and Coeffs[c][1]*right to Values[o][1], where o is
+ * (Offset + c) & HRIR_MASK. Two taps are processed per iteration, one in
+ * each 64-bit half of an SSE register.
+ *
+ * Offset:      index of the first Values entry to update; incremented once
+ *              per tap and wrapped with HRIR_MASK.
+ * Values:      pairs of accumulators, updated in place.
+ * Coeffs:      pairs of coefficients, read sequentially from index 0.
+ * left, right: scale applied to element 0 and element 1 of each coefficient
+ *              pair, respectively.
+ *
+ * Taps are consumed two at a time (Coeffs[c] and Coeffs[c+1]), so
+ * HRIR_LENGTH is assumed to be even.
+ */
+static __inline void ApplyCoeffsSSE(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+                                    ALfloat (*RESTRICT Coeffs)[2],
+                                    ALfloat left, ALfloat right)
+{
+    /* Build the vectors with intrinsics; brace-initializing a __m128 depends
+     * on a compiler-specific vector extension. _mm_setr_ps keeps the same
+     * lane order as the former { left, right, left, right } initializer. */
+    const __m128 lrlr = _mm_setr_ps(left, right, left, right);
+    ALuint c;
+
+    for(c = 0;c < HRIR_LENGTH;c += 2)
+    {
+        const ALuint o0 = (Offset++)&HRIR_MASK;
+        const ALuint o1 = (Offset++)&HRIR_MASK;
+        __m128 vals = _mm_setzero_ps();
+        __m128 coeffs = _mm_setzero_ps();
+
+        /* o0 and o1 are masked independently, so the two Values entries are
+         * not guaranteed to be adjacent in memory; each 64-bit half is
+         * loaded and stored on its own. */
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+        vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
+        coeffs = _mm_loadl_pi(coeffs, (__m64*)&Coeffs[c  ][0]);
+        coeffs = _mm_loadh_pi(coeffs, (__m64*)&Coeffs[c+1][0]);
+
+        vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));
+
+        _mm_storel_pi((__m64*)&Values[o0][0], vals);
+        _mm_storeh_pi((__m64*)&Values[o1][0], vals);
+    }
+}
+
+/* Instantiate the mixer template once per resampler with the SSE suffix.
+ * NOTE(review): DECL_TEMPLATE is defined outside this excerpt; presumably
+ * these produce the MixDirect_Hrtf_*_SSE functions that SelectHrtfMixer
+ * returns when CPU_CAP_SSE is set -- confirm against the macro. */
+DECL_TEMPLATE(point32, SSE)
+DECL_TEMPLATE(lerp32, SSE)
+DECL_TEMPLATE(cubic32, SSE)
+
+#endif
+
#ifdef HAVE_ARM_NEON_H
static __inline void ApplyCoeffsNeon(ALuint Offset, ALfloat (*RESTRICT Values)[2],
@@ -419,18 +455,30 @@ DryMixerFunc SelectHrtfMixer(enum Resampler Resampler)
switch(Resampler)
{
case PointResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_point32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_point32_Neon;
#endif
return MixDirect_Hrtf_point32_C;
case LinearResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_lerp32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_lerp32_Neon;
#endif
return MixDirect_Hrtf_lerp32_C;
case CubicResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_cubic32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_cubic32_Neon;