summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2012-08-14 03:53:07 -0700
committer: Chris Robinson <[email protected]> 2012-08-14 03:53:07 -0700
commit: e9a20cb985c5686fd2777540dbbd2a13c9525ee0 (patch)
tree: 568b287aff7b9dad019ea3e2048b1f5a1dc0e79c
parent: cfc52055697a124edd32c9f057d34857a49e7e14 (diff)
Add an SSE-enhanced HRTF mixer using intrinsics
Not as good as it could be, but it's something
-rw-r--r-- Alc/ALc.c 4
-rw-r--r-- Alc/mixer.c 48
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- alsoftrc.sample 4
-rw-r--r-- config.h.in 3
5 files changed, 57 insertions, 3 deletions
diff --git a/Alc/ALc.c b/Alc/ALc.c
index e664d25a..14a74a92 100644
--- a/Alc/ALc.c
+++ b/Alc/ALc.c
@@ -812,7 +812,9 @@ static void alc_initconfig(void)
continue;
len = (next ? ((size_t)(next-str)) : strlen(str));
- if(strncasecmp(str, "neon", len) == 0)
+ if(strncasecmp(str, "sse", len) == 0)
+ capfilter &= ~CPU_CAP_SSE;
+ else if(strncasecmp(str, "neon", len) == 0)
capfilter &= ~CPU_CAP_NEON;
else
WARN("Invalid CPU extension \"%s\"\n", str);
diff --git a/Alc/mixer.c b/Alc/mixer.c
index e7fada8f..f5d9bb09 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -25,6 +25,9 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#ifdef HAVE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif
#ifdef HAVE_ARM_NEON_H
#include <arm_neon.h>
#endif
@@ -221,6 +224,39 @@ DECL_TEMPLATE(point32, C)
DECL_TEMPLATE(lerp32, C)
DECL_TEMPLATE(cubic32, C)
+#ifdef HAVE_XMMINTRIN_H
+
+/* Accumulates HRIR_LENGTH stereo coefficient pairs, scaled by the given
+ * left/right factors, into Values:
+ *
+ *   Values[(Offset+c) & HRIR_MASK][0] += Coeffs[c][0] * left
+ *   Values[(Offset+c) & HRIR_MASK][1] += Coeffs[c][1] * right
+ *
+ * Two taps are processed per iteration so that one 4-float SSE register
+ * holds { L0, R0, L1, R1 }.
+ *
+ * Offset - index of the first Values entry to update; each index is wrapped
+ *          with HRIR_MASK (presumably HRIR_LENGTH-1 -- confirm at the
+ *          definition).
+ * Values - accumulation buffer of [left, right] pairs, updated in place.
+ * Coeffs - coefficient pairs, read linearly from index 0.
+ * left   - factor applied to every [0] coefficient.
+ * right  - factor applied to every [1] coefficient.
+ *
+ * NOTE(review): the loop steps by 2 and reads Coeffs[c+1], so it assumes
+ * HRIR_LENGTH is even.
+ */
+static __inline void ApplyCoeffsSSE(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+                                    ALfloat (*RESTRICT Coeffs)[2],
+                                    ALfloat left, ALfloat right)
+{
+    /* Build the vector with the set intrinsic rather than a brace
+     * initializer: how __m128 is declared differs between compilers, while
+     * _mm_setr_ps always places its arguments in lanes 0..3 in order. */
+    const __m128 lrlr = _mm_setr_ps(left, right, left, right);
+    ALuint c;
+    for(c = 0;c < HRIR_LENGTH;c += 2)
+    {
+        /* The two output slots are wrapped separately, so they need not be
+         * adjacent in memory; hence the split 64-bit loads and stores. */
+        const ALuint o0 = (Offset++)&HRIR_MASK;
+        const ALuint o1 = (Offset++)&HRIR_MASK;
+        /* Start from zeroed registers; the half-loads below each replace
+         * only two of the four lanes. */
+        __m128 vals = _mm_setzero_ps();
+        __m128 coeffs = _mm_setzero_ps();
+
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+        vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
+        coeffs = _mm_loadl_pi(coeffs, (__m64*)&Coeffs[c  ][0]);
+        coeffs = _mm_loadh_pi(coeffs, (__m64*)&Coeffs[c+1][0]);
+
+        /* vals += coeffs * { left, right, left, right } */
+        vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));
+
+        _mm_storel_pi((__m64*)&Values[o0][0], vals);
+        _mm_storeh_pi((__m64*)&Values[o1][0], vals);
+    }
+}
+
+DECL_TEMPLATE(point32, SSE)
+DECL_TEMPLATE(lerp32, SSE)
+DECL_TEMPLATE(cubic32, SSE)
+
+#endif
+
#ifdef HAVE_ARM_NEON_H
static __inline void ApplyCoeffsNeon(ALuint Offset, ALfloat (*RESTRICT Values)[2],
@@ -419,18 +455,30 @@ DryMixerFunc SelectHrtfMixer(enum Resampler Resampler)
switch(Resampler)
{
case PointResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_point32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_point32_Neon;
#endif
return MixDirect_Hrtf_point32_C;
case LinearResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_lerp32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_lerp32_Neon;
#endif
return MixDirect_Hrtf_lerp32_C;
case CubicResampler:
+#ifdef HAVE_XMMINTRIN_H
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixDirect_Hrtf_cubic32_SSE;
+#endif
#ifdef HAVE_ARM_NEON_H
if((CPUCapFlags&CPU_CAP_NEON))
return MixDirect_Hrtf_cubic32_Neon;
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 96dd4347..f4a0d1f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -225,6 +225,7 @@ CHECK_INCLUDE_FILE(guiddef.h HAVE_GUIDDEF_H)
IF(NOT HAVE_GUIDDEF_H)
CHECK_INCLUDE_FILE(initguid.h HAVE_INITGUID_H)
ENDIF()
+CHECK_INCLUDE_FILE(xmmintrin.h HAVE_XMMINTRIN_H)
CHECK_INCLUDE_FILE(arm_neon.h HAVE_ARM_NEON_H)
CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
diff --git a/alsoftrc.sample b/alsoftrc.sample
index bcfbe5e2..866154ef 100644
--- a/alsoftrc.sample
+++ b/alsoftrc.sample
@@ -14,8 +14,8 @@
## disable-cpu-exts:
# Disables use of the listed CPU extensions. Certain methods may utilize CPU
# extensions when detected, and this option is useful for preventing those
-# extensions from being used. The available extensions are: neon. Specifying
-# 'all' disables use of all extensions.
+# extensions from being used. The available extensions are: sse, neon.
+# Specifying 'all' disables use of all extensions.
#disable-cpu-exts =
## channels:
diff --git a/config.h.in b/config.h.in
index 82b6c334..af7420b9 100644
--- a/config.h.in
+++ b/config.h.in
@@ -104,6 +104,9 @@
/* Define if we have pthread_np.h */
#cmakedefine HAVE_PTHREAD_NP_H
+/* Define if we have xmmintrin.h */
+#cmakedefine HAVE_XMMINTRIN_H
+
/* Define if we have arm_neon.h */
#cmakedefine HAVE_ARM_NEON_H