about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Alc/mixer.c 55
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- config.h.in 3
3 files changed, 52 insertions, 7 deletions
diff --git a/Alc/mixer.c b/Alc/mixer.c
index d68b6567..d3f22b51 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -69,6 +69,53 @@ static __inline ALdouble cubic8(const ALbyte *vals, ALint step, ALint frac)
#define UNLIKELY(x) (x)
#endif
+#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_H)
+#include <arm_neon.h>
+/* NEON build of ApplyCoeffs: for every c below HRIR_LENGTH, adds
+ * Coeffs[c][0] * left and Coeffs[c][1] * right into the row of Values at
+ * index (Offset+c)&HRIR_MASK, handling two consecutive values of c per
+ * loop iteration.
+ *
+ * Offset - index into Values that pairs with Coeffs[0]; masked on use.
+ * Values - buffer of float pairs, updated in place.
+ * Coeffs - coefficient pairs, one row per c; only read here.
+ * left   - factor applied to column 0 of each coefficient row.
+ * right  - factor applied to column 1 of each coefficient row.
+ *
+ * NOTE(review): the step of 2 presumes HRIR_LENGTH is even; with an odd
+ * length the last 4-float load would read one row past the final c - confirm.
+ * NOTE(review): the float32_t casts presume ALfloat is a 32-bit float - confirm.
+ */
+static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+ ALfloat (*RESTRICT Coeffs)[2],
+ ALfloat left, ALfloat right)
+{
+ ALuint c;
+ float32x4_t leftright4; /* loop-invariant multiplier, built once below */
+ {
+ float32x2_t leftright2 = vdup_n_f32(0.0);
+ leftright2 = vset_lane_f32(left, leftright2, 0);
+ leftright2 = vset_lane_f32(right, leftright2, 1);
+ leftright4 = vcombine_f32(leftright2, leftright2); /* lanes: left, right, left, right */
+ }
+ for(c = 0;c < HRIR_LENGTH;c += 2)
+ {
+ const ALuint o0 = (Offset+c)&HRIR_MASK; /* Values row paired with Coeffs[c] */
+ const ALuint o1 = (o0+1)&HRIR_MASK; /* row for c+1, masked on its own so it may wrap */
+ float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), /* two 2-float loads: o0 and o1 need not be adjacent */
+ vld1_f32((float32_t*)&Values[o1][0]));
+ float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); /* one 4-float load covering Coeffs[c] and Coeffs[c+1] */
+
+ vals = vmlaq_f32(vals, coefs, leftright4); /* vals += coefs * leftright4, lane by lane */
+
+ vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); /* low half back to row o0 */
+ vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); /* high half back to row o1 */
+ }
+}
+
+#else
+/* Plain C build of ApplyCoeffs, compiled when the NEON variant above is not
+ * selected by the preprocessor. For every c below HRIR_LENGTH it adds
+ * Coeffs[c][0] * left and Coeffs[c][1] * right into the row of Values at
+ * index (Offset+c)&HRIR_MASK.
+ *
+ * Offset - index into Values that pairs with Coeffs[0]; masked on use.
+ * Values - buffer of float pairs, updated in place.
+ * Coeffs - coefficient pairs, one row per c; only read here.
+ * left   - factor applied to column 0 of each coefficient row.
+ * right  - factor applied to column 1 of each coefficient row.
+ */
+static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+ ALfloat (*RESTRICT Coeffs)[2],
+ ALfloat left, ALfloat right)
+{
+ ALuint c;
+ for(c = 0;c < HRIR_LENGTH;c++)
+ {
+ const ALuint off = (Offset+c)&HRIR_MASK; /* masked so the index stays within HRIR_MASK */
+ Values[off][0] += Coeffs[c][0] * left;
+ Values[off][1] += Coeffs[c][1] * right;
+ }
+}
+
+#endif
+
#define DECL_TEMPLATE(T, sampler) \
static void Mix_Hrtf_##T##_##sampler(ALsource *Source, ALCdevice *Device, \
const ALvoid *srcdata, ALuint *DataPosInt, ALuint *DataPosFrac, \
@@ -185,13 +232,7 @@ static void Mix_Hrtf_##T##_##sampler(ALsource *Source, ALCdevice *Device, \
Values[Offset&HRIR_MASK][1] = 0.0f; \
Offset++; \
\
- for(c = 0;c < HRIR_LENGTH;c++) \
- { \
- const ALuint off = (Offset+c)&HRIR_MASK; \
- Values[off][0] += Coeffs[c][0] * left; \
- Values[off][1] += Coeffs[c][1] * right; \
- } \
- \
+ ApplyCoeffs(Offset, Values, Coeffs, left, right); \
DryBuffer[OutPos][FRONT_LEFT] += Values[Offset&HRIR_MASK][0]; \
DryBuffer[OutPos][FRONT_RIGHT] += Values[Offset&HRIR_MASK][1]; \
\
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa6ecb2b..9901f511 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,6 +201,7 @@ CHECK_INCLUDE_FILE(guiddef.h HAVE_GUIDDEF_H)
IF(NOT HAVE_GUIDDEF_H)
CHECK_INCLUDE_FILE(initguid.h HAVE_INITGUID_H)
ENDIF()
+CHECK_INCLUDE_FILE(arm_neon.h HAVE_ARM_NEON_H)
CHECK_LIBRARY_EXISTS(m powf "" HAVE_POWF)
CHECK_LIBRARY_EXISTS(m sqrtf "" HAVE_SQRTF)
diff --git a/config.h.in b/config.h.in
index c289e75d..4421578a 100644
--- a/config.h.in
+++ b/config.h.in
@@ -92,6 +92,9 @@
/* Define if we have pthread_np.h */
#cmakedefine HAVE_PTHREAD_NP_H
+/* Define if we have arm_neon.h */
+#cmakedefine HAVE_ARM_NEON_H
+
/* Define if we have guiddef.h */
#cmakedefine HAVE_GUIDDEF_H