aboutsummaryrefslogtreecommitdiffstats
path: root/alc
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2020-12-31 13:01:17 -0800
committer Chris Robinson <[email protected]> 2020-12-31 13:01:17 -0800
commit f2b7a063ef49e2377c41dddae095d5c66b84bf9b (patch)
tree d0e14e0603233ad2318a5b0188943c84bccba964 /alc
parent 9d354f721c39dc643399b36297c57ef809451f6f (diff)
Add NEON-enhanced FIR loops for convolution and UHJ
Diffstat (limited to 'alc')
-rw-r--r-- alc/effects/convolution.cpp 15
1 files changed, 15 insertions, 0 deletions
diff --git a/alc/effects/convolution.cpp b/alc/effects/convolution.cpp
index 64c05172..4e10b62a 100644
--- a/alc/effects/convolution.cpp
+++ b/alc/effects/convolution.cpp
@@ -5,6 +5,8 @@
#ifdef HAVE_SSE_INTRINSICS
#include <xmmintrin.h>
+#elif defined(HAVE_NEON)
+#include <arm_neon.h>
#endif
#include "alcmain.h"
@@ -126,6 +128,19 @@ void apply_fir(al::span<float> dst, const float *RESTRICT src, const float *REST
++src;
}
+#elif defined(HAVE_NEON)
+
+ for(float &output : dst)
+ {
+ float32x4_t r4{vdupq_n_f32(0.0f)};
+ for(size_t j{0};j < ConvolveUpdateSamples;j+=4)
+ r4 = vmlaq_f32(r4, vld1q_f32(&src[j]), vld1q_f32(&filter[j]));
+ r4 = vaddq_f32(r4, vrev64q_f32(r4));
+ output = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0);
+
+ ++src;
+ }
+
#else
for(float &output : dst)