diff options
author | Chris Robinson <[email protected]> | 2020-12-31 13:01:17 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2020-12-31 13:01:17 -0800 |
commit | f2b7a063ef49e2377c41dddae095d5c66b84bf9b (patch) | |
tree | d0e14e0603233ad2318a5b0188943c84bccba964 /alc | |
parent | 9d354f721c39dc643399b36297c57ef809451f6f (diff) |
Add NEON-enhanced FIR loops for convolution and UHJ
Diffstat (limited to 'alc')
-rw-r--r-- | alc/effects/convolution.cpp | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/alc/effects/convolution.cpp b/alc/effects/convolution.cpp
index 64c05172..4e10b62a 100644
--- a/alc/effects/convolution.cpp
+++ b/alc/effects/convolution.cpp
@@ -5,6 +5,8 @@

 #ifdef HAVE_SSE_INTRINSICS
 #include <xmmintrin.h>
+#elif defined(HAVE_NEON)
+#include <arm_neon.h>
 #endif

 #include "alcmain.h"
@@ -126,6 +128,19 @@ void apply_fir(al::span<float> dst, const float *RESTRICT src, const float *REST
         ++src;
     }

+#elif defined(HAVE_NEON)
+
+    for(float &output : dst)
+    {
+        float32x4_t r4{vdupq_n_f32(0.0f)};
+        for(size_t j{0};j < ConvolveUpdateSamples;j+=4)
+            r4 = vmlaq_f32(r4, vld1q_f32(&src[j]), vld1q_f32(&filter[j]));
+        r4 = vaddq_f32(r4, vrev64q_f32(r4));
+        output = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0);
+
+        ++src;
+    }
+
 #else

     for(float &output : dst)
```