Mix gain steps using SIMD with Neon

author: Chris Robinson <[email protected]> 2016-08-05 18:47:26 -0700
committer: Chris Robinson <[email protected]> 2016-08-05 18:47:26 -0700
commit: 6117cb2377b95e5800b6ff4febe77cf029d58e64 (patch)
tree: 86471ba43b201a87405dee4596431e654e4aa89b
parent: f775f2537920ef0031926872c57332e99c048f63 (diff)
1 files changed, 24 insertions, 0 deletions
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 073f62c8..48e41703 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -89,6 +89,30 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
         if(step != 0.0f && Counter > 0)
         {
             ALuint minsize = minu(BufferSize, Counter);
+            /* Mix with applying gain steps in aligned multiples of 4. */
+            if(minsize-pos > 3)
+            {
+                float32x4_t step4;
+                gain4 = vsetq_lane_f32(gain, gain4, 0);
+                gain4 = vsetq_lane_f32(gain + step, gain4, 1);
+                gain4 = vsetq_lane_f32(gain + step + step, gain4, 2);
+                gain4 = vsetq_lane_f32(gain + step + step + step, gain4, 3);
+                step4 = vdupq_n_f32(step + step + step + step);
+                do {
+                    const float32x4_t val4 = vld1q_f32(&data[pos]);
+                    float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
+                    dry4 = vmlaq_f32(dry4, val4, gain4);
+                    gain4 = vaddq_f32(gain4, step4);
+                    vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
+                    pos += 4;
+                } while(minsize-pos > 3);
+                /* NOTE: gain4 now represents the next four gains after the
+                 * last four mixed samples, so the lowest element represents
+                 * the next gain to apply.
+                 */
+                gain = vgetq_lane_f32(gain4, 0);
+            }
+            /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
             for(;pos < minsize;pos++)
             {
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
author	Chris Robinson <[email protected]>	2016-08-05 18:47:26 -0700
committer	Chris Robinson <[email protected]>	2016-08-05 18:47:26 -0700
commit	6117cb2377b95e5800b6ff4febe77cf029d58e64 (patch)
tree	86471ba43b201a87405dee4596431e654e4aa89b
parent	f775f2537920ef0031926872c57332e99c048f63 (diff)