diff options
author | Chris Robinson <[email protected]> | 2016-08-05 18:47:26 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2016-08-05 18:47:26 -0700 |
commit | 6117cb2377b95e5800b6ff4febe77cf029d58e64 (patch) | |
tree | 86471ba43b201a87405dee4596431e654e4aa89b /Alc | |
parent | f775f2537920ef0031926872c57332e99c048f63 (diff) |
Mix gain steps using SIMD with Neon
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/mixer_neon.c | 24 |
1 files changed, 24 insertions, 0 deletions
```diff
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 073f62c8..48e41703 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -89,6 +89,30 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
         if(step != 0.0f && Counter > 0)
         {
             ALuint minsize = minu(BufferSize, Counter);
+            /* Mix with applying gain steps in aligned multiples of 4. */
+            if(minsize-pos > 3)
+            {
+                float32x4_t step4;
+                gain4 = vsetq_lane_f32(gain, gain4, 0);
+                gain4 = vsetq_lane_f32(gain + step, gain4, 1);
+                gain4 = vsetq_lane_f32(gain + step + step, gain4, 2);
+                gain4 = vsetq_lane_f32(gain + step + step + step, gain4, 3);
+                step4 = vdupq_n_f32(step + step + step + step);
+                do {
+                    const float32x4_t val4 = vld1q_f32(&data[pos]);
+                    float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
+                    dry4 = vmlaq_f32(dry4, val4, gain4);
+                    gain4 = vaddq_f32(gain4, step4);
+                    vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
+                    pos += 4;
+                } while(minsize-pos > 3);
+                /* NOTE: gain4 now represents the next four gains after the
+                 * last four mixed samples, so the lowest element represents
+                 * the next gain to apply.
+                 */
+                gain = vgetq_lane_f32(gain4, 0);
+            }
+            /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
             for(;pos < minsize;pos++)
             {
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
```