diff options
author | Chris Robinson <[email protected]> | 2018-05-15 23:14:23 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2018-05-15 23:14:23 -0700 |
commit | 588a6bcb4f79e4be1cae651dbbc14356d039c91b (patch) | |
tree | 27bbe529d74d445cb06721373c6c1a10ab2783ba /Alc | |
parent | 72e39ba1c93e14f57dc342d0e70f5fb272a6d686 (diff) |
Simplify counting for SIMD MixRow functions
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/mixer/mixer_neon.c | 19 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse.c | 17 |
2 files changed, 20 insertions, 16 deletions
```diff
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c
index b6181b42..0af977e6 100644
--- a/Alc/mixer/mixer_neon.c
+++ b/Alc/mixer/mixer_neon.c
@@ -250,13 +250,10 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe
 void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE],
                  ALsizei InChans, ALsizei InPos, ALsizei BufferSize)
 {
-    float32x4_t gain4;
     ALsizei c;
 
     ASSUME(InChans > 0);
     ASSUME(BufferSize > 0);
-    data = ASSUME_ALIGNED(data, 16);
-    OutBuffer = ASSUME_ALIGNED(OutBuffer, 16);
 
     for(c = 0;c < InChans;c++)
     {
@@ -265,13 +262,17 @@ void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restr
         if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
             continue;
 
-        gain4 = vdupq_n_f32(gain);
-        for(;BufferSize-pos > 3;pos += 4)
+        if(LIKELY(BufferSize > 3))
         {
-            const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]);
-            float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]);
-            dry4 = vmlaq_f32(dry4, val4, gain4);
-            vst1q_f32(&OutBuffer[pos], dry4);
+            ALsizei todo = BufferSize >> 2;
+            float32x4_t gain4 = vdupq_n_f32(gain);
+            do {
+                const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]);
+                float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]);
+                dry4 = vmlaq_f32(dry4, val4, gain4);
+                vst1q_f32(&OutBuffer[pos], dry4);
+                pos += 4;
+            } while(--todo);
         }
         for(;pos < BufferSize;pos++)
             OutBuffer[pos] += data[c][InPos+pos]*gain;
diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c
index fa79eb4d..5b4208f9 100644
--- a/Alc/mixer/mixer_sse.c
+++ b/Alc/mixer/mixer_sse.c
@@ -215,7 +215,6 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer
 void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE],
                 ALsizei InChans, ALsizei InPos, ALsizei BufferSize)
 {
-    __m128 gain4;
     ALsizei c;
 
     ASSUME(InChans > 0);
@@ -228,13 +227,17 @@ void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restri
         if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
             continue;
 
-        gain4 = _mm_set1_ps(gain);
-        for(;BufferSize-pos > 3;pos += 4)
+        if(LIKELY(BufferSize > 3))
         {
-            const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]);
-            __m128 dry4 = _mm_load_ps(&OutBuffer[pos]);
-            dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
-            _mm_store_ps(&OutBuffer[pos], dry4);
+            ALsizei todo = BufferSize >> 2;
+            const __m128 gain4 = _mm_set1_ps(gain);
+            do {
+                const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]);
+                __m128 dry4 = _mm_load_ps(&OutBuffer[pos]);
+                dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
+                _mm_store_ps(&OutBuffer[pos], dry4);
+                pos += 4;
+            } while(--todo);
         }
         for(;pos < BufferSize;pos++)
             OutBuffer[pos] += data[c][InPos+pos]*gain;
```