aboutsummaryrefslogtreecommitdiffstats
path: root/Alc
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2018-05-15 23:14:23 -0700
committer Chris Robinson <[email protected]> 2018-05-15 23:14:23 -0700
commit 588a6bcb4f79e4be1cae651dbbc14356d039c91b (patch)
tree 27bbe529d74d445cb06721373c6c1a10ab2783ba /Alc
parent 72e39ba1c93e14f57dc342d0e70f5fb272a6d686 (diff)
Simplify counting for SIMD MixRow functions
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/mixer/mixer_neon.c 19
-rw-r--r-- Alc/mixer/mixer_sse.c 17
2 files changed, 20 insertions, 16 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c
index b6181b42..0af977e6 100644
--- a/Alc/mixer/mixer_neon.c
+++ b/Alc/mixer/mixer_neon.c
@@ -250,13 +250,10 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe
void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize)
{
- float32x4_t gain4;
ALsizei c;
ASSUME(InChans > 0);
ASSUME(BufferSize > 0);
- data = ASSUME_ALIGNED(data, 16);
- OutBuffer = ASSUME_ALIGNED(OutBuffer, 16);
for(c = 0;c < InChans;c++)
{
@@ -265,13 +262,17 @@ void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restr
if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
continue;
- gain4 = vdupq_n_f32(gain);
- for(;BufferSize-pos > 3;pos += 4)
+ if(LIKELY(BufferSize > 3))
{
- const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]);
- float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]);
- dry4 = vmlaq_f32(dry4, val4, gain4);
- vst1q_f32(&OutBuffer[pos], dry4);
+ ALsizei todo = BufferSize >> 2;
+ float32x4_t gain4 = vdupq_n_f32(gain);
+ do {
+ const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]);
+ float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]);
+ dry4 = vmlaq_f32(dry4, val4, gain4);
+ vst1q_f32(&OutBuffer[pos], dry4);
+ pos += 4;
+ } while(--todo);
}
for(;pos < BufferSize;pos++)
OutBuffer[pos] += data[c][InPos+pos]*gain;
diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c
index fa79eb4d..5b4208f9 100644
--- a/Alc/mixer/mixer_sse.c
+++ b/Alc/mixer/mixer_sse.c
@@ -215,7 +215,6 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer
void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize)
{
- __m128 gain4;
ALsizei c;
ASSUME(InChans > 0);
@@ -228,13 +227,17 @@ void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restri
if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
continue;
- gain4 = _mm_set1_ps(gain);
- for(;BufferSize-pos > 3;pos += 4)
+ if(LIKELY(BufferSize > 3))
{
- const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]);
- __m128 dry4 = _mm_load_ps(&OutBuffer[pos]);
- dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
- _mm_store_ps(&OutBuffer[pos], dry4);
+ ALsizei todo = BufferSize >> 2;
+ const __m128 gain4 = _mm_set1_ps(gain);
+ do {
+ const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]);
+ __m128 dry4 = _mm_load_ps(&OutBuffer[pos]);
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
+ _mm_store_ps(&OutBuffer[pos], dry4);
+ pos += 4;
+ } while(--todo);
}
for(;pos < BufferSize;pos++)
OutBuffer[pos] += data[c][InPos+pos]*gain;