diff options
author | Chris Robinson <[email protected]> | 2018-05-19 08:18:08 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2018-05-19 08:18:08 -0700 |
commit | 7501c8b48317ee2c09bf928fc1d4bf52c98dce5f (patch) | |
tree | 1b5aa48c064c24074299dbfa7fcfb01451cef272 | |
parent | 5e7513d0a0b3248f8f5bdba1ebc1d96fd7724a39 (diff) |
Simplify counting for the bsinc FIR loop
-rw-r--r-- | Alc/mixer/mixer_neon.c | 12 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse.c | 12 |
2 files changed, 16 insertions, 8 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c index 0af977e6..9aa279a2 100644 --- a/Alc/mixer/mixer_neon.c +++ b/Alc/mixer/mixer_neon.c @@ -101,16 +101,20 @@ const ALfloat *Resample_bsinc_Neon(const InterpState *state, // Apply the scale and phase interpolated filter. r4 = vdupq_n_f32(0.0f); { + const ALsizei count = m >> 2; const float32x4_t pf4 = vdupq_n_f32(pf); - for(j = 0;j < m;j+=4,fil++,scd++,phd++,spd++) + + ASSUME(count > 0); + + for(j = 0;j < count;j++) { /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ const float32x4_t f4 = vmlaq_f32( - vmlaq_f32(*fil, sf4, *scd), - pf4, vmlaq_f32(*phd, sf4, *spd) + vmlaq_f32(fil[j], sf4, scd[j]), + pf4, vmlaq_f32(phd[j], sf4, spd[j]) ); /* r += f*src */ - r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j])); + r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j*4])); } } r4 = vaddq_f32(r4, vcombine_f32(vrev64_f32(vget_high_f32(r4)), diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c index 5b4208f9..d7d54993 100644 --- a/Alc/mixer/mixer_sse.c +++ b/Alc/mixer/mixer_sse.c @@ -45,17 +45,21 @@ const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restr // Apply the scale and phase interpolated filter. r4 = _mm_setzero_ps(); { + const ALsizei count = m >> 2; const __m128 pf4 = _mm_set1_ps(pf); + + ASSUME(count > 0); + #define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) - for(j = 0;j < m;j+=4,fil++,scd++,phd++,spd++) + for(j = 0;j < count;j++) { /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ const __m128 f4 = MLA4( - MLA4(*fil, sf4, *scd), - pf4, MLA4(*phd, sf4, *spd) + MLA4(fil[j], sf4, scd[j]), + pf4, MLA4(phd[j], sf4, spd[j]) ); /* r += f*src */ - r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j])); + r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j*4])); } #undef MLA4 } |