about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Alc/mixer/mixer_neon.c 12
-rw-r--r-- Alc/mixer/mixer_sse.c 12
2 files changed, 16 insertions, 8 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c
index 0af977e6..9aa279a2 100644
--- a/Alc/mixer/mixer_neon.c
+++ b/Alc/mixer/mixer_neon.c
@@ -101,16 +101,20 @@ const ALfloat *Resample_bsinc_Neon(const InterpState *state,
// Apply the scale and phase interpolated filter.
r4 = vdupq_n_f32(0.0f);
{
+ const ALsizei count = m >> 2;
const float32x4_t pf4 = vdupq_n_f32(pf);
- for(j = 0;j < m;j+=4,fil++,scd++,phd++,spd++)
+
+ ASSUME(count > 0);
+
+ for(j = 0;j < count;j++)
{
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
const float32x4_t f4 = vmlaq_f32(
- vmlaq_f32(*fil, sf4, *scd),
- pf4, vmlaq_f32(*phd, sf4, *spd)
+ vmlaq_f32(fil[j], sf4, scd[j]),
+ pf4, vmlaq_f32(phd[j], sf4, spd[j])
);
/* r += f*src */
- r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
+ r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j*4]));
}
}
r4 = vaddq_f32(r4, vcombine_f32(vrev64_f32(vget_high_f32(r4)),
diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c
index 5b4208f9..d7d54993 100644
--- a/Alc/mixer/mixer_sse.c
+++ b/Alc/mixer/mixer_sse.c
@@ -45,17 +45,21 @@ const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restr
// Apply the scale and phase interpolated filter.
r4 = _mm_setzero_ps();
{
+ const ALsizei count = m >> 2;
const __m128 pf4 = _mm_set1_ps(pf);
+
+ ASSUME(count > 0);
+
#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
- for(j = 0;j < m;j+=4,fil++,scd++,phd++,spd++)
+ for(j = 0;j < count;j++)
{
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
const __m128 f4 = MLA4(
- MLA4(*fil, sf4, *scd),
- pf4, MLA4(*phd, sf4, *spd)
+ MLA4(fil[j], sf4, scd[j]),
+ pf4, MLA4(phd[j], sf4, spd[j])
);
/* r += f*src */
- r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j]));
+ r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j*4]));
}
#undef MLA4
}