2 files changed, 63 insertions, 80 deletions
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index bbed14d3..ef37b730 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -17,39 +17,6 @@ static inline ALfloat fir4_32(const ALfloat *vals, ALuint frac)
 static inline ALfloat fir8_32(const ALfloat *vals, ALuint frac)
 { return resample_fir8(vals[-3], vals[-2], vals[-1], vals[0], vals[1], vals[2], vals[3], vals[4], frac); }
 
-// Obtain the next sample from the interpolator.
-
-static inline ALfloat bsinc32(const BsincState *state, const ALfloat *vals, const ALuint frac)
-{
-    const ALfloat sf = state->sf;
-    ALfloat pf, r;
-    ALuint pi;
-
-    // Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
-    pi = frac >> FRAC_PHASE_BITDIFF;
-    pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
-#undef FRAC_PHASE_BITDIFF
-
-    r = 0.0f;
-    {
-        const ALuint m = state->m;
-        const ALint l = state->l;
-        const ALfloat *fil = state->coeffs[pi].filter;
-        const ALfloat *scd = state->coeffs[pi].scDelta;
-        const ALfloat *phd = state->coeffs[pi].phDelta;
-        const ALfloat *spd = state->coeffs[pi].spDelta;
-        ALuint j_f;
-        ALint j_s;
-
-        // Apply the scale and phase interpolated filter.
-        for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++)
-            r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) *
-                 vals[j_s];
-    }
-    return r;
-}
-
 
 const ALfloat *Resample_copy32_C(const BsincState* UNUSED(state), const ALfloat *src, ALuint UNUSED(frac),
   ALuint UNUSED(increment), ALfloat *restrict dst, ALuint numsamples)
@@ -85,13 +52,38 @@ DECL_TEMPLATE(lerp32)
 DECL_TEMPLATE(fir4_32)
 DECL_TEMPLATE(fir8_32)
 
+#undef DECL_TEMPLATE
+
 const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, ALuint frac,
                                   ALuint increment, ALfloat *restrict dst, ALuint dstlen)
 {
-    ALuint i;
+    const ALfloat *fil, *scd, *phd, *spd;
+    const ALfloat sf = state->sf;
+    const ALuint m = state->m;
+    const ALint l = state->l;
+    ALuint j_f, pi, i;
+    ALfloat pf, r;
+    ALint j_s;
+
     for(i = 0;i < dstlen;i++)
     {
-        dst[i] = bsinc32(state, src, frac);
+        // Calculate the phase index and factor.
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
+        pi = frac >> FRAC_PHASE_BITDIFF;
+        pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
+#undef FRAC_PHASE_BITDIFF
+
+        fil = state->coeffs[pi].filter;
+        scd = state->coeffs[pi].scDelta;
+        phd = state->coeffs[pi].phDelta;
+        spd = state->coeffs[pi].spDelta;
+
+        // Apply the scale and phase interpolated filter.
+        r = 0.0f;
+        for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++)
+            r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) *
+                    src[j_s];
+        dst[i] = r;
 
         frac += increment;
         src  += frac>>FRACTIONBITS;
@@ -100,8 +92,6 @@ const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, A
     return dst;
 }
 
-#undef DECL_TEMPLATE
-
 
 void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const ALfloat *src, ALuint numsamples)
 {
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 87a17e2c..090b7a5a 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -12,63 +12,56 @@
 #include "mixer_defs.h"
 
 
-// Obtain the next sample from the interpolator (SSE version).
-static inline ALfloat bsinc32_sse(const BsincState *state, const ALfloat *in, const ALuint frac)
+const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac,
+                                    ALuint increment, ALfloat *restrict dst, ALuint dstlen)
 {
     const __m128 sf4 = _mm_set1_ps(state->sf);
-    ALfloat pf, r;
-    ALuint pi;
+    const ALuint m = state->m;
+    const ALint l = state->l;
+    const ALfloat *fil, *scd, *phd, *spd;
+    ALuint pi, j_f, i;
+    ALfloat pf;
+    ALint j_s;
+    __m128 r4;
 
-    // Calculate the phase index and factor.
+    for(i = 0;i < dstlen;i++)
+    {
+        // Calculate the phase index and factor.
 #define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
-    pi = frac >> FRAC_PHASE_BITDIFF;
-    pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
+        pi = frac >> FRAC_PHASE_BITDIFF;
+        pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
 #undef FRAC_PHASE_BITDIFF
 
-    {
-        const ALuint m = state->m;
-        const ALint l = state->l;
-        const ALfloat *fil = state->coeffs[pi].filter;
-        const ALfloat *scd = state->coeffs[pi].scDelta;
-        const ALfloat *phd = state->coeffs[pi].phDelta;
-        const ALfloat *spd = state->coeffs[pi].spDelta;
-        const __m128 pf4 = _mm_set1_ps(pf);
-        __m128 r4 = _mm_setzero_ps();
-        ALuint j_f;
-        ALint j_s;
+        fil = state->coeffs[pi].filter;
+        scd = state->coeffs[pi].scDelta;
+        phd = state->coeffs[pi].phDelta;
+        spd = state->coeffs[pi].spDelta;
 
         // Apply the scale and phase interpolated filter.
-        for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4)
+        r4 = _mm_setzero_ps();
         {
-            const __m128 f4 = _mm_add_ps(
-                _mm_add_ps(
-                    _mm_load_ps(&fil[j_f]),
-                    _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f]))
-                ),
-                _mm_mul_ps(
-                    pf4,
+            const __m128 pf4 = _mm_set1_ps(pf);
+            for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4)
+            {
+                const __m128 f4 = _mm_add_ps(
                     _mm_add_ps(
-                        _mm_load_ps(&phd[j_f]),
-                        _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f]))
+                        _mm_load_ps(&fil[j_f]),
+                        _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f]))
+                    ),
+                    _mm_mul_ps(
+                        pf4,
+                        _mm_add_ps(
+                            _mm_load_ps(&phd[j_f]),
+                            _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f]))
+                        )
                     )
-                )
-            );
-            r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&in[j_s])));
+                );
+                r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&src[j_s])));
+            }
         }
         r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
         r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
-        r = _mm_cvtss_f32(r4);
-    }
-    return r;
-}
-
-const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac,
-                                    ALuint increment, ALfloat *restrict dst, ALuint dstlen)
-{
-    ALuint i;
-    for(i = 0;i < dstlen;i++)
-    {
-        dst[i] = bsinc32_sse(state, src, frac);
+        dst[i] = _mm_cvtss_f32(r4);
 
         frac += increment;
         src  += frac>>FRACTIONBITS;