diff options
-rw-r--r-- | Alc/mixer_c.c | 64 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 79 |
2 files changed, 63 insertions, 80 deletions
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c index bbed14d3..ef37b730 100644 --- a/Alc/mixer_c.c +++ b/Alc/mixer_c.c @@ -17,39 +17,6 @@ static inline ALfloat fir4_32(const ALfloat *vals, ALuint frac) static inline ALfloat fir8_32(const ALfloat *vals, ALuint frac) { return resample_fir8(vals[-3], vals[-2], vals[-1], vals[0], vals[1], vals[2], vals[3], vals[4], frac); } -// Obtain the next sample from the interpolator. - -static inline ALfloat bsinc32(const BsincState *state, const ALfloat *vals, const ALuint frac) -{ - const ALfloat sf = state->sf; - ALfloat pf, r; - ALuint pi; - - // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) - pi = frac >> FRAC_PHASE_BITDIFF; - pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF)); -#undef FRAC_PHASE_BITDIFF - - r = 0.0f; - { - const ALuint m = state->m; - const ALint l = state->l; - const ALfloat *fil = state->coeffs[pi].filter; - const ALfloat *scd = state->coeffs[pi].scDelta; - const ALfloat *phd = state->coeffs[pi].phDelta; - const ALfloat *spd = state->coeffs[pi].spDelta; - ALuint j_f; - ALint j_s; - - // Apply the scale and phase interpolated filter. - for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++) - r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) * - vals[j_s]; - } - return r; -} - const ALfloat *Resample_copy32_C(const BsincState* UNUSED(state), const ALfloat *src, ALuint UNUSED(frac), ALuint UNUSED(increment), ALfloat *restrict dst, ALuint numsamples) @@ -85,13 +52,38 @@ DECL_TEMPLATE(lerp32) DECL_TEMPLATE(fir4_32) DECL_TEMPLATE(fir8_32) +#undef DECL_TEMPLATE + const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, ALuint frac, ALuint increment, ALfloat *restrict dst, ALuint dstlen) { - ALuint i; + const ALfloat *fil, *scd, *phd, *spd; + const ALfloat sf = state->sf; + const ALuint m = state->m; + const ALint l = state->l; + ALuint j_f, pi, i; + ALfloat pf, r; + ALint j_s; + for(i = 0;i < dstlen;i++) { - dst[i] = bsinc32(state, src, frac); + // Calculate the phase index and factor. +#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF)); +#undef FRAC_PHASE_BITDIFF + + fil = state->coeffs[pi].filter; + scd = state->coeffs[pi].scDelta; + phd = state->coeffs[pi].phDelta; + spd = state->coeffs[pi].spDelta; + + // Apply the scale and phase interpolated filter. + r = 0.0f; + for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++) + r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) * + src[j_s]; + dst[i] = r; frac += increment; src += frac>>FRACTIONBITS; @@ -100,8 +92,6 @@ const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, A return dst; } -#undef DECL_TEMPLATE - void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const ALfloat *src, ALuint numsamples) { diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index 87a17e2c..090b7a5a 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -12,63 +12,56 @@ #include "mixer_defs.h" -// Obtain the next sample from the interpolator (SSE version). -static inline ALfloat bsinc32_sse(const BsincState *state, const ALfloat *in, const ALuint frac) +const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac, + ALuint increment, ALfloat *restrict dst, ALuint dstlen) { const __m128 sf4 = _mm_set1_ps(state->sf); - ALfloat pf, r; - ALuint pi; + const ALuint m = state->m; + const ALint l = state->l; + const ALfloat *fil, *scd, *phd, *spd; + ALuint pi, j_f, i; + ALfloat pf; + ALint j_s; + __m128 r4; - // Calculate the phase index and factor. + for(i = 0;i < dstlen;i++) + { + // Calculate the phase index and factor. #define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) - pi = frac >> FRAC_PHASE_BITDIFF; - pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF)); + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF)); #undef FRAC_PHASE_BITDIFF - { - const ALuint m = state->m; - const ALint l = state->l; - const ALfloat *fil = state->coeffs[pi].filter; - const ALfloat *scd = state->coeffs[pi].scDelta; - const ALfloat *phd = state->coeffs[pi].phDelta; - const ALfloat *spd = state->coeffs[pi].spDelta; - const __m128 pf4 = _mm_set1_ps(pf); - __m128 r4 = _mm_setzero_ps(); - ALuint j_f; - ALint j_s; + fil = state->coeffs[pi].filter; + scd = state->coeffs[pi].scDelta; + phd = state->coeffs[pi].phDelta; + spd = state->coeffs[pi].spDelta; // Apply the scale and phase interpolated filter. - for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4) + r4 = _mm_setzero_ps(); { - const __m128 f4 = _mm_add_ps( - _mm_add_ps( - _mm_load_ps(&fil[j_f]), - _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f])) - ), - _mm_mul_ps( - pf4, + const __m128 pf4 = _mm_set1_ps(pf); + for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4) + { + const __m128 f4 = _mm_add_ps( _mm_add_ps( - _mm_load_ps(&phd[j_f]), - _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f])) + _mm_load_ps(&fil[j_f]), + _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f])) + ), + _mm_mul_ps( + pf4, + _mm_add_ps( + _mm_load_ps(&phd[j_f]), + _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f])) + ) ) - ) - ); - r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&in[j_s]))); + ); + r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&src[j_s]))); + } } r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); - r = _mm_cvtss_f32(r4); - } - return r; -} - -const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac, - ALuint increment, ALfloat *restrict dst, ALuint dstlen) -{ - ALuint i; - for(i = 0;i < dstlen;i++) - { - dst[i] = bsinc32_sse(state, src, frac); + dst[i] = _mm_cvtss_f32(r4); frac += increment; src += frac>>FRACTIONBITS; |