diff options
author | Chris Robinson <[email protected]> | 2015-11-05 09:42:08 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2015-11-05 09:42:08 -0800 |
commit | b9e192b78a384ff13d87c606502373725042509c (patch) | |
tree | e5b99bba51f713e2f671b9ffbc37b22a1cdf5ba1 /Alc/mixer_sse.c | |
parent | dce3d0c7bf8f68c0dc4d98870f9e8119742004c0 (diff) |
Implement a band-limited sinc resampler
This is essentially a 12-point sinc resampler, unless it is resampling at a rate
higher than the output rate, in which case it varies between 12 and 24 points
and applies anti-aliasing to avoid or reduce frequencies going over the Nyquist limit.
Code provided by Christopher Fitzgerald.
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r-- | Alc/mixer_sse.c | 66 |
1 file changed, 66 insertions, 0 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index e67c9cca..87a17e2c 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -12,6 +12,72 @@ #include "mixer_defs.h" +// Obtain the next sample from the interpolator (SSE version). +static inline ALfloat bsinc32_sse(const BsincState *state, const ALfloat *in, const ALuint frac) +{ + const __m128 sf4 = _mm_set1_ps(state->sf); + ALfloat pf, r; + ALuint pi; + + // Calculate the phase index and factor. +#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF)); +#undef FRAC_PHASE_BITDIFF + + { + const ALuint m = state->m; + const ALint l = state->l; + const ALfloat *fil = state->coeffs[pi].filter; + const ALfloat *scd = state->coeffs[pi].scDelta; + const ALfloat *phd = state->coeffs[pi].phDelta; + const ALfloat *spd = state->coeffs[pi].spDelta; + const __m128 pf4 = _mm_set1_ps(pf); + __m128 r4 = _mm_setzero_ps(); + ALuint j_f; + ALint j_s; + + // Apply the scale and phase interpolated filter. 
+ for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4) + { + const __m128 f4 = _mm_add_ps( + _mm_add_ps( + _mm_load_ps(&fil[j_f]), + _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f])) + ), + _mm_mul_ps( + pf4, + _mm_add_ps( + _mm_load_ps(&phd[j_f]), + _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f])) + ) + ) + ); + r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&in[j_s]))); + } + r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); + r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); + r = _mm_cvtss_f32(r4); + } + return r; +} + +const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac, + ALuint increment, ALfloat *restrict dst, ALuint dstlen) +{ + ALuint i; + for(i = 0;i < dstlen;i++) + { + dst[i] = bsinc32_sse(state, src, frac); + + frac += increment; + src += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} + + static inline void SetupCoeffs(ALfloat (*restrict OutCoeffs)[2], const HrtfParams *hrtfparams, ALuint IrSize, ALuint Counter) |