aboutsummaryrefslogtreecommitdiffstats
path: root/Alc/mixer_sse.c
diff options
context:
space:
mode:
authorChris Robinson <[email protected]>2015-11-05 09:42:08 -0800
committerChris Robinson <[email protected]>2015-11-05 09:42:08 -0800
commitb9e192b78a384ff13d87c606502373725042509c (patch)
treee5b99bba51f713e2f671b9ffbc37b22a1cdf5ba1 /Alc/mixer_sse.c
parentdce3d0c7bf8f68c0dc4d98870f9e8119742004c0 (diff)
Implement a band-limited sinc resampler
This is essentially a 12-point sinc resampler, unless the source is being resampled from a rate higher than the output rate, in which case it varies between 12 and 24 points and applies anti-aliasing to avoid or reduce frequencies going over the Nyquist limit. Code provided by Christopher Fitzgerald.
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r--Alc/mixer_sse.c66
1 files changed, 66 insertions, 0 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index e67c9cca..87a17e2c 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -12,6 +12,72 @@
#include "mixer_defs.h"
+// Produce one output sample from the band-limited sinc interpolator, using
+// SSE to evaluate four filter taps per iteration.
+//
+//  state - supplies the scale factor (sf), the tap count (m), the offset of
+//          the first tap relative to `in` (l, which is signed), and the
+//          per-phase coefficient tables (filter/scDelta/phDelta/spDelta).
+//  in    - input samples; taps are read starting at in[l].
+//  frac  - fixed-point fractional position. Its upper bits select the phase
+//          table entry; its remaining low bits, scaled into [0,1), weight the
+//          phase-delta terms.
+//
+// The coefficient tables are read with aligned loads (_mm_load_ps), so they
+// have to be 16-byte aligned; the input is read with unaligned loads. Taps
+// are consumed four at a time, so the tables and the input must be readable
+// up to m rounded up to a multiple of four.
+static inline ALfloat bsinc32_sse(const BsincState *state, const ALfloat *in, const ALuint frac)
+{
+    // Number of fraction bits left over once the phase index is taken out.
+    const ALuint shift = FRACTIONBITS-BSINC_PHASE_BITS;
+
+    // Split the fraction into a phase table index and an in-phase factor.
+    const ALuint phase = frac >> shift;
+    const ALfloat phase_frac = (frac & ((1<<shift)-1)) * (1.0f/(1<<shift));
+
+    const ALfloat *filter = state->coeffs[phase].filter;
+    const ALfloat *sc_delta = state->coeffs[phase].scDelta;
+    const ALfloat *ph_delta = state->coeffs[phase].phDelta;
+    const ALfloat *sp_delta = state->coeffs[phase].spDelta;
+    const ALfloat *first_tap = in + state->l;
+    const ALuint taps = state->m;
+
+    const __m128 scale4 = _mm_set1_ps(state->sf);
+    const __m128 phase4 = _mm_set1_ps(phase_frac);
+    __m128 acc = _mm_setzero_ps();
+    ALuint k;
+
+    // coeff = (filter + sf*scDelta) + pf*(phDelta + sf*spDelta), then
+    // accumulate coeff*input lane by lane.
+    for(k = 0;k < taps;k += 4)
+    {
+        const __m128 base = _mm_add_ps(
+            _mm_load_ps(filter + k),
+            _mm_mul_ps(scale4, _mm_load_ps(sc_delta + k))
+        );
+        const __m128 slope = _mm_add_ps(
+            _mm_load_ps(ph_delta + k),
+            _mm_mul_ps(scale4, _mm_load_ps(sp_delta + k))
+        );
+        const __m128 coeff = _mm_add_ps(base, _mm_mul_ps(phase4, slope));
+
+        acc = _mm_add_ps(acc, _mm_mul_ps(coeff, _mm_loadu_ps(first_tap + k)));
+    }
+
+    // Horizontal sum: add the lane-reversed vector, then fold the upper pair
+    // onto the lower pair so lane 0 holds the total of all four lanes.
+    {
+        const __m128 reversed = _mm_shuffle_ps(acc, acc, _MM_SHUFFLE(0, 1, 2, 3));
+        const __m128 pairs = _mm_add_ps(acc, reversed);
+        const __m128 total = _mm_add_ps(pairs, _mm_movehl_ps(pairs, pairs));
+        return _mm_cvtss_f32(total);
+    }
+}
+
+// Fill dst[0..dstlen) with samples resampled from src by bsinc32_sse.
+//
+// After each output sample, `increment` is added to the fixed-point position
+// `frac`; the whole-sample part (frac>>FRACTIONBITS) advances `src` and the
+// remainder (frac&FRACTIONMASK) is carried into the next sample.
+//
+// Returns dst.
+const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac,
+                                    ALuint increment, ALfloat *restrict dst, ALuint dstlen)
+{
+    ALfloat *out = dst;
+    ALfloat *const out_end = dst + dstlen;
+
+    while(out != out_end)
+    {
+        *out++ = bsinc32_sse(state, src, frac);
+
+        // Step the source position by the fixed-point increment.
+        frac += increment;
+        src += frac>>FRACTIONBITS;
+        frac &= FRACTIONMASK;
+    }
+    return dst;
+}
+
+
static inline void SetupCoeffs(ALfloat (*restrict OutCoeffs)[2],
const HrtfParams *hrtfparams,
ALuint IrSize, ALuint Counter)