about | summary | refs | log | tree | commit | diff | stats
path: root/Alc
diff options
context:
space:
mode:
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/mixer_c.c 64
-rw-r--r-- Alc/mixer_sse.c 79
2 files changed, 63 insertions, 80 deletions
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index bbed14d3..ef37b730 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -17,39 +17,6 @@ static inline ALfloat fir4_32(const ALfloat *vals, ALuint frac)
static inline ALfloat fir8_32(const ALfloat *vals, ALuint frac)
{ return resample_fir8(vals[-3], vals[-2], vals[-1], vals[0], vals[1], vals[2], vals[3], vals[4], frac); }
-// Obtain the next sample from the interpolator.
-
-static inline ALfloat bsinc32(const BsincState *state, const ALfloat *vals, const ALuint frac)
-{
- const ALfloat sf = state->sf;
- ALfloat pf, r;
- ALuint pi;
-
- // Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
- pi = frac >> FRAC_PHASE_BITDIFF;
- pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
-#undef FRAC_PHASE_BITDIFF
-
- r = 0.0f;
- {
- const ALuint m = state->m;
- const ALint l = state->l;
- const ALfloat *fil = state->coeffs[pi].filter;
- const ALfloat *scd = state->coeffs[pi].scDelta;
- const ALfloat *phd = state->coeffs[pi].phDelta;
- const ALfloat *spd = state->coeffs[pi].spDelta;
- ALuint j_f;
- ALint j_s;
-
- // Apply the scale and phase interpolated filter.
- for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++)
- r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) *
- vals[j_s];
- }
- return r;
-}
-
const ALfloat *Resample_copy32_C(const BsincState* UNUSED(state), const ALfloat *src, ALuint UNUSED(frac),
ALuint UNUSED(increment), ALfloat *restrict dst, ALuint numsamples)
@@ -85,13 +52,38 @@ DECL_TEMPLATE(lerp32)
DECL_TEMPLATE(fir4_32)
DECL_TEMPLATE(fir8_32)
+#undef DECL_TEMPLATE
+
const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, ALuint frac,
ALuint increment, ALfloat *restrict dst, ALuint dstlen)
{
- ALuint i;
+ const ALfloat *fil, *scd, *phd, *spd;
+ const ALfloat sf = state->sf;
+ const ALuint m = state->m;
+ const ALint l = state->l;
+ ALuint j_f, pi, i;
+ ALfloat pf, r;
+ ALint j_s;
+
for(i = 0;i < dstlen;i++)
{
- dst[i] = bsinc32(state, src, frac);
+ // Calculate the phase index and factor.
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
+ pi = frac >> FRAC_PHASE_BITDIFF;
+ pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
+#undef FRAC_PHASE_BITDIFF
+
+ fil = state->coeffs[pi].filter;
+ scd = state->coeffs[pi].scDelta;
+ phd = state->coeffs[pi].phDelta;
+ spd = state->coeffs[pi].spDelta;
+
+ // Apply the scale and phase interpolated filter.
+ r = 0.0f;
+ for(j_f = 0,j_s = l;j_f < m;j_f++,j_s++)
+ r += (fil[j_f] + sf*scd[j_f] + pf*(phd[j_f] + sf*spd[j_f])) *
+ src[j_s];
+ dst[i] = r;
frac += increment;
src += frac>>FRACTIONBITS;
@@ -100,8 +92,6 @@ const ALfloat *Resample_bsinc32_C(const BsincState *state, const ALfloat *src, A
return dst;
}
-#undef DECL_TEMPLATE
-
void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const ALfloat *src, ALuint numsamples)
{
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 87a17e2c..090b7a5a 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -12,63 +12,56 @@
#include "mixer_defs.h"
-// Obtain the next sample from the interpolator (SSE version).
-static inline ALfloat bsinc32_sse(const BsincState *state, const ALfloat *in, const ALuint frac)
+const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac,
+ ALuint increment, ALfloat *restrict dst, ALuint dstlen)
{
const __m128 sf4 = _mm_set1_ps(state->sf);
- ALfloat pf, r;
- ALuint pi;
+ const ALuint m = state->m;
+ const ALint l = state->l;
+ const ALfloat *fil, *scd, *phd, *spd;
+ ALuint pi, j_f, i;
+ ALfloat pf;
+ ALint j_s;
+ __m128 r4;
- // Calculate the phase index and factor.
+ for(i = 0;i < dstlen;i++)
+ {
+ // Calculate the phase index and factor.
#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
- pi = frac >> FRAC_PHASE_BITDIFF;
- pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
+ pi = frac >> FRAC_PHASE_BITDIFF;
+ pf = (frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * (1.0f/(1<<FRAC_PHASE_BITDIFF));
#undef FRAC_PHASE_BITDIFF
- {
- const ALuint m = state->m;
- const ALint l = state->l;
- const ALfloat *fil = state->coeffs[pi].filter;
- const ALfloat *scd = state->coeffs[pi].scDelta;
- const ALfloat *phd = state->coeffs[pi].phDelta;
- const ALfloat *spd = state->coeffs[pi].spDelta;
- const __m128 pf4 = _mm_set1_ps(pf);
- __m128 r4 = _mm_setzero_ps();
- ALuint j_f;
- ALint j_s;
+ fil = state->coeffs[pi].filter;
+ scd = state->coeffs[pi].scDelta;
+ phd = state->coeffs[pi].phDelta;
+ spd = state->coeffs[pi].spDelta;
// Apply the scale and phase interpolated filter.
- for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4)
+ r4 = _mm_setzero_ps();
{
- const __m128 f4 = _mm_add_ps(
- _mm_add_ps(
- _mm_load_ps(&fil[j_f]),
- _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f]))
- ),
- _mm_mul_ps(
- pf4,
+ const __m128 pf4 = _mm_set1_ps(pf);
+ for(j_f = 0,j_s = l;j_f < m;j_f+=4,j_s+=4)
+ {
+ const __m128 f4 = _mm_add_ps(
_mm_add_ps(
- _mm_load_ps(&phd[j_f]),
- _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f]))
+ _mm_load_ps(&fil[j_f]),
+ _mm_mul_ps(sf4, _mm_load_ps(&scd[j_f]))
+ ),
+ _mm_mul_ps(
+ pf4,
+ _mm_add_ps(
+ _mm_load_ps(&phd[j_f]),
+ _mm_mul_ps(sf4, _mm_load_ps(&spd[j_f]))
+ )
)
- )
- );
- r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&in[j_s])));
+ );
+ r4 = _mm_add_ps(r4, _mm_mul_ps(f4, _mm_loadu_ps(&src[j_s])));
+ }
}
r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
- r = _mm_cvtss_f32(r4);
- }
- return r;
-}
-
-const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *src, ALuint frac,
- ALuint increment, ALfloat *restrict dst, ALuint dstlen)
-{
- ALuint i;
- for(i = 0;i < dstlen;i++)
- {
- dst[i] = bsinc32_sse(state, src, frac);
+ dst[i] = _mm_cvtss_f32(r4);
frac += increment;
src += frac>>FRACTIONBITS;