diff options
author | Chris Robinson <[email protected]> | 2020-04-03 02:39:23 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2020-04-03 02:39:23 -0700 |
commit | 367d4af07c61b3d0e705cfc10f4840647c92b026 (patch) | |
tree | 876ddb7b2a1e9396ac106d9c982b8c301b8fd481 /alc/mixer | |
parent | ebe30fb0bd47ca3c16d19863ae9053d8f93bd1f0 (diff) |
Avoid unnecessary duplication in the resamplers
Diffstat (limited to 'alc/mixer')
-rw-r--r-- | alc/mixer/mixer_c.cpp | 25 | ||||
-rw-r--r-- | alc/mixer/mixer_neon.cpp | 24 | ||||
-rw-r--r-- | alc/mixer/mixer_sse.cpp | 32 |
3 files changed, 34 insertions, 47 deletions
diff --git a/alc/mixer/mixer_c.cpp b/alc/mixer/mixer_c.cpp index 6b68d821..cdfc57d9 100644 --- a/alc/mixer/mixer_c.cpp +++ b/alc/mixer/mixer_c.cpp @@ -13,6 +13,9 @@ namespace { +#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS) +#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF) + inline float do_point(const InterpState&, const float *RESTRICT vals, const ALuint) { return vals[0]; } inline float do_lerp(const InterpState&, const float *RESTRICT vals, const ALuint frac) @@ -24,11 +27,8 @@ inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, con const size_t m{istate.bsinc.m}; // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * (1.0f/FRAC_PHASE_DIFFONE)}; const float *fil{istate.bsinc.filter + m*pi*4}; const float *phd{fil + m}; @@ -46,11 +46,8 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const size_t m{istate.bsinc.m}; // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * (1.0f/FRAC_PHASE_DIFFONE)}; const float *fil{istate.bsinc.filter + m*pi*4}; const float *phd{fil + m}; @@ -68,19 +65,15 @@ const float *DoResample(const InterpState *state, const float *RESTRICT src, ALu ALuint increment, const al::span<float> dst) { const InterpState istate{*state}; - auto proc_sample = [&src,&frac,istate,increment]() -> float + for(float &out : dst) { - const float ret{Sampler(istate, src, frac)}; + out = Sampler(istate, src, frac); frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - - return ret; - }; - std::generate(dst.begin(), dst.end(), proc_sample); - - return dst.begin(); + } + return dst.data(); } inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index 2cdf21c3..092958b0 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -16,6 +16,9 @@ namespace { +#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS) +#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF) + inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, const float left, const float right) { @@ -107,11 +110,9 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo for(float &out_sample : dst) { // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * + (1.0f/FRAC_PHASE_DIFFONE)}; // Apply the scale and phase interpolated filter. float32x4_t r4{vdupq_n_f32(0.0f)}; @@ -127,9 +128,8 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo do { /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ const float32x4_t f4 = vmlaq_f32( - vmlaq_f32(vld1q_f32(fil), sf4, vld1q_f32(scd)), - pf4, vmlaq_f32(vld1q_f32(phd), sf4, vld1q_f32(spd))); - fil += 4; scd += 4; phd += 4; spd += 4; + vmlaq_f32(vld1q_f32(&fil[j]), sf4, vld1q_f32(&scd[j])), + pf4, vmlaq_f32(vld1q_f32(&phd[j]), sf4, vld1q_f32(&spd[j]))); /* r += f*src */ r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j])); j += 4; @@ -156,11 +156,9 @@ const ALfloat *Resample_<FastBSincTag,NEONTag>(const InterpState *state, for(float &out_sample : dst) { // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * + (1.0f/FRAC_PHASE_DIFFONE)}; // Apply the phase interpolated filter. float32x4_t r4{vdupq_n_f32(0.0f)}; @@ -173,10 +171,10 @@ const ALfloat *Resample_<FastBSincTag,NEONTag>(const InterpState *state, do { /* f = fil + pf*phd */ - const float32x4_t f4 = vmlaq_f32(vld1q_f32(fil), pf4, vld1q_f32(phd)); + const float32x4_t f4 = vmlaq_f32(vld1q_f32(&fil[j]), pf4, vld1q_f32(&phd[j])); /* r += f*src */ r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j])); - fil += 4; phd += 4; j += 4; + j += 4; } while(--td); } r4 = vaddq_f32(r4, vrev64q_f32(r4)); diff --git a/alc/mixer/mixer_sse.cpp b/alc/mixer/mixer_sse.cpp index 32345522..58e9c76b 100644 --- a/alc/mixer/mixer_sse.cpp +++ b/alc/mixer/mixer_sse.cpp @@ -16,6 +16,11 @@ namespace { +#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS) +#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF) + +#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) + inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, const float left, const float right) { @@ -57,7 +62,7 @@ inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const Hrir { const __m128 coeffs{_mm_load_ps(&Coeffs[i][0])}; __m128 vals{_mm_load_ps(&Values[i][0])}; - vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); + vals = MLA4(vals, lrlr, coeffs); _mm_store_ps(&Values[i][0], vals); } } @@ -77,11 +82,9 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa for(float &out_sample : dst) { // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * + (1.0f/FRAC_PHASE_DIFFONE)}; // Apply the scale and phase interpolated filter. __m128 r4{_mm_setzero_ps()}; @@ -94,18 +97,15 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa size_t td{m >> 2}; size_t j{0u}; -#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) do { /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ const __m128 f4 = MLA4( - MLA4(_mm_load_ps(fil), sf4, _mm_load_ps(scd)), - pf4, MLA4(_mm_load_ps(phd), sf4, _mm_load_ps(spd))); - fil += 4; scd += 4; phd += 4; spd += 4; + MLA4(_mm_load_ps(&fil[j]), sf4, _mm_load_ps(&scd[j])), + pf4, MLA4(_mm_load_ps(&phd[j]), sf4, _mm_load_ps(&spd[j]))); /* r += f*src */ r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j])); j += 4; } while(--td); -#undef MLA4 } r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); @@ -129,11 +129,9 @@ const ALfloat *Resample_<FastBSincTag,SSETag>(const InterpState *state, for(float &out_sample : dst) { // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) const ALuint pi{frac >> FRAC_PHASE_BITDIFF}; - const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) * - (1.0f/(1<<FRAC_PHASE_BITDIFF))}; -#undef FRAC_PHASE_BITDIFF + const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * + (1.0f/FRAC_PHASE_DIFFONE)}; // Apply the phase interpolated filter. __m128 r4{_mm_setzero_ps()}; @@ -144,15 +142,13 @@ const ALfloat *Resample_<FastBSincTag,SSETag>(const InterpState *state, size_t td{m >> 2}; size_t j{0u}; -#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) do { /* f = fil + pf*phd */ - const __m128 f4 = MLA4(_mm_load_ps(fil), pf4, _mm_load_ps(phd)); + const __m128 f4 = MLA4(_mm_load_ps(&fil[j]), pf4, _mm_load_ps(&phd[j])); /* r += f*src */ r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j])); - fil += 4; phd += 4; j += 4; + j += 4; } while(--td); -#undef MLA4 } r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); |