about summary refs log tree commit diff stats
path: root/alc/mixer
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2020-04-03 02:39:23 -0700
committer Chris Robinson <[email protected]> 2020-04-03 02:39:23 -0700
commit 367d4af07c61b3d0e705cfc10f4840647c92b026 (patch)
tree 876ddb7b2a1e9396ac106d9c982b8c301b8fd481 /alc/mixer
parent ebe30fb0bd47ca3c16d19863ae9053d8f93bd1f0 (diff)
Avoid unnecessary duplication in the resamplers
Diffstat (limited to 'alc/mixer')
-rw-r--r-- alc/mixer/mixer_c.cpp 25
-rw-r--r-- alc/mixer/mixer_neon.cpp 24
-rw-r--r-- alc/mixer/mixer_sse.cpp 32
3 files changed, 34 insertions, 47 deletions
diff --git a/alc/mixer/mixer_c.cpp b/alc/mixer/mixer_c.cpp
index 6b68d821..cdfc57d9 100644
--- a/alc/mixer/mixer_c.cpp
+++ b/alc/mixer/mixer_c.cpp
@@ -13,6 +13,9 @@
namespace {
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS)
+#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF)
+
inline float do_point(const InterpState&, const float *RESTRICT vals, const ALuint)
{ return vals[0]; }
inline float do_lerp(const InterpState&, const float *RESTRICT vals, const ALuint frac)
@@ -24,11 +27,8 @@ inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, con
const size_t m{istate.bsinc.m};
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * (1.0f/FRAC_PHASE_DIFFONE)};
const float *fil{istate.bsinc.filter + m*pi*4};
const float *phd{fil + m};
@@ -46,11 +46,8 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals,
const size_t m{istate.bsinc.m};
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) * (1.0f/FRAC_PHASE_DIFFONE)};
const float *fil{istate.bsinc.filter + m*pi*4};
const float *phd{fil + m};
@@ -68,19 +65,15 @@ const float *DoResample(const InterpState *state, const float *RESTRICT src, ALu
ALuint increment, const al::span<float> dst)
{
const InterpState istate{*state};
- auto proc_sample = [&src,&frac,istate,increment]() -> float
+ for(float &out : dst)
{
- const float ret{Sampler(istate, src, frac)};
+ out = Sampler(istate, src, frac);
frac += increment;
src += frac>>FRACTIONBITS;
frac &= FRACTIONMASK;
-
- return ret;
- };
- std::generate(dst.begin(), dst.end(), proc_sample);
-
- return dst.begin();
+ }
+ return dst.data();
}
inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs,
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp
index 2cdf21c3..092958b0 100644
--- a/alc/mixer/mixer_neon.cpp
+++ b/alc/mixer/mixer_neon.cpp
@@ -16,6 +16,9 @@
namespace {
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS)
+#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF)
+
inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs,
const float left, const float right)
{
@@ -107,11 +110,9 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) *
+ (1.0f/FRAC_PHASE_DIFFONE)};
// Apply the scale and phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
@@ -127,9 +128,8 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo
do {
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
const float32x4_t f4 = vmlaq_f32(
- vmlaq_f32(vld1q_f32(fil), sf4, vld1q_f32(scd)),
- pf4, vmlaq_f32(vld1q_f32(phd), sf4, vld1q_f32(spd)));
- fil += 4; scd += 4; phd += 4; spd += 4;
+ vmlaq_f32(vld1q_f32(&fil[j]), sf4, vld1q_f32(&scd[j])),
+ pf4, vmlaq_f32(vld1q_f32(&phd[j]), sf4, vld1q_f32(&spd[j])));
/* r += f*src */
r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
j += 4;
@@ -156,11 +156,9 @@ const ALfloat *Resample_<FastBSincTag,NEONTag>(const InterpState *state,
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) *
+ (1.0f/FRAC_PHASE_DIFFONE)};
// Apply the phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
@@ -173,10 +171,10 @@ const ALfloat *Resample_<FastBSincTag,NEONTag>(const InterpState *state,
do {
/* f = fil + pf*phd */
- const float32x4_t f4 = vmlaq_f32(vld1q_f32(fil), pf4, vld1q_f32(phd));
+ const float32x4_t f4 = vmlaq_f32(vld1q_f32(&fil[j]), pf4, vld1q_f32(&phd[j]));
/* r += f*src */
r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
- fil += 4; phd += 4; j += 4;
+ j += 4;
} while(--td);
}
r4 = vaddq_f32(r4, vrev64q_f32(r4));
diff --git a/alc/mixer/mixer_sse.cpp b/alc/mixer/mixer_sse.cpp
index 32345522..58e9c76b 100644
--- a/alc/mixer/mixer_sse.cpp
+++ b/alc/mixer/mixer_sse.cpp
@@ -16,6 +16,11 @@
namespace {
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS - BSINC_PHASE_BITS)
+#define FRAC_PHASE_DIFFONE (1<<FRAC_PHASE_BITDIFF)
+
+#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
+
inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs,
const float left, const float right)
{
@@ -57,7 +62,7 @@ inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const Hrir
{
const __m128 coeffs{_mm_load_ps(&Coeffs[i][0])};
__m128 vals{_mm_load_ps(&Values[i][0])};
- vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
+ vals = MLA4(vals, lrlr, coeffs);
_mm_store_ps(&Values[i][0], vals);
}
}
@@ -77,11 +82,9 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) *
+ (1.0f/FRAC_PHASE_DIFFONE)};
// Apply the scale and phase interpolated filter.
__m128 r4{_mm_setzero_ps()};
@@ -94,18 +97,15 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa
size_t td{m >> 2};
size_t j{0u};
-#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
do {
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
const __m128 f4 = MLA4(
- MLA4(_mm_load_ps(fil), sf4, _mm_load_ps(scd)),
- pf4, MLA4(_mm_load_ps(phd), sf4, _mm_load_ps(spd)));
- fil += 4; scd += 4; phd += 4; spd += 4;
+ MLA4(_mm_load_ps(&fil[j]), sf4, _mm_load_ps(&scd[j])),
+ pf4, MLA4(_mm_load_ps(&phd[j]), sf4, _mm_load_ps(&spd[j])));
/* r += f*src */
r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j]));
j += 4;
} while(--td);
-#undef MLA4
}
r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
@@ -129,11 +129,9 @@ const ALfloat *Resample_<FastBSincTag,SSETag>(const InterpState *state,
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS)
const ALuint pi{frac >> FRAC_PHASE_BITDIFF};
- const float pf{static_cast<float>(frac & ((1<<FRAC_PHASE_BITDIFF)-1)) *
- (1.0f/(1<<FRAC_PHASE_BITDIFF))};
-#undef FRAC_PHASE_BITDIFF
+ const float pf{static_cast<float>(frac & (FRAC_PHASE_DIFFONE-1)) *
+ (1.0f/FRAC_PHASE_DIFFONE)};
// Apply the phase interpolated filter.
__m128 r4{_mm_setzero_ps()};
@@ -144,15 +142,13 @@ const ALfloat *Resample_<FastBSincTag,SSETag>(const InterpState *state,
size_t td{m >> 2};
size_t j{0u};
-#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
do {
/* f = fil + pf*phd */
- const __m128 f4 = MLA4(_mm_load_ps(fil), pf4, _mm_load_ps(phd));
+ const __m128 f4 = MLA4(_mm_load_ps(&fil[j]), pf4, _mm_load_ps(&phd[j]));
/* r += f*src */
r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j]));
- fil += 4; phd += 4; j += 4;
+ j += 4;
} while(--td);
-#undef MLA4
}
r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));