aboutsummaryrefslogtreecommitdiffstats
path: root/Alc/mixer/mixer_sse.cpp
diff options
context:
space:
mode:
authorChris Robinson <[email protected]>2019-03-29 11:28:38 -0700
committerChris Robinson <[email protected]>2019-03-29 11:33:04 -0700
commitcc91490b6104b5304655c6e4367371ea929d20bb (patch)
treed536557a349bc9bbd85fcc215456d96ab70fc13b /Alc/mixer/mixer_sse.cpp
parentfe7918465ed203b4731140aaf13c00d2aa9b1041 (diff)
Use a temporary buffer for HRTF filter accumulation
Similar to the history buffer, to avoid using the state buffer as a ring buffer.
Diffstat (limited to 'Alc/mixer/mixer_sse.cpp')
-rw-r--r--Alc/mixer/mixer_sse.cpp88
1 files changed, 33 insertions, 55 deletions
diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp
index 69aeaa62..629fa428 100644
--- a/Alc/mixer/mixer_sse.cpp
+++ b/Alc/mixer/mixer_sse.cpp
@@ -76,97 +76,75 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa
}
-static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize,
+static inline void ApplyCoeffs(ALsizei Offset, float2 *RESTRICT Values, const ALsizei IrSize,
const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right)
{
const __m128 lrlr{_mm_setr_ps(left, right, left, right)};
ASSUME(IrSize >= 2);
- ASSUME(&Values != &Coeffs);
- ASSUME(Offset >= 0 && Offset < HRIR_LENGTH);
if((Offset&1))
{
- ALsizei count{mini(IrSize-1, HRIR_LENGTH - Offset)};
- ASSUME(count >= 1);
-
__m128 imp0, imp1;
__m128 coeffs{_mm_load_ps(&Coeffs[0][0])};
- __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[Offset][0]))};
+ __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[0][0]))};
imp0 = _mm_mul_ps(lrlr, coeffs);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals);
- ++Offset;
- for(ALsizei i{1};;)
+ _mm_storel_pi(reinterpret_cast<__m64*>(&Values[0][0]), vals);
+ ALsizei i{1};
+ for(;i < IrSize-1;i += 2)
{
- for(;i < count;i += 2)
- {
- coeffs = _mm_load_ps(&Coeffs[i+1][0]);
- vals = _mm_load_ps(&Values[Offset][0]);
- imp1 = _mm_mul_ps(lrlr, coeffs);
- imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Values[Offset][0], vals);
- imp0 = imp1;
- Offset += 2;
- }
- Offset &= HRIR_MASK;
- if(i >= IrSize-1)
- break;
- count = IrSize-1;
+ coeffs = _mm_load_ps(&Coeffs[i+1][0]);
+ vals = _mm_load_ps(&Values[i][0]);
+ imp1 = _mm_mul_ps(lrlr, coeffs);
+ imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
+ vals = _mm_add_ps(imp0, vals);
+ _mm_store_ps(&Values[i][0], vals);
+ imp0 = imp1;
}
- vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[Offset][0]));
+ vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[i][0]));
imp0 = _mm_movehl_ps(imp0, imp0);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals);
+ _mm_storel_pi(reinterpret_cast<__m64*>(&Values[i][0]), vals);
}
else
{
- ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)};
- ASSUME(count >= 2);
-
- for(ALsizei i{0};;)
+ for(ALsizei i{0};i < IrSize;i += 2)
{
- for(;i < count;i += 2)
- {
- __m128 coeffs{_mm_load_ps(&Coeffs[i][0])};
- __m128 vals{_mm_load_ps(&Values[Offset][0])};
- vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
- _mm_store_ps(&Values[Offset][0], vals);
- Offset += 2;
- }
- if(i >= IrSize)
- break;
- Offset = 0;
- count = IrSize;
+ __m128 coeffs{_mm_load_ps(&Coeffs[i][0])};
+ __m128 vals{_mm_load_ps(&Values[i][0])};
+ vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
+ _mm_store_ps(&Values[i][0], vals);
}
}
}
template<>
void MixHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data,
- ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams,
- HrtfState *hrtfstate, const ALsizei BufferSize)
+ float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, const ALsizei BufferSize)
{
- MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams,
- hrtfstate, BufferSize);
+ MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams,
+ BufferSize);
}
template<>
void MixHrtfBlend_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
- const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- const ALsizei BufferSize)
+ const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize)
{
- MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams,
- newparams, hrtfstate, BufferSize);
+ MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams,
+ newparams, BufferSize);
}
template<>
void MixDirectHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans,
- const ALsizei BufferSize)
-{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); }
+ const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State,
+ const ALsizei NumChans, const ALsizei BufferSize)
+{
+ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans,
+ BufferSize);
+}
template<>