about | summary | refs | log | tree | commit | diff | stats
path: root/Alc/mixer
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2019-03-29 11:28:38 -0700
committer Chris Robinson <[email protected]> 2019-03-29 11:33:04 -0700
commit cc91490b6104b5304655c6e4367371ea929d20bb (patch)
tree d536557a349bc9bbd85fcc215456d96ab70fc13b /Alc/mixer
parent fe7918465ed203b4731140aaf13c00d2aa9b1041 (diff)
Use a temporary buffer for HRTF filter accumulation
Similar to the history buffer, to avoid using the state buffer as a ring buffer.
Diffstat (limited to 'Alc/mixer')
-rw-r--r-- Alc/mixer/defs.h 6
-rw-r--r-- Alc/mixer/hrtfbase.h 145
-rw-r--r-- Alc/mixer/mixer_c.cpp 47
-rw-r--r-- Alc/mixer/mixer_neon.cpp 39
-rw-r--r-- Alc/mixer/mixer_sse.cpp 88
5 files changed, 122 insertions, 203 deletions
diff --git a/Alc/mixer/defs.h b/Alc/mixer/defs.h
index 1aa6ba38..cd301833 100644
--- a/Alc/mixer/defs.h
+++ b/Alc/mixer/defs.h
@@ -35,11 +35,11 @@ template<typename InstTag>
void MixRow_(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize);
template<typename InstTag>
-void MixHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, const ALsizei BufferSize);
+void MixHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, const ALsizei BufferSize);
template<typename InstTag>
-void MixHrtfBlend_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, const ALsizei BufferSize);
+void MixHrtfBlend_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize);
template<typename InstTag>
-void MixDirectHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize);
+void MixDirectHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize);
/* Vectorized resampler helpers */
inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *RESTRICT frac_arr, ALsizei *RESTRICT pos_arr, ALsizei size)
diff --git a/Alc/mixer/hrtfbase.h b/Alc/mixer/hrtfbase.h
index 8ad4a99c..162d7289 100644
--- a/Alc/mixer/hrtfbase.h
+++ b/Alc/mixer/hrtfbase.h
@@ -1,18 +1,20 @@
#ifndef MIXER_HRTFBASE_H
#define MIXER_HRTFBASE_H
+#include <algorithm>
+
#include "alu.h"
#include "../hrtf.h"
#include "opthelpers.h"
-using ApplyCoeffsT = void(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei irSize,
+using ApplyCoeffsT = void(ALsizei Offset, float2 *RESTRICT Values, const ALsizei irSize,
const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right);
template<ApplyCoeffsT &ApplyCoeffs>
inline void MixHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data,
- ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams,
- HrtfState *hrtfstate, const ALsizei BufferSize)
+ float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, const ALsizei BufferSize)
{
ASSUME(OutPos >= 0);
ASSUME(IrSize >= 4);
@@ -28,49 +30,27 @@ inline void MixHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, c
HRTF_HISTORY_LENGTH - hrtfparams->Delay[1] };
ASSUME(Delay[0] >= 0 && Delay[1] >= 0);
- Offset &= HRIR_MASK;
- ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
-
- LeftOut += OutPos;
- RightOut += OutPos;
- for(ALsizei i{0};i < BufferSize;)
+ for(ALsizei i{0};i < BufferSize;++i)
{
- /* Calculate the number of samples we can do until one of the indices
- * wraps on its buffer, or we reach the end.
- */
- const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
- const ALsizei todo{mini(BufferSize-i, todo_hrir) + i};
- ASSUME(todo > i);
-
- for(;i < todo;++i)
- {
- hrtfstate->Values[HeadOffset][0] = 0.0f;
- hrtfstate->Values[HeadOffset][1] = 0.0f;
- ++HeadOffset;
-
- const ALfloat g{gain + gainstep*stepcount};
- const ALfloat left{data[Delay[0]++] * g};
- const ALfloat right{data[Delay[1]++] * g};
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right);
-
- *(LeftOut++) += hrtfstate->Values[Offset][0];
- *(RightOut++) += hrtfstate->Values[Offset][1];
- ++Offset;
+ const ALfloat g{gain + gainstep*stepcount};
+ const ALfloat left{data[Delay[0]++] * g};
+ const ALfloat right{data[Delay[1]++] * g};
+ ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, left, right);
- stepcount += 1.0f;
- }
-
- HeadOffset &= HRIR_MASK;
- Offset &= HRIR_MASK;
+ stepcount += 1.0f;
}
+ for(ALsizei i{0};i < BufferSize;++i)
+ LeftOut[OutPos+i] += AccumSamples[i][0];
+ for(ALsizei i{0};i < BufferSize;++i)
+ RightOut[OutPos+i] += AccumSamples[i][1];
+
hrtfparams->Gain = gain + gainstep*stepcount;
}
template<ApplyCoeffsT &ApplyCoeffs>
inline void MixHrtfBlendBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
- const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- const ALsizei BufferSize)
+ const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize)
{
const auto &OldCoeffs = oldparams->Coeffs;
const ALfloat oldGain{oldparams->Gain};
@@ -92,50 +72,32 @@ inline void MixHrtfBlendBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightO
HRTF_HISTORY_LENGTH - newparams->Delay[1] };
ASSUME(NewDelay[0] >= 0 && NewDelay[1] >= 0);
- Offset &= HRIR_MASK;
- ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
-
- LeftOut += OutPos;
- RightOut += OutPos;
- for(ALsizei i{0};i < BufferSize;)
+ for(ALsizei i{0};i < BufferSize;++i)
{
- const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
- const ALsizei todo{mini(BufferSize-i, todo_hrir) + i};
- ASSUME(todo > i);
+ ALfloat g{oldGain + oldGainStep*stepcount};
+ ALfloat left{data[OldDelay[0]++] * g};
+ ALfloat right{data[OldDelay[1]++] * g};
+ ApplyCoeffs(i, AccumSamples+i, IrSize, OldCoeffs, left, right);
- for(;i < todo;++i)
- {
- hrtfstate->Values[HeadOffset][0] = 0.0f;
- hrtfstate->Values[HeadOffset][1] = 0.0f;
- ++HeadOffset;
-
- ALfloat g{oldGain + oldGainStep*stepcount};
- ALfloat left{data[OldDelay[0]++] * g};
- ALfloat right{data[OldDelay[1]++] * g};
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right);
-
- g = newGainStep*stepcount;
- left = data[NewDelay[0]++] * g;
- right = data[NewDelay[1]++] * g;
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right);
+ g = newGainStep*stepcount;
+ left = data[NewDelay[0]++] * g;
+ right = data[NewDelay[1]++] * g;
+ ApplyCoeffs(i, AccumSamples+i, IrSize, NewCoeffs, left, right);
- *(LeftOut++) += hrtfstate->Values[Offset][0];
- *(RightOut++) += hrtfstate->Values[Offset][1];
- ++Offset;
-
- stepcount += 1.0f;
- }
-
- HeadOffset &= HRIR_MASK;
- Offset &= HRIR_MASK;
+ stepcount += 1.0f;
}
+ for(ALsizei i{0};i < BufferSize;++i)
+ LeftOut[OutPos+i] += AccumSamples[i][0];
+ for(ALsizei i{0};i < BufferSize;++i)
+ RightOut[OutPos+i] += AccumSamples[i][1];
+
newparams->Gain = newGainStep*stepcount;
}
template<ApplyCoeffsT &ApplyCoeffs>
inline void MixDirectHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans,
- const ALsizei BufferSize)
+ const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State,
+ const ALsizei NumChans, const ALsizei BufferSize)
{
ASSUME(NumChans > 0);
ASSUME(BufferSize > 0);
@@ -147,32 +109,23 @@ inline void MixDirectHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT Right
{
const ALfloat (&input)[BUFFERSIZE] = data[c];
const auto &Coeffs = State->Chan[c].Coeffs;
- auto &Values = State->Chan[c].Values;
- ALsizei Offset{State->Offset&HRIR_MASK};
- ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
- for(ALsizei i{0};i < BufferSize;)
+ auto accum_iter = std::copy_n(State->Chan[c].Values.begin(),
+ State->Chan[c].Values.size(), AccumSamples);
+ std::fill_n(accum_iter, BufferSize, float2{});
+
+ for(ALsizei i{0};i < BufferSize;++i)
{
- const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
- const ALsizei todo{mini(BufferSize-i, todo_hrir) + i};
- ASSUME(todo > i);
-
- for(;i < todo;++i)
- {
- Values[HeadOffset][0] = 0.0f;
- Values[HeadOffset][1] = 0.0f;
- ++HeadOffset;
-
- const ALfloat insample{input[i]};
- ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample);
-
- LeftOut[i] += Values[Offset][0];
- RightOut[i] += Values[Offset][1];
- ++Offset;
- }
- HeadOffset &= HRIR_MASK;
- Offset &= HRIR_MASK;
+ const ALfloat insample{input[i]};
+ ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, insample, insample);
}
+ for(ALsizei i{0};i < BufferSize;++i)
+ LeftOut[i] += AccumSamples[i][0];
+ for(ALsizei i{0};i < BufferSize;++i)
+ RightOut[i] += AccumSamples[i][1];
+
+ std::copy_n(AccumSamples + BufferSize, State->Chan[c].Values.size(),
+ State->Chan[c].Values.begin());
}
}
diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp
index ba094999..1c22115d 100644
--- a/Alc/mixer/mixer_c.cpp
+++ b/Alc/mixer/mixer_c.cpp
@@ -103,54 +103,43 @@ const ALfloat *Resample_<BSincTag,CTag>(const InterpState *state, const ALfloat
{ return DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst, dstlen); }
-static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize,
+static inline void ApplyCoeffs(ALsizei /*Offset*/, float2 *RESTRICT Values, const ALsizei IrSize,
const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right)
{
- ASSUME(Offset >= 0 && Offset < HRIR_LENGTH);
ASSUME(IrSize >= 2);
- ASSUME(&Values != &Coeffs);
-
- ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)};
- ASSUME(count > 0);
- for(ALsizei c{0};;)
+ for(ALsizei c{0};c < IrSize;++c)
{
- for(;c < count;++c)
- {
- Values[Offset][0] += Coeffs[c][0] * left;
- Values[Offset][1] += Coeffs[c][1] * right;
- ++Offset;
- }
- if(c >= IrSize)
- break;
- Offset = 0;
- count = IrSize;
+ Values[c][0] += Coeffs[c][0] * left;
+ Values[c][1] += Coeffs[c][1] * right;
}
}
template<>
void MixHrtf_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data,
- ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams,
- HrtfState *hrtfstate, const ALsizei BufferSize)
+ float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, const ALsizei BufferSize)
{
- MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams,
- hrtfstate, BufferSize);
+ MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams,
+ BufferSize);
}
template<>
void MixHrtfBlend_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
- const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- const ALsizei BufferSize)
+ const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize)
{
- MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams,
- newparams, hrtfstate, BufferSize);
+ MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams,
+ newparams, BufferSize);
}
template<>
void MixDirectHrtf_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans,
- const ALsizei BufferSize)
-{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); }
+ const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State,
+ const ALsizei NumChans, const ALsizei BufferSize)
+{
+ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans,
+ BufferSize);
+}
template<>
diff --git a/Alc/mixer/mixer_neon.cpp b/Alc/mixer/mixer_neon.cpp
index 9e8324aa..cdd96296 100644
--- a/Alc/mixer/mixer_neon.cpp
+++ b/Alc/mixer/mixer_neon.cpp
@@ -136,11 +136,10 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo
}
-static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize,
+static inline void ApplyCoeffs(ALsizei /*Offset*/, float2 *RESTRICT Values, const ALsizei IrSize,
const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right)
{
ASSUME(IrSize >= 2);
- ASSUME(&Values != &Coeffs);
float32x4_t leftright4;
{
@@ -152,43 +151,43 @@ static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const
for(ALsizei c{0};c < IrSize;c += 2)
{
- const ALsizei o0 = (Offset+c)&HRIR_MASK;
- const ALsizei o1 = (o0+1)&HRIR_MASK;
- float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]),
- vld1_f32((float32_t*)&Values[o1][0]));
+ float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[c ][0]),
+ vld1_f32((float32_t*)&Values[c+1][0]));
float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]);
vals = vmlaq_f32(vals, coefs, leftright4);
- vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals));
- vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals));
+ vst1_f32((float32_t*)&Values[c ][0], vget_low_f32(vals));
+ vst1_f32((float32_t*)&Values[c+1][0], vget_high_f32(vals));
}
}
template<>
void MixHrtf_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data,
- ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams,
- HrtfState *hrtfstate, const ALsizei BufferSize)
+ float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, const ALsizei BufferSize)
{
- MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams,
- hrtfstate, BufferSize);
+ MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams,
+ BufferSize);
}
template<>
void MixHrtfBlend_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
- const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- const ALsizei BufferSize)
+ const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize)
{
- MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams,
- newparams, hrtfstate, BufferSize);
+ MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams,
+ newparams, BufferSize);
}
template<>
void MixDirectHrtf_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans,
- const ALsizei BufferSize)
-{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); }
+ const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State,
+ const ALsizei NumChans, const ALsizei BufferSize)
+{
+ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans,
+ BufferSize);
+}
template<>
diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp
index 69aeaa62..629fa428 100644
--- a/Alc/mixer/mixer_sse.cpp
+++ b/Alc/mixer/mixer_sse.cpp
@@ -76,97 +76,75 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa
}
-static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize,
+static inline void ApplyCoeffs(ALsizei Offset, float2 *RESTRICT Values, const ALsizei IrSize,
const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right)
{
const __m128 lrlr{_mm_setr_ps(left, right, left, right)};
ASSUME(IrSize >= 2);
- ASSUME(&Values != &Coeffs);
- ASSUME(Offset >= 0 && Offset < HRIR_LENGTH);
if((Offset&1))
{
- ALsizei count{mini(IrSize-1, HRIR_LENGTH - Offset)};
- ASSUME(count >= 1);
-
__m128 imp0, imp1;
__m128 coeffs{_mm_load_ps(&Coeffs[0][0])};
- __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[Offset][0]))};
+ __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[0][0]))};
imp0 = _mm_mul_ps(lrlr, coeffs);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals);
- ++Offset;
- for(ALsizei i{1};;)
+ _mm_storel_pi(reinterpret_cast<__m64*>(&Values[0][0]), vals);
+ ALsizei i{1};
+ for(;i < IrSize-1;i += 2)
{
- for(;i < count;i += 2)
- {
- coeffs = _mm_load_ps(&Coeffs[i+1][0]);
- vals = _mm_load_ps(&Values[Offset][0]);
- imp1 = _mm_mul_ps(lrlr, coeffs);
- imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Values[Offset][0], vals);
- imp0 = imp1;
- Offset += 2;
- }
- Offset &= HRIR_MASK;
- if(i >= IrSize-1)
- break;
- count = IrSize-1;
+ coeffs = _mm_load_ps(&Coeffs[i+1][0]);
+ vals = _mm_load_ps(&Values[i][0]);
+ imp1 = _mm_mul_ps(lrlr, coeffs);
+ imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
+ vals = _mm_add_ps(imp0, vals);
+ _mm_store_ps(&Values[i][0], vals);
+ imp0 = imp1;
}
- vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[Offset][0]));
+ vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[i][0]));
imp0 = _mm_movehl_ps(imp0, imp0);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals);
+ _mm_storel_pi(reinterpret_cast<__m64*>(&Values[i][0]), vals);
}
else
{
- ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)};
- ASSUME(count >= 2);
-
- for(ALsizei i{0};;)
+ for(ALsizei i{0};i < IrSize;i += 2)
{
- for(;i < count;i += 2)
- {
- __m128 coeffs{_mm_load_ps(&Coeffs[i][0])};
- __m128 vals{_mm_load_ps(&Values[Offset][0])};
- vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
- _mm_store_ps(&Values[Offset][0], vals);
- Offset += 2;
- }
- if(i >= IrSize)
- break;
- Offset = 0;
- count = IrSize;
+ __m128 coeffs{_mm_load_ps(&Coeffs[i][0])};
+ __m128 vals{_mm_load_ps(&Values[i][0])};
+ vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
+ _mm_store_ps(&Values[i][0], vals);
}
}
}
template<>
void MixHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data,
- ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams,
- HrtfState *hrtfstate, const ALsizei BufferSize)
+ float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, const ALsizei BufferSize)
{
- MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams,
- hrtfstate, BufferSize);
+ MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams,
+ BufferSize);
}
template<>
void MixHrtfBlend_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
- const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- const ALsizei BufferSize)
+ const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize,
+ const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize)
{
- MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams,
- newparams, hrtfstate, BufferSize);
+ MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams,
+ newparams, BufferSize);
}
template<>
void MixDirectHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans,
- const ALsizei BufferSize)
-{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); }
+ const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State,
+ const ALsizei NumChans, const ALsizei BufferSize)
+{
+ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans,
+ BufferSize);
+}
template<>