diff options
author | Chris Robinson <[email protected]> | 2019-03-29 11:28:38 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2019-03-29 11:33:04 -0700 |
commit | cc91490b6104b5304655c6e4367371ea929d20bb (patch) | |
tree | d536557a349bc9bbd85fcc215456d96ab70fc13b | |
parent | fe7918465ed203b4731140aaf13c00d2aa9b1041 (diff) |
Use a temporary buffer for HRTF filter accumulation

This is similar to what is done for the history buffer, and avoids using the
state buffer as a ring buffer.
-rw-r--r-- | Alc/alu.cpp | 4 |
-rw-r--r-- | Alc/hrtf.h | 2 |
-rw-r--r-- | Alc/mixer/defs.h | 6 |
-rw-r--r-- | Alc/mixer/hrtfbase.h | 145 |
-rw-r--r-- | Alc/mixer/mixer_c.cpp | 47 |
-rw-r--r-- | Alc/mixer/mixer_neon.cpp | 39 |
-rw-r--r-- | Alc/mixer/mixer_sse.cpp | 88 |
-rw-r--r-- | Alc/mixvoice.cpp | 42 |
-rw-r--r-- | OpenAL32/Include/alMain.h | 1 |
-rw-r--r-- | OpenAL32/Include/alu.h | 13 |
10 files changed, 170 insertions, 217 deletions
diff --git a/Alc/alu.cpp b/Alc/alu.cpp index 11ef209b..1939b7b0 100644 --- a/Alc/alu.cpp +++ b/Alc/alu.cpp @@ -134,8 +134,8 @@ void ProcessHrtf(ALCdevice *device, const ALsizei SamplesToDo) ALfloat *RightOut{device->RealOut.Buffer[ridx]}; DirectHrtfState *state{device->mHrtfState.get()}; - MixDirectHrtf(LeftOut, RightOut, device->Dry.Buffer, state, device->Dry.NumChannels, - SamplesToDo); + MixDirectHrtf(LeftOut, RightOut, device->Dry.Buffer, device->HrtfAccumData, state, + device->Dry.NumChannels, SamplesToDo); state->Offset += SamplesToDo; } @@ -60,6 +60,8 @@ struct EnumeratedHrtf { }; +using float2 = std::array<float,2>; + template<typename T> using HrirArray = std::array<std::array<T,2>,HRIR_LENGTH>; diff --git a/Alc/mixer/defs.h b/Alc/mixer/defs.h index 1aa6ba38..cd301833 100644 --- a/Alc/mixer/defs.h +++ b/Alc/mixer/defs.h @@ -35,11 +35,11 @@ template<typename InstTag> void MixRow_(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize); template<typename InstTag> -void MixHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, const ALsizei BufferSize); +void MixHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, const ALsizei BufferSize); template<typename InstTag> -void MixHrtfBlend_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, const ALsizei BufferSize); +void MixHrtfBlend_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams 
*oldparams, MixHrtfParams *newparams, const ALsizei BufferSize); template<typename InstTag> -void MixDirectHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize); +void MixDirectHrtf_(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize); /* Vectorized resampler helpers */ inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *RESTRICT frac_arr, ALsizei *RESTRICT pos_arr, ALsizei size) diff --git a/Alc/mixer/hrtfbase.h b/Alc/mixer/hrtfbase.h index 8ad4a99c..162d7289 100644 --- a/Alc/mixer/hrtfbase.h +++ b/Alc/mixer/hrtfbase.h @@ -1,18 +1,20 @@ #ifndef MIXER_HRTFBASE_H #define MIXER_HRTFBASE_H +#include <algorithm> + #include "alu.h" #include "../hrtf.h" #include "opthelpers.h" -using ApplyCoeffsT = void(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei irSize, +using ApplyCoeffsT = void(ALsizei Offset, float2 *RESTRICT Values, const ALsizei irSize, const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right); template<ApplyCoeffsT &ApplyCoeffs> inline void MixHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, - ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, const ALsizei BufferSize) + float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, const ALsizei BufferSize) { ASSUME(OutPos >= 0); ASSUME(IrSize >= 4); @@ -28,49 +30,27 @@ inline void MixHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, c HRTF_HISTORY_LENGTH - hrtfparams->Delay[1] }; ASSUME(Delay[0] >= 0 && Delay[1] >= 0); - Offset &= HRIR_MASK; - ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; - - LeftOut += OutPos; - RightOut += OutPos; - for(ALsizei i{0};i 
< BufferSize;) + for(ALsizei i{0};i < BufferSize;++i) { - /* Calculate the number of samples we can do until one of the indices - * wraps on its buffer, or we reach the end. - */ - const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; - const ALsizei todo{mini(BufferSize-i, todo_hrir) + i}; - ASSUME(todo > i); - - for(;i < todo;++i) - { - hrtfstate->Values[HeadOffset][0] = 0.0f; - hrtfstate->Values[HeadOffset][1] = 0.0f; - ++HeadOffset; - - const ALfloat g{gain + gainstep*stepcount}; - const ALfloat left{data[Delay[0]++] * g}; - const ALfloat right{data[Delay[1]++] * g}; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - - *(LeftOut++) += hrtfstate->Values[Offset][0]; - *(RightOut++) += hrtfstate->Values[Offset][1]; - ++Offset; + const ALfloat g{gain + gainstep*stepcount}; + const ALfloat left{data[Delay[0]++] * g}; + const ALfloat right{data[Delay[1]++] * g}; + ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, left, right); - stepcount += 1.0f; - } - - HeadOffset &= HRIR_MASK; - Offset &= HRIR_MASK; + stepcount += 1.0f; } + for(ALsizei i{0};i < BufferSize;++i) + LeftOut[OutPos+i] += AccumSamples[i][0]; + for(ALsizei i{0};i < BufferSize;++i) + RightOut[OutPos+i] += AccumSamples[i][1]; + hrtfparams->Gain = gain + gainstep*stepcount; } template<ApplyCoeffsT &ApplyCoeffs> inline void MixHrtfBlendBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - const ALsizei BufferSize) + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize) { const auto &OldCoeffs = oldparams->Coeffs; const ALfloat oldGain{oldparams->Gain}; @@ -92,50 +72,32 @@ inline void MixHrtfBlendBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightO HRTF_HISTORY_LENGTH - 
newparams->Delay[1] }; ASSUME(NewDelay[0] >= 0 && NewDelay[1] >= 0); - Offset &= HRIR_MASK; - ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; - - LeftOut += OutPos; - RightOut += OutPos; - for(ALsizei i{0};i < BufferSize;) + for(ALsizei i{0};i < BufferSize;++i) { - const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; - const ALsizei todo{mini(BufferSize-i, todo_hrir) + i}; - ASSUME(todo > i); + ALfloat g{oldGain + oldGainStep*stepcount}; + ALfloat left{data[OldDelay[0]++] * g}; + ALfloat right{data[OldDelay[1]++] * g}; + ApplyCoeffs(i, AccumSamples+i, IrSize, OldCoeffs, left, right); - for(;i < todo;++i) - { - hrtfstate->Values[HeadOffset][0] = 0.0f; - hrtfstate->Values[HeadOffset][1] = 0.0f; - ++HeadOffset; - - ALfloat g{oldGain + oldGainStep*stepcount}; - ALfloat left{data[OldDelay[0]++] * g}; - ALfloat right{data[OldDelay[1]++] * g}; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); - - g = newGainStep*stepcount; - left = data[NewDelay[0]++] * g; - right = data[NewDelay[1]++] * g; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); + g = newGainStep*stepcount; + left = data[NewDelay[0]++] * g; + right = data[NewDelay[1]++] * g; + ApplyCoeffs(i, AccumSamples+i, IrSize, NewCoeffs, left, right); - *(LeftOut++) += hrtfstate->Values[Offset][0]; - *(RightOut++) += hrtfstate->Values[Offset][1]; - ++Offset; - - stepcount += 1.0f; - } - - HeadOffset &= HRIR_MASK; - Offset &= HRIR_MASK; + stepcount += 1.0f; } + for(ALsizei i{0};i < BufferSize;++i) + LeftOut[OutPos+i] += AccumSamples[i][0]; + for(ALsizei i{0};i < BufferSize;++i) + RightOut[OutPos+i] += AccumSamples[i][1]; + newparams->Gain = newGainStep*stepcount; } template<ApplyCoeffsT &ApplyCoeffs> inline void MixDirectHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, - const ALsizei BufferSize) + const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, 
DirectHrtfState *State, + const ALsizei NumChans, const ALsizei BufferSize) { ASSUME(NumChans > 0); ASSUME(BufferSize > 0); @@ -147,32 +109,23 @@ inline void MixDirectHrtfBase(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT Right { const ALfloat (&input)[BUFFERSIZE] = data[c]; const auto &Coeffs = State->Chan[c].Coeffs; - auto &Values = State->Chan[c].Values; - ALsizei Offset{State->Offset&HRIR_MASK}; - ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; - for(ALsizei i{0};i < BufferSize;) + auto accum_iter = std::copy_n(State->Chan[c].Values.begin(), + State->Chan[c].Values.size(), AccumSamples); + std::fill_n(accum_iter, BufferSize, float2{}); + + for(ALsizei i{0};i < BufferSize;++i) { - const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; - const ALsizei todo{mini(BufferSize-i, todo_hrir) + i}; - ASSUME(todo > i); - - for(;i < todo;++i) - { - Values[HeadOffset][0] = 0.0f; - Values[HeadOffset][1] = 0.0f; - ++HeadOffset; - - const ALfloat insample{input[i]}; - ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); - - LeftOut[i] += Values[Offset][0]; - RightOut[i] += Values[Offset][1]; - ++Offset; - } - HeadOffset &= HRIR_MASK; - Offset &= HRIR_MASK; + const ALfloat insample{input[i]}; + ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, insample, insample); } + for(ALsizei i{0};i < BufferSize;++i) + LeftOut[i] += AccumSamples[i][0]; + for(ALsizei i{0};i < BufferSize;++i) + RightOut[i] += AccumSamples[i][1]; + + std::copy_n(AccumSamples + BufferSize, State->Chan[c].Values.size(), + State->Chan[c].Values.begin()); } } diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp index ba094999..1c22115d 100644 --- a/Alc/mixer/mixer_c.cpp +++ b/Alc/mixer/mixer_c.cpp @@ -103,54 +103,43 @@ const ALfloat *Resample_<BSincTag,CTag>(const InterpState *state, const ALfloat { return DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst, dstlen); } -static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize, 
+static inline void ApplyCoeffs(ALsizei /*Offset*/, float2 *RESTRICT Values, const ALsizei IrSize, const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right) { - ASSUME(Offset >= 0 && Offset < HRIR_LENGTH); ASSUME(IrSize >= 2); - ASSUME(&Values != &Coeffs); - - ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)}; - ASSUME(count > 0); - for(ALsizei c{0};;) + for(ALsizei c{0};c < IrSize;++c) { - for(;c < count;++c) - { - Values[Offset][0] += Coeffs[c][0] * left; - Values[Offset][1] += Coeffs[c][1] * right; - ++Offset; - } - if(c >= IrSize) - break; - Offset = 0; - count = IrSize; + Values[c][0] += Coeffs[c][0] * left; + Values[c][1] += Coeffs[c][1] * right; } } template<> void MixHrtf_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, - ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, const ALsizei BufferSize) + float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, const ALsizei BufferSize) { - MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams, - hrtfstate, BufferSize); + MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams, + BufferSize); } template<> void MixHrtfBlend_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - const ALsizei BufferSize) + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize) { - MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams, - newparams, hrtfstate, BufferSize); + MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams, + newparams, 
BufferSize); } template<> void MixDirectHrtf_<CTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, - const ALsizei BufferSize) -{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); } + const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, + const ALsizei NumChans, const ALsizei BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans, + BufferSize); +} template<> diff --git a/Alc/mixer/mixer_neon.cpp b/Alc/mixer/mixer_neon.cpp index 9e8324aa..cdd96296 100644 --- a/Alc/mixer/mixer_neon.cpp +++ b/Alc/mixer/mixer_neon.cpp @@ -136,11 +136,10 @@ const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALflo } -static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize, +static inline void ApplyCoeffs(ALsizei /*Offset*/, float2 *RESTRICT Values, const ALsizei IrSize, const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right) { ASSUME(IrSize >= 2); - ASSUME(&Values != &Coeffs); float32x4_t leftright4; { @@ -152,43 +151,43 @@ static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const for(ALsizei c{0};c < IrSize;c += 2) { - const ALsizei o0 = (Offset+c)&HRIR_MASK; - const ALsizei o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[c ][0]), + vld1_f32((float32_t*)&Values[c+1][0])); float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); vals = vmlaq_f32(vals, coefs, leftright4); - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + vst1_f32((float32_t*)&Values[c ][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[c+1][0], vget_high_f32(vals)); } } template<> 
void MixHrtf_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, - ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, const ALsizei BufferSize) + float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, const ALsizei BufferSize) { - MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams, - hrtfstate, BufferSize); + MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams, + BufferSize); } template<> void MixHrtfBlend_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - const ALsizei BufferSize) + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize) { - MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams, - newparams, hrtfstate, BufferSize); + MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams, + newparams, BufferSize); } template<> void MixDirectHrtf_<NEONTag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, - const ALsizei BufferSize) -{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); } + const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, + const ALsizei NumChans, const ALsizei BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans, + BufferSize); +} template<> diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp index 69aeaa62..629fa428 100644 --- 
a/Alc/mixer/mixer_sse.cpp +++ b/Alc/mixer/mixer_sse.cpp @@ -76,97 +76,75 @@ const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloa } -static inline void ApplyCoeffs(ALsizei Offset, HrirArray<ALfloat> &Values, const ALsizei IrSize, +static inline void ApplyCoeffs(ALsizei Offset, float2 *RESTRICT Values, const ALsizei IrSize, const HrirArray<ALfloat> &Coeffs, const ALfloat left, const ALfloat right) { const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; ASSUME(IrSize >= 2); - ASSUME(&Values != &Coeffs); - ASSUME(Offset >= 0 && Offset < HRIR_LENGTH); if((Offset&1)) { - ALsizei count{mini(IrSize-1, HRIR_LENGTH - Offset)}; - ASSUME(count >= 1); - __m128 imp0, imp1; __m128 coeffs{_mm_load_ps(&Coeffs[0][0])}; - __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[Offset][0]))}; + __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[0][0]))}; imp0 = _mm_mul_ps(lrlr, coeffs); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals); - ++Offset; - for(ALsizei i{1};;) + _mm_storel_pi(reinterpret_cast<__m64*>(&Values[0][0]), vals); + ALsizei i{1}; + for(;i < IrSize-1;i += 2) { - for(;i < count;i += 2) - { - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - vals = _mm_load_ps(&Values[Offset][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Values[Offset][0], vals); - imp0 = imp1; - Offset += 2; - } - Offset &= HRIR_MASK; - if(i >= IrSize-1) - break; - count = IrSize-1; + coeffs = _mm_load_ps(&Coeffs[i+1][0]); + vals = _mm_load_ps(&Values[i][0]); + imp1 = _mm_mul_ps(lrlr, coeffs); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Values[i][0], vals); + imp0 = imp1; } - vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[Offset][0])); + vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[i][0])); 
imp0 = _mm_movehl_ps(imp0, imp0); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi(reinterpret_cast<__m64*>(&Values[Offset][0]), vals); + _mm_storel_pi(reinterpret_cast<__m64*>(&Values[i][0]), vals); } else { - ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)}; - ASSUME(count >= 2); - - for(ALsizei i{0};;) + for(ALsizei i{0};i < IrSize;i += 2) { - for(;i < count;i += 2) - { - __m128 coeffs{_mm_load_ps(&Coeffs[i][0])}; - __m128 vals{_mm_load_ps(&Values[Offset][0])}; - vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); - _mm_store_ps(&Values[Offset][0], vals); - Offset += 2; - } - if(i >= IrSize) - break; - Offset = 0; - count = IrSize; + __m128 coeffs{_mm_load_ps(&Coeffs[i][0])}; + __m128 vals{_mm_load_ps(&Values[i][0])}; + vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); + _mm_store_ps(&Values[i][0], vals); } } } template<> void MixHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, - ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, const ALsizei BufferSize) + float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, const ALsizei BufferSize) { - MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, hrtfparams, - hrtfstate, BufferSize); + MixHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, hrtfparams, + BufferSize); } template<> void MixHrtfBlend_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - const ALsizei BufferSize) + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize) { - MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, Offset, OutPos, IrSize, oldparams, - 
newparams, hrtfstate, BufferSize); + MixHrtfBlendBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, OutPos, IrSize, oldparams, + newparams, BufferSize); } template<> void MixDirectHrtf_<SSETag>(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, - const ALsizei BufferSize) -{ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, State, NumChans, BufferSize); } + const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, + const ALsizei NumChans, const ALsizei BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, data, AccumSamples, State, NumChans, + BufferSize); +} template<> diff --git a/Alc/mixvoice.cpp b/Alc/mixvoice.cpp index 558586d3..cc95d0fa 100644 --- a/Alc/mixvoice.cpp +++ b/Alc/mixvoice.cpp @@ -625,6 +625,7 @@ void MixVoice(ALvoice *voice, ALvoice::State vstate, const ALuint SourceID, ALCc if((voice->mFlags&VOICE_HAS_HRTF)) { auto &HrtfSamples = Device->HrtfSourceData; + auto &AccumSamples = Device->HrtfAccumData; const ALfloat TargetGain{UNLIKELY(vstate == ALvoice::Stopping) ? 0.0f : parms.Hrtf.Target.Gain}; ALsizei fademix{0}; @@ -641,12 +642,24 @@ void MixVoice(ALvoice *voice, ALvoice::State vstate, const ALuint SourceID, ALCc std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.State.History.size(), parms.Hrtf.State.History.begin()); + /* Copy the current filtered values being accumulated into + * the temp buffer. + */ + auto accum_iter = std::copy_n(parms.Hrtf.State.Values.begin(), + parms.Hrtf.State.Values.size(), std::begin(AccumSamples)); + /* If fading, the old gain is not silence, and this is the * first mixing pass, fade between the IRs. */ if(Counter && (parms.Hrtf.Old.Gain > GAIN_SILENCE_THRESHOLD) && OutPos == 0) { fademix = mini(DstBufferSize, 128); + + /* Clear the accumulation buffer that will start + * getting filled in. 
+ */ + std::fill_n(accum_iter, fademix, float2{}); + ALfloat gain{TargetGain}; /* The new coefficients need to fade in completely @@ -670,19 +683,32 @@ void MixVoice(ALvoice *voice, ALvoice::State vstate, const ALuint SourceID, ALCc MixHrtfBlendSamples( voice->mDirect.Buffer[OutLIdx], voice->mDirect.Buffer[OutRIdx], - HrtfSamples, voice->mOffset, OutPos, IrSize, &parms.Hrtf.Old, - &hrtfparams, &parms.Hrtf.State, fademix); + HrtfSamples, AccumSamples, OutPos, IrSize, &parms.Hrtf.Old, + &hrtfparams, fademix); /* Update the old parameters with the result. */ parms.Hrtf.Old = parms.Hrtf.Target; if(fademix < Counter) parms.Hrtf.Old.Gain = hrtfparams.Gain; else parms.Hrtf.Old.Gain = TargetGain; + + /* Copy the new in-progress accumulation values to the + * front of the temp buffer for the following mix. + */ + accum_iter = std::copy(std::begin(AccumSamples) + fademix, + std::begin(AccumSamples) + fademix + HRIR_LENGTH, + std::begin(AccumSamples)); } - if(fademix < DstBufferSize) + if(LIKELY(fademix < DstBufferSize)) { const ALsizei todo{DstBufferSize - fademix}; + + /* Clear the accumulation buffer that will start + * getting filled in. + */ + std::fill_n(accum_iter, todo, float2{}); + ALfloat gain{TargetGain}; /* Interpolate the target gain if the gain fading lasts @@ -704,8 +730,8 @@ void MixVoice(ALvoice *voice, ALvoice::State vstate, const ALuint SourceID, ALCc static_cast<ALfloat>(todo); MixHrtfSamples( voice->mDirect.Buffer[OutLIdx], voice->mDirect.Buffer[OutRIdx], - HrtfSamples+fademix, voice->mOffset+fademix, OutPos+fademix, IrSize, - &hrtfparams, &parms.Hrtf.State, todo); + HrtfSamples+fademix, AccumSamples, OutPos+fademix, IrSize, &hrtfparams, + todo); /* Store the interpolated gain or the final target gain * depending if the fade is done. */ @@ -714,6 +740,12 @@ void MixVoice(ALvoice *voice, ALvoice::State vstate, const ALuint SourceID, ALCc else parms.Hrtf.Old.Gain = TargetGain; } + + /* Copy the new in-progress accumulation values back for + * the next mix. 
+ */ + std::copy_n(std::begin(AccumSamples) + DstBufferSize, + parms.Hrtf.State.Values.size(), parms.Hrtf.State.Values.begin()); } else if((voice->mFlags&VOICE_HAS_NFC)) { diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h index 3226e582..daa3a09a 100644 --- a/OpenAL32/Include/alMain.h +++ b/OpenAL32/Include/alMain.h @@ -407,6 +407,7 @@ struct ALCdevice { alignas(16) ALfloat HrtfSourceData[BUFFERSIZE + HRTF_HISTORY_LENGTH]; alignas(16) ALfloat NfcSampleData[BUFFERSIZE]; }; + alignas(16) float2 HrtfAccumData[BUFFERSIZE + HRIR_LENGTH]; /* Mixing buffer used by the Dry mix and Real output. */ al::vector<std::array<ALfloat,BUFFERSIZE>, 16> MixBuffer; diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h index 6cdd7d9f..fb0d7d37 100644 --- a/OpenAL32/Include/alu.h +++ b/OpenAL32/Include/alu.h @@ -298,15 +298,14 @@ using RowMixerFunc = void(*)(ALfloat *OutBuffer, const ALfloat *gains, const ALfloat (*data)[BUFFERSIZE], const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize); using HrtfMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - MixHrtfParams *hrtfparams, HrtfState *hrtfstate, const ALsizei BufferSize); + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, const ALsizei BufferSize); using HrtfMixerBlendFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, - const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - const ALsizei BufferSize); + const ALfloat *data, float2 *RESTRICT AccumSamples, const ALsizei OutPos, const ALsizei IrSize, + const HrtfParams *oldparams, MixHrtfParams *newparams, const ALsizei BufferSize); using HrtfDirectMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat 
(*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, - const ALsizei BufferSize); + const ALfloat (*data)[BUFFERSIZE], float2 *RESTRICT AccumSamples, DirectHrtfState *State, + const ALsizei NumChans, const ALsizei BufferSize); #define GAIN_MIX_MAX (1000.0f) /* +60dB */ |