From 5a9a1c8d7d97f589d978ff00859797e76b087a73 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Mon, 31 Dec 2018 04:12:20 -0800 Subject: Further improve HRTF methods to avoid masking in the inner loops --- Alc/mixer/hrtf_inc.cpp | 208 +++++++++++++++++++++++++++++++----------------- Alc/mixer/mixer_c.cpp | 15 ++-- Alc/mixer/mixer_sse.cpp | 32 ++++---- OpenAL32/Include/alu.h | 8 +- 4 files changed, 163 insertions(+), 100 deletions(-) diff --git a/Alc/mixer/hrtf_inc.cpp b/Alc/mixer/hrtf_inc.cpp index e82bad85..caac7e54 100644 --- a/Alc/mixer/hrtf_inc.cpp +++ b/Alc/mixer/hrtf_inc.cpp @@ -14,114 +14,178 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] void MixHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, - ALsizei BufferSize) + const ALsizei BufferSize) { - const ALfloat (&Coeffs)[HRIR_LENGTH][2] = *hrtfparams->Coeffs; - const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] }; - const ALfloat gainstep = hrtfparams->GainStep; - const ALfloat gain = hrtfparams->Gain; - ALfloat g, stepcount = 0.0f; - ALfloat left, right; - ALsizei i; - + ASSUME(OutPos >= 0); ASSUME(IrSize >= 4); ASSUME(BufferSize > 0); - LeftOut += OutPos; - RightOut += OutPos; - for(i = 0;i < BufferSize;i++) - { - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - - g = gain + gainstep*stepcount; - left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*g; - right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*g; + const ALfloat (&Coeffs)[HRIR_LENGTH][2] = *hrtfparams->Coeffs; + const ALfloat gainstep{hrtfparams->GainStep}; + const ALfloat gain{hrtfparams->Gain}; + ALfloat stepcount{0.0f}; - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; + ALsizei HistOffset{Offset&HRTF_HISTORY_MASK}; + ALsizei Delay[2]{ + (HistOffset-hrtfparams->Delay[0])&HRTF_HISTORY_MASK, + (HistOffset-hrtfparams->Delay[1])&HRTF_HISTORY_MASK }; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; - *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; + Offset &= HRIR_MASK; + ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; - stepcount += 1.0f; - Offset++; + LeftOut += OutPos; + RightOut += OutPos; + for(ALsizei i{0};i < BufferSize;) + { + /* Calculate the number of samples we can do until one of the indices + * wraps on its buffer, or we reach the end. + */ + const ALsizei todo_hist{HRTF_HISTORY_LENGTH - maxi(maxi(HistOffset, Delay[0]), Delay[1])}; + const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; + const ALsizei todo{mini(BufferSize-i, mini(todo_hist, todo_hrir)) + i}; + ASSUME(todo > i); + + for(;i < todo;++i) + { + hrtfstate->Values[HeadOffset][0] = 0.0f; + hrtfstate->Values[HeadOffset][1] = 0.0f; + ++HeadOffset; + + hrtfstate->History[HistOffset++] = *(data++); + + const ALfloat g{gain + gainstep*stepcount}; + const ALfloat left{hrtfstate->History[Delay[0]++] * g}; + const ALfloat right{hrtfstate->History[Delay[1]++] * g}; + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); + + *(LeftOut++) += hrtfstate->Values[Offset][0]; + *(RightOut++) += hrtfstate->Values[Offset][1]; + ++Offset; + + stepcount += 1.0f; + } + + HeadOffset &= HRIR_MASK; + HistOffset &= HRTF_HISTORY_MASK; + Delay[0] &= HRTF_HISTORY_MASK; + Delay[1] &= HRTF_HISTORY_MASK; + Offset &= HRIR_MASK; } hrtfparams->Gain = gain + gainstep*stepcount; } void MixHrtfBlend(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize) + const ALsizei BufferSize) { const ALfloat (&OldCoeffs)[HRIR_LENGTH][2] = oldparams->Coeffs; - const ALsizei OldDelay[2] = { oldparams->Delay[0], oldparams->Delay[1] }; - const ALfloat oldGain = oldparams->Gain; - const ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize; + const ALfloat oldGain{oldparams->Gain}; + const ALfloat oldGainStep{-oldGain / (ALfloat)BufferSize}; const ALfloat (&NewCoeffs)[HRIR_LENGTH][2] = *newparams->Coeffs; - const ALsizei NewDelay[2] = { newparams->Delay[0], newparams->Delay[1] }; - const ALfloat newGain = newparams->Gain; - const ALfloat newGainStep = newparams->GainStep; - ALfloat g, stepcount = 0.0f; - ALfloat left, right; - ALsizei i; + const ALfloat newGainStep{newparams->GainStep}; + ALfloat stepcount{0.0f}; + ASSUME(OutPos >= 0); ASSUME(IrSize >= 4); ASSUME(BufferSize > 0); + ALsizei HistOffset{Offset&HRTF_HISTORY_MASK}; + ALsizei OldDelay[2]{ + (HistOffset-oldparams->Delay[0])&HRTF_HISTORY_MASK, + (HistOffset-oldparams->Delay[1])&HRTF_HISTORY_MASK }; + ALsizei NewDelay[2]{ + (HistOffset-newparams->Delay[0])&HRTF_HISTORY_MASK, + (HistOffset-newparams->Delay[1])&HRTF_HISTORY_MASK }; + + Offset &= HRIR_MASK; + ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; + LeftOut += OutPos; RightOut += OutPos; - for(i = 0;i < BufferSize;i++) + for(ALsizei i{0};i < BufferSize;) { - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; - - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - - g = oldGain + oldGainStep*stepcount; - left = hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*g; - right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*g; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); - - g = newGain + newGainStep*stepcount; - left = hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*g; - right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*g; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); - - *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; - *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; - - stepcount += 1.0f; - Offset++; + const ALsizei todo_hist{HRTF_HISTORY_LENGTH - + maxi(maxi(maxi(maxi(HistOffset, OldDelay[0]), OldDelay[1]), NewDelay[0]), NewDelay[1]) + }; + const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; + const ALsizei todo{mini(BufferSize-i, mini(todo_hist, todo_hrir)) + i}; + ASSUME(todo > i); + + for(;i < todo;++i) + { + hrtfstate->Values[HeadOffset][0] = 0.0f; + hrtfstate->Values[HeadOffset][1] = 0.0f; + ++HeadOffset; + + hrtfstate->History[HistOffset++] = *(data++); + + ALfloat g{oldGain + oldGainStep*stepcount}; + ALfloat left{hrtfstate->History[OldDelay[0]++] * g}; + ALfloat right{hrtfstate->History[OldDelay[1]++] * g}; + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); + + g = newGainStep*stepcount; + left = hrtfstate->History[NewDelay[0]++] * g; + right = hrtfstate->History[NewDelay[1]++] * g; + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); + + *(LeftOut++) += hrtfstate->Values[Offset][0]; + *(RightOut++) += hrtfstate->Values[Offset][1]; + ++Offset; + + stepcount += 1.0f; + } + + HeadOffset &= HRIR_MASK; + HistOffset &= HRTF_HISTORY_MASK; + OldDelay[0] &= HRTF_HISTORY_MASK; + OldDelay[1] &= HRTF_HISTORY_MASK; + NewDelay[0] &= HRTF_HISTORY_MASK; + NewDelay[1] &= HRTF_HISTORY_MASK; + Offset &= HRIR_MASK; } - newparams->Gain = newGain + newGainStep*stepcount; + newparams->Gain = newGainStep*stepcount; } void MixDirectHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, DirectHrtfState *State, const ALsizei Chan, const ALsizei BufferSize) { - const ALsizei IrSize{State->IrSize}; - ALsizei Offset{State->Offset}; - ALfloat (&Values)[HRIR_LENGTH][2] = State->Chan[Chan].Values; + ASSUME(Chan >= 0); + ASSUME(BufferSize > 0); + const ALfloat (&Coeffs)[HRIR_LENGTH][2] = State->Chan[Chan].Coeffs; + ALfloat (&Values)[HRIR_LENGTH][2] = State->Chan[Chan].Values; + ALsizei Offset{State->Offset&HRIR_MASK}; + const ALsizei IrSize{State->IrSize}; ASSUME(IrSize >= 4); - ASSUME(BufferSize > 0); - for(ALsizei i{0};i < BufferSize;i++) + ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK}; + for(ALsizei i{0};i < BufferSize;) { - Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; - Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; - Offset++; - - const ALfloat insample{*(data++)}; - ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); - *(LeftOut++) += Values[Offset&HRIR_MASK][0]; - *(RightOut++) += Values[Offset&HRIR_MASK][1]; + const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)}; + const ALsizei todo{mini(BufferSize-i, todo_hrir) + i}; + ASSUME(todo > i); + + for(;i < todo;++i) + { + Values[HeadOffset][0] = 0.0f; + Values[HeadOffset][1] = 0.0f; + ++HeadOffset; + + const ALfloat insample{*(data++)}; + ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); + + *(LeftOut++) += Values[Offset][0]; + *(RightOut++) += Values[Offset][1]; + ++Offset; + } + HeadOffset &= HRIR_MASK; + Offset &= HRIR_MASK; } } diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp index d98b8e2e..22d3642e 100644 --- a/Alc/mixer/mixer_c.cpp +++ b/Alc/mixer/mixer_c.cpp @@ -107,24 +107,23 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] const ALsizei IrSize, const ALfloat (&Coeffs)[HRIR_LENGTH][2], const ALfloat left, const ALfloat right) { - ALsizei off{Offset&HRIR_MASK}; - ALsizei count{mini(IrSize, HRIR_LENGTH - off)}; - + ASSUME(Offset >= 0 && Offset < HRIR_LENGTH); ASSUME(IrSize >= 2); ASSUME(&Values != &Coeffs); - ASSUME(count > 0); + ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)}; + ASSUME(count > 0); for(ALsizei c{0};;) { for(;c < count;++c) { - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; - ++off; + Values[Offset][0] += Coeffs[c][0] * left; + Values[Offset][1] += Coeffs[c][1] * right; + ++Offset; } if(c >= IrSize) break; - off = 0; + Offset = 0; count = IrSize; } } diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp index 5d82e5ae..2637883b 100644 --- a/Alc/mixer/mixer_sse.cpp +++ b/Alc/mixer/mixer_sse.cpp @@ -90,45 +90,45 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] ASSUME(IrSize >= 2); ASSUME(&Values != &Coeffs); - ALsizei off{Offset&HRIR_MASK}; + ASSUME(Offset >= 0 && Offset < HRIR_LENGTH); if((Offset&1)) { - ALsizei count{mini(IrSize-1, HRIR_LENGTH - off)}; + ALsizei count{mini(IrSize-1, HRIR_LENGTH - Offset)}; ASSUME(count >= 1); __m128 imp0, imp1; coeffs = _mm_load_ps(&Coeffs[0][0]); - vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[Offset][0]); imp0 = _mm_mul_ps(lrlr, coeffs); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[off][0], vals); - ++off; + _mm_storel_pi((__m64*)&Values[Offset][0], vals); + ++Offset; for(ALsizei i{1};;) { for(;i < count;i += 2) { coeffs = _mm_load_ps(&Coeffs[i+1][0]); - vals = _mm_load_ps(&Values[off][0]); + vals = _mm_load_ps(&Values[Offset][0]); imp1 = _mm_mul_ps(lrlr, coeffs); imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Values[off][0], vals); + _mm_store_ps(&Values[Offset][0], vals); imp0 = imp1; - off += 2; + Offset += 2; } - off &= HRIR_MASK; + Offset &= HRIR_MASK; if(i >= IrSize-1) break; count = IrSize-1; } - vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[Offset][0]); imp0 = _mm_movehl_ps(imp0, imp0); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[off][0], vals); + _mm_storel_pi((__m64*)&Values[Offset][0], vals); } else { - ALsizei count{mini(IrSize, HRIR_LENGTH - off)}; + ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)}; ASSUME(count >= 2); for(ALsizei i{0};;) @@ -136,14 +136,14 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] for(;i < count;i += 2) { coeffs = _mm_load_ps(&Coeffs[i][0]); - vals = _mm_load_ps(&Values[off][0]); + vals = _mm_load_ps(&Values[Offset][0]); vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); - _mm_store_ps(&Values[off][0], vals); - off += 2; + _mm_store_ps(&Values[Offset][0], vals); + Offset += 2; } if(i >= IrSize) break; - off = 0; + Offset = 0; count = IrSize; } } diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h index 4fcc4c9c..08e6319e 100644 --- a/OpenAL32/Include/alu.h +++ b/OpenAL32/Include/alu.h @@ -280,12 +280,12 @@ using RowMixerFunc = void(*)(ALfloat *OutBuffer, const ALfloat *gains, const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize); using HrtfMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, const ALsizei IrSize, - MixHrtfParams *hrtfparams, HrtfState *hrtfstate, ALsizei BufferSize); + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, + MixHrtfParams *hrtfparams, HrtfState *hrtfstate, const ALsizei BufferSize); using HrtfMixerBlendFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, const ALsizei IrSize, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); + const ALsizei BufferSize); using HrtfDirectMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, DirectHrtfState *State, const ALsizei Chan, const ALsizei BufferSize); -- cgit v1.2.3