about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2018-12-31 04:12:20 -0800
committer Chris Robinson <[email protected]> 2018-12-31 04:12:20 -0800
commit 5a9a1c8d7d97f589d978ff00859797e76b087a73 (patch)
tree 3130e5c1f0c8d6e5257847c75ad3ddec4ddd3273
parent 5e03941701c112083b5dce14257fd8c51262f04f (diff)
Further improve HRTF methods to avoid masking in the inner loops
-rw-r--r-- Alc/mixer/hrtf_inc.cpp 208
-rw-r--r-- Alc/mixer/mixer_c.cpp 15
-rw-r--r-- Alc/mixer/mixer_sse.cpp 32
-rw-r--r-- OpenAL32/Include/alu.h 8
4 files changed, 163 insertions, 100 deletions
diff --git a/Alc/mixer/hrtf_inc.cpp b/Alc/mixer/hrtf_inc.cpp
index e82bad85..caac7e54 100644
--- a/Alc/mixer/hrtf_inc.cpp
+++ b/Alc/mixer/hrtf_inc.cpp
@@ -14,114 +14,178 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2]
void MixHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, ALsizei OutPos,
+ const ALfloat *data, ALsizei Offset, const ALsizei OutPos,
const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate,
- ALsizei BufferSize)
+ const ALsizei BufferSize)
{
- const ALfloat (&Coeffs)[HRIR_LENGTH][2] = *hrtfparams->Coeffs;
- const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] };
- const ALfloat gainstep = hrtfparams->GainStep;
- const ALfloat gain = hrtfparams->Gain;
- ALfloat g, stepcount = 0.0f;
- ALfloat left, right;
- ALsizei i;
-
+ ASSUME(OutPos >= 0);
ASSUME(IrSize >= 4);
ASSUME(BufferSize > 0);
- LeftOut += OutPos;
- RightOut += OutPos;
- for(i = 0;i < BufferSize;i++)
- {
- hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++);
-
- g = gain + gainstep*stepcount;
- left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*g;
- right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*g;
+ const ALfloat (&Coeffs)[HRIR_LENGTH][2] = *hrtfparams->Coeffs;
+ const ALfloat gainstep{hrtfparams->GainStep};
+ const ALfloat gain{hrtfparams->Gain};
+ ALfloat stepcount{0.0f};
- hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f;
- hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f;
+ ALsizei HistOffset{Offset&HRTF_HISTORY_MASK};
+ ALsizei Delay[2]{
+ (HistOffset-hrtfparams->Delay[0])&HRTF_HISTORY_MASK,
+ (HistOffset-hrtfparams->Delay[1])&HRTF_HISTORY_MASK };
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right);
- *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0];
- *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1];
+ Offset &= HRIR_MASK;
+ ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
- stepcount += 1.0f;
- Offset++;
+ LeftOut += OutPos;
+ RightOut += OutPos;
+ for(ALsizei i{0};i < BufferSize;)
+ {
+ /* Calculate the number of samples we can do until one of the indices
+ * wraps on its buffer, or we reach the end.
+ */
+ const ALsizei todo_hist{HRTF_HISTORY_LENGTH - maxi(maxi(HistOffset, Delay[0]), Delay[1])};
+ const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
+ const ALsizei todo{mini(BufferSize-i, mini(todo_hist, todo_hrir)) + i};
+ ASSUME(todo > i);
+
+ for(;i < todo;++i)
+ {
+ hrtfstate->Values[HeadOffset][0] = 0.0f;
+ hrtfstate->Values[HeadOffset][1] = 0.0f;
+ ++HeadOffset;
+
+ hrtfstate->History[HistOffset++] = *(data++);
+
+ const ALfloat g{gain + gainstep*stepcount};
+ const ALfloat left{hrtfstate->History[Delay[0]++] * g};
+ const ALfloat right{hrtfstate->History[Delay[1]++] * g};
+ ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right);
+
+ *(LeftOut++) += hrtfstate->Values[Offset][0];
+ *(RightOut++) += hrtfstate->Values[Offset][1];
+ ++Offset;
+
+ stepcount += 1.0f;
+ }
+
+ HeadOffset &= HRIR_MASK;
+ HistOffset &= HRTF_HISTORY_MASK;
+ Delay[0] &= HRTF_HISTORY_MASK;
+ Delay[1] &= HRTF_HISTORY_MASK;
+ Offset &= HRIR_MASK;
}
hrtfparams->Gain = gain + gainstep*stepcount;
}
void MixHrtfBlend(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, ALsizei OutPos,
+ const ALfloat *data, ALsizei Offset, const ALsizei OutPos,
const ALsizei IrSize, const HrtfParams *oldparams,
MixHrtfParams *newparams, HrtfState *hrtfstate,
- ALsizei BufferSize)
+ const ALsizei BufferSize)
{
const ALfloat (&OldCoeffs)[HRIR_LENGTH][2] = oldparams->Coeffs;
- const ALsizei OldDelay[2] = { oldparams->Delay[0], oldparams->Delay[1] };
- const ALfloat oldGain = oldparams->Gain;
- const ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize;
+ const ALfloat oldGain{oldparams->Gain};
+ const ALfloat oldGainStep{-oldGain / (ALfloat)BufferSize};
const ALfloat (&NewCoeffs)[HRIR_LENGTH][2] = *newparams->Coeffs;
- const ALsizei NewDelay[2] = { newparams->Delay[0], newparams->Delay[1] };
- const ALfloat newGain = newparams->Gain;
- const ALfloat newGainStep = newparams->GainStep;
- ALfloat g, stepcount = 0.0f;
- ALfloat left, right;
- ALsizei i;
+ const ALfloat newGainStep{newparams->GainStep};
+ ALfloat stepcount{0.0f};
+ ASSUME(OutPos >= 0);
ASSUME(IrSize >= 4);
ASSUME(BufferSize > 0);
+ ALsizei HistOffset{Offset&HRTF_HISTORY_MASK};
+ ALsizei OldDelay[2]{
+ (HistOffset-oldparams->Delay[0])&HRTF_HISTORY_MASK,
+ (HistOffset-oldparams->Delay[1])&HRTF_HISTORY_MASK };
+ ALsizei NewDelay[2]{
+ (HistOffset-newparams->Delay[0])&HRTF_HISTORY_MASK,
+ (HistOffset-newparams->Delay[1])&HRTF_HISTORY_MASK };
+
+ Offset &= HRIR_MASK;
+ ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
+
LeftOut += OutPos;
RightOut += OutPos;
- for(i = 0;i < BufferSize;i++)
+ for(ALsizei i{0};i < BufferSize;)
{
- hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f;
- hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f;
-
- hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++);
-
- g = oldGain + oldGainStep*stepcount;
- left = hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*g;
- right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*g;
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right);
-
- g = newGain + newGainStep*stepcount;
- left = hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*g;
- right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*g;
- ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right);
-
- *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0];
- *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1];
-
- stepcount += 1.0f;
- Offset++;
+ const ALsizei todo_hist{HRTF_HISTORY_LENGTH -
+ maxi(maxi(maxi(maxi(HistOffset, OldDelay[0]), OldDelay[1]), NewDelay[0]), NewDelay[1])
+ };
+ const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
+ const ALsizei todo{mini(BufferSize-i, mini(todo_hist, todo_hrir)) + i};
+ ASSUME(todo > i);
+
+ for(;i < todo;++i)
+ {
+ hrtfstate->Values[HeadOffset][0] = 0.0f;
+ hrtfstate->Values[HeadOffset][1] = 0.0f;
+ ++HeadOffset;
+
+ hrtfstate->History[HistOffset++] = *(data++);
+
+ ALfloat g{oldGain + oldGainStep*stepcount};
+ ALfloat left{hrtfstate->History[OldDelay[0]++] * g};
+ ALfloat right{hrtfstate->History[OldDelay[1]++] * g};
+ ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right);
+
+ g = newGainStep*stepcount;
+ left = hrtfstate->History[NewDelay[0]++] * g;
+ right = hrtfstate->History[NewDelay[1]++] * g;
+ ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right);
+
+ *(LeftOut++) += hrtfstate->Values[Offset][0];
+ *(RightOut++) += hrtfstate->Values[Offset][1];
+ ++Offset;
+
+ stepcount += 1.0f;
+ }
+
+ HeadOffset &= HRIR_MASK;
+ HistOffset &= HRTF_HISTORY_MASK;
+ OldDelay[0] &= HRTF_HISTORY_MASK;
+ OldDelay[1] &= HRTF_HISTORY_MASK;
+ NewDelay[0] &= HRTF_HISTORY_MASK;
+ NewDelay[1] &= HRTF_HISTORY_MASK;
+ Offset &= HRIR_MASK;
}
- newparams->Gain = newGain + newGainStep*stepcount;
+ newparams->Gain = newGainStep*stepcount;
}
void MixDirectHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
const ALfloat *data, DirectHrtfState *State, const ALsizei Chan,
const ALsizei BufferSize)
{
- const ALsizei IrSize{State->IrSize};
- ALsizei Offset{State->Offset};
- ALfloat (&Values)[HRIR_LENGTH][2] = State->Chan[Chan].Values;
+ ASSUME(Chan >= 0);
+ ASSUME(BufferSize > 0);
+
const ALfloat (&Coeffs)[HRIR_LENGTH][2] = State->Chan[Chan].Coeffs;
+ ALfloat (&Values)[HRIR_LENGTH][2] = State->Chan[Chan].Values;
+ ALsizei Offset{State->Offset&HRIR_MASK};
+ const ALsizei IrSize{State->IrSize};
ASSUME(IrSize >= 4);
- ASSUME(BufferSize > 0);
- for(ALsizei i{0};i < BufferSize;i++)
+ ALsizei HeadOffset{(Offset+IrSize-1)&HRIR_MASK};
+ for(ALsizei i{0};i < BufferSize;)
{
- Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f;
- Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f;
- Offset++;
-
- const ALfloat insample{*(data++)};
- ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample);
- *(LeftOut++) += Values[Offset&HRIR_MASK][0];
- *(RightOut++) += Values[Offset&HRIR_MASK][1];
+ const ALsizei todo_hrir{HRIR_LENGTH - maxi(HeadOffset, Offset)};
+ const ALsizei todo{mini(BufferSize-i, todo_hrir) + i};
+ ASSUME(todo > i);
+
+ for(;i < todo;++i)
+ {
+ Values[HeadOffset][0] = 0.0f;
+ Values[HeadOffset][1] = 0.0f;
+ ++HeadOffset;
+
+ const ALfloat insample{*(data++)};
+ ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample);
+
+ *(LeftOut++) += Values[Offset][0];
+ *(RightOut++) += Values[Offset][1];
+ ++Offset;
+ }
+ HeadOffset &= HRIR_MASK;
+ Offset &= HRIR_MASK;
}
}
diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp
index d98b8e2e..22d3642e 100644
--- a/Alc/mixer/mixer_c.cpp
+++ b/Alc/mixer/mixer_c.cpp
@@ -107,24 +107,23 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2]
const ALsizei IrSize, const ALfloat (&Coeffs)[HRIR_LENGTH][2],
const ALfloat left, const ALfloat right)
{
- ALsizei off{Offset&HRIR_MASK};
- ALsizei count{mini(IrSize, HRIR_LENGTH - off)};
-
+ ASSUME(Offset >= 0 && Offset < HRIR_LENGTH);
ASSUME(IrSize >= 2);
ASSUME(&Values != &Coeffs);
- ASSUME(count > 0);
+ ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)};
+ ASSUME(count > 0);
for(ALsizei c{0};;)
{
for(;c < count;++c)
{
- Values[off][0] += Coeffs[c][0] * left;
- Values[off][1] += Coeffs[c][1] * right;
- ++off;
+ Values[Offset][0] += Coeffs[c][0] * left;
+ Values[Offset][1] += Coeffs[c][1] * right;
+ ++Offset;
}
if(c >= IrSize)
break;
- off = 0;
+ Offset = 0;
count = IrSize;
}
}
diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp
index 5d82e5ae..2637883b 100644
--- a/Alc/mixer/mixer_sse.cpp
+++ b/Alc/mixer/mixer_sse.cpp
@@ -90,45 +90,45 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2]
ASSUME(IrSize >= 2);
ASSUME(&Values != &Coeffs);
- ALsizei off{Offset&HRIR_MASK};
+ ASSUME(Offset >= 0 && Offset < HRIR_LENGTH);
if((Offset&1))
{
- ALsizei count{mini(IrSize-1, HRIR_LENGTH - off)};
+ ALsizei count{mini(IrSize-1, HRIR_LENGTH - Offset)};
ASSUME(count >= 1);
__m128 imp0, imp1;
coeffs = _mm_load_ps(&Coeffs[0][0]);
- vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]);
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[Offset][0]);
imp0 = _mm_mul_ps(lrlr, coeffs);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi((__m64*)&Values[off][0], vals);
- ++off;
+ _mm_storel_pi((__m64*)&Values[Offset][0], vals);
+ ++Offset;
for(ALsizei i{1};;)
{
for(;i < count;i += 2)
{
coeffs = _mm_load_ps(&Coeffs[i+1][0]);
- vals = _mm_load_ps(&Values[off][0]);
+ vals = _mm_load_ps(&Values[Offset][0]);
imp1 = _mm_mul_ps(lrlr, coeffs);
imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Values[off][0], vals);
+ _mm_store_ps(&Values[Offset][0], vals);
imp0 = imp1;
- off += 2;
+ Offset += 2;
}
- off &= HRIR_MASK;
+ Offset &= HRIR_MASK;
if(i >= IrSize-1)
break;
count = IrSize-1;
}
- vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]);
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[Offset][0]);
imp0 = _mm_movehl_ps(imp0, imp0);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi((__m64*)&Values[off][0], vals);
+ _mm_storel_pi((__m64*)&Values[Offset][0], vals);
}
else
{
- ALsizei count{mini(IrSize, HRIR_LENGTH - off)};
+ ALsizei count{mini(IrSize, HRIR_LENGTH - Offset)};
ASSUME(count >= 2);
for(ALsizei i{0};;)
@@ -136,14 +136,14 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2]
for(;i < count;i += 2)
{
coeffs = _mm_load_ps(&Coeffs[i][0]);
- vals = _mm_load_ps(&Values[off][0]);
+ vals = _mm_load_ps(&Values[Offset][0]);
vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
- _mm_store_ps(&Values[off][0], vals);
- off += 2;
+ _mm_store_ps(&Values[Offset][0], vals);
+ Offset += 2;
}
if(i >= IrSize)
break;
- off = 0;
+ Offset = 0;
count = IrSize;
}
}
diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h
index 4fcc4c9c..08e6319e 100644
--- a/OpenAL32/Include/alu.h
+++ b/OpenAL32/Include/alu.h
@@ -280,12 +280,12 @@ using RowMixerFunc = void(*)(ALfloat *OutBuffer, const ALfloat *gains,
const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos,
ALsizei BufferSize);
using HrtfMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, ALsizei OutPos, const ALsizei IrSize,
- MixHrtfParams *hrtfparams, HrtfState *hrtfstate, ALsizei BufferSize);
+ const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
+ MixHrtfParams *hrtfparams, HrtfState *hrtfstate, const ALsizei BufferSize);
using HrtfMixerBlendFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
- const ALfloat *data, ALsizei Offset, ALsizei OutPos, const ALsizei IrSize,
+ const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize,
const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate,
- ALsizei BufferSize);
+ const ALsizei BufferSize);
using HrtfDirectMixerFunc = void(*)(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
const ALfloat *data, DirectHrtfState *State, const ALsizei Chan, const ALsizei BufferSize);