about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Alc/mixer/hrtf_inc.cpp 7
-rw-r--r-- Alc/mixer/mixer_c.cpp 22
-rw-r--r-- Alc/mixer/mixer_sse.cpp 68
3 files changed, 62 insertions, 35 deletions
diff --git a/Alc/mixer/hrtf_inc.cpp b/Alc/mixer/hrtf_inc.cpp
index 22715abc..d811bd61 100644
--- a/Alc/mixer/hrtf_inc.cpp
+++ b/Alc/mixer/hrtf_inc.cpp
@@ -107,19 +107,16 @@ void MixDirectHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
const ALfloat (*RESTRICT Coeffs)[2], ALfloat (*RESTRICT Values)[2],
ALsizei BufferSize)
{
- ALfloat insample;
- ALsizei i;
-
ASSUME(IrSize >= 4);
ASSUME(BufferSize > 0);
- for(i = 0;i < BufferSize;i++)
+ for(ALsizei i{0};i < BufferSize;i++)
{
Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f;
Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f;
Offset++;
- insample = *(data++);
+ const ALfloat insample{*(data++)};
ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample);
*(LeftOut++) += Values[Offset&HRIR_MASK][0];
*(RightOut++) += Values[Offset&HRIR_MASK][1];
diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp
index 7a2a6319..bbf58325 100644
--- a/Alc/mixer/mixer_c.cpp
+++ b/Alc/mixer/mixer_c.cpp
@@ -108,12 +108,24 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2],
const ALfloat (*RESTRICT Coeffs)[2],
ALfloat left, ALfloat right)
{
- ALsizei c;
- for(c = 0;c < IrSize;c++)
+ ALsizei off{Offset&HRIR_MASK};
+ ALsizei count{mini(IrSize, HRIR_LENGTH - off)};
+
+ ASSUME(IrSize > 0);
+ ASSUME(count > 0);
+
+ for(ALsizei c{0};;)
{
- const ALsizei off = (Offset+c)&HRIR_MASK;
- Values[off][0] += Coeffs[c][0] * left;
- Values[off][1] += Coeffs[c][1] * right;
+ for(;c < count;++c)
+ {
+ Values[off][0] += Coeffs[c][0] * left;
+ Values[off][1] += Coeffs[c][1] * right;
+ ++off;
+ }
+ if(c >= IrSize)
+ break;
+ off = 0;
+ count = IrSize;
}
}
diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp
index f0620cb5..09307697 100644
--- a/Alc/mixer/mixer_sse.cpp
+++ b/Alc/mixer/mixer_sse.cpp
@@ -87,46 +87,64 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2],
const __m128 lrlr = _mm_setr_ps(left, right, left, right);
__m128 vals = _mm_setzero_ps();
__m128 coeffs;
- ALsizei i;
+ ASSUME(IrSize > 1);
+
+ ALsizei off{Offset&HRIR_MASK};
if((Offset&1))
{
- const ALsizei o0 = Offset&HRIR_MASK;
- const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK;
- __m128 imp0, imp1;
+ ALsizei count{mini(IrSize-1, HRIR_LENGTH - off)};
+ ASSUME(count >= 1);
+ __m128 imp0, imp1;
coeffs = _mm_load_ps(&Coeffs[0][0]);
- vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]);
imp0 = _mm_mul_ps(lrlr, coeffs);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi((__m64*)&Values[o0][0], vals);
- for(i = 1;i < IrSize-1;i += 2)
+ _mm_storel_pi((__m64*)&Values[off][0], vals);
+ ++off;
+ for(ALsizei i{1};;)
{
- const ALsizei o2 = (Offset+i)&HRIR_MASK;
-
- coeffs = _mm_load_ps(&Coeffs[i+1][0]);
- vals = _mm_load_ps(&Values[o2][0]);
- imp1 = _mm_mul_ps(lrlr, coeffs);
- imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
- vals = _mm_add_ps(imp0, vals);
- _mm_store_ps(&Values[o2][0], vals);
- imp0 = imp1;
+ for(;i < count;i += 2)
+ {
+ coeffs = _mm_load_ps(&Coeffs[i+1][0]);
+ vals = _mm_load_ps(&Values[off][0]);
+ imp1 = _mm_mul_ps(lrlr, coeffs);
+ imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
+ vals = _mm_add_ps(imp0, vals);
+ _mm_store_ps(&Values[off][0], vals);
+ imp0 = imp1;
+ off += 2;
+ }
+ off &= HRIR_MASK;
+ if(i >= IrSize-1)
+ break;
+ count = IrSize-1;
}
- vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]);
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]);
imp0 = _mm_movehl_ps(imp0, imp0);
vals = _mm_add_ps(imp0, vals);
- _mm_storel_pi((__m64*)&Values[o1][0], vals);
+ _mm_storel_pi((__m64*)&Values[off][0], vals);
}
else
{
- for(i = 0;i < IrSize;i += 2)
- {
- const ALsizei o = (Offset + i)&HRIR_MASK;
+ ALsizei count{mini(IrSize, HRIR_LENGTH - off)};
+ ASSUME(count >= 2);
- coeffs = _mm_load_ps(&Coeffs[i][0]);
- vals = _mm_load_ps(&Values[o][0]);
- vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
- _mm_store_ps(&Values[o][0], vals);
+ for(ALsizei i{0};;)
+ {
+ for(;i < count;i += 2)
+ {
+ coeffs = _mm_load_ps(&Coeffs[i][0]);
+ vals = _mm_load_ps(&Values[off][0]);
+ vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs));
+ _mm_store_ps(&Values[off][0], vals);
+ off += 2;
+ }
+ if(i >= IrSize)
+ break;
+ off = 0;
+ count = IrSize;
}
}
}