diff options
-rw-r--r-- | Alc/mixer/hrtf_inc.cpp | 7 | ||||
-rw-r--r-- | Alc/mixer/mixer_c.cpp | 22 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse.cpp | 68 |
3 files changed, 62 insertions, 35 deletions
diff --git a/Alc/mixer/hrtf_inc.cpp b/Alc/mixer/hrtf_inc.cpp index 22715abc..d811bd61 100644 --- a/Alc/mixer/hrtf_inc.cpp +++ b/Alc/mixer/hrtf_inc.cpp @@ -107,19 +107,16 @@ void MixDirectHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*RESTRICT Coeffs)[2], ALfloat (*RESTRICT Values)[2], ALsizei BufferSize) { - ALfloat insample; - ALsizei i; - ASSUME(IrSize >= 4); ASSUME(BufferSize > 0); - for(i = 0;i < BufferSize;i++) + for(ALsizei i{0};i < BufferSize;i++) { Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; Offset++; - insample = *(data++); + const ALfloat insample{*(data++)}; ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); *(LeftOut++) += Values[Offset&HRIR_MASK][0]; *(RightOut++) += Values[Offset&HRIR_MASK][1]; diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp index 7a2a6319..bbf58325 100644 --- a/Alc/mixer/mixer_c.cpp +++ b/Alc/mixer/mixer_c.cpp @@ -108,12 +108,24 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2], const ALfloat (*RESTRICT Coeffs)[2], ALfloat left, ALfloat right) { - ALsizei c; - for(c = 0;c < IrSize;c++) + ALsizei off{Offset&HRIR_MASK}; + ALsizei count{mini(IrSize, HRIR_LENGTH - off)}; + + ASSUME(IrSize > 0); + ASSUME(count > 0); + + for(ALsizei c{0};;) { - const ALsizei off = (Offset+c)&HRIR_MASK; - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; + for(;c < count;++c) + { + Values[off][0] += Coeffs[c][0] * left; + Values[off][1] += Coeffs[c][1] * right; + ++off; + } + if(c >= IrSize) + break; + off = 0; + count = IrSize; } } diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp index f0620cb5..09307697 100644 --- a/Alc/mixer/mixer_sse.cpp +++ b/Alc/mixer/mixer_sse.cpp @@ -87,46 +87,64 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2], const __m128 lrlr = _mm_setr_ps(left, right, left, right); __m128 vals = _mm_setzero_ps(); __m128 coeffs; - ALsizei i; + ASSUME(IrSize > 1); + + ALsizei off{Offset&HRIR_MASK}; if((Offset&1)) { - const ALsizei o0 = Offset&HRIR_MASK; - const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK; - __m128 imp0, imp1; + ALsizei count{mini(IrSize-1, HRIR_LENGTH - off)}; + ASSUME(count >= 1); + __m128 imp0, imp1; coeffs = _mm_load_ps(&Coeffs[0][0]); - vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]); imp0 = _mm_mul_ps(lrlr, coeffs); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[o0][0], vals); - for(i = 1;i < IrSize-1;i += 2) + _mm_storel_pi((__m64*)&Values[off][0], vals); + ++off; + for(ALsizei i{1};;) { - const ALsizei o2 = (Offset+i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - vals = _mm_load_ps(&Values[o2][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Values[o2][0], vals); - imp0 = imp1; + for(;i < count;i += 2) + { + coeffs = _mm_load_ps(&Coeffs[i+1][0]); + vals = _mm_load_ps(&Values[off][0]); + imp1 = _mm_mul_ps(lrlr, coeffs); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Values[off][0], vals); + imp0 = imp1; + off += 2; + } + off &= HRIR_MASK; + if(i >= IrSize-1) + break; + count = IrSize-1; } - vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[off][0]); imp0 = _mm_movehl_ps(imp0, imp0); vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[o1][0], vals); + _mm_storel_pi((__m64*)&Values[off][0], vals); } else { - for(i = 0;i < IrSize;i += 2) - { - const ALsizei o = (Offset + i)&HRIR_MASK; + ALsizei count{mini(IrSize, HRIR_LENGTH - off)}; + ASSUME(count >= 2); - coeffs = _mm_load_ps(&Coeffs[i][0]); - vals = _mm_load_ps(&Values[o][0]); - vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); - _mm_store_ps(&Values[o][0], vals); + for(ALsizei i{0};;) + { + for(;i < count;i += 2) + { + coeffs = _mm_load_ps(&Coeffs[i][0]); + vals = _mm_load_ps(&Values[off][0]); + vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); + _mm_store_ps(&Values[off][0], vals); + off += 2; + } + if(i >= IrSize) + break; + off = 0; + count = IrSize; } } } |