author     Chris Robinson <[email protected]>    2014-11-23 10:49:54 -0800
committer  Chris Robinson <[email protected]>    2014-11-23 10:49:54 -0800
commit     45d6bb58a4293c5b1ab229cea86e0ef24a2a084b (patch)
tree       ec03ad6eac812ae209f8d973687afa5b99616133 /Alc/mixer_sse.c
parent     fc3608b381c0492674b4cfc1da0dcf5389ace722 (diff)
Partially revert "Use a different method for HRTF mixing"
The sound localization with virtual channel mixing was just too poor, so while
it's more costly to do per-source HRTF mixing, it's unavoidable if you want
good localization.
This is only partially reverted because keeping the virtual channels is still
beneficial, particularly for B-Format rendering and effect mixing, which would
otherwise skip HRTF processing. As before, the number of virtual channels can
potentially be customized, specifying more or fewer channels depending on the
system's needs.
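To make that trade-off concrete, below is a minimal, hypothetical sketch; it is not OpenAL Soft's mixer, and the function names, the fixed IR length, and the channel count are invented for illustration. Per-source HRTF mixing convolves each source with its own HRIR pair, so the cost grows with the source count but every source keeps its exact direction; virtual-channel mixing pans all sources onto a small fixed channel set first and applies one HRIR pair per channel, which is cheaper but blurs localization.

/* Hypothetical sketch (not OpenAL Soft code) of per-source HRTF mixing vs.
 * virtual-channel mixing. Output buffers are assumed zero-initialized. */
#include <stddef.h>
#include <string.h>

#define IR_LEN    32   /* illustrative HRIR length */
#define NUM_VCHAN 4    /* illustrative virtual channel count */

typedef struct { float left[IR_LEN], right[IR_LEN]; } Hrir;

/* Per-source path: one HRIR convolution per source. */
static void mix_hrtf_per_source(float *out_l, float *out_r, size_t frames,
                                const float *const *srcs, const Hrir *src_hrirs,
                                size_t num_srcs)
{
    for(size_t s = 0;s < num_srcs;s++)
        for(size_t i = 0;i < frames;i++)
            for(size_t t = 0;t < IR_LEN && t <= i;t++)
            {
                out_l[i] += srcs[s][i-t] * src_hrirs[s].left[t];
                out_r[i] += srcs[s][i-t] * src_hrirs[s].right[t];
            }
}

/* Virtual-channel path: pan sources onto NUM_VCHAN channels with plain gains,
 * then run one fixed HRIR per channel regardless of how many sources exist. */
static void mix_hrtf_virtual(float *out_l, float *out_r, size_t frames,
                             const float *const *srcs,
                             const float (*gains)[NUM_VCHAN], size_t num_srcs,
                             const Hrir chan_hrirs[NUM_VCHAN],
                             float *chan_buf /* NUM_VCHAN*frames scratch */)
{
    memset(chan_buf, 0, sizeof(float)*NUM_VCHAN*frames);
    for(size_t s = 0;s < num_srcs;s++)
        for(size_t c = 0;c < NUM_VCHAN;c++)
            for(size_t i = 0;i < frames;i++)
                chan_buf[c*frames + i] += srcs[s][i] * gains[s][c];

    for(size_t c = 0;c < NUM_VCHAN;c++)
        for(size_t i = 0;i < frames;i++)
            for(size_t t = 0;t < IR_LEN && t <= i;t++)
            {
                out_l[i] += chan_buf[c*frames + (i-t)] * chan_hrirs[c].left[t];
                out_r[i] += chan_buf[c*frames + (i-t)] * chan_hrirs[c].right[t];
            }
}

In the per-source path the convolution work is repeated for every source, while the virtual-channel path does a cheap gain mix per source and a fixed amount of HRIR filtering, which is why this commit keeps the virtual channels around for B-Format and effect output even though sources now get their own HRTF pass again.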
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r--   Alc/mixer_sse.c   62
1 file changed, 62 insertions, 0 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index d0dca40e..d86cf749 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -19,6 +19,68 @@
 #include "mixer_defs.h"
 
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint IrSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right)
+{
+    const __m128 lrlr = _mm_setr_ps(left, right, left, right);
+    __m128 coeffs, deltas, imp0, imp1;
+    __m128 vals = _mm_setzero_ps();
+    ALuint i;
+
+    if((Offset&1))
+    {
+        const ALuint o0 = Offset&HRIR_MASK;
+        const ALuint o1 = (Offset+IrSize-1)&HRIR_MASK;
+
+        coeffs = _mm_load_ps(&Coeffs[0][0]);
+        deltas = _mm_load_ps(&CoeffStep[0][0]);
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+        imp0 = _mm_mul_ps(lrlr, coeffs);
+        coeffs = _mm_add_ps(coeffs, deltas);
+        vals = _mm_add_ps(imp0, vals);
+        _mm_store_ps(&Coeffs[0][0], coeffs);
+        _mm_storel_pi((__m64*)&Values[o0][0], vals);
+        for(i = 1;i < IrSize-1;i += 2)
+        {
+            const ALuint o2 = (Offset+i)&HRIR_MASK;
+
+            coeffs = _mm_load_ps(&Coeffs[i+1][0]);
+            deltas = _mm_load_ps(&CoeffStep[i+1][0]);
+            vals = _mm_load_ps(&Values[o2][0]);
+            imp1 = _mm_mul_ps(lrlr, coeffs);
+            coeffs = _mm_add_ps(coeffs, deltas);
+            imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2));
+            vals = _mm_add_ps(imp0, vals);
+            _mm_store_ps(&Coeffs[i+1][0], coeffs);
+            _mm_store_ps(&Values[o2][0], vals);
+            imp0 = imp1;
+        }
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]);
+        imp0 = _mm_movehl_ps(imp0, imp0);
+        vals = _mm_add_ps(imp0, vals);
+        _mm_storel_pi((__m64*)&Values[o1][0], vals);
+    }
+    else
+    {
+        for(i = 0;i < IrSize;i += 2)
+        {
+            const ALuint o = (Offset + i)&HRIR_MASK;
+
+            coeffs = _mm_load_ps(&Coeffs[i][0]);
+            deltas = _mm_load_ps(&CoeffStep[i][0]);
+            vals = _mm_load_ps(&Values[o][0]);
+            imp0 = _mm_mul_ps(lrlr, coeffs);
+            coeffs = _mm_add_ps(coeffs, deltas);
+            vals = _mm_add_ps(imp0, vals);
+            _mm_store_ps(&Coeffs[i][0], coeffs);
+            _mm_store_ps(&Values[o][0], vals);
+        }
+    }
+}
+
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint IrSize,
                                ALfloat (*restrict Coeffs)[2],
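For readers less familiar with SSE intrinsics, the added ApplyCoeffsStep processes two stereo HRIR taps per iteration: each tap accumulates the current left/right sample scaled by its coefficient pair into the circular Values buffer, then steps the coefficients by CoeffStep so the filter interpolates toward its new target. The odd-Offset branch exists only to keep the 16-byte-aligned loads and stores correct when the circular buffer index starts on an odd entry. A hypothetical scalar equivalent, with stand-in typedefs and an illustrative HRIR_LENGTH (the real definitions live in OpenAL Soft's headers), might look like:

/* Hypothetical scalar equivalent of the SSE ApplyCoeffsStep above; shown only
 * as a reading aid, not as OpenAL Soft code. */
#include <stdint.h>

typedef uint32_t ALuint;
typedef float    ALfloat;
#define HRIR_LENGTH 32                 /* illustrative power-of-two HRIR size */
#define HRIR_MASK   (HRIR_LENGTH - 1)  /* wraps indices into the circular buffer */

static inline void ApplyCoeffsStepScalar(ALuint Offset, ALfloat (*restrict Values)[2],
                                         const ALuint IrSize,
                                         ALfloat (*restrict Coeffs)[2],
                                         const ALfloat (*restrict CoeffStep)[2],
                                         ALfloat left, ALfloat right)
{
    ALuint c;
    for(c = 0;c < IrSize;c++)
    {
        const ALuint off = (Offset+c)&HRIR_MASK;
        /* Accumulate the current sample scaled by this tap's left/right
         * coefficients into the circular delay line... */
        Values[off][0] += Coeffs[c][0] * left;
        Values[off][1] += Coeffs[c][1] * right;
        /* ...then advance the coefficients one step so the HRIR change is
         * interpolated over the course of the mix. */
        Coeffs[c][0] += CoeffStep[c][0];
        Coeffs[c][1] += CoeffStep[c][1];
    }
}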