summary | refs | log | tree | commit | diff | stats
path: root/Alc
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2012-08-15 08:19:04 -0700
committer: Chris Robinson <[email protected]> 2012-08-15 08:19:04 -0700
commit: 0bca771a88f83e77cd6a5eb04757125b4fa5dc6c (patch)
tree: 5d38eb665129c0b708e1b61634922b54a96d05fb /Alc
parent: f4ff63e2715b28e8ed1fd54e48e3ee6077f1689d (diff)
Apply HRTF mixer coefficients with stepping using SSE
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/mixer_c.c 16
-rw-r--r-- Alc/mixer_inc.c 10
-rw-r--r-- Alc/mixer_neon.c 16
-rw-r--r-- Alc/mixer_sse.c 34
4 files changed, 65 insertions, 11 deletions
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index f59b3190..eeb75bf4 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -6,6 +6,22 @@
#include "alu.h"
+static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2], /* Values: accumulation buffer of [2]-pairs, written at (Offset+c)&HRIR_MASK */
+ ALfloat (*RESTRICT Coeffs)[2], /* per-tap coefficient pairs; read and then modified in place */
+ ALfloat (*RESTRICT CoeffStep)[2], /* per-tap increments added to Coeffs; only read here */
+ ALfloat left, ALfloat right) /* multipliers for column [0] and column [1] respectively */
+{ /* For each of HRIR_LENGTH taps: accumulate Coeffs*input into Values, then advance Coeffs by CoeffStep. */
+ ALuint c; /* tap index */
+ for(c = 0;c < HRIR_LENGTH;c++)
+ {
+ const ALuint off = (Offset+c)&HRIR_MASK; /* wrap the Values index with HRIR_MASK */
+ Values[off][0] += Coeffs[c][0] * left; /* accumulate with the pre-step coefficient */
+ Values[off][1] += Coeffs[c][1] * right;
+ Coeffs[c][0] += CoeffStep[c][0]; /* step the stored coefficients after they have been applied */
+ Coeffs[c][1] += CoeffStep[c][1];
+ }
+}
+
static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
ALfloat (*RESTRICT Coeffs)[2],
ALfloat left, ALfloat right)
diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c
index b7051f64..3e9d7f46 100644
--- a/Alc/mixer_inc.c
+++ b/Alc/mixer_inc.c
@@ -107,15 +107,7 @@ void MERGE4(MixDirect_Hrtf_,SAMPLER,_,SUFFIX)(
Values[Offset&HRIR_MASK][1] = 0.0f;
Offset++;
- for(c = 0;c < HRIR_LENGTH;c++)
- {
- const ALuint off = (Offset+c)&HRIR_MASK;
- Values[off][0] += Coeffs[c][0] * left;
- Values[off][1] += Coeffs[c][1] * right;
- Coeffs[c][0] += CoeffStep[c][0];
- Coeffs[c][1] += CoeffStep[c][1];
- }
-
+ ApplyCoeffsStep(Offset, Values, Coeffs, CoeffStep, left, right);
DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0];
DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1];
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 10385e69..b550b837 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -10,6 +10,22 @@
#include "alu.h"
+static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2], /* Values: accumulation buffer of [2]-pairs, written at (Offset+c)&HRIR_MASK */
+ ALfloat (*RESTRICT Coeffs)[2], /* per-tap coefficient pairs; read and then modified in place */
+ ALfloat (*RESTRICT CoeffStep)[2], /* per-tap increments added to Coeffs; only read here */
+ ALfloat left, ALfloat right) /* multipliers for column [0] and column [1] respectively */
+{ /* Plain scalar loop (no intrinsics): accumulate Coeffs*input into Values, then advance Coeffs by CoeffStep. */
+ ALuint c; /* tap index */
+ for(c = 0;c < HRIR_LENGTH;c++)
+ {
+ const ALuint off = (Offset+c)&HRIR_MASK; /* wrap the Values index with HRIR_MASK */
+ Values[off][0] += Coeffs[c][0] * left; /* accumulate with the pre-step coefficient */
+ Values[off][1] += Coeffs[c][1] * right;
+ Coeffs[c][0] += CoeffStep[c][0]; /* step the stored coefficients after they have been applied */
+ Coeffs[c][1] += CoeffStep[c][1];
+ }
+}
+
static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
ALfloat (*RESTRICT Coeffs)[2],
ALfloat left, ALfloat right)
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 2da2b6b8..bbca54ec 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -10,18 +10,48 @@
#include "alu.h"
+static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2], /* Values: accumulation buffer of [2]-pairs, indexed modulo via HRIR_MASK */
+ ALfloat (*RESTRICT Coeffs)[2], /* per-tap coefficient pairs; read and then modified in place */
+ ALfloat (*RESTRICT CoeffStep)[2], /* per-tap increments added to Coeffs; only read here */
+ ALfloat left, ALfloat right) /* multipliers for column [0] and column [1] respectively */
+{ /* SSE variant: handles two taps (four floats) per iteration; accumulates into Values, then steps Coeffs. */
+ const __m128 lrlr = { left, right, left, right }; /* lane order matches two consecutive [2]-pairs */
+ __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f }; /* both halves are overwritten by the loads below before use */
+ __m128 coeffs, coeffstep;
+ ALuint c; /* tap index, advanced by 2 */
+ for(c = 0;c < HRIR_LENGTH;c += 2) /* NOTE(review): assumes HRIR_LENGTH is even - confirm */
+ {
+ const ALuint o0 = (Offset++)&HRIR_MASK; /* wrapped Values index for tap c */
+ const ALuint o1 = (Offset++)&HRIR_MASK; /* wrapped Values index for tap c+1; may wrap to 0, so not necessarily o0+1 */
+
+ coeffs = _mm_load_ps(&Coeffs[c][0]); /* aligned load of Coeffs[c] and Coeffs[c+1]; address must be 16-byte aligned */
+ vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); /* low two lanes <- Values[o0] */
+ vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]); /* high two lanes <- Values[o1] */
+
+ vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr)); /* vals += coeffs * {left, right, left, right} */
+ _mm_storel_pi((__m64*)&Values[o0][0], vals); /* write the two halves back separately, mirroring the split loads */
+ _mm_storeh_pi((__m64*)&Values[o1][0], vals);
+
+ coeffstep = _mm_load_ps(&CoeffStep[c][0]); /* aligned load of the steps for taps c and c+1 */
+ coeffs = _mm_add_ps(coeffs, coeffstep); /* advance coefficients after they have been applied */
+ _mm_store_ps(&Coeffs[c][0], coeffs); /* aligned store back into Coeffs */
+ }
+}
+
static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
ALfloat (*RESTRICT Coeffs)[2],
ALfloat left, ALfloat right)
{
const __m128 lrlr = { left, right, left, right };
+ __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
+ __m128 coeffs;
ALuint c;
for(c = 0;c < HRIR_LENGTH;c += 2)
{
const ALuint o0 = (Offset++)&HRIR_MASK;
const ALuint o1 = (Offset++)&HRIR_MASK;
- __m128 coeffs = _mm_load_ps(&Coeffs[c][0]);
- __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
+
+ coeffs = _mm_load_ps(&Coeffs[c][0]);
vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);