summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2012-08-29 01:56:04 -0700
committer: Chris Robinson <[email protected]> 2012-08-29 01:56:04 -0700
commit: 3c042a054d3cf714959ec58d81e77fc6d8cbb650 (patch)
tree: 54d059752660607c040f8c62e082a979a1959857
parent: 332d66c18301b913d46e22fd24c3d4782bea1568 (diff)
Add an SSE-enhanced path for applying the mixer matrix
-rw-r--r-- Alc/mixer_c.c 9
-rw-r--r-- Alc/mixer_inc.c 22
-rw-r--r-- Alc/mixer_neon.c 9
-rw-r--r-- Alc/mixer_sse.c 15
4 files changed, 48 insertions, 7 deletions
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index eeb75bf4..57f39090 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -35,6 +35,15 @@ static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
}
}
+
+static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value, const ALfloat *DrySend)
+{
+ ALuint c;
+ for(c = 0;c < MaxChannels;c++)
+ Output[c] += value*DrySend[c];
+}
+
+
#define SUFFIX C
#define SAMPLER point32
#include "mixer_inc.c"
diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c
index 6ae00d3d..bc817993 100644
--- a/Alc/mixer_inc.c
+++ b/Alc/mixer_inc.c
@@ -21,6 +21,17 @@
#define MERGE4(a,b,c,d) REAL_MERGE4(a,b,c,d)
+static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+ ALfloat (*RESTRICT Coeffs)[2],
+ ALfloat (*RESTRICT CoeffStep)[2],
+ ALfloat left, ALfloat right);
+static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
+ ALfloat (*RESTRICT Coeffs)[2],
+ ALfloat left, ALfloat right);
+static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value,
+ const ALfloat *DrySend);
+
+
void MERGE4(MixDirect_Hrtf_,SAMPLER,_,SUFFIX)(
ALsource *Source, ALCdevice *Device, DirectParams *params,
const ALfloat *RESTRICT data, ALuint srcfrac,
@@ -169,7 +180,7 @@ void MERGE4(MixDirect_,SAMPLER,_,SUFFIX)(
const ALuint NumChannels = Source->NumChannels;
ALfloat (*RESTRICT DryBuffer)[MaxChannels];
ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
- ALfloat DrySend[MaxChannels];
+ ALIGN(16) ALfloat DrySend[MaxChannels];
FILTER *DryFilter;
ALuint pos, frac;
ALuint BufferIdx;
@@ -197,16 +208,14 @@ void MERGE4(MixDirect_,SAMPLER,_,SUFFIX)(
value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
value = lpFilter2PC(DryFilter, i, value);
- for(c = 0;c < MaxChannels;c++)
- ClickRemoval[c] -= value*DrySend[c];
+ ApplyValue(ClickRemoval, -value, DrySend);
}
for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++)
{
value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
value = lpFilter2P(DryFilter, i, value);
- for(c = 0;c < MaxChannels;c++)
- DryBuffer[OutPos][c] += value*DrySend[c];
+ ApplyValue(DryBuffer[OutPos], value, DrySend);
frac += increment;
pos += frac>>FRACTIONBITS;
@@ -218,8 +227,7 @@ void MERGE4(MixDirect_,SAMPLER,_,SUFFIX)(
value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
value = lpFilter2PC(DryFilter, i, value);
- for(c = 0;c < MaxChannels;c++)
- PendingClicks[c] += value*DrySend[c];
+ ApplyValue(PendingClicks, value, DrySend);
}
OutPos -= BufferSize;
}
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index b550b837..bcdbf9b8 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -53,6 +53,15 @@ static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
}
}
+
+static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value, const ALfloat *DrySend)
+{
+ ALuint c;
+ for(c = 0;c < MaxChannels;c++)
+ Output[c] += value*DrySend[c];
+}
+
+
#define SUFFIX Neon
#define SAMPLER point32
#include "mixer_inc.c"
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index bbca54ec..270817fb 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -61,6 +61,21 @@ static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
}
}
+
+static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value, const ALfloat *DrySend)
+{
+ const __m128 val4 = _mm_set1_ps(value);
+ ALuint c;
+ for(c = 0;c < MaxChannels;c += 4)
+ {
+ const __m128 gains = _mm_load_ps(&DrySend[c]);
+ __m128 out = _mm_load_ps(&Output[c]);
+ out = _mm_add_ps(out, _mm_mul_ps(val4, gains));
+ _mm_store_ps(&Output[c], out);
+ }
+}
+
+
#define SUFFIX SSE
#define SAMPLER point32
#include "mixer_inc.c"