about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Alc/mixer.c 3
-rw-r--r-- Alc/mixer_defs.h 15
-rw-r--r-- Alc/mixer_sse2.c 23
-rw-r--r-- Alc/mixer_sse41.c 25
4 files changed, 21 insertions, 45 deletions
diff --git a/Alc/mixer.c b/Alc/mixer.c
index 8830a3fe..6a4abfc6 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -37,6 +37,9 @@
#include "bs2b.h"
+extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size); /* extern declaration paired with the plain `inline` definition in mixer_defs.h; under C99 inline rules this makes this translation unit provide the external definition (assumes mixer.c includes mixer_defs.h -- TODO confirm) */
+
+
static inline ALfloat Sample_ALbyte(ALbyte val)
{ return val * (1.0f/127.0f); }
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 94e0b24f..caa06c25 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -4,6 +4,7 @@
#include "AL/alc.h"
#include "AL/al.h"
#include "alMain.h"
+#include "alu.h"
struct MixGains;
struct MixGainMono;
@@ -43,6 +44,20 @@ void MixSend_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
ALuint BufferSize);
/* SSE resamplers */
+inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size)
+{ /* Fills frac_arr[0..size-1] and pos_arr[0..size-1] with the per-step fraction and accumulated position offset obtained by repeatedly adding `increment`, starting from `frac` at offset 0. */
+ ALuint i;
+
+ pos_arr[0] = 0; /* element 0 of both arrays is written unconditionally, so each needs room for at least one entry even if size is 0 */
+ frac_arr[0] = frac;
+ for(i = 1;i < size;i++)
+ {
+ ALuint frac_tmp = frac_arr[i-1] + increment; /* previous fraction advanced by one step; may carry above FRACTIONBITS */
+ pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS); /* bits above FRACTIONBITS are added onto the running position offset */
+ frac_arr[i] = frac_tmp&FRACTIONMASK; /* keep only the bits selected by FRACTIONMASK (presumably the low FRACTIONBITS bits -- confirm in alu.h) */
+ }
+}
+
const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint increment,
ALfloat *restrict dst, ALuint numsamples);
const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint increment,
diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c
index b1cfa598..0d85365e 100644
--- a/Alc/mixer_sse2.c
+++ b/Alc/mixer_sse2.c
@@ -27,27 +27,6 @@
#include "mixer_defs.h"
-static inline void InitiatePositionArrays(ALuint frac, ALuint increment,
- ALuint *frac_arr, ALuint *pos_arr)
-{
- ALuint frac_tmp;
-
- pos_arr[0] = 0;
- frac_arr[0] = frac;
-
- frac_tmp = frac_arr[0] + increment;
- pos_arr[1] = pos_arr[0] + (frac_tmp>>FRACTIONBITS);
- frac_arr[1] = frac_tmp & FRACTIONMASK;
-
- frac_tmp = frac_arr[1] + increment;
- pos_arr[2] = pos_arr[1] + (frac_tmp>>FRACTIONBITS);
- frac_arr[2] = frac_tmp & FRACTIONMASK;
-
- frac_tmp = frac_arr[2] + increment;
- pos_arr[3] = pos_arr[2] + (frac_tmp>>FRACTIONBITS);
- frac_arr[3] = frac_tmp & FRACTIONMASK;
-}
-
const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint increment,
ALfloat *restrict dst, ALuint numsamples)
{
@@ -60,7 +39,7 @@ const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint incr
ALuint pos;
ALuint i;
- InitiatePositionArrays(frac, increment, frac_.i, pos_.i);
+ InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4);
frac4 = _mm_castps_si128(_mm_load_ps(frac_.f));
pos4 = _mm_castps_si128(_mm_load_ps(pos_.f));
diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c
index 958fecec..db89abfa 100644
--- a/Alc/mixer_sse41.c
+++ b/Alc/mixer_sse41.c
@@ -28,27 +28,6 @@
#include "mixer_defs.h"
-static inline void InitiatePositionArrays(ALuint frac, ALuint increment,
- ALuint *frac_arr, ALuint *pos_arr)
-{
- ALuint frac_tmp;
-
- pos_arr[0] = 0;
- frac_arr[0] = frac;
-
- frac_tmp = frac_arr[0] + increment;
- pos_arr[1] = pos_arr[0] + (frac_tmp>>FRACTIONBITS);
- frac_arr[1] = frac_tmp & FRACTIONMASK;
-
- frac_tmp = frac_arr[1] + increment;
- pos_arr[2] = pos_arr[1] + (frac_tmp>>FRACTIONBITS);
- frac_arr[2] = frac_tmp & FRACTIONMASK;
-
- frac_tmp = frac_arr[2] + increment;
- pos_arr[3] = pos_arr[2] + (frac_tmp>>FRACTIONBITS);
- frac_arr[3] = frac_tmp & FRACTIONMASK;
-}
-
const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint increment,
ALfloat *restrict dst, ALuint numsamples)
{
@@ -61,12 +40,12 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc
ALuint pos;
ALuint i;
- InitiatePositionArrays(frac, increment, frac_.i, pos_.i);
+ InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4);
frac4 = _mm_castps_si128(_mm_load_ps(frac_.f));
pos4 = _mm_castps_si128(_mm_load_ps(pos_.f));
- for(i = 0;i < numsamples-3;i += 4)
+ for(i = 0;numsamples-i > 3;i += 4)
{
const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]);
const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]);