diff options
-rw-r--r-- | Alc/mixer.c | 3 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 15 | ||||
-rw-r--r-- | Alc/mixer_sse2.c | 23 | ||||
-rw-r--r-- | Alc/mixer_sse41.c | 25 |
4 files changed, 21 insertions, 45 deletions
diff --git a/Alc/mixer.c b/Alc/mixer.c index 8830a3fe..6a4abfc6 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -37,6 +37,9 @@ #include "bs2b.h" +extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size); + + static inline ALfloat Sample_ALbyte(ALbyte val) { return val * (1.0f/127.0f); } diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index 94e0b24f..caa06c25 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h @@ -4,6 +4,7 @@ #include "AL/alc.h" #include "AL/al.h" #include "alMain.h" +#include "alu.h" struct MixGains; struct MixGainMono; @@ -43,6 +44,20 @@ void MixSend_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, ALuint BufferSize); /* SSE resamplers */ +inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size) +{ + ALuint i; + + pos_arr[0] = 0; + frac_arr[0] = frac; + for(i = 1;i < size;i++) + { + ALuint frac_tmp = frac_arr[i-1] + increment; + pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS); + frac_arr[i] = frac_tmp&FRACTIONMASK; + } +} + const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint increment, ALfloat *restrict dst, ALuint numsamples); const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint increment, diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c index b1cfa598..0d85365e 100644 --- a/Alc/mixer_sse2.c +++ b/Alc/mixer_sse2.c @@ -27,27 +27,6 @@ #include "mixer_defs.h" -static inline void InitiatePositionArrays(ALuint frac, ALuint increment, - ALuint *frac_arr, ALuint *pos_arr) -{ - ALuint frac_tmp; - - pos_arr[0] = 0; - frac_arr[0] = frac; - - frac_tmp = frac_arr[0] + increment; - pos_arr[1] = pos_arr[0] + (frac_tmp>>FRACTIONBITS); - frac_arr[1] = frac_tmp & FRACTIONMASK; - - frac_tmp = frac_arr[1] + increment; - pos_arr[2] = pos_arr[1] + (frac_tmp>>FRACTIONBITS); - frac_arr[2] = frac_tmp & FRACTIONMASK; - - frac_tmp = frac_arr[2] + increment; - pos_arr[3] = pos_arr[2] + (frac_tmp>>FRACTIONBITS); - frac_arr[3] = frac_tmp & FRACTIONMASK; -} - const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint increment, ALfloat *restrict dst, ALuint numsamples) { @@ -60,7 +39,7 @@ const ALfloat *Resample_lerp32_SSE2(const ALfloat *src, ALuint frac, ALuint incr ALuint pos; ALuint i; - InitiatePositionArrays(frac, increment, frac_.i, pos_.i); + InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c index 958fecec..db89abfa 100644 --- a/Alc/mixer_sse41.c +++ b/Alc/mixer_sse41.c @@ -28,27 +28,6 @@ #include "mixer_defs.h" -static inline void InitiatePositionArrays(ALuint frac, ALuint increment, - ALuint *frac_arr, ALuint *pos_arr) -{ - ALuint frac_tmp; - - pos_arr[0] = 0; - frac_arr[0] = frac; - - frac_tmp = frac_arr[0] + increment; - pos_arr[1] = pos_arr[0] + (frac_tmp>>FRACTIONBITS); - frac_arr[1] = frac_tmp & FRACTIONMASK; - - frac_tmp = frac_arr[1] + increment; - pos_arr[2] = pos_arr[1] + (frac_tmp>>FRACTIONBITS); - frac_arr[2] = frac_tmp & FRACTIONMASK; - - frac_tmp = frac_arr[2] + increment; - pos_arr[3] = pos_arr[2] + (frac_tmp>>FRACTIONBITS); - frac_arr[3] = frac_tmp & FRACTIONMASK; -} - const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint increment, ALfloat *restrict dst, ALuint numsamples) { @@ -61,12 +40,12 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc ALuint pos; ALuint i; - InitiatePositionArrays(frac, increment, frac_.i, pos_.i); + InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); - for(i = 0;i < numsamples-3;i += 4) + for(i = 0;numsamples-i > 3;i += 4) { const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]); const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]); |