diff options
-rw-r--r-- | Alc/ALu.c | 4 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 3 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 47 |
3 files changed, 54 insertions, 0 deletions
@@ -60,6 +60,10 @@ static ResamplerFunc SelectResampler(enum Resampler Resampler, ALuint increment)
         case PointResampler:
             return Resample_point32_C;
         case LinearResampler:
+#ifdef HAVE_SSE
+            if((CPUCapFlags&CPU_CAP_SSE))
+                return Resample_lerp32_SSE;
+#endif
             return Resample_lerp32_C;
         case CubicResampler:
             return Resample_cubic32_C;
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 38cdb935..91ae24d6 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -15,6 +15,9 @@ void Resample_point32_C(const ALfloat *src, ALuint frac, ALuint increment, ALuin
 void Resample_lerp32_C(const ALfloat *src, ALuint frac, ALuint increment, ALuint NumChannels, ALfloat *RESTRICT dst, ALuint dstlen);
 void Resample_cubic32_C(const ALfloat *src, ALuint frac, ALuint increment, ALuint NumChannels, ALfloat *RESTRICT dst, ALuint dstlen);
 
+/* SSE resamplers */
+void Resample_lerp32_SSE(const ALfloat *src, ALuint frac, ALuint increment, ALuint NumChannels, ALfloat *RESTRICT dst, ALuint dstlen);
+
 
 /* C mixers */
 void MixDirect_Hrtf_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint);
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 792fead6..aff0152b 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -12,6 +12,53 @@
 #include "alSource.h"
 #include "mixer_defs.h"
 
+static __inline ALfloat lerp32(const ALfloat *vals, ALint step, ALuint frac)
+{ return lerp(vals[0], vals[step], frac * (1.0f/FRACTIONONE)); }
+
+void Resample_lerp32_SSE(const ALfloat *data, ALuint frac,
+  ALuint increment, ALuint NumChannels, ALfloat *RESTRICT OutBuffer,
+  ALuint BufferSize)
+{
+    ALIGN(16) float value[3][4];
+    ALuint pos = 0;
+    ALuint i, j;
+
+    for(i = 0;i < BufferSize+1-3;i+=4)
+    {
+        __m128 x, y, a;
+        for(j = 0;j < 4;j++)
+        {
+            value[0][j] = data[(pos  )*NumChannels];
+            value[1][j] = data[(pos+1)*NumChannels];
+            value[2][j] = frac * (1.0f/FRACTIONONE);
+
+            frac += increment;
+            pos  += frac>>FRACTIONBITS;
+            frac &= FRACTIONMASK;
+        }
+
+        x = _mm_load_ps(value[0]);
+        y = _mm_load_ps(value[1]);
+        y = _mm_sub_ps(y, x);
+
+        a = _mm_load_ps(value[2]);
+        y = _mm_mul_ps(y, a);
+
+        x = _mm_add_ps(x, y);
+
+        _mm_store_ps(&OutBuffer[i], x);
+    }
+    for(;i < BufferSize+1;i++)
+    {
+        OutBuffer[i] = lerp32(data + pos*NumChannels, NumChannels, frac);
+
+        frac += increment;
+        pos  += frac>>FRACTIONBITS;
+        frac &= FRACTIONMASK;
+    }
+}
+
+
 
 static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2],
                                      const ALuint IrSize,