diff options
-rw-r--r-- | Alc/mixer.c | 510 | ||||
-rw-r--r-- | Alc/mixer_c.c | 32 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 60 | ||||
-rw-r--r-- | Alc/mixer_inc.c | 305 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 50 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 48 | ||||
-rw-r--r-- | CMakeLists.txt | 31 |
7 files changed, 617 insertions, 419 deletions
diff --git a/Alc/mixer.c b/Alc/mixer.c index f5d9bb09..7454ec48 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -25,12 +25,6 @@ #include <string.h> #include <ctype.h> #include <assert.h> -#ifdef HAVE_XMMINTRIN_H -#include <xmmintrin.h> -#endif -#ifdef HAVE_ARM_NEON_H -#include <arm_neon.h> -#endif #include "alMain.h" #include "AL/al.h" @@ -42,408 +36,52 @@ #include "alu.h" #include "bs2b.h" +#include "mixer_defs.h" -static __inline ALfloat point32(const ALfloat *vals, ALint step, ALint frac) -{ return vals[0]; (void)step; (void)frac; } -static __inline ALfloat lerp32(const ALfloat *vals, ALint step, ALint frac) -{ return lerp(vals[0], vals[step], frac * (1.0f/FRACTIONONE)); } -static __inline ALfloat cubic32(const ALfloat *vals, ALint step, ALint frac) -{ return cubic(vals[-step], vals[0], vals[step], vals[step+step], - frac * (1.0f/FRACTIONONE)); } - -#ifdef __GNUC__ -#define LIKELY(x) __builtin_expect(!!(x), 1) -#define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif -static __inline void ApplyCoeffsC(ALuint Offset, ALfloat (*RESTRICT Values)[2], - ALfloat (*RESTRICT Coeffs)[2], - ALfloat left, ALfloat right) +DryMixerFunc SelectDirectMixer(enum Resampler Resampler) { - ALuint c; - for(c = 0;c < HRIR_LENGTH;c++) - { - const ALuint off = (Offset+c)&HRIR_MASK; - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; - } -} - -#define DECL_TEMPLATE(sampler,acc) \ -static void MixDirect_Hrtf_##sampler##_##acc( \ - ALsource *Source, ALCdevice *Device, DirectParams *params, \ - const ALfloat *RESTRICT data, ALuint srcfrac, \ - ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \ -{ \ - const ALuint NumChannels = Source->NumChannels; \ - const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep; \ - ALfloat (*RESTRICT DryBuffer)[MaxChannels]; \ - ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; \ - ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep; \ - ALuint pos, frac; \ - FILTER *DryFilter; \ - ALuint BufferIdx; \ - ALuint increment; \ - ALfloat value; \ - ALuint i, c; \ - \ - increment = Source->Params.Step; \ - \ - DryBuffer = Device->DryBuffer; \ - ClickRemoval = Device->ClickRemoval; \ - PendingClicks = Device->PendingClicks; \ - DryFilter = ¶ms->iirFilter; \ - \ - for(i = 0;i < NumChannels;i++) \ - { \ - ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[i]; \ - ALuint *RESTRICT TargetDelay = params->Hrtf.Delay[i]; \ - ALfloat *RESTRICT History = Source->Hrtf.History[i]; \ - ALfloat (*RESTRICT Values)[2] = Source->Hrtf.Values[i]; \ - ALint Counter = maxu(Source->Hrtf.Counter, OutPos) - OutPos; \ - ALuint Offset = Source->Hrtf.Offset + OutPos; \ - ALfloat Coeffs[HRIR_LENGTH][2]; \ - ALuint Delay[2]; \ - ALfloat left, right; \ - \ - pos = 0; \ - frac = srcfrac; \ - \ - for(c = 0;c < HRIR_LENGTH;c++) \ - { \ - Coeffs[c][0] = TargetCoeffs[c][0] - (CoeffStep[c][0]*Counter); \ - Coeffs[c][1] = TargetCoeffs[c][1] - (CoeffStep[c][1]*Counter); \ - } \ - \ - Delay[0] = TargetDelay[0] - (DelayStep[0]*Counter); \ - Delay[1] = TargetDelay[1] - (DelayStep[1]*Counter); \ - \ - if(LIKELY(OutPos == 0)) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - value = lpFilter2PC(DryFilter, i, value); \ - \ - History[Offset&SRC_HISTORY_MASK] = value; \ - left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \ - History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \ - (Delay[0]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \ - right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \ - History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \ - (Delay[1]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \ - \ - ClickRemoval[FrontLeft] -= Values[(Offset+1)&HRIR_MASK][0] + \ - Coeffs[0][0] * left; \ - ClickRemoval[FrontRight] -= Values[(Offset+1)&HRIR_MASK][1] + \ - Coeffs[0][1] * right; \ - } \ - for(BufferIdx = 0;BufferIdx < BufferSize && Counter > 0;BufferIdx++) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - value = lpFilter2P(DryFilter, i, value); \ - \ - History[Offset&SRC_HISTORY_MASK] = value; \ - left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \ - History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \ - (Delay[0]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \ - right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \ - History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \ - (Delay[1]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \ - \ - Delay[0] += DelayStep[0]; \ - Delay[1] += DelayStep[1]; \ - \ - Values[Offset&HRIR_MASK][0] = 0.0f; \ - Values[Offset&HRIR_MASK][1] = 0.0f; \ - Offset++; \ - \ - for(c = 0;c < HRIR_LENGTH;c++) \ - { \ - const ALuint off = (Offset+c)&HRIR_MASK; \ - Values[off][0] += Coeffs[c][0] * left; \ - Values[off][1] += Coeffs[c][1] * right; \ - Coeffs[c][0] += CoeffStep[c][0]; \ - Coeffs[c][1] += CoeffStep[c][1]; \ - } \ - \ - DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; \ - DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; \ - \ - frac += increment; \ - pos += frac>>FRACTIONBITS; \ - frac &= FRACTIONMASK; \ - OutPos++; \ - Counter--; \ - } \ - \ - Delay[0] >>= HRTFDELAY_BITS; \ - Delay[1] >>= HRTFDELAY_BITS; \ - for(;BufferIdx < BufferSize;BufferIdx++) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - value = lpFilter2P(DryFilter, i, value); \ - \ - History[Offset&SRC_HISTORY_MASK] = value; \ - left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; \ - right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; \ - \ - Values[Offset&HRIR_MASK][0] = 0.0f; \ - Values[Offset&HRIR_MASK][1] = 0.0f; \ - Offset++; \ - \ - ApplyCoeffs##acc(Offset, Values, Coeffs, left, right); \ - DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; \ - DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; \ - \ - frac += increment; \ - pos += frac>>FRACTIONBITS; \ - frac &= FRACTIONMASK; \ - OutPos++; \ - } \ - if(LIKELY(OutPos == SamplesToDo)) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - value = lpFilter2PC(DryFilter, i, value); \ - \ - History[Offset&SRC_HISTORY_MASK] = value; \ - left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; \ - right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; \ - \ - PendingClicks[FrontLeft] += Values[(Offset+1)&HRIR_MASK][0] + \ - Coeffs[0][0] * left; \ - PendingClicks[FrontRight] += Values[(Offset+1)&HRIR_MASK][1] + \ - Coeffs[0][1] * right; \ - } \ - OutPos -= BufferSize; \ - } \ -} - -DECL_TEMPLATE(point32, C) -DECL_TEMPLATE(lerp32, C) -DECL_TEMPLATE(cubic32, C) - #ifdef HAVE_XMMINTRIN_H - -static __inline void ApplyCoeffsSSE(ALuint Offset, ALfloat (*RESTRICT Values)[2], - ALfloat (*RESTRICT Coeffs)[2], - ALfloat left, ALfloat right) -{ - const __m128 lrlr = { left, right, left, right }; - ALuint c; - for(c = 0;c < HRIR_LENGTH;c += 2) + if((CPUCapFlags&CPU_CAP_SSE)) { - const ALuint o0 = (Offset++)&HRIR_MASK; - const ALuint o1 = (Offset++)&HRIR_MASK; - __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f }; - __m128 coeffs = { 0.0f, 0.0f, 0.0f, 0.0f }; - - vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); - vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]); - coeffs = _mm_loadl_pi(coeffs, (__m64*)&Coeffs[c ][0]); - coeffs = _mm_loadh_pi(coeffs, (__m64*)&Coeffs[c+1][0]); - - vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr)); - - _mm_storel_pi((__m64*)&Values[o0][0], vals); - _mm_storeh_pi((__m64*)&Values[o1][0], vals); + switch(Resampler) + { + case PointResampler: + return MixDirect_point32_SSE; + case LinearResampler: + return MixDirect_lerp32_SSE; + case CubicResampler: + return MixDirect_cubic32_SSE; + case ResamplerMax: + break; + } } -} - -DECL_TEMPLATE(point32, SSE) -DECL_TEMPLATE(lerp32, SSE) -DECL_TEMPLATE(cubic32, SSE) - #endif - #ifdef HAVE_ARM_NEON_H - -static __inline void ApplyCoeffsNeon(ALuint Offset, ALfloat (*RESTRICT Values)[2], - ALfloat (*RESTRICT Coeffs)[2], - ALfloat left, ALfloat right) -{ - ALuint c; - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - for(c = 0;c < HRIR_LENGTH;c += 2) + if((CPUCapFlags&CPU_CAP_NEON)) { - const ALuint o0 = (Offset+c)&HRIR_MASK; - const ALuint o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); - float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + switch(Resampler) + { + case PointResampler: + return MixDirect_point32_Neon; + case LinearResampler: + return MixDirect_lerp32_Neon; + case CubicResampler: + return MixDirect_cubic32_Neon; + case ResamplerMax: + break; + } } -} - -DECL_TEMPLATE(point32, Neon) -DECL_TEMPLATE(lerp32, Neon) -DECL_TEMPLATE(cubic32, Neon) - #endif -#undef DECL_TEMPLATE - - -#define DECL_TEMPLATE(sampler) \ -static void MixDirect_##sampler(ALsource *Source, ALCdevice *Device, \ - DirectParams *params, const ALfloat *RESTRICT data, ALuint srcfrac, \ - ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \ -{ \ - const ALuint NumChannels = Source->NumChannels; \ - ALfloat (*RESTRICT DryBuffer)[MaxChannels]; \ - ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; \ - ALfloat DrySend[MaxChannels]; \ - FILTER *DryFilter; \ - ALuint pos, frac; \ - ALuint BufferIdx; \ - ALuint increment; \ - ALfloat value; \ - ALuint i, c; \ - \ - increment = Source->Params.Step; \ - \ - DryBuffer = Device->DryBuffer; \ - ClickRemoval = Device->ClickRemoval; \ - PendingClicks = Device->PendingClicks; \ - DryFilter = ¶ms->iirFilter; \ - \ - for(i = 0;i < NumChannels;i++) \ - { \ - for(c = 0;c < MaxChannels;c++) \ - DrySend[c] = params->Gains[i][c]; \ - \ - pos = 0; \ - frac = srcfrac; \ - \ - if(OutPos == 0) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2PC(DryFilter, i, value); \ - for(c = 0;c < MaxChannels;c++) \ - ClickRemoval[c] -= value*DrySend[c]; \ - } \ - for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2P(DryFilter, i, value); \ - for(c = 0;c < MaxChannels;c++) \ - DryBuffer[OutPos][c] += value*DrySend[c]; \ - \ - frac += increment; \ - pos += frac>>FRACTIONBITS; \ - frac &= FRACTIONMASK; \ - OutPos++; \ - } \ - if(OutPos == SamplesToDo) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2PC(DryFilter, i, value); \ - for(c = 0;c < MaxChannels;c++) \ - PendingClicks[c] += value*DrySend[c]; \ - } \ - OutPos -= BufferSize; \ - } \ -} - -DECL_TEMPLATE(point32) -DECL_TEMPLATE(lerp32) -DECL_TEMPLATE(cubic32) - -#undef DECL_TEMPLATE - -#define DECL_TEMPLATE(sampler) \ -static void MixSend_##sampler(ALsource *Source, ALuint sendidx, \ - SendParams *params, const ALfloat *RESTRICT data, ALuint srcfrac, \ - ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \ -{ \ - const ALuint NumChannels = Source->NumChannels; \ - ALeffectslot *Slot; \ - ALfloat WetSend; \ - ALfloat *WetBuffer; \ - ALfloat *WetClickRemoval; \ - ALfloat *WetPendingClicks; \ - FILTER *WetFilter; \ - ALuint pos, frac; \ - ALuint BufferIdx; \ - ALuint increment; \ - ALfloat value; \ - ALuint i; \ - \ - increment = Source->Params.Step; \ - \ - Slot = Source->Params.Slot[sendidx]; \ - WetBuffer = Slot->WetBuffer; \ - WetClickRemoval = Slot->ClickRemoval; \ - WetPendingClicks = Slot->PendingClicks; \ - WetFilter = ¶ms->iirFilter; \ - WetSend = params->Gain; \ - \ - for(i = 0;i < NumChannels;i++) \ - { \ - pos = 0; \ - frac = srcfrac; \ - \ - if(OutPos == 0) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2PC(WetFilter, i, value); \ - WetClickRemoval[0] -= value * WetSend; \ - } \ - for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2P(WetFilter, i, value); \ - WetBuffer[OutPos] += value * WetSend; \ - \ - frac += increment; \ - pos += frac>>FRACTIONBITS; \ - frac &= FRACTIONMASK; \ - OutPos++; \ - } \ - if(OutPos == SamplesToDo) \ - { \ - value = sampler(data + pos*NumChannels + i, NumChannels, frac); \ - \ - value = lpFilter2PC(WetFilter, i, value); \ - WetPendingClicks[0] += value * WetSend; \ - } \ - OutPos -= BufferSize; \ - } \ -} - -DECL_TEMPLATE(point32) -DECL_TEMPLATE(lerp32) -DECL_TEMPLATE(cubic32) - -#undef DECL_TEMPLATE - - -DryMixerFunc SelectDirectMixer(enum Resampler Resampler) -{ switch(Resampler) { case PointResampler: - return MixDirect_point32; + return MixDirect_point32_C; case LinearResampler: - return MixDirect_lerp32; + return MixDirect_lerp32_C; case CubicResampler: - return MixDirect_cubic32; + return MixDirect_cubic32_C; case ResamplerMax: break; } @@ -452,37 +90,46 @@ DryMixerFunc SelectDirectMixer(enum Resampler Resampler) DryMixerFunc SelectHrtfMixer(enum Resampler Resampler) { - switch(Resampler) - { - case PointResampler: #ifdef HAVE_XMMINTRIN_H - if((CPUCapFlags&CPU_CAP_SSE)) + if((CPUCapFlags&CPU_CAP_SSE)) + { + switch(Resampler) + { + case PointResampler: return MixDirect_Hrtf_point32_SSE; + case LinearResampler: + return MixDirect_Hrtf_lerp32_SSE; + case CubicResampler: + return MixDirect_Hrtf_cubic32_SSE; + case ResamplerMax: + break; + } + } #endif #ifdef HAVE_ARM_NEON_H - if((CPUCapFlags&CPU_CAP_NEON)) + if((CPUCapFlags&CPU_CAP_NEON)) + { + switch(Resampler) + { + case PointResampler: return MixDirect_Hrtf_point32_Neon; + case LinearResampler: + return MixDirect_Hrtf_lerp32_Neon; + case CubicResampler: + return MixDirect_Hrtf_cubic32_Neon; + case ResamplerMax: + break; + } + } #endif + + switch(Resampler) + { + case PointResampler: return MixDirect_Hrtf_point32_C; case LinearResampler: -#ifdef HAVE_XMMINTRIN_H - if((CPUCapFlags&CPU_CAP_SSE)) - return MixDirect_Hrtf_lerp32_SSE; -#endif -#ifdef HAVE_ARM_NEON_H - if((CPUCapFlags&CPU_CAP_NEON)) - return MixDirect_Hrtf_lerp32_Neon; -#endif return MixDirect_Hrtf_lerp32_C; case CubicResampler: -#ifdef HAVE_XMMINTRIN_H - if((CPUCapFlags&CPU_CAP_SSE)) - return MixDirect_Hrtf_cubic32_SSE; -#endif -#ifdef HAVE_ARM_NEON_H - if((CPUCapFlags&CPU_CAP_NEON)) - return MixDirect_Hrtf_cubic32_Neon; -#endif return MixDirect_Hrtf_cubic32_C; case ResamplerMax: break; @@ -492,14 +139,47 @@ DryMixerFunc SelectHrtfMixer(enum Resampler Resampler) WetMixerFunc SelectSendMixer(enum Resampler Resampler) { +#ifdef HAVE_XMMINTRIN_H + if((CPUCapFlags&CPU_CAP_SSE)) + { + switch(Resampler) + { + case PointResampler: + return MixSend_point32_SSE; + case LinearResampler: + return MixSend_lerp32_SSE; + case CubicResampler: + return MixSend_cubic32_SSE; + case ResamplerMax: + break; + } + } +#endif +#ifdef HAVE_ARM_NEON_H + if((CPUCapFlags&CPU_CAP_NEON)) + { + switch(Resampler) + { + case PointResampler: + return MixSend_point32_Neon; + case LinearResampler: + return MixSend_lerp32_Neon; + case CubicResampler: + return MixSend_cubic32_Neon; + case ResamplerMax: + break; + } + } +#endif + switch(Resampler) { case PointResampler: - return MixSend_point32; + return MixSend_point32_C; case LinearResampler: - return MixSend_lerp32; + return MixSend_lerp32_C; case CubicResampler: - return MixSend_cubic32; + return MixSend_cubic32_C; case ResamplerMax: break; } diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c new file mode 100644 index 00000000..f59b3190 --- /dev/null +++ b/Alc/mixer_c.c @@ -0,0 +1,32 @@ +#include "config.h" + +#include "AL/al.h" +#include "AL/alc.h" +#include "alMain.h" +#include "alu.h" + + +static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], + ALfloat (*RESTRICT Coeffs)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + for(c = 0;c < HRIR_LENGTH;c++) + { + const ALuint off = (Offset+c)&HRIR_MASK; + Values[off][0] += Coeffs[c][0] * left; + Values[off][1] += Coeffs[c][1] * right; + } +} + +#define SUFFIX C +#define SAMPLER point32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER lerp32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER cubic32 +#include "mixer_inc.c" +#undef SAMPLER +#undef SUFFIX diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h new file mode 100644 index 00000000..75ac9bda --- /dev/null +++ b/Alc/mixer_defs.h @@ -0,0 +1,60 @@ +#ifndef MIXER_DEFS_H +#define MIXER_DEFS_H + +#include "AL/alc.h" +#include "AL/al.h" +#include "alMain.h" +#include "alu.h" + +static __inline ALfloat point32(const ALfloat *vals, ALint step, ALint frac) +{ return vals[0]; (void)step; (void)frac; } +static __inline ALfloat lerp32(const ALfloat *vals, ALint step, ALint frac) +{ return lerp(vals[0], vals[step], frac * (1.0f/FRACTIONONE)); } +static __inline ALfloat cubic32(const ALfloat *vals, ALint step, ALint frac) +{ return cubic(vals[-step], vals[0], vals[step], vals[step+step], + frac * (1.0f/FRACTIONONE)); } + +struct ALsource; +struct DirectParams; +struct SendParams; + +/* C mixers */ +void MixDirect_Hrtf_point32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_lerp32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_cubic32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixDirect_point32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_lerp32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_cubic32_C(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixSend_point32_C(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_lerp32_C(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_cubic32_C(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +/* SSE mixers */ +void MixDirect_Hrtf_point32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_lerp32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_cubic32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixDirect_point32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_lerp32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_cubic32_SSE(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixSend_point32_SSE(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_lerp32_SSE(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_cubic32_SSE(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +/* Neon mixers */ +void MixDirect_Hrtf_point32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_lerp32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Hrtf_cubic32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixDirect_point32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_lerp32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixDirect_cubic32_Neon(struct ALsource*,ALCdevice*,struct DirectParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +void MixSend_point32_Neon(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_lerp32_Neon(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); +void MixSend_cubic32_Neon(struct ALsource*,ALuint,struct SendParams*,const ALfloat*RESTRICT,ALuint,ALuint,ALuint,ALuint); + +#endif /* MIXER_DEFS_H */ diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c new file mode 100644 index 00000000..21166b60 --- /dev/null +++ b/Alc/mixer_inc.c @@ -0,0 +1,305 @@ +#include "config.h" + +#include "AL/alc.h" +#include "AL/al.h" +#include "alMain.h" +#include "alSource.h" +#include "alAuxEffectSlot.h" +#include "mixer_defs.h" + +#ifdef __GNUC__ +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +#define REAL_MERGE2(a,b) a##b +#define MERGE2(a,b) REAL_MERGE2(a,b) +#define REAL_MERGE4(a,b,c,d) a##b##c##d +#define MERGE4(a,b,c,d) REAL_MERGE4(a,b,c,d) + + +void MERGE4(MixDirect_Hrtf_,SAMPLER,_,SUFFIX)( + ALsource *Source, ALCdevice *Device, DirectParams *params, + const ALfloat *RESTRICT data, ALuint srcfrac, + ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) +{ + const ALuint NumChannels = Source->NumChannels; + const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep; + ALfloat (*RESTRICT DryBuffer)[MaxChannels]; + ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; + ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep; + ALuint pos, frac; + FILTER *DryFilter; + ALuint BufferIdx; + ALuint increment; + ALfloat value; + ALuint i, c; + + increment = Source->Params.Step; + + DryBuffer = Device->DryBuffer; + ClickRemoval = Device->ClickRemoval; + PendingClicks = Device->PendingClicks; + DryFilter = ¶ms->iirFilter; + + for(i = 0;i < NumChannels;i++) + { + ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[i]; + ALuint *RESTRICT TargetDelay = params->Hrtf.Delay[i]; + ALfloat *RESTRICT History = Source->Hrtf.History[i]; + ALfloat (*RESTRICT Values)[2] = Source->Hrtf.Values[i]; + ALint Counter = maxu(Source->Hrtf.Counter, OutPos) - OutPos; + ALuint Offset = Source->Hrtf.Offset + OutPos; + ALfloat Coeffs[HRIR_LENGTH][2]; + ALuint Delay[2]; + ALfloat left, right; + + pos = 0; + frac = srcfrac; + + for(c = 0;c < HRIR_LENGTH;c++) + { + Coeffs[c][0] = TargetCoeffs[c][0] - (CoeffStep[c][0]*Counter); + Coeffs[c][1] = TargetCoeffs[c][1] - (CoeffStep[c][1]*Counter); + } + + Delay[0] = TargetDelay[0] - (DelayStep[0]*Counter); + Delay[1] = TargetDelay[1] - (DelayStep[1]*Counter); + + if(LIKELY(OutPos == 0)) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + value = lpFilter2PC(DryFilter, i, value); + + History[Offset&SRC_HISTORY_MASK] = value; + left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], + History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], + (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], + History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], + (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + + ClickRemoval[FrontLeft] -= Values[(Offset+1)&HRIR_MASK][0] + + Coeffs[0][0] * left; + ClickRemoval[FrontRight] -= Values[(Offset+1)&HRIR_MASK][1] + + Coeffs[0][1] * right; + } + for(BufferIdx = 0;BufferIdx < BufferSize && Counter > 0;BufferIdx++) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + value = lpFilter2P(DryFilter, i, value); + + History[Offset&SRC_HISTORY_MASK] = value; + left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], + History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], + (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], + History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], + (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + + Delay[0] += DelayStep[0]; + Delay[1] += DelayStep[1]; + + Values[Offset&HRIR_MASK][0] = 0.0f; + Values[Offset&HRIR_MASK][1] = 0.0f; + Offset++; + + for(c = 0;c < HRIR_LENGTH;c++) + { + const ALuint off = (Offset+c)&HRIR_MASK; + Values[off][0] += Coeffs[c][0] * left; + Values[off][1] += Coeffs[c][1] * right; + Coeffs[c][0] += CoeffStep[c][0]; + Coeffs[c][1] += CoeffStep[c][1]; + } + + DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; + DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + OutPos++; + Counter--; + } + + Delay[0] >>= HRTFDELAY_BITS; + Delay[1] >>= HRTFDELAY_BITS; + for(;BufferIdx < BufferSize;BufferIdx++) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + value = lpFilter2P(DryFilter, i, value); + + History[Offset&SRC_HISTORY_MASK] = value; + left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; + right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; + + Values[Offset&HRIR_MASK][0] = 0.0f; + Values[Offset&HRIR_MASK][1] = 0.0f; + Offset++; + + ApplyCoeffs(Offset, Values, Coeffs, left, right); + DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; + DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + OutPos++; + } + if(LIKELY(OutPos == SamplesToDo)) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + value = lpFilter2PC(DryFilter, i, value); + + History[Offset&SRC_HISTORY_MASK] = value; + left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; + right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; + + PendingClicks[FrontLeft] += Values[(Offset+1)&HRIR_MASK][0] + + Coeffs[0][0] * left; + PendingClicks[FrontRight] += Values[(Offset+1)&HRIR_MASK][1] + + Coeffs[0][1] * right; + } + OutPos -= BufferSize; + } +} + + +void MERGE4(MixDirect_,SAMPLER,_,SUFFIX)( + ALsource *Source, ALCdevice *Device, DirectParams *params, + const ALfloat *RESTRICT data, ALuint srcfrac, + ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) +{ + const ALuint NumChannels = Source->NumChannels; + ALfloat (*RESTRICT DryBuffer)[MaxChannels]; + ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; + ALfloat DrySend[MaxChannels]; + FILTER *DryFilter; + ALuint pos, frac; + ALuint BufferIdx; + ALuint increment; + ALfloat value; + ALuint i, c; + + increment = Source->Params.Step; + + DryBuffer = Device->DryBuffer; + ClickRemoval = Device->ClickRemoval; + PendingClicks = Device->PendingClicks; + DryFilter = ¶ms->iirFilter; + + for(i = 0;i < NumChannels;i++) + { + for(c = 0;c < MaxChannels;c++) + DrySend[c] = params->Gains[i][c]; + + pos = 0; + frac = srcfrac; + + if(OutPos == 0) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2PC(DryFilter, i, value); + for(c = 0;c < MaxChannels;c++) + ClickRemoval[c] -= value*DrySend[c]; + } + for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2P(DryFilter, i, value); + for(c = 0;c < MaxChannels;c++) + DryBuffer[OutPos][c] += value*DrySend[c]; + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + OutPos++; + } + if(OutPos == SamplesToDo) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2PC(DryFilter, i, value); + for(c = 0;c < MaxChannels;c++) + PendingClicks[c] += value*DrySend[c]; + } + OutPos -= BufferSize; + } +} + + +void MERGE4(MixSend_,SAMPLER,_,SUFFIX)( + ALsource *Source, ALuint sendidx, SendParams *params, + const ALfloat *RESTRICT data, ALuint srcfrac, + ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) +{ + const ALuint NumChannels = Source->NumChannels; + ALeffectslot *Slot; + ALfloat WetSend; + ALfloat *WetBuffer; + ALfloat *WetClickRemoval; + ALfloat *WetPendingClicks; + FILTER *WetFilter; + ALuint pos, frac; + ALuint BufferIdx; + ALuint increment; + ALfloat value; + ALuint i; + + increment = Source->Params.Step; + + Slot = Source->Params.Slot[sendidx]; + WetBuffer = Slot->WetBuffer; + WetClickRemoval = Slot->ClickRemoval; + WetPendingClicks = Slot->PendingClicks; + WetFilter = ¶ms->iirFilter; + WetSend = params->Gain; + + for(i = 0;i < NumChannels;i++) + { + pos = 0; + frac = srcfrac; + + if(OutPos == 0) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2PC(WetFilter, i, value); + WetClickRemoval[0] -= value * WetSend; + } + for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2P(WetFilter, i, value); + WetBuffer[OutPos] += value * WetSend; + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + OutPos++; + } + if(OutPos == SamplesToDo) + { + value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac); + + value = lpFilter2PC(WetFilter, i, value); + WetPendingClicks[0] += value * WetSend; + } + OutPos -= BufferSize; + } +} + +#undef MERGE4 +#undef REAL_MERGE4 +#undef MERGE2 +#undef REAL_MERGE2 + +#undef UNLIKELY +#undef LIKELY diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c new file mode 100644 index 00000000..10385e69 --- /dev/null +++ b/Alc/mixer_neon.c @@ -0,0 +1,50 @@ +#include "config.h" + +#ifdef HAVE_ARM_NEON_H +#include <arm_neon.h> +#endif + +#include "AL/al.h" +#include "AL/alc.h" +#include "alMain.h" +#include "alu.h" + + +static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], + ALfloat (*RESTRICT Coeffs)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + for(c = 0;c < HRIR_LENGTH;c += 2) + { + const ALuint o0 = (Offset+c)&HRIR_MASK; + const ALuint o1 = (o0+1)&HRIR_MASK; + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), + vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + + vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + } +} + +#define SUFFIX Neon +#define SAMPLER point32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER lerp32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER cubic32 +#include "mixer_inc.c" +#undef SAMPLER +#undef SUFFIX diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c new file mode 100644 index 00000000..e189971a --- /dev/null +++ b/Alc/mixer_sse.c @@ -0,0 +1,48 @@ +#include "config.h" + +#ifdef HAVE_XMMINTRIN_H +#include <xmmintrin.h> +#endif + +#include "AL/al.h" +#include "AL/alc.h" +#include "alMain.h" +#include "alu.h" + + +static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], + ALfloat (*RESTRICT Coeffs)[2], + ALfloat left, ALfloat right) +{ + const __m128 lrlr = { left, right, left, right }; + ALuint c; + for(c = 0;c < HRIR_LENGTH;c += 2) + { + const ALuint o0 = (Offset++)&HRIR_MASK; + const ALuint o1 = (Offset++)&HRIR_MASK; + __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f }; + __m128 coeffs = { 0.0f, 0.0f, 0.0f, 0.0f }; + + vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); + vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]); + coeffs = _mm_loadl_pi(coeffs, (__m64*)&Coeffs[c ][0]); + coeffs = _mm_loadh_pi(coeffs, (__m64*)&Coeffs[c+1][0]); + + vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr)); + + _mm_storel_pi((__m64*)&Values[o0][0], vals); + _mm_storeh_pi((__m64*)&Values[o1][0], vals); + } +} + +#define SUFFIX SSE +#define SAMPLER point32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER lerp32 +#include "mixer_inc.c" +#undef SAMPLER +#define SAMPLER cubic32 +#include "mixer_inc.c" +#undef SAMPLER +#undef SUFFIX diff --git a/CMakeLists.txt b/CMakeLists.txt index f4a0d1f7..619bdf68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,6 +217,7 @@ ENDIF() CHECK_C_SOURCE_COMPILES("int foo(const char *str, ...) __attribute__((format(printf, 1, 2))); int main() {return 0;}" HAVE_GCC_FORMAT) +CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) CHECK_INCLUDE_FILE(fenv.h HAVE_FENV_H) CHECK_INCLUDE_FILE(fpu_control.h HAVE_FPU_CONTROL_H) CHECK_INCLUDE_FILE(float.h HAVE_FLOAT_H) @@ -225,9 +226,6 @@ CHECK_INCLUDE_FILE(guiddef.h HAVE_GUIDDEF_H) IF(NOT HAVE_GUIDDEF_H) CHECK_INCLUDE_FILE(initguid.h HAVE_INITGUID_H) ENDIF() -CHECK_INCLUDE_FILE(xmmintrin.h HAVE_XMMINTRIN_H) -CHECK_INCLUDE_FILE(arm_neon.h HAVE_ARM_NEON_H) -CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) # Some systems need libm for some of the following math functions to work CHECK_LIBRARY_EXISTS(m pow "" HAVE_LIBM) @@ -423,9 +421,31 @@ SET(ALC_OBJS Alc/ALc.c Alc/alcThread.c Alc/bs2b.c Alc/helpers.c + Alc/panning.c Alc/hrtf.c Alc/mixer.c - Alc/panning.c + Alc/mixer_c.c +) + + +SET(CPU_EXTS "Default") + +# Check for SSE support +CHECK_INCLUDE_FILE(xmmintrin.h HAVE_XMMINTRIN_H) +IF(HAVE_XMMINTRIN_H) + SET(ALC_OBJS ${ALC_OBJS} Alc/mixer_sse.c) + SET(CPU_EXTS "${CPU_EXTS}, SSE") +ENDIF() + +# Check for ARM Neon support +CHECK_INCLUDE_FILE(arm_neon.h HAVE_ARM_NEON_H) +IF(HAVE_ARM_NEON_H) + SET(ALC_OBJS ${ALC_OBJS} Alc/mixer_neon.c) + SET(CPU_EXTS "${CPU_EXTS}, Neon") +ENDIF() + + +SET(ALC_OBJS ${ALC_OBJS} # Default backends, always available Alc/backends/loopback.c Alc/backends/null.c @@ -723,6 +743,9 @@ MESSAGE(STATUS "") MESSAGE(STATUS "Building OpenAL with support for the following backends:") MESSAGE(STATUS " ${BACKENDS}") MESSAGE(STATUS "") +MESSAGE(STATUS "Building with support for CPU extensions:") +MESSAGE(STATUS " ${CPU_EXTS}") +MESSAGE(STATUS "") IF(WIN32) IF(NOT HAVE_DSOUND) |