diff options
author | Chris Robinson <[email protected]> | 2012-08-13 09:18:46 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2012-08-13 09:18:46 -0700 |
commit | 0a3eba08d8af653a4e7ec803b86c458560e14484 (patch) | |
tree | 291106afc04d77b74baea3cdf87d68ebe9a7fd31 | |
parent | add53e28c2ad6909fe8ed3efdafc419c90023477 (diff) |
Allow building multiple types of mixers, selecting based on CPU caps
-rw-r--r-- | Alc/mixer.c | 111 |
1 files changed, 64 insertions, 47 deletions
diff --git a/Alc/mixer.c b/Alc/mixer.c index 79ee31d1..e7fada8f 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -25,6 +25,9 @@ #include <string.h> #include <ctype.h> #include <assert.h> +#ifdef HAVE_ARM_NEON_H +#include <arm_neon.h> +#endif #include "alMain.h" #include "AL/al.h" @@ -53,41 +56,9 @@ static __inline ALfloat cubic32(const ALfloat *vals, ALint step, ALint frac) #define UNLIKELY(x) (x) #endif -#if defined(__ARM_NEON__) && defined(HAVE_ARM_NEON_H) -#include <arm_neon.h> - -static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], - ALfloat (*RESTRICT Coeffs)[2], - ALfloat left, ALfloat right) -{ - ALuint c; - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - for(c = 0;c < HRIR_LENGTH;c += 2) - { - const ALuint o0 = (Offset+c)&HRIR_MASK; - const ALuint o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); - float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); - } -} - -#else - -static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], - ALfloat (*RESTRICT Coeffs)[2], - ALfloat left, ALfloat right) +static __inline void ApplyCoeffsC(ALuint Offset, ALfloat (*RESTRICT Values)[2], + ALfloat (*RESTRICT Coeffs)[2], + ALfloat left, ALfloat right) { ALuint c; for(c = 0;c < HRIR_LENGTH;c++) @@ -98,11 +69,10 @@ static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2], } } -#endif - -#define DECL_TEMPLATE(sampler) \ -static void MixDirect_Hrtf_##sampler(ALsource *Source, ALCdevice *Device, \ - DirectParams *params, const ALfloat *RESTRICT data, ALuint srcfrac, \ +#define DECL_TEMPLATE(sampler,acc) \ +static void MixDirect_Hrtf_##sampler##_##acc( \ + ALsource *Source, ALCdevice *Device, DirectParams *params, \ + const ALfloat *RESTRICT data, ALuint srcfrac, \ ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \ { \ const ALuint NumChannels = Source->NumChannels; \ @@ -220,7 +190,7 @@ static void MixDirect_Hrtf_##sampler(ALsource *Source, ALCdevice *Device, \ Values[Offset&HRIR_MASK][1] = 0.0f; \ Offset++; \ \ - ApplyCoeffs(Offset, Values, Coeffs, left, right); \ + ApplyCoeffs##acc(Offset, Values, Coeffs, left, right); \ DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; \ DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; \ \ @@ -247,9 +217,44 @@ static void MixDirect_Hrtf_##sampler(ALsource *Source, ALCdevice *Device, \ } \ } -DECL_TEMPLATE(point32) -DECL_TEMPLATE(lerp32) -DECL_TEMPLATE(cubic32) +DECL_TEMPLATE(point32, C) +DECL_TEMPLATE(lerp32, C) +DECL_TEMPLATE(cubic32, C) + +#ifdef HAVE_ARM_NEON_H + +static __inline void ApplyCoeffsNeon(ALuint Offset, ALfloat (*RESTRICT Values)[2], + ALfloat (*RESTRICT Coeffs)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + for(c = 0;c < HRIR_LENGTH;c += 2) + { + const ALuint o0 = (Offset+c)&HRIR_MASK; + const ALuint o1 = (o0+1)&HRIR_MASK; + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), + vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + + vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + } +} + +DECL_TEMPLATE(point32, Neon) +DECL_TEMPLATE(lerp32, Neon) +DECL_TEMPLATE(cubic32, Neon) + +#endif #undef DECL_TEMPLATE @@ -414,11 +419,23 @@ DryMixerFunc SelectHrtfMixer(enum Resampler Resampler) switch(Resampler) { case PointResampler: - return MixDirect_Hrtf_point32; +#ifdef HAVE_ARM_NEON_H + if((CPUCapFlags&CPU_CAP_NEON)) + return MixDirect_Hrtf_point32_Neon; +#endif + return MixDirect_Hrtf_point32_C; case LinearResampler: - return MixDirect_Hrtf_lerp32; +#ifdef HAVE_ARM_NEON_H + if((CPUCapFlags&CPU_CAP_NEON)) + return MixDirect_Hrtf_lerp32_Neon; +#endif + return MixDirect_Hrtf_lerp32_C; case CubicResampler: - return MixDirect_Hrtf_cubic32; +#ifdef HAVE_ARM_NEON_H + if((CPUCapFlags&CPU_CAP_NEON)) + return MixDirect_Hrtf_cubic32_Neon; +#endif + return MixDirect_Hrtf_cubic32_C; case ResamplerMax: break; } |