diff options
-rw-r--r-- | Alc/ALc.c | 6 | ||||
-rw-r--r-- | Alc/ALu.c | 8 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 2 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 80 |
4 files changed, 91 insertions, 5 deletions
@@ -1745,8 +1745,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList) device->UpdateSize = (ALuint64)device->UpdateSize * freq / device->Frequency; - /* SSE does best with the update size being a multiple of 4 */ - if((CPUCapFlags&CPU_CAP_SSE)) + /* SSE and Neon do best with the update size being a multiple of 4 */ + if((CPUCapFlags&(CPU_CAP_SSE|CPU_CAP_NEON)) != 0) device->UpdateSize = (device->UpdateSize+3)&~3; device->Frequency = freq; @@ -1861,6 +1861,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList) { if((CPUCapFlags&CPU_CAP_SSE)) WARN("SSE performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize); + if((CPUCapFlags&CPU_CAP_NEON)) + WARN("NEON performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize); } SetMixerFPUMode(&oldMode); @@ -118,6 +118,10 @@ static DryMixerFunc SelectDirectMixer(void) if((CPUCapFlags&CPU_CAP_SSE)) return MixDirect_SSE; #endif +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixDirect_Neon; +#endif return MixDirect_C; } @@ -128,6 +132,10 @@ static WetMixerFunc SelectSendMixer(void) if((CPUCapFlags&CPU_CAP_SSE)) return MixSend_SSE; #endif +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixSend_Neon; +#endif return MixSend_C; } diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index 5e43af15..f8968a0a 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h @@ -27,5 +27,7 @@ void MixSend_SSE(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,A /* Neon mixers */ void MixDirect_Hrtf_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint); +void MixDirect_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint); +void MixSend_Neon(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,ALuint); #endif /* MIXER_DEFS_H */ diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 571221be..0aa450ad 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -14,11 +14,15 @@ static inline void ApplyCoeffsStep(const ALuint IrSize, ALfloat (*restrict Coeffs)[2], const ALfloat (*restrict CoeffStep)[2]) { + float32x4_t coeffs, deltas; ALuint c; - for(c = 0;c < IrSize;c++) + + for(c = 0;c < IrSize;c += 2) { - Coeffs[c][0] += CoeffStep[c][0]; - Coeffs[c][1] += CoeffStep[c][1]; + coeffs = vld1q_f32(&Coeffs[c][0]); + deltas = vld1q_f32(&CoeffStep[c][0]); + coeffs = vaddq_f32(coeffs, deltas); + vst1q_f32(&Coeffs[c][0], coeffs); } } @@ -54,3 +58,73 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], #define SUFFIX Neon #include "mixer_inc.c" #undef SUFFIX + + +void MixDirect_Neon(const DirectParams *params, const ALfloat *restrict data, ALuint srcchan, + ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) +{ + ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer; + ALfloat *restrict ClickRemoval = params->ClickRemoval; + ALfloat *restrict PendingClicks = params->PendingClicks; + ALfloat DrySend; + float32x4_t gain; + ALuint pos; + ALuint c; + + for(c = 0;c < MaxChannels;c++) + { + DrySend = params->Gains[srcchan][c]; + if(!(DrySend > GAIN_SILENCE_THRESHOLD)) + continue; + + if(OutPos == 0) + ClickRemoval[c] -= data[0]*DrySend; + + gain = vdupq_n_f32(DrySend); + for(pos = 0;BufferSize-pos > 3;pos += 4) + { + const float32x4_t val4 = vld1q_f32(&data[pos]); + float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain)); + vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + } + for(;pos < BufferSize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*DrySend; + + if(OutPos+pos == SamplesToDo) + PendingClicks[c] += data[pos]*DrySend; + } +} + + +void MixSend_Neon(const SendParams *params, const ALfloat *restrict data, + ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) +{ + ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer; + ALfloat *restrict ClickRemoval = params->ClickRemoval; + ALfloat *restrict PendingClicks = params->PendingClicks; + ALfloat WetGain; + float32x4_t gain; + ALuint pos; + + WetGain = params->Gain; + if(!(WetGain > GAIN_SILENCE_THRESHOLD)) + return; + + if(OutPos == 0) + ClickRemoval[0] -= data[0] * WetGain; + + gain = vdupq_n_f32(WetGain); + for(pos = 0;BufferSize-pos > 3;pos += 4) + { + const float32x4_t val4 = vld1q_f32(&data[pos]); + float32x4_t wet4 = vld1q_f32(&OutBuffer[0][OutPos+pos]); + wet4 = vaddq_f32(wet4, vmulq_f32(val4, gain)); + vst1q_f32(&OutBuffer[0][OutPos+pos], wet4); + } + for(;pos < BufferSize;pos++) + OutBuffer[0][OutPos+pos] += data[pos] * WetGain; + + if(OutPos+pos == SamplesToDo) + PendingClicks[0] += data[pos] * WetGain; +} |