aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Robinson <[email protected]>2014-01-26 01:34:39 -0800
committerChris Robinson <[email protected]>2014-01-26 01:34:39 -0800
commita4bc0a46e9e0fe001d4c14909b47ed4ac8d81960 (patch)
treea0ba33c23b3ceeb36e44f3d0212091398da6bcf3
parent49baa9128dd98e986639def4f24c7522d9ec6b56 (diff)
Implement dry and wet mixers for Neon
Code provided by Philippe Simons <[email protected]>.
-rw-r--r--Alc/ALc.c6
-rw-r--r--Alc/ALu.c8
-rw-r--r--Alc/mixer_defs.h2
-rw-r--r--Alc/mixer_neon.c80
4 files changed, 91 insertions, 5 deletions
diff --git a/Alc/ALc.c b/Alc/ALc.c
index fea0320d..7d8f14cd 100644
--- a/Alc/ALc.c
+++ b/Alc/ALc.c
@@ -1745,8 +1745,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList)
device->UpdateSize = (ALuint64)device->UpdateSize * freq /
device->Frequency;
- /* SSE does best with the update size being a multiple of 4 */
- if((CPUCapFlags&CPU_CAP_SSE))
+ /* SSE and Neon do best with the update size being a multiple of 4 */
+ if((CPUCapFlags&(CPU_CAP_SSE|CPU_CAP_NEON)) != 0)
device->UpdateSize = (device->UpdateSize+3)&~3;
device->Frequency = freq;
@@ -1861,6 +1861,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList)
{
if((CPUCapFlags&CPU_CAP_SSE))
WARN("SSE performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize);
+ if((CPUCapFlags&CPU_CAP_NEON))
+ WARN("NEON performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize);
}
SetMixerFPUMode(&oldMode);
diff --git a/Alc/ALu.c b/Alc/ALu.c
index 34ac6687..bf09450f 100644
--- a/Alc/ALu.c
+++ b/Alc/ALu.c
@@ -118,6 +118,10 @@ static DryMixerFunc SelectDirectMixer(void)
if((CPUCapFlags&CPU_CAP_SSE))
return MixDirect_SSE;
#endif
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return MixDirect_Neon;
+#endif
return MixDirect_C;
}
@@ -128,6 +132,10 @@ static WetMixerFunc SelectSendMixer(void)
if((CPUCapFlags&CPU_CAP_SSE))
return MixSend_SSE;
#endif
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return MixSend_Neon;
+#endif
return MixSend_C;
}
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 5e43af15..f8968a0a 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -27,5 +27,7 @@ void MixSend_SSE(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,A
/* Neon mixers */
void MixDirect_Hrtf_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint);
+void MixDirect_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint);
+void MixSend_Neon(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,ALuint);
#endif /* MIXER_DEFS_H */
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 571221be..0aa450ad 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -14,11 +14,15 @@ static inline void ApplyCoeffsStep(const ALuint IrSize,
ALfloat (*restrict Coeffs)[2],
const ALfloat (*restrict CoeffStep)[2])
{
+ float32x4_t coeffs, deltas;
ALuint c;
- for(c = 0;c < IrSize;c++)
+
+ for(c = 0;c < IrSize;c += 2)
{
- Coeffs[c][0] += CoeffStep[c][0];
- Coeffs[c][1] += CoeffStep[c][1];
+ coeffs = vld1q_f32(&Coeffs[c][0]);
+ deltas = vld1q_f32(&CoeffStep[c][0]);
+ coeffs = vaddq_f32(coeffs, deltas);
+ vst1q_f32(&Coeffs[c][0], coeffs);
}
}
@@ -54,3 +58,73 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
#define SUFFIX Neon
#include "mixer_inc.c"
#undef SUFFIX
+
+
+void MixDirect_Neon(const DirectParams *params, const ALfloat *restrict data, ALuint srcchan,
+ ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
+{
+ ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
+ ALfloat *restrict ClickRemoval = params->ClickRemoval;
+ ALfloat *restrict PendingClicks = params->PendingClicks;
+ ALfloat DrySend;
+ float32x4_t gain;
+ ALuint pos;
+ ALuint c;
+
+ for(c = 0;c < MaxChannels;c++)
+ {
+ DrySend = params->Gains[srcchan][c];
+ if(!(DrySend > GAIN_SILENCE_THRESHOLD))
+ continue;
+
+ if(OutPos == 0)
+ ClickRemoval[c] -= data[0]*DrySend;
+
+ gain = vdupq_n_f32(DrySend);
+ for(pos = 0;BufferSize-pos > 3;pos += 4)
+ {
+ const float32x4_t val4 = vld1q_f32(&data[pos]);
+ float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
+ dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain));
+ vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
+ }
+ for(;pos < BufferSize;pos++)
+ OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
+
+ if(OutPos+pos == SamplesToDo)
+ PendingClicks[c] += data[pos]*DrySend;
+ }
+}
+
+
+void MixSend_Neon(const SendParams *params, const ALfloat *restrict data,
+ ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
+{
+ ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
+ ALfloat *restrict ClickRemoval = params->ClickRemoval;
+ ALfloat *restrict PendingClicks = params->PendingClicks;
+ ALfloat WetGain;
+ float32x4_t gain;
+ ALuint pos;
+
+ WetGain = params->Gain;
+ if(!(WetGain > GAIN_SILENCE_THRESHOLD))
+ return;
+
+ if(OutPos == 0)
+ ClickRemoval[0] -= data[0] * WetGain;
+
+ gain = vdupq_n_f32(WetGain);
+ for(pos = 0;BufferSize-pos > 3;pos += 4)
+ {
+ const float32x4_t val4 = vld1q_f32(&data[pos]);
+ float32x4_t wet4 = vld1q_f32(&OutBuffer[0][OutPos+pos]);
+ wet4 = vaddq_f32(wet4, vmulq_f32(val4, gain));
+ vst1q_f32(&OutBuffer[0][OutPos+pos], wet4);
+ }
+ for(;pos < BufferSize;pos++)
+ OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
+
+ if(OutPos+pos == SamplesToDo)
+ PendingClicks[0] += data[pos] * WetGain;
+}