diff options
author | Chris Robinson <[email protected]> | 2016-06-01 23:39:13 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2016-06-01 23:39:13 -0700 |
commit | b7da69510c85b776ba119d69c4c74aa38d412594 (patch) | |
tree | 3eeda7d1786a007165e84b85e0ad8e236cad62db | |
parent | a16d0b192e9833ae0abd6e1e7ef2305c34705497 (diff) |
Implement a Neon-enhanced MixRow
-rw-r--r-- | Alc/bformatdec.c | 4 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 2 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 25 |
3 files changed, 31 insertions, 0 deletions
```diff
diff --git a/Alc/bformatdec.c b/Alc/bformatdec.c
index 7da50692..9ebaba27 100644
--- a/Alc/bformatdec.c
+++ b/Alc/bformatdec.c
@@ -159,6 +159,10 @@ static inline MatrixMixerFunc SelectMixer(void)
     if((CPUCapFlags&CPU_CAP_SSE))
         return MixRow_SSE;
 #endif
+#ifdef HAVE_NEON
+    if((CPUCapFlags&CPU_CAP_NEON))
+        return MixRow_Neon;
+#endif
     return MixRow_C;
 }
 
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 8d208b9c..8b934c58 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -80,5 +80,7 @@ void MixHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint
                   struct HrtfState *hrtfstate, ALuint BufferSize);
 void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
               struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
+void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
+                 ALuint InChans, ALuint BufferSize);
 
 #endif /* MIXER_DEFS_H */
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 96936ef5..073f62c8 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -118,3 +118,28 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
             OutBuffer[c][OutPos+pos] += data[pos]*gain;
     }
 }
+
+void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
+{
+    float32x4_t gain4;
+    ALuint c;
+
+    for(c = 0;c < InChans;c++)
+    {
+        ALuint pos = 0;
+        ALfloat gain = Mtx[c];
+        if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
+            continue;
+
+        gain4 = vdupq_n_f32(gain);
+        for(;BufferSize-pos > 3;pos += 4)
+        {
+            const float32x4_t val4 = vld1q_f32(&data[c][pos]);
+            float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]);
+            dry4 = vmlaq_f32(dry4, val4, gain4);
+            vst1q_f32(&OutBuffer[pos], dry4);
+        }
+        for(;pos < BufferSize;pos++)
+            OutBuffer[pos] += data[c][pos]*gain;
+    }
+}
```