aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2016-06-01 23:39:13 -0700
committer Chris Robinson <[email protected]> 2016-06-01 23:39:13 -0700
commit b7da69510c85b776ba119d69c4c74aa38d412594 (patch)
tree 3eeda7d1786a007165e84b85e0ad8e236cad62db
parent a16d0b192e9833ae0abd6e1e7ef2305c34705497 (diff)
Implement a Neon-enhanced MixRow
-rw-r--r-- Alc/bformatdec.c 4
-rw-r--r-- Alc/mixer_defs.h 2
-rw-r--r-- Alc/mixer_neon.c 25
3 files changed, 31 insertions, 0 deletions
diff --git a/Alc/bformatdec.c b/Alc/bformatdec.c
index 7da50692..9ebaba27 100644
--- a/Alc/bformatdec.c
+++ b/Alc/bformatdec.c
@@ -159,6 +159,10 @@ static inline MatrixMixerFunc SelectMixer(void)
if((CPUCapFlags&CPU_CAP_SSE))
return MixRow_SSE;
#endif
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return MixRow_Neon;
+#endif
return MixRow_C;
}
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 8d208b9c..8b934c58 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -80,5 +80,7 @@ void MixHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint
struct HrtfState *hrtfstate, ALuint BufferSize);
void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
+void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
+ ALuint InChans, ALuint BufferSize); /* Neon row mixer: adds each of the InChans rows of data, scaled by its Mtx gain, into OutBuffer. */
#endif /* MIXER_DEFS_H */
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 96936ef5..073f62c8 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -118,3 +118,28 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}
+
+/* Neon row mixer. For each of the InChans input channels, the channel's gain
+ * is read from Mtx and, when its magnitude is above GAIN_SILENCE_THRESHOLD,
+ * the first BufferSize samples of that channel are scaled by the gain and
+ * accumulated into OutBuffer. Samples are handled four at a time with Neon
+ * multiply-accumulate; a scalar loop finishes whatever is left over.
+ */
+void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
+{
+    ALuint chan;
+
+    for(chan = 0;chan < InChans;chan++)
+    {
+        const ALfloat *src = data[chan];
+        const ALfloat scale = Mtx[chan];
+        ALuint i = 0;
+
+        /* The comparison is false for a NaN gain as well, so such a channel
+         * is skipped just like a silent one. */
+        if(fabsf(scale) > GAIN_SILENCE_THRESHOLD)
+        {
+            const float32x4_t scale4 = vdupq_n_f32(scale);
+
+            /* Vector part: runs while at least four samples remain. */
+            while(BufferSize-i > 3)
+            {
+                const float32x4_t in4 = vld1q_f32(&src[i]);
+                const float32x4_t out4 = vld1q_f32(&OutBuffer[i]);
+                vst1q_f32(&OutBuffer[i], vmlaq_f32(out4, in4, scale4));
+                i += 4;
+            }
+
+            /* Scalar tail: the last zero to three samples. */
+            while(i < BufferSize)
+            {
+                OutBuffer[i] += src[i]*scale;
+                i++;
+            }
+        }
+    }
+}