about summary refs log tree commit diff stats
path: root/Alc
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2016-05-31 10:18:34 -0700
committer: Chris Robinson <[email protected]> 2016-05-31 10:18:34 -0700
commit: 5e64882be9ad3e3a1552e41befef5a6216f4ecfe (patch)
tree: 47059e787d77c86a0e2575e0d5a1d6b13798b8b4 /Alc
parent: 72d2febccbc670843669494fe5bc052839f54294 (diff)
Use SSE for applying the HQ B-Format decoder matrices
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/bformatdec.c 56
-rw-r--r-- Alc/mixer_c.c 21
-rw-r--r-- Alc/mixer_defs.h 4
-rw-r--r-- Alc/mixer_sse.c 25
4 files changed, 79 insertions, 27 deletions
diff --git a/Alc/bformatdec.c b/Alc/bformatdec.c
index 49052cb8..a871fb09 100644
--- a/Alc/bformatdec.c
+++ b/Alc/bformatdec.c
@@ -3,6 +3,7 @@
#include "bformatdec.h"
#include "ambdec.h"
+#include "mixer_defs.h"
#include "alu.h"
#include "threads.h"
@@ -151,12 +152,27 @@ static const ALfloat CubeMatrixLF[8][MAX_AMBI_COEFFS] = {
};
static ALfloat CubeEncoder[8][MAX_AMBI_COEFFS];
-static alonce_flag encoder_inited = AL_ONCE_FLAG_INIT;
-static void init_encoder(void)
+static inline MatrixMixerFunc SelectMixer(void) /* Returns the row-mixing routine to use: MixRow_SSE when available, otherwise MixRow_C. */
+{
+#ifdef HAVE_SSE
+ if((CPUCapFlags&CPU_CAP_SSE)) /* Only compiled when HAVE_SSE is defined; taken when the CPU_CAP_SSE bit is set in CPUCapFlags. */
+ return MixRow_SSE;
+#endif
+ return MixRow_C; /* Fallback when the SSE variant was not selected above. */
+}
+
+static MatrixMixerFunc MixMatrixRow = MixRow_C; /* Row mixer called by bformatdec_process() and bformatdec_upSample(); starts as MixRow_C and is reassigned from SelectMixer() in init_bformatdec(). */
+
+
+static alonce_flag bformatdec_inited = AL_ONCE_FLAG_INIT;
+
+static void init_bformatdec(void)
{
ALuint i, j;
+ MixMatrixRow = SelectMixer();
+
CalcXYZCoeffs(-0.577350269f, 0.577350269f, -0.577350269f, 0.0f, CubeEncoder[0]);
CalcXYZCoeffs( 0.577350269f, 0.577350269f, -0.577350269f, 0.0f, CubeEncoder[1]);
CalcXYZCoeffs(-0.577350269f, 0.577350269f, 0.577350269f, 0.0f, CubeEncoder[2]);
@@ -226,7 +242,7 @@ typedef struct BFormatDec {
BFormatDec *bformatdec_alloc()
{
- alcall_once(&encoder_inited, init_encoder);
+ alcall_once(&bformatdec_inited, init_bformatdec);
return al_calloc(16, sizeof(BFormatDec));
}
@@ -435,20 +451,6 @@ void bformatdec_reset(BFormatDec *dec, const AmbDecConf *conf, ALuint chancount,
}
-static void apply_row(ALfloat *out, const ALfloat *mtx, ALfloat (*restrict in)[BUFFERSIZE], ALuint inchans, ALuint todo)
-{
- ALuint c, i;
-
- for(c = 0;c < inchans;c++)
- {
- ALfloat gain = mtx[c];
- if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
- continue;
- for(i = 0;i < todo;i++)
- out[i] += in[c][i] * gain;
- }
-}
-
void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo)
{
ALuint chan, i;
@@ -465,10 +467,10 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU
continue;
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
- apply_row(dec->ChannelMix, dec->MatrixHF[chan], dec->SamplesHF,
- dec->NumChannels, SamplesToDo);
- apply_row(dec->ChannelMix, dec->MatrixLF[chan], dec->SamplesLF,
- dec->NumChannels, SamplesToDo);
+ MixMatrixRow(dec->ChannelMix, dec->MatrixHF[chan], dec->SamplesHF,
+ dec->NumChannels, SamplesToDo);
+ MixMatrixRow(dec->ChannelMix, dec->MatrixLF[chan], dec->SamplesLF,
+ dec->NumChannels, SamplesToDo);
if(dec->Delay[chan].Length > 0)
{
@@ -504,8 +506,8 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU
continue;
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
- apply_row(dec->ChannelMix, dec->MatrixHF[chan], InSamples,
- dec->NumChannels, SamplesToDo);
+ MixMatrixRow(dec->ChannelMix, dec->MatrixHF[chan], InSamples,
+ dec->NumChannels, SamplesToDo);
if(dec->Delay[chan].Length > 0)
{
@@ -556,10 +558,10 @@ void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[B
for(k = 0;k < dec->UpSampler.NumChannels;k++)
{
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
- apply_row(dec->ChannelMix, dec->UpSampler.MatrixHF[k], dec->SamplesHF,
- InChannels, SamplesToDo);
- apply_row(dec->ChannelMix, dec->UpSampler.MatrixLF[k], dec->SamplesLF,
- InChannels, SamplesToDo);
+ MixMatrixRow(dec->ChannelMix, dec->UpSampler.MatrixHF[k], dec->SamplesHF,
+ InChannels, SamplesToDo);
+ MixMatrixRow(dec->ChannelMix, dec->UpSampler.MatrixLF[k], dec->SamplesLF,
+ InChannels, SamplesToDo);
for(j = 0;j < dec->NumChannels;j++)
{
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index e9d26140..7952ec93 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -167,3 +167,24 @@ void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[B
OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}
+
+/* Basically the inverse of the above. Rather than one input going to multiple
+ * outputs (each with its own gain), it's multiple inputs (each with its own
+ * gain) going to one output. This applies one row (vs one column) of a matrix
+ * transform. And as the matrices are more or less static once set up, no
+ * stepping is necessary.
+ */
+void MixRow_C(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
+{ /* For each i in [0, BufferSize), adds data[c][i] * Mtx[c] to OutBuffer[i] for every input channel c in [0, InChans). OutBuffer is accumulated into, not cleared here. */
+ ALuint c, i;
+
+ for(c = 0;c < InChans;c++) /* One pass per input channel. */
+ {
+ ALfloat gain = Mtx[c]; /* Gain applied to input channel c for this output row. */
+ if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) /* Skips channels whose |gain| does not exceed the threshold; written as a negated '>' so a NaN gain is skipped as well. */
+ continue;
+
+ for(i = 0;i < BufferSize;i++)
+ OutBuffer[i] += data[c][i] * gain; /* Accumulate the scaled input sample into the output. */
+ }
+}
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 5db804b1..8d208b9c 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -27,6 +27,8 @@ void MixHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ri
struct HrtfState *hrtfstate, ALuint BufferSize);
void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
+void MixRow_C(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
+ ALuint InChans, ALuint BufferSize);
/* SSE mixers */
void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ridx,
@@ -35,6 +37,8 @@ void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint
struct HrtfState *hrtfstate, ALuint BufferSize);
void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
+void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
+ ALuint InChans, ALuint BufferSize);
/* SSE resamplers */
inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size)
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 942e0453..120ac4a0 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -260,3 +260,28 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}
+
+void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
+{ /* SSE variant of the row mixer: for each i in [0, BufferSize), adds data[c][i] * Mtx[c] to OutBuffer[i] for every input channel c in [0, InChans). */
+ __m128 gain4;
+ ALuint c;
+
+ for(c = 0;c < InChans;c++) /* One pass per input channel. */
+ {
+ ALuint pos = 0; /* Sample index; restarts at 0 for every channel. */
+ ALfloat gain = Mtx[c];
+ if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) /* Skips channels whose |gain| does not exceed the threshold; the negated '>' also skips a NaN gain. */
+ continue;
+
+ gain4 = _mm_set1_ps(gain); /* Broadcast the scalar gain to all four lanes. */
+ for(;BufferSize-pos > 3;pos += 4) /* Vector loop: four samples per iteration while at least four remain. */
+ {
+ const __m128 val4 = _mm_load_ps(&data[c][pos]); /* NOTE(review): _mm_load_ps is an aligned load; assumes each data row is 16-byte aligned -- confirm at the call sites. */
+ __m128 dry4 = _mm_load_ps(&OutBuffer[pos]); /* NOTE(review): likewise assumes OutBuffer is 16-byte aligned -- confirm. */
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); /* dry4 += val4 * gain, per lane. */
+ _mm_store_ps(&OutBuffer[pos], dry4); /* Aligned store back to the same four output samples. */
+ }
+ for(;pos < BufferSize;pos++) /* Scalar tail for the remaining 0-3 samples. */
+ OutBuffer[pos] += data[c][pos]*gain;
+ }
+}