aboutsummaryrefslogtreecommitdiffstats
path: root/Alc
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2014-06-13 13:34:19 -0700
committer Chris Robinson <[email protected]> 2014-06-13 13:34:19 -0700
commit a8deaf12f433281b8d996aa593ebff196e3a8189 (patch)
tree 12ee2e1548da800950f9568566781fe72ee9fbb1 /Alc
parent c29eb6348980bf101f2a043d3f3b017dc1c48538 (diff)
Combine the direct and send mixers
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/ALu.c 39
-rw-r--r-- Alc/mixer.c 10
-rw-r--r-- Alc/mixer_c.c 55
-rw-r--r-- Alc/mixer_defs.h 42
-rw-r--r-- Alc/mixer_inc.c 18
-rw-r--r-- Alc/mixer_neon.c 72
-rw-r--r-- Alc/mixer_sse.c 111
7 files changed, 95 insertions, 252 deletions
diff --git a/Alc/ALu.c b/Alc/ALu.c
index bddabeae..0b5c2998 100644
--- a/Alc/ALu.c
+++ b/Alc/ALu.c
@@ -115,42 +115,28 @@ static HrtfMixerFunc SelectHrtfMixer(void)
{
#ifdef HAVE_SSE
if((CPUCapFlags&CPU_CAP_SSE))
- return MixDirect_Hrtf_SSE;
+ return MixHrtf_SSE;
#endif
#ifdef HAVE_NEON
if((CPUCapFlags&CPU_CAP_NEON))
- return MixDirect_Hrtf_Neon;
+ return MixHrtf_Neon;
#endif
- return MixDirect_Hrtf_C;
+ return MixHrtf_C;
}
-static MixerFunc SelectDirectMixer(void)
+static MixerFunc SelectMixer(void)
{
#ifdef HAVE_SSE
if((CPUCapFlags&CPU_CAP_SSE))
- return MixDirect_SSE;
+ return Mix_SSE;
#endif
#ifdef HAVE_NEON
if((CPUCapFlags&CPU_CAP_NEON))
- return MixDirect_Neon;
+ return Mix_Neon;
#endif
- return MixDirect_C;
-}
-
-static MixerFunc SelectSendMixer(void)
-{
-#ifdef HAVE_SSE
- if((CPUCapFlags&CPU_CAP_SSE))
- return MixSend_SSE;
-#endif
-#ifdef HAVE_NEON
- if((CPUCapFlags&CPU_CAP_NEON))
- return MixSend_Neon;
-#endif
-
- return MixSend_C;
+ return Mix_C;
}
@@ -480,7 +466,6 @@ ALvoid CalcNonAttnSourceParams(ALactivesource *src, const ALCcontext *ALContext)
}
src->IsHrtf = AL_FALSE;
- src->Dry.Mix = SelectDirectMixer();
}
else if(Device->Hrtf)
{
@@ -512,7 +497,6 @@ ALvoid CalcNonAttnSourceParams(ALactivesource *src, const ALCcontext *ALContext)
src->Direct.Mix.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf);
src->IsHrtf = AL_TRUE;
- src->Dry.HrtfMix = SelectHrtfMixer();
}
else
{
@@ -574,7 +558,6 @@ ALvoid CalcNonAttnSourceParams(ALactivesource *src, const ALCcontext *ALContext)
}
src->IsHrtf = AL_FALSE;
- src->Dry.Mix = SelectDirectMixer();
}
for(i = 0;i < NumSends;i++)
{
@@ -598,7 +581,8 @@ ALvoid CalcNonAttnSourceParams(ALactivesource *src, const ALCcontext *ALContext)
src->Send[i].Counter = 64;
}
}
- src->WetMix = SelectSendMixer();
+ src->Mix = SelectMixer();
+ src->HrtfMix = SelectHrtfMixer();
{
ALfloat gainhf = maxf(0.01f, DryGainHF);
@@ -1038,7 +1022,6 @@ ALvoid CalcSourceParams(ALactivesource *src, const ALCcontext *ALContext)
src->Direct.Mix.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf);
src->IsHrtf = AL_TRUE;
- src->Dry.HrtfMix = SelectHrtfMixer();
}
else
{
@@ -1100,7 +1083,6 @@ ALvoid CalcSourceParams(ALactivesource *src, const ALCcontext *ALContext)
}
src->IsHrtf = AL_FALSE;
- src->Dry.Mix = SelectDirectMixer();
}
for(i = 0;i < NumSends;i++)
{
@@ -1124,7 +1106,8 @@ ALvoid CalcSourceParams(ALactivesource *src, const ALCcontext *ALContext)
src->Send[i].Counter = 64;
}
}
- src->WetMix = SelectSendMixer();
+ src->Mix = SelectMixer();
+ src->HrtfMix = SelectHrtfMixer();
{
ALfloat gainhf = maxf(0.01f, DryGainHF);
diff --git a/Alc/mixer.c b/Alc/mixer.c
index c7abbfed..1ecc8d88 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -358,10 +358,10 @@ ALvoid MixSource(ALactivesource *src, ALCdevice *Device, ALuint SamplesToDo)
parms->Filters[chan].ActiveType
);
if(!src->IsHrtf)
- src->Dry.Mix(parms->OutBuffer, samples, parms->Mix.Gains[chan],
- parms->Counter, OutPos, DstBufferSize);
+ src->Mix(samples, MaxChannels, parms->OutBuffer, parms->Mix.Gains[chan],
+ parms->Counter, OutPos, DstBufferSize);
else
- src->Dry.HrtfMix(
+ src->HrtfMix(
parms->OutBuffer, samples, parms->Counter, src->Offset,
OutPos, parms->Mix.Hrtf.IrSize, &parms->Mix.Hrtf.Params[chan],
&parms->Mix.Hrtf.State[chan], DstBufferSize
@@ -381,8 +381,8 @@ ALvoid MixSource(ALactivesource *src, ALCdevice *Device, ALuint SamplesToDo)
Device->FilteredData, ResampledData, DstBufferSize,
parms->Filters[chan].ActiveType
);
- src->WetMix(parms->OutBuffer, samples, &parms->Gain,
- parms->Counter, OutPos, DstBufferSize);
+ src->Mix(samples, 1, parms->OutBuffer, &parms->Gain,
+ parms->Counter, OutPos, DstBufferSize);
}
}
/* Update positions */
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index f919ad79..f3a229e5 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -95,61 +95,32 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
#undef SUFFIX
-void MixDirect_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
+void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
+ MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
{
- ALfloat DrySend, Step;
+ ALfloat gain, step;
ALuint c;
- for(c = 0;c < MaxChannels;c++)
+ for(c = 0;c < OutChans;c++)
{
ALuint pos = 0;
- DrySend = Gains[c].Current;
- Step = Gains[c].Step;
- if(Step != 1.0f && Counter > 0)
+ gain = Gains[c].Current;
+ step = Gains[c].Step;
+ if(step != 1.0f && Counter > 0)
{
for(;pos < BufferSize && pos < Counter;pos++)
{
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- DrySend *= Step;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
+ gain *= step;
}
if(pos == Counter)
- DrySend = Gains[c].Target;
- Gains[c].Current = DrySend;
+ gain = Gains[c].Target;
+ Gains[c].Current = gain;
}
- if(!(DrySend > GAIN_SILENCE_THRESHOLD))
+ if(!(gain > GAIN_SILENCE_THRESHOLD))
continue;
for(;pos < BufferSize;pos++)
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- }
-}
-
-
-void MixSend_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- MixGains *Gain, ALuint Counter, ALuint OutPos, ALuint BufferSize)
-{
- ALfloat WetSend, Step;
-
- {
- ALuint pos = 0;
- WetSend = Gain[0].Current;
- Step = Gain[0].Step;
- if(Step != 1.0f && Counter > 0)
- {
- for(;pos < BufferSize && pos < Counter;pos++)
- {
- OutBuffer[0][OutPos+pos] += data[pos]*WetSend;
- WetSend *= Step;
- }
- if(pos == Counter)
- WetSend = Gain[0].Target;
- Gain[0].Current = WetSend;
- }
-
- if(!(WetSend > GAIN_SILENCE_THRESHOLD))
- return;
- for(;pos < BufferSize;pos++)
- OutBuffer[0][OutPos+pos] += data[pos] * WetSend;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 2ade14f0..c1500ed2 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -19,28 +19,20 @@ const ALfloat *Resample_cubic32_C(const ALfloat *src, ALuint frac, ALuint increm
/* C mixers */
-void MixDirect_Hrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
- const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
- ALuint BufferSize);
-void MixDirect_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gains, ALuint Counter, ALuint OutPos,
- ALuint BufferSize);
-void MixSend_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gain, ALuint Counter, ALuint OutPos,
+void MixHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
+ ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+ const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
ALuint BufferSize);
+void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
+ struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
/* SSE mixers */
-void MixDirect_Hrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
- const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
- ALuint BufferSize);
-void MixDirect_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gains, ALuint Counter, ALuint OutPos,
- ALuint BufferSize);
-void MixSend_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gain, ALuint Counter, ALuint OutPos,
+void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
+ ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+ const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
ALuint BufferSize);
+void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
+ struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
/* SSE resamplers */
inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size)
@@ -63,15 +55,11 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc
ALfloat *restrict dst, ALuint numsamples);
/* Neon mixers */
-void MixDirect_Hrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
- const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
- ALuint BufferSize);
-void MixDirect_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gains, ALuint Counter, ALuint OutPos,
- ALuint BufferSize);
-void MixSend_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- struct MixGains *Gain, ALuint Counter, ALuint OutPos,
+void MixHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
+ ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+ const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
ALuint BufferSize);
+void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
+ struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
#endif /* MIXER_DEFS_H */
diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c
index 7c90ae9c..ab6f32c5 100644
--- a/Alc/mixer_inc.c
+++ b/Alc/mixer_inc.c
@@ -8,10 +8,10 @@
#include "align.h"
-#define REAL_MERGE2(a,b) a##b
-#define MERGE2(a,b) REAL_MERGE2(a,b)
+#define REAL_MERGE(a,b) a##b
+#define MERGE(a,b) REAL_MERGE(a,b)
-#define MixDirect_Hrtf MERGE2(MixDirect_Hrtf_,SUFFIX)
+#define MixHrtf MERGE(MixHrtf_,SUFFIX)
static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
@@ -25,9 +25,9 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
ALfloat left, ALfloat right);
-void MixDirect_Hrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
- const HrtfParams *hrtfparams, HrtfState *hrtfstate, ALuint BufferSize)
+void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
+ ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+ const HrtfParams *hrtfparams, HrtfState *hrtfstate, ALuint BufferSize)
{
alignas(16) ALfloat Coeffs[HRIR_LENGTH][2];
ALuint Delay[2];
@@ -87,7 +87,7 @@ void MixDirect_Hrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *da
}
-#undef MixDirect_Hrtf
+#undef MixHrtf
-#undef MERGE2
-#undef REAL_MERGE2
+#undef MERGE
+#undef REAL_MERGE
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index ecae2692..7b6da2b9 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -75,84 +75,44 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
#undef SUFFIX
-void MixDirect_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
+void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
{
- ALfloat DrySend, Step;
- float32x4_t gain;
+ ALfloat gain, step;
+ float32x4_t gain4;
ALuint c;
- for(c = 0;c < MaxChannels;c++)
+ for(c = 0;c < OutChans;c++) /* One pass per output channel: Gains[c] scales data into OutBuffer[c]. */
{
ALuint pos = 0;
- DrySend = Gains[c].Current;
- Step = Gains[c].Step;
- if(Step != 1.0f && Counter > 0)
+ gain = Gains[c].Current;
+ step = Gains[c].Step;
+ if(step != 1.0f && Counter > 0) /* Gain is still ramping: multiply by step per sample, for at most Counter samples. */
{
for(;pos < BufferSize && pos < Counter;pos++)
{
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- DrySend *= Step;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
+ gain *= step;
}
if(pos == Counter)
- DrySend = Gains[c].Target;
- Gains[c].Current = DrySend;
+ gain = Gains[c].Target;
+ Gains[c].Current = gain;
/* Mix until pos is aligned with 4 or the mix is done. */
for(;pos < BufferSize && (pos&3) != 0;pos++)
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
- if(!(DrySend > GAIN_SILENCE_THRESHOLD))
+ if(!(gain > GAIN_SILENCE_THRESHOLD)) /* Nothing more to add for this channel once the gain is not above the silence threshold. */
continue;
- gain = vdupq_n_f32(DrySend);
+ gain4 = vdupq_n_f32(gain);
for(;BufferSize-pos > 3;pos += 4)
{
const float32x4_t val4 = vld1q_f32(&data[pos]);
float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
- dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain));
+ dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain4));
vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
}
for(;pos < BufferSize;pos++)
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- }
-}
-
-
-void MixSend_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- MixGains *Gain, ALuint Counter, ALuint OutPos, ALuint BufferSize)
-{
- ALfloat WetGain, Step;
- float32x4_t gain;
-
- {
- ALuint pos = 0;
- WetGain = Gain[0].Current;
- Step = Gain[0].Step;
- if(Step != 1.0f && Counter > 0)
- {
- for(;pos < BufferSize && pos < Counter;pos++)
- {
- OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
- WetGain *= Step;
- }
- if(pos == Counter)
- WetGain = Gain[0].Target;
- Gain[0].Current = WetGain;
- for(;pos < BufferSize && (pos&3) != 0;pos++)
- OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
- }
-
- if(!(WetGain > GAIN_SILENCE_THRESHOLD))
- return;
- gain = vdupq_n_f32(WetGain);
- for(;BufferSize-pos > 3;pos += 4)
- {
- const float32x4_t val4 = vld1q_f32(&data[pos]);
- float32x4_t wet4 = vld1q_f32(&OutBuffer[0][OutPos+pos]);
- wet4 = vaddq_f32(wet4, vmulq_f32(val4, gain));
- vst1q_f32(&OutBuffer[0][OutPos+pos], wet4);
- }
- for(;pos < BufferSize;pos++)
- OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 64fd1c12..970619ec 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -138,124 +138,65 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
#undef SUFFIX
-void MixDirect_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
+void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
+ MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
{
- ALfloat DrySend, Step;
- __m128 gain, step;
+ ALfloat gain, step;
+ __m128 gain4, step4;
ALuint c;
- for(c = 0;c < MaxChannels;c++)
+ for(c = 0;c < OutChans;c++)
{
ALuint pos = 0;
- DrySend = Gains[c].Current;
- Step = Gains[c].Step;
- if(Step != 1.0f && Counter > 0)
+ gain = Gains[c].Current;
+ step = Gains[c].Step;
+ if(step != 1.0f && Counter > 0)
{
/* Mix with applying gain steps in aligned multiples of 4. */
if(BufferSize-pos > 3 && Counter-pos > 3)
{
- gain = _mm_setr_ps(
- DrySend,
- DrySend * Step,
- DrySend * Step * Step,
- DrySend * Step * Step * Step
+ gain4 = _mm_setr_ps(
+ gain,
+ gain * step,
+ gain * step * step,
+ gain * step * step * step
);
- step = _mm_set1_ps(Step * Step * Step * Step);
+ step4 = _mm_set1_ps(step * step * step * step);
do {
const __m128 val4 = _mm_load_ps(&data[pos]);
__m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]);
- dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
- gain = _mm_mul_ps(gain, step);
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
+ gain4 = _mm_mul_ps(gain4, step4);
_mm_store_ps(&OutBuffer[c][OutPos+pos], dry4);
pos += 4;
} while(BufferSize-pos > 3 && Counter-pos > 3);
- DrySend = _mm_cvtss_f32(gain);
+ gain = _mm_cvtss_f32(gain4);
}
/* Mix with applying left over gain steps that aren't aligned multiples of 4. */
for(;pos < BufferSize && pos < Counter;pos++)
{
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- DrySend *= Step;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
+ gain *= step;
}
if(pos == Counter)
- DrySend = Gains[c].Target;
- Gains[c].Current = DrySend;
+ gain = Gains[c].Target;
+ Gains[c].Current = gain;
/* Mix until pos is aligned with 4 or the mix is done. */
for(;pos < BufferSize && (pos&3) != 0;pos++)
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
- if(!(DrySend > GAIN_SILENCE_THRESHOLD))
+ if(!(gain > GAIN_SILENCE_THRESHOLD))
continue;
- gain = _mm_set1_ps(DrySend);
+ gain4 = _mm_set1_ps(gain);
for(;BufferSize-pos > 3;pos += 4)
{
const __m128 val4 = _mm_load_ps(&data[pos]);
__m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]);
- dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4));
_mm_store_ps(&OutBuffer[c][OutPos+pos], dry4);
}
for(;pos < BufferSize;pos++)
- OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
- }
-}
-
-
-void MixSend_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
- MixGains *Gain, ALuint Counter, ALuint OutPos, ALuint BufferSize)
-{
- ALfloat WetGain, Step;
- __m128 gain, step;
-
- {
- ALuint pos = 0;
- WetGain = Gain[0].Current;
- Step = Gain[0].Step;
- if(Step != 1.0f && Counter > 0)
- {
- if(BufferSize-pos > 3 && Counter-pos > 3)
- {
- gain = _mm_setr_ps(
- WetGain,
- WetGain * Step,
- WetGain * Step * Step,
- WetGain * Step * Step * Step
- );
- step = _mm_set1_ps(Step * Step * Step * Step);
- do {
- const __m128 val4 = _mm_load_ps(&data[pos]);
- __m128 dry4 = _mm_load_ps(&OutBuffer[0][OutPos+pos]);
- dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
- gain = _mm_mul_ps(gain, step);
- _mm_store_ps(&OutBuffer[0][OutPos+pos], dry4);
- pos += 4;
- } while(BufferSize-pos > 3 && Counter-pos > 3);
- WetGain = _mm_cvtss_f32(gain);
- }
- for(;pos < BufferSize && pos < Counter;pos++)
- {
- OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
- WetGain *= Step;
- }
- if(pos == Counter)
- WetGain = Gain[0].Target;
- Gain[0].Current = WetGain;
- for(;pos < BufferSize && (pos&3) != 0;pos++)
- OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
- }
-
- if(!(WetGain > GAIN_SILENCE_THRESHOLD))
- return;
- gain = _mm_set1_ps(WetGain);
- for(;BufferSize-pos > 3;pos += 4)
- {
- const __m128 val4 = _mm_load_ps(&data[pos]);
- __m128 wet4 = _mm_load_ps(&OutBuffer[0][OutPos+pos]);
- wet4 = _mm_add_ps(wet4, _mm_mul_ps(val4, gain));
- _mm_store_ps(&OutBuffer[0][OutPos+pos], wet4);
- }
- for(;pos < BufferSize;pos++)
- OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
+ OutBuffer[c][OutPos+pos] += data[pos]*gain;
}
}