aboutsummaryrefslogtreecommitdiffstats
path: root/Alc/mixer_sse.c
diff options
context:
space:
mode:
Diffstat (limited to 'Alc/mixer_sse.c')
-rw-r--r--Alc/mixer_sse.c75
1 files changed, 52 insertions, 23 deletions
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 38435dac..4ae15201 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -204,33 +204,62 @@ void MixDirect_SSE(DirectParams *params, const ALfloat *restrict data, ALuint sr
void MixSend_SSE(SendParams *params, const ALfloat *restrict data,
- ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
+ ALuint OutPos, ALuint UNUSED(SamplesToDo), ALuint BufferSize)
{
ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
- ALfloat *restrict ClickRemoval = params->ClickRemoval;
- ALfloat *restrict PendingClicks = params->PendingClicks;
- ALfloat WetGain;
- __m128 gain;
- ALuint pos;
+ ALuint Counter = maxu(params->Counter, OutPos) - OutPos;
+ ALfloat WetGain, Step;
+ __m128 gain, step;
- WetGain = params->Gain;
- if(!(WetGain > GAIN_SILENCE_THRESHOLD))
- return;
+ {
+ ALuint pos = 0;
- if(OutPos == 0)
- ClickRemoval[0] -= data[0] * WetGain;
+ Step = params->Gain.Step;
+ if(Step != 1.0f && Counter > 0)
+ {
+ WetGain = params->Gain.Current;
+ if(BufferSize-pos > 3 && Counter-pos > 3)
+ {
+ gain = _mm_set_ps(
+ WetGain,
+ WetGain * Step,
+ WetGain * Step * Step,
+ WetGain * Step * Step * Step
+ );
+ step = _mm_set1_ps(Step * Step * Step * Step);
+ do {
+ const __m128 val4 = _mm_load_ps(&data[pos]);
+ __m128 dry4 = _mm_load_ps(&OutBuffer[0][OutPos+pos]);
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
+ gain = _mm_mul_ps(gain, step);
+ _mm_store_ps(&OutBuffer[0][OutPos+pos], dry4);
+ pos += 4;
+ } while(BufferSize-pos > 3 && Counter-pos > 3);
+ WetGain = _mm_cvtss_f32(_mm_shuffle_ps(gain, gain, _MM_SHUFFLE(3, 3, 3, 3)));
+ }
+ if(!(BufferSize-pos > 3))
+ {
+ for(;pos < BufferSize && pos < Counter;pos++)
+ {
+ OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
+ WetGain *= Step;
+ }
+ }
+ params->Gain.Current = WetGain;
+ }
- gain = _mm_set1_ps(WetGain);
- for(pos = 0;BufferSize-pos > 3;pos += 4)
- {
- const __m128 val4 = _mm_load_ps(&data[pos]);
- __m128 wet4 = _mm_load_ps(&OutBuffer[0][OutPos+pos]);
- wet4 = _mm_add_ps(wet4, _mm_mul_ps(val4, gain));
- _mm_store_ps(&OutBuffer[0][OutPos+pos], wet4);
+ WetGain = params->Gain.Target;
+ if(!(WetGain > GAIN_SILENCE_THRESHOLD))
+ return;
+ gain = _mm_set1_ps(WetGain);
+ for(;BufferSize-pos > 3;pos += 4)
+ {
+ const __m128 val4 = _mm_load_ps(&data[pos]);
+ __m128 wet4 = _mm_load_ps(&OutBuffer[0][OutPos+pos]);
+ wet4 = _mm_add_ps(wet4, _mm_mul_ps(val4, gain));
+ _mm_store_ps(&OutBuffer[0][OutPos+pos], wet4);
+ }
+ for(;pos < BufferSize;pos++)
+ OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
}
- for(;pos < BufferSize;pos++)
- OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
-
- if(OutPos+pos == SamplesToDo)
- PendingClicks[0] += data[pos] * WetGain;
}