diff options
-rw-r--r-- Alc/mixer_neon.c | 41
-rw-r--r-- Alc/mixer_sse.c | 25
2 files changed, 22 insertions, 44 deletions
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 5a598f5c..1b695d81 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -93,25 +93,14 @@ void MixDirect_Neon(DirectParams *params, const ALfloat *restrict data, ALuint s
         if(Step != 1.0f && Counter > 0)
         {
             DrySend = params->Mix.Gains.Current[srcchan][c];
-            for(;BufferSize-pos > 3 && Counter-pos > 3;pos+=4)
+            for(;pos < BufferSize && pos < Counter;pos++)
             {
-                OutBuffer[c][OutPos+pos ] += data[pos ]*DrySend;
+                OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
                 DrySend *= Step;
-                OutBuffer[c][OutPos+pos+1] += data[pos+1]*DrySend;
-                DrySend *= Step;
-                OutBuffer[c][OutPos+pos+2] += data[pos+2]*DrySend;
-                DrySend *= Step;
-                OutBuffer[c][OutPos+pos+4] += data[pos+3]*DrySend;
-                DrySend *= Step;
-            }
-            if(!(BufferSize-pos > 3))
-            {
-                for(;pos < BufferSize && pos < Counter;pos++)
-                {
-                    OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
-                    DrySend *= Step;
-                }
             }
+            /* Mix until pos is aligned with 4 or the mix is done. */
+            for(;pos < BufferSize && (pos&3) != 0;pos++)
+                OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
             params->Mix.Gains.Current[srcchan][c] = DrySend;
         }
 
@@ -146,25 +135,13 @@ void MixSend_Neon(SendParams *params, const ALfloat *restrict data,
     if(Step != 1.0f && Counter > 0)
     {
         WetGain = params->Gain.Current;
-        for(;BufferSize-pos > 3 && Counter-pos > 3;pos+=4)
+        for(;pos < BufferSize && pos < Counter;pos++)
         {
-            OutBuffer[0][OutPos+pos ] += data[pos ]*WetGain;
+            OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
             WetGain *= Step;
-            OutBuffer[0][OutPos+pos+1] += data[pos+1]*WetGain;
-            WetGain *= Step;
-            OutBuffer[0][OutPos+pos+2] += data[pos+2]*WetGain;
-            WetGain *= Step;
-            OutBuffer[0][OutPos+pos+4] += data[pos+3]*WetGain;
-            WetGain *= Step;
-        }
-        if(!(BufferSize-pos > 3))
-        {
-            for(;pos < BufferSize && pos < Counter;pos++)
-            {
-                OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
-                WetGain *= Step;
-            }
         }
+        for(;pos < BufferSize && (pos&3) != 0;pos++)
+            OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
         params->Gain.Current = WetGain;
     }
 
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index c17a7e08..a2b8461d 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -156,6 +156,7 @@ void MixDirect_SSE(DirectParams *params, const ALfloat *restrict data, ALuint sr
         if(Step != 1.0f && Counter > 0)
         {
             DrySend = params->Mix.Gains.Current[srcchan][c];
+            /* Mix with applying gain steps in aligned multiples of 4. */
             if(BufferSize-pos > 3 && Counter-pos > 3)
             {
                 gain = _mm_set_ps(
@@ -175,14 +176,15 @@ void MixDirect_SSE(DirectParams *params, const ALfloat *restrict data, ALuint sr
                 } while(BufferSize-pos > 3 && Counter-pos > 3);
                 DrySend = _mm_cvtss_f32(_mm_shuffle_ps(gain, gain, _MM_SHUFFLE(3, 3, 3, 3)));
             }
-            if(!(BufferSize-pos > 3))
+            /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
+            for(;pos < BufferSize && pos < Counter;pos++)
             {
-                for(;pos < BufferSize && pos < Counter;pos++)
-                {
-                    OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
-                    DrySend *= Step;
-                }
+                OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
+                DrySend *= Step;
             }
+            /* Mix until pos is aligned with 4 or the mix is done. */
+            for(;pos < BufferSize && (pos&3) != 0;pos++)
+                OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
             params->Mix.Gains.Current[srcchan][c] = DrySend;
         }
 
@@ -237,14 +239,13 @@ void MixSend_SSE(SendParams *params, const ALfloat *restrict data,
             } while(BufferSize-pos > 3 && Counter-pos > 3);
             WetGain = _mm_cvtss_f32(_mm_shuffle_ps(gain, gain, _MM_SHUFFLE(3, 3, 3, 3)));
         }
-        if(!(BufferSize-pos > 3))
+        for(;pos < BufferSize && pos < Counter;pos++)
         {
-            for(;pos < BufferSize && pos < Counter;pos++)
-            {
-                OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
-                WetGain *= Step;
-            }
+            OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
+            WetGain *= Step;
         }
+        for(;pos < BufferSize && (pos&3) != 0;pos++)
+            OutBuffer[0][OutPos+pos] += data[pos]*WetGain;
         params->Gain.Current = WetGain;
     }
 