diff options
author | Chris Robinson <[email protected]> | 2019-01-12 21:08:34 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2019-01-12 21:08:34 -0800 |
commit | 867161d55f91622a6e1a4266e31e85a5443f7cfe (patch) | |
tree | d0eac44940c2e3ab725ca210c357f7e99c428939 /Alc | |
parent | 5b382a69b6da10717daa8de388246053df0d27a4 (diff) |
Constify some parameters
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/alu.cpp | 10 | ||||
-rw-r--r-- | Alc/mixer/defs.h | 57 | ||||
-rw-r--r-- | Alc/mixer/mixer_c.cpp | 17 | ||||
-rw-r--r-- | Alc/mixer/mixer_neon.cpp | 79 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse.cpp | 29 |
5 files changed, 97 insertions, 95 deletions
diff --git a/Alc/alu.cpp b/Alc/alu.cpp index b3ca26cb..f7a8eb6b 100644 --- a/Alc/alu.cpp +++ b/Alc/alu.cpp @@ -126,7 +126,7 @@ inline HrtfDirectMixerFunc SelectHrtfMixer(void) } -void ProcessHrtf(ALCdevice *device, ALsizei SamplesToDo) +void ProcessHrtf(ALCdevice *device, const ALsizei SamplesToDo) { if(AmbiUpsampler *ambiup{device->AmbiUp.get()}) ambiup->process(device->Dry.Buffer, device->Dry.NumChannels, device->FOAOut.Buffer, @@ -144,7 +144,7 @@ void ProcessHrtf(ALCdevice *device, ALsizei SamplesToDo) state->Offset += SamplesToDo; } -void ProcessAmbiDec(ALCdevice *device, ALsizei SamplesToDo) +void ProcessAmbiDec(ALCdevice *device, const ALsizei SamplesToDo) { BFormatDec *ambidec{device->AmbiDecoder.get()}; if(device->Dry.Buffer != device->FOAOut.Buffer) @@ -154,13 +154,13 @@ void ProcessAmbiDec(ALCdevice *device, ALsizei SamplesToDo) SamplesToDo); } -void ProcessAmbiUp(ALCdevice *device, ALsizei SamplesToDo) +void ProcessAmbiUp(ALCdevice *device, const ALsizei SamplesToDo) { device->AmbiUp->process(device->RealOut.Buffer, device->RealOut.NumChannels, device->FOAOut.Buffer, device->FOAOut.NumChannels, SamplesToDo); } -void ProcessUhj(ALCdevice *device, ALsizei SamplesToDo) +void ProcessUhj(ALCdevice *device, const ALsizei SamplesToDo) { /* UHJ is stereo output only. */ const int lidx{(device->RealOut.ChannelName[0]==FrontLeft) ? 0 : 1}; @@ -172,7 +172,7 @@ void ProcessUhj(ALCdevice *device, ALsizei SamplesToDo) device->Dry.Buffer, SamplesToDo); } -void ProcessBs2b(ALCdevice *device, ALsizei SamplesToDo) +void ProcessBs2b(ALCdevice *device, const ALsizei SamplesToDo) { /* BS2B is stereo output only. */ const int lidx{(device->RealOut.ChannelName[0]==FrontLeft) ? 0 : 1}; diff --git a/Alc/mixer/defs.h b/Alc/mixer/defs.h index bac93d6f..649e6d44 100644 --- a/Alc/mixer/defs.h +++ b/Alc/mixer/defs.h @@ -22,43 +22,41 @@ const ALfloat *Resample_bsinc_C(const InterpState *state, const ALfloat *RESTRIC /* C mixers */ void MixHrtf_C(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, ALsizei BufferSize); + HrtfState *hrtfstate, const ALsizei BufferSize); void MixHrtfBlend_C(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); + const ALsizei BufferSize); void MixDirectHrtf_C(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize); -void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); +void Mix_C(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize); +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize); /* SSE mixers */ void MixHrtf_SSE(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, ALsizei BufferSize); + HrtfState *hrtfstate, const ALsizei BufferSize); void MixHrtfBlend_SSE(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); + const ALsizei BufferSize); void MixDirectHrtf_SSE(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize); -void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); +void Mix_SSE(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize); +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize); /* SSE resamplers */ inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *RESTRICT frac_arr, ALsizei *RESTRICT pos_arr, ALsizei size) @@ -88,23 +86,22 @@ const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *RESTR /* Neon mixers */ void MixHrtf_Neon(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, - HrtfState *hrtfstate, ALsizei BufferSize); + HrtfState *hrtfstate, const ALsizei BufferSize); void MixHrtfBlend_Neon(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALfloat *data, ALsizei Offset, const ALsizei OutPos, const ALsizei IrSize, const HrtfParams *oldparams, MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); + const ALsizei BufferSize); void MixDirectHrtf_Neon(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat (*data)[BUFFERSIZE], DirectHrtfState *State, const ALsizei NumChans, const ALsizei BufferSize); -void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); +void Mix_Neon(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize); +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize); /* Neon resamplers */ const ALfloat *Resample_lerp_Neon(const InterpState *state, const ALfloat *RESTRICT src, diff --git a/Alc/mixer/mixer_c.cpp b/Alc/mixer/mixer_c.cpp index 38bb7b64..0d8b1d5f 100644 --- a/Alc/mixer/mixer_c.cpp +++ b/Alc/mixer/mixer_c.cpp @@ -130,9 +130,9 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] #include "hrtf_inc.cpp" -void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) +void Mix_C(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize) { ASSUME(OutChans > 0); ASSUME(BufferSize > 0); @@ -140,6 +140,7 @@ void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[ const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f}; for(ALsizei c{0};c < OutChans;c++) { + ALfloat *RESTRICT dst{&OutBuffer[c][OutPos]}; ALsizei pos{0}; ALfloat gain{CurrentGains[c]}; @@ -151,7 +152,7 @@ void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[ ALfloat step_count{0.0f}; for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos] * (gain + step*step_count); + dst[pos] += data[pos] * (gain + step*step_count); step_count += 1.0f; } if(pos == Counter) @@ -164,7 +165,7 @@ void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[ if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) continue; for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; + dst[pos] += data[pos]*gain; } } @@ -174,18 +175,20 @@ void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[ * transform. And as the matrices are more or less static once set up, no * stepping is necessary. */ -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize) { ASSUME(InChans > 0); ASSUME(BufferSize > 0); for(ALsizei c{0};c < InChans;c++) { + const ALfloat *RESTRICT src{&data[c][InPos]}; const ALfloat gain{Gains[c]}; if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) continue; for(ALsizei i{0};i < BufferSize;i++) - OutBuffer[i] += data[c][InPos+i] * gain; + OutBuffer[i] += src[i] * gain; } } diff --git a/Alc/mixer/mixer_neon.cpp b/Alc/mixer/mixer_neon.cpp index 4843922f..8d70d979 100644 --- a/Alc/mixer/mixer_neon.cpp +++ b/Alc/mixer/mixer_neon.cpp @@ -169,47 +169,46 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] #include "hrtf_inc.cpp" -void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) +void Mix_Neon(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize) { - const ALfloat delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; - ALsizei c; - ASSUME(OutChans > 0); ASSUME(BufferSize > 0); - for(c = 0;c < OutChans;c++) + const ALfloat delta{(Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f}; + for(ALsizei c{0};c < OutChans;c++) { - ALsizei pos = 0; - ALfloat gain = CurrentGains[c]; - const ALfloat diff = TargetGains[c] - gain; + ALfloat *RESTRICT dst{al::assume_aligned<16>(&OutBuffer[c][OutPos])}; + ALsizei pos{0}; + ALfloat gain{CurrentGains[c]}; + const ALfloat diff{TargetGains[c] - gain}; - if(fabsf(diff) > std::numeric_limits<float>::epsilon()) + if(std::fabs(diff) > std::numeric_limits<float>::epsilon()) { - ALsizei minsize = mini(BufferSize, Counter); - const ALfloat step = diff * delta; - ALfloat step_count = 0.0f; + ALsizei minsize{mini(BufferSize, Counter)}; + const ALfloat step{diff * delta}; + ALfloat step_count{0.0f}; /* Mix with applying gain steps in aligned multiples of 4. */ if(LIKELY(minsize > 3)) { - const float32x4_t four4 = vdupq_n_f32(4.0f); - const float32x4_t step4 = vdupq_n_f32(step); - const float32x4_t gain4 = vdupq_n_f32(gain); - float32x4_t step_count4 = vsetq_lane_f32(0.0f, + const float32x4_t four4{vdupq_n_f32(4.0f)}; + const float32x4_t step4{vdupq_n_f32(step)}; + const float32x4_t gain4{vdupq_n_f32(gain)}; + float32x4_t step_count4{vsetq_lane_f32(0.0f, vsetq_lane_f32(1.0f, vsetq_lane_f32(2.0f, vsetq_lane_f32(3.0f, vdupq_n_f32(0.0f), 3), 2), 1), 0 - ); - ALsizei todo = minsize >> 2; + )}; + ALsizei todo{minsize >> 2}; do { const float32x4_t val4 = vld1q_f32(&data[pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + float32x4_t dry4 = vld1q_f32(&dst[pos]); dry4 = vmlaq_f32(dry4, val4, vmlaq_f32(gain4, step4, step_count4)); step_count4 = vaddq_f32(step_count4, four4); - vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + vst1q_f32(&dst[pos], dry4); pos += 4; } while(--todo); /* NOTE: step_count4 now represents the next four counts after @@ -221,7 +220,7 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffe /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos]*(gain + step*step_count); + dst[pos] += data[pos]*(gain + step*step_count); step_count += 1.0f; } if(pos == Counter) @@ -233,48 +232,48 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffe /* Mix until pos is aligned with 4 or the mix is done. */ minsize = mini(BufferSize, (pos+3)&~3); for(;pos < minsize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; + dst[pos] += data[pos]*gain; } - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) continue; if(LIKELY(BufferSize-pos > 3)) { - ALsizei todo = (BufferSize-pos) >> 2; + ALsizei todo{(BufferSize-pos) >> 2}; const float32x4_t gain4 = vdupq_n_f32(gain); do { const float32x4_t val4 = vld1q_f32(&data[pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + float32x4_t dry4 = vld1q_f32(&dst[pos]); dry4 = vmlaq_f32(dry4, val4, gain4); - vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + vst1q_f32(&dst[pos], dry4); pos += 4; } while(--todo); } for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; + dst[pos] += data[pos]*gain; } } -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize) { - ALsizei c; - ASSUME(InChans > 0); ASSUME(BufferSize > 0); - for(c = 0;c < InChans;c++) + for(ALsizei c{0};c < InChans;c++) { - ALsizei pos = 0; - const ALfloat gain = Gains[c]; - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + const ALfloat *RESTRICT src{al::assume_aligned<16>(&data[c][InPos])}; + ALsizei pos{0}; + const ALfloat gain{Gains[c]}; + if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) continue; if(LIKELY(BufferSize > 3)) { - ALsizei todo = BufferSize >> 2; - float32x4_t gain4 = vdupq_n_f32(gain); + ALsizei todo{BufferSize >> 2}; + float32x4_t gain4{vdupq_n_f32(gain)}; do { - const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]); + const float32x4_t val4 = vld1q_f32(&src[pos]); float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]); dry4 = vmlaq_f32(dry4, val4, gain4); vst1q_f32(&OutBuffer[pos], dry4); @@ -282,6 +281,6 @@ void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTR } while(--todo); } for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][InPos+pos]*gain; + OutBuffer[pos] += src[pos]*gain; } } diff --git a/Alc/mixer/mixer_sse.cpp b/Alc/mixer/mixer_sse.cpp index 9eadfd9e..d18af086 100644 --- a/Alc/mixer/mixer_sse.cpp +++ b/Alc/mixer/mixer_sse.cpp @@ -149,9 +149,9 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (&Values)[HRIR_LENGTH][2] #include "hrtf_inc.cpp" -void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) +void Mix_SSE(const ALfloat *data, const ALsizei OutChans, ALfloat (*OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, const ALsizei Counter, + const ALsizei OutPos, const ALsizei BufferSize) { ASSUME(OutChans > 0); ASSUME(BufferSize > 0); @@ -159,6 +159,7 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f}; for(ALsizei c{0};c < OutChans;c++) { + ALfloat *RESTRICT dst{al::assume_aligned<16>(&OutBuffer[c][OutPos])}; ALsizei pos{0}; ALfloat gain{CurrentGains[c]}; const ALfloat diff{TargetGains[c] - gain}; @@ -178,12 +179,12 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer ALsizei todo{minsize >> 2}; do { const __m128 val4{_mm_load_ps(&data[pos])}; - __m128 dry4{_mm_load_ps(&OutBuffer[c][OutPos+pos])}; + __m128 dry4{_mm_load_ps(&dst[pos])}; #define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) /* dry += val * (gain + step*step_count) */ dry4 = MLA4(dry4, val4, MLA4(gain4, step4, step_count4)); #undef MLA4 - _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + _mm_store_ps(&dst[pos], dry4); step_count4 = _mm_add_ps(step_count4, four4); pos += 4; } while(--todo); @@ -196,7 +197,7 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos]*(gain + step*step_count); + dst[pos] += data[pos]*(gain + step*step_count); step_count += 1.0f; } if(pos == Counter) @@ -208,7 +209,7 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer /* Mix until pos is aligned with 4 or the mix is done. */ minsize = mini(BufferSize, (pos+3)&~3); for(;pos < minsize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; + dst[pos] += data[pos]*gain; } if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) @@ -219,24 +220,26 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*RESTRICT OutBuffer const __m128 gain4{_mm_set1_ps(gain)}; do { const __m128 val4{_mm_load_ps(&data[pos])}; - __m128 dry4{_mm_load_ps(&OutBuffer[c][OutPos+pos])}; + __m128 dry4{_mm_load_ps(&dst[pos])}; dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + _mm_store_ps(&dst[pos], dry4); pos += 4; } while(--todo); } for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; + dst[pos] += data[pos]*gain; } } -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTRICT data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*data)[BUFFERSIZE], + const ALsizei InChans, const ALsizei InPos, const ALsizei BufferSize) { ASSUME(InChans > 0); ASSUME(BufferSize > 0); for(ALsizei c{0};c < InChans;c++) { + const ALfloat *RESTRICT src{al::assume_aligned<16>(&data[c][InPos])}; const ALfloat gain{Gains[c]}; if(!(std::fabs(gain) > GAIN_SILENCE_THRESHOLD)) continue; @@ -247,7 +250,7 @@ void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTRI ALsizei todo{BufferSize >> 2}; const __m128 gain4 = _mm_set1_ps(gain); do { - const __m128 val4{_mm_load_ps(&data[c][InPos+pos])}; + const __m128 val4{_mm_load_ps(&src[pos])}; __m128 dry4{_mm_load_ps(&OutBuffer[pos])}; dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); _mm_store_ps(&OutBuffer[pos], dry4); @@ -255,6 +258,6 @@ void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*RESTRI } while(--todo); } for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][InPos+pos]*gain; + OutBuffer[pos] += src[pos]*gain; } } |