diff options
-rw-r--r-- | Alc/mixer/hrtf_inc.c | 39 | ||||
-rw-r--r-- | Alc/mixer/mixer_c.c | 7 | ||||
-rw-r--r-- | Alc/mixer/mixer_neon.c | 66 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse.c | 64 |
4 files changed, 100 insertions, 76 deletions
diff --git a/Alc/mixer/hrtf_inc.c b/Alc/mixer/hrtf_inc.c index d6bd8042..3ef22f24 100644 --- a/Alc/mixer/hrtf_inc.c +++ b/Alc/mixer/hrtf_inc.c @@ -22,8 +22,9 @@ void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, { const ALfloat (*Coeffs)[2] = ASSUME_ALIGNED(hrtfparams->Coeffs, 16); const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] }; - ALfloat gainstep = hrtfparams->GainStep; - ALfloat gain = hrtfparams->Gain; + const ALfloat gainstep = hrtfparams->GainStep; + const ALfloat gain = hrtfparams->Gain; + ALfloat g, stepcount = 0.0f; ALfloat left, right; ALsizei i; @@ -35,8 +36,10 @@ void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, for(i = 0;i < BufferSize;i++) { hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*gain; - right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*gain; + + g = gain + gainstep*stepcount; + left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*g; + right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*g; hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; @@ -45,10 +48,10 @@ void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; - gain += gainstep; + stepcount += 1.0f; Offset++; } - hrtfparams->Gain = gain; + hrtfparams->Gain = gain + gainstep*stepcount; } void MixHrtfBlend(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, @@ -59,12 +62,13 @@ void MixHrtfBlend(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, { const ALfloat (*OldCoeffs)[2] = ASSUME_ALIGNED(oldparams->Coeffs, 16); const ALsizei OldDelay[2] = { oldparams->Delay[0], oldparams->Delay[1] }; - ALfloat oldGain = oldparams->Gain; - ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize; + const ALfloat oldGain = oldparams->Gain; + const ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize; const ALfloat (*NewCoeffs)[2] = ASSUME_ALIGNED(newparams->Coeffs, 16); const ALsizei NewDelay[2] = { newparams->Delay[0], newparams->Delay[1] }; - ALfloat newGain = newparams->Gain; - ALfloat newGainStep = newparams->GainStep; + const ALfloat newGain = newparams->Gain; + const ALfloat newGainStep = newparams->GainStep; + ALfloat g, stepcount = 0.0f; ALfloat left, right; ALsizei i; @@ -80,22 +84,23 @@ void MixHrtfBlend(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - left = hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*oldGain; - right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*oldGain; + g = oldGain + oldGainStep*stepcount; + left = hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*g; + right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*g; ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); - left = hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*newGain; - right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*newGain; + g = newGain + newGainStep*stepcount; + left = hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*g; + right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*g; ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; - oldGain += oldGainStep; - newGain += newGainStep; + stepcount += 1.0f; Offset++; } - newparams->Gain = newGain; + newparams->Gain = newGain + newGainStep*stepcount; } void MixDirectHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, diff --git a/Alc/mixer/mixer_c.c b/Alc/mixer/mixer_c.c index 84485206..25149e00 100644 --- a/Alc/mixer/mixer_c.c +++ b/Alc/mixer/mixer_c.c @@ -134,13 +134,16 @@ void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[ if(fabsf(step) > FLT_EPSILON) { ALsizei minsize = mini(BufferSize, Counter); + ALfloat step_count = 0.0f; for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; + OutBuffer[c][OutPos+pos] += data[pos] * (gain + step*step_count); + step_count += 1.0f; } if(pos == Counter) gain = TargetGains[c]; + else + gain += step*step_count; CurrentGains[c] = gain; } diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c index 1a5e8ee7..b6181b42 100644 --- a/Alc/mixer/mixer_neon.c +++ b/Alc/mixer/mixer_neon.c @@ -165,8 +165,7 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, ALsizei BufferSize) { - ALfloat gain, delta, step; - float32x4_t gain4; + const ALfloat delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; ALsizei c; ASSUME(OutChans > 0); @@ -174,47 +173,54 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe data = ASSUME_ALIGNED(data, 16); OutBuffer = ASSUME_ALIGNED(OutBuffer, 16); - delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; - for(c = 0;c < OutChans;c++) { ALsizei pos = 0; - gain = CurrentGains[c]; - step = (TargetGains[c] - gain) * delta; + ALfloat gain = CurrentGains[c]; + const ALfloat step = (TargetGains[c] - gain) * delta; + if(fabsf(step) > FLT_EPSILON) { ALsizei minsize = mini(BufferSize, Counter); + ALfloat step_count = 0.0f; /* Mix with applying gain steps in aligned multiples of 4. */ - if(minsize-pos > 3) + if(LIKELY(minsize > 3)) { - float32x4_t step4; - gain4 = vsetq_lane_f32(gain, gain4, 0); - gain4 = vsetq_lane_f32(gain + step, gain4, 1); - gain4 = vsetq_lane_f32(gain + step + step, gain4, 2); - gain4 = vsetq_lane_f32(gain + step + step + step, gain4, 3); - step4 = vdupq_n_f32(step + step + step + step); + const float32x4_t four4 = vdupq_n_f32(4.0f); + const float32x4_t step4 = vdupq_n_f32(step); + const float32x4_t gain4 = vdupq_n_f32(gain); + float32x4_t step_count4 = vsetq_lane_f32(0.0f, + vsetq_lane_f32(1.0f, + vsetq_lane_f32(2.0f, + vsetq_lane_f32(3.0f, vdupq_n_f32(0.0f), 3), + 2), 1), 0 + ); + ALsizei todo = minsize >> 2; + do { const float32x4_t val4 = vld1q_f32(&data[pos]); float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); - dry4 = vmlaq_f32(dry4, val4, gain4); - gain4 = vaddq_f32(gain4, step4); + dry4 = vmlaq_f32(dry4, val4, vmlaq_f32(gain4, step4, step_count4)); + step_count4 = vaddq_f32(step_count4, four4); vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); pos += 4; - } while(minsize-pos > 3); - /* NOTE: gain4 now represents the next four gains after the - * last four mixed samples, so the lowest element represents - * the next gain to apply. + } while(--todo); + /* NOTE: step_count4 now represents the next four counts after + * the last four mixed samples, so the lowest element + * represents the next step count to apply. */ - gain = vgetq_lane_f32(gain4, 0); + step_count = vgetq_lane_f32(step_count4, 0); } /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; + OutBuffer[c][OutPos+pos] += data[pos]*(gain + step*step_count); + step_count += 1.0f; } if(pos == Counter) gain = TargetGains[c]; + else + gain += step*step_count; CurrentGains[c] = gain; /* Mix until pos is aligned with 4 or the mix is done. */ @@ -225,13 +231,17 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) continue; - gain4 = vdupq_n_f32(gain); - for(;BufferSize-pos > 3;pos += 4) + if(LIKELY(BufferSize-pos > 3)) { - const float32x4_t val4 = vld1q_f32(&data[pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); - dry4 = vmlaq_f32(dry4, val4, gain4); - vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + ALsizei todo = (BufferSize-pos) >> 2; + const float32x4_t gain4 = vdupq_n_f32(gain); + do { + const float32x4_t val4 = vld1q_f32(&data[pos]); + float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + dry4 = vmlaq_f32(dry4, val4, gain4); + vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + pos += 4; + } while(--todo); } for(;pos < BufferSize;pos++) OutBuffer[c][OutPos+pos] += data[pos]*gain; diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c index a178477f..fa79eb4d 100644 --- a/Alc/mixer/mixer_sse.c +++ b/Alc/mixer/mixer_sse.c @@ -135,55 +135,57 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, ALsizei BufferSize) { - ALfloat gain, delta, step; - __m128 gain4; + const ALfloat delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; ALsizei c; ASSUME(OutChans > 0); ASSUME(BufferSize > 0); - delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; for(c = 0;c < OutChans;c++) { ALsizei pos = 0; - gain = CurrentGains[c]; - step = (TargetGains[c] - gain) * delta; + ALfloat gain = CurrentGains[c]; + const ALfloat step = (TargetGains[c] - gain) * delta; + if(fabsf(step) > FLT_EPSILON) { ALsizei minsize = mini(BufferSize, Counter); + ALfloat step_count = 0.0f; /* Mix with applying gain steps in aligned multiples of 4. */ - if(minsize-pos > 3) + if(LIKELY(minsize > 3)) { - __m128 step4; - gain4 = _mm_setr_ps( - gain, - gain + step, - gain + step + step, - gain + step + step + step - ); - step4 = _mm_set1_ps(step + step + step + step); + const __m128 four4 = _mm_set1_ps(4.0f); + const __m128 step4 = _mm_set1_ps(step); + const __m128 gain4 = _mm_set1_ps(gain); + __m128 step_count4 = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f); + ALsizei todo = minsize >> 2; do { const __m128 val4 = _mm_load_ps(&data[pos]); __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); - dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - gain4 = _mm_add_ps(gain4, step4); +#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) + /* dry += val * (gain + step*step_count) */ + dry4 = MLA4(dry4, val4, MLA4(gain4, step4, step_count4)); +#undef MLA4 _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + step_count4 = _mm_add_ps(step_count4, four4); pos += 4; - } while(minsize-pos > 3); - /* NOTE: gain4 now represents the next four gains after the - * last four mixed samples, so the lowest element represents - * the next gain to apply. + } while(--todo); + /* NOTE: step_count4 now represents the next four counts after + * the last four mixed samples, so the lowest element + * represents the next step count to apply. */ - gain = _mm_cvtss_f32(gain4); + step_count = _mm_cvtss_f32(step_count4); } /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ for(;pos < minsize;pos++) { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; + OutBuffer[c][OutPos+pos] += data[pos]*(gain + step*step_count); + step_count += 1.0f; } if(pos == Counter) gain = TargetGains[c]; + else + gain += step*step_count; CurrentGains[c] = gain; /* Mix until pos is aligned with 4 or the mix is done. */ @@ -194,13 +196,17 @@ void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) continue; - gain4 = _mm_set1_ps(gain); - for(;BufferSize-pos > 3;pos += 4) + if(LIKELY(BufferSize-pos > 3)) { - const __m128 val4 = _mm_load_ps(&data[pos]); - __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); - dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + ALsizei todo = (BufferSize-pos) >> 2; + const __m128 gain4 = _mm_set1_ps(gain); + do { + const __m128 val4 = _mm_load_ps(&data[pos]); + __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); + dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); + _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + pos += 4; + } while(--todo); } for(;pos < BufferSize;pos++) OutBuffer[c][OutPos+pos] += data[pos]*gain; |