diff options
author | Chris Robinson <[email protected]> | 2017-03-11 18:04:06 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2017-03-11 18:04:06 -0800 |
commit | 96aaab93662be289d3b2c5312ae50502afa8d221 (patch) | |
tree | c270633e689c7a64edaea8a6c15305197b435ced /Alc | |
parent | feffe1e81a155ded0bcdb519a1a126fd8e908baa (diff) |
Rework HRTF coefficient fading
This improves fading between HRIRs as sources pan around. In particular, it
reduces the problem of individual coefficients having various rounding errors
in their stepping values, as well as problems with interpolating delay values.
It does this by doing two mixing passes for each source: first using the last
coefficients, which fade to silence, and then again using the new coefficients,
which fade in from silence. When added together, the two passes create a linear
fade from one to the other. Additionally, the gain is applied separately so the
individual coefficients don't step with rounding errors. Although this does
increase CPU cost since it's doing two mixes per source, each mix is a bit
cheaper now since the stepping is simplified to a single gain value, and the
overall quality is improved.
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/ALu.c | 10 | ||||
-rw-r--r-- | Alc/hrtf.c | 38 | ||||
-rw-r--r-- | Alc/hrtf.h | 2 | ||||
-rw-r--r-- | Alc/mixer.c | 87 | ||||
-rw-r--r-- | Alc/mixer_c.c | 19 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 18 | ||||
-rw-r--r-- | Alc/mixer_inc.c | 81 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 37 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 67 |
9 files changed, 96 insertions, 263 deletions
@@ -634,10 +634,11 @@ static void CalcNonAttnSourceParams(ALvoice *voice, const struct ALsourceProps * /* Get the static HRIR coefficients and delays for this channel. */ GetHrtfCoeffs(Device->HrtfHandle, - chans[c].elevation, chans[c].angle, 0.0f, DryGain, + chans[c].elevation, chans[c].angle, 0.0f, voice->Direct.Params[c].Hrtf.Target.Coeffs, voice->Direct.Params[c].Hrtf.Target.Delay ); + voice->Direct.Params[c].Hrtf.Target.Gain = DryGain; /* Normal panning for auxiliary sends. */ CalcAngleCoeffs(chans[c].angle, chans[c].elevation, 0.0f, coeffs); @@ -1104,9 +1105,10 @@ static void CalcAttnSourceParams(ALvoice *voice, const struct ALsourceProps *pro spread = asinf(radius / Distance) * 2.0f; /* Get the HRIR coefficients and delays. */ - GetHrtfCoeffs(Device->HrtfHandle, ev, az, spread, DryGain, + GetHrtfCoeffs(Device->HrtfHandle, ev, az, spread, voice->Direct.Params[0].Hrtf.Target.Coeffs, voice->Direct.Params[0].Hrtf.Target.Delay); + voice->Direct.Params[0].Hrtf.Target.Gain = DryGain; CalcDirectionCoeffs(dir, spread, coeffs); @@ -1502,9 +1504,11 @@ void aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) state = device->Hrtf; for(c = 0;c < device->Dry.NumChannels;c++) { + typedef ALfloat ALfloat2[2]; HrtfMix(device->RealOut.Buffer[lidx], device->RealOut.Buffer[ridx], device->Dry.Buffer[c], state->Offset, state->IrSize, - state->Chan[c].Coeffs, state->Chan[c].Values, SamplesToDo + SAFE_CONST(ALfloat2*,state->Chan[c].Coeffs), + state->Chan[c].Values, SamplesToDo ); } state->Offset += SamplesToDo; @@ -77,10 +77,9 @@ static ALsizei CalcAzIndex(ALsizei azcount, ALfloat az) } /* Calculates static HRIR coefficients and delays for the given polar elevation - * and azimuth in radians. The coefficients are normalized and attenuated by - * the specified gain. + * and azimuth in radians. The coefficients are normalized. 
*/ -void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat gain, ALfloat (*coeffs)[2], ALsizei *delays) +void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat (*coeffs)[2], ALsizei *delays) { ALsizei evidx, azidx, lidx, ridx; ALsizei azcount, evoffset; @@ -102,36 +101,21 @@ void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ridx = evoffset + ((azcount-azidx) % azcount); /* Calculate the HRIR delays. */ - delays[0] = fastf2i(Hrtf->delays[lidx]*dirfact + 0.5f) << HRTFDELAY_BITS; - delays[1] = fastf2i(Hrtf->delays[ridx]*dirfact + 0.5f) << HRTFDELAY_BITS; + delays[0] = fastf2i(Hrtf->delays[lidx]*dirfact + 0.5f); + delays[1] = fastf2i(Hrtf->delays[ridx]*dirfact + 0.5f); /* Calculate the sample offsets for the HRIR indices. */ lidx *= Hrtf->irSize; ridx *= Hrtf->irSize; - /* Calculate the normalized and attenuated HRIR coefficients. Zero the - * coefficients if gain is too low. - */ - if(gain > 0.0001f) - { - gain /= 32767.0f; - - i = 0; - coeffs[i][0] = lerp(PassthruCoeff, Hrtf->coeffs[lidx+i], dirfact)*gain; - coeffs[i][1] = lerp(PassthruCoeff, Hrtf->coeffs[ridx+i], dirfact)*gain; - for(i = 1;i < Hrtf->irSize;i++) - { - coeffs[i][0] = Hrtf->coeffs[lidx+i]*gain * dirfact; - coeffs[i][1] = Hrtf->coeffs[ridx+i]*gain * dirfact; - } - } - else + /* Calculate the normalized and attenuated HRIR coefficients. 
*/ + i = 0; + coeffs[i][0] = lerp(PassthruCoeff, Hrtf->coeffs[lidx+i], dirfact) * (1.0f/32767.0f); + coeffs[i][1] = lerp(PassthruCoeff, Hrtf->coeffs[ridx+i], dirfact) * (1.0f/32767.0f); + for(i = 1;i < Hrtf->irSize;i++) { - for(i = 0;i < Hrtf->irSize;i++) - { - coeffs[i][0] = 0.0f; - coeffs[i][1] = 0.0f; - } + coeffs[i][0] = Hrtf->coeffs[lidx+i]*(1.0f/32767.0f) * dirfact; + coeffs[i][1] = Hrtf->coeffs[ridx+i]*(1.0f/32767.0f) * dirfact; } } @@ -36,7 +36,7 @@ void FreeHrtfs(void); vector_HrtfEntry EnumerateHrtf(const_al_string devname); void FreeHrtfList(vector_HrtfEntry *list); -void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat gain, ALfloat (*coeffs)[2], ALsizei *delays); +void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat (*coeffs)[2], ALsizei *delays); /** * Produces HRTF filter coefficients for decoding B-Format, given a set of diff --git a/Alc/mixer.c b/Alc/mixer.c index d8adb697..67e74396 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -380,7 +380,7 @@ ALboolean MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALsizei ALint64 DataSize64; ALsizei Counter; ALsizei IrSize; - ALsizei chan, j; + ALsizei chan; ALsizei send; /* Get source info */ @@ -605,46 +605,63 @@ ALboolean MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALsizei MixHrtfParams hrtfparams; int lidx, ridx; + lidx = GetChannelIdxByName(Device->RealOut, FrontLeft); + ridx = GetChannelIdxByName(Device->RealOut, FrontRight); + assert(lidx != -1 && ridx != -1); + if(!Counter) { - parms->Hrtf.Current = parms->Hrtf.Target; - for(j = 0;j < HRIR_LENGTH;j++) - { - hrtfparams.Steps.Coeffs[j][0] = 0.0f; - hrtfparams.Steps.Coeffs[j][1] = 0.0f; - } - hrtfparams.Steps.Delay[0] = 0; - hrtfparams.Steps.Delay[1] = 0; + parms->Hrtf.Old = parms->Hrtf.Target; + hrtfparams.Current = &parms->Hrtf.Target; + hrtfparams.Gain = parms->Hrtf.Target.Gain; + hrtfparams.GainStep = 0.0f; + 
MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &parms->Hrtf.State, DstBufferSize + ); } else { - ALfloat delta = 1.0f / (ALfloat)Counter; - ALfloat coeffdiff; - ALint delaydiff; - for(j = 0;j < IrSize;j++) - { - coeffdiff = parms->Hrtf.Target.Coeffs[j][0] - parms->Hrtf.Current.Coeffs[j][0]; - hrtfparams.Steps.Coeffs[j][0] = coeffdiff * delta; - coeffdiff = parms->Hrtf.Target.Coeffs[j][1] - parms->Hrtf.Current.Coeffs[j][1]; - hrtfparams.Steps.Coeffs[j][1] = coeffdiff * delta; - } - delaydiff = parms->Hrtf.Target.Delay[0] - parms->Hrtf.Current.Delay[0]; - hrtfparams.Steps.Delay[0] = fastf2i((ALfloat)delaydiff * delta); - delaydiff = parms->Hrtf.Target.Delay[1] - parms->Hrtf.Current.Delay[1]; - hrtfparams.Steps.Delay[1] = fastf2i((ALfloat)delaydiff * delta); - } - hrtfparams.Target = &parms->Hrtf.Target; - hrtfparams.Current = &parms->Hrtf.Current; - - lidx = GetChannelIdxByName(Device->RealOut, FrontLeft); - ridx = GetChannelIdxByName(Device->RealOut, FrontRight); - assert(lidx != -1 && ridx != -1); + HrtfState backupstate = parms->Hrtf.State; + ALfloat gain; + + /* The old coefficients need to fade to silence + * completely since they'll be replaced after the mix. + * So it needs to fade out over DstBufferSize instead + * of Counter. + */ + hrtfparams.Current = &parms->Hrtf.Old; + hrtfparams.Gain = parms->Hrtf.Old.Gain; + hrtfparams.GainStep = -hrtfparams.Gain / + (ALfloat)DstBufferSize; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &backupstate, DstBufferSize + ); - MixHrtfSamples( - voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], - samples, Counter, voice->Offset, OutPos, IrSize, &hrtfparams, - &parms->Hrtf.State, DstBufferSize - ); + /* The new coefficients need to fade in completely + * since they're replacing the old ones. 
To keep the + * source gain fading consistent, interpolate between + * the old and new target gain given how much of the + * fade time this mix handles. + */ + gain = lerp(parms->Hrtf.Old.Gain, parms->Hrtf.Target.Gain, + minf(1.0f, (ALfloat)Counter / (ALfloat)DstBufferSize)); + hrtfparams.Current = &parms->Hrtf.Target; + hrtfparams.Gain = 0.0f; + hrtfparams.GainStep = gain / (ALfloat)DstBufferSize; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &parms->Hrtf.State, DstBufferSize + ); + /* Update the old parameters with the result. */ + parms->Hrtf.Old = parms->Hrtf.Target; + if(Counter > DstBufferSize) + parms->Hrtf.Old.Gain = hrtfparams.Gain; + } } } diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c index a3d79a46..bb945e88 100644 --- a/Alc/mixer_c.c +++ b/Alc/mixer_c.c @@ -132,26 +132,9 @@ void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - for(c = 0;c < IrSize;c++) - { - const ALsizei off = (Offset+c)&HRIR_MASK; - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; - Coeffs[c][0] += CoeffStep[c][0]; - Coeffs[c][1] += CoeffStep[c][1]; - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { ALsizei c; diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index b76c9aee..d4a49b53 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h @@ -21,12 +21,12 @@ const ALfloat *Resample_bsinc32_C(const InterpState *state, const ALfloat *restr /* C mixers */ void MixHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, 
ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, @@ -37,12 +37,12 @@ void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, /* SSE mixers */ void MixHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, @@ -86,12 +86,12 @@ const ALfloat *Resample_bsinc32_SSE(const InterpState *state, const ALfloat *res /* Neon mixers */ void MixHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const 
ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c index 8215743e..b42b0fd3 100644 --- a/Alc/mixer_inc.c +++ b/Alc/mixer_inc.c @@ -12,84 +12,27 @@ #define MAX_UPDATE_SAMPLES 128 -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei irSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right); static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei irSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right); void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const MixHrtfParams *hrtfparams, HrtfState *hrtfstate, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, ALsizei BufferSize) { - ALfloat (*Coeffs)[2] = hrtfparams->Current->Coeffs; + const ALfloat (*Coeffs)[2] = hrtfparams->Current->Coeffs; ALsizei Delay[2] = { hrtfparams->Current->Delay[0], hrtfparams->Current->Delay[1] }; - ALfloat out[MAX_UPDATE_SAMPLES][2]; + ALfloat gainstep = hrtfparams->GainStep; + ALfloat gain = hrtfparams->Gain; ALfloat left, right; - ALsizei minsize; ALsizei pos, i; - pos = 0; - if(Counter == 0) - goto 
skip_stepping; - - minsize = minu(BufferSize, Counter); - while(pos < minsize) - { - ALsizei todo = mini(minsize-pos, MAX_UPDATE_SAMPLES); - - for(i = 0;i < todo;i++) - { - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos++]; - left = lerp(hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], - hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], - (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); - right = lerp(hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], - hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], - (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); - - Delay[0] += hrtfparams->Steps.Delay[0]; - Delay[1] += hrtfparams->Steps.Delay[1]; - - hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; - Offset++; - - ApplyCoeffsStep(Offset, hrtfstate->Values, IrSize, Coeffs, hrtfparams->Steps.Coeffs, left, right); - out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]; - out[i][1] = hrtfstate->Values[Offset&HRIR_MASK][1]; - } - - for(i = 0;i < todo;i++) - LeftOut[OutPos+i] += out[i][0]; - for(i = 0;i < todo;i++) - RightOut[OutPos+i] += out[i][1]; - OutPos += todo; - } - - if(pos == Counter) - { - *hrtfparams->Current = *hrtfparams->Target; - Delay[0] = hrtfparams->Target->Delay[0]; - Delay[1] = hrtfparams->Target->Delay[1]; - } - else - { - hrtfparams->Current->Delay[0] = Delay[0]; - hrtfparams->Current->Delay[1] = Delay[1]; - } - -skip_stepping: - Delay[0] >>= HRTFDELAY_BITS; - Delay[1] >>= HRTFDELAY_BITS; - while(pos < BufferSize) + for(pos = 0;pos < BufferSize;) { + ALfloat out[MAX_UPDATE_SAMPLES][2]; ALsizei todo = mini(BufferSize-pos, MAX_UPDATE_SAMPLES); for(i = 0;i < todo;i++) @@ -103,8 +46,9 @@ skip_stepping: Offset++; ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]; - out[i][1] = 
hrtfstate->Values[Offset&HRIR_MASK][1]; + out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]*gain; + out[i][1] = hrtfstate->Values[Offset&HRIR_MASK][1]*gain; + gain += gainstep; } for(i = 0;i < todo;i++) @@ -113,11 +57,12 @@ skip_stepping: RightOut[OutPos+i] += out[i][1]; OutPos += todo; } + hrtfparams->Gain = gain; } void MixDirectHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize) { ALfloat out[MAX_UPDATE_SAMPLES][2]; diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 727c5c55..390a1dd2 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -190,44 +190,9 @@ const ALfloat *Resample_bsinc32_Neon(const InterpState *state, } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - CoeffStep = ASSUME_ALIGNED(CoeffStep, 16); - for(c = 0;c < IrSize;c += 2) - { - const ALsizei o0 = (Offset+c)&HRIR_MASK; - const ALsizei o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); - float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); - float32x4_t deltas = vld1q_f32(&CoeffStep[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - coefs = vaddq_f32(coefs, deltas); - - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); - 
vst1q_f32(&Coeffs[c][0], coefs); - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { ALsizei c; diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index 7870a6d8..37ce953f 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -69,74 +69,9 @@ const ALfloat *Resample_bsinc32_SSE(const InterpState *state, const ALfloat *res } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - const __m128 lrlr = _mm_setr_ps(left, right, left, right); - __m128 coeffs, deltas, imp0, imp1; - __m128 vals = _mm_setzero_ps(); - ALsizei i; - - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - CoeffStep = ASSUME_ALIGNED(CoeffStep, 16); - if((Offset&1)) - { - const ALsizei o0 = Offset&HRIR_MASK; - const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[0][0]); - deltas = _mm_load_ps(&CoeffStep[0][0]); - vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); - imp0 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[0][0], coeffs); - _mm_storel_pi((__m64*)&Values[o0][0], vals); - for(i = 1;i < IrSize-1;i += 2) - { - const ALsizei o2 = (Offset+i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - deltas = _mm_load_ps(&CoeffStep[i+1][0]); - vals = _mm_load_ps(&Values[o2][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[i+1][0], coeffs); - _mm_store_ps(&Values[o2][0], vals); - imp0 = imp1; - } - vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); - imp0 = _mm_movehl_ps(imp0, imp0); - vals = _mm_add_ps(imp0, vals); 
- _mm_storel_pi((__m64*)&Values[o1][0], vals); - } - else - { - for(i = 0;i < IrSize;i += 2) - { - const ALsizei o = (Offset + i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i][0]); - deltas = _mm_load_ps(&CoeffStep[i][0]); - vals = _mm_load_ps(&Values[o][0]); - imp0 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[i][0], coeffs); - _mm_store_ps(&Values[o][0], vals); - } - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { const __m128 lrlr = _mm_setr_ps(left, right, left, right); |