diff options
Diffstat (limited to 'Alc')
-rw-r--r-- | Alc/ALu.c | 10 | ||||
-rw-r--r-- | Alc/hrtf.c | 38 | ||||
-rw-r--r-- | Alc/hrtf.h | 2 | ||||
-rw-r--r-- | Alc/mixer.c | 87 | ||||
-rw-r--r-- | Alc/mixer_c.c | 19 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 18 | ||||
-rw-r--r-- | Alc/mixer_inc.c | 81 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 37 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 67 |
9 files changed, 96 insertions, 263 deletions
@@ -634,10 +634,11 @@ static void CalcNonAttnSourceParams(ALvoice *voice, const struct ALsourceProps * /* Get the static HRIR coefficients and delays for this channel. */ GetHrtfCoeffs(Device->HrtfHandle, - chans[c].elevation, chans[c].angle, 0.0f, DryGain, + chans[c].elevation, chans[c].angle, 0.0f, voice->Direct.Params[c].Hrtf.Target.Coeffs, voice->Direct.Params[c].Hrtf.Target.Delay ); + voice->Direct.Params[c].Hrtf.Target.Gain = DryGain; /* Normal panning for auxiliary sends. */ CalcAngleCoeffs(chans[c].angle, chans[c].elevation, 0.0f, coeffs); @@ -1104,9 +1105,10 @@ static void CalcAttnSourceParams(ALvoice *voice, const struct ALsourceProps *pro spread = asinf(radius / Distance) * 2.0f; /* Get the HRIR coefficients and delays. */ - GetHrtfCoeffs(Device->HrtfHandle, ev, az, spread, DryGain, + GetHrtfCoeffs(Device->HrtfHandle, ev, az, spread, voice->Direct.Params[0].Hrtf.Target.Coeffs, voice->Direct.Params[0].Hrtf.Target.Delay); + voice->Direct.Params[0].Hrtf.Target.Gain = DryGain; CalcDirectionCoeffs(dir, spread, coeffs); @@ -1502,9 +1504,11 @@ void aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) state = device->Hrtf; for(c = 0;c < device->Dry.NumChannels;c++) { + typedef ALfloat ALfloat2[2]; HrtfMix(device->RealOut.Buffer[lidx], device->RealOut.Buffer[ridx], device->Dry.Buffer[c], state->Offset, state->IrSize, - state->Chan[c].Coeffs, state->Chan[c].Values, SamplesToDo + SAFE_CONST(ALfloat2*,state->Chan[c].Coeffs), + state->Chan[c].Values, SamplesToDo ); } state->Offset += SamplesToDo; @@ -77,10 +77,9 @@ static ALsizei CalcAzIndex(ALsizei azcount, ALfloat az) } /* Calculates static HRIR coefficients and delays for the given polar elevation - * and azimuth in radians. The coefficients are normalized and attenuated by - * the specified gain. + * and azimuth in radians. The coefficients are normalized. */ -void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat gain, ALfloat (*coeffs)[2], ALsizei *delays) +void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat (*coeffs)[2], ALsizei *delays) { ALsizei evidx, azidx, lidx, ridx; ALsizei azcount, evoffset; @@ -102,36 +101,21 @@ void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ridx = evoffset + ((azcount-azidx) % azcount); /* Calculate the HRIR delays. */ - delays[0] = fastf2i(Hrtf->delays[lidx]*dirfact + 0.5f) << HRTFDELAY_BITS; - delays[1] = fastf2i(Hrtf->delays[ridx]*dirfact + 0.5f) << HRTFDELAY_BITS; + delays[0] = fastf2i(Hrtf->delays[lidx]*dirfact + 0.5f); + delays[1] = fastf2i(Hrtf->delays[ridx]*dirfact + 0.5f); /* Calculate the sample offsets for the HRIR indices. */ lidx *= Hrtf->irSize; ridx *= Hrtf->irSize; - /* Calculate the normalized and attenuated HRIR coefficients. Zero the - * coefficients if gain is too low. - */ - if(gain > 0.0001f) - { - gain /= 32767.0f; - - i = 0; - coeffs[i][0] = lerp(PassthruCoeff, Hrtf->coeffs[lidx+i], dirfact)*gain; - coeffs[i][1] = lerp(PassthruCoeff, Hrtf->coeffs[ridx+i], dirfact)*gain; - for(i = 1;i < Hrtf->irSize;i++) - { - coeffs[i][0] = Hrtf->coeffs[lidx+i]*gain * dirfact; - coeffs[i][1] = Hrtf->coeffs[ridx+i]*gain * dirfact; - } - } - else + /* Calculate the normalized and attenuated HRIR coefficients. */ + i = 0; + coeffs[i][0] = lerp(PassthruCoeff, Hrtf->coeffs[lidx+i], dirfact) * (1.0f/32767.0f); + coeffs[i][1] = lerp(PassthruCoeff, Hrtf->coeffs[ridx+i], dirfact) * (1.0f/32767.0f); + for(i = 1;i < Hrtf->irSize;i++) { - for(i = 0;i < Hrtf->irSize;i++) - { - coeffs[i][0] = 0.0f; - coeffs[i][1] = 0.0f; - } + coeffs[i][0] = Hrtf->coeffs[lidx+i]*(1.0f/32767.0f) * dirfact; + coeffs[i][1] = Hrtf->coeffs[ridx+i]*(1.0f/32767.0f) * dirfact; } } @@ -36,7 +36,7 @@ void FreeHrtfs(void); vector_HrtfEntry EnumerateHrtf(const_al_string devname); void FreeHrtfList(vector_HrtfEntry *list); -void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat gain, ALfloat (*coeffs)[2], ALsizei *delays); +void GetHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat spread, ALfloat (*coeffs)[2], ALsizei *delays); /** * Produces HRTF filter coefficients for decoding B-Format, given a set of diff --git a/Alc/mixer.c b/Alc/mixer.c index d8adb697..67e74396 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -380,7 +380,7 @@ ALboolean MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALsizei ALint64 DataSize64; ALsizei Counter; ALsizei IrSize; - ALsizei chan, j; + ALsizei chan; ALsizei send; /* Get source info */ @@ -605,46 +605,63 @@ ALboolean MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALsizei MixHrtfParams hrtfparams; int lidx, ridx; + lidx = GetChannelIdxByName(Device->RealOut, FrontLeft); + ridx = GetChannelIdxByName(Device->RealOut, FrontRight); + assert(lidx != -1 && ridx != -1); + if(!Counter) { - parms->Hrtf.Current = parms->Hrtf.Target; - for(j = 0;j < HRIR_LENGTH;j++) - { - hrtfparams.Steps.Coeffs[j][0] = 0.0f; - hrtfparams.Steps.Coeffs[j][1] = 0.0f; - } - hrtfparams.Steps.Delay[0] = 0; - hrtfparams.Steps.Delay[1] = 0; + parms->Hrtf.Old = parms->Hrtf.Target; + hrtfparams.Current = &parms->Hrtf.Target; + hrtfparams.Gain = parms->Hrtf.Target.Gain; + hrtfparams.GainStep = 0.0f; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &parms->Hrtf.State, DstBufferSize + ); } else { - ALfloat delta = 1.0f / (ALfloat)Counter; - ALfloat coeffdiff; - ALint delaydiff; - for(j = 0;j < IrSize;j++) - { - coeffdiff = parms->Hrtf.Target.Coeffs[j][0] - parms->Hrtf.Current.Coeffs[j][0]; - hrtfparams.Steps.Coeffs[j][0] = coeffdiff * delta; - coeffdiff = parms->Hrtf.Target.Coeffs[j][1] - parms->Hrtf.Current.Coeffs[j][1]; - hrtfparams.Steps.Coeffs[j][1] = coeffdiff * delta; - } - delaydiff = parms->Hrtf.Target.Delay[0] - parms->Hrtf.Current.Delay[0]; - hrtfparams.Steps.Delay[0] = fastf2i((ALfloat)delaydiff * delta); - delaydiff = parms->Hrtf.Target.Delay[1] - parms->Hrtf.Current.Delay[1]; - hrtfparams.Steps.Delay[1] = fastf2i((ALfloat)delaydiff * delta); - } - hrtfparams.Target = &parms->Hrtf.Target; - hrtfparams.Current = &parms->Hrtf.Current; - - lidx = GetChannelIdxByName(Device->RealOut, FrontLeft); - ridx = GetChannelIdxByName(Device->RealOut, FrontRight); - assert(lidx != -1 && ridx != -1); + HrtfState backupstate = parms->Hrtf.State; + ALfloat gain; + + /* The old coefficients need to fade to silence + * completely since they'll be replaced after the mix. + * So it needs to fade out over DstBufferSize instead + * of Counter. + */ + hrtfparams.Current = &parms->Hrtf.Old; + hrtfparams.Gain = parms->Hrtf.Old.Gain; + hrtfparams.GainStep = -hrtfparams.Gain / + (ALfloat)DstBufferSize; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &backupstate, DstBufferSize + ); - MixHrtfSamples( - voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], - samples, Counter, voice->Offset, OutPos, IrSize, &hrtfparams, - &parms->Hrtf.State, DstBufferSize - ); + /* The new coefficients need to fade in completely + * since they're replacing the old ones. To keep the + * source gain fading consistent, interpolate between + * the old and new target gain given how much of the + * fade time this mix handles. + */ + gain = lerp(parms->Hrtf.Old.Gain, parms->Hrtf.Target.Gain, + minf(1.0f, (ALfloat)Counter / (ALfloat)DstBufferSize)); + hrtfparams.Current = &parms->Hrtf.Target; + hrtfparams.Gain = 0.0f; + hrtfparams.GainStep = gain / (ALfloat)DstBufferSize; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &hrtfparams, + &parms->Hrtf.State, DstBufferSize + ); + /* Update the old parameters with the result. */ + parms->Hrtf.Old = parms->Hrtf.Target; + if(Counter > DstBufferSize) + parms->Hrtf.Old.Gain = hrtfparams.Gain; + } } } diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c index a3d79a46..bb945e88 100644 --- a/Alc/mixer_c.c +++ b/Alc/mixer_c.c @@ -132,26 +132,9 @@ void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - for(c = 0;c < IrSize;c++) - { - const ALsizei off = (Offset+c)&HRIR_MASK; - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; - Coeffs[c][0] += CoeffStep[c][0]; - Coeffs[c][1] += CoeffStep[c][1]; - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { ALsizei c; diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index b76c9aee..d4a49b53 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h @@ -21,12 +21,12 @@ const ALfloat *Resample_bsinc32_C(const InterpState *state, const ALfloat *restr /* C mixers */ void MixHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, @@ -37,12 +37,12 @@ void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, /* SSE mixers */ void MixHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, @@ -86,12 +86,12 @@ const ALfloat *Resample_bsinc32_SSE(const InterpState *state, const ALfloat *res /* Neon mixers */ void MixHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const struct MixHrtfParams *hrtfparams, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, struct HrtfState *hrtfstate, ALsizei BufferSize); void MixDirectHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize); void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c index 8215743e..b42b0fd3 100644 --- a/Alc/mixer_inc.c +++ b/Alc/mixer_inc.c @@ -12,84 +12,27 @@ #define MAX_UPDATE_SAMPLES 128 -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei irSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right); static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei irSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right); void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Counter, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const MixHrtfParams *hrtfparams, HrtfState *hrtfstate, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, ALsizei BufferSize) { - ALfloat (*Coeffs)[2] = hrtfparams->Current->Coeffs; + const ALfloat (*Coeffs)[2] = hrtfparams->Current->Coeffs; ALsizei Delay[2] = { hrtfparams->Current->Delay[0], hrtfparams->Current->Delay[1] }; - ALfloat out[MAX_UPDATE_SAMPLES][2]; + ALfloat gainstep = hrtfparams->GainStep; + ALfloat gain = hrtfparams->Gain; ALfloat left, right; - ALsizei minsize; ALsizei pos, i; - pos = 0; - if(Counter == 0) - goto skip_stepping; - - minsize = minu(BufferSize, Counter); - while(pos < minsize) - { - ALsizei todo = mini(minsize-pos, MAX_UPDATE_SAMPLES); - - for(i = 0;i < todo;i++) - { - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos++]; - left = lerp(hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], - hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], - (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); - right = lerp(hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], - hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], - (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); - - Delay[0] += hrtfparams->Steps.Delay[0]; - Delay[1] += hrtfparams->Steps.Delay[1]; - - hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; - Offset++; - - ApplyCoeffsStep(Offset, hrtfstate->Values, IrSize, Coeffs, hrtfparams->Steps.Coeffs, left, right); - out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]; - out[i][1] = hrtfstate->Values[Offset&HRIR_MASK][1]; - } - - for(i = 0;i < todo;i++) - LeftOut[OutPos+i] += out[i][0]; - for(i = 0;i < todo;i++) - RightOut[OutPos+i] += out[i][1]; - OutPos += todo; - } - - if(pos == Counter) - { - *hrtfparams->Current = *hrtfparams->Target; - Delay[0] = hrtfparams->Target->Delay[0]; - Delay[1] = hrtfparams->Target->Delay[1]; - } - else - { - hrtfparams->Current->Delay[0] = Delay[0]; - hrtfparams->Current->Delay[1] = Delay[1]; - } - -skip_stepping: - Delay[0] >>= HRTFDELAY_BITS; - Delay[1] >>= HRTFDELAY_BITS; - while(pos < BufferSize) + for(pos = 0;pos < BufferSize;) { + ALfloat out[MAX_UPDATE_SAMPLES][2]; ALsizei todo = mini(BufferSize-pos, MAX_UPDATE_SAMPLES); for(i = 0;i < todo;i++) @@ -103,8 +46,9 @@ skip_stepping: Offset++; ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]; - out[i][1] = hrtfstate->Values[Offset&HRIR_MASK][1]; + out[i][0] = hrtfstate->Values[Offset&HRIR_MASK][0]*gain; + out[i][1] = hrtfstate->Values[Offset&HRIR_MASK][1]*gain; + gain += gainstep; } for(i = 0;i < todo;i++) @@ -113,11 +57,12 @@ skip_stepping: RightOut[OutPos+i] += out[i][1]; OutPos += todo; } + hrtfparams->Gain = gain; } void MixDirectHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], ALsizei BufferSize) { ALfloat out[MAX_UPDATE_SAMPLES][2]; diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 727c5c55..390a1dd2 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -190,44 +190,9 @@ const ALfloat *Resample_bsinc32_Neon(const InterpState *state, } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - CoeffStep = ASSUME_ALIGNED(CoeffStep, 16); - for(c = 0;c < IrSize;c += 2) - { - const ALsizei o0 = (Offset+c)&HRIR_MASK; - const ALsizei o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); - float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); - float32x4_t deltas = vld1q_f32(&CoeffStep[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - coefs = vaddq_f32(coefs, deltas); - - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); - vst1q_f32(&Coeffs[c][0], coefs); - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { ALsizei c; diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index 7870a6d8..37ce953f 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -69,74 +69,9 @@ const ALfloat *Resample_bsinc32_SSE(const InterpState *state, const ALfloat *res } -static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], - const ALfloat (*restrict CoeffStep)[2], - ALfloat left, ALfloat right) -{ - const __m128 lrlr = _mm_setr_ps(left, right, left, right); - __m128 coeffs, deltas, imp0, imp1; - __m128 vals = _mm_setzero_ps(); - ALsizei i; - - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - CoeffStep = ASSUME_ALIGNED(CoeffStep, 16); - if((Offset&1)) - { - const ALsizei o0 = Offset&HRIR_MASK; - const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[0][0]); - deltas = _mm_load_ps(&CoeffStep[0][0]); - vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); - imp0 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[0][0], coeffs); - _mm_storel_pi((__m64*)&Values[o0][0], vals); - for(i = 1;i < IrSize-1;i += 2) - { - const ALsizei o2 = (Offset+i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - deltas = _mm_load_ps(&CoeffStep[i+1][0]); - vals = _mm_load_ps(&Values[o2][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[i+1][0], coeffs); - _mm_store_ps(&Values[o2][0], vals); - imp0 = imp1; - } - vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); - imp0 = _mm_movehl_ps(imp0, imp0); - vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[o1][0], vals); - } - else - { - for(i = 0;i < IrSize;i += 2) - { - const ALsizei o = (Offset + i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i][0]); - deltas = _mm_load_ps(&CoeffStep[i][0]); - vals = _mm_load_ps(&Values[o][0]); - imp0 = _mm_mul_ps(lrlr, coeffs); - coeffs = _mm_add_ps(coeffs, deltas); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Coeffs[i][0], coeffs); - _mm_store_ps(&Values[o][0], vals); - } - } -} - static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], const ALsizei IrSize, - ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict Coeffs)[2], ALfloat left, ALfloat right) { const __m128 lrlr = _mm_setr_ps(left, right, left, right); |