diff options
-rw-r--r-- | Alc/ALu.c | 106 | ||||
-rw-r--r-- | Alc/hrtf.c | 214 | ||||
-rw-r--r-- | Alc/hrtf.h | 4 | ||||
-rw-r--r-- | Alc/mixer.c | 25 | ||||
-rw-r--r-- | Alc/mixer_c.c | 17 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 15 | ||||
-rw-r--r-- | Alc/mixer_inc.c | 48 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 32 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 62 | ||||
-rw-r--r-- | Alc/panning.c | 6 | ||||
-rw-r--r-- | OpenAL32/Include/alMain.h | 2 | ||||
-rw-r--r-- | OpenAL32/Include/alu.h | 10 | ||||
-rw-r--r-- | OpenAL32/alSource.c | 11 |
13 files changed, 520 insertions, 32 deletions
@@ -527,6 +527,39 @@ ALvoid CalcNonAttnSourceParams(ALvoice *voice, const ALsource *ALSource, const A voice->IsHrtf = AL_FALSE; } + else if(Device->Hrtf) + { + voice->Direct.OutBuffer = &voice->Direct.OutBuffer[voice->Direct.OutChannels]; + voice->Direct.OutChannels = 2; + for(c = 0;c < num_channels;c++) + { + if(chans[c].channel == LFE) + { + /* Skip LFE */ + voice->Direct.Hrtf.Params[c].Delay[0] = 0; + voice->Direct.Hrtf.Params[c].Delay[1] = 0; + for(i = 0;i < HRIR_LENGTH;i++) + { + voice->Direct.Hrtf.Params[c].Coeffs[i][0] = 0.0f; + voice->Direct.Hrtf.Params[c].Coeffs[i][1] = 0.0f; + } + } + else + { + /* Get the static HRIR coefficients and delays for this + * channel. */ + GetLerpedHrtfCoeffs(Device->Hrtf, + chans[c].elevation, chans[c].angle, 1.0f, DryGain, + voice->Direct.Hrtf.Params[c].Coeffs, + voice->Direct.Hrtf.Params[c].Delay); + } + } + voice->Direct.Counter = 0; + voice->Direct.Moving = AL_TRUE; + voice->Direct.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf); + + voice->IsHrtf = AL_TRUE; + } else { for(c = 0;c < num_channels;c++) @@ -934,6 +967,73 @@ ALvoid CalcSourceParams(ALvoice *voice, const ALsource *ALSource, const ALCconte BufferListItem = BufferListItem->next; } + if(Device->Hrtf) + { + /* Use a binaural HRTF algorithm for stereo headphone playback */ + ALfloat delta, ev = 0.0f, az = 0.0f; + ALfloat radius = ALSource->Radius; + ALfloat dirfact = 1.0f; + + voice->Direct.OutBuffer = &voice->Direct.OutBuffer[voice->Direct.OutChannels]; + voice->Direct.OutChannels = 2; + + if(Distance > FLT_EPSILON) + { + ALfloat invlen = 1.0f/Distance; + Position[0] *= invlen; + Position[1] *= invlen; + Position[2] *= invlen; + + /* Calculate elevation and azimuth only when the source is not at + * the listener. This prevents +0 and -0 Z from producing + * inconsistent panning. Also, clamp Y in case FP precision errors + * cause it to land outside of -1..+1. 
*/ + ev = asinf(clampf(Position[1], -1.0f, 1.0f)); + az = atan2f(Position[0], -Position[2]*ZScale); + } + if(radius > Distance) + dirfact *= Distance / radius; + + /* Check to see if the HRIR is already moving. */ + if(voice->Direct.Moving) + { + /* Calculate the normalized HRTF transition factor (delta). */ + delta = CalcHrtfDelta(voice->Direct.Hrtf.Gain, DryGain, + voice->Direct.Hrtf.Dir, Position); + /* If the delta is large enough, get the moving HRIR target + * coefficients, target delays, stepping values, and counter. */ + if(delta > 0.001f) + { + ALuint counter = GetMovingHrtfCoeffs(Device->Hrtf, + ev, az, dirfact, DryGain, delta, voice->Direct.Counter, + voice->Direct.Hrtf.Params[0].Coeffs, voice->Direct.Hrtf.Params[0].Delay, + voice->Direct.Hrtf.Params[0].CoeffStep, voice->Direct.Hrtf.Params[0].DelayStep + ); + voice->Direct.Counter = counter; + voice->Direct.Hrtf.Gain = DryGain; + voice->Direct.Hrtf.Dir[0] = Position[0]; + voice->Direct.Hrtf.Dir[1] = Position[1]; + voice->Direct.Hrtf.Dir[2] = Position[2]; + } + } + else + { + /* Get the initial (static) HRIR coefficients and delays. 
*/ + GetLerpedHrtfCoeffs(Device->Hrtf, ev, az, dirfact, DryGain, + voice->Direct.Hrtf.Params[0].Coeffs, + voice->Direct.Hrtf.Params[0].Delay); + voice->Direct.Counter = 0; + voice->Direct.Moving = AL_TRUE; + voice->Direct.Hrtf.Gain = DryGain; + voice->Direct.Hrtf.Dir[0] = Position[0]; + voice->Direct.Hrtf.Dir[1] = Position[1]; + voice->Direct.Hrtf.Dir[2] = Position[2]; + } + voice->Direct.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf); + + voice->IsHrtf = AL_TRUE; + } + else { MixGains *gains = voice->Direct.Gains[0]; ALfloat radius = ALSource->Radius; @@ -1168,8 +1268,10 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) HrtfMixerFunc HrtfMix = SelectHrtfMixer(); ALuint irsize = GetHrtfIrSize(device->Hrtf); for(c = 0;c < device->NumChannels;c++) - HrtfMix(&device->DryBuffer[outchanoffset], device->DryBuffer[c], device->Hrtf_Offset, irsize, - &device->Hrtf_Params[c], &device->Hrtf_State[c], SamplesToDo); + HrtfMix(&device->DryBuffer[outchanoffset], device->DryBuffer[c], 0.0f, + device->Hrtf_Offset, 0.0f, irsize, &device->Hrtf_Params[c], + &device->Hrtf_State[c], SamplesToDo + ); device->Hrtf_Offset += SamplesToDo; } else if(device->Bs2b) @@ -58,6 +58,10 @@ struct Hrtf { static const ALchar magicMarker00[8] = "MinPHR00"; static const ALchar magicMarker01[8] = "MinPHR01"; +/* First value for pass-through coefficients (remaining are 0), used for omni- + * directional sounds. */ +static const ALfloat PassthruCoeff = 32767.0f * 0.707106781187f/*sqrt(0.5)*/; + static struct Hrtf *LoadedHrtfs = NULL; /* Calculate the elevation indices given the polar elevation in radians. @@ -84,12 +88,45 @@ static void CalcAzIndices(ALuint azcount, ALfloat az, ALuint *azidx, ALfloat *az *azmu = az - floorf(az); } +/* Calculates the normalized HRTF transition factor (delta) from the changes + * in gain and listener to source angle between updates. The result is a + * normalized delta factor that can be used to calculate moving HRIR stepping + * values. 
+ */ +ALfloat CalcHrtfDelta(ALfloat oldGain, ALfloat newGain, const ALfloat olddir[3], const ALfloat newdir[3]) +{ + ALfloat gainChange, angleChange, change; + + // Calculate the normalized dB gain change. + newGain = maxf(newGain, 0.0001f); + oldGain = maxf(oldGain, 0.0001f); + gainChange = fabsf(log10f(newGain / oldGain) / log10f(0.0001f)); + + // Calculate the angle change only when there is enough gain to notice it. + angleChange = 0.0f; + if(gainChange > 0.0001f || newGain > 0.0001f) + { + // No angle change when the directions are equal or degenerate (when + // both have zero length). + if(newdir[0] != olddir[0] || newdir[1] != olddir[1] || newdir[2] != olddir[2]) + { + ALfloat dotp = olddir[0]*newdir[0] + olddir[1]*newdir[1] + olddir[2]*newdir[2]; + angleChange = acosf(clampf(dotp, -1.0f, 1.0f)) / F_PI; + } + } + + // Use the largest of the two changes for the delta factor, and apply a + // significance shaping function to it. + change = maxf(angleChange * 25.0f, gainChange) * 2.0f; + return minf(change, 1.0f); +} + /* Calculates static HRIR coefficients and delays for the given polar * elevation and azimuth in radians. Linear interpolation is used to * increase the apparent resolution of the HRIR data set. The coefficients * are also normalized and attenuated by the specified gain. */ -void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat (*coeffs)[2], ALuint *delays) +void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat (*coeffs)[2], ALuint *delays) { ALuint evidx[2], lidx[4], ridx[4]; ALfloat mu[3], blend[4]; @@ -121,12 +158,12 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi blend[3] = ( mu[1]) * ( mu[2]); /* Calculate the HRIR delays using linear interpolation. 
*/ - delays[0] = fastf2u(Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] + - Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3] + - 0.5f); - delays[1] = fastf2u(Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] + - Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3] + - 0.5f); + delays[0] = fastf2u((Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] + + Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3]) * + dirfact + 0.5f) << HRTFDELAY_BITS; + delays[1] = fastf2u((Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] + + Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3]) * + dirfact + 0.5f) << HRTFDELAY_BITS; /* Calculate the sample offsets for the HRIR indices. */ lidx[0] *= Hrtf->irSize; @@ -138,16 +175,173 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi ridx[2] *= Hrtf->irSize; ridx[3] *= Hrtf->irSize; - for(i = 0;i < Hrtf->irSize;i++) + /* Calculate the normalized and attenuated HRIR coefficients using linear + * interpolation when there is enough gain to warrant it. Zero the + * coefficients if gain is too low. 
+ */ + if(gain > 0.0001f) { ALfloat c; + + i = 0; c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] + Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]); - coeffs[i][0] = c * (1.0f/32767.0f); + coeffs[i][0] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f); c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] + Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]); - coeffs[i][1] = c * (1.0f/32767.0f); + coeffs[i][1] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f); + + for(i = 1;i < Hrtf->irSize;i++) + { + c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] + + Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]); + coeffs[i][0] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f); + c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] + + Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]); + coeffs[i][1] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f); + } } + else + { + for(i = 0;i < Hrtf->irSize;i++) + { + coeffs[i][0] = 0.0f; + coeffs[i][1] = 0.0f; + } + } +} + +/* Calculates the moving HRIR target coefficients, target delays, and + * stepping values for the given polar elevation and azimuth in radians. + * Linear interpolation is used to increase the apparent resolution of the + * HRIR data set. The coefficients are also normalized and attenuated by the + * specified gain. Stepping resolution and count are determined using the + * given delta factor between 0.0 and 1.0. + */ +ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat delta, ALint counter, ALfloat (*coeffs)[2], ALuint *delays, ALfloat (*coeffStep)[2], ALint *delayStep) +{ + ALuint evidx[2], lidx[4], ridx[4]; + ALfloat mu[3], blend[4]; + ALfloat left, right; + ALfloat step; + ALuint i; + + /* Calculate elevation indices and interpolation factor. 
*/ + CalcEvIndices(Hrtf->evCount, elevation, evidx, &mu[2]); + + for(i = 0;i < 2;i++) + { + ALuint azcount = Hrtf->azCount[evidx[i]]; + ALuint evoffset = Hrtf->evOffset[evidx[i]]; + ALuint azidx[2]; + + /* Calculate azimuth indices and interpolation factor for this elevation. */ + CalcAzIndices(azcount, azimuth, azidx, &mu[i]); + + /* Calculate a set of linear HRIR indices for left and right channels. */ + lidx[i*2 + 0] = evoffset + azidx[0]; + lidx[i*2 + 1] = evoffset + azidx[1]; + ridx[i*2 + 0] = evoffset + ((azcount-azidx[0]) % azcount); + ridx[i*2 + 1] = evoffset + ((azcount-azidx[1]) % azcount); + } + + // Calculate the stepping parameters. + delta = maxf(floorf(delta*(Hrtf->sampleRate*0.015f) + 0.5f), 1.0f); + step = 1.0f / delta; + + /* Calculate 4 blending weights for 2D bilinear interpolation. */ + blend[0] = (1.0f-mu[0]) * (1.0f-mu[2]); + blend[1] = ( mu[0]) * (1.0f-mu[2]); + blend[2] = (1.0f-mu[1]) * ( mu[2]); + blend[3] = ( mu[1]) * ( mu[2]); + + /* Calculate the HRIR delays using linear interpolation. Then calculate + * the delay stepping values using the target and previous running + * delays. + */ + left = (ALfloat)(delays[0] - (delayStep[0] * counter)); + right = (ALfloat)(delays[1] - (delayStep[1] * counter)); + + delays[0] = fastf2u((Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] + + Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3]) * + dirfact + 0.5f) << HRTFDELAY_BITS; + delays[1] = fastf2u((Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] + + Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3]) * + dirfact + 0.5f) << HRTFDELAY_BITS; + + delayStep[0] = fastf2i(step * (delays[0] - left)); + delayStep[1] = fastf2i(step * (delays[1] - right)); + + /* Calculate the sample offsets for the HRIR indices. 
*/ + lidx[0] *= Hrtf->irSize; + lidx[1] *= Hrtf->irSize; + lidx[2] *= Hrtf->irSize; + lidx[3] *= Hrtf->irSize; + ridx[0] *= Hrtf->irSize; + ridx[1] *= Hrtf->irSize; + ridx[2] *= Hrtf->irSize; + ridx[3] *= Hrtf->irSize; + + /* Calculate the normalized and attenuated target HRIR coefficients using + * linear interpolation when there is enough gain to warrant it. Zero + * the target coefficients if gain is too low. Then calculate the + * coefficient stepping values using the target and previous running + * coefficients. + */ + if(gain > 0.0001f) + { + ALfloat c; + + i = 0; + left = coeffs[i][0] - (coeffStep[i][0] * counter); + right = coeffs[i][1] - (coeffStep[i][1] * counter); + + c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] + + Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]); + coeffs[i][0] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);; + c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] + + Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]); + coeffs[i][1] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);; + + coeffStep[i][0] = step * (coeffs[i][0] - left); + coeffStep[i][1] = step * (coeffs[i][1] - right); + + for(i = 1;i < Hrtf->irSize;i++) + { + left = coeffs[i][0] - (coeffStep[i][0] * counter); + right = coeffs[i][1] - (coeffStep[i][1] * counter); + + c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] + + Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]); + coeffs[i][0] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);; + c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] + + Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]); + coeffs[i][1] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);; + + coeffStep[i][0] = step * (coeffs[i][0] - left); + coeffStep[i][1] = step * (coeffs[i][1] - right); + } + } + else + { + for(i = 0;i < Hrtf->irSize;i++) + { + left = 
coeffs[i][0] - (coeffStep[i][0] * counter); + right = coeffs[i][1] - (coeffStep[i][1] * counter); + + coeffs[i][0] = 0.0f; + coeffs[i][1] = 0.0f; + + coeffStep[i][0] = step * -left; + coeffStep[i][1] = step * -right; + } + } + + /* The stepping count is the number of samples necessary for the HRIR to + * complete its transition. The mixer will only apply stepping for this + * many samples. + */ + return fastf2u(delta); } @@ -21,6 +21,8 @@ ALCboolean FindHrtfFormat(enum DevFmtChannels *chans, ALCuint *srate); void FreeHrtfs(void); ALuint GetHrtfIrSize(const struct Hrtf *Hrtf); -void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat (*coeffs)[2], ALuint *delays); +ALfloat CalcHrtfDelta(ALfloat oldGain, ALfloat newGain, const ALfloat olddir[3], const ALfloat newdir[3]); +void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat (*coeffs)[2], ALuint *delays); +ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat delta, ALint counter, ALfloat (*coeffs)[2], ALuint *delays, ALfloat (*coeffStep)[2], ALint *delayStep); #endif /* ALC_HRTF_H */ diff --git a/Alc/mixer.c b/Alc/mixer.c index 4a98ee8f..3f80434e 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -41,6 +41,20 @@ extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size); +static inline HrtfMixerFunc SelectHrtfMixer(void) +{ +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtf_SSE; +#endif +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtf_Neon; +#endif + + return MixHrtf_C; +} + static inline MixerFunc SelectMixer(void) { #ifdef HAVE_SSE @@ -165,6 +179,7 @@ static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint SamplesToDo) { MixerFunc Mix; + 
HrtfMixerFunc HrtfMix; ResamplerFunc Resample; ALbufferlistitem *BufferListItem; ALuint DataPosInt, DataPosFrac; @@ -203,6 +218,7 @@ ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint Sam } Mix = SelectMixer(); + HrtfMix = SelectHrtfMixer(); Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ? Resample_copy32_C : SelectResampler(Resampler)); @@ -415,8 +431,13 @@ ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint Sam Device->FilteredData, ResampledData, DstBufferSize, parms->Filters[chan].ActiveType ); - Mix(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan], - parms->Counter, OutPos, DstBufferSize); + if(!voice->IsHrtf) + Mix(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan], + parms->Counter, OutPos, DstBufferSize); + else + HrtfMix(parms->OutBuffer, samples, parms->Counter, voice->Offset, + OutPos, parms->Hrtf.IrSize, &parms->Hrtf.Params[chan], + &parms->Hrtf.State[chan], DstBufferSize); } /* Only the first channel for B-Format buffers (W channel) goes to diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c index caedd339..0fdcc087 100644 --- a/Alc/mixer_c.c +++ b/Alc/mixer_c.c @@ -59,6 +59,23 @@ void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const } +static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2], + const ALuint IrSize, + ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict CoeffStep)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + for(c = 0;c < IrSize;c++) + { + const ALuint off = (Offset+c)&HRIR_MASK; + Values[off][0] += Coeffs[c][0] * left; + Values[off][1] += Coeffs[c][1] * right; + Coeffs[c][0] += CoeffStep[c][0]; + Coeffs[c][1] += CoeffStep[c][1]; + } +} + static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], const ALuint IrSize, ALfloat (*restrict Coeffs)[2], diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index 62dad9dc..c1500ed2 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h 
@@ -20,15 +20,17 @@ const ALfloat *Resample_cubic32_C(const ALfloat *src, ALuint frac, ALuint increm /* C mixers */ void MixHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, - ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALuint BufferSize); + ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize, + const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate, + ALuint BufferSize); void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); /* SSE mixers */ void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, - ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALuint BufferSize); + ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize, + const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate, + ALuint BufferSize); void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); @@ -54,8 +56,9 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc /* Neon mixers */ void MixHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, - ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALuint BufferSize); + ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize, + const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate, + ALuint BufferSize); void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c index 46ccec7d..b4635b43 100644 --- a/Alc/mixer_inc.c +++ b/Alc/mixer_inc.c @@ -14,6 +14,11 @@ 
#define MixHrtf MERGE(MixHrtf_,SUFFIX) +static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2], + const ALuint irSize, + ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict CoeffStep)[2], + ALfloat left, ALfloat right); static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], const ALuint irSize, ALfloat (*restrict Coeffs)[2], @@ -21,7 +26,7 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, - ALuint Offset, const ALuint IrSize, + ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize, const HrtfParams *hrtfparams, HrtfState *hrtfstate, ALuint BufferSize) { alignas(16) ALfloat Coeffs[HRIR_LENGTH][2]; @@ -32,13 +37,39 @@ void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, for(c = 0;c < IrSize;c++) { - Coeffs[c][0] = hrtfparams->Coeffs[c][0]; - Coeffs[c][1] = hrtfparams->Coeffs[c][1]; + Coeffs[c][0] = hrtfparams->Coeffs[c][0] - (hrtfparams->CoeffStep[c][0]*Counter); + Coeffs[c][1] = hrtfparams->Coeffs[c][1] - (hrtfparams->CoeffStep[c][1]*Counter); } - Delay[0] = hrtfparams->Delay[0]; - Delay[1] = hrtfparams->Delay[1]; + Delay[0] = hrtfparams->Delay[0] - (hrtfparams->DelayStep[0]*Counter); + Delay[1] = hrtfparams->Delay[1] - (hrtfparams->DelayStep[1]*Counter); - for(pos = 0;pos < BufferSize;pos++) + pos = 0; + for(;pos < BufferSize && pos < Counter;pos++) + { + hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos]; + left = lerp(hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], + hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], + (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + right = lerp(hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK], + hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK], + (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE)); + + Delay[0] += 
hrtfparams->DelayStep[0]; + Delay[1] += hrtfparams->DelayStep[1]; + + hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; + hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; + Offset++; + + ApplyCoeffsStep(Offset, hrtfstate->Values, IrSize, Coeffs, hrtfparams->CoeffStep, left, right); + OutBuffer[0][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][0]; + OutBuffer[1][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][1]; + OutPos++; + } + + Delay[0] >>= HRTFDELAY_BITS; + Delay[1] >>= HRTFDELAY_BITS; + for(;pos < BufferSize;pos++) { hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos]; left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]; @@ -49,8 +80,9 @@ void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, Offset++; ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - OutBuffer[0][pos] += hrtfstate->Values[Offset&HRIR_MASK][0]; - OutBuffer[1][pos] += hrtfstate->Values[Offset&HRIR_MASK][1]; + OutBuffer[0][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][0]; + OutBuffer[1][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][1]; + OutPos++; } } diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 413bd627..8ce17644 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -9,6 +9,38 @@ #include "hrtf.h" +static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2], + const ALuint IrSize, + ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict CoeffStep)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + for(c = 0;c < IrSize;c += 2) + { + const ALuint o0 = (Offset+c)&HRIR_MASK; + const ALuint o1 = (o0+1)&HRIR_MASK; + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), + vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t coefs = 
vld1q_f32((float32_t*)&Coeffs[c][0]); + float32x4_t deltas = vld1q_f32(&CoeffStep[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + coefs = vaddq_f32(coefs, deltas); + + vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + vst1q_f32(&Coeffs[c][0], coefs); + } +} + static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], const ALuint IrSize, ALfloat (*restrict Coeffs)[2], diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index d0dca40e..d86cf749 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -19,6 +19,68 @@ #include "mixer_defs.h" +static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2], + const ALuint IrSize, + ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict CoeffStep)[2], + ALfloat left, ALfloat right) +{ + const __m128 lrlr = _mm_setr_ps(left, right, left, right); + __m128 coeffs, deltas, imp0, imp1; + __m128 vals = _mm_setzero_ps(); + ALuint i; + + if((Offset&1)) + { + const ALuint o0 = Offset&HRIR_MASK; + const ALuint o1 = (Offset+IrSize-1)&HRIR_MASK; + + coeffs = _mm_load_ps(&Coeffs[0][0]); + deltas = _mm_load_ps(&CoeffStep[0][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); + imp0 = _mm_mul_ps(lrlr, coeffs); + coeffs = _mm_add_ps(coeffs, deltas); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Coeffs[0][0], coeffs); + _mm_storel_pi((__m64*)&Values[o0][0], vals); + for(i = 1;i < IrSize-1;i += 2) + { + const ALuint o2 = (Offset+i)&HRIR_MASK; + + coeffs = _mm_load_ps(&Coeffs[i+1][0]); + deltas = _mm_load_ps(&CoeffStep[i+1][0]); + vals = _mm_load_ps(&Values[o2][0]); + imp1 = _mm_mul_ps(lrlr, coeffs); + coeffs = _mm_add_ps(coeffs, deltas); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Coeffs[i+1][0], coeffs); + _mm_store_ps(&Values[o2][0], vals); + imp0 = imp1; + } + vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); + imp0 = _mm_movehl_ps(imp0, imp0); + vals 
= _mm_add_ps(imp0, vals); + _mm_storel_pi((__m64*)&Values[o1][0], vals); + } + else + { + for(i = 0;i < IrSize;i += 2) + { + const ALuint o = (Offset + i)&HRIR_MASK; + + coeffs = _mm_load_ps(&Coeffs[i][0]); + deltas = _mm_load_ps(&CoeffStep[i][0]); + vals = _mm_load_ps(&Values[o][0]); + imp0 = _mm_mul_ps(lrlr, coeffs); + coeffs = _mm_add_ps(coeffs, deltas); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Coeffs[i][0], coeffs); + _mm_store_ps(&Values[o][0], vals); + } + } +} + static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], const ALuint IrSize, ALfloat (*restrict Coeffs)[2], diff --git a/Alc/panning.c b/Alc/panning.c index 64be6f4b..81398b1b 100644 --- a/Alc/panning.c +++ b/Alc/panning.c @@ -387,8 +387,10 @@ ALvoid aluInitPanning(ALCdevice *device) device->ChannelName[i] = VirtualChans[i].channel; SetChannelMap(device, chanmap, count); for(i = 0;i < count;i++) - GetLerpedHrtfCoeffs(device->Hrtf, VirtualChans[i].elevation, VirtualChans[i].angle, - device->Hrtf_Params[i].Coeffs, device->Hrtf_Params[i].Delay); + GetLerpedHrtfCoeffs( + device->Hrtf, VirtualChans[i].elevation, VirtualChans[i].angle, 1.0f, 1.0f, + device->Hrtf_Params[i].Coeffs, device->Hrtf_Params[i].Delay + ); return; } diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h index 56df4db3..1afeb1e6 100644 --- a/OpenAL32/Include/alMain.h +++ b/OpenAL32/Include/alMain.h @@ -617,7 +617,9 @@ typedef struct HrtfState { typedef struct HrtfParams { alignas(16) ALfloat Coeffs[HRIR_LENGTH][2]; + alignas(16) ALfloat CoeffStep[HRIR_LENGTH][2]; ALuint Delay[2]; + ALint DelayStep[2]; } HrtfParams; diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h index 56c37fe8..0462fda8 100644 --- a/OpenAL32/Include/alu.h +++ b/OpenAL32/Include/alu.h @@ -71,6 +71,13 @@ typedef struct DirectParams { ALfilterState HighPass; } Filters[MAX_INPUT_CHANNELS]; + struct { + HrtfParams Params[MAX_INPUT_CHANNELS]; + HrtfState State[MAX_INPUT_CHANNELS]; + ALuint IrSize; + ALfloat Gain; + 
ALfloat Dir[3]; + } Hrtf; MixGains Gains[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; } DirectParams; @@ -99,7 +106,8 @@ typedef void (*MixerFunc)(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); typedef void (*HrtfMixerFunc)(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data, - ALuint Offset, const ALuint IrSize, const HrtfParams *hrtfparams, + ALuint Counter, ALuint Offset, ALuint OutPos, + const ALuint IrSize, const HrtfParams *hrtfparams, HrtfState *hrtfstate, ALuint BufferSize); diff --git a/OpenAL32/alSource.c b/OpenAL32/alSource.c index 12bd9436..be3768f3 100644 --- a/OpenAL32/alSource.c +++ b/OpenAL32/alSource.c @@ -2599,6 +2599,17 @@ ALvoid SetSourceState(ALsource *Source, ALCcontext *Context, ALenum state) voice->Direct.Moving = AL_FALSE; voice->Direct.Counter = 0; + for(i = 0;i < MAX_INPUT_CHANNELS;i++) + { + ALsizei j; + for(j = 0;j < HRTF_HISTORY_LENGTH;j++) + voice->Direct.Hrtf.State[i].History[j] = 0.0f; + for(j = 0;j < HRIR_LENGTH;j++) + { + voice->Direct.Hrtf.State[i].Values[j][0] = 0.0f; + voice->Direct.Hrtf.State[i].Values[j][1] = 0.0f; + } + } for(i = 0;i < (ALsizei)device->NumAuxSends;i++) { voice->Send[i].Moving = AL_FALSE; |