13 files changed, 520 insertions, 32 deletions
diff --git a/Alc/ALu.c b/Alc/ALu.c
index f7408da0..c4292833 100644
--- a/Alc/ALu.c
+++ b/Alc/ALu.c
@@ -527,6 +527,39 @@ ALvoid CalcNonAttnSourceParams(ALvoice *voice, const ALsource *ALSource, const A
 
         voice->IsHrtf = AL_FALSE;
     }
+    else if(Device->Hrtf)
+    {
+        voice->Direct.OutBuffer = &voice->Direct.OutBuffer[voice->Direct.OutChannels];
+        voice->Direct.OutChannels = 2;
+        for(c = 0;c < num_channels;c++)
+        {
+            if(chans[c].channel == LFE)
+            {
+                /* Skip LFE */
+                voice->Direct.Hrtf.Params[c].Delay[0] = 0;
+                voice->Direct.Hrtf.Params[c].Delay[1] = 0;
+                for(i = 0;i < HRIR_LENGTH;i++)
+                {
+                    voice->Direct.Hrtf.Params[c].Coeffs[i][0] = 0.0f;
+                    voice->Direct.Hrtf.Params[c].Coeffs[i][1] = 0.0f;
+                }
+            }
+            else
+            {
+                /* Get the static HRIR coefficients and delays for this
+                 * channel. */
+                GetLerpedHrtfCoeffs(Device->Hrtf,
+                                    chans[c].elevation, chans[c].angle, 1.0f, DryGain,
+                                    voice->Direct.Hrtf.Params[c].Coeffs,
+                                    voice->Direct.Hrtf.Params[c].Delay);
+            }
+        }
+        voice->Direct.Counter = 0;
+        voice->Direct.Moving  = AL_TRUE;
+        voice->Direct.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf);
+
+        voice->IsHrtf = AL_TRUE;
+    }
     else
     {
         for(c = 0;c < num_channels;c++)
@@ -934,6 +967,73 @@ ALvoid CalcSourceParams(ALvoice *voice, const ALsource *ALSource, const ALCconte
         BufferListItem = BufferListItem->next;
     }
 
+    if(Device->Hrtf)
+    {
+        /* Use a binaural HRTF algorithm for stereo headphone playback */
+        ALfloat delta, ev = 0.0f, az = 0.0f;
+        ALfloat radius = ALSource->Radius;
+        ALfloat dirfact = 1.0f;
+
+        voice->Direct.OutBuffer = &voice->Direct.OutBuffer[voice->Direct.OutChannels];
+        voice->Direct.OutChannels = 2;
+
+        if(Distance > FLT_EPSILON)
+        {
+            ALfloat invlen = 1.0f/Distance;
+            Position[0] *= invlen;
+            Position[1] *= invlen;
+            Position[2] *= invlen;
+
+            /* Calculate elevation and azimuth only when the source is not at
+             * the listener. This prevents +0 and -0 Z from producing
+             * inconsistent panning. Also, clamp Y in case FP precision errors
+             * cause it to land outside of -1..+1. */
+            ev = asinf(clampf(Position[1], -1.0f, 1.0f));
+            az = atan2f(Position[0], -Position[2]*ZScale);
+        }
+        if(radius > Distance)
+            dirfact *= Distance / radius;
+
+        /* Check to see if the HRIR is already moving. */
+        if(voice->Direct.Moving)
+        {
+            /* Calculate the normalized HRTF transition factor (delta). */
+            delta = CalcHrtfDelta(voice->Direct.Hrtf.Gain, DryGain,
+                                  voice->Direct.Hrtf.Dir, Position);
+            /* If the delta is large enough, get the moving HRIR target
+             * coefficients, target delays, stepping values, and counter. */
+            if(delta > 0.001f)
+            {
+                ALuint counter = GetMovingHrtfCoeffs(Device->Hrtf,
+                    ev, az, dirfact, DryGain, delta, voice->Direct.Counter,
+                    voice->Direct.Hrtf.Params[0].Coeffs, voice->Direct.Hrtf.Params[0].Delay,
+                    voice->Direct.Hrtf.Params[0].CoeffStep, voice->Direct.Hrtf.Params[0].DelayStep
+                );
+                voice->Direct.Counter = counter;
+                voice->Direct.Hrtf.Gain = DryGain;
+                voice->Direct.Hrtf.Dir[0] = Position[0];
+                voice->Direct.Hrtf.Dir[1] = Position[1];
+                voice->Direct.Hrtf.Dir[2] = Position[2];
+            }
+        }
+        else
+        {
+            /* Get the initial (static) HRIR coefficients and delays. */
+            GetLerpedHrtfCoeffs(Device->Hrtf, ev, az, dirfact, DryGain,
+                                voice->Direct.Hrtf.Params[0].Coeffs,
+                                voice->Direct.Hrtf.Params[0].Delay);
+            voice->Direct.Counter = 0;
+            voice->Direct.Moving  = AL_TRUE;
+            voice->Direct.Hrtf.Gain = DryGain;
+            voice->Direct.Hrtf.Dir[0] = Position[0];
+            voice->Direct.Hrtf.Dir[1] = Position[1];
+            voice->Direct.Hrtf.Dir[2] = Position[2];
+        }
+        voice->Direct.Hrtf.IrSize = GetHrtfIrSize(Device->Hrtf);
+
+        voice->IsHrtf = AL_TRUE;
+    }
+    else
     {
         MixGains *gains = voice->Direct.Gains[0];
         ALfloat radius = ALSource->Radius;
@@ -1168,8 +1268,10 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size)
             HrtfMixerFunc HrtfMix = SelectHrtfMixer();
             ALuint irsize = GetHrtfIrSize(device->Hrtf);
             for(c = 0;c < device->NumChannels;c++)
-                HrtfMix(&device->DryBuffer[outchanoffset], device->DryBuffer[c], device->Hrtf_Offset, irsize,
-                        &device->Hrtf_Params[c], &device->Hrtf_State[c], SamplesToDo);
+                HrtfMix(&device->DryBuffer[outchanoffset], device->DryBuffer[c], 0.0f,
+                    device->Hrtf_Offset, 0.0f, irsize, &device->Hrtf_Params[c],
+                    &device->Hrtf_State[c], SamplesToDo
+                );
             device->Hrtf_Offset += SamplesToDo;
         }
         else if(device->Bs2b)
diff --git a/Alc/hrtf.c b/Alc/hrtf.c
index 2e4156a0..1e371fa4 100644
--- a/Alc/hrtf.c
+++ b/Alc/hrtf.c
@@ -58,6 +58,10 @@ struct Hrtf {
 static const ALchar magicMarker00[8] = "MinPHR00";
 static const ALchar magicMarker01[8] = "MinPHR01";
 
+/* First value for pass-through coefficients (remaining are 0), used for omni-
+ * directional sounds. */
+static const ALfloat PassthruCoeff = 32767.0f * 0.707106781187f/*sqrt(0.5)*/;
+
 static struct Hrtf *LoadedHrtfs = NULL;
 
 /* Calculate the elevation indices given the polar elevation in radians.
@@ -84,12 +88,45 @@ static void CalcAzIndices(ALuint azcount, ALfloat az, ALuint *azidx, ALfloat *az
     *azmu = az - floorf(az);
 }
 
+/* Calculates the normalized HRTF transition factor (delta) from the changes
+ * in gain and listener to source angle between updates.  The result is a
+ * normalized delta factor that can be used to calculate moving HRIR stepping
+ * values.
+ */
+ALfloat CalcHrtfDelta(ALfloat oldGain, ALfloat newGain, const ALfloat olddir[3], const ALfloat newdir[3])
+{
+    ALfloat gainChange, angleChange, change;
+
+    // Calculate the gain change on a log scale, normalized by the log of the 0.0001 gain floor.
+    newGain = maxf(newGain, 0.0001f); /* clamp both gains to the floor so the ratio below stays positive */
+    oldGain = maxf(oldGain, 0.0001f);
+    gainChange = fabsf(log10f(newGain / oldGain) / log10f(0.0001f));
+
+    // Calculate the angle change only when there is enough gain to notice it.
+    angleChange = 0.0f;
+    if(gainChange > 0.0001f || newGain > 0.0001f)
+    {
+        // No angle change when the directions are equal or degenerate (when
+        // both have zero length).
+        if(newdir[0] != olddir[0] || newdir[1] != olddir[1] || newdir[2] != olddir[2])
+        {
+            ALfloat dotp = olddir[0]*newdir[0] + olddir[1]*newdir[1] + olddir[2]*newdir[2];
+            angleChange = acosf(clampf(dotp, -1.0f, 1.0f)) / F_PI; /* clamp keeps the acosf argument within -1..+1 */
+        }
+    }
+
+    // Use the larger of the two changes (the angle change weighted 25x) as the
+    // delta factor, doubled and then capped at 1.0.
+    change = maxf(angleChange * 25.0f, gainChange) * 2.0f;
+    return minf(change, 1.0f);
+}
+
 /* Calculates static HRIR coefficients and delays for the given polar
  * elevation and azimuth in radians.  Linear interpolation is used to
  * increase the apparent resolution of the HRIR data set.  The coefficients
  * are also normalized and attenuated by the specified gain.
  */
-void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat (*coeffs)[2], ALuint *delays)
+void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat (*coeffs)[2], ALuint *delays)
 {
     ALuint evidx[2], lidx[4], ridx[4];
     ALfloat mu[3], blend[4];
@@ -121,12 +158,12 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi
     blend[3] = (     mu[1]) * (     mu[2]);
 
     /* Calculate the HRIR delays using linear interpolation. */
-    delays[0] = fastf2u(Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] +
-                        Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3] +
-                        0.5f);
-    delays[1] = fastf2u(Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] +
-                        Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3] +
-                        0.5f);
+    delays[0] = fastf2u((Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] +
+                         Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3]) *
+                        dirfact + 0.5f) << HRTFDELAY_BITS;
+    delays[1] = fastf2u((Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] +
+                         Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3]) *
+                        dirfact + 0.5f) << HRTFDELAY_BITS;
 
     /* Calculate the sample offsets for the HRIR indices. */
     lidx[0] *= Hrtf->irSize;
@@ -138,16 +175,173 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi
     ridx[2] *= Hrtf->irSize;
     ridx[3] *= Hrtf->irSize;
 
-    for(i = 0;i < Hrtf->irSize;i++)
+    /* Calculate the normalized and attenuated HRIR coefficients using linear
+     * interpolation when there is enough gain to warrant it.  Zero the
+     * coefficients if gain is too low.
+     */
+    if(gain > 0.0001f)
     {
         ALfloat c;
+
+        i = 0;
         c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] +
              Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]);
-        coeffs[i][0] = c * (1.0f/32767.0f);
+        coeffs[i][0] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);
         c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] +
              Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]);
-        coeffs[i][1] = c * (1.0f/32767.0f);
+        coeffs[i][1] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);
+
+        for(i = 1;i < Hrtf->irSize;i++)
+        {
+            c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] +
+                 Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]);
+            coeffs[i][0] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);
+            c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] +
+                 Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]);
+            coeffs[i][1] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);
+        }
     }
+    else
+    {
+        for(i = 0;i < Hrtf->irSize;i++)
+        {
+            coeffs[i][0] = 0.0f;
+            coeffs[i][1] = 0.0f;
+        }
+    }
+}
+
+/* Calculates the moving HRIR target coefficients, target delays, and
+ * stepping values for the given polar elevation and azimuth in radians,
+ * using linear interpolation of the HRIR data set, scaled by the given gain.
+ * On entry coeffs, delays, coeffStep, delayStep and counter describe the
+ * transition in progress; the arrays are overwritten with the new targets
+ * and steps.  Returns the step count derived from delta (0.0 to 1.0).
+ */
+ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat delta, ALint counter, ALfloat (*coeffs)[2], ALuint *delays, ALfloat (*coeffStep)[2], ALint *delayStep)
+{
+    ALuint evidx[2], lidx[4], ridx[4];
+    ALfloat mu[3], blend[4];
+    ALfloat left, right;
+    ALfloat step;
+    ALuint i;
+
+    /* Calculate elevation indices and interpolation factor. */
+    CalcEvIndices(Hrtf->evCount, elevation, evidx, &mu[2]);
+
+    for(i = 0;i < 2;i++)
+    {
+        ALuint azcount = Hrtf->azCount[evidx[i]];
+        ALuint evoffset = Hrtf->evOffset[evidx[i]];
+        ALuint azidx[2];
+
+        /* Calculate azimuth indices and interpolation factor for this elevation. */
+        CalcAzIndices(azcount, azimuth, azidx, &mu[i]);
+
+        /* Calculate a set of linear HRIR indices for left and right channels. */
+        lidx[i*2 + 0] = evoffset + azidx[0];
+        lidx[i*2 + 1] = evoffset + azidx[1];
+        ridx[i*2 + 0] = evoffset + ((azcount-azidx[0]) % azcount);
+        ridx[i*2 + 1] = evoffset + ((azcount-azidx[1]) % azcount);
+    }
+
+    /* Step count: delta scaled by 15ms worth of samples, rounded, at least 1. */
+    delta = maxf(floorf(delta*(Hrtf->sampleRate*0.015f) + 0.5f), 1.0f);
+    step = 1.0f / delta;
+
+    /* Calculate 4 blending weights for 2D bilinear interpolation. */
+    blend[0] = (1.0f-mu[0]) * (1.0f-mu[2]);
+    blend[1] = (     mu[0]) * (1.0f-mu[2]);
+    blend[2] = (1.0f-mu[1]) * (     mu[2]);
+    blend[3] = (     mu[1]) * (     mu[2]);
+
+    /* Calculate the HRIR delays using linear interpolation.  Then calculate
+     * the delay stepping values using the target and previous running
+     * delays.
+     */
+    left = (ALfloat)(delays[0] - (delayStep[0] * counter));
+    right = (ALfloat)(delays[1] - (delayStep[1] * counter));
+
+    delays[0] = fastf2u((Hrtf->delays[lidx[0]]*blend[0] + Hrtf->delays[lidx[1]]*blend[1] +
+                         Hrtf->delays[lidx[2]]*blend[2] + Hrtf->delays[lidx[3]]*blend[3]) *
+                        dirfact + 0.5f) << HRTFDELAY_BITS;
+    delays[1] = fastf2u((Hrtf->delays[ridx[0]]*blend[0] + Hrtf->delays[ridx[1]]*blend[1] +
+                         Hrtf->delays[ridx[2]]*blend[2] + Hrtf->delays[ridx[3]]*blend[3]) *
+                        dirfact + 0.5f) << HRTFDELAY_BITS;
+
+    delayStep[0] = fastf2i(step * (delays[0] - left));
+    delayStep[1] = fastf2i(step * (delays[1] - right));
+
+    /* Calculate the sample offsets for the HRIR indices. */
+    lidx[0] *= Hrtf->irSize;
+    lidx[1] *= Hrtf->irSize;
+    lidx[2] *= Hrtf->irSize;
+    lidx[3] *= Hrtf->irSize;
+    ridx[0] *= Hrtf->irSize;
+    ridx[1] *= Hrtf->irSize;
+    ridx[2] *= Hrtf->irSize;
+    ridx[3] *= Hrtf->irSize;
+
+    /* Calculate the normalized and attenuated target HRIR coefficients using
+     * linear interpolation when there is enough gain to warrant it.  Zero
+     * the target coefficients if gain is too low.  Then calculate the
+     * coefficient stepping values using the target and previous running
+     * coefficients.
+     */
+    if(gain > 0.0001f)
+    {
+        ALfloat c;
+
+        i = 0;
+        left = coeffs[i][0] - (coeffStep[i][0] * counter);
+        right = coeffs[i][1] - (coeffStep[i][1] * counter);
+
+        c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] +
+             Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]);
+        coeffs[i][0] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);
+        c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] +
+             Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]);
+        coeffs[i][1] = lerp(PassthruCoeff, c, dirfact) * gain * (1.0f/32767.0f);
+
+        coeffStep[i][0] = step * (coeffs[i][0] - left);
+        coeffStep[i][1] = step * (coeffs[i][1] - right);
+
+        for(i = 1;i < Hrtf->irSize;i++)
+        {
+            left = coeffs[i][0] - (coeffStep[i][0] * counter);
+            right = coeffs[i][1] - (coeffStep[i][1] * counter);
+
+            c = (Hrtf->coeffs[lidx[0]+i]*blend[0] + Hrtf->coeffs[lidx[1]+i]*blend[1] +
+                 Hrtf->coeffs[lidx[2]+i]*blend[2] + Hrtf->coeffs[lidx[3]+i]*blend[3]);
+            coeffs[i][0] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);
+            c = (Hrtf->coeffs[ridx[0]+i]*blend[0] + Hrtf->coeffs[ridx[1]+i]*blend[1] +
+                 Hrtf->coeffs[ridx[2]+i]*blend[2] + Hrtf->coeffs[ridx[3]+i]*blend[3]);
+            coeffs[i][1] = lerp(0.0f, c, dirfact) * gain * (1.0f/32767.0f);
+
+            coeffStep[i][0] = step * (coeffs[i][0] - left);
+            coeffStep[i][1] = step * (coeffs[i][1] - right);
+        }
+    }
+    else
+    {
+        for(i = 0;i < Hrtf->irSize;i++)
+        {
+            left = coeffs[i][0] - (coeffStep[i][0] * counter);
+            right = coeffs[i][1] - (coeffStep[i][1] * counter);
+
+            coeffs[i][0] = 0.0f;
+            coeffs[i][1] = 0.0f;
+
+            coeffStep[i][0] = step * -left;
+            coeffStep[i][1] = step * -right;
+        }
+    }
+
+    /* The stepping count is the number of samples necessary for the HRIR to
+     * complete its transition.  The mixer will only apply stepping for this
+     * many samples.
+     */
+    return fastf2u(delta);
 }
 
 
diff --git a/Alc/hrtf.h b/Alc/hrtf.h
index 48636344..938bf552 100644
--- a/Alc/hrtf.h
+++ b/Alc/hrtf.h
@@ -21,6 +21,8 @@ ALCboolean FindHrtfFormat(enum DevFmtChannels *chans, ALCuint *srate);
 void FreeHrtfs(void);
 
 ALuint GetHrtfIrSize(const struct Hrtf *Hrtf);
-void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat (*coeffs)[2], ALuint *delays);
+ALfloat CalcHrtfDelta(ALfloat oldGain, ALfloat newGain, const ALfloat olddir[3], const ALfloat newdir[3]);
+void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat (*coeffs)[2], ALuint *delays);
+ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azimuth, ALfloat dirfact, ALfloat gain, ALfloat delta, ALint counter, ALfloat (*coeffs)[2], ALuint *delays, ALfloat (*coeffStep)[2], ALint *delayStep);
 
 #endif /* ALC_HRTF_H */
diff --git a/Alc/mixer.c b/Alc/mixer.c
index 4a98ee8f..3f80434e 100644
--- a/Alc/mixer.c
+++ b/Alc/mixer.c
@@ -41,6 +41,20 @@
 extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size);
 
 
+static inline HrtfMixerFunc SelectHrtfMixer(void)
+{
+#ifdef HAVE_SSE
+    if((CPUCapFlags&CPU_CAP_SSE))
+        return MixHrtf_SSE;
+#endif /* HAVE_SSE */
+#ifdef HAVE_NEON
+    if((CPUCapFlags&CPU_CAP_NEON))
+        return MixHrtf_Neon;
+#endif /* HAVE_NEON */
+
+    return MixHrtf_C; /* fallback when no SIMD variant is both compiled in and flagged in CPUCapFlags */
+}
+
 static inline MixerFunc SelectMixer(void)
 {
 #ifdef HAVE_SSE
@@ -165,6 +179,7 @@ static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter
 ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint SamplesToDo)
 {
     MixerFunc Mix;
+    HrtfMixerFunc HrtfMix;
     ResamplerFunc Resample;
     ALbufferlistitem *BufferListItem;
     ALuint DataPosInt, DataPosFrac;
@@ -203,6 +218,7 @@ ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint Sam
     }
 
     Mix = SelectMixer();
+    HrtfMix = SelectHrtfMixer();
     Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ?
                 Resample_copy32_C : SelectResampler(Resampler));
 
@@ -415,8 +431,13 @@ ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint Sam
                     Device->FilteredData, ResampledData, DstBufferSize,
                     parms->Filters[chan].ActiveType
                 );
-                Mix(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan],
-                    parms->Counter, OutPos, DstBufferSize);
+                if(!voice->IsHrtf)
+                    Mix(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan],
+                        parms->Counter, OutPos, DstBufferSize);
+                else
+                    HrtfMix(parms->OutBuffer, samples, parms->Counter, voice->Offset,
+                            OutPos, parms->Hrtf.IrSize, &parms->Hrtf.Params[chan],
+                            &parms->Hrtf.State[chan], DstBufferSize);
             }
 
             /* Only the first channel for B-Format buffers (W channel) goes to
diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index caedd339..0fdcc087 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -59,6 +59,23 @@ void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const
 }
 
 
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint IrSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right)
+{
+    ALuint c; /* coefficient (tap) index */
+    for(c = 0;c < IrSize;c++) /* accumulate each tap into Values, then step its coefficients */
+    {
+        const ALuint off = (Offset+c)&HRIR_MASK; /* index into Values, masked with HRIR_MASK */
+        Values[off][0] += Coeffs[c][0] * left;
+        Values[off][1] += Coeffs[c][1] * right;
+        Coeffs[c][0] += CoeffStep[c][0]; /* Coeffs is modified in place for the caller's next sample */
+        Coeffs[c][1] += CoeffStep[c][1];
+    }
+}
+
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint IrSize,
                                ALfloat (*restrict Coeffs)[2],
diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h
index 62dad9dc..c1500ed2 100644
--- a/Alc/mixer_defs.h
+++ b/Alc/mixer_defs.h
@@ -20,15 +20,17 @@ const ALfloat *Resample_cubic32_C(const ALfloat *src, ALuint frac, ALuint increm
 
 /* C mixers */
 void MixHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
-               ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams,
-               struct HrtfState *hrtfstate, ALuint BufferSize);
+               ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+               const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
+               ALuint BufferSize);
 void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
                  struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
 
 /* SSE mixers */
 void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
-                 ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams,
-                 struct HrtfState *hrtfstate, ALuint BufferSize);
+                 ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+                 const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
+                 ALuint BufferSize);
 void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
              struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
 
@@ -54,8 +56,9 @@ const ALfloat *Resample_lerp32_SSE41(const ALfloat *src, ALuint frac, ALuint inc
 
 /* Neon mixers */
 void MixHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
-                  ALuint Offset, const ALuint IrSize, const struct HrtfParams *hrtfparams,
-                  struct HrtfState *hrtfstate, ALuint BufferSize);
+                  ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
+                  const struct HrtfParams *hrtfparams, struct HrtfState *hrtfstate,
+                  ALuint BufferSize);
 void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
               struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
 
diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c
index 46ccec7d..b4635b43 100644
--- a/Alc/mixer_inc.c
+++ b/Alc/mixer_inc.c
@@ -14,6 +14,11 @@
 #define MixHrtf MERGE(MixHrtf_,SUFFIX)
 
 
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint irSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right);
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint irSize,
                                ALfloat (*restrict Coeffs)[2],
@@ -21,7 +26,7 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
 
 
 void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
-             ALuint Offset, const ALuint IrSize,
+             ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize,
              const HrtfParams *hrtfparams, HrtfState *hrtfstate, ALuint BufferSize)
 {
     alignas(16) ALfloat Coeffs[HRIR_LENGTH][2];
@@ -32,13 +37,39 @@ void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
 
     for(c = 0;c < IrSize;c++)
     {
-        Coeffs[c][0] = hrtfparams->Coeffs[c][0];
-        Coeffs[c][1] = hrtfparams->Coeffs[c][1];
+        Coeffs[c][0] = hrtfparams->Coeffs[c][0] - (hrtfparams->CoeffStep[c][0]*Counter);
+        Coeffs[c][1] = hrtfparams->Coeffs[c][1] - (hrtfparams->CoeffStep[c][1]*Counter);
     }
-    Delay[0] = hrtfparams->Delay[0];
-    Delay[1] = hrtfparams->Delay[1];
+    Delay[0] = hrtfparams->Delay[0] - (hrtfparams->DelayStep[0]*Counter);
+    Delay[1] = hrtfparams->Delay[1] - (hrtfparams->DelayStep[1]*Counter);
 
-    for(pos = 0;pos < BufferSize;pos++)
+    pos = 0;
+    for(;pos < BufferSize && pos < Counter;pos++)
+    {
+        hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos];
+        left  = lerp(hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK],
+                     hrtfstate->History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK],
+                     (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
+        right = lerp(hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&HRTF_HISTORY_MASK],
+                     hrtfstate->History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&HRTF_HISTORY_MASK],
+                     (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
+
+        Delay[0] += hrtfparams->DelayStep[0];
+        Delay[1] += hrtfparams->DelayStep[1];
+
+        hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f;
+        hrtfstate->Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f;
+        Offset++;
+
+        ApplyCoeffsStep(Offset, hrtfstate->Values, IrSize, Coeffs, hrtfparams->CoeffStep, left, right);
+        OutBuffer[0][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][0];
+        OutBuffer[1][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][1];
+        OutPos++;
+    }
+
+    Delay[0] >>= HRTFDELAY_BITS;
+    Delay[1] >>= HRTFDELAY_BITS;
+    for(;pos < BufferSize;pos++)
     {
         hrtfstate->History[Offset&HRTF_HISTORY_MASK] = data[pos];
         left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK];
@@ -49,8 +80,9 @@ void MixHrtf(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
         Offset++;
 
         ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right);
-        OutBuffer[0][pos] += hrtfstate->Values[Offset&HRIR_MASK][0];
-        OutBuffer[1][pos] += hrtfstate->Values[Offset&HRIR_MASK][1];
+        OutBuffer[0][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][0];
+        OutBuffer[1][OutPos] += hrtfstate->Values[Offset&HRIR_MASK][1];
+        OutPos++;
     }
 }
 
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 413bd627..8ce17644 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -9,6 +9,38 @@
 #include "hrtf.h"
 
 
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint IrSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right)
+{
+    ALuint c;
+    float32x4_t leftright4;
+    {
+        float32x2_t leftright2 = vdup_n_f32(0.0);
+        leftright2 = vset_lane_f32(left, leftright2, 0);
+        leftright2 = vset_lane_f32(right, leftright2, 1);
+        leftright4 = vcombine_f32(leftright2, leftright2); /* {left, right, left, right} */
+    }
+    for(c = 0;c < IrSize;c += 2) /* two taps (four floats) per iteration */
+    {
+        const ALuint o0 = (Offset+c)&HRIR_MASK;
+        const ALuint o1 = (o0+1)&HRIR_MASK; /* masked separately from o0, so the pair need not be adjacent */
+        float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]),
+                                        vld1_f32((float32_t*)&Values[o1][0]));
+        float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]);
+        float32x4_t deltas = vld1q_f32(&CoeffStep[c][0]);
+
+        vals = vmlaq_f32(vals, coefs, leftright4); /* vals += coefs * {left, right, left, right} */
+        coefs = vaddq_f32(coefs, deltas); /* advance both taps' coefficients by one step */
+
+        vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals));
+        vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals));
+        vst1q_f32(&Coeffs[c][0], coefs); /* stepped coefficients are written back in place */
+    }
+}
+
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint IrSize,
                                ALfloat (*restrict Coeffs)[2],
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index d0dca40e..d86cf749 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -19,6 +19,68 @@
 #include "mixer_defs.h"
 
 
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint IrSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right)
+{
+    const __m128 lrlr = _mm_setr_ps(left, right, left, right);
+    __m128 coeffs, deltas, imp0, imp1;
+    __m128 vals = _mm_setzero_ps();
+    ALuint i;
+
+    if((Offset&1)) /* odd start: first and last taps are handled as halves so the loop uses even Values indices */
+    {
+        const ALuint o0 = Offset&HRIR_MASK; /* Values index receiving the first tap */
+        const ALuint o1 = (Offset+IrSize-1)&HRIR_MASK; /* Values index receiving the last tap */
+
+        coeffs = _mm_load_ps(&Coeffs[0][0]);
+        deltas = _mm_load_ps(&CoeffStep[0][0]);
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
+        imp0 = _mm_mul_ps(lrlr, coeffs);
+        coeffs = _mm_add_ps(coeffs, deltas); /* step taps 0 and 1 */
+        vals = _mm_add_ps(imp0, vals);
+        _mm_store_ps(&Coeffs[0][0], coeffs);
+        _mm_storel_pi((__m64*)&Values[o0][0], vals); /* only the low half (tap 0) is stored here */
+        for(i = 1;i < IrSize-1;i += 2)
+        {
+            const ALuint o2 = (Offset+i)&HRIR_MASK;
+
+            coeffs = _mm_load_ps(&Coeffs[i+1][0]);
+            deltas = _mm_load_ps(&CoeffStep[i+1][0]);
+            vals = _mm_load_ps(&Values[o2][0]);
+            imp1 = _mm_mul_ps(lrlr, coeffs);
+            coeffs = _mm_add_ps(coeffs, deltas);
+            imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); /* high half of previous product + low half of this one */
+            vals = _mm_add_ps(imp0, vals);
+            _mm_store_ps(&Coeffs[i+1][0], coeffs);
+            _mm_store_ps(&Values[o2][0], vals);
+            imp0 = imp1; /* carry this product's high half into the next iteration */
+        }
+        vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]);
+        imp0 = _mm_movehl_ps(imp0, imp0); /* move the leftover high half (last tap) into the low half */
+        vals = _mm_add_ps(imp0, vals);
+        _mm_storel_pi((__m64*)&Values[o1][0], vals);
+    }
+    else
+    {
+        for(i = 0;i < IrSize;i += 2) /* even start: two taps per iteration, no carry between iterations */
+        {
+            const ALuint o = (Offset + i)&HRIR_MASK;
+
+            coeffs = _mm_load_ps(&Coeffs[i][0]);
+            deltas = _mm_load_ps(&CoeffStep[i][0]);
+            vals = _mm_load_ps(&Values[o][0]);
+            imp0 = _mm_mul_ps(lrlr, coeffs);
+            coeffs = _mm_add_ps(coeffs, deltas); /* step both taps' coefficients */
+            vals = _mm_add_ps(imp0, vals);
+            _mm_store_ps(&Coeffs[i][0], coeffs);
+            _mm_store_ps(&Values[o][0], vals);
+        }
+    }
+}
+
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint IrSize,
                                ALfloat (*restrict Coeffs)[2],
diff --git a/Alc/panning.c b/Alc/panning.c
index 64be6f4b..81398b1b 100644
--- a/Alc/panning.c
+++ b/Alc/panning.c
@@ -387,8 +387,10 @@ ALvoid aluInitPanning(ALCdevice *device)
             device->ChannelName[i] = VirtualChans[i].channel;
         SetChannelMap(device, chanmap, count);
         for(i = 0;i < count;i++)
-            GetLerpedHrtfCoeffs(device->Hrtf, VirtualChans[i].elevation, VirtualChans[i].angle,
-                                device->Hrtf_Params[i].Coeffs, device->Hrtf_Params[i].Delay);
+            GetLerpedHrtfCoeffs(
+                device->Hrtf, VirtualChans[i].elevation, VirtualChans[i].angle, 1.0f, 1.0f,
+                device->Hrtf_Params[i].Coeffs, device->Hrtf_Params[i].Delay
+            );
 
         return;
     }
diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h
index 56df4db3..1afeb1e6 100644
--- a/OpenAL32/Include/alMain.h
+++ b/OpenAL32/Include/alMain.h
@@ -617,7 +617,9 @@ typedef struct HrtfState {
 
 typedef struct HrtfParams {
     alignas(16) ALfloat Coeffs[HRIR_LENGTH][2];
+    alignas(16) ALfloat CoeffStep[HRIR_LENGTH][2]; /* per-sample increment added to each Coeffs entry while stepping */
     ALuint Delay[2];
+    ALint DelayStep[2]; /* per-sample increment added to each Delay while stepping */
 } HrtfParams;
 
 
diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h
index 56c37fe8..0462fda8 100644
--- a/OpenAL32/Include/alu.h
+++ b/OpenAL32/Include/alu.h
@@ -71,6 +71,13 @@ typedef struct DirectParams {
         ALfilterState HighPass;
     } Filters[MAX_INPUT_CHANNELS];
 
+    struct {
+        HrtfParams Params[MAX_INPUT_CHANNELS];
+        HrtfState State[MAX_INPUT_CHANNELS];
+        ALuint IrSize;
+        ALfloat Gain;
+        ALfloat Dir[3];
+    } Hrtf;
     MixGains Gains[MAX_INPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 } DirectParams;
 
@@ -99,7 +106,8 @@ typedef void (*MixerFunc)(const ALfloat *data, ALuint OutChans,
                           ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains,
                           ALuint Counter, ALuint OutPos, ALuint BufferSize);
 typedef void (*HrtfMixerFunc)(ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat *data,
-                              ALuint Offset, const ALuint IrSize, const HrtfParams *hrtfparams,
+                              ALuint Counter, ALuint Offset, ALuint OutPos,
+                              const ALuint IrSize, const HrtfParams *hrtfparams,
                               HrtfState *hrtfstate, ALuint BufferSize);
 
 
diff --git a/OpenAL32/alSource.c b/OpenAL32/alSource.c
index 12bd9436..be3768f3 100644
--- a/OpenAL32/alSource.c
+++ b/OpenAL32/alSource.c
@@ -2599,6 +2599,17 @@ ALvoid SetSourceState(ALsource *Source, ALCcontext *Context, ALenum state)
 
         voice->Direct.Moving  = AL_FALSE;
         voice->Direct.Counter = 0;
+        for(i = 0;i < MAX_INPUT_CHANNELS;i++)
+        {
+            ALsizei j;
+            for(j = 0;j < HRTF_HISTORY_LENGTH;j++)
+                voice->Direct.Hrtf.State[i].History[j] = 0.0f;
+            for(j = 0;j < HRIR_LENGTH;j++)
+            {
+                voice->Direct.Hrtf.State[i].Values[j][0] = 0.0f;
+                voice->Direct.Hrtf.State[i].Values[j][1] = 0.0f;
+            }
+        }
         for(i = 0;i < (ALsizei)device->NumAuxSends;i++)
         {
             voice->Send[i].Moving  = AL_FALSE;