From 4f92bd59387bd2478487ebd312a045f54747e4a2 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Sun, 5 Aug 2018 20:29:03 -0700 Subject: Apply the vector all-pass separate on uninterleaved samples With the vector all-pass applied in a self-contained function, the individual steps of the early and late reverb stages can be better optimized with tighter loops. That allows for more data to be held local, resulting in less thrashing from reloading the same values multiple times. There is room for further improvement, depending on the length of the early delay lines and all-pass delay lines allowing for bulk reads. --- Alc/effects/reverb.c | 469 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 310 insertions(+), 159 deletions(-) (limited to 'Alc/effects') diff --git a/Alc/effects/reverb.c b/Alc/effects/reverb.c index 7ef95cf3..a14b6926 100644 --- a/Alc/effects/reverb.c +++ b/Alc/effects/reverb.c @@ -1252,9 +1252,9 @@ static inline ALfloat FadedDelayLineOut(const DelayLineI *Delay, const ALsizei o return Delay->Line[off0&Delay->Mask][c]*(1.0f-mu) + Delay->Line[off1&Delay->Mask][c]*( mu); } -#define UnfadedDelayLineOut(d, o0, o1, c, mu) DelayLineOut(d, o0, c) -static inline ALvoid DelayLineIn(DelayLineI *Delay, ALsizei offset, const ALsizei c, + +static inline ALvoid DelayLineIn(const DelayLineI *Delay, ALsizei offset, const ALsizei c, const ALfloat *restrict in, ALsizei count) { ALsizei i; @@ -1262,15 +1262,7 @@ static inline ALvoid DelayLineIn(DelayLineI *Delay, ALsizei offset, const ALsize Delay->Line[(offset++)&Delay->Mask][c] = *(in++); } -static inline ALvoid DelayLineIn4(DelayLineI *Delay, ALsizei offset, const ALfloat in[NUM_LINES]) -{ - ALsizei i; - offset &= Delay->Mask; - for(i = 0;i < NUM_LINES;i++) - Delay->Line[offset][i] = in[i]; -} - -static inline ALvoid DelayLineIn4Rev(DelayLineI *Delay, ALsizei offset, const ALfloat in[NUM_LINES]) +static inline ALvoid DelayLineIn4Rev(const DelayLineI *Delay, ALsizei offset, const ALfloat 
in[NUM_LINES]) { ALsizei i; offset &= Delay->Mask; @@ -1324,6 +1316,8 @@ static inline void VectorPartialScatter(ALfloat *restrict out, const ALfloat *re out[2] = xCoeff*in[2] + yCoeff*( in[0] + -in[1] + in[3]); out[3] = xCoeff*in[3] + yCoeff*(-in[0] + -in[1] + -in[2] ); } +#define VectorScatterDelayIn(delay, o, in, xcoeff, ycoeff) \ + VectorPartialScatter((delay)->Line[(o)&(delay)->Mask], in, xcoeff, ycoeff) /* Same as above, but reverses the input. */ static inline void VectorPartialScatterRev(ALfloat *restrict out, const ALfloat *restrict in, @@ -1334,6 +1328,8 @@ static inline void VectorPartialScatterRev(ALfloat *restrict out, const ALfloat out[2] = xCoeff*in[1] + yCoeff*(in[0] + -in[2] + in[3]); out[3] = xCoeff*in[0] + yCoeff*( -in[1] + -in[2] + -in[3]); } +#define VectorScatterRevDelayIn(delay, o, in, xcoeff, ycoeff) \ + VectorPartialScatterRev((delay)->Line[(o)&(delay)->Mask], in, xcoeff, ycoeff) /* This applies a Gerzon multiple-in/multiple-out (MIMO) vector all-pass * filter to the 4-line input. @@ -1345,34 +1341,72 @@ static inline void VectorPartialScatterRev(ALfloat *restrict out, const ALfloat * Two static specializations are used for transitional (cross-faded) delay * line processing and non-transitional processing. */ -#define DECL_TEMPLATE(T) \ -static void VectorAllpass_##T(ALfloat *restrict out, \ - const ALfloat *restrict in, \ - const ALsizei offset, const ALfloat feedCoeff, \ - const ALfloat xCoeff, const ALfloat yCoeff, \ - const ALfloat mu, VecAllpass *Vap) \ -{ \ - ALfloat f[NUM_LINES], fs[NUM_LINES]; \ - ALfloat input; \ - ALsizei i; \ - \ - (void)mu; /* Ignore for Unfaded. 
*/ \ - \ - for(i = 0;i < NUM_LINES;i++) \ - { \ - input = in[i]; \ - out[i] = T##DelayLineOut(&Vap->Delay, offset-Vap->Offset[i][0], \ - offset-Vap->Offset[i][1], i, mu) - \ - feedCoeff*input; \ - f[i] = input + feedCoeff*out[i]; \ - } \ - VectorPartialScatter(fs, f, xCoeff, yCoeff); \ - \ - DelayLineIn4(&Vap->Delay, offset, fs); \ +static void VectorAllpass_Unfaded(ALfloat (*restrict samples)[MAX_UPDATE_SAMPLES], + ALsizei offset, const ALfloat feedCoeff, + const ALfloat xCoeff, const ALfloat yCoeff, + ALsizei todo, VecAllpass *Vap) +{ + const DelayLineI delay = Vap->Delay; + ALsizei vap_offset[NUM_LINES]; + ALsizei i, j; + + ASSUME(todo > 0); + + for(j = 0;j < NUM_LINES;j++) + vap_offset[j] = offset-Vap->Offset[j][0]; + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + + for(j = 0;j < NUM_LINES;j++) + { + ALfloat input = samples[j][i]; + ALfloat out = DelayLineOut(&delay, vap_offset[j]++, j) - feedCoeff*input; + f[j] = input + feedCoeff*out; + + samples[j][i] = out; + } + + VectorScatterDelayIn(&delay, offset, f, xCoeff, yCoeff); + ++offset; + } +} +static void VectorAllpass_Faded(ALfloat (*restrict samples)[MAX_UPDATE_SAMPLES], + ALsizei offset, const ALfloat feedCoeff, + const ALfloat xCoeff, const ALfloat yCoeff, + ALfloat fade, ALsizei todo, VecAllpass *Vap) +{ + const DelayLineI delay = Vap->Delay; + ALsizei vap_offset[NUM_LINES][2]; + ALsizei i, j; + + ASSUME(todo > 0); + + for(j = 0;j < NUM_LINES;j++) + { + vap_offset[j][0] = offset-Vap->Offset[j][0]; + vap_offset[j][1] = offset-Vap->Offset[j][1]; + } + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + + for(j = 0;j < NUM_LINES;j++) + { + ALfloat input = samples[j][i]; + ALfloat out = + FadedDelayLineOut(&delay, vap_offset[j][0]++, vap_offset[j][1]++, j, fade) - + feedCoeff*input; + f[j] = input + feedCoeff*out; + + samples[j][i] = out; + } + fade += FadeStep; + + VectorScatterDelayIn(&delay, offset, f, xCoeff, yCoeff); + ++offset; + } } -DECL_TEMPLATE(Unfaded) -DECL_TEMPLATE(Faded) -#undef 
DECL_TEMPLATE /* This generates early reflections. * @@ -1393,51 +1427,128 @@ DECL_TEMPLATE(Faded) * Two static specializations are used for transitional (cross-faded) delay * line processing and non-transitional processing. */ -#define DECL_TEMPLATE(T) \ -static void EarlyReflection_##T(ALreverbState *State, const ALsizei todo, \ - ALfloat fade, \ - ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) \ -{ \ - ALsizei offset = State->Offset; \ - const ALfloat apFeedCoeff = State->ApFeedCoeff; \ - const ALfloat mixX = State->MixX; \ - const ALfloat mixY = State->MixY; \ - ALfloat f[NUM_LINES], fr[NUM_LINES]; \ - ALsizei i, j; \ - \ - for(i = 0;i < todo;i++) \ - { \ - for(j = 0;j < NUM_LINES;j++) \ - fr[j] = T##DelayLineOut(&State->Delay, \ - offset-State->EarlyDelayTap[j][0], \ - offset-State->EarlyDelayTap[j][1], j, fade \ - ) * State->EarlyDelayCoeff[j]; \ - \ - VectorAllpass_##T(f, fr, offset, apFeedCoeff, mixX, mixY, fade, \ - &State->Early.VecAp); \ - \ - DelayLineIn4Rev(&State->Early.Delay, offset, f); \ - \ - for(j = 0;j < NUM_LINES;j++) \ - f[j] += T##DelayLineOut(&State->Early.Delay, \ - offset-State->Early.Offset[j][0], \ - offset-State->Early.Offset[j][1], j, fade \ - ) * State->Early.Coeff[j]; \ - \ - for(j = 0;j < NUM_LINES;j++) \ - out[j][i] = f[j]; \ - \ - VectorPartialScatterRev(fr, f, mixX, mixY); \ - \ - DelayLineIn4(&State->Delay, offset-State->LateFeedTap, fr); \ - \ - offset++; \ - fade += FadeStep; \ - } \ +static void EarlyReflection_Unfaded(ALreverbState *State, const ALsizei todo, + ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) +{ + ALfloat (*restrict temps)[MAX_UPDATE_SAMPLES] = State->TempSamples; + const DelayLineI early_delay = State->Early.Delay; + const DelayLineI main_delay = State->Delay; + ALsizei early_feedb_tap[NUM_LINES]; + ALfloat early_feedb_coeff[NUM_LINES]; + const ALfloat mixX = State->MixX; + const ALfloat mixY = State->MixY; + ALsizei offset = State->Offset; + ALsizei late_feed_tap; + ALsizei i, j; + + ASSUME(todo > 0); + + /* 
First, load decorrelated samples from the main delay line as the primary + * reflections. + */ + for(j = 0;j < NUM_LINES;j++) + { + ALsizei early_delay_tap = offset - State->EarlyDelayTap[j][0]; + ALfloat coeff = State->EarlyDelayCoeff[j]; + for(i = 0;i < todo;i++) + temps[j][i] = DelayLineOut(&main_delay, early_delay_tap++, j) * coeff; + } + + /* Apply a vector all-pass, to help color the initial reflections based on + * the diffusion strength. + */ + VectorAllpass_Unfaded(temps, offset, State->ApFeedCoeff, mixX, mixY, todo, + &State->Early.VecAp); + + for(j = 0;j < NUM_LINES;j++) + { + early_feedb_tap[j] = offset - State->Early.Offset[j][0]; + early_feedb_coeff[j] = State->Early.Coeff[j]; + } + late_feed_tap = offset - State->LateFeedTap; + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + + for(j = 0;j < NUM_LINES;j++) + f[j] = temps[j][i]; + + /* Apply a delay and bounce to generate secondary reflections, combine + * with the primary reflections and write out the result for mixing. + */ + DelayLineIn4Rev(&early_delay, offset, f); + for(j = 0;j < NUM_LINES;j++) + { + f[j] += DelayLineOut(&early_delay, early_feedb_tap[j]++, j) * early_feedb_coeff[j]; + out[j][i] = f[j]; + } + + /* Also write the result back to the main delay line for the late + * reverb stage to pick up at the appropriate time, applying a scatter + * and bounce to improve the initial diffusion in the late reverb.
+ */ + VectorScatterRevDelayIn(&main_delay, late_feed_tap++, f, mixX, mixY); + offset++; + } +} +/* Cross-faded variant of the early reflections, used while the delay taps are + * in transition. Every delay-line read blends the [0] and [1] taps by the + * fade factor, which advances by FadeStep for each sample. + */ +static void EarlyReflection_Faded(ALreverbState *State, const ALsizei todo, ALfloat fade, + ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) +{ + ALfloat (*restrict temps)[MAX_UPDATE_SAMPLES] = State->TempSamples; + const DelayLineI early_delay = State->Early.Delay; + const DelayLineI main_delay = State->Delay; + ALsizei early_feedb_tap[NUM_LINES][2]; + ALfloat early_feedb_coeff[NUM_LINES]; + const ALfloat mixX = State->MixX; + const ALfloat mixY = State->MixY; + ALsizei offset = State->Offset; + ALsizei late_feed_tap; + ALsizei i, j; + + ASSUME(todo > 0); + + for(j = 0;j < NUM_LINES;j++) + { + ALsizei early_delay_tap0 = offset - State->EarlyDelayTap[j][0]; + ALsizei early_delay_tap1 = offset - State->EarlyDelayTap[j][1]; + ALfloat coeff = State->EarlyDelayCoeff[j]; + /* Step a per-line copy of the fade for every sample, so the cross-fade + * between the two taps advances per sample instead of once per update. + * The copy leaves 'fade' untouched for the all-pass and the loop below. + */ + ALfloat fader = fade; + for(i = 0;i < todo;i++) + { + temps[j][i] = FadedDelayLineOut(&main_delay, + early_delay_tap0++, early_delay_tap1++, j, fader + ) * coeff; + fader += FadeStep; + } + } + + VectorAllpass_Faded(temps, offset, State->ApFeedCoeff, mixX, mixY, fade, todo, + &State->Early.VecAp); + + for(j = 0;j < NUM_LINES;j++) + { + early_feedb_tap[j][0] = offset - State->Early.Offset[j][0]; + early_feedb_tap[j][1] = offset - State->Early.Offset[j][1]; + early_feedb_coeff[j] = State->Early.Coeff[j]; + } + late_feed_tap = offset - State->LateFeedTap; + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + + for(j = 0;j < NUM_LINES;j++) + f[j] = temps[j][i]; + + DelayLineIn4Rev(&early_delay, offset, f); + for(j = 0;j < NUM_LINES;j++) + { + f[j] += FadedDelayLineOut(&early_delay, + early_feedb_tap[j][0]++, early_feedb_tap[j][1]++, j, fade + ) * early_feedb_coeff[j]; + out[j][i] = f[j]; + } + fade += FadeStep; + + VectorScatterRevDelayIn(&main_delay, late_feed_tap++, f, mixX, mixY); + offset++; + } } -DECL_TEMPLATE(Unfaded) -DECL_TEMPLATE(Faded) -#undef DECL_TEMPLATE /* Applies the two T60 damping filter sections.
*/ static inline void LateT60Filter(ALfloat *restrict samples, const ALsizei todo, T60Filter *filter) @@ -1452,6 +1563,8 @@ static inline void LateT60Filter(ALfloat *restrict samples, const ALsizei todo, ALfloat lfz = filter->LFState; ALsizei i; + ASSUME(todo > 0); + for(i = 0;i < todo;i++) { ALfloat in = samples[i]; @@ -1471,8 +1584,8 @@ static inline void LateT60Filter(ALfloat *restrict samples, const ALsizei todo, /* This generates the reverb tail using a modified feed-back delay network * (FDN). * - * Results from the early reflections are attenuated by the density gain and - * mixed with the output from the late delay lines. + * Results from the early reflections are mixed with the output from the late + * delay lines. * * The late response is then completed by T60 and all-pass filtering the mix. * @@ -1482,61 +1595,99 @@ static inline void LateT60Filter(ALfloat *restrict samples, const ALsizei todo, * Two variations are made, one for for transitional (cross-faded) delay line * processing and one for non-transitional processing. 
*/ -#define DECL_TEMPLATE(T) \ -static void LateReverb_##T(ALreverbState *State, const ALsizei todo, \ - ALfloat fade, \ - ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) \ -{ \ - ALfloat (*restrict temps)[MAX_UPDATE_SAMPLES] = State->TempSamples; \ - const ALfloat apFeedCoeff = State->ApFeedCoeff; \ - const ALfloat mixX = State->MixX; \ - const ALfloat mixY = State->MixY; \ - ALsizei offset; \ - ALsizei i, j; \ - \ - for(j = 0;j < NUM_LINES;j++) \ - { \ - ALfloat fader = fade; \ - offset = State->Offset; \ - for(i = 0;i < todo;i++) \ - { \ - temps[j][i] = T##DelayLineOut(&State->Delay, \ - offset - State->LateDelayTap[j][0], \ - offset - State->LateDelayTap[j][1], j, fader \ - ) + T##DelayLineOut(&State->Late.Delay, \ - offset - State->Late.Offset[j][0], \ - offset - State->Late.Offset[j][1], j, fader \ - ); \ - ++offset; \ - fader += FadeStep; \ - } \ - LateT60Filter(temps[j], todo, &State->Late.T60[j]); \ - } \ - \ - offset = State->Offset; \ - for(i = 0;i < todo;i++) \ - { \ - ALfloat f[NUM_LINES], fr[NUM_LINES]; \ - for(j = 0;j < NUM_LINES;j++) \ - fr[j] = temps[j][i]; \ - \ - VectorAllpass_##T(f, fr, offset, apFeedCoeff, mixX, mixY, fade, \ - &State->Late.VecAp); \ - \ - for(j = 0;j < NUM_LINES;j++) \ - out[j][i] = f[j]; \ - \ - VectorPartialScatterRev(fr, f, mixX, mixY); \ - \ - DelayLineIn4(&State->Late.Delay, offset, fr); \ - \ - offset++; \ - fade += FadeStep; \ - } \ +static void LateReverb_Unfaded(ALreverbState *State, const ALsizei todo, + ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) +{ + ALfloat (*restrict temps)[MAX_UPDATE_SAMPLES] = State->TempSamples; + const DelayLineI late_delay = State->Late.Delay; + const DelayLineI main_delay = State->Delay; + const ALfloat mixX = State->MixX; + const ALfloat mixY = State->MixY; + ALsizei offset = State->Offset; + ALsizei i, j; + + ASSUME(todo > 0); + + /* First, load decorrelated samples from the main and feedback delay lines. + * Filter the signal to apply its frequency-dependent decay. 
+ */ + for(j = 0;j < NUM_LINES;j++) + { + ALsizei late_delay_tap = offset - State->LateDelayTap[j][0]; + ALsizei late_feedb_tap = offset - State->Late.Offset[j][0]; + for(i = 0;i < todo;i++) + temps[j][i] = DelayLineOut(&main_delay, late_delay_tap++, j) + + DelayLineOut(&late_delay, late_feedb_tap++, j); + LateT60Filter(temps[j], todo, &State->Late.T60[j]); + } + + /* Apply a vector all-pass to improve micro-surface diffusion, and write + * out the results for mixing. + */ + VectorAllpass_Unfaded(temps, offset, State->ApFeedCoeff, mixX, mixY, todo, &State->Late.VecAp); + + for(j = 0;j < NUM_LINES;j++) + memcpy(out[j], temps[j], todo*sizeof(ALfloat)); + + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + for(j = 0;j < NUM_LINES;j++) + f[j] = temps[j][i]; + + /* Finally, scatter and bounce the results to refeed the feedback + * buffer. + */ + VectorScatterRevDelayIn(&late_delay, offset, f, mixX, mixY); + offset++; + } +} +static void LateReverb_Faded(ALreverbState *State, const ALsizei todo, ALfloat fade, + ALfloat (*restrict out)[MAX_UPDATE_SAMPLES]) +{ + ALfloat (*restrict temps)[MAX_UPDATE_SAMPLES] = State->TempSamples; + const DelayLineI late_delay = State->Late.Delay; + const DelayLineI main_delay = State->Delay; + const ALfloat mixX = State->MixX; + const ALfloat mixY = State->MixY; + ALsizei offset = State->Offset; + ALsizei i, j; + + ASSUME(todo > 0); + + for(j = 0;j < NUM_LINES;j++) + { + ALsizei late_delay_tap0 = offset - State->LateDelayTap[j][0]; + ALsizei late_delay_tap1 = offset - State->LateDelayTap[j][1]; + ALsizei late_feedb_tap0 = offset - State->Late.Offset[j][0]; + ALsizei late_feedb_tap1 = offset - State->Late.Offset[j][1]; + ALfloat fader = fade; + for(i = 0;i < todo;i++) + { + temps[j][i] = + FadedDelayLineOut(&main_delay, late_delay_tap0++, late_delay_tap1++, j, fader) + + FadedDelayLineOut(&late_delay, late_feedb_tap0++, late_feedb_tap1++, j, fader); + fader += FadeStep; + } + LateT60Filter(temps[j], todo, &State->Late.T60[j]); + } + + 
VectorAllpass_Faded(temps, offset, State->ApFeedCoeff, mixX, mixY, fade, todo, + &State->Late.VecAp); + + for(j = 0;j < NUM_LINES;j++) + memcpy(out[j], temps[j], todo*sizeof(ALfloat)); + + for(i = 0;i < todo;i++) + { + ALfloat f[NUM_LINES]; + for(j = 0;j < NUM_LINES;j++) + f[j] = temps[j][i]; + + VectorScatterRevDelayIn(&late_delay, offset, f, mixX, mixY); + offset++; + } } -DECL_TEMPLATE(Unfaded) -DECL_TEMPLATE(Faded) -#undef DECL_TEMPLATE static ALvoid ALreverbState_process(ALreverbState *State, ALsizei SamplesToDo, const ALfloat (*restrict SamplesIn)[BUFFERSIZE], ALfloat (*restrict SamplesOut)[BUFFERSIZE], ALsizei NumChannels) { @@ -1572,10 +1723,10 @@ static ALvoid ALreverbState_process(ALreverbState *State, ALsizei SamplesToDo, c DelayLineIn(&State->Delay, State->Offset, c, samples[1], todo); } - if(LIKELY(fadeCount >= FADE_SAMPLES)) + if(UNLIKELY(fadeCount < FADE_SAMPLES)) { - /* Generate and mix early reflections. */ - EarlyReflection_Unfaded(State, todo, fade, samples); + /* Generate early reflections. */ + EarlyReflection_Faded(State, todo, fade, samples); /* Mix the A-Format results to output, implicitly converting back * to B-Format. */ @@ -1585,24 +1736,6 @@ static ALvoid ALreverbState_process(ALreverbState *State, ALsizei SamplesToDo, c SamplesToDo-base, base, todo ); - /* Generate and mix late reverb. */ - LateReverb_Unfaded(State, todo, fade, samples); - for(c = 0;c < NUM_LINES;c++) - MixSamples(samples[c], NumChannels, SamplesOut, - State->Late.CurrentGain[c], State->Late.PanGain[c], - SamplesToDo-base, base, todo - ); - } - else - { - /* Generate early reflections. */ - EarlyReflection_Faded(State, todo, fade, samples); - for(c = 0;c < NUM_LINES;c++) - MixSamples(samples[c], NumChannels, SamplesOut, - State->Early.CurrentGain[c], State->Early.PanGain[c], - SamplesToDo-base, base, todo - ); - /* Generate and mix late reverb. 
*/ LateReverb_Faded(State, todo, fade, samples); for(c = 0;c < NUM_LINES;c++) @@ -1630,6 +1763,24 @@ static ALvoid ALreverbState_process(ALreverbState *State, ALsizei SamplesToDo, c } } } + else + { + /* Generate and mix early reflections. */ + EarlyReflection_Unfaded(State, todo, samples); + for(c = 0;c < NUM_LINES;c++) + MixSamples(samples[c], NumChannels, SamplesOut, + State->Early.CurrentGain[c], State->Early.PanGain[c], + SamplesToDo-base, base, todo + ); + + /* Generate and mix late reverb. */ + LateReverb_Unfaded(State, todo, samples); + for(c = 0;c < NUM_LINES;c++) + MixSamples(samples[c], NumChannels, SamplesOut, + State->Late.CurrentGain[c], State->Late.PanGain[c], + SamplesToDo-base, base, todo + ); + } /* Step all delays forward. */ State->Offset += todo; -- cgit v1.2.3