diff options
author | Chris Robinson <[email protected]> | 2022-10-21 10:33:41 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2022-10-21 10:33:41 -0700 |
commit | ee40a2e7e4867a769765d447a15ac88832eb8aa0 (patch) | |
tree | 0ff9157bc237db41795d27dd731b0191289bb5e2 /core | |
parent | 53b63f329c3daa82f3c02ed43b67338af3a3d77d (diff) |
Add an IIR filter option for UHJ encoding/decoding
This uses the reversed-allpass trick to maintain linear phase, with a 256-
sample look-ahead/delay to minimize distortion. This should better preserve low
frequencies while maintaining a proper phase response.
Diffstat (limited to 'core')
-rw-r--r-- | core/uhjfilter.cpp | 273 | ||||
-rw-r--r-- | core/uhjfilter.h | 88 | ||||
-rw-r--r-- | core/voice.cpp | 53 |
3 files changed, 384 insertions, 30 deletions
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp index 4917ffa8..7b2c1191 100644 --- a/core/uhjfilter.cpp +++ b/core/uhjfilter.cpp @@ -12,7 +12,7 @@ #include "phase_shifter.h" -size_t UhjQuality{UhjLengthStd}; +UhjQualityType UhjQuality{UhjQualityType::Default}; namespace { @@ -27,6 +27,108 @@ struct GetPhaseShifter<UhjLengthLq> { static auto& Get() noexcept { return PShif template<> struct GetPhaseShifter<UhjLengthHq> { static auto& Get() noexcept { return PShiftHq; } }; + +constexpr float square(float x) noexcept +{ return x*x; } + +/* Filter coefficients for the 'base' all-pass IIR, which applies a frequency- + * dependent phase-shift of N degrees. The output of the filter requires a 1- + * sample delay. + */ +constexpr std::array<float,4> Filter1Coeff{{ + square(0.6923878f), square(0.9360654322959f), square(0.9882295226860f), + square(0.9987488452737f) +}}; +/* Filter coefficients for the shifted all-pass IIR, which applies a frequency- + * dependent phase-shift of N+90 degrees. + */ +constexpr std::array<float,4> Filter2Coeff{{ + square(0.4021921162426f), square(0.8561710882420f), square(0.9722909545651f), + square(0.9952884791278f) +}}; + +/* This applies the base all-pass filter in reverse, resulting in a phase-shift + * of -N degrees. Extra samples are provided at the back of the input to reduce + * the amount of error at the back of the output. + */ +void allpass_process_rev(const al::span<const float> src, float *RESTRICT dst) +{ + float z[4][2]{}; + + auto proc_sample = [&z](float x) noexcept -> float + { + for(size_t i{0};i < 4;++i) + { + const float y{x*Filter1Coeff[i] + z[i][0]}; + z[i][0] = z[i][1]; + z[i][1] = y*Filter1Coeff[i] - x; + x = y; + } + return x; + }; + std::transform(src.rbegin(), src.rend(), std::make_reverse_iterator(dst+src.size()), + proc_sample); +} + +/* This applies the shifted all-pass filter to the output of the base filter, + * resulting in a phase shift of -N + N + 90 degrees, or just 90 degrees. 
+ */ +void allpass_process(const al::span<UhjAllPassState,4> state, const al::span<const float> src, + const size_t forwardSamples, float *RESTRICT dst) +{ + float z[4][2]{{state[0].z[0], state[0].z[1]}, {state[1].z[0], state[1].z[1]}, + {state[2].z[0], state[2].z[1]}, {state[3].z[0], state[3].z[1]}}; + + auto proc_sample = [&z](float x) noexcept -> float + { + for(size_t i{0};i < 4;++i) + { + const float y{x*Filter2Coeff[i] + z[i][0]}; + z[i][0] = z[i][1]; + z[i][1] = y*Filter2Coeff[i] - x; + x = y; + } + return x; + }; + auto dstiter = std::transform(src.begin(), src.begin()+forwardSamples, dst, proc_sample); + for(size_t i{0};i < 4;++i) + { + state[i].z[0] = z[i][0]; + state[i].z[1] = z[i][1]; + } + + std::transform(src.begin()+forwardSamples, src.end(), dstiter, proc_sample); +} + +/* This applies the shifted all-pass filter to the output of the base filter, + * adding to the output buffer. + */ +void allpass_process_add(const al::span<UhjAllPassState,4> state, const al::span<const float> src, + float *RESTRICT dst) +{ + float z[4][2]{{state[0].z[0], state[0].z[1]}, {state[1].z[0], state[1].z[1]}, + {state[2].z[0], state[2].z[1]}, {state[3].z[0], state[3].z[1]}}; + + auto proc_sample = [&z](float x, const float dst) noexcept -> float + { + for(size_t i{0};i < 4;++i) + { + const float y{x*Filter2Coeff[i] + z[i][0]}; + z[i][0] = z[i][1]; + z[i][1] = y*Filter2Coeff[i] - x; + x = y; + } + return x + dst; + }; + std::transform(src.begin(), src.end(), dst, dst, proc_sample); + + for(size_t i{0};i < 4;++i) + { + state[i].z[0] = z[i][0]; + state[i].z[1] = z[i][1]; + } +} + } // namespace @@ -101,6 +203,50 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut, std::copy(mD.cbegin()+SamplesToDo, mD.cbegin()+SamplesToDo+sFilterDelay, mD.begin()); } +void UhjEncoderIIR::encode(float *LeftOut, float *RightOut, + const al::span<const float *const, 3> InSamples, const size_t SamplesToDo) +{ + ASSUME(SamplesToDo > 0); + + float *RESTRICT 
left{al::assume_aligned<16>(LeftOut)}; + float *RESTRICT right{al::assume_aligned<16>(RightOut)}; + + const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0])}; + const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1])}; + const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2])}; + + /* Combine the previously delayed S/D signal with the input. Include any + * existing direct signal with it. + */ + + /* S = 0.9396926*W + 0.1855740*X */ + for(size_t i{0};i < SamplesToDo;++i) + mS[sFilterDelay+i] = 0.9396926f*winput[i] + 0.1855740f*xinput[i] + (left[i] + right[i]); + + /* D = 0.6554516*Y */ + for(size_t i{0};i < SamplesToDo;++i) + mD[sFilterDelay+i] = 0.6554516f*yinput[i] + (left[i] - right[i]); + + /* D += j(-0.3420201*W + 0.5098604*X) */ + std::transform(winput, winput+SamplesToDo, xinput, mWXTemp.begin()+sFilterDelay, + [](const float w, const float x) noexcept { return -0.3420201f*w + 0.5098604f*x; }); + allpass_process_rev({mWXTemp.data()+1, SamplesToDo+sFilterDelay-1}, mRevTemp.data()); + allpass_process_add(mFilterWX, {mRevTemp.data(), SamplesToDo}, mD.data()); + + /* Left = (S + D)/2.0 */ + for(size_t i{0};i < SamplesToDo;i++) + left[i] = (mS[i] + mD[i]) * 0.5f; + /* Right = (S - D)/2.0 */ + for(size_t i{0};i < SamplesToDo;i++) + right[i] = (mS[i] - mD[i]) * 0.5f; + + /* Copy the future samples to the front for next time. 
*/ + std::copy(mS.cbegin()+SamplesToDo, mS.cbegin()+SamplesToDo+sFilterDelay, mS.begin()); + std::copy(mD.cbegin()+SamplesToDo, mD.cbegin()+SamplesToDo+sFilterDelay, mD.begin()); + std::copy(mWXTemp.cbegin()+SamplesToDo, mWXTemp.cbegin()+SamplesToDo+sFilterDelay, + mWXTemp.begin()); +} + /* Decoding UHJ is done as: * @@ -180,6 +326,65 @@ void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesT } } +void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo, + const size_t forwardSamples) +{ + static_assert(sFilterDelay <= sMaxDelay, "Filter delay is too large"); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + const float *RESTRICT t{al::assume_aligned<16>(samples[2])}; + + /* S = Left + Right */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mS[i] = left[i] + right[i]; + + /* D = Left - Right */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mD[i] = left[i] - right[i]; + + /* T */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mT[i] = t[i]; + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. 
*/ + std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sFilterDelay, mT.cbegin(), mTemp.begin(), + [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; }); + allpass_process_rev({mTemp.data()+1, samplesToDo+sFilterDelay-1}, mRevTemp.data()); + allpass_process(mFilterDT, {mRevTemp.data(), samplesToDo}, forwardSamples, xoutput); + + /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.981532f*mS[i] + 0.197484f*xoutput[i]; + /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.418496f*mS[i] - xoutput[i]; + + /* Precompute j*S and store in youtput. */ + allpass_process_rev({mS.data()+1, samplesToDo+sFilterDelay-1}, mRevTemp.data()); + allpass_process(mFilterS, {mRevTemp.data(), samplesToDo}, forwardSamples, youtput); + + /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i]; + + if(samples.size() > 3) + { + float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])}; + /* Z = 1.023332*Q */ + for(size_t i{0};i < samplesToDo;++i) + zoutput[i] = 1.023332f*zoutput[i]; + } +} + /* Super Stereo processing is done as: * @@ -265,6 +470,72 @@ void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t sa youtput[i] = 1.6822415f*mD[i] - 0.2156194f*youtput[i]; } +void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo, + const size_t forwardSamples) +{ + static_assert(sFilterDelay <= sMaxDelay, "Filter delay is too large"); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mS[i] = left[i] + right[i]; + + /* Pre-apply the width factor to the difference signal D. Smoothly + * interpolate when it changes. 
+ */ + const float wtarget{mWidthControl}; + const float wcurrent{unlikely(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth}; + if(likely(wtarget == wcurrent) || unlikely(forwardSamples == 0)) + { + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mD[i] = (left[i] - right[i]) * wcurrent; + mCurrentWidth = wcurrent; + } + else + { + const float wstep{(wtarget - wcurrent) / static_cast<float>(forwardSamples)}; + float fi{0.0f}; + size_t i{0}; + for(;i < forwardSamples;++i) + { + mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi); + fi += 1.0f; + } + for(;i < samplesToDo+sFilterDelay;++i) + mD[i] = (left[i] - right[i]) * wtarget; + mCurrentWidth = wtarget; + } + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Precompute j*D and store in xoutput. */ + allpass_process_rev({mD.data()+1, samplesToDo+sFilterDelay-1}, mRevTemp.data()); + allpass_process(mFilterD, {mRevTemp.data(), samplesToDo}, forwardSamples, xoutput); + + /* W = 0.6098637*S - 0.6896511*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.6098637f*mS[i] - 0.6896511f*xoutput[i]; + /* X = 0.8624776*S + 0.7626955*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.8624776f*mS[i] + 0.7626955f*xoutput[i]; + + /* Precompute j*S and store in youtput. 
*/ + allpass_process_rev({mS.data()+1, samplesToDo+sFilterDelay-1}, mRevTemp.data()); + allpass_process(mFilterS, {mRevTemp.data(), samplesToDo}, forwardSamples, xoutput); + + /* Y = 1.6822415*w*D - 0.2156194*j*S */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 1.6822415f*mD[i] - 0.2156194f*youtput[i]; +} + + template struct UhjEncoder<UhjLengthLq>; template struct UhjDecoder<UhjLengthLq>; template struct UhjStereoDecoder<UhjLengthLq>; diff --git a/core/uhjfilter.h b/core/uhjfilter.h index c6ec3363..a276cb06 100644 --- a/core/uhjfilter.h +++ b/core/uhjfilter.h @@ -11,9 +11,21 @@ static constexpr size_t UhjLengthLq{256}; static constexpr size_t UhjLengthHq{512}; -static constexpr size_t UhjLengthStd{UhjLengthLq}; -extern size_t UhjQuality; +enum class UhjQualityType : uint8_t { + IIR = 0, + FIR256, + FIR512, + Default = FIR256 +}; + +extern UhjQualityType UhjQuality; + + +struct UhjAllPassState { + /* Last two delayed components for direct form II. */ + float z[2]; +}; struct UhjEncoderBase { @@ -56,10 +68,39 @@ struct UhjEncoder final : public UhjEncoderBase { DEF_NEWDEL(UhjEncoder) }; +struct UhjEncoderIIR final : public UhjEncoderBase { + static constexpr size_t sFilterDelay{256}; + + /* Delays and processing storage for the unfiltered signal. */ + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mS{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mD{}; + + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mWXTemp{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mRevTemp{}; + + UhjAllPassState mFilterWX[4]; + + size_t getDelay() noexcept override { return sFilterDelay; } + + /** + * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input + * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa + * with an additional +3dB boost). 
+ */ + void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples, + const size_t SamplesToDo) override; + + DEF_NEWDEL(UhjEncoderIIR) +}; + struct DecoderBase { static constexpr size_t sMaxDelay{256}; + /* For 2-channel UHJ, shelf filters should use these LF responses. */ + static constexpr float sWLFScale{0.661f}; + static constexpr float sXYLFScale{1.293f}; + virtual ~DecoderBase() = default; virtual void decode(const al::span<float*> samples, const size_t samplesToDo, @@ -74,12 +115,9 @@ struct DecoderBase { template<size_t N> struct UhjDecoder final : public DecoderBase { + /* This isn't a true delay, just the number of extra input samples needed. */ static constexpr size_t sFilterDelay{N/2}; - /* For 2-channel UHJ, shelf filters should use these LF responses. */ - static constexpr float sWLFScale{0.661f}; - static constexpr float sXYLFScale{1.293f}; - alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mS{}; alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mD{}; alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mT{}; @@ -103,6 +141,25 @@ struct UhjDecoder final : public DecoderBase { DEF_NEWDEL(UhjDecoder) }; +struct UhjDecoderIIR final : public DecoderBase { + static constexpr size_t sFilterDelay{256}; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mS{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mD{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mT{}; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge + sFilterDelay> mTemp{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge + sFilterDelay> mRevTemp{}; + + UhjAllPassState mFilterDT[4]; + UhjAllPassState mFilterS[4]; + + void decode(const al::span<float*> samples, const size_t samplesToDo, + const size_t forwardSamples) override; + + DEF_NEWDEL(UhjDecoderIIR) +}; + template<size_t 
N> struct UhjStereoDecoder final : public DecoderBase { static constexpr size_t sFilterDelay{N/2}; @@ -129,4 +186,23 @@ struct UhjStereoDecoder final : public DecoderBase { DEF_NEWDEL(UhjStereoDecoder) }; +struct UhjStereoDecoderIIR final : public DecoderBase { + static constexpr size_t sFilterDelay{256}; + + float mCurrentWidth{-1.0f}; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mS{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mD{}; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge + sFilterDelay> mRevTemp{}; + + UhjAllPassState mFilterD[4]; + UhjAllPassState mFilterS[4]; + + void decode(const al::span<float*> samples, const size_t samplesToDo, + const size_t forwardSamples) override; + + DEF_NEWDEL(UhjStereoDecoderIIR) +}; + #endif /* CORE_UHJFILTER_H */ diff --git a/core/voice.cpp b/core/voice.cpp index 74363fc1..ae8582da 100644 --- a/core/voice.cpp +++ b/core/voice.cpp @@ -852,37 +852,44 @@ void Voice::prepare(DeviceBase *device) mPrevSamples.reserve(maxu(2, num_channels)); mPrevSamples.resize(num_channels); + mDecoder = nullptr; + mDecoderPadding = 0; if(mFmtChannels == FmtSuperStereo) { - if(UhjQuality >= UhjLengthHq) - { - mDecoder = std::make_unique<UhjStereoDecoder<UhjLengthHq>>(); - mDecoderPadding = UhjStereoDecoder<UhjLengthHq>::sFilterDelay; - } - else + switch(UhjQuality) { + case UhjQualityType::IIR: + mDecoder = std::make_unique<UhjStereoDecoderIIR>(); + mDecoderPadding = UhjStereoDecoderIIR::sFilterDelay; + break; + case UhjQualityType::FIR256: mDecoder = std::make_unique<UhjStereoDecoder<UhjLengthLq>>(); mDecoderPadding = UhjStereoDecoder<UhjLengthLq>::sFilterDelay; + break; + case UhjQualityType::FIR512: + mDecoder = std::make_unique<UhjStereoDecoder<UhjLengthHq>>(); + mDecoderPadding = UhjStereoDecoder<UhjLengthHq>::sFilterDelay; + break; } } else if(IsUHJ(mFmtChannels)) { - if(UhjQuality >= UhjLengthHq) - { - mDecoder = 
std::make_unique<UhjDecoder<UhjLengthHq>>(); - mDecoderPadding = UhjDecoder<UhjLengthHq>::sFilterDelay; - } - else + switch(UhjQuality) { + case UhjQualityType::IIR: + mDecoder = std::make_unique<UhjDecoderIIR>(); + mDecoderPadding = UhjDecoderIIR::sFilterDelay; + break; + case UhjQualityType::FIR256: mDecoder = std::make_unique<UhjDecoder<UhjLengthLq>>(); mDecoderPadding = UhjDecoder<UhjLengthLq>::sFilterDelay; + break; + case UhjQualityType::FIR512: + mDecoder = std::make_unique<UhjDecoder<UhjLengthHq>>(); + mDecoderPadding = UhjDecoder<UhjLengthHq>::sFilterDelay; + break; } } - else - { - mDecoder = nullptr; - mDecoderPadding = 0; - } /* Clear the stepping value explicitly so the mixer knows not to mix this * until the update gets applied. @@ -925,11 +932,11 @@ void Voice::prepare(DeviceBase *device) if(mFmtChannels == FmtUHJ2) { mChans[0].mAmbiHFScale = 1.0f; - mChans[0].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sWLFScale; + mChans[0].mAmbiLFScale = DecoderBase::sWLFScale; mChans[1].mAmbiHFScale = 1.0f; - mChans[1].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sXYLFScale; + mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale; mChans[2].mAmbiHFScale = 1.0f; - mChans[2].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sXYLFScale; + mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale; } mFlags.set(VoiceIsAmbisonic); } @@ -949,9 +956,9 @@ void Voice::prepare(DeviceBase *device) chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter; std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); } - mChans[0].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sWLFScale; - mChans[1].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sXYLFScale; - mChans[2].mAmbiLFScale = UhjDecoder<UhjLengthStd>::sXYLFScale; + mChans[0].mAmbiLFScale = DecoderBase::sWLFScale; + mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale; + mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale; mFlags.set(VoiceIsAmbisonic); } else |