Move a couple more things to core

author: Chris Robinson <[email protected]> 2020-12-04 13:13:52 -0800
committer: Chris Robinson <[email protected]> 2020-12-04 13:13:52 -0800
commit: c4132b80ede60ead27fae595623ac61674ed166a (patch)
tree: 989d90c1c73f7fde3b97e9860a45478cd59aa2b7 /core
parent: 3a3094c88762dbeadda6418bbb6a060a85e29f3f (diff)
4 files changed, 517 insertions, 0 deletions
diff --git a/core/bs2b.cpp b/core/bs2b.cpp
new file mode 100644
index 00000000..00207bc0
--- /dev/null
+++ b/core/bs2b.cpp
@@ -0,0 +1,183 @@
+/*-
+ * Copyright (c) 2005 Boris Mikhaylov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+
+#include "bs2b.h"
+#include "math_defs.h"
+
+
+/* Set up all data. */
+static void init(struct bs2b *bs2b)
+{
+    float Fc_lo, Fc_hi;
+    float G_lo, G_hi;
+    float x, g;
+
+    switch(bs2b->level)
+    {
+    case BS2B_LOW_CLEVEL: /* Low crossfeed level */
+        Fc_lo = 360.0f;
+        Fc_hi = 501.0f;
+        G_lo  = 0.398107170553497f;
+        G_hi  = 0.205671765275719f;
+        break;
+
+    case BS2B_MIDDLE_CLEVEL: /* Middle crossfeed level */
+        Fc_lo = 500.0f;
+        Fc_hi = 711.0f;
+        G_lo  = 0.459726988530872f;
+        G_hi  = 0.228208484414988f;
+        break;
+
+    case BS2B_HIGH_CLEVEL: /* High crossfeed level (virtual speakers are closer to itself) */
+        Fc_lo = 700.0f;
+        Fc_hi = 1021.0f;
+        G_lo  = 0.530884444230988f;
+        G_hi  = 0.250105790667544f;
+        break;
+
+    case BS2B_LOW_ECLEVEL: /* Low easy crossfeed level */
+        Fc_lo = 360.0f;
+        Fc_hi = 494.0f;
+        G_lo  = 0.316227766016838f;
+        G_hi  = 0.168236228897329f;
+        break;
+
+    case BS2B_MIDDLE_ECLEVEL: /* Middle easy crossfeed level */
+        Fc_lo = 500.0f;
+        Fc_hi = 689.0f;
+        G_lo  = 0.354813389233575f;
+        G_hi  = 0.187169483835901f;
+        break;
+
+    default: /* High easy crossfeed level */
+        bs2b->level = BS2B_HIGH_ECLEVEL;
+
+        Fc_lo = 700.0f;
+        Fc_hi = 975.0f;
+        G_lo  = 0.398107170553497f;
+        G_hi  = 0.205671765275719f;
+        break;
+    } /* switch */
+
+    g = 1.0f / (1.0f - G_hi + G_lo);
+
+    /* $fc = $Fc / $s;
+     * $d  = 1 / 2 / pi / $fc;
+     * $x  = exp(-1 / $d);
+     */
+    x           = std::exp(-al::MathDefs<float>::Tau() * Fc_lo / static_cast<float>(bs2b->srate));
+    bs2b->b1_lo = x;
+    bs2b->a0_lo = G_lo * (1.0f - x) * g;
+
+    x           = std::exp(-al::MathDefs<float>::Tau() * Fc_hi / static_cast<float>(bs2b->srate));
+    bs2b->b1_hi = x;
+    bs2b->a0_hi = (1.0f - G_hi * (1.0f - x)) * g;
+    bs2b->a1_hi = -x * g;
+} /* init */
+
+
+/* Exported functions.
+ * See descriptions in "bs2b.h"
+ */
+
+void bs2b_set_params(struct bs2b *bs2b, int level, int srate)
+{
+    if(srate <= 0) srate = 1;
+
+    bs2b->level = level;
+    bs2b->srate = srate;
+    init(bs2b);
+} /* bs2b_set_params */
+
+int bs2b_get_level(struct bs2b *bs2b)
+{
+    return bs2b->level;
+} /* bs2b_get_level */
+
+int bs2b_get_srate(struct bs2b *bs2b)
+{
+    return bs2b->srate;
+} /* bs2b_get_srate */
+
+void bs2b_clear(struct bs2b *bs2b)
+{
+    std::fill(std::begin(bs2b->history), std::end(bs2b->history), bs2b::t_last_sample{});
+} /* bs2b_clear */
+
+void bs2b_cross_feed(struct bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo)
+{
+    const float a0_lo{bs2b->a0_lo};
+    const float b1_lo{bs2b->b1_lo};
+    const float a0_hi{bs2b->a0_hi};
+    const float a1_hi{bs2b->a1_hi};
+    const float b1_hi{bs2b->b1_hi};
+    float lsamples[128][2];
+    float rsamples[128][2];
+
+    for(size_t base{0};base < SamplesToDo;)
+    {
+        const size_t todo{std::min<size_t>(128, SamplesToDo-base)};
+
+        /* Process left input */
+        float z_lo{bs2b->history[0].lo};
+        float z_hi{bs2b->history[0].hi};
+        for(size_t i{0};i < todo;i++)
+        {
+            lsamples[i][0] = a0_lo*Left[i] + z_lo;
+            z_lo = b1_lo*lsamples[i][0];
+
+            lsamples[i][1] = a0_hi*Left[i] + z_hi;
+            z_hi = a1_hi*Left[i] + b1_hi*lsamples[i][1];
+        }
+        bs2b->history[0].lo = z_lo;
+        bs2b->history[0].hi = z_hi;
+
+        /* Process right input */
+        z_lo = bs2b->history[1].lo;
+        z_hi = bs2b->history[1].hi;
+        for(size_t i{0};i < todo;i++)
+        {
+            rsamples[i][0] = a0_lo*Right[i] + z_lo;
+            z_lo = b1_lo*rsamples[i][0];
+
+            rsamples[i][1] = a0_hi*Right[i] + z_hi;
+            z_hi = a1_hi*Right[i] + b1_hi*rsamples[i][1];
+        }
+        bs2b->history[1].lo = z_lo;
+        bs2b->history[1].hi = z_hi;
+
+        /* Crossfeed */
+        for(size_t i{0};i < todo;i++)
+            *(Left++) = lsamples[i][1] + rsamples[i][0];
+        for(size_t i{0};i < todo;i++)
+            *(Right++) = rsamples[i][1] + lsamples[i][0];
+
+        base += todo;
+    }
+} /* bs2b_cross_feed */
diff --git a/core/bs2b.h b/core/bs2b.h
new file mode 100644
index 00000000..4d0b9dd8
--- /dev/null
+++ b/core/bs2b.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2005 Boris Mikhaylov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CORE_BS2B_H
+#define CORE_BS2B_H
+
+#include "almalloc.h"
+
+/* Number of crossfeed levels */
+#define BS2B_CLEVELS           3
+
+/* Normal crossfeed levels */
+#define BS2B_HIGH_CLEVEL       3
+#define BS2B_MIDDLE_CLEVEL     2
+#define BS2B_LOW_CLEVEL        1
+
+/* Easy crossfeed levels */
+#define BS2B_HIGH_ECLEVEL      BS2B_HIGH_CLEVEL    + BS2B_CLEVELS
+#define BS2B_MIDDLE_ECLEVEL    BS2B_MIDDLE_CLEVEL  + BS2B_CLEVELS
+#define BS2B_LOW_ECLEVEL       BS2B_LOW_CLEVEL     + BS2B_CLEVELS
+
+/* Default crossfeed levels */
+#define BS2B_DEFAULT_CLEVEL    BS2B_HIGH_ECLEVEL
+/* Default sample rate (Hz) */
+#define BS2B_DEFAULT_SRATE     44100
+
+struct bs2b {
+    int level;  /* Crossfeed level */
+    int srate;   /* Sample rate (Hz) */
+
+    /* Lowpass IIR filter coefficients */
+    float a0_lo;
+    float b1_lo;
+
+    /* Highboost IIR filter coefficients */
+    float a0_hi;
+    float a1_hi;
+    float b1_hi;
+
+    /* Buffer of filter history
+     * [0] - first channel, [1] - second channel
+     */
+    struct t_last_sample {
+        float lo;
+        float hi;
+    } history[2];
+
+    DEF_NEWDEL(bs2b)
+};
+
+/* Clear buffers and set new coefficients with new crossfeed level and sample
+ * rate values.
+ * level - crossfeed level of *LEVEL values.
+ * srate - sample rate by Hz.
+ */
+void bs2b_set_params(bs2b *bs2b, int level, int srate);
+
+/* Return current crossfeed level value */
+int bs2b_get_level(bs2b *bs2b);
+
+/* Return current sample rate value */
+int bs2b_get_srate(bs2b *bs2b);
+
+/* Clear buffer */
+void bs2b_clear(bs2b *bs2b);
+
+void bs2b_cross_feed(bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo);
+
+#endif /* CORE_BS2B_H */
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
new file mode 100644
index 00000000..a415f07c
--- /dev/null
+++ b/core/uhjfilter.cpp
@@ -0,0 +1,206 @@
+
+#include "config.h"
+
+#include "uhjfilter.h"
+
+#ifdef HAVE_SSE_INTRINSICS
+#include <xmmintrin.h>
+#endif
+
+#include <algorithm>
+#include <iterator>
+
+#include "alcomplex.h"
+#include "alnumeric.h"
+#include "opthelpers.h"
+
+
+namespace {
+
+using complex_d = std::complex<double>;
+
+std::array<float,Uhj2Encoder::sFilterSize> GenerateFilter()
+{
+    /* Some notes on this filter construction.
+     *
+     * A wide-band phase-shift filter needs a delay to maintain linearity. A
+     * dirac impulse in the center of a time-domain buffer represents a filter
+     * passing all frequencies through as-is with a pure delay. Converting that
+     * to the frequency domain, adjusting the phase of each frequency bin by
+     * +90 degrees, then converting back to the time domain, results in a FIR
+     * filter that applies a +90 degree wide-band phase-shift.
+     *
+     * A particularly notable aspect of the time-domain filter response is that
+     * every other coefficient is 0. This allows doubling the effective size of
+     * the filter, by storing only the non-0 coefficients and double-stepping
+     * over the input to apply it.
+     *
+     * Additionally, the resulting filter is independent of the sample rate.
+     * The same filter can be applied regardless of the device's sample rate
+     * and achieve the same effect.
+     */
+    constexpr size_t fft_size{Uhj2Encoder::sFilterSize * 2};
+    constexpr size_t half_size{fft_size / 2};
+
+    /* Generate a frequency domain impulse with a +90 degree phase offset.
+     * Reconstruct the mirrored frequencies to convert to the time domain.
+     */
+    auto fftBuffer = std::make_unique<complex_d[]>(fft_size);
+    std::fill_n(fftBuffer.get(), fft_size, complex_d{});
+    fftBuffer[half_size] = 1.0;
+
+    forward_fft({fftBuffer.get(), fft_size});
+    for(size_t i{0};i < half_size+1;++i)
+        fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
+    for(size_t i{half_size+1};i < fft_size;++i)
+        fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
+    inverse_fft({fftBuffer.get(), fft_size});
+
+    /* Reverse the filter for simpler processing, and store only the non-0
+     * coefficients.
+     */
+    auto ret = std::make_unique<std::array<float,Uhj2Encoder::sFilterSize>>();
+    auto fftiter = fftBuffer.get() + half_size + (Uhj2Encoder::sFilterSize-1);
+    for(float &coeff : *ret)
+    {
+        coeff = static_cast<float>(fftiter->real() / double{fft_size});
+        fftiter -= 2;
+    }
+    return *ret;
+}
+alignas(16) const auto PShiftCoeffs = GenerateFilter();
+
+
+void allpass_process(al::span<float> dst, const float *RESTRICT src)
+{
+#ifdef HAVE_SSE_INTRINSICS
+    size_t pos{0};
+    if(size_t todo{dst.size()>>1})
+    {
+        do {
+            __m128 r04{_mm_setzero_ps()};
+            __m128 r14{_mm_setzero_ps()};
+            for(size_t j{0};j < PShiftCoeffs.size();j+=4)
+            {
+                const __m128 coeffs{_mm_load_ps(&PShiftCoeffs[j])};
+                const __m128 s0{_mm_loadu_ps(&src[j*2])};
+                const __m128 s1{_mm_loadu_ps(&src[j*2 + 4])};
+
+                __m128 s{_mm_shuffle_ps(s0, s1, _MM_SHUFFLE(2, 0, 2, 0))};
+                r04 = _mm_add_ps(r04, _mm_mul_ps(s, coeffs));
+
+                s = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(3, 1, 3, 1));
+                r14 = _mm_add_ps(r14, _mm_mul_ps(s, coeffs));
+            }
+            r04 = _mm_add_ps(r04, _mm_shuffle_ps(r04, r04, _MM_SHUFFLE(0, 1, 2, 3)));
+            r04 = _mm_add_ps(r04, _mm_movehl_ps(r04, r04));
+            dst[pos++] += _mm_cvtss_f32(r04);
+
+            r14 = _mm_add_ps(r14, _mm_shuffle_ps(r14, r14, _MM_SHUFFLE(0, 1, 2, 3)));
+            r14 = _mm_add_ps(r14, _mm_movehl_ps(r14, r14));
+            dst[pos++] += _mm_cvtss_f32(r14);
+
+            src += 2;
+        } while(--todo);
+    }
+    if((dst.size()&1))
+    {
+        __m128 r4{_mm_setzero_ps()};
+        for(size_t j{0};j < PShiftCoeffs.size();j+=4)
+        {
+            const __m128 coeffs{_mm_load_ps(&PShiftCoeffs[j])};
+            /* NOTE: This could alternatively be done with two unaligned loads
+             * and a shuffle. Which would be better?
+             */
+            const __m128 s{_mm_setr_ps(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])};
+            r4 = _mm_add_ps(r4, _mm_mul_ps(s, coeffs));
+        }
+        r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
+        r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
+
+        dst[pos] += _mm_cvtss_f32(r4);
+    }
+
+#else
+
+    for(float &output : dst)
+    {
+        float ret{0.0f};
+        for(size_t j{0};j < PShiftCoeffs.size();++j)
+            ret += src[j*2] * PShiftCoeffs[j];
+
+        output += ret;
+        ++src;
+    }
+#endif
+}
+
+} // namespace
+
+
+/* Encoding 2-channel UHJ from B-Format is done as:
+ *
+ * S = 0.9396926*W + 0.1855740*X
+ * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y
+ *
+ * Left = (S + D)/2.0
+ * Right = (S - D)/2.0
+ *
+ * where j is a wide-band +90 degree phase shift.
+ *
+ * The phase shift is done using a FIR filter derived from an FFT'd impulse
+ * with the desired shift.
+ */
+
+void Uhj2Encoder::encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+    const FloatBufferLine *InSamples, const size_t SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    float *RESTRICT left{al::assume_aligned<16>(LeftOut.data())};
+    float *RESTRICT right{al::assume_aligned<16>(RightOut.data())};
+
+    const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0].data())};
+    const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1].data())};
+    const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2].data())};
+
+    /* Combine the previously delayed mid/side signal with the input. */
+
+    /* S = 0.9396926*W + 0.1855740*X */
+    auto miditer = std::copy(mMidDelay.cbegin(), mMidDelay.cend(), mMid.begin());
+    std::transform(winput, winput+SamplesToDo, xinput, miditer,
+        [](const float w, const float x) noexcept -> float
+        { return 0.9396926f*w + 0.1855740f*x; });
+
+    /* D = 0.6554516*Y */
+    auto sideiter = std::copy(mSideDelay.cbegin(), mSideDelay.cend(), mSide.begin());
+    std::transform(yinput, yinput+SamplesToDo, sideiter,
+        [](const float y) noexcept -> float { return 0.6554516f*y; });
+
+    /* Include any existing direct signal in the mid/side buffers. */
+    for(size_t i{0};i < SamplesToDo;++i,++miditer)
+        *miditer += left[i] + right[i];
+    for(size_t i{0};i < SamplesToDo;++i,++sideiter)
+        *sideiter += left[i] - right[i];
+
+    /* Copy the future samples back to the delay buffers for next time. */
+    std::copy_n(mMid.cbegin()+SamplesToDo, mMidDelay.size(), mMidDelay.begin());
+    std::copy_n(mSide.cbegin()+SamplesToDo, mSideDelay.size(), mSideDelay.begin());
+
+    /* Now add the all-passed signal into the side signal. */
+
+    /* D += j(-0.3420201*W + 0.5098604*X) */
+    auto tmpiter = std::copy(mSideHistory.cbegin(), mSideHistory.cend(), mTemp.begin());
+    std::transform(winput, winput+SamplesToDo, xinput, tmpiter,
+        [](const float w, const float x) noexcept -> float
+        { return -0.3420201f*w + 0.5098604f*x; });
+    std::copy_n(mTemp.cbegin()+SamplesToDo, mSideHistory.size(), mSideHistory.begin());
+    allpass_process({mSide.data(), SamplesToDo}, mTemp.data());
+
+    /* Left = (S + D)/2.0 */
+    for(size_t i{0};i < SamplesToDo;i++)
+        left[i] = (mMid[i] + mSide[i]) * 0.5f;
+    /* Right = (S - D)/2.0 */
+    for(size_t i{0};i < SamplesToDo;i++)
+        right[i] = (mMid[i] - mSide[i]) * 0.5f;
+}
diff --git a/core/uhjfilter.h b/core/uhjfilter.h
new file mode 100644
index 00000000..c2cb8722
--- /dev/null
+++ b/core/uhjfilter.h
@@ -0,0 +1,39 @@
+#ifndef CORE_UHJFILTER_H
+#define CORE_UHJFILTER_H
+
+#include <array>
+
+#include "almalloc.h"
+#include "bufferline.h"
+
+
+struct Uhj2Encoder {
+    /* A particular property of the filter allows it to cover nearly twice its
+     * length, so the filter size is also the effective delay (despite being
+     * center-aligned).
+     */
+    constexpr static size_t sFilterSize{128};
+
+    /* Delays for the unfiltered signal. */
+    alignas(16) std::array<float,sFilterSize> mMidDelay{};
+    alignas(16) std::array<float,sFilterSize> mSideDelay{};
+
+    alignas(16) std::array<float,BufferLineSize+sFilterSize> mMid{};
+    alignas(16) std::array<float,BufferLineSize+sFilterSize> mSide{};
+
+    /* History for the FIR filter. */
+    alignas(16) std::array<float,sFilterSize*2 - 1> mSideHistory{};
+
+    alignas(16) std::array<float,BufferLineSize + sFilterSize*2> mTemp{};
+
+    /**
+     * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input
+     * signal. The input must use FuMa channel ordering and scaling.
+     */
+    void encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+        const FloatBufferLine *InSamples, const size_t SamplesToDo);
+
+    DEF_NEWDEL(Uhj2Encoder)
+};
+
+#endif /* CORE_UHJFILTER_H */
author	Chris Robinson <[email protected]>	2020-12-04 13:13:52 -0800
committer	Chris Robinson <[email protected]>	2020-12-04 13:13:52 -0800
commit	c4132b80ede60ead27fae595623ac61674ed166a (patch)
tree	989d90c1c73f7fde3b97e9860a45478cd59aa2b7 /core
parent	3a3094c88762dbeadda6418bbb6a060a85e29f3f (diff)