aboutsummaryrefslogtreecommitdiffstats
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r--core/ambdec.cpp306
-rw-r--r--core/ambdec.h55
-rw-r--r--core/ambidefs.cpp308
-rw-r--r--core/ambidefs.h250
-rw-r--r--core/async_event.h55
-rw-r--r--core/bformatdec.cpp170
-rw-r--r--core/bformatdec.h71
-rw-r--r--core/bs2b.cpp183
-rw-r--r--core/bs2b.h89
-rw-r--r--core/bsinc_defs.h12
-rw-r--r--core/bsinc_tables.cpp295
-rw-r--r--core/bsinc_tables.h17
-rw-r--r--core/buffer_storage.cpp81
-rw-r--r--core/buffer_storage.h115
-rw-r--r--core/bufferline.h17
-rw-r--r--core/context.cpp164
-rw-r--r--core/context.h171
-rw-r--r--core/converter.cpp346
-rw-r--r--core/converter.h66
-rw-r--r--core/cpu_caps.cpp141
-rw-r--r--core/cpu_caps.h26
-rw-r--r--core/cubic_defs.h13
-rw-r--r--core/cubic_tables.cpp59
-rw-r--r--core/cubic_tables.h17
-rw-r--r--core/dbus_wrap.cpp46
-rw-r--r--core/dbus_wrap.h87
-rw-r--r--core/devformat.cpp67
-rw-r--r--core/devformat.h122
-rw-r--r--core/device.cpp23
-rw-r--r--core/device.h345
-rw-r--r--core/effects/base.h197
-rw-r--r--core/effectslot.cpp19
-rw-r--r--core/effectslot.h89
-rw-r--r--core/except.cpp30
-rw-r--r--core/except.h31
-rw-r--r--core/filters/biquad.cpp168
-rw-r--r--core/filters/biquad.h144
-rw-r--r--core/filters/nfc.cpp367
-rw-r--r--core/filters/nfc.h63
-rw-r--r--core/filters/splitter.cpp179
-rw-r--r--core/filters/splitter.h40
-rw-r--r--core/fmt_traits.cpp79
-rw-r--r--core/fmt_traits.h81
-rw-r--r--core/fpu_ctrl.cpp61
-rw-r--r--core/fpu_ctrl.h21
-rw-r--r--core/front_stablizer.h31
-rw-r--r--core/helpers.cpp569
-rw-r--r--core/helpers.h18
-rw-r--r--core/hrtf.cpp1473
-rw-r--r--core/hrtf.h89
-rw-r--r--core/logging.cpp89
-rw-r--r--core/logging.h51
-rw-r--r--core/mastering.cpp439
-rw-r--r--core/mastering.h105
-rw-r--r--core/mixer.cpp95
-rw-r--r--core/mixer.h109
-rw-r--r--core/mixer/defs.h109
-rw-r--r--core/mixer/hrtfbase.h129
-rw-r--r--core/mixer/hrtfdefs.h53
-rw-r--r--core/mixer/mixer_c.cpp218
-rw-r--r--core/mixer/mixer_neon.cpp362
-rw-r--r--core/mixer/mixer_sse.cpp327
-rw-r--r--core/mixer/mixer_sse2.cpp90
-rw-r--r--core/mixer/mixer_sse3.cpp0
-rw-r--r--core/mixer/mixer_sse41.cpp95
-rw-r--r--core/resampler_limits.h12
-rw-r--r--core/rtkit.cpp236
-rw-r--r--core/rtkit.h71
-rw-r--r--core/uhjfilter.cpp539
-rw-r--r--core/uhjfilter.h234
-rw-r--r--core/uiddefs.cpp37
-rw-r--r--core/voice.cpp1304
-rw-r--r--core/voice.h280
-rw-r--r--core/voice_change.h31
74 files changed, 12481 insertions, 0 deletions
diff --git a/core/ambdec.cpp b/core/ambdec.cpp
new file mode 100644
index 00000000..8ca182c4
--- /dev/null
+++ b/core/ambdec.cpp
@@ -0,0 +1,306 @@
+
+#include "config.h"
+
+#include "ambdec.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <iterator>
+#include <sstream>
+#include <string>
+
+#include "albit.h"
+#include "alfstream.h"
+#include "alspan.h"
+#include "opthelpers.h"
+
+
+namespace {
+
+/* Extracts the next whitespace-delimited token from the stream. An empty
+ * string is returned when no token could be read.
+ */
+std::string read_word(std::istream &f)
+{
+    std::string token;
+    if(!(f >> token))
+        token.clear();
+    return token;
+}
+
+/* Reports whether nothing but whitespace and/or a '#' comment remains in
+ * buffer from endpos onward. An endpos at or past the end of the buffer counts
+ * as being at the end.
+ */
+bool is_at_end(const std::string &buffer, std::size_t endpos)
+{
+    /* std::isspace has undefined behavior for negative values other than EOF,
+     * so go through unsigned char in case the line holds non-ASCII bytes.
+     */
+    while(endpos < buffer.length()
+        && std::isspace(static_cast<unsigned char>(buffer[endpos])))
+        ++endpos;
+    return endpos >= buffer.length() || buffer[endpos] == '#';
+}
+
+
+enum class ReaderScope { /* Section of the file that load() is currently parsing. */
+ Global, /* Outside any block; the initial scope, restored by "/}". */
+ Speakers, /* Inside "/speakers/{"; only add_spkr is accepted. */
+ LFMatrix, /* Inside "/lfmatrix/{"; rows go to the low-frequency matrix. */
+ HFMatrix, /* Inside "/hfmatrix/{" or, for single-band files, "/matrix/{". */
+};
+
+/* Builds a parse error message of the form "Line <linenum>: <message>", where
+ * the message is produced from the printf-style format string and its
+ * arguments. The returned optional always holds a string, whose length is
+ * exactly that of the formatted text.
+ */
+#ifdef __USE_MINGW_ANSI_STDIO
+[[gnu::format(gnu_printf,2,3)]]
+#else
+[[gnu::format(printf,2,3)]]
+#endif
+al::optional<std::string> make_error(size_t linenum, const char *fmt, ...)
+{
+    al::optional<std::string> ret;
+    auto &str = ret.emplace();
+
+    /* Start with a scratch size that fits most messages. */
+    str.resize(256);
+    int printed{std::snprintf(&str[0], str.length(), "Line %zu: ", linenum)};
+    if(printed < 0) printed = 0;
+    const auto plen = std::min(static_cast<size_t>(printed), str.length());
+
+    /* The argument list may have to be walked a second time if the first
+     * attempt gets truncated, so keep a copy of it.
+     */
+    std::va_list args, args2;
+    va_start(args, fmt);
+    va_copy(args2, args);
+    int msglen{std::vsnprintf(&str[plen], str.size()-plen, fmt, args)};
+    if(msglen >= 0 && static_cast<size_t>(msglen) >= str.size()-plen)
+    {
+        /* Didn't fit. Grow to the reported length, plus room for the null
+         * terminator, and format again.
+         */
+        str.resize(static_cast<size_t>(msglen) + plen + 1u);
+        msglen = std::vsnprintf(&str[plen], str.size()-plen, fmt, args2);
+    }
+    va_end(args2);
+    va_end(args);
+
+    /* Trim the unused scratch space (and the terminator vsnprintf wrote) so
+     * the string's size matches the text. A formatting error leaves only the
+     * line prefix.
+     */
+    str.resize(plen + static_cast<size_t>(std::max(msglen, 0)));
+
+    return ret;
+}
+
+} // namespace
+
+AmbDecConf::~AmbDecConf() = default; /* Declared in the header, defaulted here out of line. */
+
+
+/* Parses the AmbDec configuration file named by fname into this object.
+ *
+ * Returns al::nullopt once a valid "/end" command is reached. Otherwise it
+ * returns a message describing the problem, which is prefixed with the line
+ * number except when the file could not be opened. Fields that were parsed
+ * before a failure keep whatever was stored in them.
+ */
+al::optional<std::string> AmbDecConf::load(const char *fname) noexcept
+{
+    al::ifstream f{fname};
+    if(!f.is_open())
+        return std::string("Failed to open file \"")+fname+"\"";
+
+    ReaderScope scope{ReaderScope::Global};
+    /* Counts of the speakers and matrix rows stored so far. */
+    size_t speaker_pos{0};
+    size_t lfmatrix_pos{0};
+    size_t hfmatrix_pos{0};
+    size_t linenum{0};
+
+    std::string buffer;
+    while(f.good() && std::getline(f, buffer))
+    {
+        ++linenum;
+
+        std::istringstream istr{buffer};
+        std::string command{read_word(istr)};
+        /* Skip blank lines and comment lines. */
+        if(command.empty() || command[0] == '#')
+            continue;
+
+        if(command == "/}")
+        {
+            if(scope == ReaderScope::Global)
+                return make_error(linenum, "Unexpected /} in global scope");
+            scope = ReaderScope::Global;
+            continue;
+        }
+
+        if(scope == ReaderScope::Speakers)
+        {
+            if(command == "add_spkr")
+            {
+                if(speaker_pos == NumSpeakers)
+                    return make_error(linenum, "Too many speakers specified");
+
+                AmbDecConf::SpeakerConf &spkr = Speakers[speaker_pos++];
+                istr >> spkr.Name;
+                istr >> spkr.Distance;
+                istr >> spkr.Azimuth;
+                istr >> spkr.Elevation;
+                istr >> spkr.Connection;
+            }
+            else
+                return make_error(linenum, "Unexpected speakers command: %s", command.c_str());
+        }
+        else if(scope == ReaderScope::LFMatrix || scope == ReaderScope::HFMatrix)
+        {
+            /* Pick the gains, rows and row counter of the band being read. */
+            auto &gains = (scope == ReaderScope::LFMatrix) ? LFOrderGain : HFOrderGain;
+            auto *matrix = (scope == ReaderScope::LFMatrix) ? LFMatrix : HFMatrix;
+            auto &pos = (scope == ReaderScope::LFMatrix) ? lfmatrix_pos : hfmatrix_pos;
+
+            if(command == "order_gain")
+            {
+                /* Four gains are read, or five when the channel mask goes
+                 * beyond third-order. Values that don't fit in the gains
+                 * array are read and dropped.
+                 */
+                size_t toread{(ChanMask > Ambi3OrderMask) ? 5u : 4u};
+                std::size_t curgain{0u};
+                float value{};
+                while(toread)
+                {
+                    --toread;
+                    istr >> value;
+                    if(curgain < al::size(gains))
+                        gains[curgain++] = value;
+                }
+            }
+            else if(command == "add_row")
+            {
+                if(pos == NumSpeakers)
+                    return make_error(linenum, "Too many matrix rows specified");
+
+                unsigned int mask{ChanMask};
+
+                AmbDecConf::CoeffArray &mtxrow = matrix[pos++];
+                mtxrow.fill(0.0f);
+
+                /* One value is read for each bit set in the channel mask,
+                 * lowest bit first, and stored at that bit's index.
+                 */
+                float value{};
+                while(mask)
+                {
+                    auto idx = static_cast<unsigned>(al::countr_zero(mask));
+                    mask &= ~(1u << idx);
+
+                    istr >> value;
+                    if(idx < mtxrow.size())
+                        mtxrow[idx] = value;
+                }
+            }
+            else
+                return make_error(linenum, "Unexpected matrix command: %s", command.c_str());
+        }
+        // Global scope commands
+        else if(command == "/description")
+        {
+            /* The description is the rest of the line, with the surrounding
+             * whitespace trimmed.
+             */
+            while(istr.good() && std::isspace(istr.peek()))
+                istr.ignore();
+            std::getline(istr, Description);
+            /* Convert through unsigned char; std::isspace is undefined for
+             * negative char values.
+             */
+            while(!Description.empty()
+                && std::isspace(static_cast<unsigned char>(Description.back())))
+                Description.pop_back();
+        }
+        else if(command == "/version")
+        {
+            if(Version)
+                return make_error(linenum, "Duplicate version definition");
+            istr >> Version;
+            if(Version != 3)
+                return make_error(linenum, "Unsupported version: %d", Version);
+        }
+        else if(command == "/dec/chan_mask")
+        {
+            if(ChanMask)
+                return make_error(linenum, "Duplicate chan_mask definition");
+            istr >> std::hex >> ChanMask >> std::dec;
+
+            if(!ChanMask || ChanMask > Ambi4OrderMask)
+                return make_error(linenum, "Invalid chan_mask: 0x%x", ChanMask);
+            if(ChanMask > Ambi3OrderMask && CoeffScale == AmbDecScale::FuMa)
+                return make_error(linenum, "FuMa not compatible with over third-order");
+        }
+        else if(command == "/dec/freq_bands")
+        {
+            if(FreqBands)
+                return make_error(linenum, "Duplicate freq_bands");
+            istr >> FreqBands;
+            if(FreqBands != 1 && FreqBands != 2)
+                return make_error(linenum, "Invalid freq_bands: %u", FreqBands);
+        }
+        else if(command == "/dec/speakers")
+        {
+            if(NumSpeakers)
+                return make_error(linenum, "Duplicate speakers");
+            istr >> NumSpeakers;
+            if(!NumSpeakers)
+                return make_error(linenum, "Invalid speakers: %zu", NumSpeakers);
+            /* NOTE(review): there's no upper bound on the count. If this
+             * allocation throws, the noexcept on this function turns it into
+             * a std::terminate call; consider limiting the value.
+             */
+            Speakers = std::make_unique<SpeakerConf[]>(NumSpeakers);
+        }
+        else if(command == "/dec/coeff_scale")
+        {
+            if(CoeffScale != AmbDecScale::Unset)
+                return make_error(linenum, "Duplicate coeff_scale");
+
+            std::string scale{read_word(istr)};
+            if(scale == "n3d") CoeffScale = AmbDecScale::N3D;
+            else if(scale == "sn3d") CoeffScale = AmbDecScale::SN3D;
+            else if(scale == "fuma") CoeffScale = AmbDecScale::FuMa;
+            else
+                return make_error(linenum, "Unexpected coeff_scale: %s", scale.c_str());
+
+            if(ChanMask > Ambi3OrderMask && CoeffScale == AmbDecScale::FuMa)
+                return make_error(linenum, "FuMa not compatible with over third-order");
+        }
+        else if(command == "/opt/xover_freq")
+        {
+            istr >> XOverFreq;
+        }
+        else if(command == "/opt/xover_ratio")
+        {
+            istr >> XOverRatio;
+        }
+        else if(command == "/opt/input_scale" || command == "/opt/nfeff_comp"
+            || command == "/opt/delay_comp" || command == "/opt/level_comp")
+        {
+            /* Unused */
+            read_word(istr);
+        }
+        else if(command == "/speakers/{")
+        {
+            if(!NumSpeakers)
+                return make_error(linenum, "Speakers defined without a count");
+            scope = ReaderScope::Speakers;
+        }
+        else if(command == "/lfmatrix/{" || command == "/hfmatrix/{" || command == "/matrix/{")
+        {
+            if(!NumSpeakers)
+                return make_error(linenum, "Matrix defined without a speaker count");
+            if(!ChanMask)
+                return make_error(linenum, "Matrix defined without a channel mask");
+            /* FreqBands sizes the allocation below and positions HFMatrix. If
+             * it were still 0, the allocation would be empty and FreqBands-1
+             * would wrap around, leaving add_row to write out of bounds.
+             */
+            if(!FreqBands)
+                return make_error(linenum, "Matrix defined without a frequency band count");
+
+            if(!Matrix)
+            {
+                /* One allocation holds the rows of every band. With a single
+                 * band, LFMatrix and HFMatrix refer to the same rows.
+                 */
+                Matrix = std::make_unique<CoeffArray[]>(NumSpeakers * FreqBands);
+                LFMatrix = Matrix.get();
+                HFMatrix = LFMatrix + NumSpeakers*(FreqBands-1);
+            }
+
+            if(FreqBands == 1)
+            {
+                if(command != "/matrix/{")
+                    return make_error(linenum, "Unexpected \"%s\" for a single-band decoder",
+                        command.c_str());
+                scope = ReaderScope::HFMatrix;
+            }
+            else
+            {
+                if(command == "/lfmatrix/{")
+                    scope = ReaderScope::LFMatrix;
+                else if(command == "/hfmatrix/{")
+                    scope = ReaderScope::HFMatrix;
+                else
+                    return make_error(linenum, "Unexpected \"%s\" for a dual-band decoder",
+                        command.c_str());
+            }
+        }
+        else if(command == "/end")
+        {
+            const auto endpos = static_cast<std::size_t>(istr.tellg());
+            if(!is_at_end(buffer, endpos))
+                return make_error(linenum, "Extra junk on end: %s", buffer.substr(endpos).c_str());
+
+            /* Every speaker and every row of each band in use must have been
+             * provided for the definition to be complete.
+             */
+            if(speaker_pos < NumSpeakers || hfmatrix_pos < NumSpeakers
+                || (FreqBands == 2 && lfmatrix_pos < NumSpeakers))
+                return make_error(linenum, "Incomplete decoder definition");
+            if(CoeffScale == AmbDecScale::Unset)
+                return make_error(linenum, "No coefficient scaling defined");
+
+            return al::nullopt;
+        }
+        else
+            return make_error(linenum, "Unexpected command: %s", command.c_str());
+
+        /* Clear any error left by a failed or short read so the position can
+         * be queried. Whatever wasn't consumed has to be whitespace or a
+         * comment.
+         */
+        istr.clear();
+        const auto endpos = static_cast<std::size_t>(istr.tellg());
+        if(!is_at_end(buffer, endpos))
+            return make_error(linenum, "Extra junk on line: %s", buffer.substr(endpos).c_str());
+        buffer.clear();
+    }
+    /* The input ran out without a "/end" command. */
+    return make_error(linenum, "Unexpected end of file");
+}
diff --git a/core/ambdec.h b/core/ambdec.h
new file mode 100644
index 00000000..7f739781
--- /dev/null
+++ b/core/ambdec.h
@@ -0,0 +1,55 @@
+#ifndef CORE_AMBDEC_H
+#define CORE_AMBDEC_H
+
+#include <array>
+#include <memory>
+#include <string>
+
+#include "aloptional.h"
+#include "core/ambidefs.h"
+
+/* Helpers to read .ambdec configuration files. */
+
+enum class AmbDecScale { /* Value of the "/dec/coeff_scale" setting. */
+ Unset, /* Not specified; load() rejects a file that reaches "/end" like this. */
+ N3D, /* Selected by "n3d". */
+ SN3D, /* Selected by "sn3d". */
+ FuMa, /* Selected by "fuma"; load() rejects it with a mask above third-order. */
+};
+/* Holds the contents of an AmbDec decoder configuration file, as filled in by
+ * load().
+ */
+struct AmbDecConf {
+    std::string Description;
+    int Version{0}; /* Must be 3 */
+
+    unsigned int ChanMask{0u};
+    unsigned int FreqBands{0u}; /* Must be 1 or 2 */
+    AmbDecScale CoeffScale{AmbDecScale::Unset};
+
+    float XOverFreq{0.0f};
+    float XOverRatio{0.0f};
+
+    /* The fields of one add_spkr line, in the order they're read. */
+    struct SpeakerConf {
+        std::string Name;
+        float Distance{0.0f};
+        float Azimuth{0.0f};
+        float Elevation{0.0f};
+        std::string Connection;
+    };
+    size_t NumSpeakers{0};
+    std::unique_ptr<SpeakerConf[]> Speakers;
+
+    using CoeffArray = std::array<float,MaxAmbiChannels>;
+    /* Owns the rows of every band; LFMatrix and HFMatrix point into it. */
+    std::unique_ptr<CoeffArray[]> Matrix;
+
+    /* Unused when FreqBands == 1 */
+    float LFOrderGain[MaxAmbiOrder+1]{};
+    /* Null until load() reaches a matrix block. */
+    CoeffArray *LFMatrix{nullptr};
+
+    float HFOrderGain[MaxAmbiOrder+1]{};
+    /* Null until load() reaches a matrix block. */
+    CoeffArray *HFMatrix{nullptr};
+
+    ~AmbDecConf();
+
+    /* Reads the file named by fname. Returns al::nullopt on success, or an
+     * error message on failure.
+     */
+    al::optional<std::string> load(const char *fname) noexcept;
+};
+
+#endif /* CORE_AMBDEC_H */
diff --git a/core/ambidefs.cpp b/core/ambidefs.cpp
new file mode 100644
index 00000000..70d6f356
--- /dev/null
+++ b/core/ambidefs.cpp
@@ -0,0 +1,308 @@
+
+#include "config.h"
+
+#include "ambidefs.h"
+
+#include "alnumbers.h"
+
+
+namespace {
+
+using AmbiChannelFloatArray = std::array<float,MaxAmbiChannels>; /* Array of MaxAmbiChannels floats. */
+
+constexpr auto inv_sqrt2f = static_cast<float>(1.0/al::numbers::sqrt2); /* 1/sqrt2, narrowed to float. */
+constexpr auto inv_sqrt3f = static_cast<float>(1.0/al::numbers::sqrt3); /* 1/sqrt3, narrowed to float. */
+
+
+/* These HF gains are derived from the same 32-point speaker array. The scale
+ * factor between orders represents the same scale factors for any (regular)
+ * speaker array decoder. e.g. Given a first-order source and second-order
+ * output, applying an HF scale of HFScales[1][0] / HFScales[2][0] to channel 0
+ * will result in that channel being subsequently decoded for second-order as
+ * if it was a first-order decoder for that same speaker array.
+ *
+ * Each row holds MaxAmbiOrder+1 gains. Going by the example above, the first
+ * index is the ambisonic order. Rows 0 and 1 hold identical values.
+ */
+constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales{{
+ {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
+ {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
+ {{ 2.981423970e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
+ {{ 2.359168820e+00f, 2.031565936e+00f, 1.444598386e+00f, 7.189495850e-01f }},
+ /* 1.947005434e+00f, 1.764337084e+00f, 1.424707344e+00f, 9.755104127e-01f, 4.784482742e-01f */
+}};
+
+/* Same as above, but using a 10-point horizontal-only speaker array. Should
+ * only be used when the device is mixing in 2D B-Format for horizontal-only
+ * output.
+ *
+ * Each row holds MaxAmbiOrder+1 gains, and rows 0 and 1 hold identical
+ * values.
+ */
+constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales2D{{
+ {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
+ {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
+ {{ 1.825741858e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
+ {{ 1.581138830e+00f, 1.460781803e+00f, 1.118033989e+00f, 6.050756345e-01f }},
+ /* 1.414213562e+00f, 1.344997024e+00f, 1.144122806e+00f, 8.312538756e-01f, 4.370160244e-01f */
+}};
+
+
+/* This calculates a first-order "upsampler" matrix. It combines a first-order
+ * decoder matrix with a max-order encoder matrix, creating a matrix that
+ * behaves as if the B-Format input signal is first decoded to a speaker array
+ * at first-order, then those speaker feeds are encoded to a higher-order
+ * signal. While not perfect, this should accurately encode a lower-order
+ * signal into a higher-order signal.
+ *
+ * FirstOrderDecoder has one row of four coefficients for each of the eight
+ * speakers, and FirstOrderEncoder has the CalcAmbiCoeffs() result for the
+ * speaker at the same index. The static_assert keeps the two tables the same
+ * length.
+ */
+constexpr std::array<std::array<float,4>,8> FirstOrderDecoder{{
+ {{ 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, }},
+ {{ 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, }},
+ {{ 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, }},
+ {{ 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, }},
+ {{ 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, }},
+ {{ 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }},
+ {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, }},
+ {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,8> FirstOrderEncoder{{
+ CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs(-inv_sqrt3f, inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs(-inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
+}};
+static_assert(FirstOrderDecoder.size() == FirstOrderEncoder.size(), "First-order mismatch");
+
+/* This calculates a 2D first-order "upsampler" matrix. Same as the first-order
+ * matrix, just using a more optimized speaker array for horizontal-only
+ * content.
+ *
+ * There are four speakers here. The third decoder column and the middle
+ * CalcAmbiCoeffs() argument are zero for every one of them.
+ */
+constexpr std::array<std::array<float,4>,4> FirstOrder2DDecoder{{
+ {{ 2.500000000e-01f, 2.041241452e-01f, 0.0f, 2.041241452e-01f, }},
+ {{ 2.500000000e-01f, 2.041241452e-01f, 0.0f, -2.041241452e-01f, }},
+ {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f, 2.041241452e-01f, }},
+ {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f, -2.041241452e-01f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,4> FirstOrder2DEncoder{{
+ CalcAmbiCoeffs( inv_sqrt2f, 0.0f, inv_sqrt2f),
+ CalcAmbiCoeffs( inv_sqrt2f, 0.0f, -inv_sqrt2f),
+ CalcAmbiCoeffs(-inv_sqrt2f, 0.0f, inv_sqrt2f),
+ CalcAmbiCoeffs(-inv_sqrt2f, 0.0f, -inv_sqrt2f),
+}};
+static_assert(FirstOrder2DDecoder.size() == FirstOrder2DEncoder.size(), "First-order 2D mismatch");
+
+
+/* This calculates a second-order "upsampler" matrix. Same as the first-order
+ * matrix, just using a slightly more dense speaker array suitable for second-
+ * order content.
+ *
+ * There are twelve speakers, with nine coefficients in each decoder row. The
+ * encoder entry at a given index belongs to the same speaker as the decoder
+ * row at that index.
+ */
+constexpr std::array<std::array<float,9>,12> SecondOrderDecoder{{
+ {{ 8.333333333e-02f, 0.000000000e+00f, -7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f, 1.167715449e-01f, }},
+ {{ 8.333333333e-02f, -1.227808683e-01f, 0.000000000e+00f, 7.588274978e-02f, -1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }},
+ {{ 8.333333333e-02f, -7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }},
+ {{ 8.333333333e-02f, 0.000000000e+00f, 7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, 1.443375673e-01f, 1.167715449e-01f, }},
+ {{ 8.333333333e-02f, -1.227808683e-01f, 0.000000000e+00f, -7.588274978e-02f, 1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }},
+ {{ 8.333333333e-02f, 7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }},
+ {{ 8.333333333e-02f, 0.000000000e+00f, -7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, 1.443375673e-01f, 1.167715449e-01f, }},
+ {{ 8.333333333e-02f, 1.227808683e-01f, 0.000000000e+00f, -7.588274978e-02f, -1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }},
+ {{ 8.333333333e-02f, 7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, 1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }},
+ {{ 8.333333333e-02f, 0.000000000e+00f, 7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f, 1.167715449e-01f, }},
+ {{ 8.333333333e-02f, 1.227808683e-01f, 0.000000000e+00f, 7.588274978e-02f, 1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }},
+ {{ 8.333333333e-02f, -7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, 1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,12> SecondOrderEncoder{{
+ CalcAmbiCoeffs( 0.000000000e+00f, -5.257311121e-01f, 8.506508084e-01f),
+ CalcAmbiCoeffs(-8.506508084e-01f, 0.000000000e+00f, 5.257311121e-01f),
+ CalcAmbiCoeffs(-5.257311121e-01f, 8.506508084e-01f, 0.000000000e+00f),
+ CalcAmbiCoeffs( 0.000000000e+00f, 5.257311121e-01f, 8.506508084e-01f),
+ CalcAmbiCoeffs(-8.506508084e-01f, 0.000000000e+00f, -5.257311121e-01f),
+ CalcAmbiCoeffs( 5.257311121e-01f, -8.506508084e-01f, 0.000000000e+00f),
+ CalcAmbiCoeffs( 0.000000000e+00f, -5.257311121e-01f, -8.506508084e-01f),
+ CalcAmbiCoeffs( 8.506508084e-01f, 0.000000000e+00f, -5.257311121e-01f),
+ CalcAmbiCoeffs( 5.257311121e-01f, 8.506508084e-01f, 0.000000000e+00f),
+ CalcAmbiCoeffs( 0.000000000e+00f, 5.257311121e-01f, -8.506508084e-01f),
+ CalcAmbiCoeffs( 8.506508084e-01f, 0.000000000e+00f, 5.257311121e-01f),
+ CalcAmbiCoeffs(-5.257311121e-01f, -8.506508084e-01f, 0.000000000e+00f),
+}};
+static_assert(SecondOrderDecoder.size() == SecondOrderEncoder.size(), "Second-order mismatch");
+
+/* This calculates a 2D second-order "upsampler" matrix. Same as the second-
+ * order matrix, just using a more optimized speaker array for horizontal-only
+ * content.
+ *
+ * There are six speakers, with nine coefficients in each decoder row. The
+ * middle CalcAmbiCoeffs() argument is zero for every one of them.
+ */
+constexpr std::array<std::array<float,9>,6> SecondOrder2DDecoder{{
+ {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f, 1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }},
+ {{ 1.666666667e-01f, -1.924500897e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }},
+ {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f, -1.666666667e-01f, 1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }},
+ {{ 1.666666667e-01f, 9.622504486e-02f, 0.0f, -1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }},
+ {{ 1.666666667e-01f, 1.924500897e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }},
+ {{ 1.666666667e-01f, 9.622504486e-02f, 0.0f, 1.666666667e-01f, 1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,6> SecondOrder2DEncoder{{
+ CalcAmbiCoeffs(-0.50000000000f, 0.0f, 0.86602540379f),
+ CalcAmbiCoeffs(-1.00000000000f, 0.0f, 0.00000000000f),
+ CalcAmbiCoeffs(-0.50000000000f, 0.0f, -0.86602540379f),
+ CalcAmbiCoeffs( 0.50000000000f, 0.0f, -0.86602540379f),
+ CalcAmbiCoeffs( 1.00000000000f, 0.0f, 0.00000000000f),
+ CalcAmbiCoeffs( 0.50000000000f, 0.0f, 0.86602540379f),
+}};
+static_assert(SecondOrder2DDecoder.size() == SecondOrder2DEncoder.size(),
+ "Second-order 2D mismatch");
+
+
+/* This calculates a third-order "upsampler" matrix. Same as the first-order
+ * matrix, just using a more dense speaker array suitable for third-order
+ * content.
+ *
+ * There are twenty speakers, with 16 coefficients in each decoder row. The
+ * encoder entry at a given index belongs to the same speaker as the decoder
+ * row at that index.
+ */
+constexpr std::array<std::array<float,16>,20> ThirdOrderDecoder{{
+ {{ 5.000000000e-02f, 3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, 6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f, 0.000000000e+00f, 1.126112056e-01f, 7.944389175e-02f, 0.000000000e+00f, 2.421151497e-02f, 0.000000000e+00f, }},
+ {{ 5.000000000e-02f, -3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, 1.256118221e-01f, 0.000000000e+00f, -1.126112056e-01f, 7.944389175e-02f, 0.000000000e+00f, 2.421151497e-02f, 0.000000000e+00f, }},
+ {{ 5.000000000e-02f, 3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f, 0.000000000e+00f, 1.126112056e-01f, -7.944389175e-02f, 0.000000000e+00f, -2.421151497e-02f, 0.000000000e+00f, }},
+ {{ 5.000000000e-02f, -3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, 6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, 1.256118221e-01f, 0.000000000e+00f, -1.126112056e-01f, -7.944389175e-02f, 0.000000000e+00f, -2.421151497e-02f, 0.000000000e+00f, }},
+ {{ 5.000000000e-02f, 8.090169944e-02f, 0.000000000e+00f, 3.090169944e-02f, 6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f, 0.000000000e+00f, -2.950836627e-02f, 0.000000000e+00f, -1.497759251e-01f, 0.000000000e+00f, -7.763237543e-02f, }},
+ {{ 5.000000000e-02f, 8.090169944e-02f, 0.000000000e+00f, -3.090169944e-02f, -6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f, 0.000000000e+00f, -2.950836627e-02f, 0.000000000e+00f, 1.497759251e-01f, 0.000000000e+00f, 7.763237543e-02f, }},
+ {{ 5.000000000e-02f, -8.090169944e-02f, 0.000000000e+00f, 3.090169944e-02f, -6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, 7.763237543e-02f, 0.000000000e+00f, 2.950836627e-02f, 0.000000000e+00f, -1.497759251e-01f, 0.000000000e+00f, -7.763237543e-02f, }},
+ {{ 5.000000000e-02f, -8.090169944e-02f, 0.000000000e+00f, -3.090169944e-02f, 6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, 7.763237543e-02f, 0.000000000e+00f, 2.950836627e-02f, 0.000000000e+00f, 1.497759251e-01f, 0.000000000e+00f, 7.763237543e-02f, }},
+ {{ 5.000000000e-02f, 0.000000000e+00f, 3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, 6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, 3.034486645e-02f, -6.779013272e-02f, 1.659481923e-01f, 4.797944664e-02f, }},
+ {{ 5.000000000e-02f, 0.000000000e+00f, 3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, 3.034486645e-02f, 6.779013272e-02f, 1.659481923e-01f, -4.797944664e-02f, }},
+ {{ 5.000000000e-02f, 0.000000000e+00f, -3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, -3.034486645e-02f, -6.779013272e-02f, -1.659481923e-01f, 4.797944664e-02f, }},
+ {{ 5.000000000e-02f, 0.000000000e+00f, -3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, 6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, -3.034486645e-02f, 6.779013272e-02f, -1.659481923e-01f, -4.797944664e-02f, }},
+ {{ 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, 6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, 6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, 1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }},
+ {{ 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f, 1.482646439e-02f, }},
+ {{ 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f, 1.092600649e-02f, -7.364853795e-02f, 1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }},
+ {{ 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, 6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, 6.338656910e-02f, 1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f, 1.482646439e-02f, }},
+ {{ 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, 6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, 7.364853795e-02f, 1.011266756e-01f, 7.086833869e-02f, -1.482646439e-02f, }},
+ {{ 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, 6.338656910e-02f, -1.092600649e-02f, 7.364853795e-02f, -1.011266756e-01f, 7.086833869e-02f, 1.482646439e-02f, }},
+ {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, -6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, 6.338656910e-02f, 1.092600649e-02f, 7.364853795e-02f, 1.011266756e-01f, 7.086833869e-02f, -1.482646439e-02f, }},
+ {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, 6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f, 1.092600649e-02f, 7.364853795e-02f, -1.011266756e-01f, 7.086833869e-02f, 1.482646439e-02f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,20> ThirdOrderEncoder{{
+ CalcAmbiCoeffs( 0.35682208976f, 0.93417235897f, 0.00000000000f),
+ CalcAmbiCoeffs(-0.35682208976f, 0.93417235897f, 0.00000000000f),
+ CalcAmbiCoeffs( 0.35682208976f, -0.93417235897f, 0.00000000000f),
+ CalcAmbiCoeffs(-0.35682208976f, -0.93417235897f, 0.00000000000f),
+ CalcAmbiCoeffs( 0.93417235897f, 0.00000000000f, 0.35682208976f),
+ CalcAmbiCoeffs( 0.93417235897f, 0.00000000000f, -0.35682208976f),
+ CalcAmbiCoeffs(-0.93417235897f, 0.00000000000f, 0.35682208976f),
+ CalcAmbiCoeffs(-0.93417235897f, 0.00000000000f, -0.35682208976f),
+ CalcAmbiCoeffs( 0.00000000000f, 0.35682208976f, 0.93417235897f),
+ CalcAmbiCoeffs( 0.00000000000f, 0.35682208976f, -0.93417235897f),
+ CalcAmbiCoeffs( 0.00000000000f, -0.35682208976f, 0.93417235897f),
+ CalcAmbiCoeffs( 0.00000000000f, -0.35682208976f, -0.93417235897f),
+ CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs( -inv_sqrt3f, inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( -inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
+ CalcAmbiCoeffs( -inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f),
+ CalcAmbiCoeffs( -inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
+}};
+static_assert(ThirdOrderDecoder.size() == ThirdOrderEncoder.size(), "Third-order mismatch");
+
+/* This calculates a 2D third-order "upsampler" matrix. Same as the third-order
+ * matrix, just using a more optimized speaker array for horizontal-only
+ * content.
+ *
+ * There are eight speakers, with 16 coefficients in each decoder row. The
+ * middle CalcAmbiCoeffs() argument is zero for every one of them.
+ */
+constexpr std::array<std::array<float,16>,8> ThirdOrder2DDecoder{{
+ {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f, 1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 4.573941867e-02f, }},
+ {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f, 5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, 4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }},
+ {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f, -5.523559567e-02f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, 4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.104247249e-01f, }},
+ {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f, -1.333505242e-01f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }},
+ {{ 1.250000000e-01f, 5.523559567e-02f, 0.0f, -1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, 1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }},
+ {{ 1.250000000e-01f, 1.333505242e-01f, 0.0f, -5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.104247249e-01f, }},
+ {{ 1.250000000e-01f, 1.333505242e-01f, 0.0f, 5.523559567e-02f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }},
+ {{ 1.250000000e-01f, 5.523559567e-02f, 0.0f, 1.333505242e-01f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, 1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 4.573941867e-02f, }},
+}};
+constexpr std::array<AmbiChannelFloatArray,8> ThirdOrder2DEncoder{{
+ CalcAmbiCoeffs(-0.38268343237f, 0.0f, 0.92387953251f),
+ CalcAmbiCoeffs(-0.92387953251f, 0.0f, 0.38268343237f),
+ CalcAmbiCoeffs(-0.92387953251f, 0.0f, -0.38268343237f),
+ CalcAmbiCoeffs(-0.38268343237f, 0.0f, -0.92387953251f),
+ CalcAmbiCoeffs( 0.38268343237f, 0.0f, -0.92387953251f),
+ CalcAmbiCoeffs( 0.92387953251f, 0.0f, -0.38268343237f),
+ CalcAmbiCoeffs( 0.92387953251f, 0.0f, 0.38268343237f),
+ CalcAmbiCoeffs( 0.38268343237f, 0.0f, 0.92387953251f),
+}};
+static_assert(ThirdOrder2DDecoder.size() == ThirdOrder2DEncoder.size(), "Third-order 2D mismatch");
+
+
+/* This is the decoder table for the 2D fourth-order "upsampler" matrix: ten
+ * rows (one per entry of FourthOrder2DEncoder) of 25 coefficients each. Only
+ * columns 0, 1, 3, 4, 8, 9, 15, 16, and 24 hold values; every other column is
+ * zero. There is no 3D fourth-order upsampler since fourth-order is the max
+ * order we'll be supporting for the foreseeable future. This is only necessary
+ * for mixing horizontal-only fourth-order content to 3D.
+ */
+constexpr std::array<std::array<float,25>,10> FourthOrder2DDecoder{{
+ {{ 1.000000000e-01f, 3.568220898e-02f, 0.0f, 1.098185471e-01f, 6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, 7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.620301997e-02f, 8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }},
+ {{ 1.000000000e-01f, 9.341723590e-02f, 0.0f, 6.787159473e-02f, 9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, 2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
+ {{ 1.000000000e-01f, 1.154700538e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, -9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.014978717e-02f, }},
+ {{ 1.000000000e-01f, 9.341723590e-02f, 0.0f, -6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, 2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.093839659e-02f, 5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
+ {{ 1.000000000e-01f, 3.568220898e-02f, 0.0f, -1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, 7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }},
+ {{ 1.000000000e-01f, -3.568220898e-02f, 0.0f, -1.098185471e-01f, 6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, 8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }},
+ {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f, -6.787159473e-02f, 9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
+ {{ 1.000000000e-01f, -1.154700538e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, 9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.014978717e-02f, }},
+ {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f, 6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, 5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
+ {{ 1.000000000e-01f, -3.568220898e-02f, 0.0f, 1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }},
+}};
+/* Encoder coefficients for the ten directions paired with the rows of
+ * FourthOrder2DDecoder. Every direction passes 0 for CalcAmbiCoeffs' second
+ * (z) argument; the other two components step around a circle in 36 degree
+ * increments (0.309.../0.951... and 0.809.../0.587... are the sine/cosine
+ * pairs of 18 and 54 degrees).
+ */
+constexpr std::array<AmbiChannelFloatArray,10> FourthOrder2DEncoder{{
+ CalcAmbiCoeffs( 3.090169944e-01f, 0.000000000e+00f, 9.510565163e-01f),
+ CalcAmbiCoeffs( 8.090169944e-01f, 0.000000000e+00f, 5.877852523e-01f),
+ CalcAmbiCoeffs( 1.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f),
+ CalcAmbiCoeffs( 8.090169944e-01f, 0.000000000e+00f, -5.877852523e-01f),
+ CalcAmbiCoeffs( 3.090169944e-01f, 0.000000000e+00f, -9.510565163e-01f),
+ CalcAmbiCoeffs(-3.090169944e-01f, 0.000000000e+00f, -9.510565163e-01f),
+ CalcAmbiCoeffs(-8.090169944e-01f, 0.000000000e+00f, -5.877852523e-01f),
+ CalcAmbiCoeffs(-1.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f),
+ CalcAmbiCoeffs(-8.090169944e-01f, 0.000000000e+00f, 5.877852523e-01f),
+ CalcAmbiCoeffs(-3.090169944e-01f, 0.000000000e+00f, 9.510565163e-01f),
+}};
+/* The decoder and encoder tables must have one entry per direction each. */
+static_assert(FourthOrder2DDecoder.size() == FourthOrder2DEncoder.size(), "Fourth-order 2D mismatch");
+
+
+/* Builds an upsampler matrix from a decoder table and its matching encoder
+ * table. Output element [i][j] is the sum over all M table entries k of
+ * decoder[k][i] * encoder[k][j]. The products are accumulated in double
+ * precision and converted to float once per output element.
+ */
+template<size_t N, size_t M>
+auto CalcAmbiUpsampler(const std::array<std::array<float,N>,M> &decoder,
+    const std::array<AmbiChannelFloatArray,M> &encoder)
+{
+    std::array<AmbiChannelFloatArray,N> res{};
+
+    size_t inchan{0};
+    for(AmbiChannelFloatArray &outrow : res)
+    {
+        size_t outchan{0};
+        for(float &outcoeff : outrow)
+        {
+            double accum{0.0};
+            for(size_t entry{0};entry < M;++entry)
+                accum += double{decoder[entry][inchan]} * encoder[entry][outchan];
+            outcoeff = static_cast<float>(accum);
+            ++outchan;
+        }
+        ++inchan;
+    }
+
+    return res;
+}
+
+} // namespace
+
+/* Definitions of the AmbiScale upsampler matrices. Each one is built by
+ * CalcAmbiUpsampler from the decoder table and encoder table of the same
+ * name; the "2D" variants use the 2D tables.
+ */
+const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrderUp{CalcAmbiUpsampler(FirstOrderDecoder, FirstOrderEncoder)};
+const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrder2DUp{CalcAmbiUpsampler(FirstOrder2DDecoder, FirstOrder2DEncoder)};
+const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrderUp{CalcAmbiUpsampler(SecondOrderDecoder, SecondOrderEncoder)};
+const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrder2DUp{CalcAmbiUpsampler(SecondOrder2DDecoder, SecondOrder2DEncoder)};
+const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrderUp{CalcAmbiUpsampler(ThirdOrderDecoder, ThirdOrderEncoder)};
+const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrder2DUp{CalcAmbiUpsampler(ThirdOrder2DDecoder, ThirdOrder2DEncoder)};
+const std::array<AmbiChannelFloatArray,25> AmbiScale::FourthOrder2DUp{CalcAmbiUpsampler(FourthOrder2DDecoder, FourthOrder2DEncoder)};
+
+
+/* Returns, for each of the MaxAmbiOrder+1 entries, the source order's HF scale
+ * divided by the device order's HF scale. The values come from HFScales2D
+ * when horizontalOnly is set and from HFScales otherwise. src_order and
+ * dev_order index the chosen table directly and are not range-checked.
+ */
+std::array<float,MaxAmbiOrder+1> AmbiScale::GetHFOrderScales(const uint src_order,
+    const uint dev_order, const bool horizontalOnly) noexcept
+{
+    std::array<float,MaxAmbiOrder+1> res{};
+
+    /* Generic over the table type so both tables share one loop. */
+    const auto fill_ratios = [&res,src_order,dev_order](const auto &scales) -> void
+    {
+        const auto &src_scales = scales[src_order];
+        const auto &dev_scales = scales[dev_order];
+        for(size_t i{0};i < res.size();++i)
+            res[i] = src_scales[i] / dev_scales[i];
+    };
+
+    if(horizontalOnly)
+        fill_ratios(HFScales2D);
+    else
+        fill_ratios(HFScales);
+
+    return res;
+}
diff --git a/core/ambidefs.h b/core/ambidefs.h
new file mode 100644
index 00000000..b7d2bcd1
--- /dev/null
+++ b/core/ambidefs.h
@@ -0,0 +1,250 @@
+#ifndef CORE_AMBIDEFS_H
+#define CORE_AMBIDEFS_H
+
+#include <array>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "alnumbers.h"
+
+
+using uint = unsigned int;
+
+/* The highest ambisonic order handled, plus the channel count it implies. A
+ * full signal of order (o) needs (o+1)**2 channels, thus zero-order has 1,
+ * first-order has 4, second-order has 9, third-order has 16, and fourth-order
+ * has 25.
+ */
+constexpr uint8_t MaxAmbiOrder{3};
+constexpr inline size_t AmbiChannelsFromOrder(size_t order) noexcept
+{
+    const size_t side{order + 1};
+    return side * side;
+}
+constexpr size_t MaxAmbiChannels{AmbiChannelsFromOrder(MaxAmbiOrder)};
+
+/* Bitmasks of the ambisonic channels for 0 to 4th order, with one low bit set
+ * per channel of the named order. These stop at 4th order, which is the
+ * highest order a 32-bit mask value can specify (a 64-bit mask could handle
+ * up to 7th order).
+ */
+constexpr uint Ambi0OrderMask{(1u << AmbiChannelsFromOrder(0)) - 1u}; /* 0x00000001 */
+constexpr uint Ambi1OrderMask{(1u << AmbiChannelsFromOrder(1)) - 1u}; /* 0x0000000f */
+constexpr uint Ambi2OrderMask{(1u << AmbiChannelsFromOrder(2)) - 1u}; /* 0x000001ff */
+constexpr uint Ambi3OrderMask{(1u << AmbiChannelsFromOrder(3)) - 1u}; /* 0x0000ffff */
+constexpr uint Ambi4OrderMask{(1u << AmbiChannelsFromOrder(4)) - 1u}; /* 0x01ffffff */
+
+/* A bitmask of ambisonic channels with height information. If none of these
+ * channels are used/needed, there's no height (e.g. with most surround sound
+ * speaker setups). This is ACN ordering, with bit 0 being ACN 0, etc.
+ */
+constexpr uint AmbiPeriphonicMask{0xfe7ce4};
+
+/* The number of ambisonic channels needed for a 2D (non-periphonic)
+ * representation of a given order: one for zero-order plus two for each order
+ * above it, i.e. o*2 + 1.
+ */
+constexpr inline size_t Ambi2DChannelsFromOrder(size_t order) noexcept
+{
+    return 2*order + 1;
+}
+constexpr size_t MaxAmbi2DChannels{Ambi2DChannelsFromOrder(MaxAmbiOrder)};
+
+
+/* NOTE: These are scale factors as applied to Ambisonics content. Decoder
+ * coefficients should be divided by these values to get proper scalings.
+ *
+ * Each From*() accessor returns a reference to a function-local constant table
+ * with one factor per ACN channel (MaxAmbiChannels entries).
+ */
+struct AmbiScale {
+ /* N3D needs no adjustment: every factor is 1. */
+ static auto& FromN3D() noexcept
+ {
+ static constexpr const std::array<float,MaxAmbiChannels> ret{{
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
+ }};
+ return ret;
+ }
+ /* SN3D factors: the same value for all channels of an order, sqrt(1),
+ * sqrt(3), sqrt(5), and sqrt(7) for orders 0 through 3.
+ */
+ static auto& FromSN3D() noexcept
+ {
+ static constexpr const std::array<float,MaxAmbiChannels> ret{{
+ 1.000000000f, /* ACN 0, sqrt(1) */
+ 1.732050808f, /* ACN 1, sqrt(3) */
+ 1.732050808f, /* ACN 2, sqrt(3) */
+ 1.732050808f, /* ACN 3, sqrt(3) */
+ 2.236067978f, /* ACN 4, sqrt(5) */
+ 2.236067978f, /* ACN 5, sqrt(5) */
+ 2.236067978f, /* ACN 6, sqrt(5) */
+ 2.236067978f, /* ACN 7, sqrt(5) */
+ 2.236067978f, /* ACN 8, sqrt(5) */
+ 2.645751311f, /* ACN 9, sqrt(7) */
+ 2.645751311f, /* ACN 10, sqrt(7) */
+ 2.645751311f, /* ACN 11, sqrt(7) */
+ 2.645751311f, /* ACN 12, sqrt(7) */
+ 2.645751311f, /* ACN 13, sqrt(7) */
+ 2.645751311f, /* ACN 14, sqrt(7) */
+ 2.645751311f, /* ACN 15, sqrt(7) */
+ }};
+ return ret;
+ }
+ /* FuMa factors, listed in ACN order with the FuMa channel letter noted
+ * beside each entry.
+ */
+ static auto& FromFuMa() noexcept
+ {
+ static constexpr const std::array<float,MaxAmbiChannels> ret{{
+ 1.414213562f, /* ACN 0 (W), sqrt(2) */
+ 1.732050808f, /* ACN 1 (Y), sqrt(3) */
+ 1.732050808f, /* ACN 2 (Z), sqrt(3) */
+ 1.732050808f, /* ACN 3 (X), sqrt(3) */
+ 1.936491673f, /* ACN 4 (V), sqrt(15)/2 */
+ 1.936491673f, /* ACN 5 (T), sqrt(15)/2 */
+ 2.236067978f, /* ACN 6 (R), sqrt(5) */
+ 1.936491673f, /* ACN 7 (S), sqrt(15)/2 */
+ 1.936491673f, /* ACN 8 (U), sqrt(15)/2 */
+ 2.091650066f, /* ACN 9 (Q), sqrt(35/8) */
+ 1.972026594f, /* ACN 10 (O), sqrt(35)/3 */
+ 2.231093404f, /* ACN 11 (M), sqrt(224/45) */
+ 2.645751311f, /* ACN 12 (K), sqrt(7) */
+ 2.231093404f, /* ACN 13 (L), sqrt(224/45) */
+ 1.972026594f, /* ACN 14 (N), sqrt(35)/3 */
+ 2.091650066f, /* ACN 15 (P), sqrt(35/8) */
+ }};
+ return ret;
+ }
+ /* UHJ factors. Only the first four channels carry a specific value; the
+ * remaining entries are filled with 1.
+ */
+ static auto& FromUHJ() noexcept
+ {
+ static constexpr const std::array<float,MaxAmbiChannels> ret{{
+ 1.000000000f, /* ACN 0 (W), sqrt(1) */
+ 1.224744871f, /* ACN 1 (Y), sqrt(3/2) */
+ 1.224744871f, /* ACN 2 (Z), sqrt(3/2) */
+ 1.224744871f, /* ACN 3 (X), sqrt(3/2) */
+ /* Higher orders not relevant for UHJ. */
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+ }};
+ return ret;
+ }
+
+ /* Retrieves per-order HF scaling factors for "upsampling" ambisonic data. */
+ static std::array<float,MaxAmbiOrder+1> GetHFOrderScales(const uint src_order,
+ const uint dev_order, const bool horizontalOnly) noexcept;
+
+ /* Upsampler matrices, defined out of line. The outer dimension is the
+ * channel count of the named source order (4, 9, 16, or 25) and each row
+ * holds MaxAmbiChannels coefficients.
+ */
+ static const std::array<std::array<float,MaxAmbiChannels>,4> FirstOrderUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,4> FirstOrder2DUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,9> SecondOrderUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,9> SecondOrder2DUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,16> ThirdOrderUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,16> ThirdOrder2DUp;
+ static const std::array<std::array<float,MaxAmbiChannels>,25> FourthOrder2DUp;
+};
+
+/* Lookup tables relating channel positions in a given layout to ACN channel
+ * indices or to ambisonic orders. Each accessor returns a reference to a
+ * function-local constant table.
+ */
+struct AmbiIndex {
+ /* Entry i is the ACN index of the i'th FuMa channel (W, X, Y, Z, ...). */
+ static auto& FromFuMa() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
+ 0, /* W */
+ 3, /* X */
+ 1, /* Y */
+ 2, /* Z */
+ 6, /* R */
+ 7, /* S */
+ 5, /* T */
+ 8, /* U */
+ 4, /* V */
+ 12, /* K */
+ 13, /* L */
+ 11, /* M */
+ 14, /* N */
+ 10, /* O */
+ 15, /* P */
+ 9, /* Q */
+ }};
+ return ret;
+ }
+ /* Same as FromFuMa, restricted to the MaxAmbi2DChannels channels of the 2D
+ * layout.
+ */
+ static auto& FromFuMa2D() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
+ 0, /* W */
+ 3, /* X */
+ 1, /* Y */
+ 8, /* U */
+ 4, /* V */
+ 15, /* P */
+ 9, /* Q */
+ }};
+ return ret;
+ }
+
+ /* Identity mapping: entry i is ACN index i. */
+ static auto& FromACN() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }};
+ return ret;
+ }
+ /* The ACN indices that make up the 2D layout, in ascending order. */
+ static auto& FromACN2D() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
+ 0, 1,3, 4,8, 9,15
+ }};
+ return ret;
+ }
+
+ /* Entry i is the ambisonic order that ACN channel i belongs to. */
+ static auto& OrderFromChannel() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
+ 0, 1,1,1, 2,2,2,2,2, 3,3,3,3,3,3,3,
+ }};
+ return ret;
+ }
+ /* Entry i is the ambisonic order of the i'th channel of the 2D layout. */
+ static auto& OrderFrom2DChannel() noexcept
+ {
+ static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
+ 0, 1,1, 2,2, 3,3,
+ }};
+ return ret;
+ }
+};
+
+
+/**
+ * Computes the ambisonic encoder coefficients (ACN 0 through
+ * MaxAmbiChannels-1) for a direction given by its X, Y, and Z components,
+ * which must form a normalized (unit length) vector.
+ *
+ * NOTE: The components use ambisonic coordinates. As a result:
+ *
+ * Ambisonic Y = OpenAL -X
+ * Ambisonic Z = OpenAL Y
+ * Ambisonic X = OpenAL -Z
+ *
+ * The parameters are ordered y, z, x so that OpenAL's X, Y, and Z map to the
+ * first, second, and third parameters respectively -- simply negate X and Z.
+ */
+constexpr auto CalcAmbiCoeffs(const float y, const float z, const float x)
+{
+    /* Pairwise products shared by the second- and third-order terms. */
+    const float x_sq{x*x};
+    const float y_sq{y*y};
+    const float z_sq{z*z};
+    const float x_y{x*y};
+    const float y_z{y*z};
+    const float x_z{x*z};
+
+    const std::array<float,MaxAmbiChannels> coeffs{{
+        /* Zeroth-order */
+        1.0f, /* ACN 0 = 1 */
+        /* First-order */
+        al::numbers::sqrt3_v<float> * y, /* ACN 1 = sqrt(3) * Y */
+        al::numbers::sqrt3_v<float> * z, /* ACN 2 = sqrt(3) * Z */
+        al::numbers::sqrt3_v<float> * x, /* ACN 3 = sqrt(3) * X */
+        /* Second-order */
+        3.872983346e+00f * x_y, /* ACN 4 = sqrt(15) * X * Y */
+        3.872983346e+00f * y_z, /* ACN 5 = sqrt(15) * Y * Z */
+        1.118033989e+00f * (3.0f*z_sq - 1.0f), /* ACN 6 = sqrt(5)/2 * (3*Z*Z - 1) */
+        3.872983346e+00f * x_z, /* ACN 7 = sqrt(15) * X * Z */
+        1.936491673e+00f * (x_sq - y_sq), /* ACN 8 = sqrt(15)/2 * (X*X - Y*Y) */
+        /* Third-order */
+        2.091650066e+00f * (y*(3.0f*x_sq - y_sq)), /* ACN 9 = sqrt(35/8) * Y * (3*X*X - Y*Y) */
+        1.024695076e+01f * (z*x_y), /* ACN 10 = sqrt(105) * Z * X * Y */
+        1.620185175e+00f * (y*(5.0f*z_sq - 1.0f)), /* ACN 11 = sqrt(21/8) * Y * (5*Z*Z - 1) */
+        1.322875656e+00f * (z*(5.0f*z_sq - 3.0f)), /* ACN 12 = sqrt(7)/2 * Z * (5*Z*Z - 3) */
+        1.620185175e+00f * (x*(5.0f*z_sq - 1.0f)), /* ACN 13 = sqrt(21/8) * X * (5*Z*Z - 1) */
+        5.123475383e+00f * (z*(x_sq - y_sq)), /* ACN 14 = sqrt(105)/2 * Z * (X*X - Y*Y) */
+        2.091650066e+00f * (x*(x_sq - 3.0f*y_sq)), /* ACN 15 = sqrt(35/8) * X * (X*X - 3*Y*Y) */
+        /* Fourth-order (not generated; formulas kept for reference) */
+        /* ACN 16 = sqrt(35)*3/2 * X * Y * (X*X - Y*Y) */
+        /* ACN 17 = sqrt(35/2)*3/2 * (3*X*X - Y*Y) * Y * Z */
+        /* ACN 18 = sqrt(5)*3/2 * X * Y * (7*Z*Z - 1) */
+        /* ACN 19 = sqrt(5/2)*3/2 * Y * Z * (7*Z*Z - 3) */
+        /* ACN 20 = 3/8 * (35*Z*Z*Z*Z - 30*Z*Z + 3) */
+        /* ACN 21 = sqrt(5/2)*3/2 * X * Z * (7*Z*Z - 3) */
+        /* ACN 22 = sqrt(5)*3/4 * (X*X - Y*Y) * (7*Z*Z - 1) */
+        /* ACN 23 = sqrt(35/2)*3/2 * (X*X - 3*Y*Y) * X * Z */
+        /* ACN 24 = sqrt(35)*3/8 * (X*X*X*X - 6*X*X*Y*Y + Y*Y*Y*Y) */
+    }};
+    return coeffs;
+}
+
+#endif /* CORE_AMBIDEFS_H */
diff --git a/core/async_event.h b/core/async_event.h
new file mode 100644
index 00000000..5a2f5f91
--- /dev/null
+++ b/core/async_event.h
@@ -0,0 +1,55 @@
+#ifndef CORE_EVENT_H
+#define CORE_EVENT_H
+
+#include "almalloc.h"
+
+struct EffectState;
+
+using uint = unsigned int;
+
+
+/* An event record: a type tag plus a union of per-type payloads. */
+struct AsyncEvent {
+ /* Event type values stored in EnumType. */
+ enum : uint {
+ /* User event types. */
+ SourceStateChange,
+ BufferCompleted,
+ Disconnected,
+ UserEventCount,
+
+ /* Internal events, always processed. */
+ ReleaseEffectState = 128,
+
+ /* End event thread processing. */
+ KillThread,
+ };
+
+ /* State values carried in the srcstate payload. */
+ enum class SrcState {
+ Reset,
+ Stop,
+ Play,
+ Pause
+ };
+
+ /* The event type, fixed at construction. */
+ const uint EnumType;
+ /* Payload storage, value-initialized. NOTE(review): which member is
+ * meaningful presumably follows EnumType (srcstate for SourceStateChange,
+ * bufcomp for BufferCompleted, disconnect for Disconnected, mEffectState
+ * for ReleaseEffectState) -- confirm against the code that fills it in.
+ */
+ union {
+ char dummy;
+ struct {
+ uint id;
+ SrcState state;
+ } srcstate;
+ struct {
+ uint id;
+ uint count;
+ } bufcomp;
+ struct {
+ char msg[244];
+ } disconnect;
+ EffectState *mEffectState;
+ } u{};
+
+ /* Creates an event of the given type; the payload is left for the caller
+ * to fill in.
+ */
+ constexpr AsyncEvent(uint type) noexcept : EnumType{type} { }
+
+ DISABLE_ALLOC()
+};
+
+#endif
diff --git a/core/bformatdec.cpp b/core/bformatdec.cpp
new file mode 100644
index 00000000..129b9976
--- /dev/null
+++ b/core/bformatdec.cpp
@@ -0,0 +1,170 @@
+
+#include "config.h"
+
+#include "bformatdec.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <utility>
+
+#include "almalloc.h"
+#include "alnumbers.h"
+#include "filters/splitter.h"
+#include "front_stablizer.h"
+#include "mixer.h"
+#include "opthelpers.h"
+
+
+/* Sets up a decoder for `inchans` input channels. `coeffs` supplies the gains
+ * for single-band decoding, or the high-frequency gains when `coeffslf` is
+ * non-empty; in that case `coeffslf` supplies the low-frequency gains and
+ * every channel gets a crossover filter initialized from `xover_f0norm`.
+ * Ownership of `stablizer` (which may be null) is taken.
+ */
+BFormatDec::BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs,
+    const al::span<const ChannelDec> coeffslf, const float xover_f0norm,
+    std::unique_ptr<FrontStablizer> stablizer)
+    : mStablizer{std::move(stablizer)}, mDualBand{!coeffslf.empty()}, mChannelDec{inchans}
+{
+    /* Each entry of a coefficient set holds one value per input channel,
+     * while a channel decoder wants one gain per coefficient-set entry for
+     * its own input channel. This copies "column" inchan into dst.
+     */
+    const auto copy_column = [](float *dst, const al::span<const ChannelDec> matrix,
+        const size_t inchan) -> void
+    {
+        for(const ChannelDec &row : matrix)
+            *(dst++) = row[inchan];
+    };
+
+    if(mDualBand)
+    {
+        /* Initialize the first crossover filter and copy it to the rest. */
+        mChannelDec[0].mXOver.init(xover_f0norm);
+        for(size_t chan{1};chan < mChannelDec.size();++chan)
+            mChannelDec[chan].mXOver = mChannelDec[0].mXOver;
+
+        for(size_t chan{0};chan < mChannelDec.size();++chan)
+        {
+            copy_column(mChannelDec[chan].mGains.Dual[sHFBand], coeffs, chan);
+            copy_column(mChannelDec[chan].mGains.Dual[sLFBand], coeffslf, chan);
+        }
+        return;
+    }
+
+    for(size_t chan{0};chan < mChannelDec.size();++chan)
+        copy_column(mChannelDec[chan].mGains.Single, coeffs, chan);
+}
+
+
+/* Feeds one input line per channel decoder, starting at InSamples and
+ * advancing by one line each time, through MixSamples into OutBuffer using
+ * the gains stored at construction. Only the first SamplesToDo samples of
+ * each line are used. In dual-band mode each input is first split by the
+ * channel's crossover filter, and the two bands are passed on separately with
+ * their own gain sets.
+ */
+void BFormatDec::process(const al::span<FloatBufferLine> OutBuffer,
+    const FloatBufferLine *InSamples, const size_t SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    if(!mDualBand)
+    {
+        for(ChannelDecoder &decoder : mChannelDec)
+        {
+            /* The same gains serve as both the current and target values. */
+            float *gains{decoder.mGains.Single};
+            MixSamples({InSamples->data(), SamplesToDo}, OutBuffer, gains, gains, 0, 0);
+            ++InSamples;
+        }
+        return;
+    }
+
+    const al::span<float> hfSamples{mSamples[sHFBand].data(), SamplesToDo};
+    const al::span<float> lfSamples{mSamples[sLFBand].data(), SamplesToDo};
+    for(ChannelDecoder &decoder : mChannelDec)
+    {
+        decoder.mXOver.process({InSamples->data(), SamplesToDo}, hfSamples.data(),
+            lfSamples.data());
+        ++InSamples;
+
+        float *hfgains{decoder.mGains.Dual[sHFBand]};
+        MixSamples(hfSamples, OutBuffer, hfgains, hfgains, 0, 0);
+
+        float *lfgains{decoder.mGains.Dual[sLFBand]};
+        MixSamples(lfSamples, OutBuffer, lfgains, lfgains, 0, 0);
+    }
+}
+
+/* Decodes InSamples into OutBuffer like process(), then redistributes the
+ * decoded front signal between the left, right, and center outputs.
+ *
+ * lidx, ridx, and cidx are the OutBuffer indices of the left, right, and
+ * center channels. Whatever is already in the left/right outputs is set aside
+ * as a mid/side pair before decoding and merged back in afterward. mStablizer
+ * is dereferenced unconditionally, so it must be non-null here (see
+ * hasStablizer()).
+ */
+void BFormatDec::processStablize(const al::span<FloatBufferLine> OutBuffer,
+ const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx,
+ const size_t SamplesToDo)
+{
+ ASSUME(SamplesToDo > 0);
+
+ /* Move the existing direct L/R signal out so it doesn't get processed by
+ * the stablizer.
+ */
+ float *RESTRICT mid{al::assume_aligned<16>(mStablizer->MidDirect.data())};
+ float *RESTRICT side{al::assume_aligned<16>(mStablizer->Side.data())};
+ for(size_t i{0};i < SamplesToDo;++i)
+ {
+ mid[i] = OutBuffer[lidx][i] + OutBuffer[ridx][i];
+ side[i] = OutBuffer[lidx][i] - OutBuffer[ridx][i];
+ }
+ /* Clear L/R so that after decoding they hold only the decoded signal. */
+ std::fill_n(OutBuffer[lidx].begin(), SamplesToDo, 0.0f);
+ std::fill_n(OutBuffer[ridx].begin(), SamplesToDo, 0.0f);
+
+ /* Decode the B-Format input to OutBuffer. */
+ process(OutBuffer, InSamples, SamplesToDo);
+
+ /* Include the decoded side signal with the direct side signal. */
+ for(size_t i{0};i < SamplesToDo;++i)
+ side[i] += OutBuffer[lidx][i] - OutBuffer[ridx][i];
+
+ /* Get the decoded mid signal and band-split it. */
+ std::transform(OutBuffer[lidx].cbegin(), OutBuffer[lidx].cbegin()+SamplesToDo,
+ OutBuffer[ridx].cbegin(), mStablizer->Temp.begin(),
+ [](const float l, const float r) noexcept { return l + r; });
+
+ mStablizer->MidFilter.process({mStablizer->Temp.data(), SamplesToDo}, mStablizer->MidHF.data(),
+ mStablizer->MidLF.data());
+
+ /* Apply an all-pass to all channels to match the band-splitter's phase
+ * shift. This is to keep the phase synchronized between the existing
+ * signal and the split mid signal.
+ */
+ const size_t NumChannels{OutBuffer.size()};
+ for(size_t i{0u};i < NumChannels;i++)
+ {
+ /* Skip the left and right channels, which are going to get overwritten,
+ * and substitute the direct mid signal and direct+decoded side signal.
+ */
+ if(i == lidx)
+ mStablizer->ChannelFilters[i].processAllPass({mid, SamplesToDo});
+ else if(i == ridx)
+ mStablizer->ChannelFilters[i].processAllPass({side, SamplesToDo});
+ else
+ mStablizer->ChannelFilters[i].processAllPass({OutBuffer[i].data(), SamplesToDo});
+ }
+
+ /* This pans the separate low- and high-frequency signals between being on
+ * the center channel and the left+right channels. The low-frequency signal
+ * is panned 1/3rd toward center and the high-frequency signal is panned
+ * 1/4th toward center. These values can be tweaked.
+ */
+ const float cos_lf{std::cos(1.0f/3.0f * (al::numbers::pi_v<float>*0.5f))};
+ const float cos_hf{std::cos(1.0f/4.0f * (al::numbers::pi_v<float>*0.5f))};
+ const float sin_lf{std::sin(1.0f/3.0f * (al::numbers::pi_v<float>*0.5f))};
+ const float sin_hf{std::sin(1.0f/4.0f * (al::numbers::pi_v<float>*0.5f))};
+ for(size_t i{0};i < SamplesToDo;i++)
+ {
+ /* Add the direct mid signal to the processed mid signal so it can be
+ * properly combined with the direct+decoded side signal.
+ */
+ const float m{mStablizer->MidLF[i]*cos_lf + mStablizer->MidHF[i]*cos_hf + mid[i]};
+ const float c{mStablizer->MidLF[i]*sin_lf + mStablizer->MidHF[i]*sin_hf};
+ const float s{side[i]};
+
+ /* The generated center channel signal adds to the existing signal,
+ * while the modified left and right channels replace.
+ */
+ OutBuffer[lidx][i] = (m + s) * 0.5f;
+ OutBuffer[ridx][i] = (m - s) * 0.5f;
+ OutBuffer[cidx][i] += c * 0.5f;
+ }
+}
+
+
+/* Factory wrapper: heap-allocates a BFormatDec, forwarding every argument to
+ * the constructor unchanged, and hands ownership to the caller.
+ */
+std::unique_ptr<BFormatDec> BFormatDec::Create(const size_t inchans,
+    const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf,
+    const float xover_f0norm, std::unique_ptr<FrontStablizer> stablizer)
+{
+    auto decoder = std::make_unique<BFormatDec>(inchans, coeffs, coeffslf, xover_f0norm,
+        std::move(stablizer));
+    return decoder;
+}
diff --git a/core/bformatdec.h b/core/bformatdec.h
new file mode 100644
index 00000000..7a27a5a4
--- /dev/null
+++ b/core/bformatdec.h
@@ -0,0 +1,71 @@
+#ifndef CORE_BFORMATDEC_H
+#define CORE_BFORMATDEC_H
+
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "devformat.h"
+#include "filters/splitter.h"
+#include "vector.h"
+
+struct FrontStablizer;
+
+
+/* One set of decoder coefficients, holding a value per ambisonic channel. */
+using ChannelDec = std::array<float,MaxAmbiChannels>;
+
+/* Decodes ambisonic input lines to a set of output channels, either with a
+ * single set of gains or with separate high- and low-frequency gains applied
+ * after a per-channel band split.
+ */
+class BFormatDec {
+ /* Indices of the two bands in mSamples and in the Dual gain table. */
+ static constexpr size_t sHFBand{0};
+ static constexpr size_t sLFBand{1};
+ static constexpr size_t sNumBands{2};
+
+ /* Per-input-channel state: the output gains and the crossover filter. */
+ struct ChannelDecoder {
+ /* Dual is used for dual-band decoding and Single otherwise; the two
+ * share storage.
+ */
+ union MatrixU {
+ float Dual[sNumBands][MAX_OUTPUT_CHANNELS];
+ float Single[MAX_OUTPUT_CHANNELS];
+ } mGains{};
+
+ /* NOTE: BandSplitter filter is unused with single-band decoding. */
+ BandSplitter mXOver;
+ };
+
+ /* Scratch lines holding the split HF and LF bands of the current input. */
+ alignas(16) std::array<FloatBufferLine,2> mSamples;
+
+ /* Optional front stablizer state; null when not provided. */
+ const std::unique_ptr<FrontStablizer> mStablizer;
+ /* True when low-frequency coefficients were supplied at construction. */
+ const bool mDualBand{false};
+
+ /* TODO: This should ideally be a FlexArray, since ChannelDecoder is rather
+ * small and only a few are needed (3, 4, 5, 7, typically). But that can
+ * only be used in a standard layout struct, and a std::unique_ptr member
+ * (mStablizer) causes GCC and Clang to warn it's not.
+ */
+ al::vector<ChannelDecoder> mChannelDec;
+
+public:
+ /* Builds a decoder for inchans input channels. An empty coeffslf selects
+ * single-band decoding using coeffs alone.
+ */
+ BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs,
+ const al::span<const ChannelDec> coeffslf, const float xover_f0norm,
+ std::unique_ptr<FrontStablizer> stablizer);
+
+ /* Reports whether a front stablizer was supplied at construction. */
+ bool hasStablizer() const noexcept { return mStablizer != nullptr; }
+
+ /* Decodes the ambisonic input to the given output channels. */
+ void process(const al::span<FloatBufferLine> OutBuffer, const FloatBufferLine *InSamples,
+ const size_t SamplesToDo);
+
+ /* Decodes the ambisonic input to the given output channels with stablization. */
+ void processStablize(const al::span<FloatBufferLine> OutBuffer,
+ const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx,
+ const size_t SamplesToDo);
+
+ /* Allocates a new decoder, forwarding the arguments to the constructor. */
+ static std::unique_ptr<BFormatDec> Create(const size_t inchans,
+ const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf,
+ const float xover_f0norm, std::unique_ptr<FrontStablizer> stablizer);
+
+ DEF_NEWDEL(BFormatDec)
+};
+
+#endif /* CORE_BFORMATDEC_H */
diff --git a/core/bs2b.cpp b/core/bs2b.cpp
new file mode 100644
index 00000000..303bf9bd
--- /dev/null
+++ b/core/bs2b.cpp
@@ -0,0 +1,183 @@
+/*-
+ * Copyright (c) 2005 Boris Mikhaylov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+
+#include "alnumbers.h"
+#include "bs2b.h"
+
+
+namespace {
+
+/* The tuning values selected by a crossfeed level. */
+struct CrossfeedPreset {
+    float Fc_lo, Fc_hi; /* Frequencies for the lowpass and highboost filters. */
+    float G_lo, G_hi;   /* Gains for the lowpass and highboost filters. */
+};
+
+/* Returns the tuning values for the given level. Any level other than the
+ * five handled explicitly is rewritten to BS2B_HIGH_ECLEVEL and gets that
+ * level's values.
+ */
+CrossfeedPreset SelectPreset(int &level) noexcept
+{
+    switch(level)
+    {
+    case BS2B_LOW_CLEVEL: /* Low crossfeed level */
+        return {360.0f, 501.0f, 0.398107170553497f, 0.205671765275719f};
+
+    case BS2B_MIDDLE_CLEVEL: /* Middle crossfeed level */
+        return {500.0f, 711.0f, 0.459726988530872f, 0.228208484414988f};
+
+    case BS2B_HIGH_CLEVEL: /* High crossfeed level (virtual speakers are closer together) */
+        return {700.0f, 1021.0f, 0.530884444230988f, 0.250105790667544f};
+
+    case BS2B_LOW_ECLEVEL: /* Low easy crossfeed level */
+        return {360.0f, 494.0f, 0.316227766016838f, 0.168236228897329f};
+
+    case BS2B_MIDDLE_ECLEVEL: /* Middle easy crossfeed level */
+        return {500.0f, 689.0f, 0.354813389233575f, 0.187169483835901f};
+
+    default:
+        break;
+    }
+
+    /* High easy crossfeed level */
+    level = BS2B_HIGH_ECLEVEL;
+    return {700.0f, 975.0f, 0.398107170553497f, 0.205671765275719f};
+}
+
+} // namespace
+
+/* Set up all data: derives the filter coefficients from the stored level and
+ * sample rate. The stored level is normalized to BS2B_HIGH_ECLEVEL when it is
+ * not one of the other known levels.
+ */
+static void init(struct bs2b *bs2b)
+{
+    const CrossfeedPreset preset{SelectPreset(bs2b->level)};
+    const float srate{static_cast<float>(bs2b->srate)};
+
+    const float g{1.0f / (1.0f - preset.G_hi + preset.G_lo)};
+
+    /* $fc = $Fc / $s;
+     * $d = 1 / 2 / pi / $fc;
+     * $x = exp(-1 / $d);
+     */
+    const float x_lo{std::exp(-al::numbers::pi_v<float>*2.0f*preset.Fc_lo/srate)};
+    bs2b->b1_lo = x_lo;
+    bs2b->a0_lo = preset.G_lo * (1.0f - x_lo) * g;
+
+    const float x_hi{std::exp(-al::numbers::pi_v<float>*2.0f*preset.Fc_hi/srate)};
+    bs2b->b1_hi = x_hi;
+    bs2b->a0_hi = (1.0f - preset.G_hi * (1.0f - x_hi)) * g;
+    bs2b->a1_hi = -x_hi * g;
+} /* init */
+
+
+/* Exported functions.
+ * See descriptions in "bs2b.h"
+ */
+
+/* Stores the crossfeed level and sample rate, then recalculates the filter
+ * coefficients. Sample rates of zero or less are replaced with 1, which keeps
+ * the division by the rate in init() well-defined.
+ */
+void bs2b_set_params(struct bs2b *bs2b, int level, int srate)
+{
+    bs2b->level = level;
+    bs2b->srate = std::max(srate, 1);
+    init(bs2b);
+} /* bs2b_set_params */
+
+/* Returns the stored crossfeed level. init() rewrites unrecognized levels to
+ * BS2B_HIGH_ECLEVEL, so this can differ from the value that was passed in.
+ */
+int bs2b_get_level(struct bs2b *bs2b)
+{
+ return bs2b->level;
+} /* bs2b_get_level */
+
+/* Returns the stored sample rate. */
+int bs2b_get_srate(struct bs2b *bs2b)
+{
+ return bs2b->srate;
+} /* bs2b_get_srate */
+
+/* Resets the filter history of both channels to zero. */
+void bs2b_clear(struct bs2b *bs2b)
+{
+    for(auto &entry : bs2b->history)
+        entry = bs2b::t_last_sample{};
+} /* bs2b_clear */
+
+/* Applies the crossfeed in place to SamplesToDo samples of the Left and Right
+ * buffers. Each channel is run through both the lowpass and highboost
+ * filters; an output channel is then its own highboost result plus the other
+ * channel's lowpass result. The work is done in chunks of up to 128 samples,
+ * and the filter history in *bs2b is carried across chunks and calls.
+ */
+void bs2b_cross_feed(struct bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo)
+{
+    constexpr size_t ChunkSize{128};
+
+    const float a0_lo{bs2b->a0_lo};
+    const float b1_lo{bs2b->b1_lo};
+    const float a0_hi{bs2b->a0_hi};
+    const float a1_hi{bs2b->a1_hi};
+    const float b1_hi{bs2b->b1_hi};
+    float lsamples[ChunkSize][2];
+    float rsamples[ChunkSize][2];
+
+    /* Filters count samples of one channel. output[i][0] receives the lowpass
+     * result and output[i][1] the highboost result; state holds the filter
+     * history for the channel and is updated on return.
+     */
+    const auto filter_chunk = [a0_lo,b1_lo,a0_hi,a1_hi,b1_hi](const float *input,
+        float (*output)[2], const size_t count, auto &state) -> void
+    {
+        float z_lo{state.lo};
+        float z_hi{state.hi};
+        for(size_t i{0};i < count;++i)
+        {
+            const float in{input[i]};
+
+            const float lo{a0_lo*in + z_lo};
+            z_lo = b1_lo*lo;
+
+            const float hi{a0_hi*in + z_hi};
+            z_hi = a1_hi*in + b1_hi*hi;
+
+            output[i][0] = lo;
+            output[i][1] = hi;
+        }
+        state.lo = z_lo;
+        state.hi = z_hi;
+    };
+
+    size_t remaining{SamplesToDo};
+    while(remaining > 0)
+    {
+        const size_t todo{std::min(ChunkSize, remaining)};
+
+        /* Both inputs are filtered before either buffer is overwritten. */
+        filter_chunk(Left, lsamples, todo, bs2b->history[0]);
+        filter_chunk(Right, rsamples, todo, bs2b->history[1]);
+
+        /* Crossfeed */
+        for(size_t i{0};i < todo;++i)
+            Left[i] = lsamples[i][1] + rsamples[i][0];
+        for(size_t i{0};i < todo;++i)
+            Right[i] = rsamples[i][1] + lsamples[i][0];
+
+        Left += todo;
+        Right += todo;
+        remaining -= todo;
+    }
+} /* bs2b_cross_feed */
diff --git a/core/bs2b.h b/core/bs2b.h
new file mode 100644
index 00000000..4d0b9dd8
--- /dev/null
+++ b/core/bs2b.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2005 Boris Mikhaylov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CORE_BS2B_H
+#define CORE_BS2B_H
+
+#include "almalloc.h"
+
+/* Number of crossfeed levels */
+#define BS2B_CLEVELS 3
+
+/* Normal crossfeed levels */
+#define BS2B_HIGH_CLEVEL 3
+#define BS2B_MIDDLE_CLEVEL 2
+#define BS2B_LOW_CLEVEL 1
+
+/* Easy crossfeed levels: the matching normal level offset by BS2B_CLEVELS.
+ * The replacement lists are parenthesized so each macro expands to a single
+ * operand no matter what expression it is used in.
+ */
+#define BS2B_HIGH_ECLEVEL (BS2B_HIGH_CLEVEL + BS2B_CLEVELS)
+#define BS2B_MIDDLE_ECLEVEL (BS2B_MIDDLE_CLEVEL + BS2B_CLEVELS)
+#define BS2B_LOW_ECLEVEL (BS2B_LOW_CLEVEL + BS2B_CLEVELS)
+
+/* Default crossfeed levels */
+#define BS2B_DEFAULT_CLEVEL BS2B_HIGH_ECLEVEL
+/* Default sample rate (Hz) */
+#define BS2B_DEFAULT_SRATE 44100
+
+/* Crossfeed filter state: the configured level and sample rate, the filter
+ * coefficients derived from them, and the per-channel filter history.
+ */
+struct bs2b {
+ int level; /* Crossfeed level */
+ int srate; /* Sample rate (Hz) */
+
+ /* Lowpass IIR filter coefficients */
+ float a0_lo;
+ float b1_lo;
+
+ /* Highboost IIR filter coefficients */
+ float a0_hi;
+ float a1_hi;
+ float b1_hi;
+
+ /* Buffer of filter history
+ * [0] - first channel, [1] - second channel
+ * lo and hi are the carried-over state of the lowpass and highboost
+ * filters respectively.
+ */
+ struct t_last_sample {
+ float lo;
+ float hi;
+ } history[2];
+
+ DEF_NEWDEL(bs2b)
+};
+
+/* Set new coefficients for the given crossfeed level and sample rate. The
+ * filter history is not touched; call bs2b_clear to reset it.
+ * level - crossfeed level of *LEVEL values; any other value selects
+ *         BS2B_HIGH_ECLEVEL.
+ * srate - sample rate in Hz; values of zero or less are treated as 1.
+ */
+void bs2b_set_params(bs2b *bs2b, int level, int srate);
+
+/* Return current crossfeed level value */
+int bs2b_get_level(bs2b *bs2b);
+
+/* Return current sample rate value */
+int bs2b_get_srate(bs2b *bs2b);
+
+/* Clear buffer */
+void bs2b_clear(bs2b *bs2b);
+
+/* Apply the crossfeed in place to SamplesToDo samples of the Left and Right
+ * buffers.
+ */
+void bs2b_cross_feed(bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo);
+
+#endif /* CORE_BS2B_H */
diff --git a/core/bsinc_defs.h b/core/bsinc_defs.h
new file mode 100644
index 00000000..01bd3c29
--- /dev/null
+++ b/core/bsinc_defs.h
@@ -0,0 +1,12 @@
+#ifndef CORE_BSINC_DEFS_H
+#define CORE_BSINC_DEFS_H
+
+/* The bsinc filter tables are divided into distinct scale and phase
+ * intervals. Each count is a power of two, given by the matching bit count.
+ */
+constexpr unsigned int BSincScaleBits{4};
+constexpr unsigned int BSincScaleCount{1u << BSincScaleBits};
+constexpr unsigned int BSincPhaseBits{5};
+constexpr unsigned int BSincPhaseCount{1u << BSincPhaseBits};
+
+#endif /* CORE_BSINC_DEFS_H */
diff --git a/core/bsinc_tables.cpp b/core/bsinc_tables.cpp
new file mode 100644
index 00000000..693645f4
--- /dev/null
+++ b/core/bsinc_tables.cpp
@@ -0,0 +1,295 @@
+
+#include "bsinc_tables.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+
+#include "alnumbers.h"
+#include "core/mixer/defs.h"
+
+
+namespace {
+
+using uint = unsigned int;
+
+
+/* Normalized cardinal sine (sinc):
+ *
+ *   sinc(x) = 1                     when x is within epsilon of 0,
+ *           = sin(pi x) / (pi x)    otherwise.
+ */
+constexpr double Sinc(const double x)
+{
+    constexpr double tiny{std::numeric_limits<double>::epsilon()};
+    const bool nonzero{x > tiny || x < -tiny};
+    if(nonzero)
+    {
+        const double pix{al::numbers::pi*x};
+        return std::sin(pix) / pix;
+    }
+    /* Also taken for NaN, since both comparisons above are then false. */
+    return 1.0;
+}
+
+/* Zero-order modified Bessel function of the first kind, as needed by the
+ * Kaiser window:
+ *
+ *   I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k)
+ *          = sum_{k=0}^inf ((x / 2)^k / k!)^2
+ */
+constexpr double BesselI_0(const double x) noexcept
+{
+    const double half_x{x / 2.0};
+
+    /* The k=0 term is 1, so both the running term and the total start there
+     * and the loop begins with k=1.
+     */
+    double term{1.0};
+    double total{1.0};
+
+    /* Keep adding terms of the series until one no longer changes the total
+     * at double precision.
+     */
+    for(int k{1};;++k)
+    {
+        const double ratio{half_x / k};
+        term *= ratio * ratio;
+
+        const double previous{total};
+        total += term;
+        if(total == previous)
+            return total;
+    }
+}
+
+/* Kaiser window value for a given beta and a normalized position k in
+ * [-1, 1]:
+ *
+ *   w(k) = I_0(B sqrt(1 - k^2)) / I_0(B)   for -1 <= k <= 1,
+ *        = 0                               elsewhere.
+ *
+ * k may be derived either as
+ *
+ *   k = i / l,         with -l <= i <= l,
+ *
+ * or as
+ *
+ *   k = 2 i / M - 1,   with 0 <= i <= M.
+ *
+ * besseli_0_beta is the caller-supplied value of I_0(beta), used as the
+ * divisor so it need not be recomputed on every call.
+ */
+constexpr double Kaiser(const double beta, const double k, const double besseli_0_beta)
+{
+    const bool in_window{k >= -1.0 && k <= 1.0};
+    if(in_window)
+    {
+        const double arg{beta * std::sqrt(1.0 - k*k)};
+        return BesselI_0(arg) / besseli_0_beta;
+    }
+    /* Outside the window (or k is NaN). */
+    return 0.0;
+}
+
+/* Returns the (normalized frequency) transition width of a Kaiser window for
+ * the given rejection, in dB, and filter order.
+ *
+ * At or below 21.19dB the fixed numerator 5.79 is used, which enforces a
+ * minimum rejection of just above 21.18dB.
+ */
+constexpr double CalcKaiserWidth(const double rejection, const uint order) noexcept
+{
+    return (rejection > 21.19)
+        ? (rejection - 7.95) / (2.285 * al::numbers::pi*2.0 * order)
+        : 5.79 / (al::numbers::pi*2.0 * order);
+}
+
+/* Returns the Kaiser window beta value for the given rejection, in dB.
+ *
+ *   rejection > 50         : 0.1102 (r - 8.7)
+ *   21 <= rejection <= 50  : 0.5842 (r - 21)^0.4 + 0.07886 (r - 21)
+ *   otherwise              : 0
+ */
+constexpr double CalcKaiserBeta(const double rejection)
+{
+    /* Written as a negated >= so that NaN also yields 0. */
+    if(!(rejection >= 21.0))
+        return 0.0;
+    if(rejection > 50.0)
+        return 0.1102 * (rejection-8.7);
+    return (0.5842 * std::pow(rejection-21.0, 0.4)) + (0.07886 * (rejection-21.0));
+}
+
+
+struct BSincHeader { /* Parameters and per-scale sizes describing one bsinc filter table. */
+ double width{}; /* Kaiser transition width, from CalcKaiserWidth */
+ double beta{}; /* Kaiser beta, from CalcKaiserBeta */
+ double scaleBase{}; /* width / 2 */
+ double scaleRange{}; /* 1 - scaleBase */
+ double besseli_0_beta{}; /* BesselI_0(beta), cached */
+
+ uint a[BSincScaleCount]{}; /* per-scale value; twice this is that scale's point count */
+ uint total_size{}; /* sum over all scales of 4 * BSincPhaseCount * (points rounded up to a multiple of 4) */
+
+ constexpr BSincHeader(uint Rejection, uint Order) noexcept /* Rejection is passed on as dB; Order gives Order+1 base points */
+ {
+ width = CalcKaiserWidth(Rejection, Order);
+ beta = CalcKaiserBeta(Rejection);
+ scaleBase = width / 2.0;
+ scaleRange = 1.0 - scaleBase;
+ besseli_0_beta = BesselI_0(beta);
+
+ uint num_points{Order+1};
+ for(uint si{0};si < BSincScaleCount;++si)
+ {
+ const double scale{scaleBase + (scaleRange * (si+1) / BSincScaleCount)}; /* the last index gives scaleBase + scaleRange, i.e. 1 */
+ const uint a_{std::min(static_cast<uint>(num_points / 2.0 / scale), num_points)}; /* capped at num_points */
+ const uint m{2 * a_};
+
+ a[si] = a_;
+ total_size += 4 * BSincPhaseCount * ((m+3) & ~3u); /* (m+3) & ~3u rounds m up to a multiple of 4 */
+ }
+ }
+};
+
+/* 11th and 23rd order filters (12 and 24-point respectively) with a 60dB drop
+ * at nyquist. Each filter will scale up the order when downsampling, to 23rd
+ * and 47th order respectively.
+ */
+constexpr BSincHeader bsinc12_hdr{60, 11};
+constexpr BSincHeader bsinc24_hdr{60, 23};
+
+
+/* NOTE: GCC 5 has an issue with BSincHeader objects being in an anonymous
+ * namespace while also being used as non-type template parameters.
+ *
+ * For that compiler the struct is instead templated on the table size and
+ * keeps a reference to its header; both variants share the constructor body
+ * that follows the #endif.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
+
+/* The number of sample points is double the a value (rounded up to a multiple
+ * of 4), and scale index 0 includes the doubling for downsampling. bsinc24 is
+ * currently the highest quality filter, and will use the most sample points.
+ */
+constexpr uint BSincPointsMax{(bsinc24_hdr.a[0]*2 + 3) & ~3u};
+static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+
+template<size_t total_size>
+struct BSincFilterArray {
+ alignas(16) std::array<float, total_size> mTable;
+ const BSincHeader &hdr;
+
+ BSincFilterArray(const BSincHeader &hdr_) : hdr{hdr_}
+ {
+#else
+template<const BSincHeader &hdr>
+struct BSincFilterArray {
+ alignas(16) std::array<float, hdr.total_size> mTable{};
+
+ BSincFilterArray()
+ {
+ constexpr uint BSincPointsMax{(hdr.a[0]*2 + 3) & ~3u};
+ static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+#endif
+ using filter_type = double[BSincPhaseCount+1][BSincPointsMax];
+ auto filter = std::make_unique<filter_type[]>(BSincScaleCount);
+
+ /* Calculate the Kaiser-windowed Sinc filter coefficients for each
+ * scale and phase index.
+ *
+ * Each scale's m points are written at offset o, which centers them
+ * in a BSincPointsMax-wide row of the temporary filter array.
+ */
+ for(uint si{0};si < BSincScaleCount;++si)
+ {
+ const uint m{hdr.a[si] * 2};
+ const size_t o{(BSincPointsMax-m) / 2};
+ const double scale{hdr.scaleBase + (hdr.scaleRange * (si+1) / BSincScaleCount)};
+ const double cutoff{scale - (hdr.scaleBase * std::max(1.0, scale*2.0))};
+ const auto a = static_cast<double>(hdr.a[si]);
+ const double l{a - 1.0/BSincPhaseCount};
+
+ /* Do one extra phase index so that the phase delta has a proper
+ * target for its last index.
+ */
+ for(uint pi{0};pi <= BSincPhaseCount;++pi)
+ {
+ const double phase{std::floor(l) + (pi/double{BSincPhaseCount})};
+
+ for(uint i{0};i < m;++i)
+ {
+ const double x{i - phase};
+ filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, hdr.besseli_0_beta) * cutoff *
+ Sinc(cutoff*x);
+ }
+ }
+ }
+
+ size_t idx{0};
+ for(size_t si{0};si < BSincScaleCount;++si)
+ {
+ const size_t m{((hdr.a[si]*2) + 3) & ~3u};
+ const size_t o{(BSincPointsMax-m) / 2};
+
+ /* Write out each phase index's filter and phase delta for this
+ * quality scale.
+ *
+ * Note that m here is the point count rounded up to a multiple of
+ * 4, so the rows written may be wider than the m used to fill the
+ * temporary array above.
+ */
+ for(size_t pi{0};pi < BSincPhaseCount;++pi)
+ {
+ for(size_t i{0};i < m;++i)
+ mTable[idx++] = static_cast<float>(filter[si][pi][o+i]);
+
+ /* Linear interpolation between phases is simplified by pre-
+ * calculating the delta (b - a) in: x = a + f (b - a)
+ */
+ for(size_t i{0};i < m;++i)
+ {
+ const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]};
+ mTable[idx++] = static_cast<float>(phDelta);
+ }
+ }
+ /* Calculate and write out each phase index's filter quality scale
+ * deltas. The last scale index doesn't have any scale or scale-
+ * phase deltas.
+ */
+ if(si == BSincScaleCount-1)
+ {
+ for(size_t i{0};i < BSincPhaseCount*m*2;++i)
+ mTable[idx++] = 0.0f;
+ }
+ else for(size_t pi{0};pi < BSincPhaseCount;++pi)
+ {
+ /* Linear interpolation between scales is also simplified.
+ *
+ * Given a difference in the number of points between scales,
+ * the destination points will be 0, thus: x = a + f (-a)
+ */
+ for(size_t i{0};i < m;++i)
+ {
+ const double scDelta{filter[si+1][pi][o+i] - filter[si][pi][o+i]};
+ mTable[idx++] = static_cast<float>(scDelta);
+ }
+
+ /* This last simplification is done to complete the bilinear
+ * equation for the combination of phase and scale.
+ */
+ for(size_t i{0};i < m;++i)
+ {
+ const double spDelta{(filter[si+1][pi+1][o+i] - filter[si+1][pi][o+i]) -
+ (filter[si][pi+1][o+i] - filter[si][pi][o+i])};
+ mTable[idx++] = static_cast<float>(spDelta);
+ }
+ }
+ }
+ assert(idx == hdr.total_size); /* every table entry must have been written exactly once */
+ }
+
+ constexpr const BSincHeader &getHeader() const noexcept { return hdr; }
+ constexpr const float *getTable() const noexcept { return &mTable.front(); }
+};
+
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
+const BSincFilterArray<bsinc12_hdr.total_size> bsinc12_filter{bsinc12_hdr};
+const BSincFilterArray<bsinc24_hdr.total_size> bsinc24_filter{bsinc24_hdr};
+#else
+const BSincFilterArray<bsinc12_hdr> bsinc12_filter{};
+const BSincFilterArray<bsinc24_hdr> bsinc24_filter{};
+#endif
+
+/* Builds the BSincTable descriptor for a generated filter array.
+ *
+ * filter must provide getHeader() (the BSincHeader it was built from) and
+ * getTable() (pointer to its coefficient storage). The result holds the
+ * header's scale base, the reciprocal of its scale range, each scale's point
+ * count rounded up to a multiple of 4, and the offset at which each scale's
+ * data starts within the table.
+ */
+template<typename T>
+constexpr BSincTable GenerateBSincTable(const T &filter)
+{
+    const BSincHeader &header = filter.getHeader();
+
+    BSincTable table{};
+    table.scaleBase = static_cast<float>(header.scaleBase);
+    table.scaleRange = static_cast<float>(1.0 / header.scaleRange);
+    table.Tab = filter.getTable();
+
+    /* Every scale occupies four sets of BSincPhaseCount rows of its point
+     * count, so each offset is the running total of the preceding scales.
+     */
+    uint offset{0u};
+    for(size_t si{0};si < BSincScaleCount;++si)
+    {
+        const uint points{((header.a[si]*2) + 3) & ~3u};
+        table.m[si] = points;
+        table.filterOffset[si] = offset;
+        offset += points*4*BSincPhaseCount;
+    }
+    return table;
+}
+
+} // namespace
+
+const BSincTable gBSinc12{GenerateBSincTable(bsinc12_filter)};
+const BSincTable gBSinc24{GenerateBSincTable(bsinc24_filter)};
diff --git a/core/bsinc_tables.h b/core/bsinc_tables.h
new file mode 100644
index 00000000..aca4b274
--- /dev/null
+++ b/core/bsinc_tables.h
@@ -0,0 +1,17 @@
+#ifndef CORE_BSINC_TABLES_H
+#define CORE_BSINC_TABLES_H
+
+#include "bsinc_defs.h"
+
+
+struct BSincTable { /* Descriptor for one generated bsinc filter table. */
+ float scaleBase, scaleRange; /* NOTE(review): the generator visible in bsinc_tables.cpp stores the reciprocal of the header's scale range in scaleRange */
+ unsigned int m[BSincScaleCount]; /* per-scale point count */
+ unsigned int filterOffset[BSincScaleCount]; /* per-scale starting offset into Tab */
+ const float *Tab; /* coefficient data; not owned by this struct */
+};
+
+extern const BSincTable gBSinc12;
+extern const BSincTable gBSinc24;
+
+#endif /* CORE_BSINC_TABLES_H */
diff --git a/core/buffer_storage.cpp b/core/buffer_storage.cpp
new file mode 100644
index 00000000..98ca2c1b
--- /dev/null
+++ b/core/buffer_storage.cpp
@@ -0,0 +1,81 @@
+
+#include "config.h"
+
+#include "buffer_storage.h"
+
+#include <stdint.h>
+
+
+const char *NameFromFormat(FmtType type) noexcept
+{
+ switch(type)
+ {
+ case FmtUByte: return "UInt8";
+ case FmtShort: return "Int16";
+ case FmtFloat: return "Float";
+ case FmtDouble: return "Double";
+ case FmtMulaw: return "muLaw";
+ case FmtAlaw: return "aLaw";
+ case FmtIMA4: return "IMA4 ADPCM";
+ case FmtMSADPCM: return "MS ADPCM";
+ }
+ return "<internal error>";
+}
+
+/* Returns a printable name for the given channel configuration, or
+ * "<internal error>" if the value isn't one of the known enumerators.
+ */
+const char *NameFromFormat(FmtChannels channels) noexcept
+{
+    const char *name{"<internal error>"};
+    switch(channels)
+    {
+    case FmtMono: name = "Mono"; break;
+    case FmtStereo: name = "Stereo"; break;
+    case FmtRear: name = "Rear"; break;
+    case FmtQuad: name = "Quadraphonic"; break;
+    case FmtX51: name = "Surround 5.1"; break;
+    case FmtX61: name = "Surround 6.1"; break;
+    case FmtX71: name = "Surround 7.1"; break;
+    case FmtBFormat2D: name = "B-Format 2D"; break;
+    case FmtBFormat3D: name = "B-Format 3D"; break;
+    case FmtUHJ2: name = "UHJ2"; break;
+    case FmtUHJ3: name = "UHJ3"; break;
+    case FmtUHJ4: name = "UHJ4"; break;
+    case FmtSuperStereo: name = "Super Stereo"; break;
+    }
+    return name;
+}
+
+/* Returns the size in bytes of one sample of the given type. The IMA4 and
+ * MSADPCM types, like any unrecognized value, give 0.
+ */
+uint BytesFromFmt(FmtType type) noexcept
+{
+    uint bytes{0u};
+    switch(type)
+    {
+    case FmtUByte:
+    case FmtMulaw:
+    case FmtAlaw:
+        bytes = static_cast<uint>(sizeof(uint8_t));
+        break;
+    case FmtShort:
+        bytes = static_cast<uint>(sizeof(int16_t));
+        break;
+    case FmtFloat:
+        bytes = static_cast<uint>(sizeof(float));
+        break;
+    case FmtDouble:
+        bytes = static_cast<uint>(sizeof(double));
+        break;
+    case FmtIMA4:
+    case FmtMSADPCM:
+        break;
+    }
+    return bytes;
+}
+
+/* Returns the number of channels in the given configuration. ambiorder is
+ * only used for the B-Format layouts: 2*order + 1 channels for 2D and
+ * (order+1)^2 for 3D. Unrecognized values give 0.
+ */
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept
+{
+    uint count{0u};
+    switch(chans)
+    {
+    case FmtMono:
+        count = 1;
+        break;
+    case FmtStereo:
+    case FmtRear:
+    case FmtUHJ2:
+    case FmtSuperStereo:
+        count = 2;
+        break;
+    case FmtUHJ3:
+        count = 3;
+        break;
+    case FmtQuad:
+    case FmtUHJ4:
+        count = 4;
+        break;
+    case FmtX51:
+        count = 6;
+        break;
+    case FmtX61:
+        count = 7;
+        break;
+    case FmtX71:
+        count = 8;
+        break;
+    case FmtBFormat2D:
+        count = (ambiorder*2) + 1;
+        break;
+    case FmtBFormat3D:
+        count = (ambiorder+1) * (ambiorder+1);
+        break;
+    }
+    return count;
+}
diff --git a/core/buffer_storage.h b/core/buffer_storage.h
new file mode 100644
index 00000000..282d5b53
--- /dev/null
+++ b/core/buffer_storage.h
@@ -0,0 +1,115 @@
+#ifndef CORE_BUFFER_STORAGE_H
+#define CORE_BUFFER_STORAGE_H
+
+#include <atomic>
+
+#include "albyte.h"
+#include "alnumeric.h"
+#include "alspan.h"
+#include "ambidefs.h"
+
+
+using uint = unsigned int;
+
+/* Storable formats. The IMA4 and MSADPCM types have no fixed per-sample byte
+ * size; BufferStorage sizes them per block using its block alignment instead.
+ */
+enum FmtType : unsigned char {
+ FmtUByte,
+ FmtShort,
+ FmtFloat,
+ FmtDouble,
+ FmtMulaw,
+ FmtAlaw,
+ FmtIMA4,
+ FmtMSADPCM,
+};
+enum FmtChannels : unsigned char {
+ FmtMono,
+ FmtStereo,
+ FmtRear,
+ FmtQuad,
+ FmtX51, /* (WFX order) */
+ FmtX61, /* (WFX order) */
+ FmtX71, /* (WFX order) */
+ FmtBFormat2D,
+ FmtBFormat3D,
+ FmtUHJ2, /* 2-channel UHJ, aka "BHJ", stereo-compatible */
+ FmtUHJ3, /* 3-channel UHJ, aka "THJ" */
+ FmtUHJ4, /* 4-channel UHJ, aka "PHJ" */
+ FmtSuperStereo, /* Stereo processed with Super Stereo. */
+};
+
+enum class AmbiLayout : unsigned char {
+ FuMa,
+ ACN,
+};
+enum class AmbiScaling : unsigned char {
+ FuMa,
+ SN3D,
+ N3D,
+ UHJ,
+};
+
+const char *NameFromFormat(FmtType type) noexcept;
+const char *NameFromFormat(FmtChannels channels) noexcept;
+
+uint BytesFromFmt(FmtType type) noexcept;
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept;
+/* Size in bytes of one frame: the channel count times the per-sample size. */
+inline uint FrameSizeFromFmt(FmtChannels chans, FmtType type, uint ambiorder) noexcept
+{
+    const uint numchans{ChannelsFromFmt(chans, ambiorder)};
+    const uint samplesize{BytesFromFmt(type)};
+    return numchans * samplesize;
+}
+
+/* True for the 2D and 3D B-Format channel configurations. */
+constexpr bool IsBFormat(FmtChannels chans) noexcept
+{
+    switch(chans)
+    {
+    case FmtBFormat2D:
+    case FmtBFormat3D:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
+/* True for the UHJ family of channel configurations. Super Stereo is counted
+ * as part of that family here: it goes through similar processing as UHJ,
+ * both result in a B-Format signal, and it needs the same consideration as
+ * BHJ (a three channel result from only two channels of input).
+ */
+constexpr bool IsUHJ(FmtChannels chans) noexcept
+{
+    switch(chans)
+    {
+    case FmtUHJ2:
+    case FmtUHJ3:
+    case FmtUHJ4:
+    case FmtSuperStereo:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
+/** A format is ambisonic if it is either a UHJ or a B-Format layout. */
+constexpr bool IsAmbisonic(FmtChannels chans) noexcept
+{
+    return IsUHJ(chans) || IsBFormat(chans);
+}
+
+/* True for the ambisonic configurations treated as 2D: 2D B-Format, the two
+ * and three channel UHJ variants, and Super Stereo.
+ */
+constexpr bool Is2DAmbisonic(FmtChannels chans) noexcept
+{
+    switch(chans)
+    {
+    case FmtBFormat2D:
+    case FmtUHJ2:
+    case FmtUHJ3:
+    case FmtSuperStereo:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
+
+using CallbackType = int(*)(void*, void*, int);
+
+/* Format description and sample data reference for one stored buffer. */
+struct BufferStorage {
+    /* Optional callback and the user pointer that goes with it. */
+    CallbackType mCallback{nullptr};
+    void *mUserData{nullptr};
+
+    /* The raw sample bytes. */
+    al::span<al::byte> mData;
+
+    uint mSampleRate{0u};
+    FmtChannels mChannels{FmtMono};
+    FmtType mType{FmtShort};
+    uint mSampleLen{0u};
+    /* Block alignment; only read here for the IMA4 and MSADPCM types. */
+    uint mBlockAlign{0u};
+
+    AmbiLayout mAmbiLayout{AmbiLayout::FuMa};
+    AmbiScaling mAmbiScaling{AmbiScaling::FuMa};
+    uint mAmbiOrder{0u};
+
+    /* Bytes per sample for mType (0 for the block-based types). */
+    inline uint bytesFromFmt() const noexcept { return BytesFromFmt(mType); }
+    /* Channel count for mChannels, using mAmbiOrder for B-Format layouts. */
+    inline uint channelsFromFmt() const noexcept
+    { return ChannelsFromFmt(mChannels, mAmbiOrder); }
+    /* Bytes per frame: channel count times bytes per sample. */
+    inline uint frameSizeFromFmt() const noexcept { return channelsFromFmt() * bytesFromFmt(); }
+
+    /* Bytes per storage block. For IMA4 and MSADPCM this is derived from
+     * mBlockAlign and the channel count; every other type uses the frame
+     * size.
+     * NOTE(review): the ADPCM formulas assume mBlockAlign is at least 1 (IMA4)
+     * or 2 (MSADPCM); smaller values wrap in unsigned arithmetic -- confirm
+     * that callers validate it.
+     */
+    inline uint blockSizeFromFmt() const noexcept
+    {
+        if(mType == FmtIMA4) return ((mBlockAlign-1)/2 + 4) * channelsFromFmt();
+        if(mType == FmtMSADPCM) return ((mBlockAlign-2)/2 + 7) * channelsFromFmt();
+        return frameSizeFromFmt();
+    }
+
+    inline bool isBFormat() const noexcept { return IsBFormat(mChannels); }
+};
+
+#endif /* CORE_BUFFER_STORAGE_H */
diff --git a/core/bufferline.h b/core/bufferline.h
new file mode 100644
index 00000000..8b445f3f
--- /dev/null
+++ b/core/bufferline.h
@@ -0,0 +1,17 @@
+#ifndef CORE_BUFFERLINE_H
+#define CORE_BUFFERLINE_H
+
+#include <array>
+
+#include "alspan.h"
+
+/* Size for temporary storage of buffer data, in floats. Larger values need
+ * more memory and are harder on cache, while smaller values may need more
+ * iterations for mixing.
+ */
+constexpr int BufferLineSize{1024};
+
+using FloatBufferLine = std::array<float,BufferLineSize>;
+using FloatBufferSpan = al::span<float,BufferLineSize>;
+
+#endif /* CORE_BUFFERLINE_H */
diff --git a/core/context.cpp b/core/context.cpp
new file mode 100644
index 00000000..d68d8327
--- /dev/null
+++ b/core/context.cpp
@@ -0,0 +1,164 @@
+
+#include "config.h"
+
+#include <cassert>
+#include <memory>
+
+#include "async_event.h"
+#include "context.h"
+#include "device.h"
+#include "effectslot.h"
+#include "logging.h"
+#include "ringbuffer.h"
+#include "voice.h"
+#include "voice_change.h"
+
+
+#ifdef __cpp_lib_atomic_is_always_lock_free
+static_assert(std::atomic<ContextBase::AsyncEventBitset>::is_always_lock_free, "atomic<bitset> isn't lock-free");
+#endif
+
+ContextBase::ContextBase(DeviceBase *device) : mDevice{device}
+{ assert(mEnabledEvts.is_lock_free()); } /* assert-only check that the atomic event bitset is lock-free */
+
+ContextBase::~ContextBase()
+{ /* Frees the pending and unused property objects, the active effect slot
+   * array and the voice array, then destroys any events still queued in the
+   * async event ring buffer. */
+ size_t count{0};
+ ContextProps *cprops{mParams.ContextUpdate.exchange(nullptr, std::memory_order_relaxed)};
+ if(cprops)
+ { /* a property update that was still pending */
+ ++count;
+ delete cprops;
+ }
+ cprops = mFreeContextProps.exchange(nullptr, std::memory_order_acquire);
+ while(cprops)
+ { /* walk the free list; the unique_ptr deletes each node after its next pointer is read */
+ std::unique_ptr<ContextProps> old{cprops};
+ cprops = old->next.load(std::memory_order_relaxed);
+ ++count;
+ }
+ TRACE("Freed %zu context property object%s\n", count, (count==1)?"":"s");
+
+ count = 0;
+ EffectSlotProps *eprops{mFreeEffectslotProps.exchange(nullptr, std::memory_order_acquire)};
+ while(eprops)
+ { /* same walk for the effect slot property free list */
+ std::unique_ptr<EffectSlotProps> old{eprops};
+ eprops = old->next.load(std::memory_order_relaxed);
+ ++count;
+ }
+ TRACE("Freed %zu AuxiliaryEffectSlot property object%s\n", count, (count==1)?"":"s");
+
+ if(EffectSlotArray *curarray{mActiveAuxSlots.exchange(nullptr, std::memory_order_relaxed)})
+ {
+ al::destroy_n(curarray->end(), curarray->size()); /* NOTE(review): destroys size() elements starting at end(), i.e. beyond the array's nominal range; presumably the allocation carries extra storage there -- confirm against where the array is created */
+ delete curarray;
+ }
+
+ delete mVoices.exchange(nullptr, std::memory_order_relaxed); /* only the pointer array; the Voice objects live in mVoiceClusters */
+
+ if(mAsyncEvents)
+ {
+ count = 0;
+ auto evt_vec = mAsyncEvents->getReadVector(); /* the readable data comes back as up to two segments */
+ if(evt_vec.first.len > 0)
+ {
+ al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf), evt_vec.first.len);
+ count += evt_vec.first.len;
+ }
+ if(evt_vec.second.len > 0)
+ {
+ al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.second.buf), evt_vec.second.len);
+ count += evt_vec.second.len;
+ }
+ if(count > 0)
+ TRACE("Destructed %zu orphaned event%s\n", count, (count==1)?"":"s");
+ mAsyncEvents->readAdvance(count); /* mark the destroyed events as consumed */
+ }
+}
+
+
+/* Allocates a new cluster of VoiceChange objects, links them into a chain
+ * that ends at the current tail, and makes the cluster's first element the
+ * new tail.
+ */
+void ContextBase::allocVoiceChanges()
+{
+    constexpr size_t clustersize{128};
+
+    VoiceChangeCluster cluster{std::make_unique<VoiceChange[]>(clustersize)};
+
+    /* Point every element except the last at its successor. */
+    for(size_t i{0};i+1 < clustersize;++i)
+        cluster[i].mNext.store(std::addressof(cluster[i+1]), std::memory_order_relaxed);
+    /* The last element continues on to the previous tail. */
+    cluster[clustersize-1].mNext.store(mVoiceChangeTail, std::memory_order_relaxed);
+
+    /* The array's address is unaffected by moving the owning pointer into
+     * the vector, so it can be taken up front.
+     */
+    VoiceChange *const newtail{cluster.get()};
+    mVoiceChangeClusters.emplace_back(std::move(cluster));
+    mVoiceChangeTail = newtail;
+}
+
+void ContextBase::allocVoiceProps()
+{ /* Allocates one more cluster of VoicePropsItem objects, chains them
+   * together, and pushes the chain onto the front of the mFreeVoiceProps
+   * list. */
+ constexpr size_t clustersize{32};
+
+ TRACE("Increasing allocated voice properties to %zu\n",
+ (mVoicePropClusters.size()+1) * clustersize);
+
+ VoicePropsCluster cluster{std::make_unique<VoicePropsItem[]>(clustersize)};
+ for(size_t i{1};i < clustersize;++i)
+ cluster[i-1].next.store(std::addressof(cluster[i]), std::memory_order_relaxed); /* link each item to the one after it */
+ mVoicePropClusters.emplace_back(std::move(cluster));
+
+ VoicePropsItem *oldhead{mFreeVoiceProps.load(std::memory_order_acquire)};
+ do { /* retry until the new chain is installed as head; a failed exchange refreshes oldhead, so the last item is re-linked each time */
+ mVoicePropClusters.back()[clustersize-1].next.store(oldhead, std::memory_order_relaxed);
+ } while(mFreeVoiceProps.compare_exchange_weak(oldhead, mVoicePropClusters.back().get(),
+ std::memory_order_acq_rel, std::memory_order_acquire) == false);
+}
+
+/* Grows the voice storage by at least addcount voices (rounded up to whole
+ * clusters) and publishes a new array holding pointers to every voice.
+ *
+ * Throws std::runtime_error if the resulting total would be too large. Once
+ * the new array is published, the previous one (if any) is deleted after
+ * waiting for the device's mix to finish.
+ */
+void ContextBase::allocVoices(size_t addcount)
+{
+    constexpr size_t clustersize{32};
+    /* Convert element count to cluster count, rounding up. This divides
+     * first and then accounts for any remainder, rather than adding
+     * clustersize-1 up front, so that a huge addcount can't wrap around to a
+     * small cluster count and slip past the limit check below.
+     */
+    addcount = addcount/clustersize + ((addcount%clustersize) != 0 ? 1u : 0u);
+
+    if(addcount >= std::numeric_limits<int>::max()/clustersize - mVoiceClusters.size())
+        throw std::runtime_error{"Allocating too many voices"};
+    const size_t totalcount{(mVoiceClusters.size()+addcount) * clustersize};
+    TRACE("Increasing allocated voices to %zu\n", totalcount);
+
+    auto newarray = VoiceArray::Create(totalcount);
+    while(addcount)
+    {
+        mVoiceClusters.emplace_back(std::make_unique<Voice[]>(clustersize));
+        --addcount;
+    }
+
+    /* Fill the new array with a pointer to each voice of every cluster, old
+     * and new.
+     */
+    auto voice_iter = newarray->begin();
+    for(VoiceCluster &cluster : mVoiceClusters)
+    {
+        for(size_t i{0};i < clustersize;++i)
+            *(voice_iter++) = &cluster[i];
+    }
+
+    if(auto *oldvoices = mVoices.exchange(newarray.release(), std::memory_order_acq_rel))
+    {
+        /* Wait for the mix to finish before freeing the array that was just
+         * replaced.
+         */
+        mDevice->waitForMix();
+        delete oldvoices;
+    }
+}
+
+
+/* Returns an effect slot that isn't marked as in use, allocating another
+ * cluster of slots when every existing one is taken. Throws
+ * std::runtime_error if too many slots would be allocated.
+ */
+EffectSlot *ContextBase::getEffectSlot()
+{
+    /* Look for a free slot among the clusters that already exist. */
+    for(EffectSlotCluster &cluster : mEffectSlotClusters)
+    {
+        EffectSlot *slot{cluster.get()};
+        EffectSlot *const last{slot + EffectSlotClusterSize};
+        for(;slot != last;++slot)
+        {
+            if(!slot->InUse)
+                return slot;
+        }
+    }
+
+    /* None free; add one more cluster, provided that stays within limits. */
+    if(1 >= std::numeric_limits<int>::max()/EffectSlotClusterSize - mEffectSlotClusters.size())
+        throw std::runtime_error{"Allocating too many effect slots"};
+    const size_t totalcount{(mEffectSlotClusters.size()+1) * EffectSlotClusterSize};
+    TRACE("Increasing allocated effect slots to %zu\n", totalcount);
+
+    mEffectSlotClusters.emplace_back(std::make_unique<EffectSlot[]>(EffectSlotClusterSize));
+    /* Search again, now that there is a fresh cluster to pick from. */
+    return getEffectSlot();
+}
diff --git a/core/context.h b/core/context.h
new file mode 100644
index 00000000..9723eac3
--- /dev/null
+++ b/core/context.h
@@ -0,0 +1,171 @@
+#ifndef CORE_CONTEXT_H
+#define CORE_CONTEXT_H
+
+#include <array>
+#include <atomic>
+#include <bitset>
+#include <cstddef>
+#include <memory>
+#include <thread>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "async_event.h"
+#include "atomic.h"
+#include "bufferline.h"
+#include "threads.h"
+#include "vecmat.h"
+#include "vector.h"
+
+struct DeviceBase;
+struct EffectSlot;
+struct EffectSlotProps;
+struct RingBuffer;
+struct Voice;
+struct VoiceChange;
+struct VoicePropsItem;
+
+using uint = unsigned int;
+
+
+constexpr float SpeedOfSoundMetersPerSec{343.3f};
+
+constexpr float AirAbsorbGainHF{0.99426f}; /* -0.05dB */
+
+enum class DistanceModel : unsigned char {
+ Disable,
+ Inverse, InverseClamped,
+ Linear, LinearClamped,
+ Exponent, ExponentClamped,
+
+ Default = InverseClamped
+};
+
+
+struct ContextProps {
+ std::array<float,3> Position;
+ std::array<float,3> Velocity;
+ std::array<float,3> OrientAt;
+ std::array<float,3> OrientUp;
+ float Gain;
+ float MetersPerUnit;
+ float AirAbsorptionGainHF;
+
+ float DopplerFactor;
+ float DopplerVelocity;
+ float SpeedOfSound;
+ bool SourceDistanceModel;
+ DistanceModel mDistanceModel;
+
+ std::atomic<ContextProps*> next;
+
+ DEF_NEWDEL(ContextProps)
+};
+
+struct ContextParams {
+ /* Pointer to the most recent property values that are awaiting an update. */
+ std::atomic<ContextProps*> ContextUpdate{nullptr};
+
+ alu::Vector Position{};
+ alu::Matrix Matrix{alu::Matrix::Identity()};
+ alu::Vector Velocity{};
+
+ float Gain{1.0f};
+ float MetersPerUnit{1.0f};
+ float AirAbsorptionGainHF{AirAbsorbGainHF};
+
+ float DopplerFactor{1.0f};
+ float SpeedOfSound{SpeedOfSoundMetersPerSec}; /* in units per sec! */
+
+ bool SourceDistanceModel{false};
+ DistanceModel mDistanceModel{};
+};
+
+struct ContextBase { /* Core per-context state; non-copyable. */
+ DeviceBase *const mDevice; /* set once by the constructor */
+
+ /* Counter for the pre-mixing updates, in 31.1 fixed point (lowest bit
+ * indicates if updates are currently happening).
+ */
+ RefCount mUpdateCount{0u};
+ std::atomic<bool> mHoldUpdates{false};
+ std::atomic<bool> mStopVoicesOnDisconnect{true};
+
+ float mGainBoost{1.0f};
+
+ /* Linked lists of unused property containers, free to use for future
+ * updates.
+ */
+ std::atomic<ContextProps*> mFreeContextProps{nullptr};
+ std::atomic<VoicePropsItem*> mFreeVoiceProps{nullptr};
+ std::atomic<EffectSlotProps*> mFreeEffectslotProps{nullptr};
+
+ /* The voice change tail is the beginning of the "free" elements, up to and
+ * *excluding* the current. If tail==current, there's no free elements and
+ * new ones need to be allocated. The current voice change is the element
+ * last processed, and any after are pending.
+ */
+ VoiceChange *mVoiceChangeTail{};
+ std::atomic<VoiceChange*> mCurrentVoiceChange{};
+
+ void allocVoiceChanges();
+ void allocVoiceProps();
+
+
+ ContextParams mParams;
+
+ using VoiceArray = al::FlexArray<Voice*>;
+ std::atomic<VoiceArray*> mVoices{};
+ std::atomic<size_t> mActiveVoiceCount{};
+
+ void allocVoices(size_t addcount);
+ al::span<Voice*> getVoicesSpan() const noexcept /* NOTE(review): dereferences mVoices unchecked, so it presumably must only be used once a voice array has been allocated -- confirm */
+ {
+ return {mVoices.load(std::memory_order_relaxed)->data(),
+ mActiveVoiceCount.load(std::memory_order_relaxed)};
+ }
+ al::span<Voice*> getVoicesSpanAcquired() const noexcept /* same as getVoicesSpan, but both loads use acquire ordering */
+ {
+ return {mVoices.load(std::memory_order_acquire)->data(),
+ mActiveVoiceCount.load(std::memory_order_acquire)};
+ }
+
+
+ using EffectSlotArray = al::FlexArray<EffectSlot*>;
+ std::atomic<EffectSlotArray*> mActiveAuxSlots{nullptr};
+
+ std::thread mEventThread;
+ al::semaphore mEventSem;
+ std::unique_ptr<RingBuffer> mAsyncEvents;
+ using AsyncEventBitset = std::bitset<AsyncEvent::UserEventCount>;
+ std::atomic<AsyncEventBitset> mEnabledEvts{0u};
+
+ /* Asynchronous voice change actions are processed as a linked list of
+ * VoiceChange objects by the mixer, which is atomically appended to.
+ * However, to avoid allocating each object individually, they're allocated
+ * in clusters that are stored in a vector for easy automatic cleanup.
+ */
+ using VoiceChangeCluster = std::unique_ptr<VoiceChange[]>;
+ al::vector<VoiceChangeCluster> mVoiceChangeClusters;
+
+ using VoiceCluster = std::unique_ptr<Voice[]>;
+ al::vector<VoiceCluster> mVoiceClusters;
+
+ using VoicePropsCluster = std::unique_ptr<VoicePropsItem[]>;
+ al::vector<VoicePropsCluster> mVoicePropClusters;
+
+
+ static constexpr size_t EffectSlotClusterSize{4}; /* number of EffectSlot objects per cluster */
+ EffectSlot *getEffectSlot();
+
+ using EffectSlotCluster = std::unique_ptr<EffectSlot[]>;
+ al::vector<EffectSlotCluster> mEffectSlotClusters;
+
+
+ ContextBase(DeviceBase *device);
+ ContextBase(const ContextBase&) = delete;
+ ContextBase& operator=(const ContextBase&) = delete;
+ ~ContextBase();
+};
+
+#endif /* CORE_CONTEXT_H */
diff --git a/core/converter.cpp b/core/converter.cpp
new file mode 100644
index 00000000..a5141448
--- /dev/null
+++ b/core/converter.cpp
@@ -0,0 +1,346 @@
+
+#include "config.h"
+
+#include "converter.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <limits.h>
+
+#include "albit.h"
+#include "albyte.h"
+#include "alnumeric.h"
+#include "fpu_ctrl.h"
+
+
+namespace {
+
+constexpr uint MaxPitch{10};
+
+static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
+static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
+ "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
+
+/* Converts one sample of a device format to float. Signed integer types are
+ * scaled by the reciprocal of their magnitude (128, 32768, 2147483648), and
+ * unsigned types are first re-centered to their signed counterpart.
+ *
+ * The base template is left undefined. It should be marked =delete, but Clang
+ * 3.8.1 chokes on that given the inline specializations.
+ */
+template<DevFmtType T>
+inline float LoadSample(DevFmtType_t<T> val) noexcept;
+
+template<>
+inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
+{
+    constexpr float scale{1.0f/128.0f};
+    return val * scale;
+}
+template<>
+inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
+{
+    constexpr float scale{1.0f/32768.0f};
+    return val * scale;
+}
+template<>
+inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
+{
+    constexpr float scale{1.0f/2147483648.0f};
+    return static_cast<float>(val) * scale;
+}
+template<>
+inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
+{
+    return val;
+}
+
+/* Unsigned input variations: shift down by half the range, then convert as
+ * the signed type.
+ */
+template<>
+inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
+{
+    const auto centered = static_cast<int8_t>(val - 128);
+    return LoadSample<DevFmtByte>(centered);
+}
+template<>
+inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
+{
+    const auto centered = static_cast<int16_t>(val - 32768);
+    return LoadSample<DevFmtShort>(centered);
+}
+template<>
+inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
+{
+    const auto centered = static_cast<int32_t>(val - 2147483648u);
+    return LoadSample<DevFmtInt>(centered);
+}
+
+
+/* Converts `samples` values of format T to float, reading every srcstep'th
+ * element of src and writing the results contiguously to dst.
+ */
+template<DevFmtType T>
+inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep,
+    const size_t samples) noexcept
+{
+    using SampleType = DevFmtType_t<T>;
+    const auto *input = static_cast<const SampleType*>(src);
+
+    size_t srcidx{0u};
+    for(size_t i{0u};i < samples;++i)
+    {
+        dst[i] = LoadSample<T>(input[srcidx]);
+        srcidx += srcstep;
+    }
+}
+
+/* Runtime dispatch to the LoadSampleArray instantiation that matches
+ * srctype. Values that aren't a known format do nothing.
+ */
+void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype,
+    const size_t samples) noexcept
+{
+    switch(srctype)
+    {
+    case DevFmtByte:
+        LoadSampleArray<DevFmtByte>(dst, src, srcstep, samples);
+        break;
+    case DevFmtUByte:
+        LoadSampleArray<DevFmtUByte>(dst, src, srcstep, samples);
+        break;
+    case DevFmtShort:
+        LoadSampleArray<DevFmtShort>(dst, src, srcstep, samples);
+        break;
+    case DevFmtUShort:
+        LoadSampleArray<DevFmtUShort>(dst, src, srcstep, samples);
+        break;
+    case DevFmtInt:
+        LoadSampleArray<DevFmtInt>(dst, src, srcstep, samples);
+        break;
+    case DevFmtUInt:
+        LoadSampleArray<DevFmtUInt>(dst, src, srcstep, samples);
+        break;
+    case DevFmtFloat:
+        LoadSampleArray<DevFmtFloat>(dst, src, srcstep, samples);
+        break;
+    }
+}
+
+
+/* Converts one float sample to a device format. Integer outputs scale by the
+ * type's magnitude and clamp to its range before conversion. The base
+ * template is left undefined; only the specializations below exist.
+ */
+template<DevFmtType T>
+inline DevFmtType_t<T> StoreSample(float) noexcept;
+
+template<>
+inline float StoreSample<DevFmtFloat>(float val) noexcept
+{
+    return val;
+}
+template<>
+inline int32_t StoreSample<DevFmtInt>(float val) noexcept
+{
+    /* The upper bound is 2147483520, not 2147483647. */
+    const float scaled{clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)};
+    return fastf2i(scaled);
+}
+template<>
+inline int16_t StoreSample<DevFmtShort>(float val) noexcept
+{
+    const float scaled{clampf(val*32768.0f, -32768.0f, 32767.0f)};
+    return static_cast<int16_t>(fastf2i(scaled));
+}
+template<>
+inline int8_t StoreSample<DevFmtByte>(float val) noexcept
+{
+    const float scaled{clampf(val*128.0f, -128.0f, 127.0f)};
+    return static_cast<int8_t>(fastf2i(scaled));
+}
+
+/* Unsigned output variations: convert as the signed type, then shift up by
+ * half the range.
+ */
+template<>
+inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
+{
+    const auto biased = static_cast<uint32_t>(StoreSample<DevFmtInt>(val));
+    return biased + 2147483648u;
+}
+template<>
+inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
+{
+    return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768);
+}
+template<>
+inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
+{
+    return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128);
+}
+
+/* Converts `samples` contiguous floats from src to format T, writing to
+ * every dststep'th element of dst.
+ */
+template<DevFmtType T>
+inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep,
+    const size_t samples) noexcept
+{
+    using SampleType = DevFmtType_t<T>;
+    auto *output = static_cast<SampleType*>(dst);
+
+    size_t dstidx{0u};
+    for(size_t i{0u};i < samples;++i)
+    {
+        output[dstidx] = StoreSample<T>(src[i]);
+        dstidx += dststep;
+    }
+}
+
+
+/* Runtime dispatch to the StoreSampleArray instantiation that matches
+ * dsttype. Values that aren't a known format do nothing.
+ */
+void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype,
+    const size_t samples) noexcept
+{
+    switch(dsttype)
+    {
+    case DevFmtByte:
+        StoreSampleArray<DevFmtByte>(dst, src, dststep, samples);
+        break;
+    case DevFmtUByte:
+        StoreSampleArray<DevFmtUByte>(dst, src, dststep, samples);
+        break;
+    case DevFmtShort:
+        StoreSampleArray<DevFmtShort>(dst, src, dststep, samples);
+        break;
+    case DevFmtUShort:
+        StoreSampleArray<DevFmtUShort>(dst, src, dststep, samples);
+        break;
+    case DevFmtInt:
+        StoreSampleArray<DevFmtInt>(dst, src, dststep, samples);
+        break;
+    case DevFmtUInt:
+        StoreSampleArray<DevFmtUInt>(dst, src, dststep, samples);
+        break;
+    case DevFmtFloat:
+        StoreSampleArray<DevFmtFloat>(dst, src, dststep, samples);
+        break;
+    }
+}
+
+
+/* Expands `frames` mono samples of format T into interleaved stereo floats,
+ * writing each input scaled by 0.707106781187 to both output channels.
+ */
+template<DevFmtType T>
+void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept
+{
+    const auto *input = static_cast<const DevFmtType_t<T>*>(src);
+    for(size_t i{0u};i < frames;++i)
+    {
+        const float sample{LoadSample<T>(input[i]) * 0.707106781187f};
+        dst[i*2 + 0] = sample;
+        dst[i*2 + 1] = sample;
+    }
+}
+
+/* Mixes selected channels of interleaved input down to mono.
+ *
+ * chanmask has one bit per input channel, lowest bit being channel 0; only
+ * channels whose bit is set are summed. step is the number of samples per
+ * input frame, and the summed result of each frame is multiplied by scale.
+ */
+template<DevFmtType T>
+void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,
+    const void *src, const size_t frames) noexcept
+{
+    const auto *input = static_cast<const DevFmtType_t<T>*>(src);
+    std::fill_n(dst, frames, 0.0f);
+
+    /* Walk the mask from its lowest bit until no set bits remain. */
+    size_t c{0};
+    while(chanmask)
+    {
+        if((chanmask&1)) LIKELY
+        {
+            for(size_t i{0u};i < frames;++i)
+                dst[i] += LoadSample<T>(input[i*step + c]);
+        }
+        chanmask >>= 1;
+        ++c;
+    }
+
+    std::transform(dst, dst+frames, dst,
+        [scale](const float sum) noexcept -> float { return sum * scale; });
+}
+
+} // namespace
+
+SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,
+ uint srcRate, uint dstRate, Resampler resampler)
+{ /* Creates a converter for numchans channels from srcType at srcRate to
+   * dstType at dstRate. Returns null if the channel count or either rate is
+   * less than 1. */
+ if(numchans < 1 || srcRate < 1 || dstRate < 1)
+ return nullptr;
+
+ SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};
+ converter->mSrcType = srcType;
+ converter->mDstType = dstType;
+ converter->mSrcTypeSize = BytesFromDevFmt(srcType);
+ converter->mDstTypeSize = BytesFromDevFmt(dstType);
+
+ converter->mSrcPrepCount = MaxResamplerPadding;
+ converter->mFracOffset = 0;
+ for(auto &chan : converter->mChan)
+ { /* start every channel with zeroed history samples */
+ const al::span<float> buffer{chan.PrevSamples};
+ std::fill(buffer.begin(), buffer.end(), 0.0f);
+ }
+
+ /* Have to set the mixer FPU mode since that's what the resampler code expects. */
+ FPUCtl mixer_mode{};
+ auto step = static_cast<uint>(
+ mind(srcRate*double{MixerFracOne}/dstRate + 0.5, MaxPitch*MixerFracOne)); /* fixed-point rate ratio, rounded and capped at MaxPitch */
+ converter->mIncrement = maxu(step, 1); /* never let the increment be 0 */
+ if(converter->mIncrement == MixerFracOne)
+ converter->mResample = [](const InterpState*, const float *RESTRICT src, uint, const uint,
+ const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }; /* equal rates: plain copy */
+ else
+ converter->mResample = PrepareResampler(resampler, converter->mIncrement,
+ &converter->mState);
+
+ return converter;
+}
+
+/* Returns how many output frames can be produced from srcframes input
+ * frames, given the samples already buffered and the current fractional
+ * offset. The result is 0 when there isn't enough input for a single output
+ * sample, and is otherwise at least 1 and at most the maximum int value.
+ */
+uint SampleConverter::availableOut(uint srcframes) const
+{
+    /* No output samples if there's no input samples. */
+    if(srcframes < 1)
+        return 0;
+
+    /* Not enough input samples to generate an output sample. */
+    const uint prepared{mSrcPrepCount};
+    if(prepared < MaxResamplerPadding && MaxResamplerPadding - prepared >= srcframes)
+        return 0;
+
+    /* The usable input length in fixed-point, less the current fractional
+     * position.
+     */
+    uint64_t span64{prepared};
+    span64 += srcframes;
+    span64 -= MaxResamplerPadding;
+    span64 <<= MixerFracBits;
+    span64 -= mFracOffset;
+
+    /* If we have a full prep, we can generate at least one sample. */
+    const uint64_t outcount{(span64 + mIncrement-1) / mIncrement};
+    return static_cast<uint>(clampu64(outcount, 1, std::numeric_limits<int>::max()));
+}
+
+/* Converts input frames to the destination sample type and rate.
+ *
+ * On entry, *src and *srcframes describe the input buffer and its frame
+ * count. On return they are set to the position and count of the input that
+ * remains unconsumed (when the input is only stored as history, *srcframes
+ * becomes 0 while *src is left where it was). Up to dstframes frames are
+ * written to dst, and the number of frames written is returned.
+ */
+uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
+{
+ const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};
+ const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};
+ const uint increment{mIncrement};
+ auto SamplesIn = static_cast<const al::byte*>(*src);
+ uint NumSrcSamples{*srcframes};
+
+ FPUCtl mixer_mode{};
+ uint pos{0};
+ while(pos < dstframes && NumSrcSamples > 0)
+ {
+ const uint prepcount{mSrcPrepCount};
+ /* Limit each pass so history plus new input fits in mSrcSamples. */
+ const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};
+
+ if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
+ {
+ /* Not enough input samples to generate an output sample. Store
+ * what we're given for later.
+ */
+ for(size_t chan{0u};chan < mChan.size();chan++)
+ LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,
+ mChan.size(), mSrcType, readable);
+
+ mSrcPrepCount = prepcount + readable;
+ NumSrcSamples = 0;
+ break;
+ }
+
+ float *RESTRICT SrcData{mSrcSamples};
+ float *RESTRICT DstData{mDstSamples};
+ uint DataPosFrac{mFracOffset};
+ /* The usable source span in fixed-point, less the fraction already
+ * consumed by previous output.
+ */
+ uint64_t DataSize64{prepcount};
+ DataSize64 += readable;
+ DataSize64 -= MaxResamplerPadding;
+ DataSize64 <<= MixerFracBits;
+ DataSize64 -= DataPosFrac;
+
+ /* If we have a full prep, we can generate at least one sample. */
+ auto DstSize = static_cast<uint>(
+ clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
+ DstSize = minu(DstSize, dstframes-pos);
+
+ /* The fixed-point source position after producing DstSize samples, and
+ * the whole number of source samples that advances by.
+ */
+ const uint DataPosEnd{DstSize*increment + DataPosFrac};
+ const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
+
+ assert(prepcount+readable >= SrcDataEnd);
+ const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};
+
+ for(size_t chan{0u};chan < mChan.size();chan++)
+ {
+ /* Offset to this channel's first sample in the source and
+ * destination buffers.
+ */
+ const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
+ al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan;
+
+ /* Load the previous samples into the source data first, then the
+ * new samples from the input buffer.
+ */
+ std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
+ LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, readable);
+
+ /* Store as many prep samples for next time as possible, given the
+ * number of output samples being generated.
+ */
+ std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
+ std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
+ std::end(mChan[chan].PrevSamples), 0.0f);
+
+ /* Now resample, and store the result in the output buffer. */
+ mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,
+ {DstData, DstSize});
+
+ StoreSamples(DstSamples, DstData, mChan.size(), mDstType, DstSize);
+ }
+
+ /* Update the number of prep samples still available, as well as the
+ * fractional offset.
+ */
+ mSrcPrepCount = nextprep;
+ mFracOffset = DataPosEnd & MixerFracMask;
+
+ /* Update the src and dst pointers in case there's still more to do. */
+ const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
+ SamplesIn += SrcFrameSize*srcread;
+ NumSrcSamples -= srcread;
+
+ dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize;
+ pos += DstSize;
+ }
+
+ /* Report the unconsumed input back to the caller. */
+ *src = SamplesIn;
+ *srcframes = NumSrcSamples;
+
+ return pos;
+}
+
+
+/* Converts the channel layout of frames input frames from src, writing float
+ * samples to dst. Two cases are handled: a mono destination, and a mono
+ * source (channel mask 0x1) with a stereo destination. Any other combination
+ * leaves dst untouched.
+ */
+void ChannelConverter::convert(const void *src, float *dst, uint frames) const
+{
+ if(mDstChans == DevFmtMono)
+ {
+ /* The channels selected by the mask are summed, then scaled by the
+ * square root of the reciprocal of the number of channels summed.
+ */
+ const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};
+ switch(mSrcType)
+ {
+#define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, dst, src, frames); break
+ HANDLE_FMT(DevFmtByte);
+ HANDLE_FMT(DevFmtUByte);
+ HANDLE_FMT(DevFmtShort);
+ HANDLE_FMT(DevFmtUShort);
+ HANDLE_FMT(DevFmtInt);
+ HANDLE_FMT(DevFmtUInt);
+ HANDLE_FMT(DevFmtFloat);
+#undef HANDLE_FMT
+ }
+ }
+ else if(mChanMask == 0x1 && mDstChans == DevFmtStereo)
+ {
+ switch(mSrcType)
+ {
+#define HANDLE_FMT(T) case T: Mono2Stereo<T>(dst, src, frames); break
+ HANDLE_FMT(DevFmtByte);
+ HANDLE_FMT(DevFmtUByte);
+ HANDLE_FMT(DevFmtShort);
+ HANDLE_FMT(DevFmtUShort);
+ HANDLE_FMT(DevFmtInt);
+ HANDLE_FMT(DevFmtUInt);
+ HANDLE_FMT(DevFmtFloat);
+#undef HANDLE_FMT
+ }
+ }
+}
diff --git a/core/converter.h b/core/converter.h
new file mode 100644
index 00000000..01becea2
--- /dev/null
+++ b/core/converter.h
@@ -0,0 +1,66 @@
+#ifndef CORE_CONVERTER_H
+#define CORE_CONVERTER_H
+
+#include <chrono>
+#include <cstddef>
+#include <memory>
+
+#include "almalloc.h"
+#include "devformat.h"
+#include "mixer/defs.h"
+
+using uint = unsigned int;
+
+
+/* Converts multi-channel sample data between sample types and sample rates.
+ * Instances are made with Create(), which sizes the trailing per-channel
+ * history array for the requested channel count.
+ */
+struct SampleConverter {
+ DevFmtType mSrcType{};
+ DevFmtType mDstType{};
+ /* Size in bytes of one source and one destination sample. */
+ uint mSrcTypeSize{};
+ uint mDstTypeSize{};
+
+ /* The number of history samples currently held in each channel's
+ * PrevSamples.
+ */
+ uint mSrcPrepCount{};
+
+ /* Fixed-point fractional source position carried between convert() calls,
+ * and the fixed-point source step per output sample.
+ */
+ uint mFracOffset{};
+ uint mIncrement{};
+ InterpState mState{};
+ ResamplerFunc mResample{};
+
+ /* Scratch buffers holding one channel's resampler input and output. */
+ alignas(16) float mSrcSamples[BufferLineSize]{};
+ alignas(16) float mDstSamples[BufferLineSize]{};
+
+ struct ChanSamples {
+ /* Source samples retained from the previous convert() call. */
+ alignas(16) float PrevSamples[MaxResamplerPadding];
+ };
+ al::FlexArray<ChanSamples> mChan;
+
+ SampleConverter(size_t numchans) : mChan{numchans} { }
+
+ uint convert(const void **src, uint *srcframes, void *dst, uint dstframes);
+ uint availableOut(uint srcframes) const;
+
+ /* A duration counted in 1/MixerFracOne units. */
+ using SampleOffset = std::chrono::duration<int64_t, std::ratio<1,MixerFracOne>>;
+ /* Returns the stored history beyond MaxResamplerEdge samples, plus the
+ * current fractional offset, as a fixed-point sample offset. The result is
+ * negative when fewer than MaxResamplerEdge samples are stored.
+ */
+ SampleOffset currentInputDelay() const noexcept
+ {
+ const int64_t prep{int64_t{mSrcPrepCount} - MaxResamplerEdge};
+ return SampleOffset{(prep<<MixerFracBits) + mFracOffset};
+ }
+
+ static std::unique_ptr<SampleConverter> Create(DevFmtType srcType, DevFmtType dstType,
+ size_t numchans, uint srcRate, uint dstRate, Resampler resampler);
+
+ DEF_FAM_NEWDEL(SampleConverter, mChan)
+};
+using SampleConverterPtr = std::unique_ptr<SampleConverter>;
+
+/* Describes a channel layout conversion of typed source samples to float
+ * output; see convert() for the layouts that are handled.
+ */
+struct ChannelConverter {
+ DevFmtType mSrcType{};
+ /* The number of samples between consecutive frames in the source. */
+ uint mSrcStep{};
+ /* A bit mask of the source channels to use, where bit N selects the sample
+ * at offset N within each frame.
+ */
+ uint mChanMask{};
+ DevFmtChannels mDstChans{};
+
+ /* The converter does nothing unless at least one source channel is set. */
+ bool is_active() const noexcept { return mChanMask != 0; }
+
+ void convert(const void *src, float *dst, uint frames) const;
+};
+
+#endif /* CORE_CONVERTER_H */
diff --git a/core/cpu_caps.cpp b/core/cpu_caps.cpp
new file mode 100644
index 00000000..d4b4d86c
--- /dev/null
+++ b/core/cpu_caps.cpp
@@ -0,0 +1,141 @@
+
+#include "config.h"
+
+#include "cpu_caps.h"
+
+#if defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64))
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#ifndef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
+#endif
+#endif
+
+#if defined(HAVE_CPUID_H)
+#include <cpuid.h>
+#elif defined(HAVE_INTRIN_H)
+#include <intrin.h>
+#endif
+
+#include <array>
+#include <cctype>
+#include <string>
+
+
+int CPUCapFlags{0};
+
+namespace {
+
+#if defined(HAVE_GCC_GET_CPUID) \
+ && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64))
+using reg_type = unsigned int;
+/* Runs CPUID function f through __get_cpuid and returns the four result
+ * registers in EAX, EBX, ECX, EDX order. The __get_cpuid return value isn't
+ * checked, so the array keeps its zero initialization if nothing is written.
+ */
+inline std::array<reg_type,4> get_cpuid(unsigned int f)
+{
+ std::array<reg_type,4> ret{};
+ __get_cpuid(f, ret.data(), &ret[1], &ret[2], &ret[3]);
+ return ret;
+}
+#define CAN_GET_CPUID
+#elif defined(HAVE_CPUID_INTRINSIC) \
+ && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64))
+using reg_type = int;
+/* Runs CPUID function f through the __cpuid intrinsic and returns the four
+ * values it writes. The name is parenthesized so a function-like macro of the
+ * same name isn't expanded.
+ */
+inline std::array<reg_type,4> get_cpuid(unsigned int f)
+{
+ std::array<reg_type,4> ret{};
+ (__cpuid)(ret.data(), f);
+ return ret;
+}
+#define CAN_GET_CPUID
+#endif
+
+} // namespace
+
+/* Queries the host CPU's identification strings and SIMD capabilities.
+ *
+ * When CPUID can be used, the vendor and (if available) brand name strings
+ * are filled in and the SSE capability flags are set from the feature bits,
+ * each level requiring the one below it. al::nullopt is returned if CPUID
+ * reports no standard functions. When CPUID can't be used, the SSE levels the
+ * build was configured with are assumed to be available. NEON is flagged
+ * according to the build target or, on Windows ARM, a run-time check.
+ */
+al::optional<CPUInfo> GetCPUInfo()
+{
+    CPUInfo ret;
+
+#ifdef CAN_GET_CPUID
+    /* Removes embedded nulls, collapses runs of whitespace to one character,
+     * and trims a leading and trailing space from an identification string.
+     * Characters are classified as unsigned char, since std::isspace is
+     * undefined for negative values.
+     */
+    const auto tidy_string = [](std::string &str)
+    {
+        const auto is_space = [](const char ch) noexcept -> bool
+        { return std::isspace(static_cast<unsigned char>(ch)) != 0; };
+
+        auto iter_end = std::remove(str.begin(), str.end(), '\0');
+        iter_end = std::unique(str.begin(), iter_end,
+            [is_space](const char c0, const char c1) { return is_space(c0) && is_space(c1); });
+        str.erase(iter_end, str.end());
+        if(!str.empty() && is_space(str.back()))
+            str.pop_back();
+        if(!str.empty() && is_space(str.front()))
+            str.erase(str.begin());
+    };
+
+    auto cpuregs = get_cpuid(0);
+    if(cpuregs[0] == 0)
+        return al::nullopt;
+
+    const reg_type maxfunc{cpuregs[0]};
+
+    /* Function 0 provides the vendor ID in EBX, EDX, ECX order. This has to
+     * be read before cpuregs is reused for the extended function query, as
+     * that isn't guaranteed to return the vendor ID in those registers.
+     */
+    ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[1]), 4);
+    ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[3]), 4);
+    ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[2]), 4);
+    tidy_string(ret.mVendor);
+
+    cpuregs = get_cpuid(0x80000000);
+    const reg_type maxextfunc{cpuregs[0]};
+
+    if(maxextfunc >= 0x80000004)
+    {
+        /* The brand string is spread over three functions, 16 bytes each. */
+        cpuregs = get_cpuid(0x80000002);
+        ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16);
+        cpuregs = get_cpuid(0x80000003);
+        ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16);
+        cpuregs = get_cpuid(0x80000004);
+        ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16);
+        tidy_string(ret.mName);
+    }
+
+    if(maxfunc >= 1)
+    {
+        /* Each SSE level is only flagged when the previous one is too. */
+        cpuregs = get_cpuid(1);
+        if((cpuregs[3]&(1<<25)))
+            ret.mCaps |= CPU_CAP_SSE;
+        if((ret.mCaps&CPU_CAP_SSE) && (cpuregs[3]&(1<<26)))
+            ret.mCaps |= CPU_CAP_SSE2;
+        if((ret.mCaps&CPU_CAP_SSE2) && (cpuregs[2]&(1<<0)))
+            ret.mCaps |= CPU_CAP_SSE3;
+        if((ret.mCaps&CPU_CAP_SSE3) && (cpuregs[2]&(1<<19)))
+            ret.mCaps |= CPU_CAP_SSE4_1;
+    }
+
+#else
+
+    /* Assume support for whatever's supported if we can't check for it */
+#if defined(HAVE_SSE4_1)
+#warning "Assuming SSE 4.1 run-time support!"
+    ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3 | CPU_CAP_SSE4_1;
+#elif defined(HAVE_SSE3)
+#warning "Assuming SSE 3 run-time support!"
+    ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3;
+#elif defined(HAVE_SSE2)
+#warning "Assuming SSE 2 run-time support!"
+    ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2;
+#elif defined(HAVE_SSE)
+#warning "Assuming SSE run-time support!"
+    ret.mCaps |= CPU_CAP_SSE;
+#endif
+#endif /* CAN_GET_CPUID */
+
+#ifdef HAVE_NEON
+#ifdef __ARM_NEON
+    ret.mCaps |= CPU_CAP_NEON;
+#elif defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64))
+    if(IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
+        ret.mCaps |= CPU_CAP_NEON;
+#else
+#warning "Assuming NEON run-time support!"
+    ret.mCaps |= CPU_CAP_NEON;
+#endif
+#endif
+
+    return ret;
+}
diff --git a/core/cpu_caps.h b/core/cpu_caps.h
new file mode 100644
index 00000000..ffd671d0
--- /dev/null
+++ b/core/cpu_caps.h
@@ -0,0 +1,26 @@
+#ifndef CORE_CPU_CAPS_H
+#define CORE_CPU_CAPS_H
+
+#include <string>
+
+#include "aloptional.h"
+
+
+extern int CPUCapFlags;
+/* Bit flags for the instruction set extensions a CPU can support, as combined
+ * in CPUInfo::mCaps.
+ */
+enum {
+ CPU_CAP_SSE = 1<<0,
+ CPU_CAP_SSE2 = 1<<1,
+ CPU_CAP_SSE3 = 1<<2,
+ CPU_CAP_SSE4_1 = 1<<3,
+ CPU_CAP_NEON = 1<<4,
+};
+
+/* The result of a CPU query: identification strings, which are left empty
+ * when they can't be retrieved, and a mask of CPU_CAP_* flags.
+ */
+struct CPUInfo {
+ std::string mVendor;
+ std::string mName;
+ int mCaps{0};
+};
+
+al::optional<CPUInfo> GetCPUInfo();
+
+#endif /* CORE_CPU_CAPS_H */
diff --git a/core/cubic_defs.h b/core/cubic_defs.h
new file mode 100644
index 00000000..33751c97
--- /dev/null
+++ b/core/cubic_defs.h
@@ -0,0 +1,13 @@
+#ifndef CORE_CUBIC_DEFS_H
+#define CORE_CUBIC_DEFS_H
+
+/* The number of distinct phase intervals within the cubic filter tables. */
+constexpr unsigned int CubicPhaseBits{5};
+constexpr unsigned int CubicPhaseCount{1 << CubicPhaseBits};
+
+/* The four filter coefficients for one phase of the cubic filter table, along
+ * with the per-coefficient difference to the following phase.
+ */
+struct CubicCoefficients {
+ float mCoeffs[4];
+ float mDeltas[4];
+};
+
+#endif /* CORE_CUBIC_DEFS_H */
diff --git a/core/cubic_tables.cpp b/core/cubic_tables.cpp
new file mode 100644
index 00000000..73ec6b3f
--- /dev/null
+++ b/core/cubic_tables.cpp
@@ -0,0 +1,59 @@
+
+#include "cubic_tables.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+
+#include "alnumbers.h"
+#include "core/mixer/defs.h"
+
+
+namespace {
+
+using uint = unsigned int;
+
+/* Compile-time generated cubic spline filter table, holding one set of four
+ * coefficients and deltas for each of the CubicPhaseCount phases.
+ */
+struct SplineFilterArray {
+ alignas(16) CubicCoefficients mTable[CubicPhaseCount]{};
+
+ constexpr SplineFilterArray()
+ {
+ /* Fill in the main coefficients. */
+ for(size_t pi{0};pi < CubicPhaseCount;++pi)
+ {
+ /* The fractional position of this phase, in the range [0, 1). */
+ const double mu{static_cast<double>(pi) / CubicPhaseCount};
+ const double mu2{mu*mu}, mu3{mu2*mu};
+ mTable[pi].mCoeffs[0] = static_cast<float>(-0.5*mu3 + mu2 + -0.5*mu);
+ mTable[pi].mCoeffs[1] = static_cast<float>( 1.5*mu3 + -2.5*mu2 + 1.0);
+ mTable[pi].mCoeffs[2] = static_cast<float>(-1.5*mu3 + 2.0*mu2 + 0.5*mu);
+ mTable[pi].mCoeffs[3] = static_cast<float>( 0.5*mu3 + -0.5*mu2);
+ }
+
+ /* Fill in the coefficient deltas. */
+ for(size_t pi{0};pi < CubicPhaseCount-1;++pi)
+ {
+ mTable[pi].mDeltas[0] = mTable[pi+1].mCoeffs[0] - mTable[pi].mCoeffs[0];
+ mTable[pi].mDeltas[1] = mTable[pi+1].mCoeffs[1] - mTable[pi].mCoeffs[1];
+ mTable[pi].mDeltas[2] = mTable[pi+1].mCoeffs[2] - mTable[pi].mCoeffs[2];
+ mTable[pi].mDeltas[3] = mTable[pi+1].mCoeffs[3] - mTable[pi].mCoeffs[3];
+ }
+
+ /* The last phase has no following table entry. Its deltas lead to the
+ * coefficients {0, 0, 1, 0}, which is what the formulas above give for
+ * a fractional position of 1.
+ */
+ const size_t pi{CubicPhaseCount - 1};
+ mTable[pi].mDeltas[0] = -mTable[pi].mCoeffs[0];
+ mTable[pi].mDeltas[1] = -mTable[pi].mCoeffs[1];
+ mTable[pi].mDeltas[2] = 1.0f - mTable[pi].mCoeffs[2];
+ mTable[pi].mDeltas[3] = -mTable[pi].mCoeffs[3];
+ }
+
+ /* Provides the table as a span. */
+ constexpr auto getTable() const noexcept { return al::as_span(mTable); }
+};
+
+constexpr SplineFilterArray SplineFilter{};
+
+} // namespace
+
+const CubicTable gCubicSpline{SplineFilter.getTable()};
diff --git a/core/cubic_tables.h b/core/cubic_tables.h
new file mode 100644
index 00000000..88097ae2
--- /dev/null
+++ b/core/cubic_tables.h
@@ -0,0 +1,17 @@
+#ifndef CORE_CUBIC_TABLES_H
+#define CORE_CUBIC_TABLES_H
+
+#include "alspan.h"
+#include "cubic_defs.h"
+
+
+/* A non-owning view of a cubic filter table with exactly CubicPhaseCount
+ * entries.
+ */
+struct CubicTable {
+ al::span<const CubicCoefficients,CubicPhaseCount> Tab;
+};
+
+/* A Catmull-Rom spline. The spline passes through the center two samples,
+ * ensuring no discontinuity while moving through a series of samples.
+ */
+extern const CubicTable gCubicSpline;
+
+#endif /* CORE_CUBIC_TABLES_H */
diff --git a/core/dbus_wrap.cpp b/core/dbus_wrap.cpp
new file mode 100644
index 00000000..7f221706
--- /dev/null
+++ b/core/dbus_wrap.cpp
@@ -0,0 +1,46 @@
+
+#include "config.h"
+
+#include "dbus_wrap.h"
+
+#ifdef HAVE_DYNLOAD
+
+#include <mutex>
+#include <type_traits>
+
+#include "logging.h"
+
+
+void *dbus_handle{nullptr};
+#define DECL_FUNC(x) decltype(p##x) p##x{};
+DBUS_FUNCTIONS(DECL_FUNC)
+#undef DECL_FUNC
+
+/* Loads the D-Bus library and resolves every function listed in
+ * DBUS_FUNCTIONS into its p-prefixed function pointer. On success dbus_handle
+ * holds the library handle. If the library or any function fails to load, a
+ * warning is logged and dbus_handle is left null.
+ */
+void PrepareDBus()
+{
+ static constexpr char libname[] = "libdbus-1.so.3";
+
+ /* Assigns the named symbol to f, cast to f's own function pointer type. */
+ auto load_func = [](auto &f, const char *name) -> void
+ { f = reinterpret_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); };
+ /* The macro ends with a semicolon since DBUS_FUNCTIONS expands it once per
+ * function with nothing in between. A missing function closes the library
+ * and returns from PrepareDBus.
+ */
+#define LOAD_FUNC(x) do { \
+ load_func(p##x, #x); \
+ if(!p##x) \
+ { \
+ WARN("Failed to load function %s\n", #x); \
+ CloseLib(dbus_handle); \
+ dbus_handle = nullptr; \
+ return; \
+ } \
+} while(0);
+
+ dbus_handle = LoadLib(libname);
+ if(!dbus_handle)
+ {
+ WARN("Failed to load %s\n", libname);
+ return;
+ }
+
+DBUS_FUNCTIONS(LOAD_FUNC)
+#undef LOAD_FUNC
+}
+#endif
diff --git a/core/dbus_wrap.h b/core/dbus_wrap.h
new file mode 100644
index 00000000..09eaacf9
--- /dev/null
+++ b/core/dbus_wrap.h
@@ -0,0 +1,87 @@
+#ifndef CORE_DBUS_WRAP_H
+#define CORE_DBUS_WRAP_H
+
+#include <memory>
+
+#include <dbus/dbus.h>
+
+#include "dynload.h"
+
+#ifdef HAVE_DYNLOAD
+
+#include <mutex>
+
+#define DBUS_FUNCTIONS(MAGIC) \
+MAGIC(dbus_error_init) \
+MAGIC(dbus_error_free) \
+MAGIC(dbus_bus_get) \
+MAGIC(dbus_connection_set_exit_on_disconnect) \
+MAGIC(dbus_connection_unref) \
+MAGIC(dbus_connection_send_with_reply_and_block) \
+MAGIC(dbus_message_unref) \
+MAGIC(dbus_message_new_method_call) \
+MAGIC(dbus_message_append_args) \
+MAGIC(dbus_message_iter_init) \
+MAGIC(dbus_message_iter_next) \
+MAGIC(dbus_message_iter_recurse) \
+MAGIC(dbus_message_iter_get_arg_type) \
+MAGIC(dbus_message_iter_get_basic) \
+MAGIC(dbus_set_error_from_message)
+
+extern void *dbus_handle;
+#define DECL_FUNC(x) extern decltype(x) *p##x;
+DBUS_FUNCTIONS(DECL_FUNC)
+#undef DECL_FUNC
+
+#ifndef IN_IDE_PARSER
+#define dbus_error_init (*pdbus_error_init)
+#define dbus_error_free (*pdbus_error_free)
+#define dbus_bus_get (*pdbus_bus_get)
+#define dbus_connection_set_exit_on_disconnect (*pdbus_connection_set_exit_on_disconnect)
+#define dbus_connection_unref (*pdbus_connection_unref)
+#define dbus_connection_send_with_reply_and_block (*pdbus_connection_send_with_reply_and_block)
+#define dbus_message_unref (*pdbus_message_unref)
+#define dbus_message_new_method_call (*pdbus_message_new_method_call)
+#define dbus_message_append_args (*pdbus_message_append_args)
+#define dbus_message_iter_init (*pdbus_message_iter_init)
+#define dbus_message_iter_next (*pdbus_message_iter_next)
+#define dbus_message_iter_recurse (*pdbus_message_iter_recurse)
+#define dbus_message_iter_get_arg_type (*pdbus_message_iter_get_arg_type)
+#define dbus_message_iter_get_basic (*pdbus_message_iter_get_basic)
+#define dbus_set_error_from_message (*pdbus_set_error_from_message)
+#endif
+
+void PrepareDBus();
+
+/* Makes sure a load of the D-Bus library has been attempted, once, and
+ * returns the library handle. The handle is null if loading failed.
+ */
+inline auto HasDBus()
+{
+    static std::once_flag init_dbus{};
+    std::call_once(init_dbus, PrepareDBus);
+    return dbus_handle;
+}
+
+#else
+
+constexpr bool HasDBus() noexcept { return true; }
+#endif /* HAVE_DYNLOAD */
+
+
+namespace dbus {
+
+/* Holds a DBusError that is initialized on construction and freed on
+ * destruction.
+ * NOTE(review): copying isn't disabled, and a copy would free the same
+ * DBusError contents a second time -- confirm instances are never copied.
+ */
+struct Error {
+ Error() { dbus_error_init(&mError); }
+ ~Error() { dbus_error_free(&mError); }
+ /* Access to the wrapped DBusError, as a pointer or a reference. */
+ DBusError* operator->() { return &mError; }
+ DBusError &get() { return mError; }
+private:
+ DBusError mError{};
+};
+
+struct ConnectionDeleter {
+ void operator()(DBusConnection *c) { dbus_connection_unref(c); }
+};
+using ConnectionPtr = std::unique_ptr<DBusConnection,ConnectionDeleter>;
+
+} // namespace dbus
+
+#endif /* CORE_DBUS_WRAP_H */
diff --git a/core/devformat.cpp b/core/devformat.cpp
new file mode 100644
index 00000000..acdabc4f
--- /dev/null
+++ b/core/devformat.cpp
@@ -0,0 +1,67 @@
+
+#include "config.h"
+
+#include "devformat.h"
+
+
+/* Returns the size, in bytes, of a single sample of the given type, or 0 if
+ * the value isn't one of the DevFmtType enumerators.
+ */
+uint BytesFromDevFmt(DevFmtType type) noexcept
+{
+    uint bytes{0u};
+    switch(type)
+    {
+    case DevFmtByte: bytes = sizeof(int8_t); break;
+    case DevFmtUByte: bytes = sizeof(uint8_t); break;
+    case DevFmtShort: bytes = sizeof(int16_t); break;
+    case DevFmtUShort: bytes = sizeof(uint16_t); break;
+    case DevFmtInt: bytes = sizeof(int32_t); break;
+    case DevFmtUInt: bytes = sizeof(uint32_t); break;
+    case DevFmtFloat: bytes = sizeof(float); break;
+    }
+    return bytes;
+}
+/* Returns the number of channels in the given channel configuration, or 0 if
+ * the value isn't one of the DevFmtChannels enumerators. The ambisonic order
+ * is only used for DevFmtAmbi3D, which has (order+1)^2 channels.
+ */
+uint ChannelsFromDevFmt(DevFmtChannels chans, uint ambiorder) noexcept
+{
+    uint count{0u};
+    switch(chans)
+    {
+    case DevFmtMono: count = 1; break;
+    case DevFmtStereo: count = 2; break;
+    case DevFmtQuad: count = 4; break;
+    case DevFmtX51: count = 6; break;
+    case DevFmtX61: count = 7; break;
+    case DevFmtX71: count = 8; break;
+    case DevFmtX714: count = 12; break;
+    case DevFmtX3D71: count = 8; break;
+    case DevFmtAmbi3D: count = (ambiorder+1) * (ambiorder+1); break;
+    }
+    return count;
+}
+
+/* Returns a printable name for the given sample type, with a placeholder for
+ * values that aren't one of the DevFmtType enumerators.
+ */
+const char *DevFmtTypeString(DevFmtType type) noexcept
+{
+    const char *name{"(unknown type)"};
+    switch(type)
+    {
+    case DevFmtByte: name = "Int8"; break;
+    case DevFmtUByte: name = "UInt8"; break;
+    case DevFmtShort: name = "Int16"; break;
+    case DevFmtUShort: name = "UInt16"; break;
+    case DevFmtInt: name = "Int32"; break;
+    case DevFmtUInt: name = "UInt32"; break;
+    case DevFmtFloat: name = "Float32"; break;
+    }
+    return name;
+}
+/* Returns a printable name for the given channel configuration, with a
+ * placeholder for values that aren't one of the DevFmtChannels enumerators.
+ */
+const char *DevFmtChannelsString(DevFmtChannels chans) noexcept
+{
+    const char *name{"(unknown channels)"};
+    switch(chans)
+    {
+    case DevFmtMono: name = "Mono"; break;
+    case DevFmtStereo: name = "Stereo"; break;
+    case DevFmtQuad: name = "Quadraphonic"; break;
+    case DevFmtX51: name = "5.1 Surround"; break;
+    case DevFmtX61: name = "6.1 Surround"; break;
+    case DevFmtX71: name = "7.1 Surround"; break;
+    case DevFmtX714: name = "7.1.4 Surround"; break;
+    case DevFmtX3D71: name = "3D7.1 Surround"; break;
+    case DevFmtAmbi3D: name = "Ambisonic 3D"; break;
+    }
+    return name;
+}
diff --git a/core/devformat.h b/core/devformat.h
new file mode 100644
index 00000000..485826a3
--- /dev/null
+++ b/core/devformat.h
@@ -0,0 +1,122 @@
+#ifndef CORE_DEVFORMAT_H
+#define CORE_DEVFORMAT_H
+
+#include <cstdint>
+
+
+using uint = unsigned int;
+
+enum Channel : unsigned char {
+ FrontLeft = 0,
+ FrontRight,
+ FrontCenter,
+ LFE,
+ BackLeft,
+ BackRight,
+ BackCenter,
+ SideLeft,
+ SideRight,
+
+ TopCenter,
+ TopFrontLeft,
+ TopFrontCenter,
+ TopFrontRight,
+ TopBackLeft,
+ TopBackCenter,
+ TopBackRight,
+
+ Aux0,
+ Aux1,
+ Aux2,
+ Aux3,
+ Aux4,
+ Aux5,
+ Aux6,
+ Aux7,
+ Aux8,
+ Aux9,
+ Aux10,
+ Aux11,
+ Aux12,
+ Aux13,
+ Aux14,
+ Aux15,
+
+ MaxChannels
+};
+
+
+/* Device formats */
+enum DevFmtType : unsigned char {
+ DevFmtByte,
+ DevFmtUByte,
+ DevFmtShort,
+ DevFmtUShort,
+ DevFmtInt,
+ DevFmtUInt,
+ DevFmtFloat,
+
+ DevFmtTypeDefault = DevFmtFloat
+};
+enum DevFmtChannels : unsigned char {
+ DevFmtMono,
+ DevFmtStereo,
+ DevFmtQuad,
+ DevFmtX51,
+ DevFmtX61,
+ DevFmtX71,
+ DevFmtX714,
+ DevFmtX3D71,
+ DevFmtAmbi3D,
+
+ DevFmtChannelsDefault = DevFmtStereo
+};
+#define MAX_OUTPUT_CHANNELS 16
+
+/* DevFmtType traits, providing the type, etc given a DevFmtType. */
+template<DevFmtType T>
+struct DevFmtTypeTraits { };
+
+template<>
+struct DevFmtTypeTraits<DevFmtByte> { using Type = int8_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtUByte> { using Type = uint8_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtShort> { using Type = int16_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtUShort> { using Type = uint16_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtInt> { using Type = int32_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtUInt> { using Type = uint32_t; };
+template<>
+struct DevFmtTypeTraits<DevFmtFloat> { using Type = float; };
+
+template<DevFmtType T>
+using DevFmtType_t = typename DevFmtTypeTraits<T>::Type;
+
+
+uint BytesFromDevFmt(DevFmtType type) noexcept;
+uint ChannelsFromDevFmt(DevFmtChannels chans, uint ambiorder) noexcept;
+inline uint FrameSizeFromDevFmt(DevFmtChannels chans, DevFmtType type, uint ambiorder) noexcept
+{ return ChannelsFromDevFmt(chans, ambiorder) * BytesFromDevFmt(type); }
+
+const char *DevFmtTypeString(DevFmtType type) noexcept;
+const char *DevFmtChannelsString(DevFmtChannels chans) noexcept;
+
+enum class DevAmbiLayout : bool {
+ FuMa,
+ ACN,
+
+ Default = ACN
+};
+
+enum class DevAmbiScaling : unsigned char {
+ FuMa,
+ SN3D,
+ N3D,
+
+ Default = SN3D
+};
+
+#endif /* CORE_DEVFORMAT_H */
diff --git a/core/device.cpp b/core/device.cpp
new file mode 100644
index 00000000..2766c5e4
--- /dev/null
+++ b/core/device.cpp
@@ -0,0 +1,23 @@
+
+#include "config.h"
+
+#include "bformatdec.h"
+#include "bs2b.h"
+#include "device.h"
+#include "front_stablizer.h"
+#include "hrtf.h"
+#include "mastering.h"
+
+
+al::FlexArray<ContextBase*> DeviceBase::sEmptyContextArray{0u};
+
+
+DeviceBase::DeviceBase(DeviceType type) : Type{type}, mContexts{&sEmptyContextArray}
+{
+}
+
+/* Releases the device's context array, unless it's the shared empty array
+ * which isn't owned by any one device.
+ */
+DeviceBase::~DeviceBase()
+{
+    auto *contexts = mContexts.exchange(nullptr, std::memory_order_relaxed);
+    if(contexts == &sEmptyContextArray)
+        return;
+    delete contexts;
+}
diff --git a/core/device.h b/core/device.h
new file mode 100644
index 00000000..9aaf7adb
--- /dev/null
+++ b/core/device.h
@@ -0,0 +1,345 @@
+#ifndef CORE_DEVICE_H
+#define CORE_DEVICE_H
+
+#include <stddef.h>
+
+#include <array>
+#include <atomic>
+#include <bitset>
+#include <chrono>
+#include <memory>
+#include <mutex>
+#include <string>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "atomic.h"
+#include "bufferline.h"
+#include "devformat.h"
+#include "filters/nfc.h"
+#include "intrusive_ptr.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "resampler_limits.h"
+#include "uhjfilter.h"
+#include "vector.h"
+
+class BFormatDec;
+struct bs2b;
+struct Compressor;
+struct ContextBase;
+struct DirectHrtfState;
+struct HrtfStore;
+
+using uint = unsigned int;
+
+
+#define MIN_OUTPUT_RATE 8000
+#define MAX_OUTPUT_RATE 192000
+#define DEFAULT_OUTPUT_RATE 48000
+
+#define DEFAULT_UPDATE_SIZE 960 /* 20ms */
+#define DEFAULT_NUM_UPDATES 3
+
+
+enum class DeviceType : unsigned char {
+ Playback,
+ Capture,
+ Loopback
+};
+
+
+enum class RenderMode : unsigned char {
+ Normal,
+ Pairwise,
+ Hrtf
+};
+
+enum class StereoEncoding : unsigned char {
+ Basic,
+ Uhj,
+ Hrtf,
+
+ Default = Basic
+};
+
+
+struct InputRemixMap {
+ struct TargetMix { Channel channel; float mix; };
+
+ Channel channel;
+ al::span<const TargetMix> targets;
+};
+
+
+/* Per-channel gain and delay settings for speaker distance compensation,
+ * along with the sample storage for the delay buffers. Instances are made
+ * with Create(), which sizes the trailing sample array.
+ */
+struct DistanceComp {
+ /* Maximum delay in samples for speaker distance compensation. */
+ static constexpr uint MaxDelay{1024};
+
+ struct ChanData {
+ float Gain{1.0f};
+ uint Length{0u}; /* Valid range is [0...MaxDelay). */
+ /* NOTE(review): presumably points into mSamples -- confirm against the
+ * code that sets it.
+ */
+ float *Buffer{nullptr};
+ };
+
+ std::array<ChanData,MAX_OUTPUT_CHANNELS> mChannels;
+ al::FlexArray<float,16> mSamples;
+
+ DistanceComp(size_t count) : mSamples{count} { }
+
+ /* Allocates a DistanceComp with storage for numsamples samples. */
+ static std::unique_ptr<DistanceComp> Create(size_t numsamples)
+ { return std::unique_ptr<DistanceComp>{new(FamCount(numsamples)) DistanceComp{numsamples}}; }
+
+ DEF_FAM_NEWDEL(DistanceComp, mSamples)
+};
+
+
+constexpr uint InvalidChannelIndex{~0u};
+
+struct BFChannelConfig {
+ float Scale;
+ uint Index;
+};
+
+/* An ambisonic mixing target: the buffer lines to mix into, and the ambisonic
+ * channel index and scale associated with each line.
+ */
+struct MixParams {
+ /* Coefficient channel mapping for mixing to the buffer. */
+ std::array<BFChannelConfig,MaxAmbiChannels> AmbiMap{};
+
+ al::span<FloatBufferLine> Buffer;
+
+ /**
+ * Helper to set an identity/pass-through panning for ambisonic mixing. The
+ * source is expected to be a 3D ACN/N3D ambisonic buffer, and for each
+ * input channel [0...inmix.Buffer.size()), the given functor is called
+ * with the source channel index, destination channel index, and the gain
+ * for that channel. The destination is the first channel of this mix with
+ * the same ambisonic index as the source channel, and the gain is that
+ * destination's scale multiplied by gainbase. If the destination channel
+ * is InvalidChannelIndex, the given source channel is not used for output
+ * and the gain is 0.
+ */
+ template<typename F>
+ void setAmbiMixParams(const MixParams &inmix, const float gainbase, F func) const
+ {
+ const size_t numIn{inmix.Buffer.size()};
+ const size_t numOut{Buffer.size()};
+ for(size_t i{0};i < numIn;++i)
+ {
+ auto idx = InvalidChannelIndex;
+ auto gain = 0.0f;
+
+ for(size_t j{0};j < numOut;++j)
+ {
+ if(AmbiMap[j].Index == inmix.AmbiMap[i].Index)
+ {
+ idx = static_cast<uint>(j);
+ gain = AmbiMap[j].Scale * gainbase;
+ break;
+ }
+ }
+ func(i, idx, gain);
+ }
+ }
+};
+
+struct RealMixParams {
+ al::span<const InputRemixMap> RemixMap;
+ std::array<uint,MaxChannels> ChannelIndex{};
+
+ al::span<FloatBufferLine> Buffer;
+};
+
+using AmbiRotateMatrix = std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels>;
+
+enum {
+ // Frequency was requested by the app or config file
+ FrequencyRequest,
+ // Channel configuration was requested by the app or config file
+ ChannelsRequest,
+ // Sample type was requested by the config file
+ SampleTypeRequest,
+
+ // Specifies if the DSP is paused at user request
+ DevicePaused,
+ // Specifies if the device is currently running
+ DeviceRunning,
+
+ // Specifies if the output plays directly on/in ears (headphones, headset,
+ // ear buds, etc).
+ DirectEar,
+
+ DeviceFlagsCount
+};
+
+struct DeviceBase {
+ /* To avoid extraneous allocations, a 0-sized FlexArray<ContextBase*> is
+ * defined globally as a sharable object.
+ */
+ static al::FlexArray<ContextBase*> sEmptyContextArray;
+
+ std::atomic<bool> Connected{true};
+ const DeviceType Type{};
+
+ uint Frequency{};
+ uint UpdateSize{};
+ uint BufferSize{};
+
+ DevFmtChannels FmtChans{};
+ DevFmtType FmtType{};
+ uint mAmbiOrder{0};
+ float mXOverFreq{400.0f};
+ /* If the main device mix is horizontal/2D only. */
+ bool m2DMixing{false};
+ /* For DevFmtAmbi* output only, specifies the channel order and
+ * normalization.
+ */
+ DevAmbiLayout mAmbiLayout{DevAmbiLayout::Default};
+ DevAmbiScaling mAmbiScale{DevAmbiScaling::Default};
+
+ std::string DeviceName;
+
+ // Device flags
+ std::bitset<DeviceFlagsCount> Flags{};
+
+ uint NumAuxSends{};
+
+ /* Rendering mode. */
+ RenderMode mRenderMode{RenderMode::Normal};
+
+ /* The average speaker distance as determined by the ambdec configuration,
+ * HRTF data set, or the NFC-HOA reference delay. Only used for NFC.
+ */
+ float AvgSpeakerDist{0.0f};
+
+ /* The default NFC filter. Not used directly, but is pre-initialized with
+ * the control distance from AvgSpeakerDist.
+ */
+ NfcFilter mNFCtrlFilter{};
+
+ uint SamplesDone{0u};
+ std::chrono::nanoseconds ClockBase{0};
+ std::chrono::nanoseconds FixedLatency{0};
+
+ AmbiRotateMatrix mAmbiRotateMatrix{};
+ AmbiRotateMatrix mAmbiRotateMatrix2{};
+
+ /* Temp storage used for mixer processing. */
+ static constexpr size_t MixerLineSize{BufferLineSize + DecoderBase::sMaxPadding};
+ static constexpr size_t MixerChannelsMax{16};
+ using MixerBufferLine = std::array<float,MixerLineSize>;
+ alignas(16) std::array<MixerBufferLine,MixerChannelsMax> mSampleData;
+ alignas(16) std::array<float,MixerLineSize+MaxResamplerPadding> mResampleData;
+
+ alignas(16) float FilteredData[BufferLineSize];
+ union {
+ alignas(16) float HrtfSourceData[BufferLineSize + HrtfHistoryLength];
+ alignas(16) float NfcSampleData[BufferLineSize];
+ };
+
+ /* Persistent storage for HRTF mixing. */
+ alignas(16) float2 HrtfAccumData[BufferLineSize + HrirLength];
+
+ /* Mixing buffer used by the Dry mix and Real output. */
+ al::vector<FloatBufferLine, 16> MixBuffer;
+
+ /* The "dry" path corresponds to the main output. */
+ MixParams Dry;
+ uint NumChannelsPerOrder[MaxAmbiOrder+1]{};
+
+ /* "Real" output, which will be written to the device buffer. May alias the
+ * dry buffer.
+ */
+ RealMixParams RealOut;
+
+ /* HRTF state and info */
+ std::unique_ptr<DirectHrtfState> mHrtfState;
+ al::intrusive_ptr<HrtfStore> mHrtf;
+ uint mIrSize{0};
+
+ /* Ambisonic-to-UHJ encoder */
+ std::unique_ptr<UhjEncoderBase> mUhjEncoder;
+
+ /* Ambisonic decoder for speakers */
+ std::unique_ptr<BFormatDec> AmbiDecoder;
+
+ /* Stereo-to-binaural filter */
+ std::unique_ptr<bs2b> Bs2b;
+
+ using PostProc = void(DeviceBase::*)(const size_t SamplesToDo);
+ PostProc PostProcess{nullptr};
+
+ std::unique_ptr<Compressor> Limiter;
+
+ /* Delay buffers used to compensate for speaker distances. */
+ std::unique_ptr<DistanceComp> ChannelDelays;
+
+ /* Dithering control. */
+ float DitherDepth{0.0f};
+ uint DitherSeed{0u};
+
+ /* Running count of the mixer invocations, in 31.1 fixed point. This
+ * actually increments *twice* when mixing, first at the start and then at
+ * the end, so the bottom bit indicates if the device is currently mixing
+ * and the upper bits indicates how many mixes have been done.
+ */
+ RefCount MixCount{0u};
+
+ // Contexts created on this device
+ std::atomic<al::FlexArray<ContextBase*>*> mContexts{nullptr};
+
+
+ DeviceBase(DeviceType type);
+ DeviceBase(const DeviceBase&) = delete;
+ DeviceBase& operator=(const DeviceBase&) = delete;
+ ~DeviceBase();
+
+ uint bytesFromFmt() const noexcept { return BytesFromDevFmt(FmtType); }
+ uint channelsFromFmt() const noexcept { return ChannelsFromDevFmt(FmtChans, mAmbiOrder); }
+ uint frameSizeFromFmt() const noexcept { return bytesFromFmt() * channelsFromFmt(); }
+
+ /* Spins until the low bit of MixCount is clear, which indicates the device
+ * isn't in the middle of mixing, and returns the count that was read.
+ */
+ uint waitForMix() const noexcept
+ {
+ uint refcount;
+ while((refcount=MixCount.load(std::memory_order_acquire))&1) {
+ }
+ return refcount;
+ }
+
+ void ProcessHrtf(const size_t SamplesToDo);
+ void ProcessAmbiDec(const size_t SamplesToDo);
+ void ProcessAmbiDecStablized(const size_t SamplesToDo);
+ void ProcessUhj(const size_t SamplesToDo);
+ void ProcessBs2b(const size_t SamplesToDo);
+
+ inline void postProcess(const size_t SamplesToDo)
+ { if(PostProcess) LIKELY (this->*PostProcess)(SamplesToDo); }
+
+ void renderSamples(const al::span<float*> outBuffers, const uint numSamples);
+ void renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep);
+
+ /* Caller must lock the device state, and the mixer must not be running. */
+#ifdef __USE_MINGW_ANSI_STDIO
+ [[gnu::format(gnu_printf,2,3)]]
+#else
+ [[gnu::format(printf,2,3)]]
+#endif
+ void handleDisconnect(const char *msg, ...);
+
+ /**
+ * Returns the real output's channel index for the given channel name
+ * (e.g. FrontCenter), as stored in RealOut.ChannelIndex. A channel that
+ * doesn't exist is expected to be stored as InvalidChannelIndex. The given
+ * value must be less than MaxChannels.
+ */
+ uint channelIdxByName(Channel chan) const noexcept
+ { return RealOut.ChannelIndex[chan]; }
+
+ DISABLE_ALLOC()
+
+private:
+ uint renderSamples(const uint numSamples);
+};
+
+/* Must be less than 15 characters (16 including terminating null) for
+ * compatibility with pthread_setname_np limitations. */
+#define MIXER_THREAD_NAME "alsoft-mixer"
+
+#define RECORD_THREAD_NAME "alsoft-record"
+
+#endif /* CORE_DEVICE_H */
diff --git a/core/effects/base.h b/core/effects/base.h
new file mode 100644
index 00000000..4ee19f37
--- /dev/null
+++ b/core/effects/base.h
@@ -0,0 +1,197 @@
+#ifndef CORE_EFFECTS_BASE_H
+#define CORE_EFFECTS_BASE_H
+
+#include <stddef.h>
+
+#include "albyte.h"
+#include "almalloc.h"
+#include "alspan.h"
+#include "atomic.h"
+#include "core/bufferline.h"
+#include "intrusive_ptr.h"
+
+struct BufferStorage;
+struct ContextBase;
+struct DeviceBase;
+struct EffectSlot;
+struct MixParams;
+struct RealMixParams;
+
+
+/** Target gain for the reverb decay feedback reaching the decay time. */
+constexpr float ReverbDecayGain{0.001f}; /* -60 dB */
+
+/* NOTE(review): these look like the upper limits of the EFX reverb
+ * "reflections delay" and "late reverb delay" properties, presumably in
+ * seconds -- confirm against the EFX property ranges.
+ */
+constexpr float ReverbMaxReflectionsDelay{0.3f};
+constexpr float ReverbMaxLateReverbDelay{0.1f};
+
+/** Waveform choices held by EffectProps::Chorus::Waveform. */
+enum class ChorusWaveform {
+ Sinusoid,
+ Triangle
+};
+
+/* NOTE(review): upper delay limits for the chorus, flanger and echo effects,
+ * presumably in seconds and matching the EFX property maximums -- confirm.
+ */
+constexpr float ChorusMaxDelay{0.016f};
+constexpr float FlangerMaxDelay{0.004f};
+
+constexpr float EchoMaxDelay{0.207f};
+constexpr float EchoMaxLRDelay{0.404f};
+
+/**
+ * Direction choices held by EffectProps::Fshifter::LeftDirection and
+ * EffectProps::Fshifter::RightDirection.
+ */
+enum class FShifterDirection {
+ Down,
+ Up,
+ Off
+};
+
+/** Waveform choices held by EffectProps::Modulator::Waveform. */
+enum class ModulatorWaveform {
+ Sinusoid,
+ Sawtooth,
+ Square
+};
+
+/**
+ * Phoneme choices held by EffectProps::Vmorpher::PhonemeA and
+ * EffectProps::Vmorpher::PhonemeB. The type name is spelled "Phenome"; it is
+ * kept as-is because it is part of the interface other code refers to.
+ */
+enum class VMorpherPhenome {
+ A, E, I, O, U,
+ AA, AE, AH, AO, EH, ER, IH, IY, UH, UW,
+ B, D, F, G, J, K, L, M, N, P, R, S, T, V, Z
+};
+
+/** Waveform choices held by EffectProps::Vmorpher::Waveform. */
+enum class VMorpherWaveform {
+ Sinusoid,
+ Triangle,
+ Sawtooth
+};
+
+/**
+ * Parameter storage shared by all effect types. Each member is a plain struct
+ * of the parameters for one effect; because this is a union the members
+ * overlay each other, so only the member matching the effect in use is
+ * meaningful at any time. The union itself does not record which member that
+ * is.
+ */
+union EffectProps {
+ struct {
+ float Density;
+ float Diffusion;
+ float Gain;
+ float GainHF;
+ float GainLF;
+ float DecayTime;
+ float DecayHFRatio;
+ float DecayLFRatio;
+ float ReflectionsGain;
+ float ReflectionsDelay;
+ float ReflectionsPan[3];
+ float LateReverbGain;
+ float LateReverbDelay;
+ float LateReverbPan[3];
+ float EchoTime;
+ float EchoDepth;
+ float ModulationTime;
+ float ModulationDepth;
+ float AirAbsorptionGainHF;
+ float HFReference;
+ float LFReference;
+ float RoomRolloffFactor;
+ bool DecayHFLimit;
+ } Reverb;
+
+ struct {
+ float AttackTime;
+ float ReleaseTime;
+ float Resonance;
+ float PeakGain;
+ } Autowah;
+
+ struct {
+ ChorusWaveform Waveform;
+ int Phase;
+ float Rate;
+ float Depth;
+ float Feedback;
+ float Delay;
+ } Chorus; /* Also Flanger */
+
+ struct {
+ bool OnOff;
+ } Compressor;
+
+ struct {
+ float Edge;
+ float Gain;
+ float LowpassCutoff;
+ float EQCenter;
+ float EQBandwidth;
+ } Distortion;
+
+ struct {
+ float Delay;
+ float LRDelay;
+
+ float Damping;
+ float Feedback;
+
+ float Spread;
+ } Echo;
+
+ struct {
+ float LowCutoff;
+ float LowGain;
+ float Mid1Center;
+ float Mid1Gain;
+ float Mid1Width;
+ float Mid2Center;
+ float Mid2Gain;
+ float Mid2Width;
+ float HighCutoff;
+ float HighGain;
+ } Equalizer;
+
+ struct {
+ float Frequency;
+ FShifterDirection LeftDirection;
+ FShifterDirection RightDirection;
+ } Fshifter;
+
+ struct {
+ float Frequency;
+ float HighPassCutoff;
+ ModulatorWaveform Waveform;
+ } Modulator;
+
+ struct {
+ int CoarseTune;
+ int FineTune;
+ } Pshifter;
+
+ struct {
+ float Rate;
+ VMorpherPhenome PhonemeA;
+ VMorpherPhenome PhonemeB;
+ int PhonemeACoarseTuning;
+ int PhonemeBCoarseTuning;
+ VMorpherWaveform Waveform;
+ } Vmorpher;
+
+ struct {
+ float Gain;
+ } Dedicated;
+};
+
+
+/**
+ * Pair of non-owning pointers handed to EffectState::update() as its target
+ * argument.
+ * NOTE(review): presumably Main is the mix the effect output is written to
+ * and RealOut the device's real output channels -- confirm with callers.
+ */
+struct EffectTarget {
+ MixParams *Main;
+ RealMixParams *RealOut;
+};
+
+/**
+ * Abstract interface for an effect's processing state. It derives from
+ * al::intrusive_ref and is handed out as al::intrusive_ptr<EffectState> by
+ * EffectStateFactory::create(). Concrete effects implement the three pure
+ * virtual methods.
+ */
+struct EffectState : public al::intrusive_ref<EffectState> {
+ /* Output buffer lines. Nothing in this header assigns it. */
+ al::span<FloatBufferLine> mOutTarget;
+
+
+ virtual ~EffectState() = default;
+
+ /* Receives the device and an optional buffer to set the state up against. */
+ virtual void deviceUpdate(const DeviceBase *device, const BufferStorage *buffer) = 0;
+ /* Receives new effect properties together with the slot and output target. */
+ virtual void update(const ContextBase *context, const EffectSlot *slot,
+ const EffectProps *props, const EffectTarget target) = 0;
+ /* Processes samplesToDo samples from samplesIn toward samplesOut. */
+ virtual void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
+ const al::span<FloatBufferLine> samplesOut) = 0;
+};
+
+
+/** Abstract factory; create() returns a new reference-counted EffectState. */
+struct EffectStateFactory {
+ virtual ~EffectStateFactory() = default;
+
+ virtual al::intrusive_ptr<EffectState> create() = 0;
+};
+
+#endif /* CORE_EFFECTS_BASE_H */
diff --git a/core/effectslot.cpp b/core/effectslot.cpp
new file mode 100644
index 00000000..db8aa078
--- /dev/null
+++ b/core/effectslot.cpp
@@ -0,0 +1,19 @@
+
+#include "config.h"
+
+#include "effectslot.h"
+
+#include <stddef.h>
+
+#include "almalloc.h"
+#include "context.h"
+
+
+/**
+ * Allocates zeroed storage and constructs an EffectSlotArray of `count`
+ * entries in it, returning the new array.
+ */
+EffectSlotArray *EffectSlot::CreatePtrArray(size_t count) noexcept
+{
+    /* The storage is sized for count*2 pointers although the array is
+     * constructed with only count entries: the extra half is scratch space
+     * the mixer uses to hold a sorted list during mixing.
+     */
+    const auto storage_size = EffectSlotArray::Sizeof(count*2);
+    auto *storage = static_cast<EffectSlotArray*>(
+        al_calloc(alignof(EffectSlotArray), storage_size));
+    return al::construct_at(storage, count);
+}
diff --git a/core/effectslot.h b/core/effectslot.h
new file mode 100644
index 00000000..2624ae5f
--- /dev/null
+++ b/core/effectslot.h
@@ -0,0 +1,89 @@
+#ifndef CORE_EFFECTSLOT_H
+#define CORE_EFFECTSLOT_H
+
+#include <atomic>
+
+#include "almalloc.h"
+#include "device.h"
+#include "effects/base.h"
+#include "intrusive_ptr.h"
+
+struct EffectSlot;
+struct WetBuffer;
+
+using EffectSlotArray = al::FlexArray<EffectSlot*>;
+
+
+/**
+ * Identifies the kind of effect associated with a slot. Stored in
+ * EffectSlotProps::Type and EffectSlot::EffectType; the underlying type is
+ * unsigned char.
+ */
+enum class EffectSlotType : unsigned char {
+ None,
+ Reverb,
+ Chorus,
+ Distortion,
+ Echo,
+ Flanger,
+ FrequencyShifter,
+ VocalMorpher,
+ PitchShifter,
+ RingModulator,
+ Autowah,
+ Compressor,
+ Equalizer,
+ EAXReverb,
+ DedicatedLFE,
+ DedicatedDialog,
+ Convolution
+};
+
+/**
+ * A bundle of effect slot properties. Its fields parallel the corresponding
+ * EffectSlot members (Gain, AuxSendAuto, Target, type, props and state).
+ * NOTE(review): presumably this is the container published through
+ * EffectSlot::Update, with `next` chaining containers in a list -- confirm
+ * with the code that produces and consumes it.
+ */
+struct EffectSlotProps {
+ float Gain;
+ bool AuxSendAuto;
+ EffectSlot *Target;
+
+ EffectSlotType Type;
+ EffectProps Props;
+
+ al::intrusive_ptr<EffectState> State;
+
+ std::atomic<EffectSlotProps*> next;
+
+ DEF_NEWDEL(EffectSlotProps)
+};
+
+
+/**
+ * State for one effect slot: an atomically-held pointer to a pending
+ * EffectSlotProps, the wet mix parameters and their buffer, the current
+ * effect type/properties/state, and a handful of decay and absorption values.
+ */
+struct EffectSlot {
+ bool InUse{false};
+
+ /* Atomic pointer to an EffectSlotProps; null by default. */
+ std::atomic<EffectSlotProps*> Update{nullptr};
+
+ /* Wet buffer configuration is ACN channel order with N3D scaling.
+ * Consequently, effects that only want to work with mono input can use
+ * channel 0 by itself. Effects that want multichannel can process the
+ * ambisonics signal and make a B-Format source pan.
+ */
+ MixParams Wet;
+
+ float Gain{1.0f};
+ bool AuxSendAuto{true};
+ EffectSlot *Target{nullptr};
+
+ EffectSlotType EffectType{EffectSlotType::None};
+ EffectProps mEffectProps{};
+ al::intrusive_ptr<EffectState> mEffectState;
+
+ /* NOTE(review): the names below match fields of EffectProps::Reverb, so
+ * they are presumably copied from the reverb properties -- confirm.
+ */
+ float RoomRolloff{0.0f}; /* Added to the source's room rolloff, not multiplied. */
+ float DecayTime{0.0f};
+ float DecayLFRatio{0.0f};
+ float DecayHFRatio{0.0f};
+ bool DecayHFLimit{false};
+ float AirAbsorptionGainHF{1.0f};
+
+ /* Mixing buffer used by the Wet mix. */
+ al::vector<FloatBufferLine,16> mWetBuffer;
+
+
+ /* Allocates an EffectSlotArray with `count` entries. */
+ static EffectSlotArray *CreatePtrArray(size_t count) noexcept;
+
+ DEF_NEWDEL(EffectSlot)
+};
+
+#endif /* CORE_EFFECTSLOT_H */
diff --git a/core/except.cpp b/core/except.cpp
new file mode 100644
index 00000000..45fd4eb5
--- /dev/null
+++ b/core/except.cpp
@@ -0,0 +1,30 @@
+
+#include "config.h"
+
+#include "except.h"
+
+#include <cstdio>
+#include <cstdarg>
+
+#include "opthelpers.h"
+
+
+namespace al {
+
+base_exception::~base_exception() = default;
+
+/**
+ * Formats `msg` printf-style with `args` and stores the result in mMessage.
+ * If the measured length is not positive, mMessage is left as it was.
+ */
+void base_exception::setMessage(const char* msg, std::va_list args)
+{
+    /* The argument list is walked twice (once to measure, once to format),
+     * so the second pass gets its own copy.
+     */
+    std::va_list fmt_args;
+    va_copy(fmt_args, args);
+
+    const int needed{std::vsnprintf(nullptr, 0, msg, args)};
+    if(needed > 0) LIKELY
+    {
+        /* Reserve one extra character for the null terminator vsnprintf
+         * writes, then drop it again so the string holds only the text.
+         */
+        const size_t bufsize{static_cast<size_t>(needed) + 1};
+        mMessage.resize(bufsize);
+        std::vsnprintf(&mMessage[0], bufsize, msg, fmt_args);
+        mMessage.pop_back();
+    }
+    va_end(fmt_args);
+}
+
+} // namespace al
diff --git a/core/except.h b/core/except.h
new file mode 100644
index 00000000..0e28e9df
--- /dev/null
+++ b/core/except.h
@@ -0,0 +1,31 @@
+#ifndef CORE_EXCEPT_H
+#define CORE_EXCEPT_H
+
+#include <cstdarg>
+#include <exception>
+#include <string>
+#include <utility>
+
+
+namespace al {
+
+/**
+ * Exception base class that stores a message string and returns it from
+ * what(). The constructor, destructor and setMessage() are protected, so only
+ * derived classes can create an instance or fill in the message.
+ */
+class base_exception : public std::exception {
+ std::string mMessage;
+
+protected:
+ base_exception() = default;
+ virtual ~base_exception();
+
+ /* Builds the message from a printf-style format string and argument list. */
+ void setMessage(const char *msg, std::va_list args);
+
+public:
+ const char *what() const noexcept override { return mMessage.c_str(); }
+};
+
+} // namespace al
+
+/* Expands to `try`, opening a try block around the function body that follows. */
+#define START_API_FUNC try
+
+/* Expands to a catch-all handler that calls std::terminate(), so any
+ * exception reaching it ends the program.
+ */
+#define END_API_FUNC catch(...) { std::terminate(); }
+
+#endif /* CORE_EXCEPT_H */
diff --git a/core/filters/biquad.cpp b/core/filters/biquad.cpp
new file mode 100644
index 00000000..a0a62eb8
--- /dev/null
+++ b/core/filters/biquad.cpp
@@ -0,0 +1,168 @@
+
+#include "config.h"
+
+#include "biquad.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+
+#include "alnumbers.h"
+#include "opthelpers.h"
+
+
+/**
+ * Computes the filter coefficients for the given filter type and stores them
+ * in mB0, mB1, mB2, mA1 and mA2, each divided by a[0]. The delay state (mZ1,
+ * mZ2) is not touched.
+ *
+ * \param type Filter shape to compute coefficients for.
+ * \param f0norm Reference frequency; multiplied by 2*pi to get w0.
+ * \param gain Linear gain, raised to at least 0.00001 before use. Only the
+ * shelf and peaking cases read it.
+ * \param rcpQ Reciprocal of Q; scales alpha.
+ */
+template<typename Real>
+void BiquadFilterR<Real>::setParams(BiquadType type, Real f0norm, Real gain, Real rcpQ)
+{
+ /* HACK: Limit gain to -100dB. This shouldn't ever happen, all callers
+ * already clamp to minimum of 0.001, or have a limited range of values
+ * that don't go below 0.126. But it seems to with some callers. This needs
+ * to be investigated.
+ */
+ gain = std::max(gain, Real(0.00001));
+
+ const Real w0{al::numbers::pi_v<Real>*2.0f * f0norm};
+ const Real sin_w0{std::sin(w0)};
+ const Real cos_w0{std::cos(w0)};
+ const Real alpha{sin_w0/2.0f * rcpQ};
+
+ /* Only assigned and read by the two shelf cases. */
+ Real sqrtgain_alpha_2;
+ /* Default to a pass-through filter (b = a = {1, 0, 0}). */
+ Real a[3]{ 1.0f, 0.0f, 0.0f };
+ Real b[3]{ 1.0f, 0.0f, 0.0f };
+
+ /* Calculate filter coefficients depending on filter type */
+ switch(type)
+ {
+ case BiquadType::HighShelf:
+ sqrtgain_alpha_2 = 2.0f * std::sqrt(gain) * alpha;
+ b[0] = gain*((gain+1.0f) + (gain-1.0f)*cos_w0 + sqrtgain_alpha_2);
+ b[1] = -2.0f*gain*((gain-1.0f) + (gain+1.0f)*cos_w0 );
+ b[2] = gain*((gain+1.0f) + (gain-1.0f)*cos_w0 - sqrtgain_alpha_2);
+ a[0] = (gain+1.0f) - (gain-1.0f)*cos_w0 + sqrtgain_alpha_2;
+ a[1] = 2.0f* ((gain-1.0f) - (gain+1.0f)*cos_w0 );
+ a[2] = (gain+1.0f) - (gain-1.0f)*cos_w0 - sqrtgain_alpha_2;
+ break;
+ case BiquadType::LowShelf:
+ sqrtgain_alpha_2 = 2.0f * std::sqrt(gain) * alpha;
+ b[0] = gain*((gain+1.0f) - (gain-1.0f)*cos_w0 + sqrtgain_alpha_2);
+ b[1] = 2.0f*gain*((gain-1.0f) - (gain+1.0f)*cos_w0 );
+ b[2] = gain*((gain+1.0f) - (gain-1.0f)*cos_w0 - sqrtgain_alpha_2);
+ a[0] = (gain+1.0f) + (gain-1.0f)*cos_w0 + sqrtgain_alpha_2;
+ a[1] = -2.0f* ((gain-1.0f) + (gain+1.0f)*cos_w0 );
+ a[2] = (gain+1.0f) + (gain-1.0f)*cos_w0 - sqrtgain_alpha_2;
+ break;
+ case BiquadType::Peaking:
+ b[0] = 1.0f + alpha * gain;
+ b[1] = -2.0f * cos_w0;
+ b[2] = 1.0f - alpha * gain;
+ a[0] = 1.0f + alpha / gain;
+ a[1] = -2.0f * cos_w0;
+ a[2] = 1.0f - alpha / gain;
+ break;
+
+ /* The three cases below share the same denominator and ignore gain. */
+ case BiquadType::LowPass:
+ b[0] = (1.0f - cos_w0) / 2.0f;
+ b[1] = 1.0f - cos_w0;
+ b[2] = (1.0f - cos_w0) / 2.0f;
+ a[0] = 1.0f + alpha;
+ a[1] = -2.0f * cos_w0;
+ a[2] = 1.0f - alpha;
+ break;
+ case BiquadType::HighPass:
+ b[0] = (1.0f + cos_w0) / 2.0f;
+ b[1] = -(1.0f + cos_w0);
+ b[2] = (1.0f + cos_w0) / 2.0f;
+ a[0] = 1.0f + alpha;
+ a[1] = -2.0f * cos_w0;
+ a[2] = 1.0f - alpha;
+ break;
+ case BiquadType::BandPass:
+ b[0] = alpha;
+ b[1] = 0.0f;
+ b[2] = -alpha;
+ a[0] = 1.0f + alpha;
+ a[1] = -2.0f * cos_w0;
+ a[2] = 1.0f - alpha;
+ break;
+ }
+
+ /* Normalize by a[0] so the processing loops need no division. */
+ mA1 = a[1] / a[0];
+ mA2 = a[2] / a[0];
+ mB0 = b[0] / a[0];
+ mB1 = b[1] / a[0];
+ mB2 = b[2] / a[0];
+}
+
+/**
+ * Filters every sample of `src` in order, writing one output sample per input
+ * sample to consecutive positions starting at `dst`. The filter's delay state
+ * is carried over from, and saved back for, the next call.
+ */
+template<typename Real>
+void BiquadFilterR<Real>::process(const al::span<const Real> src, Real *dst)
+{
+    /* Work on local copies of the coefficients and delay state; the state is
+     * written back once after the loop.
+     */
+    const Real num0{mB0}, num1{mB1}, num2{mB2};
+    const Real den1{mA1}, den2{mA2};
+    Real state1{mZ1};
+    Real state2{mZ2};
+
+    /* Transposed Direct Form II: only two delay components are needed, as
+     * opposed to the four-sample history (last two inputs and outputs) of
+     * Direct Form I, and it suits floating-point, which prefers summing
+     * similarly-sized values and is less bothered by overflow.
+     *
+     * See: http://www.earlevel.com/main/2003/02/28/biquads/
+     */
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const Real sample{*iter};
+        const Real result{sample*num0 + state1};
+        state1 = sample*num1 - result*den1 + state2;
+        state2 = sample*num2 - result*den2;
+        *(dst++) = result;
+    }
+
+    mZ1 = state1;
+    mZ2 = state2;
+}
+
+/**
+ * Runs this filter and `other` back to back over `src` in a single pass: each
+ * sample goes through this filter first, and that result is then fed through
+ * `other`. The final value is written to consecutive positions starting at
+ * `dst`. Both filters' delay states are updated.
+ */
+template<typename Real>
+void BiquadFilterR<Real>::dualProcess(BiquadFilterR &other, const al::span<const Real> src,
+    Real *dst)
+{
+    /* Stage one: this filter's coefficients and state. */
+    const Real s0_b0{mB0}, s0_b1{mB1}, s0_b2{mB2};
+    const Real s0_a1{mA1}, s0_a2{mA2};
+    Real s0_z1{mZ1};
+    Real s0_z2{mZ2};
+
+    /* Stage two: the other filter's coefficients and state. */
+    const Real s1_b0{other.mB0}, s1_b1{other.mB1}, s1_b2{other.mB2};
+    const Real s1_a1{other.mA1}, s1_a2{other.mA2};
+    Real s1_z1{other.mZ1};
+    Real s1_z2{other.mZ2};
+
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const Real sample{*iter};
+
+        const Real stage0{sample*s0_b0 + s0_z1};
+        s0_z1 = sample*s0_b1 - stage0*s0_a1 + s0_z2;
+        s0_z2 = sample*s0_b2 - stage0*s0_a2;
+
+        /* The first stage's output is the second stage's input. */
+        const Real stage1{stage0*s1_b0 + s1_z1};
+        s1_z1 = stage0*s1_b1 - stage1*s1_a1 + s1_z2;
+        s1_z2 = stage0*s1_b2 - stage1*s1_a2;
+
+        *(dst++) = stage1;
+    }
+
+    mZ1 = s0_z1;
+    mZ2 = s0_z2;
+    other.mZ1 = s1_z1;
+    other.mZ2 = s1_z2;
+}
+
+template class BiquadFilterR<float>;
+template class BiquadFilterR<double>;
diff --git a/core/filters/biquad.h b/core/filters/biquad.h
new file mode 100644
index 00000000..75a4009b
--- /dev/null
+++ b/core/filters/biquad.h
@@ -0,0 +1,144 @@
+#ifndef CORE_FILTERS_BIQUAD_H
+#define CORE_FILTERS_BIQUAD_H
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <utility>
+
+#include "alnumbers.h"
+#include "alspan.h"
+
+
+/* Filters implementation is based on the "Cookbook formulae for audio
+ * EQ biquad filter coefficients" by Robert Bristow-Johnson
+ * http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
+ */
+/* Implementation note: For the shelf and peaking filters, the specified gain
+ * is for the centerpoint of the transition band. This better fits EFX filter
+ * behavior, which expects the shelf's reference frequency to reach the given
+ * gain. To set the gain for the shelf or peak itself, use the square root of
+ * the desired linear gain (or halve the dB gain).
+ */
+
+/** Filter shapes that BiquadFilterR can compute coefficients for. */
+enum class BiquadType {
+ /**
+ * High-shelf filter, specifying a gain and reference frequency. This is
+ * what provides the EFX-style low-pass filter (a shelf gain below 1
+ * attenuates the frequencies above the reference).
+ */
+ HighShelf,
+ /**
+ * Low-shelf filter, specifying a gain and reference frequency. This is
+ * what provides the EFX-style high-pass filter (a shelf gain below 1
+ * attenuates the frequencies below the reference).
+ */
+ LowShelf,
+ /** Peaking filter, specifying a gain and reference frequency. */
+ Peaking,
+
+ /** Low-pass cut-off filter, specifying a cut-off frequency. */
+ LowPass,
+ /** High-pass cut-off filter, specifying a cut-off frequency. */
+ HighPass,
+ /** Band-pass filter, specifying a center frequency. */
+ BandPass,
+};
+
+/**
+ * Second-order (biquad) filter over samples of type Real. It holds five
+ * normalized coefficients and two delay components. The defaults (mB0 = 1,
+ * everything else 0) pass the input through unchanged.
+ */
+template<typename Real>
+class BiquadFilterR {
+ /* Last two delayed components for direct form II. */
+ Real mZ1{0}, mZ2{0};
+ /* Transfer function coefficients "b" (numerator) */
+ Real mB0{1}, mB1{0}, mB2{0};
+ /* Transfer function coefficients "a" (denominator; a0 is pre-applied). */
+ Real mA1{0}, mA2{0};
+
+ /* Computes and stores the coefficients from a reciprocal Q value. The
+ * public setParamsFrom* methods derive rcpQ and forward to this.
+ */
+ void setParams(BiquadType type, Real f0norm, Real gain, Real rcpQ);
+
+ /**
+ * Calculates the rcpQ (i.e. 1/Q) coefficient for shelving filters, using
+ * the reference gain and shelf slope parameter.
+ * \param gain 0 < gain
+ * \param slope 0 < slope <= 1
+ */
+ static Real rcpQFromSlope(Real gain, Real slope)
+ { return std::sqrt((gain + Real{1}/gain)*(Real{1}/slope - Real{1}) + Real{2}); }
+
+ /**
+ * Calculates the rcpQ (i.e. 1/Q) coefficient for filters, using the
+ * normalized reference frequency and bandwidth.
+ * \param f0norm 0 < f0norm < 0.5.
+ * \param bandwidth 0 < bandwidth
+ */
+ static Real rcpQFromBandwidth(Real f0norm, Real bandwidth)
+ {
+ const Real w0{al::numbers::pi_v<Real>*Real{2} * f0norm};
+ return 2.0f*std::sinh(std::log(Real{2})/Real{2}*bandwidth*w0/std::sin(w0));
+ }
+
+public:
+ /** Resets the delay components to zero; the coefficients are kept. */
+ void clear() noexcept { mZ1 = mZ2 = Real{0}; }
+
+ /**
+ * Sets the filter state for the specified filter type and its parameters.
+ *
+ * \param type The type of filter to apply.
+ * \param f0norm The normalized reference frequency (ref / sample_rate).
+ * This is the center point for the Shelf, Peaking, and BandPass filter
+ * types, or the cutoff frequency for the LowPass and HighPass filter
+ * types.
+ * \param gain The gain for the reference frequency response. Only used by
+ * the Shelf and Peaking filter types.
+ * \param slope Slope steepness of the transition band.
+ */
+ void setParamsFromSlope(BiquadType type, Real f0norm, Real gain, Real slope)
+ {
+ gain = std::max<Real>(gain, 0.001f); /* Limit -60dB */
+ setParams(type, f0norm, gain, rcpQFromSlope(gain, slope));
+ }
+
+ /**
+ * Sets the filter state for the specified filter type and its parameters.
+ * Unlike setParamsFromSlope, the gain is passed on without the 0.001
+ * lower limit being applied here.
+ *
+ * \param type The type of filter to apply.
+ * \param f0norm The normalized reference frequency (ref / sample_rate).
+ * This is the center point for the Shelf, Peaking, and BandPass filter
+ * types, or the cutoff frequency for the LowPass and HighPass filter
+ * types.
+ * \param gain The gain for the reference frequency response. Only used by
+ * the Shelf and Peaking filter types.
+ * \param bandwidth Normalized bandwidth of the transition band.
+ */
+ void setParamsFromBandwidth(BiquadType type, Real f0norm, Real gain, Real bandwidth)
+ { setParams(type, f0norm, gain, rcpQFromBandwidth(f0norm, bandwidth)); }
+
+ /** Copies the five coefficients from other; the delay state is not copied. */
+ void copyParamsFrom(const BiquadFilterR &other)
+ {
+ mB0 = other.mB0;
+ mB1 = other.mB1;
+ mB2 = other.mB2;
+ mA1 = other.mA1;
+ mA2 = other.mA2;
+ }
+
+ /** Filters src into dst, updating the stored delay state. */
+ void process(const al::span<const Real> src, Real *dst);
+ /** Processes this filter and the other at the same time. */
+ void dualProcess(BiquadFilterR &other, const al::span<const Real> src, Real *dst);
+
+ /* Rather hacky. It's just here to support "manual" processing. */
+ std::pair<Real,Real> getComponents() const noexcept { return {mZ1, mZ2}; }
+ void setComponents(Real z1, Real z2) noexcept { mZ1 = z1; mZ2 = z2; }
+ /* Filters a single sample using this filter's coefficients and the
+ * caller-supplied delay components, which are updated in place. The
+ * filter's own mZ1/mZ2 are neither read nor written.
+ */
+ Real processOne(const Real in, Real &z1, Real &z2) const noexcept
+ {
+ const Real out{in*mB0 + z1};
+ z1 = in*mB1 - out*mA1 + z2;
+ z2 = in*mB2 - out*mA2;
+ return out;
+ }
+};
+
+/**
+ * Pairs two filters by reference so they can be run through one process()
+ * call: f0.dualProcess(f1, ...) applies f0 and then f1 to each sample. The
+ * struct does not own the filters.
+ */
+template<typename Real>
+struct DualBiquadR {
+ BiquadFilterR<Real> &f0, &f1;
+
+ void process(const al::span<const Real> src, Real *dst)
+ { f0.dualProcess(f1, src, dst); }
+};
+
+using BiquadFilter = BiquadFilterR<float>;
+using DualBiquad = DualBiquadR<float>;
+
+#endif /* CORE_FILTERS_BIQUAD_H */
diff --git a/core/filters/nfc.cpp b/core/filters/nfc.cpp
new file mode 100644
index 00000000..aa64c613
--- /dev/null
+++ b/core/filters/nfc.cpp
@@ -0,0 +1,367 @@
+
+#include "config.h"
+
+#include "nfc.h"
+
+#include <algorithm>
+
+#include "opthelpers.h"
+
+
+/* Near-field control filters are the basis for handling the near-field effect.
+ * The near-field effect is a bass-boost present in the directional components
+ * of a recorded signal, created as a result of the wavefront curvature (itself
+ * a function of sound distance). Proper reproduction dictates this be
+ * compensated for using a bass-cut given the playback speaker distance, to
+ * avoid excessive bass in the playback.
+ *
+ * For real-time rendered audio, emulating the near-field effect based on the
+ * sound source's distance, and subsequently compensating for it at output
+ * based on the speaker distances, can create a more realistic perception of
+ * sound distance beyond a simple 1/r attenuation.
+ *
+ * These filters do just that. Each one applies a low-shelf filter, created as
+ * the combination of a bass-boost for a given sound source distance (near-
+ * field emulation) along with a bass-cut for a given control/speaker distance
+ * (near-field compensation).
+ *
+ * Note that it is necessary to apply a cut along with the boost, since the
+ * boost alone is unstable in higher-order ambisonics as it causes an infinite
+ * DC gain (even first-order ambisonics requires there to be no DC offset for
+ * the boost to work). Consequently, ambisonics requires a control parameter to
+ * be used to avoid an unstable boost-only filter. NFC-HOA defines this control
+ * as a reference delay, calculated with:
+ *
+ * reference_delay = control_distance / speed_of_sound
+ *
+ * This means w0 (for input) or w1 (for output) should be set to:
+ *
+ * wN = 1 / (reference_delay * sample_rate)
+ *
+ * when dealing with NFC-HOA content. For FOA input content, which does not
+ * specify a reference_delay variable, w0 should be set to 0 to apply only
+ * near-field compensation for output. It's important that w1 be a finite,
+ * positive, non-0 value or else the bass-boost will become unstable again.
+ * Also, w0 should not be too large compared to w1, to avoid excessively loud
+ * low frequencies.
+ */
+
+namespace {
+
+/* Per-order coefficient table. Row N is read by the order-N create/adjust
+ * helpers below (B[1] for NfcFilter1 through B[4] for NfcFilter4); row 0 is
+ * not read by anything in this file. Rows with fewer than four initializers
+ * are zero-filled.
+ */
+constexpr float B[5][4] = {
+ { 0.0f },
+ { 1.0f },
+ { 3.0f, 3.0f },
+ { 3.6778f, 6.4595f, 2.3222f },
+ { 4.2076f, 11.4877f, 5.7924f, 9.1401f }
+};
+
+/* Builds a first-order filter: the bass-cut coefficients come from w1 and the
+ * bass-boost coefficients from w0. The delay state starts zeroed.
+ */
+NfcFilter1 NfcFilterCreate1(const float w0, const float w1) noexcept
+{
+    NfcFilter1 filter{};
+
+    /* Bass-cut section. */
+    {
+        const float half_w{0.5f * w1};
+        const float lin{B[1][0] * half_w};
+        const float norm{1.0f + lin};
+
+        filter.base_gain = 1.0f / norm;
+        filter.a1 = 2.0f * lin / norm;
+    }
+
+    /* Bass-boost section. */
+    {
+        const float half_w{0.5f * w0};
+        const float lin{B[1][0] * half_w};
+        const float norm{1.0f + lin};
+
+        filter.gain = filter.base_gain * norm;
+        filter.b1 = 2.0f * lin / norm;
+    }
+
+    return filter;
+}
+
+/* Recomputes only the bass-boost side (gain and b1) of a first-order filter
+ * for a new w0; base_gain, a1 and the delay state are left alone.
+ */
+void NfcFilterAdjust1(NfcFilter1 *nfc, const float w0) noexcept
+{
+    const float half_w{0.5f * w0};
+    const float lin{B[1][0] * half_w};
+    const float norm{1.0f + lin};
+
+    NfcFilter1 &filter = *nfc;
+    filter.gain = filter.base_gain * norm;
+    filter.b1 = 2.0f * lin / norm;
+}
+
+
+/* Builds a second-order filter: the bass-cut coefficients come from w1 and
+ * the bass-boost coefficients from w0. The delay state starts zeroed.
+ */
+NfcFilter2 NfcFilterCreate2(const float w0, const float w1) noexcept
+{
+    NfcFilter2 filter{};
+
+    /* Bass-cut section. */
+    {
+        const float half_w{0.5f * w1};
+        const float lin{B[2][0] * half_w};
+        const float quad{B[2][1] * half_w * half_w};
+        const float norm{1.0f + lin + quad};
+
+        filter.base_gain = 1.0f / norm;
+        filter.a1 = (2.0f*lin + 4.0f*quad) / norm;
+        filter.a2 = 4.0f * quad / norm;
+    }
+
+    /* Bass-boost section. */
+    {
+        const float half_w{0.5f * w0};
+        const float lin{B[2][0] * half_w};
+        const float quad{B[2][1] * half_w * half_w};
+        const float norm{1.0f + lin + quad};
+
+        filter.gain = filter.base_gain * norm;
+        filter.b1 = (2.0f*lin + 4.0f*quad) / norm;
+        filter.b2 = 4.0f * quad / norm;
+    }
+
+    return filter;
+}
+
+/* Recomputes only the bass-boost side (gain, b1, b2) of a second-order filter
+ * for a new w0; base_gain, a1, a2 and the delay state are left alone.
+ */
+void NfcFilterAdjust2(NfcFilter2 *nfc, const float w0) noexcept
+{
+    const float half_w{0.5f * w0};
+    const float lin{B[2][0] * half_w};
+    const float quad{B[2][1] * half_w * half_w};
+    const float norm{1.0f + lin + quad};
+
+    NfcFilter2 &filter = *nfc;
+    filter.gain = filter.base_gain * norm;
+    filter.b1 = (2.0f*lin + 4.0f*quad) / norm;
+    filter.b2 = 4.0f * quad / norm;
+}
+
+
+/* Builds a third-order filter as a second-order part (a1/a2, b1/b2) followed
+ * by a first-order part (a3, b3). The bass-cut coefficients come from w1 and
+ * the bass-boost coefficients from w0. The delay state starts zeroed.
+ */
+NfcFilter3 NfcFilterCreate3(const float w0, const float w1) noexcept
+{
+    NfcFilter3 filter{};
+
+    /* Bass-cut section. */
+    {
+        const float half_w{0.5f * w1};
+        const float so_lin{B[3][0] * half_w};
+        const float so_quad{B[3][1] * half_w * half_w};
+        const float fo_lin{B[3][2] * half_w};
+        const float so_norm{1.0f + so_lin + so_quad};
+        const float fo_norm{1.0f + fo_lin};
+
+        filter.base_gain = 1.0f / (so_norm * fo_norm);
+        filter.a1 = (2.0f*so_lin + 4.0f*so_quad) / so_norm;
+        filter.a2 = 4.0f * so_quad / so_norm;
+        filter.a3 = 2.0f * fo_lin / fo_norm;
+    }
+
+    /* Bass-boost section. */
+    {
+        const float half_w{0.5f * w0};
+        const float so_lin{B[3][0] * half_w};
+        const float so_quad{B[3][1] * half_w * half_w};
+        const float fo_lin{B[3][2] * half_w};
+        const float so_norm{1.0f + so_lin + so_quad};
+        const float fo_norm{1.0f + fo_lin};
+
+        filter.gain = filter.base_gain * (so_norm * fo_norm);
+        filter.b1 = (2.0f*so_lin + 4.0f*so_quad) / so_norm;
+        filter.b2 = 4.0f * so_quad / so_norm;
+        filter.b3 = 2.0f * fo_lin / fo_norm;
+    }
+
+    return filter;
+}
+
+/* Recomputes only the bass-boost side (gain, b1..b3) of a third-order filter
+ * for a new w0; base_gain, a1..a3 and the delay state are left alone.
+ */
+void NfcFilterAdjust3(NfcFilter3 *nfc, const float w0) noexcept
+{
+    const float half_w{0.5f * w0};
+    const float so_lin{B[3][0] * half_w};
+    const float so_quad{B[3][1] * half_w * half_w};
+    const float fo_lin{B[3][2] * half_w};
+    const float so_norm{1.0f + so_lin + so_quad};
+    const float fo_norm{1.0f + fo_lin};
+
+    NfcFilter3 &filter = *nfc;
+    filter.gain = filter.base_gain * (so_norm * fo_norm);
+    filter.b1 = (2.0f*so_lin + 4.0f*so_quad) / so_norm;
+    filter.b2 = 4.0f * so_quad / so_norm;
+    filter.b3 = 2.0f * fo_lin / fo_norm;
+}
+
+
+/* Builds a fourth-order filter as two second-order parts (a1/a2 with b1/b2,
+ * then a3/a4 with b3/b4). The bass-cut coefficients come from w1 and the
+ * bass-boost coefficients from w0. The delay state starts zeroed.
+ */
+NfcFilter4 NfcFilterCreate4(const float w0, const float w1) noexcept
+{
+    NfcFilter4 filter{};
+
+    /* Bass-cut section. */
+    {
+        const float half_w{0.5f * w1};
+        const float p1_lin{B[4][0] * half_w};
+        const float p1_quad{B[4][1] * half_w * half_w};
+        const float p0_lin{B[4][2] * half_w};
+        const float p0_quad{B[4][3] * half_w * half_w};
+        const float p1_norm{1.0f + p1_lin + p1_quad};
+        const float p0_norm{1.0f + p0_lin + p0_quad};
+
+        filter.base_gain = 1.0f / (p1_norm * p0_norm);
+        filter.a1 = (2.0f*p1_lin + 4.0f*p1_quad) / p1_norm;
+        filter.a2 = 4.0f * p1_quad / p1_norm;
+        filter.a3 = (2.0f*p0_lin + 4.0f*p0_quad) / p0_norm;
+        filter.a4 = 4.0f * p0_quad / p0_norm;
+    }
+
+    /* Bass-boost section. */
+    {
+        const float half_w{0.5f * w0};
+        const float p1_lin{B[4][0] * half_w};
+        const float p1_quad{B[4][1] * half_w * half_w};
+        const float p0_lin{B[4][2] * half_w};
+        const float p0_quad{B[4][3] * half_w * half_w};
+        const float p1_norm{1.0f + p1_lin + p1_quad};
+        const float p0_norm{1.0f + p0_lin + p0_quad};
+
+        filter.gain = filter.base_gain * (p1_norm * p0_norm);
+        filter.b1 = (2.0f*p1_lin + 4.0f*p1_quad) / p1_norm;
+        filter.b2 = 4.0f * p1_quad / p1_norm;
+        filter.b3 = (2.0f*p0_lin + 4.0f*p0_quad) / p0_norm;
+        filter.b4 = 4.0f * p0_quad / p0_norm;
+    }
+
+    return filter;
+}
+
+/* Recomputes only the bass-boost side (gain, b1..b4) of a fourth-order filter
+ * for a new w0; base_gain, a1..a4 and the delay state are left alone.
+ */
+void NfcFilterAdjust4(NfcFilter4 *nfc, const float w0) noexcept
+{
+    const float half_w{0.5f * w0};
+    const float p1_lin{B[4][0] * half_w};
+    const float p1_quad{B[4][1] * half_w * half_w};
+    const float p0_lin{B[4][2] * half_w};
+    const float p0_quad{B[4][3] * half_w * half_w};
+    const float p1_norm{1.0f + p1_lin + p1_quad};
+    const float p0_norm{1.0f + p0_lin + p0_quad};
+
+    NfcFilter4 &filter = *nfc;
+    filter.gain = filter.base_gain * (p1_norm * p0_norm);
+    filter.b1 = (2.0f*p1_lin + 4.0f*p1_quad) / p1_norm;
+    filter.b2 = 4.0f * p1_quad / p1_norm;
+    filter.b3 = (2.0f*p0_lin + 4.0f*p0_quad) / p0_norm;
+    filter.b4 = 4.0f * p0_quad / p0_norm;
+}
+
+} // namespace
+
+/* (Re)creates all four per-order filters for the given w1. Every filter
+ * starts with w0 = 0 and zeroed delay state; call adjust() to set w0.
+ */
+void NfcFilter::init(const float w1) noexcept
+{
+    constexpr float initial_w0{0.0f};
+
+    first = NfcFilterCreate1(initial_w0, w1);
+    second = NfcFilterCreate2(initial_w0, w1);
+    third = NfcFilterCreate3(initial_w0, w1);
+    fourth = NfcFilterCreate4(initial_w0, w1);
+}
+
+/* Applies a new w0 to all four per-order filters. Only their bass-boost
+ * coefficients and gain change; the w1-derived values from init() and the
+ * delay state are kept.
+ */
+void NfcFilter::adjust(const float w0) noexcept
+{
+ NfcFilterAdjust1(&first, w0);
+ NfcFilterAdjust2(&second, w0);
+ NfcFilterAdjust3(&third, w0);
+ NfcFilterAdjust4(&fourth, w0);
+}
+
+
+/* Runs the first-order filter over src, writing one output per input sample
+ * to consecutive positions starting at dst. The delay state persists across
+ * calls.
+ */
+void NfcFilter::process1(const al::span<const float> src, float *RESTRICT dst)
+{
+    const float in_gain{first.gain};
+    const float boost1{first.b1};
+    const float cut1{first.a1};
+    float state1{first.z[0]};
+
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const float in{*iter};
+        const float fb{in*in_gain - cut1*state1};
+        const float out{fb + boost1*state1};
+        state1 += fb;
+        *(dst++) = out;
+    }
+
+    first.z[0] = state1;
+}
+
+/* Runs the second-order filter over src, writing one output per input sample
+ * to consecutive positions starting at dst. The delay state persists across
+ * calls.
+ */
+void NfcFilter::process2(const al::span<const float> src, float *RESTRICT dst)
+{
+    const float in_gain{second.gain};
+    const float boost1{second.b1};
+    const float boost2{second.b2};
+    const float cut1{second.a1};
+    const float cut2{second.a2};
+    float state1{second.z[0]};
+    float state2{second.z[1]};
+
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const float in{*iter};
+        const float fb{in*in_gain - cut1*state1 - cut2*state2};
+        const float out{fb + boost1*state1 + boost2*state2};
+        /* state2 must accumulate the old state1 before state1 is advanced. */
+        state2 += state1;
+        state1 += fb;
+        *(dst++) = out;
+    }
+
+    second.z[0] = state1;
+    second.z[1] = state2;
+}
+
+/* Runs the third-order filter over src: a second-order part followed by a
+ * first-order part fed with the former's output. One output per input sample
+ * is written to consecutive positions starting at dst. The delay state
+ * persists across calls.
+ */
+void NfcFilter::process3(const al::span<const float> src, float *RESTRICT dst)
+{
+    const float in_gain{third.gain};
+    const float boost1{third.b1};
+    const float boost2{third.b2};
+    const float boost3{third.b3};
+    const float cut1{third.a1};
+    const float cut2{third.a2};
+    const float cut3{third.a3};
+    float state1{third.z[0]};
+    float state2{third.z[1]};
+    float state3{third.z[2]};
+
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const float in{*iter};
+
+        /* Second-order part. */
+        const float fb_a{in*in_gain - cut1*state1 - cut2*state2};
+        const float mid{fb_a + boost1*state1 + boost2*state2};
+        state2 += state1;
+        state1 += fb_a;
+
+        /* First-order part. */
+        const float fb_b{mid - cut3*state3};
+        const float out{fb_b + boost3*state3};
+        state3 += fb_b;
+
+        *(dst++) = out;
+    }
+
+    third.z[0] = state1;
+    third.z[1] = state2;
+    third.z[2] = state3;
+}
+
+/* Runs the fourth-order filter over src: two second-order parts in series,
+ * the second fed with the first's output. One output per input sample is
+ * written to consecutive positions starting at dst. The delay state persists
+ * across calls.
+ */
+void NfcFilter::process4(const al::span<const float> src, float *RESTRICT dst)
+{
+    const float in_gain{fourth.gain};
+    const float boost1{fourth.b1};
+    const float boost2{fourth.b2};
+    const float boost3{fourth.b3};
+    const float boost4{fourth.b4};
+    const float cut1{fourth.a1};
+    const float cut2{fourth.a2};
+    const float cut3{fourth.a3};
+    const float cut4{fourth.a4};
+    float state1{fourth.z[0]};
+    float state2{fourth.z[1]};
+    float state3{fourth.z[2]};
+    float state4{fourth.z[3]};
+
+    for(auto iter = src.cbegin();iter != src.cend();++iter)
+    {
+        const float in{*iter};
+
+        /* First second-order part. */
+        const float fb_a{in*in_gain - cut1*state1 - cut2*state2};
+        const float mid{fb_a + boost1*state1 + boost2*state2};
+        state2 += state1;
+        state1 += fb_a;
+
+        /* Second second-order part. */
+        const float fb_b{mid - cut3*state3 - cut4*state4};
+        const float out{fb_b + boost3*state3 + boost4*state4};
+        state4 += state3;
+        state3 += fb_b;
+
+        *(dst++) = out;
+    }
+
+    fourth.z[0] = state1;
+    fourth.z[1] = state2;
+    fourth.z[2] = state3;
+    fourth.z[3] = state4;
+}
diff --git a/core/filters/nfc.h b/core/filters/nfc.h
new file mode 100644
index 00000000..33f67a5f
--- /dev/null
+++ b/core/filters/nfc.h
@@ -0,0 +1,63 @@
+#ifndef CORE_FILTERS_NFC_H
+#define CORE_FILTERS_NFC_H
+
+#include <cstddef>
+
+#include "alspan.h"
+
+
+/* First-order near-field control filter data: gains, one b/a coefficient
+ * pair, and one delay element.
+ */
+struct NfcFilter1 {
+ float base_gain, gain;
+ float b1, a1;
+ float z[1];
+};
+/* Second-order near-field control filter data: gains, two b/a coefficient
+ * pairs, and two delay elements.
+ */
+struct NfcFilter2 {
+ float base_gain, gain;
+ float b1, b2, a1, a2;
+ float z[2];
+};
+/* Third-order near-field control filter data: gains, three b/a coefficient
+ * pairs, and three delay elements.
+ */
+struct NfcFilter3 {
+ float base_gain, gain;
+ float b1, b2, b3, a1, a2, a3;
+ float z[3];
+};
+/* Fourth-order near-field control filter data: gains, four b/a coefficient
+ * pairs, and four delay elements.
+ */
+struct NfcFilter4 {
+ float base_gain, gain;
+ float b1, b2, b3, b4, a1, a2, a3, a4;
+ float z[4];
+};
+
+/* Bundles one near-field control filter per ambisonic order (first through
+ * fourth), each with its own coefficients and delay state. init() sets all
+ * four up for a given w1, adjust() applies a new w0 to all four, and
+ * processN() runs the order-N filter.
+ */
+class NfcFilter {
+ NfcFilter1 first;
+ NfcFilter2 second;
+ NfcFilter3 third;
+ NfcFilter4 fourth;
+
+public:
+ /* NOTE:
+ * w0 = speed_of_sound / (source_distance * sample_rate);
+ * w1 = speed_of_sound / (control_distance * sample_rate);
+ *
+ * Generally speaking, the control distance should be approximately the
+ * average speaker distance, or based on the reference delay if outputting
+ * NFC-HOA. It must not be negative, 0, or infinite. The source distance
+ * should not be too small relative to the control distance.
+ */
+
+ void init(const float w1) noexcept;
+ void adjust(const float w0) noexcept;
+
+ /* Near-field control filter for first-order ambisonic channels (1-3). */
+ void process1(const al::span<const float> src, float *RESTRICT dst);
+
+ /* Near-field control filter for second-order ambisonic channels (4-8). */
+ void process2(const al::span<const float> src, float *RESTRICT dst);
+
+ /* Near-field control filter for third-order ambisonic channels (9-15). */
+ void process3(const al::span<const float> src, float *RESTRICT dst);
+
+ /* Near-field control filter for fourth-order ambisonic channels (16-24). */
+ void process4(const al::span<const float> src, float *RESTRICT dst);
+};
+
+#endif /* CORE_FILTERS_NFC_H */
diff --git a/core/filters/splitter.cpp b/core/filters/splitter.cpp
new file mode 100644
index 00000000..983ba36f
--- /dev/null
+++ b/core/filters/splitter.cpp
@@ -0,0 +1,179 @@
+
+#include "config.h"
+
+#include "splitter.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "alnumbers.h"
+#include "opthelpers.h"
+
+
+template<typename Real>
+void BandSplitterR<Real>::init(Real f0norm)
+{ /* Computes mCoeff from the normalized frequency f0norm and zeroes all filter history. */
+ const Real w{f0norm * (al::numbers::pi_v<Real>*2)}; /* f0norm scaled by 2*pi. */
+ const Real cw{std::cos(w)};
+ if(cw > std::numeric_limits<float>::epsilon()) /* Only divide by cos(w) when it is safely positive. */
+ mCoeff = (std::sin(w) - 1.0f) / cw;
+ else
+ mCoeff = cw * -0.5f; /* Used instead of the division when cos(w) is at or below float epsilon. */
+
+ mLpZ1 = 0.0f;
+ mLpZ2 = 0.0f;
+ mApZ1 = 0.0f;
+}
+
+template<typename Real>
+void BandSplitterR<Real>::process(const al::span<const Real> input, Real *hpout, Real *lpout)
+{ /* Splits input into a high band (hpout) and a low band (lpout); one sample is written to each per input sample. */
+ const Real ap_coeff{mCoeff};
+ const Real lp_coeff{mCoeff*0.5f + 0.5f};
+ Real lp_z1{mLpZ1}; /* History is copied to locals and stored back after the loop. */
+ Real lp_z2{mLpZ2};
+ Real ap_z1{mApZ1};
+ auto proc_sample = [ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1,&lpout](const Real in) noexcept -> Real
+ {
+ /* Low-pass sample processing. */
+ Real d{(in - lp_z1) * lp_coeff};
+ Real lp_y{lp_z1 + d};
+ lp_z1 = lp_y + d;
+
+ d = (lp_y - lp_z2) * lp_coeff; /* Second low-pass stage, fed by the first stage's output. */
+ lp_y = lp_z2 + d;
+ lp_z2 = lp_y + d;
+
+ *(lpout++) = lp_y;
+
+ /* All-pass sample processing. */
+ Real ap_y{in*ap_coeff + ap_z1};
+ ap_z1 = in - ap_y*ap_coeff;
+
+ /* High-pass generated from removing low-passed output. */
+ return ap_y - lp_y;
+ };
+ for(const Real in : input) *(hpout++) = proc_sample(in); /* Explicit loop: the stateful filter must see samples strictly in order. */
+ mLpZ1 = lp_z1;
+ mLpZ2 = lp_z2;
+ mApZ1 = ap_z1;
+}
+
+template<typename Real>
+void BandSplitterR<Real>::processHfScale(const al::span<const Real> input, Real *RESTRICT output,
+ const Real hfscale)
+{ /* Writes input to output with the high band scaled by hfscale and the low band left unscaled. */
+ const Real ap_coeff{mCoeff};
+ const Real lp_coeff{mCoeff*0.5f + 0.5f};
+ Real lp_z1{mLpZ1}; /* History is copied to locals and stored back after the loop. */
+ Real lp_z2{mLpZ2};
+ Real ap_z1{mApZ1};
+ auto proc_sample = [hfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real
+ {
+ /* Low-pass sample processing. */
+ Real d{(in - lp_z1) * lp_coeff};
+ Real lp_y{lp_z1 + d};
+ lp_z1 = lp_y + d;
+
+ d = (lp_y - lp_z2) * lp_coeff; /* Second low-pass stage, fed by the first stage's output. */
+ lp_y = lp_z2 + d;
+ lp_z2 = lp_y + d;
+
+ /* All-pass sample processing. */
+ Real ap_y{in*ap_coeff + ap_z1};
+ ap_z1 = in - ap_y*ap_coeff;
+
+ /* High-pass generated by removing the low-passed signal, which is then
+ * scaled and added back to the low-passed signal.
+ */
+ return (ap_y-lp_y)*hfscale + lp_y;
+ };
+ for(const Real in : input) *(output++) = proc_sample(in); /* Explicit loop: the stateful filter must see samples strictly in order. */
+ mLpZ1 = lp_z1;
+ mLpZ2 = lp_z2;
+ mApZ1 = ap_z1;
+}
+
+template<typename Real>
+void BandSplitterR<Real>::processHfScale(const al::span<Real> samples, const Real hfscale)
+{ /* In-place variant: scales the high band of samples by hfscale, leaving the low band unscaled. */
+ const Real ap_coeff{mCoeff};
+ const Real lp_coeff{mCoeff*0.5f + 0.5f};
+ Real lp_z1{mLpZ1}; /* History is copied to locals and stored back after the loop. */
+ Real lp_z2{mLpZ2};
+ Real ap_z1{mApZ1};
+ auto proc_sample = [hfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real
+ {
+ /* Low-pass sample processing. */
+ Real d{(in - lp_z1) * lp_coeff};
+ Real lp_y{lp_z1 + d};
+ lp_z1 = lp_y + d;
+
+ d = (lp_y - lp_z2) * lp_coeff; /* Second low-pass stage, fed by the first stage's output. */
+ lp_y = lp_z2 + d;
+ lp_z2 = lp_y + d;
+
+ /* All-pass sample processing. */
+ Real ap_y{in*ap_coeff + ap_z1};
+ ap_z1 = in - ap_y*ap_coeff;
+
+ /* High-pass generated by removing the low-passed signal, which is then
+ * scaled and added back to the low-passed signal.
+ */
+ return (ap_y-lp_y)*hfscale + lp_y;
+ };
+ for(Real &sample : samples) sample = proc_sample(sample); /* Explicit loop: the stateful filter must see samples strictly in order. */
+ mLpZ1 = lp_z1;
+ mLpZ2 = lp_z2;
+ mApZ1 = ap_z1;
+}
+
+template<typename Real>
+void BandSplitterR<Real>::processScale(const al::span<Real> samples, const Real hfscale, const Real lfscale)
+{ /* In-place: scales the high band of samples by hfscale and the low band by lfscale. */
+ const Real ap_coeff{mCoeff};
+ const Real lp_coeff{mCoeff*0.5f + 0.5f};
+ Real lp_z1{mLpZ1}; /* History is copied to locals and stored back after the loop. */
+ Real lp_z2{mLpZ2};
+ Real ap_z1{mApZ1};
+ auto proc_sample = [hfscale,lfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real
+ {
+ Real d{(in - lp_z1) * lp_coeff}; /* First low-pass stage. */
+ Real lp_y{lp_z1 + d};
+ lp_z1 = lp_y + d;
+
+ d = (lp_y - lp_z2) * lp_coeff; /* Second low-pass stage, fed by the first stage's output. */
+ lp_y = lp_z2 + d;
+ lp_z2 = lp_y + d;
+
+ Real ap_y{in*ap_coeff + ap_z1}; /* All-pass stage. */
+ ap_z1 = in - ap_y*ap_coeff;
+
+ /* Apply separate factors to the high and low frequencies. */
+ return (ap_y-lp_y)*hfscale + lp_y*lfscale;
+ };
+ for(Real &sample : samples) sample = proc_sample(sample); /* Explicit loop: the stateful filter must see samples strictly in order. */
+ mLpZ1 = lp_z1;
+ mLpZ2 = lp_z2;
+ mApZ1 = ap_z1;
+}
+
+template<typename Real>
+void BandSplitterR<Real>::processAllPass(const al::span<Real> samples)
+{ /* In-place: runs samples through the all-pass stage only, using and updating mApZ1. */
+ const Real coeff{mCoeff};
+ Real z1{mApZ1}; /* History is copied to a local and stored back after the loop. */
+ auto proc_sample = [coeff,&z1](const Real in) noexcept -> Real
+ {
+ const Real out{in*coeff + z1};
+ z1 = in - out*coeff;
+ return out;
+ };
+ for(Real &sample : samples) sample = proc_sample(sample); /* Explicit loop: the stateful filter must see samples strictly in order. */
+ mApZ1 = z1;
+}
+
+
+template class BandSplitterR<float>; /* Explicit instantiations: the member definitions live in this file, not the header. */
+template class BandSplitterR<double>;
diff --git a/core/filters/splitter.h b/core/filters/splitter.h
new file mode 100644
index 00000000..e853eb38
--- /dev/null
+++ b/core/filters/splitter.h
@@ -0,0 +1,40 @@
+#ifndef CORE_FILTERS_SPLITTER_H
+#define CORE_FILTERS_SPLITTER_H
+
+#include <cstddef>
+
+#include "alspan.h"
+
+
+/* Band splitter. Splits a signal into two phase-matching frequency bands. */
+template<typename Real>
+class BandSplitterR {
+ Real mCoeff{0.0f}; /* Coefficient computed by init(). */
+ Real mLpZ1{0.0f}; /* History of the first low-pass stage. */
+ Real mLpZ2{0.0f}; /* History of the second low-pass stage. */
+ Real mApZ1{0.0f}; /* History of the all-pass stage. */
+
+public:
+ BandSplitterR() = default;
+ BandSplitterR(const BandSplitterR&) = default;
+ BandSplitterR(Real f0norm) { init(f0norm); } /* Constructs and initializes for normalized frequency f0norm. */
+ BandSplitterR& operator=(const BandSplitterR&) = default;
+
+ void init(Real f0norm); /* Sets the coefficient from f0norm and clears the history. */
+ void clear() noexcept { mLpZ1 = mLpZ2 = mApZ1 = 0.0f; } /* Clears the history but keeps the coefficient. */
+ void process(const al::span<const Real> input, Real *hpout, Real *lpout); /* Writes input.size() samples to each of hpout and lpout. */
+
+ void processHfScale(const al::span<const Real> input, Real *output, const Real hfscale); /* Out-of-place; scales only the high band. */
+
+ void processHfScale(const al::span<Real> samples, const Real hfscale); /* In-place; scales only the high band. */
+ void processScale(const al::span<Real> samples, const Real hfscale, const Real lfscale); /* In-place; scales both bands. */
+
+ /**
+ * The all-pass portion of the band splitter. Applies the same phase shift
+ * without splitting or scaling the signal.
+ */
+ void processAllPass(const al::span<Real> samples);
+};
+using BandSplitter = BandSplitterR<float>; /* Single-precision alias. */
+
+#endif /* CORE_FILTERS_SPLITTER_H */
diff --git a/core/fmt_traits.cpp b/core/fmt_traits.cpp
new file mode 100644
index 00000000..054d8766
--- /dev/null
+++ b/core/fmt_traits.cpp
@@ -0,0 +1,79 @@
+
+#include "config.h"
+
+#include "fmt_traits.h"
+
+
+namespace al {
+
+const int16_t muLawDecompressionTable[256] = { /* Indexed by the 8-bit mu-law code; yields a signed 16-bit sample. */
+ -32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956,
+ -23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764,
+ -15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412,
+ -11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316,
+ -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
+ -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
+ -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
+ -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
+ -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
+ -1372, -1308, -1244, -1180, -1116, -1052, -988, -924,
+ -876, -844, -812, -780, -748, -716, -684, -652,
+ -620, -588, -556, -524, -492, -460, -428, -396,
+ -372, -356, -340, -324, -308, -292, -276, -260,
+ -244, -228, -212, -196, -180, -164, -148, -132,
+ -120, -112, -104, -96, -88, -80, -72, -64,
+ -56, -48, -40, -32, -24, -16, -8, 0,
+ 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
+ 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
+ 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
+ 11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316,
+ 7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140,
+ 5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092,
+ 3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004,
+ 2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980,
+ 1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436,
+ 1372, 1308, 1244, 1180, 1116, 1052, 988, 924,
+ 876, 844, 812, 780, 748, 716, 684, 652,
+ 620, 588, 556, 524, 492, 460, 428, 396,
+ 372, 356, 340, 324, 308, 292, 276, 260,
+ 244, 228, 212, 196, 180, 164, 148, 132,
+ 120, 112, 104, 96, 88, 80, 72, 64,
+ 56, 48, 40, 32, 24, 16, 8, 0
+};
+
+const int16_t aLawDecompressionTable[256] = { /* Indexed by the 8-bit A-law code; yields a signed 16-bit sample. */
+ -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736,
+ -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784,
+ -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368,
+ -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392,
+ -22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944,
+ -30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136,
+ -11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472,
+ -15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568,
+ -344, -328, -376, -360, -280, -264, -312, -296,
+ -472, -456, -504, -488, -408, -392, -440, -424,
+ -88, -72, -120, -104, -24, -8, -56, -40,
+ -216, -200, -248, -232, -152, -136, -184, -168,
+ -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184,
+ -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696,
+ -688, -656, -752, -720, -560, -528, -624, -592,
+ -944, -912, -1008, -976, -816, -784, -880, -848,
+ 5504, 5248, 6016, 5760, 4480, 4224, 4992, 4736,
+ 7552, 7296, 8064, 7808, 6528, 6272, 7040, 6784,
+ 2752, 2624, 3008, 2880, 2240, 2112, 2496, 2368,
+ 3776, 3648, 4032, 3904, 3264, 3136, 3520, 3392,
+ 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944,
+ 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136,
+ 11008, 10496, 12032, 11520, 8960, 8448, 9984, 9472,
+ 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568,
+ 344, 328, 376, 360, 280, 264, 312, 296,
+ 472, 456, 504, 488, 408, 392, 440, 424,
+ 88, 72, 120, 104, 24, 8, 56, 40,
+ 216, 200, 248, 232, 152, 136, 184, 168,
+ 1376, 1312, 1504, 1440, 1120, 1056, 1248, 1184,
+ 1888, 1824, 2016, 1952, 1632, 1568, 1760, 1696,
+ 688, 656, 752, 720, 560, 528, 624, 592,
+ 944, 912, 1008, 976, 816, 784, 880, 848
+};
+
+} // namespace al
diff --git a/core/fmt_traits.h b/core/fmt_traits.h
new file mode 100644
index 00000000..f797f836
--- /dev/null
+++ b/core/fmt_traits.h
@@ -0,0 +1,81 @@
+#ifndef CORE_FMT_TRAITS_H
+#define CORE_FMT_TRAITS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "albyte.h"
+#include "buffer_storage.h"
+
+
+namespace al {
+
+extern const int16_t muLawDecompressionTable[256]; /* 8-bit mu-law code -> 16-bit sample; defined in fmt_traits.cpp. */
+extern const int16_t aLawDecompressionTable[256]; /* 8-bit A-law code -> 16-bit sample; defined in fmt_traits.cpp. */
+
+
+template<FmtType T> /* Primary template is empty; each supported FmtType has a specialization with Type and to<OutT>(). */
+struct FmtTypeTraits { };
+
+template<>
+struct FmtTypeTraits<FmtUByte> { /* Unsigned 8-bit samples. */
+ using Type = uint8_t;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept
+ { return val*OutT{1.0/128.0} - OutT{1.0}; } /* Maps 0..255 to -1.0 .. just under +1.0 (128 becomes 0). */
+};
+template<>
+struct FmtTypeTraits<FmtShort> { /* Signed 16-bit samples. */
+ using Type = int16_t;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept { return val*OutT{1.0/32768.0}; } /* Scales by 1/32768. */
+};
+template<>
+struct FmtTypeTraits<FmtFloat> { /* 32-bit float samples. */
+ using Type = float;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept { return val; } /* Returned unscaled, implicitly converted to OutT. */
+};
+template<>
+struct FmtTypeTraits<FmtDouble> { /* Double-precision samples. */
+ using Type = double;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept { return static_cast<OutT>(val); } /* Unscaled; explicit cast since OutT may be narrower. */
+};
+template<>
+struct FmtTypeTraits<FmtMulaw> { /* 8-bit mu-law encoded samples. */
+ using Type = uint8_t;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept
+ { return muLawDecompressionTable[val] * OutT{1.0/32768.0}; } /* Table lookup to 16-bit, then scaled by 1/32768. */
+};
+template<>
+struct FmtTypeTraits<FmtAlaw> { /* 8-bit A-law encoded samples. */
+ using Type = uint8_t;
+
+ template<typename OutT>
+ static constexpr inline OutT to(const Type val) noexcept
+ { return aLawDecompressionTable[val] * OutT{1.0/32768.0}; } /* Table lookup to 16-bit, then scaled by 1/32768. */
+};
+
+
+template<FmtType SrcType, typename DstT> /* Converts `samples` values of format SrcType from src into dst. */
+inline void LoadSampleArray(DstT *RESTRICT dst, const al::byte *src, const size_t srcstep,
+ const size_t samples) noexcept
+{
+ using TypeTraits = FmtTypeTraits<SrcType>;
+ using SampleType = typename TypeTraits::Type;
+
+ const SampleType *RESTRICT ssrc{reinterpret_cast<const SampleType*>(src)}; /* View the raw bytes as the source sample type. */
+ for(size_t i{0u};i < samples;i++)
+ dst[i] = TypeTraits::template to<DstT>(ssrc[i*srcstep]); /* srcstep is a stride in source samples, not bytes. */
+}
+
+} // namespace al
+
+#endif /* CORE_FMT_TRAITS_H */
diff --git a/core/fpu_ctrl.cpp b/core/fpu_ctrl.cpp
new file mode 100644
index 00000000..0cf0d6e7
--- /dev/null
+++ b/core/fpu_ctrl.cpp
@@ -0,0 +1,61 @@
+
+#include "config.h"
+
+#include "fpu_ctrl.h"
+
+#ifdef HAVE_INTRIN_H
+#include <intrin.h>
+#endif
+#ifdef HAVE_SSE_INTRINSICS
+#include <emmintrin.h>
+#ifndef _MM_DENORMALS_ZERO_MASK
+/* Some headers seem to be missing these? */
+#define _MM_DENORMALS_ZERO_MASK 0x0040u
+#define _MM_DENORMALS_ZERO_ON 0x0040u
+#endif
+#endif
+
+#include "cpu_caps.h"
+
+
+void FPUCtl::enter() noexcept
+{ /* Saves the SSE control/status register and enables flush-to-zero and denormals-are-zero where available. */
+ if(this->in_mode) return; /* Already entered; keep the previously saved state. */
+
+#if defined(HAVE_SSE_INTRINSICS)
+ this->sse_state = _mm_getcsr(); /* Remember the caller's state for leave(). */
+ unsigned int sseState{this->sse_state};
+ sseState &= ~(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK);
+ sseState |= _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
+ _mm_setcsr(sseState);
+
+#elif defined(__GNUC__) && defined(HAVE_SSE)
+
+ if((CPUCapFlags&CPU_CAP_SSE)) /* Inline-asm path; only touched when SSE is reported at run time. */
+ {
+ __asm__ __volatile__("stmxcsr %0" : "=m" (*&this->sse_state));
+ unsigned int sseState{this->sse_state};
+ sseState |= 0x8000; /* set flush-to-zero */
+ if((CPUCapFlags&CPU_CAP_SSE2))
+ sseState |= 0x0040; /* set denormals-are-zero */
+ __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState));
+ }
+#endif
+
+ this->in_mode = true; /* Set on every path, including builds with no SSE support. */
+}
+
+void FPUCtl::leave() noexcept
+{ /* Restores the SSE control/status register saved by enter(). */
+ if(!this->in_mode) return; /* Nothing to restore. */
+
+#if defined(HAVE_SSE_INTRINSICS)
+ _mm_setcsr(this->sse_state);
+
+#elif defined(__GNUC__) && defined(HAVE_SSE)
+
+ if((CPUCapFlags&CPU_CAP_SSE)) /* Mirrors the run-time check in enter(). */
+ __asm__ __volatile__("ldmxcsr %0" : : "m" (*&this->sse_state));
+#endif
+ this->in_mode = false;
+}
diff --git a/core/fpu_ctrl.h b/core/fpu_ctrl.h
new file mode 100644
index 00000000..9554313a
--- /dev/null
+++ b/core/fpu_ctrl.h
@@ -0,0 +1,21 @@
+#ifndef CORE_FPU_CTRL_H
+#define CORE_FPU_CTRL_H
+
+class FPUCtl { /* Scoped guard: enter() on construction, leave() on destruction; non-copyable. */
+#if defined(HAVE_SSE_INTRINSICS) || (defined(__GNUC__) && defined(HAVE_SSE))
+ unsigned int sse_state{}; /* Register value captured by enter() and restored by leave(). */
+#endif
+ bool in_mode{}; /* True between enter() and leave(). */
+
+public:
+ FPUCtl() noexcept { enter(); in_mode = true; } /* enter() sets in_mode itself, so the assignment is redundant. */
+ ~FPUCtl() { if(in_mode) leave(); } /* leave() repeats this check, so an explicit earlier leave() is safe. */
+
+ FPUCtl(const FPUCtl&) = delete;
+ FPUCtl& operator=(const FPUCtl&) = delete;
+
+ void enter() noexcept;
+ void leave() noexcept;
+};
+
+#endif /* CORE_FPU_CTRL_H */
diff --git a/core/front_stablizer.h b/core/front_stablizer.h
new file mode 100644
index 00000000..6825111a
--- /dev/null
+++ b/core/front_stablizer.h
@@ -0,0 +1,31 @@
+#ifndef CORE_FRONT_STABLIZER_H
+#define CORE_FRONT_STABLIZER_H
+
+#include <array>
+#include <memory>
+
+#include "almalloc.h"
+#include "bufferline.h"
+#include "filters/splitter.h"
+
+
+struct FrontStablizer { /* Working buffers and band splitters; ends in a flexible array with one BandSplitter per channel. */
+ FrontStablizer(size_t numchans) : ChannelFilters{numchans} { } /* Prefer Create(), which sizes the allocation for numchans. */
+
+ alignas(16) std::array<float,BufferLineSize> MidDirect{};
+ alignas(16) std::array<float,BufferLineSize> Side{};
+ alignas(16) std::array<float,BufferLineSize> Temp{};
+
+ BandSplitter MidFilter;
+ alignas(16) FloatBufferLine MidLF{};
+ alignas(16) FloatBufferLine MidHF{};
+
+ al::FlexArray<BandSplitter,16> ChannelFilters; /* Must remain the last member (flexible array). */
+
+ static std::unique_ptr<FrontStablizer> Create(size_t numchans) /* Allocates via the FamCount-taking operator new. */
+ { return std::unique_ptr<FrontStablizer>{new(FamCount(numchans)) FrontStablizer{numchans}}; }
+
+ DEF_FAM_NEWDEL(FrontStablizer, ChannelFilters) /* presumably declares the matching operator new/delete -- see almalloc.h. */
+};
+
+#endif /* CORE_FRONT_STABLIZER_H */
diff --git a/core/helpers.cpp b/core/helpers.cpp
new file mode 100644
index 00000000..99cf009c
--- /dev/null
+++ b/core/helpers.cpp
@@ -0,0 +1,569 @@
+
+#include "config.h"
+
+#include "helpers.h"
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdarg>
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <mutex>
+#include <limits>
+#include <string>
+#include <tuple>
+
+#include "almalloc.h"
+#include "alfstream.h"
+#include "alnumeric.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "alstring.h"
+#include "logging.h"
+#include "strutils.h"
+#include "vector.h"
+
+
+/* Mixing thread priority level */
+int RTPrioLevel{1}; /* Values <= 0 make SetRTPriority() do nothing. */
+
+/* Allow reducing the process's RTTime limit for RTKit. */
+bool AllowRTTimeLimit{true};
+
+
+#ifdef _WIN32
+
+#include <shlobj.h>
+
+const PathNamePair &GetProcBinary()
+{ /* Win32: returns the directory and file name of the running executable; computed once and cached. */
+ static al::optional<PathNamePair> procbin;
+ if(procbin) return *procbin;
+
+ auto fullpath = al::vector<WCHAR>(256);
+ DWORD len{GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size()))};
+ while(len == fullpath.size()) /* A completely filled buffer is treated as truncation: double it and retry. */
+ {
+ fullpath.resize(fullpath.size() << 1);
+ len = GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size()));
+ }
+ if(len == 0)
+ {
+ ERR("Failed to get process name: error %lu\n", GetLastError());
+ procbin.emplace(); /* Cache an empty pair so the failing lookup is not repeated. */
+ return *procbin;
+ }
+
+ fullpath.resize(len);
+ if(fullpath.back() != 0)
+ fullpath.push_back(0); /* Ensure null termination for the conversions below. */
+
+ std::replace(fullpath.begin(), fullpath.end(), '/', '\\');
+ auto sep = std::find(fullpath.rbegin()+1, fullpath.rend(), '\\'); /* Last separator; +1 skips the terminator. */
+ if(sep != fullpath.rend())
+ {
+ *sep = 0; /* Split in place: directory before the separator, file name after it. */
+ procbin.emplace(wstr_to_utf8(fullpath.data()), wstr_to_utf8(al::to_address(sep.base())));
+ }
+ else
+ procbin.emplace(std::string{}, wstr_to_utf8(fullpath.data())); /* No separator: the whole string is the file name. */
+
+ TRACE("Got binary: %s, %s\n", procbin->path.c_str(), procbin->fname.c_str());
+ return *procbin;
+}
+
+namespace {
+
+void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+{ /* Win32: appends every entry in `path` matching "*<ext>" to results, sorted among themselves. */
+ std::string pathstr{path};
+ pathstr += "\\*";
+ pathstr += ext;
+ TRACE("Searching %s\n", pathstr.c_str());
+
+ std::wstring wpath{utf8_to_wstr(pathstr.c_str())};
+ WIN32_FIND_DATAW fdata;
+ HANDLE hdl{FindFirstFileW(wpath.c_str(), &fdata)};
+ if(hdl == INVALID_HANDLE_VALUE) return; /* No match or the directory is unreadable; results is unchanged. */
+
+ const auto base = results->size(); /* Index of the first entry added by this call. */
+
+ do {
+ results->emplace_back();
+ std::string &str = results->back();
+ str = path;
+ str += '\\';
+ str += wstr_to_utf8(fdata.cFileName);
+ } while(FindNextFileW(hdl, &fdata));
+ FindClose(hdl);
+
+ const al::span<std::string> newlist{results->data()+base, results->size()-base};
+ std::sort(newlist.begin(), newlist.end()); /* Sort only the new entries; earlier results keep their order. */
+ for(const auto &name : newlist)
+ TRACE(" got %s\n", name.c_str());
+}
+
+} // namespace
+
+al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+{ /* Win32: collects files ending in ext from subdir if it is absolute, else from the standard search locations. */
+ auto is_slash = [](int c) noexcept -> int { return (c == '\\' || c == '/'); };
+
+ static std::mutex search_lock; /* Serializes concurrent searches. */
+ std::lock_guard<std::mutex> _{search_lock};
+
+ /* If the path is absolute, use it directly. */
+ al::vector<std::string> results;
+ if(isalpha(static_cast<unsigned char>(subdir[0])) && subdir[1] == ':' && is_slash(subdir[2])) /* Cast: isalpha is undefined for negative char values. */
+ {
+ std::string path{subdir};
+ std::replace(path.begin(), path.end(), '/', '\\');
+ DirectorySearch(path.c_str(), ext, &results);
+ return results;
+ }
+ if(subdir[0] == '\\' && subdir[1] == '\\' && subdir[2] == '?' && subdir[3] == '\\') /* "\\?\" prefixed path, used verbatim. */
+ {
+ DirectorySearch(subdir, ext, &results);
+ return results;
+ }
+
+ std::string path;
+
+ /* Search the app-local directory. */
+ if(auto localpath = al::getenv(L"ALSOFT_LOCAL_PATH"))
+ {
+ path = wstr_to_utf8(localpath->c_str());
+ if(is_slash(path.back()))
+ path.pop_back();
+ }
+ else if(WCHAR *cwdbuf{_wgetcwd(nullptr, 0)}) /* Falls back to the current working directory. */
+ {
+ path = wstr_to_utf8(cwdbuf);
+ if(is_slash(path.back()))
+ path.pop_back();
+ free(cwdbuf); /* _wgetcwd(nullptr, 0) returns a malloc'd buffer. */
+ }
+ else
+ path = ".";
+ std::replace(path.begin(), path.end(), '/', '\\');
+ DirectorySearch(path.c_str(), ext, &results); /* Note: subdir is not appended for the app-local search. */
+
+ /* Search the local and global data dirs. */
+ static const int ids[2]{ CSIDL_APPDATA, CSIDL_COMMON_APPDATA };
+ for(int id : ids)
+ {
+ WCHAR buffer[MAX_PATH];
+ if(SHGetSpecialFolderPathW(nullptr, buffer, id, FALSE) == FALSE)
+ continue;
+
+ path = wstr_to_utf8(buffer);
+ if(!is_slash(path.back()))
+ path += '\\';
+ path += subdir;
+ std::replace(path.begin(), path.end(), '/', '\\');
+
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+
+ return results;
+}
+
+void SetRTPriority(void)
+{ /* Win32: raises the calling thread to time-critical priority when RTPrioLevel is positive. */
+ if(RTPrioLevel > 0)
+ {
+ if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL))
+ ERR("Failed to set priority level for thread\n"); /* Failure is logged only; the thread keeps its priority. */
+ }
+}
+
+#else
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#endif
+#ifdef __HAIKU__
+#include <FindDirectory.h>
+#endif
+#ifdef HAVE_PROC_PIDPATH
+#include <libproc.h>
+#endif
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
+#include <pthread.h>
+#include <sched.h>
+#endif
+#ifdef HAVE_RTKIT
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "dbus_wrap.h"
+#include "rtkit.h"
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+#endif
+
+const PathNamePair &GetProcBinary()
+{ /* POSIX: returns the directory and file name of the running executable; computed once and cached. */
+ static al::optional<PathNamePair> procbin;
+ if(procbin) return *procbin;
+
+ al::vector<char> pathname; /* Each platform method below runs only if this is still empty. */
+#ifdef __FreeBSD__
+ size_t pathlen;
+ int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+ if(sysctl(mib, 4, nullptr, &pathlen, nullptr, 0) == -1) /* First call only queries the required length. */
+ WARN("Failed to sysctl kern.proc.pathname: %s\n", strerror(errno));
+ else
+ {
+ pathname.resize(pathlen + 1);
+ sysctl(mib, 4, pathname.data(), &pathlen, nullptr, 0); /* NOTE(review): this call's return value is not checked. */
+ pathname.resize(pathlen);
+ }
+#endif
+#ifdef HAVE_PROC_PIDPATH
+ if(pathname.empty())
+ {
+ char procpath[PROC_PIDPATHINFO_MAXSIZE]{};
+ const pid_t pid{getpid()};
+ if(proc_pidpath(pid, procpath, sizeof(procpath)) < 1)
+ ERR("proc_pidpath(%d, ...) failed: %s\n", pid, strerror(errno));
+ else
+ pathname.insert(pathname.end(), procpath, procpath+strlen(procpath));
+ }
+#endif
+#ifdef __HAIKU__
+ if(pathname.empty())
+ {
+ char procpath[PATH_MAX];
+ if(find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, NULL, procpath, sizeof(procpath)) == B_OK)
+ pathname.insert(pathname.end(), procpath, procpath+strlen(procpath));
+ }
+#endif
+#ifndef __SWITCH__
+ if(pathname.empty())
+ {
+ static const char SelfLinkNames[][32]{ /* Candidate symlinks to the executable, tried in order. */
+ "/proc/self/exe",
+ "/proc/self/file",
+ "/proc/curproc/exe",
+ "/proc/curproc/file"
+ };
+
+ pathname.resize(256);
+
+ const char *selfname{};
+ ssize_t len{};
+ for(const char *name : SelfLinkNames)
+ {
+ selfname = name;
+ len = readlink(selfname, pathname.data(), pathname.size());
+ if(len >= 0 || errno != ENOENT) break; /* Stop at the first link that exists, even if reading it failed. */
+ }
+
+ while(len > 0 && static_cast<size_t>(len) == pathname.size()) /* A filled buffer may be truncated: double it and retry. */
+ {
+ pathname.resize(pathname.size() << 1);
+ len = readlink(selfname, pathname.data(), pathname.size());
+ }
+ if(len <= 0)
+ {
+ WARN("Failed to readlink %s: %s\n", selfname, strerror(errno));
+ len = 0;
+ }
+
+ pathname.resize(static_cast<size_t>(len)); /* readlink does not null-terminate; keep exactly len bytes. */
+ }
+#endif
+ while(!pathname.empty() && pathname.back() == 0) /* Drop any trailing null bytes. */
+ pathname.pop_back();
+
+ auto sep = std::find(pathname.crbegin(), pathname.crend(), '/'); /* Last '/' in the path. */
+ if(sep != pathname.crend())
+ procbin.emplace(std::string(pathname.cbegin(), sep.base()-1), /* Directory, without the trailing '/'. */
+ std::string(sep.base(), pathname.cend())); /* File name after the '/'. */
+ else
+ procbin.emplace(std::string{}, std::string(pathname.cbegin(), pathname.cend())); /* No '/': all of it is the file name. */
+
+ TRACE("Got binary: \"%s\", \"%s\"\n", procbin->path.c_str(), procbin->fname.c_str());
+ return *procbin;
+}
+
+namespace {
+
+void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+{ /* POSIX: appends every entry in `path` whose name ends with ext (case-insensitive) to results. */
+ TRACE("Searching %s for *%s\n", path, ext);
+ DIR *dir{opendir(path)};
+ if(!dir) return; /* Missing or unreadable directory; results is unchanged. */
+
+ const auto base = results->size(); /* Index of the first entry added by this call. */
+ const size_t extlen{strlen(ext)};
+
+ while(struct dirent *dirent{readdir(dir)})
+ {
+ if(strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0)
+ continue;
+
+ const size_t len{strlen(dirent->d_name)};
+ if(len <= extlen) continue; /* The name must be longer than the extension itself. */
+ if(al::strcasecmp(dirent->d_name+len-extlen, ext) != 0)
+ continue;
+
+ results->emplace_back();
+ std::string &str = results->back();
+ str = path;
+ if(str.back() != '/')
+ str.push_back('/');
+ str += dirent->d_name;
+ }
+ closedir(dir);
+
+ const al::span<std::string> newlist{results->data()+base, results->size()-base};
+ std::sort(newlist.begin(), newlist.end()); /* Sort only the new entries; earlier results keep their order. */
+ for(const auto &name : newlist)
+ TRACE(" got %s\n", name.c_str());
+}
+
+} // namespace
+
+al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+{ /* POSIX: collects files ending in ext from subdir if it is absolute, else from the standard search locations. */
+ static std::mutex search_lock; /* Serializes concurrent searches. */
+ std::lock_guard<std::mutex> _{search_lock};
+
+ al::vector<std::string> results;
+ if(subdir[0] == '/') /* Absolute path: search only that directory. */
+ {
+ DirectorySearch(subdir, ext, &results);
+ return results;
+ }
+
+ /* Search the app-local directory. */
+ if(auto localpath = al::getenv("ALSOFT_LOCAL_PATH"))
+ DirectorySearch(localpath->c_str(), ext, &results);
+ else
+ {
+ al::vector<char> cwdbuf(256);
+ while(!getcwd(cwdbuf.data(), cwdbuf.size()))
+ {
+ if(errno != ERANGE) /* Any error other than "buffer too small" abandons getcwd. */
+ {
+ cwdbuf.clear();
+ break;
+ }
+ cwdbuf.resize(cwdbuf.size() << 1);
+ }
+ if(cwdbuf.empty())
+ DirectorySearch(".", ext, &results);
+ else
+ {
+ DirectorySearch(cwdbuf.data(), ext, &results);
+ cwdbuf.clear();
+ }
+ }
+
+ // Search local data dir
+ if(auto datapath = al::getenv("XDG_DATA_HOME"))
+ {
+ std::string &path = *datapath;
+ if(path.back() != '/')
+ path += '/';
+ path += subdir;
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+ else if(auto homepath = al::getenv("HOME")) /* Fallback: $HOME/.local/share/<subdir>. */
+ {
+ std::string &path = *homepath;
+ if(path.back() == '/')
+ path.pop_back();
+ path += "/.local/share/";
+ path += subdir;
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+
+ // Search global data dirs
+ std::string datadirs{al::getenv("XDG_DATA_DIRS").value_or("/usr/local/share/:/usr/share/")};
+
+ size_t curpos{0u};
+ while(curpos < datadirs.size()) /* Walk the ':'-separated list one entry at a time. */
+ {
+ size_t nextpos{datadirs.find(':', curpos)};
+
+ std::string path{(nextpos != std::string::npos) ?
+ datadirs.substr(curpos, nextpos++ - curpos) : datadirs.substr(curpos)}; /* nextpos++ steps past the ':'. */
+ curpos = nextpos; /* npos after the last entry, which ends the loop. */
+
+ if(path.empty()) continue;
+ if(path.back() != '/')
+ path += '/';
+ path += subdir;
+
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+
+#ifdef ALSOFT_INSTALL_DATADIR
+ // Search the installation data directory
+ {
+ std::string path{ALSOFT_INSTALL_DATADIR};
+ if(!path.empty())
+ {
+ if(path.back() != '/')
+ path += '/';
+ path += subdir;
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+ }
+#endif
+
+ return results;
+}
+
+namespace {
+
+bool SetRTPriorityPthread(int prio)
+{ /* Tries to switch the calling thread to SCHED_RR at (clamped) priority prio; returns true on success. */
+ int err{ENOTSUP}; /* Reported as-is when pthread scheduling support is compiled out. */
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
+ /* Get the min and max priority for SCHED_RR. Limit the max priority to
+ * half, for now, to ensure the thread can't take the highest priority and
+ * go rogue.
+ */
+ int rtmin{sched_get_priority_min(SCHED_RR)};
+ int rtmax{sched_get_priority_max(SCHED_RR)};
+ rtmax = (rtmax-rtmin)/2 + rtmin;
+
+ struct sched_param param{};
+ param.sched_priority = clampi(prio, rtmin, rtmax);
+#ifdef SCHED_RESET_ON_FORK
+ err = pthread_setschedparam(pthread_self(), SCHED_RR|SCHED_RESET_ON_FORK, &param);
+ if(err == EINVAL) /* Guards the next statement: retry without SCHED_RESET_ON_FORK if the combination is rejected. */
+#endif
+ err = pthread_setschedparam(pthread_self(), SCHED_RR, &param);
+ if(err == 0) return true;
+
+#else
+
+ std::ignore = prio; /* Silences the unused-parameter warning. */
+#endif
+ WARN("pthread_setschedparam failed: %s (%d)\n", std::strerror(err), err);
+ return false;
+}
+
+bool SetRTPriorityRTKit(int prio)
+{ /* Asks RTKit over D-Bus for real-time priority, or on Linux for high priority (niceness); returns true on success. */
+#ifdef HAVE_RTKIT
+ if(!HasDBus())
+ {
+ WARN("D-Bus not available\n");
+ return false;
+ }
+ dbus::Error error;
+ dbus::ConnectionPtr conn{dbus_bus_get(DBUS_BUS_SYSTEM, &error.get())};
+ if(!conn)
+ {
+ WARN("D-Bus connection failed with %s: %s\n", error->name, error->message);
+ return false;
+ }
+
+ /* Don't stupidly exit if the connection dies while doing this. */
+ dbus_connection_set_exit_on_disconnect(conn.get(), false);
+
+ int nicemin{};
+ int err{rtkit_get_min_nice_level(conn.get(), &nicemin)};
+ if(err == -ENOENT) /* Only -ENOENT aborts here; other results fall through. */
+ {
+ err = std::abs(err);
+ ERR("Could not query RTKit: %s (%d)\n", std::strerror(err), err);
+ return false;
+ }
+ int rtmax{rtkit_get_max_realtime_priority(conn.get())};
+ TRACE("Maximum real-time priority: %d, minimum niceness: %d\n", rtmax, nicemin);
+
+ auto limit_rttime = [](DBusConnection *c) -> int /* Lowers RLIMIT_RTTIME to RTKit's maximum; returns 0 or an errno value. */
+ {
+ using ulonglong = unsigned long long;
+ long long maxrttime{rtkit_get_rttime_usec_max(c)};
+ if(maxrttime <= 0) return static_cast<int>(std::abs(maxrttime)); /* Non-positive result is returned as a positive code (0 stays 0). */
+ const ulonglong umaxtime{static_cast<ulonglong>(maxrttime)};
+
+ struct rlimit rlim{};
+ if(getrlimit(RLIMIT_RTTIME, &rlim) != 0)
+ return errno;
+
+ TRACE("RTTime max: %llu (hard: %llu, soft: %llu)\n", umaxtime,
+ static_cast<ulonglong>(rlim.rlim_max), static_cast<ulonglong>(rlim.rlim_cur));
+ if(rlim.rlim_max > umaxtime) /* Only ever lowers the limit, never raises it. */
+ {
+ rlim.rlim_max = static_cast<rlim_t>(std::min<ulonglong>(umaxtime,
+ std::numeric_limits<rlim_t>::max()));
+ rlim.rlim_cur = std::min(rlim.rlim_cur, rlim.rlim_max); /* Keep the soft limit at or below the hard limit. */
+ if(setrlimit(RLIMIT_RTTIME, &rlim) != 0)
+ return errno;
+ }
+ return 0;
+ };
+ if(rtmax > 0)
+ {
+ if(AllowRTTimeLimit)
+ {
+ err = limit_rttime(conn.get());
+ if(err != 0) /* Not fatal: the real-time request below is still attempted. */
+ WARN("Failed to set RLIMIT_RTTIME for RTKit: %s (%d)\n",
+ std::strerror(err), err);
+ }
+
+ /* Limit the maximum real-time priority to half. */
+ rtmax = (rtmax+1)/2;
+ prio = clampi(prio, 1, rtmax);
+
+ TRACE("Making real-time with priority %d (max: %d)\n", prio, rtmax);
+ err = rtkit_make_realtime(conn.get(), 0, prio);
+ if(err == 0) return true;
+
+ err = std::abs(err);
+ WARN("Failed to set real-time priority: %s (%d)\n", std::strerror(err), err);
+ }
+ /* Don't try to set the niceness for non-Linux systems. Standard POSIX has
+ * niceness as a per-process attribute, while the intent here is for the
+ * audio processing thread only to get a priority boost. Currently only
+ * Linux is known to have per-thread niceness.
+ */
+#ifdef __linux__
+ if(nicemin < 0)
+ {
+ TRACE("Making high priority with niceness %d\n", nicemin);
+ err = rtkit_make_high_priority(conn.get(), 0, nicemin);
+ if(err == 0) return true;
+
+ err = std::abs(err);
+ WARN("Failed to set high priority: %s (%d)\n", std::strerror(err), err);
+ }
+#endif /* __linux__ */
+
+#else
+
+ std::ignore = prio; /* Silences the unused-parameter warning. */
+ WARN("D-Bus not supported\n");
+#endif
+ return false;
+}
+
+} // namespace
+
+void SetRTPriority()
+{ /* POSIX: raises the calling thread's priority, trying pthread scheduling first and RTKit second. */
+ if(RTPrioLevel <= 0)
+ return; /* Priority boosting is disabled. */
+
+ if(SetRTPriorityPthread(RTPrioLevel))
+ return;
+ if(SetRTPriorityRTKit(RTPrioLevel)) /* Each helper logs its own failure; nothing more happens if both fail. */
+ return;
+}
+
+#endif
diff --git a/core/helpers.h b/core/helpers.h
new file mode 100644
index 00000000..f0bfcf1b
--- /dev/null
+++ b/core/helpers.h
@@ -0,0 +1,18 @@
+#ifndef CORE_HELPERS_H
+#define CORE_HELPERS_H
+
+#include <string>
+
+#include "vector.h"
+
+
+struct PathNamePair { std::string path, fname; }; /* Directory and file name of a binary. */
+const PathNamePair &GetProcBinary(void); /* Path/name of the running executable; the result is cached. */
+
+extern int RTPrioLevel; /* Values <= 0 disable SetRTPriority(). */
+extern bool AllowRTTimeLimit; /* Whether the RTKit path may lower RLIMIT_RTTIME. */
+void SetRTPriority(void); /* Raises the calling thread's priority according to RTPrioLevel. */
+
+al::vector<std::string> SearchDataFiles(const char *match, const char *subdir); /* `match` is the file-name suffix (named `ext` in the definitions). */
+
+#endif /* CORE_HELPERS_H */
diff --git a/core/hrtf.cpp b/core/hrtf.cpp
new file mode 100644
index 00000000..d5c7573a
--- /dev/null
+++ b/core/hrtf.cpp
@@ -0,0 +1,1473 @@
+
+#include "config.h"
+
+#include "hrtf.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <type_traits>
+#include <utility>
+
+#include "albit.h"
+#include "albyte.h"
+#include "alfstream.h"
+#include "almalloc.h"
+#include "alnumbers.h"
+#include "alnumeric.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "filters/splitter.h"
+#include "helpers.h"
+#include "logging.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "polyphase_resampler.h"
+#include "vector.h"
+
+
+namespace {
+
+/* One entry of the EnumeratedHrtfs list. NOTE(review): by their names,
+ * mDispName is the name presented to the user and mFilename the file the HRTF
+ * is loaded from -- confirm against the enumeration code.
+ */
+struct HrtfEntry {
+ std::string mDispName;
+ std::string mFilename;
+
+ /* GCC warns when it tries to inline this. */
+ ~HrtfEntry();
+};
+/* Still the defaulted destructor, just defined out of line for the reason
+ * given above.
+ */
+HrtfEntry::~HrtfEntry() = default;
+
+/* One entry of the LoadedHrtfs list: a file name together with the owned
+ * HrtfStore for it. Move-only, since the store is held by unique_ptr.
+ */
+struct LoadedHrtf {
+ std::string mFilename;
+ std::unique_ptr<HrtfStore> mEntry;
+
+ /* Perfect-forwards both arguments into the members, so callers can pass
+ * anything mFilename and mEntry are constructible from.
+ */
+ template<typename T, typename U>
+ LoadedHrtf(T&& name, U&& entry)
+ : mFilename{std::forward<T>(name)}, mEntry{std::forward<U>(entry)}
+ { }
+ LoadedHrtf(LoadedHrtf&&) = default;
+ /* GCC warns when it tries to inline this. */
+ ~LoadedHrtf();
+
+ LoadedHrtf& operator=(LoadedHrtf&&) = default;
+};
+/* Still the defaulted destructor, just defined out of line for the reason
+ * given above.
+ */
+LoadedHrtf::~LoadedHrtf() = default;
+
+
+/* Data set limits must be the same as or more flexible than those defined in
+ * the makemhr utility.
+ */
+/* Allowed number of field depths in a data set. */
+constexpr uint MinFdCount{1};
+constexpr uint MaxFdCount{16};
+
+/* Allowed field distance, in millimeters (as stored in the data files). */
+constexpr uint MinFdDistance{50};
+constexpr uint MaxFdDistance{2500};
+
+/* Allowed number of elevations per field. */
+constexpr uint MinEvCount{5};
+constexpr uint MaxEvCount{181};
+
+/* Allowed number of azimuths per elevation. */
+constexpr uint MinAzCount{1};
+constexpr uint MaxAzCount{255};
+
+/* Largest whole-sample HRIR delay a data set may specify. */
+constexpr uint MaxHrirDelay{HrtfHistoryLength - 1};
+
+/* Stored delays are fixed-point, with this many fractional bits. */
+constexpr uint HrirDelayFracBits{2};
+constexpr uint HrirDelayFracOne{1 << HrirDelayFracBits};
+constexpr uint HrirDelayFracHalf{HrirDelayFracOne >> 1};
+
+/* Fixed-point delays are kept in single bytes, so the largest must fit. */
+static_assert(MaxHrirDelay*HrirDelayFracOne < 256,
+    "MaxHrirDelay or HrirDelayFracOne too large");
+
+/* 8-byte markers identifying the supported data file format versions. */
+constexpr char magicMarker00[8]{'M','i','n','P','H','R','0','0'};
+constexpr char magicMarker01[8]{'M','i','n','P','H','R','0','1'};
+constexpr char magicMarker02[8]{'M','i','n','P','H','R','0','2'};
+constexpr char magicMarker03[8]{'M','i','n','P','H','R','0','3'};
+
+/* First value for pass-through coefficients (remaining are 0), used for omni-
+ * directional sounds. */
+constexpr auto PassthruCoeff = static_cast<float>(1.0/al::numbers::sqrt2);
+
+/* Loaded HRTF stores, with the mutex declared alongside them. */
+std::mutex LoadedHrtfLock;
+al::vector<LoadedHrtf> LoadedHrtfs;
+
+/* Enumerated HRTF entries, with the mutex declared alongside them. */
+std::mutex EnumeratedHrtfLock;
+al::vector<HrtfEntry> EnumeratedHrtfs;
+
+/* Read-only stream buffer over a caller-provided, in-memory character range.
+ * The entire range is installed as the get area on construction, so there is
+ * never anything to refill; only seeking needs real work.
+ */
+class databuf final : public std::streambuf {
+    /* Everything is already in the get area, so running past it is EOF. */
+    int_type underflow() override
+    { return traits_type::eof(); }
+
+    /* Moves the read position by 'offset' relative to the start, the current
+     * position, or the end of the range. Only pure input seeks are accepted,
+     * and the target has to land within [start, end]. Failure is reported as
+     * traits_type::eof() and leaves the read position untouched.
+     */
+    pos_type seekoff(off_type offset, std::ios_base::seekdir whence, std::ios_base::openmode mode) override
+    {
+        if(!(mode&std::ios_base::in) || (mode&std::ios_base::out))
+            return traits_type::eof();
+
+        /* Express the reference point as a distance from the range start. */
+        const auto length = static_cast<off_type>(egptr() - eback());
+        off_type origin{0};
+        if(whence == std::ios_base::beg)
+            origin = 0;
+        else if(whence == std::ios_base::cur)
+            origin = static_cast<off_type>(gptr() - eback());
+        else if(whence == std::ios_base::end)
+            origin = length;
+        else
+            return traits_type::eof();
+
+        /* Compared this way around so origin+offset is only formed once it's
+         * known to be within range.
+         */
+        if(offset < -origin || offset > length-origin)
+            return traits_type::eof();
+
+        char_type *const target{eback() + (origin + offset)};
+        setg(eback(), target, egptr());
+        return target - eback();
+    }
+
+    /* Absolute counterpart of seekoff, with the same acceptance rules. */
+    pos_type seekpos(pos_type pos, std::ios_base::openmode mode) override
+    {
+        if(!(mode&std::ios_base::in) || (mode&std::ios_base::out))
+            return traits_type::eof();
+
+        const off_type where{pos};
+        if(where < 0 || where > egptr()-eback())
+            return traits_type::eof();
+
+        setg(eback(), eback() + where, egptr());
+        return pos;
+    }
+
+public:
+    /* The streambuf interface wants non-const pointers, but nothing here ever
+     * writes through them.
+     */
+    databuf(const char_type *start_, const char_type *end_) noexcept
+    {
+        char_type *const first{const_cast<char_type*>(start_)};
+        char_type *const last{const_cast<char_type*>(end_)};
+        setg(first, first, last);
+    }
+};
+
+/* Input stream reading from an in-memory character range, through a databuf
+ * that it owns.
+ */
+class idstream final : public std::istream {
+ databuf mStreamBuf;
+
+public:
+ /* The base stream is first constructed with no buffer, since mStreamBuf is
+ * a member and is only constructed after the base class. init() then
+ * attaches it.
+ */
+ idstream(const char *start_, const char *end_)
+ : std::istream{nullptr}, mStreamBuf{start_, end_}
+ { init(&mStreamBuf); }
+};
+
+
+/* A table index along with the fractional amount toward the following one. */
+struct IdxBlend { uint idx; float blend; };
+/* Converts a polar elevation in radians to an elevation index and a blend
+ * factor. The index returned is never above (evcount - 1).
+ */
+IdxBlend CalcEvIndex(uint evcount, float ev)
+{
+    const uint last{evcount - 1};
+    const float scaled{(al::numbers::pi_v<float>*0.5f + ev) * static_cast<float>(last) *
+        al::numbers::inv_pi_v<float>};
+    const uint whole{float2uint(scaled)};
+
+    IdxBlend result{};
+    result.idx = minu(whole, last);
+    result.blend = scaled - static_cast<float>(whole);
+    return result;
+}
+
+/* Converts a polar azimuth in radians to an azimuth index and a blend factor.
+ * The index wraps around, so it's always between 0 and (azcount - 1).
+ */
+IdxBlend CalcAzIndex(uint azcount, float az)
+{
+    const float scaled{(al::numbers::pi_v<float>*2.0f + az) * static_cast<float>(azcount) *
+        (al::numbers::inv_pi_v<float>*0.5f)};
+    const uint whole{float2uint(scaled)};
+
+    IdxBlend result{};
+    result.idx = whole % azcount;
+    result.blend = scaled - static_cast<float>(whole);
+    return result;
+}
+
+} // namespace
+
+
+/* Calculates static HRIR coefficients and delays for the given polar elevation
+ * and azimuth in radians. The coefficients are normalized.
+ *
+ * The field is selected by 'distance', the four HRIRs surrounding the
+ * direction are blended bilinearly, and 'spread' moves weight from the blended
+ * response to a pass-through first tap. Results are written to 'coeffs' and
+ * 'delays'.
+ */
+void HrtfStore::getCoeffs(float elevation, float azimuth, float distance, float spread,
+ HrirArray &coeffs, const al::span<uint,2> delays)
+{
+ /* Weight of the directional response. This is 1 with no spread, and the
+ * remainder (1 - dirfact) is given to the pass-through coefficient below.
+ */
+ const float dirfact{1.0f - (al::numbers::inv_pi_v<float>/2.0f * spread)};
+
+ /* Find the first field whose distance is at or below the requested one,
+ * accumulating the elevation counts of the fields skipped over in ebase.
+ * The search stops one short of the end, so the last field is what's used
+ * when none of the others match.
+ */
+ size_t ebase{0};
+ auto match_field = [&ebase,distance](const Field &field) noexcept -> bool
+ {
+ if(distance >= field.distance)
+ return true;
+ ebase += field.evCount;
+ return false;
+ };
+ auto field = std::find_if(mFields.begin(), mFields.end()-1, match_field);
+
+ /* Calculate the elevation indices. */
+ const auto elev0 = CalcEvIndex(field->evCount, elevation);
+ const size_t elev1_idx{minu(elev0.idx+1, field->evCount-1)};
+ const size_t ir0offset{mElev[ebase + elev0.idx].irOffset};
+ const size_t ir1offset{mElev[ebase + elev1_idx].irOffset};
+
+ /* Calculate azimuth indices. */
+ const auto az0 = CalcAzIndex(mElev[ebase + elev0.idx].azCount, azimuth);
+ const auto az1 = CalcAzIndex(mElev[ebase + elev1_idx].azCount, azimuth);
+
+ /* Calculate the HRIR indices to blend. */
+ const size_t idx[4]{
+ ir0offset + az0.idx,
+ ir0offset + ((az0.idx+1) % mElev[ebase + elev0.idx].azCount),
+ ir1offset + az1.idx,
+ ir1offset + ((az1.idx+1) % mElev[ebase + elev1_idx].azCount)
+ };
+
+ /* Calculate bilinear blending weights, attenuated according to the
+ * directional panning factor.
+ */
+ const float blend[4]{
+ (1.0f-elev0.blend) * (1.0f-az0.blend) * dirfact,
+ (1.0f-elev0.blend) * ( az0.blend) * dirfact,
+ ( elev0.blend) * (1.0f-az1.blend) * dirfact,
+ ( elev0.blend) * ( az1.blend) * dirfact
+ };
+
+ /* Calculate the blended HRIR delays. */
+ /* Stored delays are fixed-point, so the blended value is scaled back down
+ * by HrirDelayFracOne to get whole samples.
+ */
+ float d{mDelays[idx[0]][0]*blend[0] + mDelays[idx[1]][0]*blend[1] + mDelays[idx[2]][0]*blend[2]
+ + mDelays[idx[3]][0]*blend[3]};
+ delays[0] = fastf2u(d * float{1.0f/HrirDelayFracOne});
+ d = mDelays[idx[0]][1]*blend[0] + mDelays[idx[1]][1]*blend[1] + mDelays[idx[2]][1]*blend[2]
+ + mDelays[idx[3]][1]*blend[3];
+ delays[1] = fastf2u(d * float{1.0f/HrirDelayFracOne});
+
+ /* Calculate the blended HRIR coefficients. */
+ /* The output starts as the pass-through response (first tap of each ear,
+ * zeros after), then each of the four HRIRs is accumulated on top with its
+ * blend weight.
+ */
+ float *coeffout{al::assume_aligned<16>(coeffs[0].data())};
+ coeffout[0] = PassthruCoeff * (1.0f-dirfact);
+ coeffout[1] = PassthruCoeff * (1.0f-dirfact);
+ std::fill_n(coeffout+2, size_t{HrirLength-1}*2, 0.0f);
+ for(size_t c{0};c < 4;c++)
+ {
+ const float *srccoeffs{al::assume_aligned<16>(mCoeffs[idx[c]][0].data())};
+ const float mult{blend[c]};
+ auto blend_coeffs = [mult](const float src, const float coeff) noexcept -> float
+ { return src*mult + coeff; };
+ std::transform(srccoeffs, srccoeffs + HrirLength*2, coeffout, coeffout, blend_coeffs);
+ }
+}
+
+
+/* Creates a DirectHrtfState for num_chans channels. The count is given both to
+ * the allocation (through FamCount) and to the constructor. NOTE(review):
+ * presumably sizes a trailing flexible array of channels -- confirm in hrtf.h.
+ */
+std::unique_ptr<DirectHrtfState> DirectHrtfState::Create(size_t num_chans)
+{ return std::unique_ptr<DirectHrtfState>{new(FamCount(num_chans)) DirectHrtfState{num_chans}}; }
+
+/* Builds the per-channel HRIR filters used for decoding an ambisonic mix.
+ *
+ * For each point in AmbiPoints, the closest HRIR in the first field of Hrtf is
+ * looked up. Each response is then accumulated into every channel's filter,
+ * scaled by the matching AmbiMatrix entry and offset by its rounded delay.
+ *
+ * Hrtf            - data set to take responses from.
+ * irSize          - nominal response length; the resulting mIrSize is this
+ *                   plus the largest remaining delay, capped to HrirLength.
+ * perHrirMin      - when true, each response's delays are made relative to
+ *                   the smaller of its own two ears' delays instead of the
+ *                   smallest delay over all responses.
+ * AmbiPoints      - directions to sample.
+ * AmbiMatrix      - one row per point, holding each channel's gain.
+ * XOverFreq       - crossover frequency in Hz for the band splitters.
+ * AmbiOrderHFGain - per-order scale stored as each channel's mHfScale.
+ */
+void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize, const bool perHrirMin,
+    const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels],
+    const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain)
+{
+    using double2 = std::array<double,2>;
+    struct ImpulseResponse {
+        const ConstHrirSpan hrir;
+        uint ldelay, rdelay;
+    };
+
+    /* Initialize one splitter and copy it to the rest, so they're all set up
+     * identically.
+     */
+    const double xover_norm{double{XOverFreq} / Hrtf->mSampleRate};
+    mChannels[0].mSplitter.init(static_cast<float>(xover_norm));
+    for(size_t i{0};i < mChannels.size();++i)
+    {
+        const size_t order{AmbiIndex::OrderFromChannel()[i]};
+        mChannels[i].mSplitter = mChannels[0].mSplitter;
+        mChannels[i].mHfScale = AmbiOrderHFGain[order];
+    }
+
+    uint min_delay{HrtfHistoryLength*HrirDelayFracOne}, max_delay{0};
+    al::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size());
+    auto calc_res = [Hrtf,&max_delay,&min_delay](const AngularPoint &pt) -> ImpulseResponse
+    {
+        auto &field = Hrtf->mFields[0];
+        const auto elev0 = CalcEvIndex(field.evCount, pt.Elev.value);
+        const size_t elev1_idx{minu(elev0.idx+1, field.evCount-1)};
+        const size_t ir0offset{Hrtf->mElev[elev0.idx].irOffset};
+        const size_t ir1offset{Hrtf->mElev[elev1_idx].irOffset};
+
+        const auto az0 = CalcAzIndex(Hrtf->mElev[elev0.idx].azCount, pt.Azim.value);
+        const auto az1 = CalcAzIndex(Hrtf->mElev[elev1_idx].azCount, pt.Azim.value);
+
+        const size_t idx[4]{
+            ir0offset + az0.idx,
+            ir0offset + ((az0.idx+1) % Hrtf->mElev[elev0.idx].azCount),
+            ir1offset + az1.idx,
+            ir1offset + ((az1.idx+1) % Hrtf->mElev[elev1_idx].azCount)
+        };
+
+        /* The largest blend factor serves as the closest HRIR. The two
+         * elevation rows can have different azimuth counts, so the azimuth
+         * blend to test has to be the one belonging to the selected row.
+         */
+        const bool upper{elev0.blend >= 0.5f};
+        const float azblend{upper ? az1.blend : az0.blend};
+        const size_t irOffset{idx[(upper ? 2u : 0u) + (azblend >= 0.5f ? 1u : 0u)]};
+        ImpulseResponse res{Hrtf->mCoeffs[irOffset],
+            Hrtf->mDelays[irOffset][0], Hrtf->mDelays[irOffset][1]};
+
+        min_delay = minu(min_delay, minu(res.ldelay, res.rdelay));
+        max_delay = maxu(max_delay, maxu(res.ldelay, res.rdelay));
+
+        return res;
+    };
+    std::transform(AmbiPoints.begin(), AmbiPoints.end(), std::back_inserter(impres), calc_res);
+    /* Rounds a fixed-point delay to the nearest whole sample. */
+    auto hrir_delay_round = [](const uint d) noexcept -> uint
+    { return (d+HrirDelayFracHalf) >> HrirDelayFracBits; };
+
+    TRACE("Min delay: %.2f, max delay: %.2f, FIR length: %u\n",
+        min_delay/double{HrirDelayFracOne}, max_delay/double{HrirDelayFracOne}, irSize);
+
+    /* Accumulate in double precision, converting to float once at the end. */
+    auto tmpres = al::vector<std::array<double2,HrirLength>>(mChannels.size());
+    max_delay = 0;
+    for(size_t c{0u};c < AmbiPoints.size();++c)
+    {
+        const ConstHrirSpan hrir{impres[c].hrir};
+        const uint base_delay{perHrirMin ? minu(impres[c].ldelay, impres[c].rdelay) : min_delay};
+        const uint ldelay{hrir_delay_round(impres[c].ldelay - base_delay)};
+        const uint rdelay{hrir_delay_round(impres[c].rdelay - base_delay)};
+        max_delay = maxu(max_delay, maxu(impres[c].ldelay, impres[c].rdelay) - base_delay);
+
+        /* Only as many taps as fit after the longer of the two delays. This
+         * is the same for every channel.
+         */
+        const size_t numirs{HrirLength - maxz(ldelay, rdelay)};
+        for(size_t i{0u};i < mChannels.size();++i)
+        {
+            const double mult{AmbiMatrix[c][i]};
+            size_t lidx{ldelay}, ridx{rdelay};
+            for(size_t j{0};j < numirs;++j)
+            {
+                tmpres[i][lidx++][0] += hrir[j][0] * mult;
+                tmpres[i][ridx++][1] += hrir[j][1] * mult;
+            }
+        }
+    }
+    impres.clear();
+
+    for(size_t i{0u};i < mChannels.size();++i)
+    {
+        auto copy_arr = [](const double2 &in) noexcept -> float2
+        { return float2{{static_cast<float>(in[0]), static_cast<float>(in[1])}}; };
+        std::transform(tmpres[i].cbegin(), tmpres[i].cend(), mChannels[i].mCoeffs.begin(),
+            copy_arr);
+    }
+    tmpres.clear();
+
+    const uint max_length{minu(hrir_delay_round(max_delay) + irSize, HrirLength)};
+    TRACE("New max delay: %.2f, FIR length: %u\n", max_delay/double{HrirDelayFracOne},
+        max_length);
+    mIrSize = max_length;
+}
+
+
+namespace {
+
+/* Creates an HrtfStore holding copies of the given fields, elevations,
+ * coefficients, and delays, all within a single allocation. The HrtfStore
+ * object comes first, followed by the field, elevation, coefficient, and
+ * delay arrays.
+ *
+ * The number of IRs copied from 'coeffs' and 'delays' is taken from the last
+ * elevation (its irOffset plus azCount), so 'elevs' must not be empty.
+ * 'filename' is only used for the error message.
+ *
+ * Returns a null pointer if the allocation fails. Throws std::runtime_error if
+ * the laid out size disagrees with the computed total.
+ */
+std::unique_ptr<HrtfStore> CreateHrtfStore(uint rate, uint8_t irSize,
+ const al::span<const HrtfStore::Field> fields,
+ const al::span<const HrtfStore::Elevation> elevs, const HrirArray *coeffs,
+ const ubyte2 *delays, const char *filename)
+{
+ /* Compute the total size. This has to mirror, step for step, the layout
+ * pass done below after allocating.
+ */
+ const size_t irCount{size_t{elevs.back().azCount} + elevs.back().irOffset};
+ size_t total{sizeof(HrtfStore)};
+ total = RoundUp(total, alignof(HrtfStore::Field)); /* Align for field infos */
+ total += sizeof(std::declval<HrtfStore&>().mFields[0])*fields.size();
+ total = RoundUp(total, alignof(HrtfStore::Elevation)); /* Align for elevation infos */
+ total += sizeof(std::declval<HrtfStore&>().mElev[0])*elevs.size();
+ total = RoundUp(total, 16); /* Align for coefficients using SIMD */
+ total += sizeof(std::declval<HrtfStore&>().mCoeffs[0])*irCount;
+ total += sizeof(std::declval<HrtfStore&>().mDelays[0])*irCount;
+
+ std::unique_ptr<HrtfStore> Hrtf{};
+ if(void *ptr{al_calloc(16, total)})
+ {
+ Hrtf.reset(al::construct_at(static_cast<HrtfStore*>(ptr)));
+ InitRef(Hrtf->mRef, 1u);
+ Hrtf->mSampleRate = rate;
+ Hrtf->mIrSize = irSize;
+
+ /* Set up pointers to storage following the main HRTF struct. */
+ char *base = reinterpret_cast<char*>(Hrtf.get());
+ size_t offset{sizeof(HrtfStore)};
+
+ offset = RoundUp(offset, alignof(HrtfStore::Field)); /* Align for field infos */
+ auto field_ = reinterpret_cast<HrtfStore::Field*>(base + offset);
+ offset += sizeof(field_[0])*fields.size();
+
+ offset = RoundUp(offset, alignof(HrtfStore::Elevation)); /* Align for elevation infos */
+ auto elev_ = reinterpret_cast<HrtfStore::Elevation*>(base + offset);
+ offset += sizeof(elev_[0])*elevs.size();
+
+ offset = RoundUp(offset, 16); /* Align for coefficients using SIMD */
+ auto coeffs_ = reinterpret_cast<HrirArray*>(base + offset);
+ offset += sizeof(coeffs_[0])*irCount;
+
+ auto delays_ = reinterpret_cast<ubyte2*>(base + offset);
+ offset += sizeof(delays_[0])*irCount;
+
+ /* A mismatch means the two passes have gotten out of sync, which would
+ * have the copies below run past the allocation.
+ */
+ if(offset != total)
+ throw std::runtime_error{"HrtfStore allocation size mismatch"};
+
+ /* Copy input data to storage. */
+ std::uninitialized_copy(fields.cbegin(), fields.cend(), field_);
+ std::uninitialized_copy(elevs.cbegin(), elevs.cend(), elev_);
+ std::uninitialized_copy_n(coeffs, irCount, coeffs_);
+ std::uninitialized_copy_n(delays, irCount, delays_);
+
+ /* Finally, assign the storage pointers. */
+ Hrtf->mFields = al::as_span(field_, fields.size());
+ Hrtf->mElev = elev_;
+ Hrtf->mCoeffs = coeffs_;
+ Hrtf->mDelays = delays_;
+ }
+ else
+ ERR("Out of memory allocating storage for %s.\n", filename);
+
+ return Hrtf;
+}
+
+/* Fills in the right-ear responses and delays from the left-ear ones. Within
+ * each elevation, the left ear's data at azimuth index j is used for the right
+ * ear at the index mirrored around the ring (index 0 maps to itself).
+ */
+void MirrorLeftHrirs(const al::span<const HrtfStore::Elevation> elevs, HrirArray *coeffs,
+    ubyte2 *delays)
+{
+    for(const HrtfStore::Elevation &ring : elevs)
+    {
+        const size_t first{ring.irOffset};
+        const size_t count{ring.azCount};
+        for(size_t az{0};az < count;++az)
+        {
+            const size_t mirrored{(az == 0) ? size_t{0} : (count - az)};
+            const size_t src{first + az};
+            const size_t dst{first + mirrored};
+
+            const size_t length{coeffs[dst].size()};
+            for(size_t tap{0};tap < length;++tap)
+                coeffs[dst][tap][1] = coeffs[src][tap][0];
+            delays[dst][1] = delays[src][0];
+        }
+    }
+}
+
+
+/* Sign-extends a value that only occupies the low num_bits bits of a wider
+ * signed type. The bits above num_bits are expected to be clear on input.
+ */
+template<size_t num_bits, typename T>
+constexpr std::enable_if_t<std::is_signed<T>::value && num_bits < sizeof(T)*8,
+T> fixsign(T value) noexcept
+{
+    static_assert(num_bits > 0, "num_bits must be at least 1");
+    /* The mask is shifted in T's unsigned counterpart rather than as a plain
+     * unsigned int, so bit counts beyond the width of int also work for wider
+     * types.
+     */
+    using unsigned_t = std::make_unsigned_t<T>;
+    constexpr auto signbit = static_cast<T>(unsigned_t{1} << (num_bits-1));
+    /* Flipping the sign bit and then subtracting it leaves non-negative values
+     * unchanged, and takes 2^num_bits away from values that had it set.
+     */
+    return static_cast<T>((value^signbit) - signbit);
+}
+
+/* Unsigned types, and signed types that are already fully occupied, have
+ * nothing to extend and are returned as-is.
+ */
+template<size_t num_bits, typename T>
+constexpr std::enable_if_t<!std::is_signed<T>::value || num_bits == sizeof(T)*8,
+T> fixsign(T value) noexcept
+{ return value; }
+
+/* Reads a little-endian integer of num_bits bits from the stream as a T,
+ * sign-extending it when T is signed and wider than num_bits. If the read
+ * fails, EOF converted to T is returned. This is the overload for little-
+ * endian hosts.
+ */
+template<typename T, size_t num_bits=sizeof(T)*8>
+inline std::enable_if_t<al::endian::native == al::endian::little,
+T> readle(std::istream &data)
+{
+    static_assert((num_bits&7) == 0, "num_bits must be a multiple of 8");
+    static_assert(num_bits <= sizeof(T)*8, "num_bits is too large for the type");
+
+    /* The byte order in the stream matches memory, so the bytes go directly
+     * into the low end of the zero-initialized value.
+     */
+    T value{};
+    data.read(reinterpret_cast<char*>(&value), num_bits/8);
+    if(data.fail())
+        return static_cast<T>(EOF);
+
+    return fixsign<num_bits>(value);
+}
+
+/* The same as above, for big-endian hosts. */
+template<typename T, size_t num_bits=sizeof(T)*8>
+inline std::enable_if_t<al::endian::native == al::endian::big,
+T> readle(std::istream &data)
+{
+    static_assert((num_bits&7) == 0, "num_bits must be a multiple of 8");
+    static_assert(num_bits <= sizeof(T)*8, "num_bits is too large for the type");
+
+    /* Read into a zeroed scratch buffer the size of T, then reverse the whole
+     * buffer into the value. Bytes that weren't read stay zero and end up as
+     * the most significant ones.
+     */
+    al::byte raw[sizeof(T)]{};
+    data.read(reinterpret_cast<char*>(raw), num_bits/8);
+    if(data.fail())
+        return static_cast<T>(EOF);
+
+    T value{};
+    std::copy(std::rbegin(raw), std::rend(raw), reinterpret_cast<al::byte*>(&value));
+
+    return fixsign<num_bits>(value);
+}
+
+/* Single bytes have no byte order to deal with. */
+template<>
+inline uint8_t readle<uint8_t,8>(std::istream &data)
+{ return static_cast<uint8_t>(data.get()); }
+
+
+/* Loads a single-field, left-ear-only data set from the stream.
+ *
+ * Read in order: the sample rate (32-bit), the total IR count (16-bit), the
+ * IR size (16-bit), the elevation count (8-bit), one 16-bit IR offset per
+ * elevation, the 16-bit coefficients of every IR, and one 8-bit delay per IR.
+ * Azimuth counts are derived from the differences between consecutive offsets.
+ * The right ear is filled in by mirroring the left.
+ *
+ * Returns a null pointer, after logging the reason, when reading fails or a
+ * value is out of range. 'filename' is only used for messages.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename)
+{
+    uint rate{readle<uint32_t>(data)};
+    ushort irCount{readle<uint16_t>(data)};
+    ushort irSize{readle<uint16_t>(data)};
+    ubyte evCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    if(evCount < MinEvCount || evCount > MaxEvCount)
+    {
+        ERR("Unsupported elevation count: evCount=%d (%d to %d)\n",
+            evCount, MinEvCount, MaxEvCount);
+        return nullptr;
+    }
+
+    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    for(auto &elev : elevs)
+        elev.irOffset = readle<uint16_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    /* Offsets must be strictly increasing, and all below the IR count, for
+     * the azimuth counts derived from them to be valid.
+     */
+    for(size_t i{1};i < evCount;i++)
+    {
+        if(elevs[i].irOffset <= elevs[i-1].irOffset)
+        {
+            ERR("Invalid evOffset: evOffset[%zu]=%d (last=%d)\n", i, elevs[i].irOffset,
+                elevs[i-1].irOffset);
+            return nullptr;
+        }
+    }
+    if(irCount <= elevs.back().irOffset)
+    {
+        ERR("Invalid evOffset: evOffset[%zu]=%d (irCount=%d)\n",
+            elevs.size()-1, elevs.back().irOffset, irCount);
+        return nullptr;
+    }
+
+    for(size_t i{1};i < evCount;i++)
+    {
+        elevs[i-1].azCount = static_cast<ushort>(elevs[i].irOffset - elevs[i-1].irOffset);
+        if(elevs[i-1].azCount < MinAzCount || elevs[i-1].azCount > MaxAzCount)
+        {
+            ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n",
+                i-1, elevs[i-1].azCount, MinAzCount, MaxAzCount);
+            return nullptr;
+        }
+    }
+    elevs.back().azCount = static_cast<ushort>(irCount - elevs.back().irOffset);
+    if(elevs.back().azCount < MinAzCount || elevs.back().azCount > MaxAzCount)
+    {
+        ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n",
+            elevs.size()-1, elevs.back().azCount, MinAzCount, MaxAzCount);
+        return nullptr;
+    }
+
+    /* Only the first irSize taps of each response come from the file. The
+     * rest stay zero.
+     */
+    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = al::vector<ubyte2>(irCount);
+    for(auto &hrir : coeffs)
+    {
+        for(auto &val : al::span<float2>{hrir.data(), irSize})
+            val[0] = readle<int16_t>(data) / 32768.0f;
+    }
+    for(auto &val : delays)
+        val[0] = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < irCount;i++)
+    {
+        if(delays[i][0] > MaxHrirDelay)
+        {
+            ERR("Invalid delays[%zu]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+            return nullptr;
+        }
+        /* The file has whole-sample delays; convert to fixed-point. */
+        delays[i][0] <<= HrirDelayFracBits;
+    }
+
+    /* Mirror the left ear responses to the right ear. */
+    MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+
+    const HrtfStore::Field field[1]{{0.0f, evCount}};
+    return CreateHrtfStore(rate, static_cast<uint8_t>(irSize), field, {elevs.data(), elevs.size()},
+        coeffs.data(), delays.data(), filename);
+}
+
+/* Loads a single-field, left-ear-only data set from the stream.
+ *
+ * Read in order: the sample rate (32-bit), the IR size (8-bit), the elevation
+ * count (8-bit), one 8-bit azimuth count per elevation, the 16-bit
+ * coefficients of every IR, and one 8-bit delay per IR. IR offsets are the
+ * running sum of the azimuth counts. The right ear is filled in by mirroring
+ * the left.
+ *
+ * Returns a null pointer, after logging the reason, when reading fails or a
+ * value is out of range. 'filename' is only used for messages.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename)
+{
+    uint rate{readle<uint32_t>(data)};
+    uint8_t irSize{readle<uint8_t>(data)};
+    ubyte evCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    if(evCount < MinEvCount || evCount > MaxEvCount)
+    {
+        ERR("Unsupported elevation count: evCount=%d (%d to %d)\n",
+            evCount, MinEvCount, MaxEvCount);
+        return nullptr;
+    }
+
+    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    for(auto &elev : elevs)
+        elev.azCount = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < evCount;++i)
+    {
+        if(elevs[i].azCount < MinAzCount || elevs[i].azCount > MaxAzCount)
+        {
+            ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n", i, elevs[i].azCount,
+                MinAzCount, MaxAzCount);
+            return nullptr;
+        }
+    }
+
+    /* With the elevation and azimuth counts limited as above, the running
+     * sum stays within what a ushort can hold.
+     */
+    elevs[0].irOffset = 0;
+    for(size_t i{1};i < evCount;i++)
+        elevs[i].irOffset = static_cast<ushort>(elevs[i-1].irOffset + elevs[i-1].azCount);
+    const ushort irCount{static_cast<ushort>(elevs.back().irOffset + elevs.back().azCount)};
+
+    /* Only the first irSize taps of each response come from the file. The
+     * rest stay zero.
+     */
+    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = al::vector<ubyte2>(irCount);
+    for(auto &hrir : coeffs)
+    {
+        for(auto &val : al::span<float2>{hrir.data(), irSize})
+            val[0] = readle<int16_t>(data) / 32768.0f;
+    }
+    for(auto &val : delays)
+        val[0] = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < irCount;i++)
+    {
+        if(delays[i][0] > MaxHrirDelay)
+        {
+            ERR("Invalid delays[%zu]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+            return nullptr;
+        }
+        /* The file has whole-sample delays; convert to fixed-point. */
+        delays[i][0] <<= HrirDelayFracBits;
+    }
+
+    /* Mirror the left ear responses to the right ear. */
+    MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+
+    const HrtfStore::Field field[1]{{0.0f, evCount}};
+    return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(),
+        delays.data(), filename);
+}
+
+/* Loads a multi-field data set from the stream, with 16- or 24-bit samples
+ * and either left-ear-only or left/right responses.
+ *
+ * Read in order: the sample rate (32-bit), sample type, channel type, IR
+ * size, and field count (8-bit each). Then for each field: its distance in
+ * millimeters (16-bit), its elevation count (8-bit), and one 8-bit azimuth
+ * count per elevation. Then the coefficients of every IR, followed by the
+ * 8-bit delays of every IR.
+ *
+ * The file must list fields by increasing distance. With more than one field,
+ * the fields (and their elevations and IRs) are reordered to go from farthest
+ * to nearest before being stored.
+ *
+ * Returns a null pointer, after logging the reason, when reading fails or a
+ * value is out of range. 'filename' is only used for messages.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
+{
+    constexpr ubyte SampleType_S16{0};
+    constexpr ubyte SampleType_S24{1};
+    constexpr ubyte ChanType_LeftOnly{0};
+    constexpr ubyte ChanType_LeftRight{1};
+
+    uint rate{readle<uint32_t>(data)};
+    ubyte sampleType{readle<uint8_t>(data)};
+    ubyte channelType{readle<uint8_t>(data)};
+    uint8_t irSize{readle<uint8_t>(data)};
+    ubyte fdCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(sampleType > SampleType_S24)
+    {
+        ERR("Unsupported sample type: %d\n", sampleType);
+        return nullptr;
+    }
+    if(channelType > ChanType_LeftRight)
+    {
+        ERR("Unsupported channel type: %d\n", channelType);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    if(fdCount < MinFdCount || fdCount > MaxFdCount)
+    {
+        ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount,
+            MaxFdCount);
+        return nullptr;
+    }
+
+    auto fields = al::vector<HrtfStore::Field>(fdCount);
+    auto elevs = al::vector<HrtfStore::Elevation>{};
+    for(size_t f{0};f < fdCount;f++)
+    {
+        const ushort distance{readle<uint16_t>(data)};
+        const ubyte evCount{readle<uint8_t>(data)};
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        if(distance < MinFdDistance || distance > MaxFdDistance)
+        {
+            ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance,
+                MinFdDistance, MaxFdDistance);
+            return nullptr;
+        }
+        if(evCount < MinEvCount || evCount > MaxEvCount)
+        {
+            ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount,
+                MinEvCount, MaxEvCount);
+            return nullptr;
+        }
+
+        fields[f].distance = distance / 1000.0f;
+        fields[f].evCount = evCount;
+        if(f > 0 && fields[f].distance <= fields[f-1].distance)
+        {
+            ERR("Field distance[%zu] is not after previous (%f > %f)\n", f, fields[f].distance,
+                fields[f-1].distance);
+            return nullptr;
+        }
+
+        /* All fields' elevations are kept in one flat list, each field's
+         * group following the previous field's.
+         */
+        const size_t ebase{elevs.size()};
+        elevs.resize(ebase + evCount);
+        for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount))
+            elev.azCount = readle<uint8_t>(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        for(size_t e{0};e < evCount;e++)
+        {
+            if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount)
+            {
+                ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e,
+                    elevs[ebase+e].azCount, MinAzCount, MaxAzCount);
+                return nullptr;
+            }
+        }
+    }
+
+    /* IR offsets and the IR total are held as ushort. The individual limits
+     * checked above still allow a combined azimuth count beyond that range,
+     * which would wrap the offsets and have them index outside of the
+     * coefficient and delay storage. Reject such data sets up front.
+     */
+    constexpr size_t MaxIrTotal{std::numeric_limits<ushort>::max()};
+    const size_t azTotal{std::accumulate(elevs.cbegin(), elevs.cend(), size_t{0},
+        [](const size_t count, const HrtfStore::Elevation &elev) noexcept -> size_t
+        { return count + elev.azCount; })};
+    if(azTotal > MaxIrTotal)
+    {
+        ERR("Unsupported total HRIR count: %zu (max %zu)\n", azTotal, MaxIrTotal);
+        return nullptr;
+    }
+
+    /* Each elevation's IR offset is the sum of the azimuth counts before it. */
+    elevs[0].irOffset = 0;
+    std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(),
+        [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+            -> HrtfStore::Elevation
+        {
+            return HrtfStore::Elevation{cur.azCount,
+                static_cast<ushort>(last.azCount + last.irOffset)};
+        });
+    const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
+
+    /* Only the first irSize taps of each response come from the file. The
+     * rest stay zero.
+     */
+    auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
+    auto delays = al::vector<ubyte2>(irTotal);
+    if(channelType == ChanType_LeftOnly)
+    {
+        if(sampleType == SampleType_S16)
+        {
+            for(auto &hrir : coeffs)
+            {
+                for(auto &val : al::span<float2>{hrir.data(), irSize})
+                    val[0] = readle<int16_t>(data) / 32768.0f;
+            }
+        }
+        else if(sampleType == SampleType_S24)
+        {
+            for(auto &hrir : coeffs)
+            {
+                for(auto &val : al::span<float2>{hrir.data(), irSize})
+                    val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+            }
+        }
+        for(auto &val : delays)
+            val[0] = readle<uint8_t>(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MaxHrirDelay)
+            {
+                ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+                return nullptr;
+            }
+            /* The file has whole-sample delays; convert to fixed-point. */
+            delays[i][0] <<= HrirDelayFracBits;
+        }
+
+        /* Mirror the left ear responses to the right ear. */
+        MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+    }
+    else if(channelType == ChanType_LeftRight)
+    {
+        /* Left and right samples are interleaved per tap, as are the left and
+         * right delays per IR.
+         */
+        if(sampleType == SampleType_S16)
+        {
+            for(auto &hrir : coeffs)
+            {
+                for(auto &val : al::span<float2>{hrir.data(), irSize})
+                {
+                    val[0] = readle<int16_t>(data) / 32768.0f;
+                    val[1] = readle<int16_t>(data) / 32768.0f;
+                }
+            }
+        }
+        else if(sampleType == SampleType_S24)
+        {
+            for(auto &hrir : coeffs)
+            {
+                for(auto &val : al::span<float2>{hrir.data(), irSize})
+                {
+                    val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+                    val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+                }
+            }
+        }
+        for(auto &val : delays)
+        {
+            val[0] = readle<uint8_t>(data);
+            val[1] = readle<uint8_t>(data);
+        }
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MaxHrirDelay)
+            {
+                ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+                return nullptr;
+            }
+            if(delays[i][1] > MaxHrirDelay)
+            {
+                ERR("Invalid delays[%zu][1]: %d (%d)\n", i, delays[i][1], MaxHrirDelay);
+                return nullptr;
+            }
+            /* The file has whole-sample delays; convert to fixed-point. */
+            delays[i][0] <<= HrirDelayFracBits;
+            delays[i][1] <<= HrirDelayFracBits;
+        }
+    }
+
+    if(fdCount > 1)
+    {
+        auto fields_ = al::vector<HrtfStore::Field>(fields.size());
+        auto elevs_ = al::vector<HrtfStore::Elevation>(elevs.size());
+        auto coeffs_ = al::vector<HrirArray>(coeffs.size());
+        auto delays_ = al::vector<ubyte2>(delays.size());
+
+        /* Simple reverse for the per-field elements. */
+        std::reverse_copy(fields.cbegin(), fields.cend(), fields_.begin());
+
+        /* Each field has a group of elevations, which each have an azimuth
+         * count. Reverse the order of the groups, keeping the relative order
+         * of per-group azimuth counts.
+         */
+        auto elevs__end = elevs_.end();
+        auto copy_azs = [&elevs,&elevs__end](const ptrdiff_t ebase, const HrtfStore::Field &field)
+            -> ptrdiff_t
+        {
+            auto elevs_src = elevs.begin()+ebase;
+            elevs__end = std::copy_backward(elevs_src, elevs_src+field.evCount, elevs__end);
+            return ebase + field.evCount;
+        };
+        (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_azs);
+        assert(elevs_.begin() == elevs__end);
+
+        /* Reestablish the IR offset for each elevation index, given the new
+         * ordering of elevations.
+         */
+        elevs_[0].irOffset = 0;
+        std::partial_sum(elevs_.cbegin(), elevs_.cend(), elevs_.begin(),
+            [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+                -> HrtfStore::Elevation
+            {
+                return HrtfStore::Elevation{cur.azCount,
+                    static_cast<ushort>(last.azCount + last.irOffset)};
+            });
+
+        /* Reverse the order of each field's group of IRs. */
+        auto coeffs_end = coeffs_.end();
+        auto delays_end = delays_.end();
+        auto copy_irs = [&elevs,&coeffs,&delays,&coeffs_end,&delays_end](
+            const ptrdiff_t ebase, const HrtfStore::Field &field) -> ptrdiff_t
+        {
+            auto accum_az = [](int count, const HrtfStore::Elevation &elev) noexcept -> int
+            { return count + elev.azCount; };
+            const auto elevs_mid = elevs.cbegin() + ebase;
+            const auto elevs_end = elevs_mid + field.evCount;
+            /* abase is where this field's IRs start in the original order,
+             * and num_azs is how many IRs it has.
+             */
+            const int abase{std::accumulate(elevs.cbegin(), elevs_mid, 0, accum_az)};
+            const int num_azs{std::accumulate(elevs_mid, elevs_end, 0, accum_az)};
+
+            coeffs_end = std::copy_backward(coeffs.cbegin() + abase,
+                coeffs.cbegin() + (abase+num_azs), coeffs_end);
+            delays_end = std::copy_backward(delays.cbegin() + abase,
+                delays.cbegin() + (abase+num_azs), delays_end);
+
+            return ebase + field.evCount;
+        };
+        (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_irs);
+        assert(coeffs_.begin() == coeffs_end);
+        assert(delays_.begin() == delays_end);
+
+        fields = std::move(fields_);
+        elevs = std::move(elevs_);
+        coeffs = std::move(coeffs_);
+        delays = std::move(delays_);
+    }
+
+    return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()},
+        {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
+}
+
+std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename)
+{
+ constexpr ubyte ChanType_LeftOnly{0};
+ constexpr ubyte ChanType_LeftRight{1};
+
+ uint rate{readle<uint32_t>(data)};
+ ubyte channelType{readle<uint8_t>(data)};
+ uint8_t irSize{readle<uint8_t>(data)};
+ ubyte fdCount{readle<uint8_t>(data)};
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ if(channelType > ChanType_LeftRight)
+ {
+ ERR("Unsupported channel type: %d\n", channelType);
+ return nullptr;
+ }
+
+ if(irSize < MinIrLength || irSize > HrirLength)
+ {
+ ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+ return nullptr;
+ }
+ if(fdCount < 1 || fdCount > MaxFdCount)
+ {
+ ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount,
+ MaxFdCount);
+ return nullptr;
+ }
+
+ auto fields = al::vector<HrtfStore::Field>(fdCount);
+ auto elevs = al::vector<HrtfStore::Elevation>{};
+ for(size_t f{0};f < fdCount;f++)
+ {
+ const ushort distance{readle<uint16_t>(data)};
+ const ubyte evCount{readle<uint8_t>(data)};
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ if(distance < MinFdDistance || distance > MaxFdDistance)
+ {
+ ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance,
+ MinFdDistance, MaxFdDistance);
+ return nullptr;
+ }
+ if(evCount < MinEvCount || evCount > MaxEvCount)
+ {
+ ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount,
+ MinEvCount, MaxEvCount);
+ return nullptr;
+ }
+
+ fields[f].distance = distance / 1000.0f;
+ fields[f].evCount = evCount;
+ if(f > 0 && fields[f].distance > fields[f-1].distance)
+ {
+ ERR("Field distance[%zu] is not before previous (%f <= %f)\n", f, fields[f].distance,
+ fields[f-1].distance);
+ return nullptr;
+ }
+
+ const size_t ebase{elevs.size()};
+ elevs.resize(ebase + evCount);
+ for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount))
+ elev.azCount = readle<uint8_t>(data);
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ for(size_t e{0};e < evCount;e++)
+ {
+ if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount)
+ {
+ ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e,
+ elevs[ebase+e].azCount, MinAzCount, MaxAzCount);
+ return nullptr;
+ }
+ }
+ }
+
+ elevs[0].irOffset = 0;
+ std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(),
+ [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+ -> HrtfStore::Elevation
+ {
+ return HrtfStore::Elevation{cur.azCount,
+ static_cast<ushort>(last.azCount + last.irOffset)};
+ });
+ const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
+
+ auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
+ auto delays = al::vector<ubyte2>(irTotal);
+ if(channelType == ChanType_LeftOnly)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ }
+ for(auto &val : delays)
+ val[0] = readle<uint8_t>(data);
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+ for(size_t i{0};i < irTotal;++i)
+ {
+ if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits)
+ {
+ ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+ delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay);
+ return nullptr;
+ }
+ }
+
+ /* Mirror the left ear responses to the right ear. */
+ MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+ }
+ else if(channelType == ChanType_LeftRight)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ {
+ val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ }
+ }
+ for(auto &val : delays)
+ {
+ val[0] = readle<uint8_t>(data);
+ val[1] = readle<uint8_t>(data);
+ }
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ for(size_t i{0};i < irTotal;++i)
+ {
+ if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits)
+ {
+ ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+ delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay);
+ return nullptr;
+ }
+ if(delays[i][1] > MaxHrirDelay<<HrirDelayFracBits)
+ {
+ ERR("Invalid delays[%zu][1]: %f (%d)\n", i,
+ delays[i][1] / float{HrirDelayFracOne}, MaxHrirDelay);
+ return nullptr;
+ }
+ }
+ }
+
+ return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()},
+ {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
+}
+
+
+ /* Reports whether any already-enumerated HRTF entry uses the given display
+  * name.
+  */
+ bool checkName(const std::string &name)
+ {
+     return std::any_of(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(),
+         [&name](const HrtfEntry &candidate) -> bool
+         { return candidate.mDispName == name; });
+ }
+
+ /* Registers an HRTF data file in the enumeration list. The display name is
+  * the file's base name without directory or extension, with " #N" appended
+  * as needed to keep display names unique. Files already listed are skipped.
+  */
+ void AddFileEntry(const std::string &filename)
+ {
+     /* Nothing to do if this exact filename was enumerated before. */
+     const bool already_listed{std::any_of(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(),
+         [&filename](const HrtfEntry &known) -> bool
+         { return known.mFilename == filename; })};
+     if(already_listed)
+     {
+         TRACE("Skipping duplicate file entry %s\n", filename.c_str());
+         return;
+     }
+
+     /* TODO: Get a human-readable name from the HRTF data (possibly coming in a
+      * format update). */
+
+     /* find_last_of yields npos when nothing matches, and npos+1 wraps to 0,
+      * so a zero start means "no forward slash found"; try backslash then.
+      */
+     size_t start{filename.find_last_of('/') + 1};
+     if(start == 0)
+         start = filename.find_last_of('\\') + 1;
+
+     /* Only a dot strictly inside the base name counts as an extension. */
+     const size_t dot{filename.find_last_of('.')};
+     std::string stem;
+     if(dot == std::string::npos || dot <= start)
+         stem = filename.substr(start);
+     else
+         stem = filename.substr(start, dot - start);
+
+     /* Append " #2", " #3", ... until the display name is unused. */
+     std::string dispname{stem};
+     for(int suffix{2};checkName(dispname);++suffix)
+         dispname = stem + " #" + std::to_string(suffix);
+
+     EnumeratedHrtfs.emplace_back(HrtfEntry{dispname, filename});
+     TRACE("Adding file entry \"%s\"\n", EnumeratedHrtfs.back().mFilename.c_str());
+ }
+
+ /* Counterpart of AddFileEntry for data embedded in the library rather than
+  * stored in a file. The entry's "filename" is synthesized as
+  * "!<residx>_<dispname>", and the display name gets " #N" appended as needed
+  * to stay unique.
+  */
+ void AddBuiltInEntry(const std::string &dispname, uint residx)
+ {
+     const std::string filename{'!'+std::to_string(residx)+'_'+dispname};
+
+     const bool already_listed{std::any_of(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(),
+         [&filename](const HrtfEntry &known) -> bool
+         { return known.mFilename == filename; })};
+     if(already_listed)
+     {
+         TRACE("Skipping duplicate file entry %s\n", filename.c_str());
+         return;
+     }
+
+     /* TODO: Get a human-readable name from the HRTF data (possibly coming in a
+      * format update). */
+
+     /* Append " #2", " #3", ... until the display name is unused. */
+     std::string uniquename{dispname};
+     for(int suffix{2};checkName(uniquename);++suffix)
+         uniquename = dispname + " #" + std::to_string(suffix);
+
+     EnumeratedHrtfs.emplace_back(HrtfEntry{uniquename, filename});
+     TRACE("Adding built-in entry \"%s\"\n", EnumeratedHrtfs.back().mFilename.c_str());
+ }
+
+
+ /* Resource index of the embedded default HRTF data set. */
+ #define IDR_DEFAULT_HRTF_MHR 1
+
+ #ifdef ALSOFT_EMBED_HRTF_DATA
+ constexpr unsigned char hrtf_default[]{
+ #include "default_hrtf.txt"
+ };
+ #endif
+
+ /* Returns the bytes of the embedded resource with the given index, or an
+  * empty span when the index is unknown or no data was embedded at build time.
+  */
+ al::span<const char> GetResource(int name)
+ {
+ #ifdef ALSOFT_EMBED_HRTF_DATA
+     switch(name)
+     {
+     case IDR_DEFAULT_HRTF_MHR:
+         return {reinterpret_cast<const char*>(hrtf_default), sizeof(hrtf_default)};
+     default:
+         break;
+     }
+ #else
+     static_cast<void>(name);
+ #endif
+     return {};
+ }
+
+} // namespace
+
+
+ /* Rebuilds the list of available HRTF data sets and returns their display
+  * names.
+  *
+  * pathopt, when set, is a comma-separated list of search paths; whitespace
+  * around each item is ignored. The default locations ("openal/hrtf" data
+  * directories and the built-in resource, if present) are also searched when
+  * no path list is given, or when the list does not end with a non-empty item
+  * (e.g. it ends with a comma).
+  *
+  * Holds EnumeratedHrtfLock for the duration and discards the previous
+  * enumeration.
+  */
+ al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt)
+ {
+     std::lock_guard<std::mutex> _{EnumeratedHrtfLock};
+     EnumeratedHrtfs.clear();
+
+     /* isspace has undefined behavior for negative values other than EOF, and
+      * plain char may be signed, so bytes >= 0x80 (e.g. UTF-8 in a path) must
+      * be converted to unsigned char first.
+      */
+     auto is_space = [](const char ch) noexcept -> bool
+     { return isspace(static_cast<unsigned char>(ch)) != 0; };
+
+     bool usedefaults{true};
+     if(pathopt)
+     {
+         const char *pathlist{pathopt->c_str()};
+         while(pathlist && *pathlist)
+         {
+             /* Skip leading whitespace and empty items. */
+             while(is_space(*pathlist) || *pathlist == ',')
+                 pathlist++;
+             if(*pathlist == '\0')
+                 continue;
+
+             const char *next{strchr(pathlist, ',')};
+             const char *end;
+             if(next)
+                 end = next++;
+             else
+             {
+                 /* The last item has no trailing comma, which disables the
+                  * default search locations.
+                  */
+                 end = pathlist + strlen(pathlist);
+                 usedefaults = false;
+             }
+
+             /* Trim trailing whitespace from the item. */
+             while(end != pathlist && is_space(*(end-1)))
+                 --end;
+             if(end != pathlist)
+             {
+                 const std::string pname{pathlist, end};
+                 for(const auto &fname : SearchDataFiles(".mhr", pname.c_str()))
+                     AddFileEntry(fname);
+             }
+
+             pathlist = next;
+         }
+     }
+
+     if(usedefaults)
+     {
+         for(const auto &fname : SearchDataFiles(".mhr", "openal/hrtf"))
+             AddFileEntry(fname);
+
+         if(!GetResource(IDR_DEFAULT_HRTF_MHR).empty())
+             AddBuiltInEntry("Built-In HRTF", IDR_DEFAULT_HRTF_MHR);
+     }
+
+     al::vector<std::string> list;
+     list.reserve(EnumeratedHrtfs.size());
+     for(auto &entry : EnumeratedHrtfs)
+         list.emplace_back(entry.mDispName);
+
+     return list;
+ }
+
+ /* Returns a reference to the HRTF data set with the given display name,
+  * prepared for the given device sample rate, or null on failure.
+  *
+  * The name must have been produced by a prior EnumerateHrtf call. An already
+  * loaded data set with a matching filename and sample rate is reused (with
+  * its reference count incremented). Otherwise the data is loaded from the
+  * embedded resource or file, resampled to devrate if its native rate
+  * differs, and inserted into the sorted LoadedHrtfs list.
+  *
+  * Holds EnumeratedHrtfLock and LoadedHrtfLock for the duration.
+  */
+ HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate)
+ {
+     std::lock_guard<std::mutex> _{EnumeratedHrtfLock};
+     auto entry_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(),
+         [&name](const HrtfEntry &entry) -> bool { return entry.mDispName == name; });
+     if(entry_iter == EnumeratedHrtfs.cend())
+         return nullptr;
+     const std::string &fname = entry_iter->mFilename;
+
+     /* LoadedHrtfs is searched with lower_bound, so it's expected to be sorted
+      * by filename. Several entries may share a filename, differing by sample
+      * rate.
+      */
+     std::lock_guard<std::mutex> __{LoadedHrtfLock};
+     auto hrtf_lt_fname = [](LoadedHrtf &hrtf, const std::string &filename) -> bool
+     { return hrtf.mFilename < filename; };
+     auto handle = std::lower_bound(LoadedHrtfs.begin(), LoadedHrtfs.end(), fname, hrtf_lt_fname);
+     while(handle != LoadedHrtfs.end() && handle->mFilename == fname)
+     {
+         HrtfStore *hrtf{handle->mEntry.get()};
+         if(hrtf && hrtf->mSampleRate == devrate)
+         {
+             hrtf->add_ref();
+             return HrtfStorePtr{hrtf};
+         }
+         ++handle;
+     }
+
+     /* Built-in entries use a synthesized "!<index>_<name>" filename (see
+      * AddBuiltInEntry); anything else is opened as a regular file.
+      */
+     std::unique_ptr<std::istream> stream;
+     int residx{};
+     char ch{};
+     if(sscanf(fname.c_str(), "!%d%c", &residx, &ch) == 2 && ch == '_')
+     {
+         TRACE("Loading %s...\n", fname.c_str());
+         al::span<const char> res{GetResource(residx)};
+         if(res.empty())
+         {
+             /* residx is a signed int, so it's printed with %d. */
+             ERR("Could not get resource %d, %s\n", residx, name.c_str());
+             return nullptr;
+         }
+         stream = std::make_unique<idstream>(res.begin(), res.end());
+     }
+     else
+     {
+         TRACE("Loading %s...\n", fname.c_str());
+         auto fstr = std::make_unique<al::ifstream>(fname.c_str(), std::ios::binary);
+         if(!fstr->is_open())
+         {
+             ERR("Could not open %s\n", fname.c_str());
+             return nullptr;
+         }
+         stream = std::move(fstr);
+     }
+
+     /* Dispatch to the loader matching the file's magic marker. */
+     std::unique_ptr<HrtfStore> hrtf;
+     char magic[sizeof(magicMarker03)];
+     stream->read(magic, sizeof(magic));
+     if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker03)))
+     {
+         /* gcount() returns a signed std::streamsize; convert it to match the
+          * %zu conversion.
+          */
+         ERR("%s data is too short (%zu bytes)\n", name.c_str(),
+             static_cast<size_t>(stream->gcount()));
+     }
+     else if(memcmp(magic, magicMarker03, sizeof(magicMarker03)) == 0)
+     {
+         TRACE("Detected data set format v3\n");
+         hrtf = LoadHrtf03(*stream, name.c_str());
+     }
+     else if(memcmp(magic, magicMarker02, sizeof(magicMarker02)) == 0)
+     {
+         TRACE("Detected data set format v2\n");
+         hrtf = LoadHrtf02(*stream, name.c_str());
+     }
+     else if(memcmp(magic, magicMarker01, sizeof(magicMarker01)) == 0)
+     {
+         TRACE("Detected data set format v1\n");
+         hrtf = LoadHrtf01(*stream, name.c_str());
+     }
+     else if(memcmp(magic, magicMarker00, sizeof(magicMarker00)) == 0)
+     {
+         TRACE("Detected data set format v0\n");
+         hrtf = LoadHrtf00(*stream, name.c_str());
+     }
+     else
+         ERR("Invalid header in %s: \"%.8s\"\n", name.c_str(), magic);
+     stream.reset();
+
+     if(!hrtf)
+     {
+         ERR("Failed to load %s\n", name.c_str());
+         return nullptr;
+     }
+
+     if(hrtf->mSampleRate != devrate)
+     {
+         TRACE("Resampling HRTF %s (%uhz -> %uhz)\n", name.c_str(), hrtf->mSampleRate, devrate);
+
+         /* Calculate the last elevation's index and get the total IR count. */
+         const size_t lastEv{std::accumulate(hrtf->mFields.begin(), hrtf->mFields.end(), size_t{0},
+             [](const size_t curval, const HrtfStore::Field &field) noexcept -> size_t
+             { return curval + field.evCount; }
+         ) - 1};
+         const size_t irCount{size_t{hrtf->mElev[lastEv].irOffset} + hrtf->mElev[lastEv].azCount};
+
+         /* Resample all the IRs. */
+         std::array<std::array<double,HrirLength>,2> inout;
+         PPhaseResampler rs;
+         rs.init(hrtf->mSampleRate, devrate);
+         for(size_t i{0};i < irCount;++i)
+         {
+             /* The store exposes its coefficients as const; the freshly loaded
+              * data is modified in place here before being published.
+              */
+             HrirArray &coeffs = const_cast<HrirArray&>(hrtf->mCoeffs[i]);
+             for(size_t j{0};j < 2;++j)
+             {
+                 std::transform(coeffs.cbegin(), coeffs.cend(), inout[0].begin(),
+                     [j](const float2 &in) noexcept -> double { return in[j]; });
+                 rs.process(HrirLength, inout[0].data(), HrirLength, inout[1].data());
+                 for(size_t k{0};k < HrirLength;++k)
+                     coeffs[k][j] = static_cast<float>(inout[1][k]);
+             }
+         }
+         rs = {};
+
+         /* Scale the delays for the new sample rate. */
+         float max_delay{0.0f};
+         auto new_delays = al::vector<float2>(irCount);
+         const float rate_scale{static_cast<float>(devrate)/static_cast<float>(hrtf->mSampleRate)};
+         for(size_t i{0};i < irCount;++i)
+         {
+             for(size_t j{0};j < 2;++j)
+             {
+                 const float new_delay{std::round(hrtf->mDelays[i][j] * rate_scale) /
+                     float{HrirDelayFracOne}};
+                 max_delay = maxf(max_delay, new_delay);
+                 new_delays[i][j] = new_delay;
+             }
+         }
+
+         /* If the new delays exceed the max, scale it down to fit (essentially
+          * shrinking the head radius; not ideal but better than a per-delay
+          * clamp).
+          */
+         float delay_scale{HrirDelayFracOne};
+         if(max_delay > MaxHrirDelay)
+         {
+             WARN("Resampled delay exceeds max (%.2f > %d)\n", max_delay, MaxHrirDelay);
+             delay_scale *= float{MaxHrirDelay} / max_delay;
+         }
+
+         for(size_t i{0};i < irCount;++i)
+         {
+             ubyte2 &delays = const_cast<ubyte2&>(hrtf->mDelays[i]);
+             for(size_t j{0};j < 2;++j)
+                 delays[j] = static_cast<ubyte>(float2int(new_delays[i][j]*delay_scale + 0.5f));
+         }
+
+         /* Scale the IR size for the new sample rate and update the stored
+          * sample rate.
+          */
+         const float newIrSize{std::round(static_cast<float>(hrtf->mIrSize) * rate_scale)};
+         hrtf->mIrSize = static_cast<uint8_t>(minf(HrirLength, newIrSize));
+         hrtf->mSampleRate = devrate;
+     }
+
+     TRACE("Loaded HRTF %s for sample rate %uhz, %u-sample filter\n", name.c_str(),
+         hrtf->mSampleRate, hrtf->mIrSize);
+     /* handle points just past any same-named entries, so inserting there
+      * keeps the list ordered by filename.
+      */
+     handle = LoadedHrtfs.emplace(handle, fname, std::move(hrtf));
+
+     return HrtfStorePtr{handle->mEntry.get()};
+ }
+
+
+ /* Increments the store's reference count and traces the new value. */
+ void HrtfStore::add_ref()
+ {
+     const auto newcount = IncrementRef(mRef);
+     void *const self{this};
+     TRACE("HrtfStore %p increasing refcount to %u\n", self, newcount);
+ }
+
+ /* Decrements the store's reference count. When the count reaches zero, every
+  * loaded HRTF whose count is zero is released and dropped from LoadedHrtfs
+  * while LoadedHrtfLock is held.
+  */
+ void HrtfStore::dec_ref()
+ {
+     const auto remaining = DecrementRef(mRef);
+     void *const self{this};
+     TRACE("HrtfStore %p decreasing refcount to %u\n", self, remaining);
+     if(remaining != 0)
+         return;
+
+     std::lock_guard<std::mutex> lock{LoadedHrtfLock};
+
+     /* Releases an entry's data if nothing references it, and reports whether
+      * the entry should be erased from the list.
+      */
+     auto release_if_unused = [](LoadedHrtf &loaded) -> bool
+     {
+         HrtfStore *store{loaded.mEntry.get()};
+         if(!store || ReadRef(store->mRef) != 0)
+             return false;
+
+         TRACE("Unloading unused HRTF %s\n", loaded.mFilename.data());
+         loaded.mEntry = nullptr;
+         return true;
+     };
+     LoadedHrtfs.erase(
+         std::remove_if(LoadedHrtfs.begin(), LoadedHrtfs.end(), release_if_unused),
+         LoadedHrtfs.end());
+ }
diff --git a/core/hrtf.h b/core/hrtf.h
new file mode 100644
index 00000000..eb18682a
--- /dev/null
+++ b/core/hrtf.h
@@ -0,0 +1,89 @@
+#ifndef CORE_HRTF_H
+#define CORE_HRTF_H
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <string>
+
+#include "almalloc.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "atomic.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "mixer/hrtfdefs.h"
+#include "intrusive_ptr.h"
+#include "vector.h"
+
+
+ /* A loaded HRTF data set. Lifetime is managed through the reference count
+ * via add_ref/dec_ref, which HrtfStorePtr (an intrusive pointer) is built on.
+ */
+ struct HrtfStore {
+ RefCount mRef;
+
+ /* Sample rate and impulse response length, packed as 24-bit and 8-bit
+ * bitfields.
+ */
+ uint mSampleRate : 24;
+ uint mIrSize : 8;
+
+ /* One measurement field: its distance and how many elevations it has. */
+ struct Field {
+ float distance;
+ ubyte evCount;
+ };
+ /* NOTE: Fields are stored *backwards*. field[0] is the farthest field, and
+ * field[fdCount-1] is the nearest.
+ */
+ al::span<const Field> mFields;
+
+ /* One elevation: its azimuth count and the index of its first IR. */
+ struct Elevation {
+ ushort azCount;
+ ushort irOffset;
+ };
+ /* Per-elevation info, and the per-IR coefficient and delay arrays that
+ * irOffset indexes into.
+ */
+ Elevation *mElev;
+ const HrirArray *mCoeffs;
+ const ubyte2 *mDelays;
+
+ /* NOTE(review): defined elsewhere; presumably writes the filter
+ * coefficients and delays for the given direction, distance, and spread
+ * into the output parameters -- confirm against the definition.
+ */
+ void getCoeffs(float elevation, float azimuth, float distance, float spread, HrirArray &coeffs,
+ const al::span<uint,2> delays);
+
+ void add_ref();
+ void dec_ref();
+
+ DEF_PLACE_NEWDEL()
+ };
+ using HrtfStorePtr = al::intrusive_ptr<HrtfStore>;
+
+
+ /* Distinct wrapper types so an elevation and an azimuth can't be swapped by
+ * accident. Going by the names, the values are presumably in radians.
+ */
+ struct EvRadians { float value; };
+ struct AzRadians { float value; };
+ /* A direction given as an elevation/azimuth pair. */
+ struct AngularPoint {
+ EvRadians Elev;
+ AzRadians Azim;
+ };
+
+
+ /* HRTF filter state for a set of channels. mChannels is a flexible array
+ * sized at construction, so instances are expected to be made with Create().
+ */
+ struct DirectHrtfState {
+ /* Scratch buffer holding one buffer line of samples. */
+ std::array<float,BufferLineSize> mTemp;
+
+ /* HRTF filter state for dry buffer content */
+ uint mIrSize{0};
+ al::FlexArray<HrtfChannelState> mChannels;
+
+ DirectHrtfState(size_t numchans) : mChannels{numchans} { }
+ /**
+ * Produces HRTF filter coefficients for decoding B-Format, given a set of
+ * virtual speaker positions, a matching decoding matrix, and per-order
+ * high-frequency gains for the decoder. The calculated impulse responses
+ * are ordered and scaled according to the matrix input.
+ */
+ void build(const HrtfStore *Hrtf, const uint irSize, const bool perHrirMin,
+ const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels],
+ const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain);
+
+ /* Creates a state object with room for num_chans channels. */
+ static std::unique_ptr<DirectHrtfState> Create(size_t num_chans);
+
+ /* NOTE(review): presumably supplies new/delete operators that account for
+ * the trailing flexible array -- confirm in almalloc.h.
+ */
+ DEF_FAM_NEWDEL(DirectHrtfState, mChannels)
+ };
+
+
+al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt);
+HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate);
+
+#endif /* CORE_HRTF_H */
diff --git a/core/logging.cpp b/core/logging.cpp
new file mode 100644
index 00000000..34a95e5a
--- /dev/null
+++ b/core/logging.cpp
@@ -0,0 +1,89 @@
+
+#include "config.h"
+
+#include "logging.h"
+
+#include <cstdarg>
+#include <cstdio>
+#include <string>
+
+#include "alspan.h"
+#include "strutils.h"
+#include "vector.h"
+
+
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#elif defined(__ANDROID__)
+#include <android/log.h>
+#endif
+
+ /* Formats a printf-style message with a level-specific "[ALSOFT] (xx) "
+ * prefix and writes it to logfile when gLogLevel is at least the given level.
+ * On Windows debug builds and on Android the message is additionally sent to
+ * the platform's debug log, regardless of gLogLevel.
+ *
+ * The message is first formatted into a 256-byte stack buffer; a heap buffer
+ * is only used when it doesn't fit.
+ */
+ void al_print(LogLevel level, FILE *logfile, const char *fmt, ...)
+ {
+ /* Kind of ugly since string literals are const char arrays with a size
+ * that includes the null terminator, which we want to exclude from the
+ * span.
+ */
+ auto prefix = al::as_span("[ALSOFT] (--) ").first<14>();
+ switch(level)
+ {
+ case LogLevel::Disable: break;
+ case LogLevel::Error: prefix = al::as_span("[ALSOFT] (EE) ").first<14>(); break;
+ case LogLevel::Warning: prefix = al::as_span("[ALSOFT] (WW) ").first<14>(); break;
+ case LogLevel::Trace: prefix = al::as_span("[ALSOFT] (II) ").first<14>(); break;
+ }
+
+ al::vector<char> dynmsg;
+ std::array<char,256> stcmsg{};
+
+ /* str points at whichever buffer ends up holding the full message; msg
+ * is the part of that buffer following the prefix.
+ */
+ char *str{stcmsg.data()};
+ auto prefend1 = std::copy_n(prefix.begin(), prefix.size(), stcmsg.begin());
+ al::span<char> msg{prefend1, stcmsg.end()};
+
+ /* The argument list may need to be consumed twice (once per buffer), so
+ * keep a copy for the second pass.
+ */
+ std::va_list args, args2;
+ va_start(args, fmt);
+ va_copy(args2, args);
+ const int msglen{std::vsnprintf(msg.data(), msg.size(), fmt, args)};
+ if(msglen >= 0 && static_cast<size_t>(msglen) >= msg.size()) UNLIKELY
+ {
+ /* The output was truncated. msglen excludes the null terminator, so
+ * allocate room for the prefix, the message, and the terminator.
+ */
+ dynmsg.resize(static_cast<size_t>(msglen)+prefix.size() + 1u);
+
+ str = dynmsg.data();
+ auto prefend2 = std::copy_n(prefix.begin(), prefix.size(), dynmsg.begin());
+ msg = {prefend2, dynmsg.end()};
+
+ std::vsnprintf(msg.data(), msg.size(), fmt, args2);
+ }
+ va_end(args2);
+ va_end(args);
+
+ if(gLogLevel >= level)
+ {
+ fputs(str, logfile);
+ fflush(logfile);
+ }
+ #if defined(_WIN32) && !defined(NDEBUG)
+ /* OutputDebugStringW has no 'level' property to distinguish between
+ * informational, warning, or error debug messages. So only print them for
+ * non-Release builds.
+ */
+ std::wstring wstr{utf8_to_wstr(str)};
+ OutputDebugStringW(wstr.c_str());
+ #elif defined(__ANDROID__)
+ /* Map the log level to the closest Android log priority. */
+ auto android_severity = [](LogLevel l) noexcept
+ {
+ switch(l)
+ {
+ case LogLevel::Trace: return ANDROID_LOG_DEBUG;
+ case LogLevel::Warning: return ANDROID_LOG_WARN;
+ case LogLevel::Error: return ANDROID_LOG_ERROR;
+ /* Should not happen. */
+ case LogLevel::Disable:
+ break;
+ }
+ return ANDROID_LOG_ERROR;
+ };
+ __android_log_print(android_severity(level), "openal", "%s", str);
+ #endif
+ }
diff --git a/core/logging.h b/core/logging.h
new file mode 100644
index 00000000..f4b6ab56
--- /dev/null
+++ b/core/logging.h
@@ -0,0 +1,51 @@
+#ifndef CORE_LOGGING_H
+#define CORE_LOGGING_H
+
+#include <stdio.h>
+
+#include "opthelpers.h"
+
+
+ /* Logging verbosity, ordered from least to most verbose. The order matters:
+ * a message is emitted when gLogLevel >= the message's level.
+ */
+ enum class LogLevel {
+ Disable,
+ Error,
+ Warning,
+ Trace
+ };
+ /* The currently active verbosity level. */
+ extern LogLevel gLogLevel;
+
+ /* The stream the TRACE/WARN/ERR macros write to. */
+ extern FILE *gLogFile;
+
+#ifdef __USE_MINGW_ANSI_STDIO
+[[gnu::format(gnu_printf,3,4)]]
+#else
+[[gnu::format(printf,3,4)]]
+#endif
+void al_print(LogLevel level, FILE *logfile, const char *fmt, ...);
+
+ /* printf-style logging macros. In this first variant (everything except
+ * Windows debug builds and Android) the level is checked before calling
+ * al_print, so the arguments aren't evaluated when the message would be
+ * dropped.
+ */
+ #if (!defined(_WIN32) || defined(NDEBUG)) && !defined(__ANDROID__)
+ #define TRACE(...) do { \
+ if(gLogLevel >= LogLevel::Trace) UNLIKELY \
+ al_print(LogLevel::Trace, gLogFile, __VA_ARGS__); \
+ } while(0)
+
+ #define WARN(...) do { \
+ if(gLogLevel >= LogLevel::Warning) UNLIKELY \
+ al_print(LogLevel::Warning, gLogFile, __VA_ARGS__); \
+ } while(0)
+
+ #define ERR(...) do { \
+ if(gLogLevel >= LogLevel::Error) UNLIKELY \
+ al_print(LogLevel::Error, gLogFile, __VA_ARGS__); \
+ } while(0)
+
+ #else
+
+ /* On Windows debug builds and Android, al_print is always called so it can
+ * also forward the message to the platform's debug log.
+ */
+ #define TRACE(...) al_print(LogLevel::Trace, gLogFile, __VA_ARGS__)
+
+ #define WARN(...) al_print(LogLevel::Warning, gLogFile, __VA_ARGS__)
+
+ #define ERR(...) al_print(LogLevel::Error, gLogFile, __VA_ARGS__)
+ #endif
+
+#endif /* CORE_LOGGING_H */
diff --git a/core/mastering.cpp b/core/mastering.cpp
new file mode 100644
index 00000000..97a4008e
--- /dev/null
+++ b/core/mastering.cpp
@@ -0,0 +1,439 @@
+
+#include "config.h"
+
+#include "mastering.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <new>
+
+#include "almalloc.h"
+#include "alnumeric.h"
+#include "alspan.h"
+#include "opthelpers.h"
+
+
+/* These structures assume BufferLineSize is a power of 2. */
+static_assert((BufferLineSize & (BufferLineSize-1)) == 0, "BufferLineSize is not a power of 2");
+
+ /* State for the sliding-window maximum used by the peak hold detector. The
+ * two arrays are used as ring buffers (indices are wrapped with a
+ * BufferLineSize-1 mask).
+ */
+ struct SlidingHold {
+ /* Candidate maxima and the sample index at which each one expires. */
+ alignas(16) float mValues[BufferLineSize];
+ uint mExpiries[BufferLineSize];
+ /* Ring positions of the most recently stored candidate (lower) and of
+ * the current maximum (upper).
+ */
+ uint mLowerIndex;
+ uint mUpperIndex;
+ /* Hold length in samples, added to the sample index to get an expiry. */
+ uint mLength;
+ };
+
+
+namespace {
+
+using namespace std::placeholders;
+
+ /* This sliding hold follows the input level with an instant attack and a
+ * fixed duration hold before an instant release to the next highest level.
+ * It is a sliding window maximum (descending maxima) implementation based on
+ * Richard Harter's ascending minima algorithm available at:
+ *
+ * http://www.richardhartersworld.com/cri/2001/slidingmin.html
+ *
+ * i is the sample's index within the current update and in is its level.
+ * Returns the held maximum after accounting for the new sample.
+ */
+ float UpdateSlidingHold(SlidingHold *Hold, const uint i, const float in)
+ {
+ static constexpr uint mask{BufferLineSize - 1};
+ const uint length{Hold->mLength};
+ float (&values)[BufferLineSize] = Hold->mValues;
+ uint (&expiries)[BufferLineSize] = Hold->mExpiries;
+ uint lowerIndex{Hold->mLowerIndex};
+ uint upperIndex{Hold->mUpperIndex};
+
+ /* Drop the current maximum once its hold time has run out. */
+ if(i >= expiries[upperIndex])
+ upperIndex = (upperIndex + 1) & mask;
+
+ if(in >= values[upperIndex])
+ {
+ /* The new sample is the new maximum; it replaces all candidates. */
+ values[upperIndex] = in;
+ expiries[upperIndex] = i + length;
+ lowerIndex = upperIndex;
+ }
+ else
+ {
+ /* Walk backward from the newest candidate, wrapping from index 0 to
+ * mask, until finding one that's greater than the input (the
+ * negated >= also stops on NaN). The input is stored just after it,
+ * discarding the smaller candidates that were passed over.
+ */
+ do {
+ do {
+ if(!(in >= values[lowerIndex]))
+ goto found_place;
+ } while(lowerIndex--);
+ lowerIndex = mask;
+ } while(true);
+ found_place:
+
+ lowerIndex = (lowerIndex + 1) & mask;
+ values[lowerIndex] = in;
+ expiries[lowerIndex] = i + length;
+ }
+
+ Hold->mLowerIndex = lowerIndex;
+ Hold->mUpperIndex = upperIndex;
+
+ return values[upperIndex];
+ }
+
+ /* Subtracts n from the expiry of every active entry in the hold's ring, i.e.
+  * the entries from mUpperIndex through mLowerIndex inclusive, wrapping around
+  * the end of the array when the lower index is before the upper one.
+  */
+ void ShiftSlidingHold(SlidingHold *Hold, const uint n)
+ {
+     uint *const ring_first{std::begin(Hold->mExpiries)};
+     uint *const ring_end{std::end(Hold->mExpiries)};
+     uint *cursor{ring_first + Hold->mUpperIndex};
+     uint *const newest{ring_first + Hold->mLowerIndex};
+
+     if(newest < cursor)
+     {
+         /* The active range wraps; handle the tail of the array first. */
+         for(;cursor != ring_end;++cursor)
+             *cursor -= n;
+         cursor = ring_first;
+     }
+     for(;cursor <= newest;++cursor)
+         *cursor -= n;
+ }
+
+
+ /* Links multichannel compression by writing, for each of the SamplesToDo
+  * samples, the largest absolute value across all channels into the side
+  * chain (starting at the look-ahead offset).
+  */
+ void LinkChannels(Compressor *Comp, const uint SamplesToDo, const FloatBufferLine *OutBuffer)
+ {
+     const size_t numChans{Comp->mNumChans};
+
+     ASSUME(SamplesToDo > 0);
+     ASSUME(numChans > 0);
+
+     float *const linked{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+     std::fill_n(linked, SamplesToDo, 0.0f);
+
+     for(size_t c{0};c < numChans;++c)
+     {
+         const float *RESTRICT buffer{al::assume_aligned<16>(OutBuffer[c].data())};
+         for(uint s{0};s < SamplesToDo;++s)
+             linked[s] = maxf(linked[s], std::fabs(buffer[s]));
+     }
+ }
+
+ /* This calculates the squared crest factor of the control signal for the
+  * basic automation of the attack/release times. As suggested by the paper,
+  * it uses an instantaneous squared peak detector and a squared RMS detector
+  * both with 200ms release times.
+  *
+  * Reads SamplesToDo side-chain samples starting at the look-ahead offset and
+  * writes the results to mCrestFactor. The detector state is carried between
+  * calls in mLastPeakSq and mLastRmsSq.
+  */
+ void CrestDetector(Compressor *Comp, const uint SamplesToDo)
+ {
+     const float a_crest{Comp->mCrestCoeff};
+     float y2_peak{Comp->mLastPeakSq};
+     float y2_rms{Comp->mLastRmsSq};
+
+     ASSUME(SamplesToDo > 0);
+
+     /* Each output depends on the detector state left by the previous sample,
+      * so this is an explicit in-order loop. std::transform makes no guarantee
+      * about the order in which it applies its operation.
+      */
+     const float *side_chain{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+     float *crest_out{std::begin(Comp->mCrestFactor)};
+     for(uint s{0};s < SamplesToDo;++s)
+     {
+         const float x_abs{side_chain[s]};
+         const float x2{clampf(x_abs * x_abs, 0.000001f, 1000000.0f)};
+
+         y2_peak = maxf(x2, lerpf(x2, y2_peak, a_crest));
+         y2_rms = lerpf(x2, y2_rms, a_crest);
+         crest_out[s] = y2_peak / y2_rms;
+     }
+
+     Comp->mLastPeakSq = y2_peak;
+     Comp->mLastRmsSq = y2_rms;
+ }
+
+ /* The simple peak detector at the start of the side chain: converts the
+  * linked absolute levels to the log domain in place, with the amplitude
+  * floored at near-zero so the logarithm stays finite.
+  */
+ void PeakDetector(Compressor *Comp, const uint SamplesToDo)
+ {
+     ASSUME(SamplesToDo > 0);
+
+     float *const levels{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+     for(uint s{0};s < SamplesToDo;++s)
+         levels[s] = std::log(maxf(0.000001f, levels[s]));
+ }
+
+ /* An optional hold can be used to extend the peak detector so it can more
+  * solidly detect fast transients. This is best used when operating as a
+  * limiter.
+  *
+  * Converts the side-chain levels to the log domain and replaces each with
+  * the sliding hold's current maximum, then rebases the hold's expiries for
+  * the next update.
+  */
+ void PeakHoldDetector(Compressor *Comp, const uint SamplesToDo)
+ {
+     ASSUME(SamplesToDo > 0);
+
+     SlidingHold *hold{Comp->mHold};
+
+     /* The sliding hold is stateful and needs the samples fed in order with
+      * increasing indices, so this is an explicit loop. std::transform makes
+      * no guarantee about the order in which it applies its operation.
+      */
+     float *const side_chain{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+     for(uint i{0};i < SamplesToDo;++i)
+     {
+         const float x_G{std::log(maxf(0.000001f, side_chain[i]))};
+         side_chain[i] = UpdateSlidingHold(hold, i, x_G);
+     }
+
+     ShiftSlidingHold(hold, SamplesToDo);
+ }
+
+ /* This is the heart of the feed-forward compressor. It operates in the log
+ * domain (to better match human hearing) and can apply some basic automation
+ * to knee width, attack/release times, make-up/post gain, and clipping
+ * reduction.
+ *
+ * The first SamplesToDo side-chain entries are overwritten with the linear
+ * gains to apply; the control level for each is read lookAhead entries ahead
+ * of it. The smoothing state is carried between calls in mLastRelease,
+ * mLastAttack, and mLastGainDev.
+ */
+ void GainCompressor(Compressor *Comp, const uint SamplesToDo)
+ {
+ const bool autoKnee{Comp->mAuto.Knee};
+ const bool autoAttack{Comp->mAuto.Attack};
+ const bool autoRelease{Comp->mAuto.Release};
+ const bool autoPostGain{Comp->mAuto.PostGain};
+ const bool autoDeclip{Comp->mAuto.Declip};
+ const uint lookAhead{Comp->mLookAhead};
+ const float threshold{Comp->mThreshold};
+ const float slope{Comp->mSlope};
+ const float attack{Comp->mAttack};
+ const float release{Comp->mRelease};
+ const float c_est{Comp->mGainEstimate};
+ const float a_adp{Comp->mAdaptCoeff};
+ const float *crestFactor{Comp->mCrestFactor};
+ float postGain{Comp->mPostGain};
+ float knee{Comp->mKnee};
+ /* Fixed attack/release coefficients, used as-is unless the respective
+ * automation recomputes them per sample below.
+ */
+ float t_att{attack};
+ float t_rel{release - attack};
+ float a_att{std::exp(-1.0f / t_att)};
+ float a_rel{std::exp(-1.0f / t_rel)};
+ float y_1{Comp->mLastRelease};
+ float y_L{Comp->mLastAttack};
+ float c_dev{Comp->mLastGainDev};
+
+ ASSUME(SamplesToDo > 0);
+
+ for(float &sideChain : al::span<float>{Comp->mSideChain, SamplesToDo})
+ {
+ if(autoKnee)
+ knee = maxf(0.0f, 2.5f * (c_dev + c_est));
+ const float knee_h{0.5f * knee};
+
+ /* This is the gain computer. It applies a static compression curve
+ * to the control signal.
+ */
+ /* The control level is taken lookAhead entries past the current one. */
+ const float x_over{std::addressof(sideChain)[lookAhead] - threshold};
+ const float y_G{
+ (x_over <= -knee_h) ? 0.0f :
+ (std::fabs(x_over) < knee_h) ? (x_over + knee_h) * (x_over + knee_h) / (2.0f * knee) :
+ x_over};
+
+ /* One crest factor value is consumed per sample, whether or not the
+ * attack/release automation uses it.
+ */
+ const float y2_crest{*(crestFactor++)};
+ if(autoAttack)
+ {
+ t_att = 2.0f*attack/y2_crest;
+ a_att = std::exp(-1.0f / t_att);
+ }
+ if(autoRelease)
+ {
+ t_rel = 2.0f*release/y2_crest - t_att;
+ a_rel = std::exp(-1.0f / t_rel);
+ }
+
+ /* Gain smoothing (ballistics) is done via a smooth decoupled peak
+ * detector. The attack time is subtracted from the release time
+ * above to compensate for the chained operating mode.
+ */
+ const float x_L{-slope * y_G};
+ y_1 = maxf(x_L, lerpf(x_L, y_1, a_rel));
+ y_L = lerpf(y_1, y_L, a_att);
+
+ /* Knee width and make-up gain automation make use of a smoothed
+ * measurement of deviation between the control signal and estimate.
+ * The estimate is also used to bias the measurement to hot-start its
+ * average.
+ */
+ c_dev = lerpf(-(y_L+c_est), c_dev, a_adp);
+
+ if(autoPostGain)
+ {
+ /* Clipping reduction is only viable when make-up gain is being
+ * automated. It modifies the deviation to further attenuate the
+ * control signal when clipping is detected. The adaptation time
+ * is sufficiently long enough to suppress further clipping at the
+ * same output level.
+ */
+ if(autoDeclip)
+ c_dev = maxf(c_dev, sideChain - y_L - threshold - c_est);
+
+ postGain = -(c_dev + c_est);
+ }
+
+ /* Convert the log-domain gain back to a linear multiplier. */
+ sideChain = std::exp(postGain - y_L);
+ }
+
+ Comp->mLastRelease = y_1;
+ Comp->mLastAttack = y_L;
+ Comp->mLastGainDev = c_dev;
+ }
+
+ /* Combined with the hold time, a look-ahead delay can improve handling of
+ * fast transients by allowing the envelope time to converge prior to
+ * reaching the offending impulse. This is best used when operating as a
+ * limiter.
+ *
+ * Delays each channel of OutBuffer in place by lookAhead samples, using the
+ * per-channel mDelay buffers to carry samples over between calls.
+ */
+ void SignalDelay(Compressor *Comp, const uint SamplesToDo, FloatBufferLine *OutBuffer)
+ {
+ const size_t numChans{Comp->mNumChans};
+ const uint lookAhead{Comp->mLookAhead};
+
+ ASSUME(SamplesToDo > 0);
+ ASSUME(numChans > 0);
+ ASSUME(lookAhead > 0);
+
+ for(size_t c{0};c < numChans;c++)
+ {
+ float *inout{al::assume_aligned<16>(OutBuffer[c].data())};
+ float *delaybuf{al::assume_aligned<16>(Comp->mDelay[c].data())};
+
+ auto inout_end = inout + SamplesToDo;
+ if(SamplesToDo >= lookAhead) LIKELY
+ {
+ /* Move the last lookAhead input samples to the front, then swap
+ * them with the delay buffer. The output then starts with the
+ * previously delayed samples, and the delay buffer holds the
+ * newest ones.
+ */
+ auto delay_end = std::rotate(inout, inout_end - lookAhead, inout_end);
+ std::swap_ranges(inout, delay_end, delaybuf);
+ }
+ else
+ {
+ /* Fewer samples than the delay length: swap all of them with the
+ * oldest delayed samples, then rotate the delay buffer so the
+ * newly stored samples sit at its end.
+ */
+ auto delay_start = std::swap_ranges(inout, inout_end, delaybuf);
+ std::rotate(delaybuf, delay_start, delaybuf + lookAhead);
+ }
+ }
+ }
+
+} // namespace
+
+
+ /* Allocates and initializes a compressor. The look-ahead delay lines and the
+  * optional sliding hold are placed in the same allocation, directly after
+  * the Compressor object. Time parameters are multiplied by SampleRate to get
+  * sample counts, and dB parameters are converted to linear (pre-gain) or
+  * natural-log (post-gain, threshold, knee) values.
+  *
+  * Look-ahead and hold lengths are clamped to BufferLineSize-1 samples. The
+  * hold is only used when there's a look-ahead and the hold is longer than
+  * one sample.
+  *
+  * Throws std::bad_alloc if the storage can't be allocated.
+  */
+ std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const float SampleRate,
+     const bool AutoKnee, const bool AutoAttack, const bool AutoRelease, const bool AutoPostGain,
+     const bool AutoDeclip, const float LookAheadTime, const float HoldTime, const float PreGainDb,
+     const float PostGainDb, const float ThresholdDb, const float Ratio, const float KneeDb,
+     const float AttackTime, const float ReleaseTime)
+ {
+     const auto lookAhead = static_cast<uint>(
+         clampf(std::round(LookAheadTime*SampleRate), 0.0f, BufferLineSize-1));
+     const auto hold = static_cast<uint>(
+         clampf(std::round(HoldTime*SampleRate), 0.0f, BufferLineSize-1));
+
+     size_t size{sizeof(Compressor)};
+     if(lookAhead > 0)
+     {
+         size += sizeof(*Compressor::mDelay) * NumChans;
+         /* The sliding hold implementation doesn't handle a length of 1. A 1-
+          * sample hold is useless anyway, it would only ever give back what was
+          * just given to it.
+          */
+         if(hold > 1)
+             size += sizeof(*Compressor::mHold);
+     }
+
+     /* Don't construct an object in a failed allocation. */
+     void *storage{al_calloc(16, size)};
+     if(!storage)
+         throw std::bad_alloc{};
+
+     auto Comp = CompressorPtr{al::construct_at(static_cast<Compressor*>(storage))};
+     Comp->mNumChans = NumChans;
+     Comp->mAuto.Knee = AutoKnee;
+     Comp->mAuto.Attack = AutoAttack;
+     Comp->mAuto.Release = AutoRelease;
+     Comp->mAuto.PostGain = AutoPostGain;
+     /* Clipping reduction only works together with automated post gain. */
+     Comp->mAuto.Declip = AutoPostGain && AutoDeclip;
+     Comp->mLookAhead = lookAhead;
+     Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f);
+     Comp->mPostGain = PostGainDb * std::log(10.0f) / 20.0f;
+     Comp->mThreshold = ThresholdDb * std::log(10.0f) / 20.0f;
+     Comp->mSlope = 1.0f / maxf(1.0f, Ratio) - 1.0f;
+     Comp->mKnee = maxf(0.0f, KneeDb * std::log(10.0f) / 20.0f);
+     Comp->mAttack = maxf(1.0f, AttackTime * SampleRate);
+     Comp->mRelease = maxf(1.0f, ReleaseTime * SampleRate);
+
+     /* Knee width automation actually treats the compressor as a limiter. By
+      * varying the knee width, it can effectively be seen as applying
+      * compression over a wide range of ratios.
+      */
+     if(AutoKnee)
+         Comp->mSlope = -1.0f;
+
+     if(lookAhead > 0)
+     {
+         /* Lay out the trailing storage: the optional hold first, followed by
+          * the per-channel delay lines.
+          */
+         if(hold > 1)
+         {
+             Comp->mHold = al::construct_at(reinterpret_cast<SlidingHold*>(Comp.get() + 1));
+             Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity();
+             Comp->mHold->mExpiries[0] = hold;
+             Comp->mHold->mLength = hold;
+             Comp->mDelay = reinterpret_cast<FloatBufferLine*>(Comp->mHold + 1);
+         }
+         else
+             Comp->mDelay = reinterpret_cast<FloatBufferLine*>(Comp.get() + 1);
+         std::uninitialized_fill_n(Comp->mDelay, NumChans, FloatBufferLine{});
+     }
+
+     Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms
+     Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope;
+     Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s
+
+     return Comp;
+ }
+
+ /* Destroys the sliding hold and delay lines that Create() constructed in
+  * the storage trailing this object, when present.
+  */
+ Compressor::~Compressor()
+ {
+     if(mHold != nullptr)
+     {
+         al::destroy_at(mHold);
+         mHold = nullptr;
+     }
+     if(mDelay != nullptr)
+     {
+         al::destroy_n(mDelay, mNumChans);
+         mDelay = nullptr;
+     }
+ }
+
+
+ /* Runs the compressor over SamplesToDo samples of each channel in OutBuffer,
+  * in place: pre-gain, channel linking, level detection, gain computation,
+  * optional look-ahead delay, and finally gain application. Side-chain
+  * values past SamplesToDo are kept for the next call.
+  */
+ void Compressor::process(const uint SamplesToDo, FloatBufferLine *OutBuffer)
+ {
+     const size_t numChans{mNumChans};
+
+     ASSUME(SamplesToDo > 0);
+     ASSUME(numChans > 0);
+
+     /* Skip the pre-gain pass entirely when it's unity. */
+     const float preGain{mPreGain};
+     if(preGain != 1.0f)
+     {
+         for(size_t c{0};c < numChans;++c)
+         {
+             float *buffer{al::assume_aligned<16>(OutBuffer[c].data())};
+             for(uint s{0};s < SamplesToDo;++s)
+                 buffer[s] = buffer[s] * preGain;
+         }
+     }
+
+     LinkChannels(this, SamplesToDo, OutBuffer);
+
+     /* The crest factor is only needed for attack/release automation. */
+     if(mAuto.Attack || mAuto.Release)
+         CrestDetector(this, SamplesToDo);
+
+     if(mHold)
+         PeakHoldDetector(this, SamplesToDo);
+     else
+         PeakDetector(this, SamplesToDo);
+
+     GainCompressor(this, SamplesToDo);
+
+     if(mDelay)
+         SignalDelay(this, SamplesToDo, OutBuffer);
+
+     /* The front of the side chain now holds the linear gains to apply. */
+     const float *gains{al::assume_aligned<16>(&mSideChain[0])};
+     for(size_t c{0};c < numChans;++c)
+     {
+         float *buffer{al::assume_aligned<16>(OutBuffer[c].data())};
+         for(uint s{0};s < SamplesToDo;++s)
+             buffer[s] = gains[s] * buffer[s];
+     }
+
+     /* Move the look-ahead portion to the front for the next update. */
+     auto side_begin = std::begin(mSideChain) + SamplesToDo;
+     std::copy(side_begin, side_begin+mLookAhead, std::begin(mSideChain));
+ }
diff --git a/core/mastering.h b/core/mastering.h
new file mode 100644
index 00000000..1a36937c
--- /dev/null
+++ b/core/mastering.h
@@ -0,0 +1,105 @@
+#ifndef CORE_MASTERING_H
+#define CORE_MASTERING_H
+
+#include <memory>
+
+#include "almalloc.h"
+#include "bufferline.h"
+
+struct SlidingHold;
+
+using uint = unsigned int;
+
+
+/* General topology and basic automation was based on the following paper:
+ *
+ * D. Giannoulis, M. Massberg and J. D. Reiss,
+ * "Parameter Automation in a Dynamic Range Compressor,"
+ * Journal of the Audio Engineering Society, v61 (10), Oct. 2013
+ *
+ * Available (along with supplemental reading) at:
+ *
+ * http://c4dm.eecs.qmul.ac.uk/audioengineering/compressors/
+ */
+struct Compressor {
+ size_t mNumChans{0u}; /* Number of channel lines handled by process(). */
+
+ struct {
+ bool Knee : 1;
+ bool Attack : 1;
+ bool Release : 1;
+ bool PostGain : 1;
+ bool Declip : 1;
+ } mAuto{}; /* Automation flags; process() runs the crest detector when Attack or Release is set. */
+
+ uint mLookAhead{0}; /* Count of mSideChain entries carried over between process() calls. */
+
+ float mPreGain{0.0f}; /* Multiplied into every sample at the start of process(); 1.0 skips that pass. */
+ float mPostGain{0.0f};
+
+ float mThreshold{0.0f};
+ float mSlope{0.0f};
+ float mKnee{0.0f};
+
+ float mAttack{0.0f};
+ float mRelease{0.0f};
+
+ alignas(16) float mSideChain[2*BufferLineSize]{}; /* process() multiplies the output by the first SamplesToDo entries. */
+ alignas(16) float mCrestFactor[BufferLineSize]{};
+
+ SlidingHold *mHold{nullptr}; /* When set, process() uses PeakHoldDetector instead of PeakDetector. */
+ FloatBufferLine *mDelay{nullptr}; /* When set, process() runs SignalDelay; holds mNumChans lines. */
+
+ float mCrestCoeff{0.0f};
+ float mGainEstimate{0.0f};
+ float mAdaptCoeff{0.0f};
+
+ float mLastPeakSq{0.0f};
+ float mLastRmsSq{0.0f};
+ float mLastRelease{0.0f};
+ float mLastAttack{0.0f};
+ float mLastGainDev{0.0f};
+
+
+ ~Compressor();
+ void process(const uint SamplesToDo, FloatBufferLine *OutBuffer); /* Compresses SamplesToDo samples of each channel in place. */
+ int getLookAhead() const noexcept { return static_cast<int>(mLookAhead); } /* mLookAhead converted to int. */
+
+ DEF_PLACE_NEWDEL()
+
+ /**
+ * The compressor is initialized with the following settings:
+ *
+ * \param NumChans Number of channels to process.
+ * \param SampleRate Sample rate to process.
+ * \param AutoKnee Whether to automate the knee width parameter.
+ * \param AutoAttack Whether to automate the attack time parameter.
+ * \param AutoRelease Whether to automate the release time parameter.
+ * \param AutoPostGain Whether to automate the make-up (post) gain
+ * parameter.
+ * \param AutoDeclip Whether to automate clipping reduction. Ignored
+ * when not automating make-up gain.
+ * \param LookAheadTime Look-ahead time (in seconds).
+ * \param HoldTime Peak hold-time (in seconds).
+ * \param PreGainDb Gain applied before detection (in dB).
+ * \param PostGainDb Make-up gain applied after compression (in dB).
+ * \param ThresholdDb Triggering threshold (in dB).
+ * \param Ratio Compression ratio (x:1). Set to INFINITY for true
+ * limiting. Ignored when automating knee width.
+ * \param KneeDb Knee width (in dB). Ignored when automating knee
+ * width.
+ * \param AttackTime Attack time (in seconds). Acts as a maximum when
+ * automating attack time.
+ * \param ReleaseTime Release time (in seconds). Acts as a maximum when
+ * automating release time.
+ */
+ static std::unique_ptr<Compressor> Create(const size_t NumChans, const float SampleRate,
+ const bool AutoKnee, const bool AutoAttack, const bool AutoRelease,
+ const bool AutoPostGain, const bool AutoDeclip, const float LookAheadTime,
+ const float HoldTime, const float PreGainDb, const float PostGainDb,
+ const float ThresholdDb, const float Ratio, const float KneeDb, const float AttackTime,
+ const float ReleaseTime);
+};
+using CompressorPtr = std::unique_ptr<Compressor>;
+
+#endif /* CORE_MASTERING_H */
diff --git a/core/mixer.cpp b/core/mixer.cpp
new file mode 100644
index 00000000..066c57bd
--- /dev/null
+++ b/core/mixer.cpp
@@ -0,0 +1,95 @@
+
+#include "config.h"
+
+#include "mixer.h"
+
+#include <cmath>
+
+#include "alnumbers.h"
+#include "devformat.h"
+#include "device.h"
+#include "mixer/defs.h"
+
+struct CTag;
+
+
+MixerOutFunc MixSamplesOut{Mix_<CTag>};
+MixerOneFunc MixSamplesOne{Mix_<CTag>};
+
+
+std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x,
+    const float spread)
+{
+    std::array<float,MaxAmbiChannels> coeffs{CalcAmbiCoeffs(y, z, x)};
+
+    /* Without a positive spread the point-source coefficients are final. */
+    if(!(spread > 0.0f))
+        return coeffs;
+
+    /* Implement the spread by using a spherical source that subtends the
+     * angle spread. See:
+     * http://www.ppsloan.org/publications/StupidSH36.pdf - Appendix A3
+     *
+     * When adjusted for N3D normalization instead of SN3D, these
+     * calculations are:
+     *
+     * ZH0 = -sqrt(pi) * (-1+ca);
+     * ZH1 = 0.5*sqrt(pi) * sa*sa;
+     * ZH2 = -0.5*sqrt(pi) * ca*(-1+ca)*(ca+1);
+     * ZH3 = -0.125*sqrt(pi) * (-1+ca)*(ca+1)*(5*ca*ca - 1);
+     * ZH4 = -0.125*sqrt(pi) * ca*(-1+ca)*(ca+1)*(7*ca*ca - 3);
+     * ZH5 = -0.0625*sqrt(pi) * (-1+ca)*(ca+1)*(21*ca*ca*ca*ca - 14*ca*ca + 1);
+     *
+     * The gain of the source is compensated for size, so that the
+     * loudness doesn't depend on the spread. Thus:
+     *
+     * ZH0 = 1.0f;
+     * ZH1 = 0.5f * (ca+1.0f);
+     * ZH2 = 0.5f * (ca+1.0f)*ca;
+     * ZH3 = 0.125f * (ca+1.0f)*(5.0f*ca*ca - 1.0f);
+     * ZH4 = 0.125f * (ca+1.0f)*(7.0f*ca*ca - 3.0f)*ca;
+     * ZH5 = 0.0625f * (ca+1.0f)*(21.0f*ca*ca*ca*ca - 14.0f*ca*ca + 1.0f);
+     */
+    const float ca{std::cos(spread * 0.5f)};
+    /* Increase the source volume by up to +3dB for a full spread. */
+    const float scale{std::sqrt(1.0f + al::numbers::inv_pi_v<float>/2.0f*spread)};
+
+    /* One scale factor per order, zeroth through third. */
+    const float order_gains[4]{
+        scale,
+        scale * 0.5f * (ca+1.f),
+        scale * 0.5f * (ca+1.f)*ca,
+        scale * 0.125f * (ca+1.f)*(5.f*ca*ca-1.f)
+    };
+
+    /* The channels of order N occupy indices N*N through (N+1)*(N+1)-1, so
+     * this walks channels 0, 1-3, 4-8 and 9-15 in turn.
+     */
+    size_t chan{0};
+    for(size_t order{0};order < 4;++order)
+    {
+        const size_t order_end{(order+1)*(order+1)};
+        for(;chan < order_end;++chan)
+            coeffs[chan] *= order_gains[order];
+    }
+
+    return coeffs;
+}
+
+/* Fills gains with one value per output channel of mix: the channel's map
+ * scale times the coefficient it indexes times ingain. Any remaining gains
+ * are set to zero.
+ */
+void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
+    const al::span<float,MaxAmbiChannels> gains)
+{
+    const size_t numout{mix->Buffer.size()};
+
+    auto chanmap = mix->AmbiMap.cbegin();
+    auto out = gains.begin();
+    for(size_t i{0};i < numout;++i)
+    {
+        *out = chanmap->Scale * coeffs[chanmap->Index] * ingain;
+        ++chanmap;
+        ++out;
+    }
+    std::fill(out, gains.end(), 0.0f);
+}
diff --git a/core/mixer.h b/core/mixer.h
new file mode 100644
index 00000000..aa7597bb
--- /dev/null
+++ b/core/mixer.h
@@ -0,0 +1,109 @@
+#ifndef CORE_MIXER_H
+#define CORE_MIXER_H
+
+#include <array>
+#include <cmath>
+#include <stddef.h>
+#include <type_traits>
+
+#include "alspan.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "devformat.h"
+
+struct MixParams;
+
+/* Mixer functions that handle one input and multiple output channels. The
+ * MixSamplesOut pointer selects the implementation; mixer.cpp initializes it
+ * to the plain C++ version, Mix_<CTag>. In that version InSamples is added
+ * to every line of OutBuffer starting at sample OutPos, and each line's
+ * entry in CurrentGains is stepped toward the matching TargetGains entry
+ * over Counter samples and updated in place.
+ */
+using MixerOutFunc = void(*)(const al::span<const float> InSamples,
+ const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains,
+ const size_t Counter, const size_t OutPos);
+
+extern MixerOutFunc MixSamplesOut;
+inline void MixSamples(const al::span<const float> InSamples,
+ const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains,
+ const size_t Counter, const size_t OutPos)
+{ MixSamplesOut(InSamples, OutBuffer, CurrentGains, TargetGains, Counter, OutPos); } /* Forwards to the selected mixer. */
+
+/* Mixer functions that handle one input and one output channel. The
+ * MixSamplesOne pointer selects the implementation; mixer.cpp initializes it
+ * to the plain C++ version, Mix_<CTag>. In that version InSamples is added
+ * to OutBuffer while CurrentGain is stepped toward TargetGain over Counter
+ * samples and updated in place.
+ */
+using MixerOneFunc = void(*)(const al::span<const float> InSamples, float *OutBuffer,
+ float &CurrentGain, const float TargetGain, const size_t Counter);
+
+extern MixerOneFunc MixSamplesOne;
+inline void MixSamples(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+ const float TargetGain, const size_t Counter)
+{ MixSamplesOne(InSamples, OutBuffer, CurrentGain, TargetGain, Counter); } /* Forwards to the selected mixer. */
+
+
+/**
+ * Calculates ambisonic encoder coefficients using the X, Y, and Z direction
+ * components, which must represent a normalized (unit length) vector, and the
+ * spread is the angular width of the sound (0...tau).
+ *
+ * NOTE: The components use ambisonic coordinates. As a result:
+ *
+ * Ambisonic Y = OpenAL -X
+ * Ambisonic Z = OpenAL Y
+ * Ambisonic X = OpenAL -Z
+ *
+ * The components are ordered such that OpenAL's X, Y, and Z are the first,
+ * second, and third parameters respectively -- simply negate X and Z.
+ */
+std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x,
+ const float spread);
+
+/**
+ * CalcDirectionCoeffs
+ *
+ * Produces ambisonic coefficients for a source in the direction given by an
+ * OpenAL direction vector, which must be normalized (unit length). The
+ * spread is the angular width of the sound (0...tau).
+ */
+inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3],
+    const float spread)
+{
+    /* Ambisonic Y, Z, X correspond to OpenAL -X, Y, -Z respectively. */
+    const float ambi_y{-dir[0]};
+    const float ambi_z{ dir[1]};
+    const float ambi_x{-dir[2]};
+    return CalcAmbiCoeffs(ambi_y, ambi_z, ambi_x, spread);
+}
+
+/**
+ * CalcDirectionCoeffs
+ *
+ * Calculates ambisonic coefficients based on an OpenAL direction vector. The
+ * vector must be normalized (unit length). This overload takes no spread and
+ * forwards to the three-argument CalcAmbiCoeffs.
+ */
+constexpr std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3])
+{
+ /* Convert from OpenAL coords to Ambisonics: Y = -X, Z = Y, X = -Z. */
+ return CalcAmbiCoeffs(-dir[0], dir[1], -dir[2]);
+}
+
+/**
+ * CalcAngleCoeffs
+ *
+ * Produces ambisonic coefficients for a source at the given azimuth and
+ * elevation. Both angles are in radians, going right and up respectively,
+ * and the spread is passed through to CalcAmbiCoeffs.
+ */
+inline std::array<float,MaxAmbiChannels> CalcAngleCoeffs(const float azimuth,
+    const float elevation, const float spread)
+{
+    const float cos_elev{std::cos(elevation)};
+
+    /* Named after the CalcAmbiCoeffs parameters they are passed as. */
+    const float ambi_y{-std::sin(azimuth) * cos_elev};
+    const float ambi_z{ std::sin(elevation)};
+    const float ambi_x{ std::cos(azimuth) * cos_elev};
+
+    return CalcAmbiCoeffs(ambi_y, ambi_z, ambi_x, spread);
+}
+
+
+/**
+ * ComputePanGains
+ *
+ * Computes panning gains using the given channel decoder coefficients and the
+ * pre-calculated direction or angle coefficients. For B-Format sources, the
+ * coeffs are a 'slice' of a transform matrix for the input channel, used to
+ * scale and orient the sound samples.
+ */
+void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
+ const al::span<float,MaxAmbiChannels> gains);
+
+#endif /* CORE_MIXER_H */
diff --git a/core/mixer/defs.h b/core/mixer/defs.h
new file mode 100644
index 00000000..48daca9b
--- /dev/null
+++ b/core/mixer/defs.h
@@ -0,0 +1,109 @@
+#ifndef CORE_MIXER_DEFS_H
+#define CORE_MIXER_DEFS_H
+
+#include <array>
+#include <stdlib.h>
+
+#include "alspan.h"
+#include "core/bufferline.h"
+#include "core/resampler_limits.h"
+
+struct CubicCoefficients;
+struct HrtfChannelState;
+struct HrtfFilter;
+struct MixHrtfFilter;
+
+using uint = unsigned int;
+using float2 = std::array<float,2>;
+
+
+constexpr int MixerFracBits{16};
+constexpr int MixerFracOne{1 << MixerFracBits};
+constexpr int MixerFracMask{MixerFracOne - 1};
+constexpr int MixerFracHalf{MixerFracOne >> 1};
+
+constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */
+
+
+enum class Resampler : uint8_t { /* Resampler choices; passed to PrepareResampler. */
+ Point,
+ Linear,
+ Cubic,
+ FastBSinc12,
+ BSinc12,
+ FastBSinc24,
+ BSinc24,
+
+ Max = BSinc24 /* Same value as the last enumerator. */
+};
+
+/* Interpolator state. Kind of a misnomer since the interpolator itself is
+ * stateless. This just keeps it from having to recompute scale-related
+ * mappings for every sample.
+ */
+struct BsincState {
+ float sf; /* Scale interpolation factor. */
+ uint m; /* Coefficient count; also the number of input samples read per output sample. */
+ uint l; /* Left coefficient offset; the C resamplers start reading this many samples before the source position. */
+ /* Filter coefficients, followed by the phase, scale, and scale-phase
+ * delta coefficients. Starting at phase index 0, each subsequent phase
+ * index follows contiguously.
+ */
+ const float *filter;
+};
+
+struct CubicState { /* State for the cubic resampler. */
+ /* Filter coefficients, and coefficient deltas. Starting at phase index 0,
+ * each subsequent phase index follows contiguously. The C resampler reads
+ * the mCoeffs and mDeltas members of the entry for the current phase.
+ */
+ const CubicCoefficients *filter;
+};
+
+union InterpState { /* Resampler-specific state, handed to a ResamplerFunc. */
+ CubicState cubic; /* Read by the cubic resampler. */
+ BsincState bsinc; /* Read by the bsinc and fast-bsinc resamplers. */
+};
+
+using ResamplerFunc = void(*)(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst);
+
+ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state);
+
+
+template<typename TypeTag, typename InstTag>
+void Resample_(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst);
+
+template<typename InstTag>
+void Mix_(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer,
+ float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos);
+template<typename InstTag>
+void Mix_(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+ const float TargetGain, const size_t Counter);
+
+template<typename InstTag>
+void MixHrtf_(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const MixHrtfFilter *hrtfparams, const size_t BufferSize);
+template<typename InstTag>
+void MixHrtfBlend_(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize);
+template<typename InstTag>
+void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
+ const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
+ float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize);
+
+/* Vectorized resampler helpers */
+
+/* Precomputes the source offset and fractional position of N consecutive
+ * output samples, starting at offset 0 with the given frac and advancing by
+ * increment (in MixerFracBits fixed-point) for each following sample.
+ */
+template<size_t N>
+inline void InitPosArrays(uint frac, uint increment, uint (&frac_arr)[N], uint (&pos_arr)[N])
+{
+    uint pos{0u};
+    pos_arr[0] = pos;
+    frac_arr[0] = frac;
+    for(size_t i{1};i < N;i++)
+    {
+        /* Step the position, carrying whole samples out of the fraction. */
+        frac += increment;
+        pos += frac >> MixerFracBits;
+        frac &= MixerFracMask;
+
+        pos_arr[i] = pos;
+        frac_arr[i] = frac;
+    }
+}
+
+#endif /* CORE_MIXER_DEFS_H */
diff --git a/core/mixer/hrtfbase.h b/core/mixer/hrtfbase.h
new file mode 100644
index 00000000..36f88e49
--- /dev/null
+++ b/core/mixer/hrtfbase.h
@@ -0,0 +1,129 @@
+#ifndef CORE_MIXER_HRTFBASE_H
+#define CORE_MIXER_HRTFBASE_H
+
+#include <algorithm>
+#include <cmath>
+
+#include "almalloc.h"
+#include "hrtfdefs.h"
+#include "opthelpers.h"
+
+
+using uint = unsigned int;
+
+using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const size_t irSize,
+ const ConstHrirSpan Coeffs, const float left, const float right);
+
+/* Mixes BufferSize input samples through the HRIR in hrtfparams, adding the
+ * result to AccumSamples. The gain starts at hrtfparams->Gain and grows by
+ * hrtfparams->GainStep with each sample. The left and right inputs are read
+ * from InSamples at HrtfHistoryLength minus the respective delay.
+ */
+template<ApplyCoeffsT ApplyCoeffs>
+inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, const size_t IrSize,
+    const MixHrtfFilter *hrtfparams, const size_t BufferSize)
+{
+    ASSUME(BufferSize > 0);
+
+    const ConstHrirSpan Coeffs{hrtfparams->Coeffs};
+    const float gainstep{hrtfparams->GainStep};
+    const float gain{hrtfparams->Gain};
+
+    const size_t lbase{HrtfHistoryLength - hrtfparams->Delay[0]};
+    const size_t rbase{HrtfHistoryLength - hrtfparams->Delay[1]};
+    float stepcount{0.0f};
+    for(size_t i{0u};i < BufferSize;++i)
+    {
+        const float g{gain + gainstep*stepcount};
+        stepcount += 1.0f;
+
+        const float left{InSamples[lbase+i] * g};
+        const float right{InSamples[rbase+i] * g};
+        ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, left, right);
+    }
+}
+
+template<ApplyCoeffsT ApplyCoeffs>
+inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSamples,
+ const size_t IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
+ const size_t BufferSize)
+{ /* Mixes the input through the old filter with a gain that ramps down from
+   * oldparams->Gain, then through the new filter with a gain that ramps up
+   * from zero in steps of newparams->GainStep. Both passes add into
+   * AccumSamples.
+   */
+ ASSUME(BufferSize > 0);
+
+ const ConstHrirSpan OldCoeffs{oldparams->Coeffs};
+ const float oldGainStep{oldparams->Gain / static_cast<float>(BufferSize)}; /* Per-sample decrement of the old gain. */
+ const ConstHrirSpan NewCoeffs{newparams->Coeffs};
+ const float newGainStep{newparams->GainStep};
+
+ if(oldparams->Gain > GainSilenceThreshold) LIKELY /* Skip the old filter if it was already silent. */
+ {
+ size_t ldelay{HrtfHistoryLength - oldparams->Delay[0]};
+ size_t rdelay{HrtfHistoryLength - oldparams->Delay[1]};
+ auto stepcount = static_cast<float>(BufferSize); /* Counts down, so the first sample gets the full old gain. */
+ for(size_t i{0u};i < BufferSize;++i)
+ {
+ const float g{oldGainStep*stepcount};
+ const float left{InSamples[ldelay++] * g};
+ const float right{InSamples[rdelay++] * g};
+ ApplyCoeffs(AccumSamples+i, IrSize, OldCoeffs, left, right);
+
+ stepcount -= 1.0f;
+ }
+ }
+
+ if(newGainStep*static_cast<float>(BufferSize) > GainSilenceThreshold) LIKELY /* Skip the new filter if it stays silent over the whole buffer. */
+ {
+ size_t ldelay{HrtfHistoryLength+1 - newparams->Delay[0]}; /* +1 because the loop below starts at the second sample. */
+ size_t rdelay{HrtfHistoryLength+1 - newparams->Delay[1]};
+ float stepcount{1.0f};
+ for(size_t i{1u};i < BufferSize;++i) /* Sample 0 would have a gain of zero, so it is skipped. */
+ {
+ const float g{newGainStep*stepcount};
+ const float left{InSamples[ldelay++] * g};
+ const float right{InSamples[rdelay++] * g};
+ ApplyCoeffs(AccumSamples+i, IrSize, NewCoeffs, left, right);
+
+ stepcount += 1.0f;
+ }
+ }
+}
+
+template<ApplyCoeffsT ApplyCoeffs>
+inline void MixDirectHrtfBase(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
+ const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples,
+ float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
+{ /* Filters every input line through its channel's HRIR into AccumSamples,
+   * then adds the first BufferSize accumulated samples to LeftOut and
+   * RightOut.
+   */
+ ASSUME(BufferSize > 0);
+
+ for(const FloatBufferLine &input : InSamples)
+ {
+ /* For dual-band processing, the signal needs extra scaling applied to
+ * the high frequency response. The band-splitter applies this scaling
+ * with a consistent phase shift regardless of the scale amount.
+ */
+ ChanState->mSplitter.processHfScale({input.data(), BufferSize}, TempBuf,
+ ChanState->mHfScale);
+
+ /* Now apply the HRIR coefficients to this channel. */
+ const float *RESTRICT tempbuf{al::assume_aligned<16>(TempBuf)};
+ const ConstHrirSpan Coeffs{ChanState->mCoeffs};
+ for(size_t i{0u};i < BufferSize;++i)
+ {
+ const float insample{tempbuf[i]};
+ ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, insample, insample); /* The same sample feeds both ears. */
+ }
+
+ ++ChanState; /* One state entry is consumed per input line. */
+ }
+
+ /* Add the HRTF signal to the existing "direct" signal. */
+ float *RESTRICT left{al::assume_aligned<16>(LeftOut.data())};
+ float *RESTRICT right{al::assume_aligned<16>(RightOut.data())};
+ for(size_t i{0u};i < BufferSize;++i)
+ left[i] += AccumSamples[i][0];
+ for(size_t i{0u};i < BufferSize;++i)
+ right[i] += AccumSamples[i][1];
+
+ /* Copy the new in-progress accumulation values to the front and clear the
+ * following samples for the next mix. This touches BufferSize+HrirLength
+ * entries of AccumSamples.
+ */
+ auto accum_iter = std::copy_n(AccumSamples+BufferSize, HrirLength, AccumSamples);
+ std::fill_n(accum_iter, BufferSize, float2{});
+}
+
+#endif /* CORE_MIXER_HRTFBASE_H */
diff --git a/core/mixer/hrtfdefs.h b/core/mixer/hrtfdefs.h
new file mode 100644
index 00000000..3c903ed8
--- /dev/null
+++ b/core/mixer/hrtfdefs.h
@@ -0,0 +1,53 @@
+#ifndef CORE_MIXER_HRTFDEFS_H
+#define CORE_MIXER_HRTFDEFS_H
+
+#include <array>
+
+#include "alspan.h"
+#include "core/ambidefs.h"
+#include "core/bufferline.h"
+#include "core/filters/splitter.h"
+
+
+using float2 = std::array<float,2>;
+using ubyte = unsigned char;
+using ubyte2 = std::array<ubyte,2>;
+using ushort = unsigned short;
+using uint = unsigned int;
+using uint2 = std::array<uint,2>;
+
+constexpr uint HrtfHistoryBits{6};
+constexpr uint HrtfHistoryLength{1 << HrtfHistoryBits};
+constexpr uint HrtfHistoryMask{HrtfHistoryLength - 1};
+
+constexpr uint HrirBits{7};
+constexpr uint HrirLength{1 << HrirBits};
+constexpr uint HrirMask{HrirLength - 1};
+
+constexpr uint MinIrLength{8};
+
+using HrirArray = std::array<float2,HrirLength>;
+using HrirSpan = al::span<float2,HrirLength>;
+using ConstHrirSpan = al::span<const float2,HrirLength>;
+
+struct MixHrtfFilter { /* Filter parameters read by the HRTF mixers. */
+ const ConstHrirSpan Coeffs; /* Left/right coefficient pairs. */
+ uint2 Delay; /* Left [0] and right [1] delays, subtracted from the input read position. */
+ float Gain; /* Gain of the first sample in MixHrtfBase. */
+ float GainStep; /* Gain change per sample. */
+};
+
+struct HrtfFilter { /* Filter state with its own coefficient storage; the "old" filter in MixHrtfBlendBase. */
+ alignas(16) HrirArray Coeffs; /* Left/right coefficient pairs. */
+ uint2 Delay; /* Left [0] and right [1] delays, subtracted from the input read position. */
+ float Gain; /* Gain that MixHrtfBlendBase ramps down from. */
+};
+
+
+struct HrtfChannelState { /* Per-input-channel state used by MixDirectHrtfBase. */
+ BandSplitter mSplitter; /* Applies the high frequency scaling to the input. */
+ float mHfScale{}; /* Scale passed to the splitter's processHfScale. */
+ alignas(16) HrirArray mCoeffs{}; /* Coefficients applied to the scaled input. */
+};
+
+#endif /* CORE_MIXER_HRTFDEFS_H */
diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp
new file mode 100644
index 00000000..28a92ef7
--- /dev/null
+++ b/core/mixer/mixer_c.cpp
@@ -0,0 +1,218 @@
+#include "config.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+
+#include "alnumeric.h"
+#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
+#include "defs.h"
+#include "hrtfbase.h"
+
+struct CTag;
+struct PointTag;
+struct LerpTag;
+struct CubicTag;
+struct BSincTag;
+struct FastBSincTag;
+
+
+namespace {
+
+constexpr uint BsincPhaseDiffBits{MixerFracBits - BSincPhaseBits};
+constexpr uint BsincPhaseDiffOne{1 << BsincPhaseDiffBits};
+constexpr uint BsincPhaseDiffMask{BsincPhaseDiffOne - 1u};
+
+constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits};
+constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits};
+constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
+
+inline float do_point(const InterpState&, const float *RESTRICT vals, const uint) /* Point sampler: state and fraction are unused. */
+{ return vals[0]; } /* The sample at the current position, as is. */
+/* Linear sampler: blends the current and next sample by the fractional
+ * position, which is first scaled from fixed-point to a float factor.
+ */
+inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac)
+{
+    const float mu{static_cast<float>(frac)*(1.0f/MixerFracOne)};
+    return lerpf(vals[0], vals[1], mu);
+}
+/* Cubic sampler: weights four consecutive samples with the filter entry for
+ * the current phase, interpolating each coefficient toward the next phase.
+ */
+inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+{
+    /* The upper fraction bits select the phase; the rest blend phases. */
+    const uint phase_idx{frac >> CubicPhaseDiffBits};
+    const float phase_mu{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+
+    const auto &entry = istate.cubic.filter[phase_idx];
+    const float *RESTRICT coeffs{al::assume_aligned<16>(entry.mCoeffs)};
+    const float *RESTRICT deltas{al::assume_aligned<16>(entry.mDeltas)};
+
+    /* Apply the phase interpolated filter. */
+    return (coeffs[0] + phase_mu*deltas[0])*vals[0] + (coeffs[1] + phase_mu*deltas[1])*vals[1]
+        + (coeffs[2] + phase_mu*deltas[2])*vals[2] + (coeffs[3] + phase_mu*deltas[3])*vals[3];
+}
+/* Bsinc sampler: weights m consecutive samples with filter coefficients that
+ * are interpolated over both the scale factor and the phase.
+ */
+inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+{
+    const size_t m{istate.bsinc.m};
+    ASSUME(m > 0);
+    const float sf{istate.bsinc.sf};
+
+    /* The upper fraction bits select the phase; the rest blend phases. */
+    const uint phase_idx{frac >> BsincPhaseDiffBits};
+    const float phase_mu{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)};
+
+    /* The four coefficient sets used for this phase. */
+    const float *RESTRICT fil{istate.bsinc.filter + m*phase_idx*2};
+    const float *RESTRICT phd{fil + m};
+    const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+    const float *RESTRICT spd{scd + m};
+
+    /* Apply the scale and phase interpolated filter. */
+    float sum{0.0f};
+    for(size_t j{0};j < m;++j)
+        sum += (fil[j] + sf*scd[j] + phase_mu*(phd[j] + sf*spd[j])) * vals[j];
+    return sum;
+}
+/* Fast bsinc sampler: as do_bsinc, but the coefficients are interpolated
+ * over the phase only, with the scale terms left out.
+ */
+inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+{
+    const size_t m{istate.bsinc.m};
+    ASSUME(m > 0);
+
+    /* The upper fraction bits select the phase; the rest blend phases. */
+    const uint phase_idx{frac >> BsincPhaseDiffBits};
+    const float phase_mu{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)};
+
+    const float *RESTRICT fil{istate.bsinc.filter + m*phase_idx*2};
+    const float *RESTRICT phd{fil + m};
+
+    /* Apply the phase interpolated filter. */
+    float sum{0.0f};
+    for(size_t j{0};j < m;++j)
+        sum += (fil[j] + phase_mu*phd[j]) * vals[j];
+    return sum;
+}
+
+using SamplerT = float(&)(const InterpState&, const float*RESTRICT, const uint);
+
+/* Fills dst by calling Sampler once per output sample, advancing the source
+ * position by increment (in MixerFracBits fixed-point) after each one.
+ */
+template<SamplerT Sampler>
+void DoResample(const InterpState *state, const float *RESTRICT src, uint frac,
+    const uint increment, const al::span<float> dst)
+{
+    const InterpState istate{*state};
+    ASSUME(frac < MixerFracOne);
+    for(auto out = dst.begin();out != dst.end();++out)
+    {
+        *out = Sampler(istate, src, frac);
+
+        /* Whole samples move the source pointer; the rest stays in frac. */
+        const uint next{frac + increment};
+        src += next>>MixerFracBits;
+        frac = next & MixerFracMask;
+    }
+}
+
+/* Accumulates one input sample into Values: each of the first IrSize
+ * left/right coefficient pairs is scaled by left and right respectively.
+ */
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
+    const float left, const float right)
+{
+    ASSUME(IrSize >= MinIrLength);
+    for(size_t c{0};c < IrSize;++c)
+    {
+        float2 &accum = Values[c];
+        const float2 &coef = Coeffs[c];
+        accum[0] += coef[0] * left;
+        accum[1] += coef[1] * right;
+    }
+}
+
+force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst,
+ float &CurrentGain, const float TargetGain, const float delta, const size_t min_len,
+ size_t Counter)
+{ /* Adds InSamples into dst, stepping the gain from CurrentGain toward
+   * TargetGain over the first min_len samples, and stores the gain that was
+   * reached back into CurrentGain.
+   */
+ float gain{CurrentGain};
+ const float step{(TargetGain-gain) * delta}; /* Gain change per sample. */
+
+ size_t pos{0};
+ if(!(std::abs(step) > std::numeric_limits<float>::epsilon())) /* Step too small to matter; also true if step is NaN. */
+ gain = TargetGain;
+ else
+ {
+ float step_count{0.0f};
+ for(;pos != min_len;++pos)
+ {
+ dst[pos] += InSamples[pos] * (gain + step*step_count);
+ step_count += 1.0f;
+ }
+ if(pos == Counter) /* The whole fade was covered, so land exactly on the target. */
+ gain = TargetGain;
+ else
+ gain += step*step_count;
+ }
+ CurrentGain = gain;
+
+ if(!(std::abs(gain) > GainSilenceThreshold)) /* The remaining samples would be silent, so they are not mixed. */
+ return;
+ for(;pos != InSamples.size();++pos) /* Mix whatever is left at a constant gain. */
+ dst[pos] += InSamples[pos] * gain;
+}
+
+} // namespace
+
+template<>
+void Resample_<PointTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{ DoResample<do_point>(state, src, frac, increment, dst); } /* Plain C++ point resampler. */
+
+template<>
+void Resample_<LerpTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{ DoResample<do_lerp>(state, src, frac, increment, dst); } /* Plain C++ linear resampler. */
+
+template<>
+void Resample_<CubicTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{ DoResample<do_cubic>(state, src-1, frac, increment, dst); } /* Starts one sample early; do_cubic reads four samples from there. */
+
+template<>
+void Resample_<BSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{ DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); } /* Starts bsinc.l samples early; do_bsinc reads bsinc.m samples from there. */
+
+template<>
+void Resample_<FastBSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{ DoResample<do_fastbsinc>(state, src-state->bsinc.l, frac, increment, dst); } /* Same source offset as the full bsinc resampler. */
+
+
+template<>
+void MixHrtf_<CTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const MixHrtfFilter *hrtfparams, const size_t BufferSize)
+{ MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize); } /* Shared mixer with the plain C++ ApplyCoeffs. */
+
+template<>
+void MixHrtfBlend_<CTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize)
+{ /* Shared blending mixer with the plain C++ ApplyCoeffs. */
+ MixHrtfBlendBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, oldparams, newparams,
+ BufferSize);
+}
+
+template<>
+void MixDirectHrtf_<CTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
+ const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
+ float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
+{ /* Shared direct mixer with the plain C++ ApplyCoeffs. */
+ MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState,
+ IrSize, BufferSize);
+}
+
+
+/* Plain C++ mixer for one input and several outputs. InSamples is added to
+ * every line of OutBuffer from sample OutPos, with each line's gain stepped
+ * from its CurrentGains entry toward its TargetGains entry over Counter
+ * samples.
+ */
+template<>
+void Mix_<CTag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer,
+    float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos)
+{
+    /* The gain fade covers at most the samples available in this call. */
+    const auto fade_len = minz(Counter, InSamples.size());
+    float delta{0.0f};
+    if(Counter > 0)
+        delta = 1.0f / static_cast<float>(Counter);
+
+    size_t chan{0};
+    for(FloatBufferLine &output : OutBuffer)
+    {
+        float *dst{al::assume_aligned<16>(output.data()+OutPos)};
+        MixLine(InSamples, dst, CurrentGains[chan], TargetGains[chan], delta, fade_len, Counter);
+        ++chan;
+    }
+}
+
+/* Plain C++ mixer for one input and one output. InSamples is added to
+ * OutBuffer with the gain stepped from CurrentGain toward TargetGain over
+ * Counter samples.
+ */
+template<>
+void Mix_<CTag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+    const float TargetGain, const size_t Counter)
+{
+    /* The gain fade covers at most the samples available in this call. */
+    const auto fade_len = minz(Counter, InSamples.size());
+    float delta{0.0f};
+    if(Counter > 0)
+        delta = 1.0f / static_cast<float>(Counter);
+
+    float *dst{al::assume_aligned<16>(OutBuffer)};
+    MixLine(InSamples, dst, CurrentGain, TargetGain, delta, fade_len, Counter);
+}
diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp
new file mode 100644
index 00000000..ef2936b3
--- /dev/null
+++ b/core/mixer/mixer_neon.cpp
@@ -0,0 +1,362 @@
+#include "config.h"
+
+#include <arm_neon.h>
+
+#include <cmath>
+#include <limits>
+
+#include "alnumeric.h"
+#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
+#include "defs.h"
+#include "hrtfbase.h"
+
+struct NEONTag;
+struct LerpTag;
+struct CubicTag;
+struct BSincTag;
+struct FastBSincTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__ARM_NEON)
+#pragma GCC target("fpu=neon")
+#endif
+
+namespace {
+
+/* Fraction bits left below the BSinc phase index, with the matching unit
+ * value and bit mask.
+ */
+constexpr uint BSincPhaseDiffBits = MixerFracBits - BSincPhaseBits;
+constexpr uint BSincPhaseDiffOne = 1u << BSincPhaseDiffBits;
+constexpr uint BSincPhaseDiffMask = BSincPhaseDiffOne - 1u;
+
+/* Fraction bits left below the cubic phase index, with the matching unit
+ * value and bit mask.
+ */
+constexpr uint CubicPhaseDiffBits = MixerFracBits - CubicPhaseBits;
+constexpr uint CubicPhaseDiffOne = 1u << CubicPhaseDiffBits;
+constexpr uint CubicPhaseDiffMask = CubicPhaseDiffOne - 1u;
+
+/* Builds a 4-lane vector holding l0, l1, l2, l3 in lanes 0 through 3. */
+inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
+{
+    const float lanes[4]{l0, l1, l2, l3};
+    return vld1q_f32(lanes);
+}
+
+/* Adds Coeffs, scaled by left and right, into Values. Entries are handled two
+ * at a time: each 4-float load is multiplied by {left, right, left, right} and
+ * accumulated back in place.
+ */
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
+    const float left, const float right)
+{
+    const float32x2_t lr2{vset_lane_f32(right, vmov_n_f32(left), 1)};
+    const float32x4_t lr4{vcombine_f32(lr2, lr2)};
+
+    ASSUME(IrSize >= MinIrLength);
+    size_t idx{0};
+    while(idx < IrSize)
+    {
+        float *const accum{&Values[idx][0]};
+        const float32x4_t scaled_in{vld1q_f32(&Coeffs[idx][0])};
+        vst1q_f32(accum, vmlaq_f32(vld1q_f32(accum), scaled_in, lr4));
+        idx += 2;
+    }
+}
+/* Accumulates InSamples into dst, scaled by a gain that is stepped from
+ * CurrentGain toward TargetGain and then held.
+ *
+ * InSamples   - input samples; dst is indexed in step with it.
+ * dst         - accumulation buffer (dst[i] += InSamples[i] * gain).
+ * CurrentGain - in/out: the starting gain on entry, overwritten with the
+ *               gain reached once the stepped part is done.
+ * TargetGain  - gain the steps head toward.
+ * delta       - scales (TargetGain-CurrentGain) into the per-sample step.
+ * min_len     - number of leading samples mixed with the stepping gain.
+ * aligned_len - its low two bits give the number of samples mixed one at a
+ *               time right after the stepped part.
+ * Counter     - when pos equals this after the stepped part, the gain is set
+ *               to TargetGain instead of the computed value.
+ *
+ * The final constant-gain loops are skipped when the held gain is not above
+ * GainSilenceThreshold.
+ */
+force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst,
+ float &CurrentGain, const float TargetGain, const float delta, const size_t min_len,
+ const size_t aligned_len, size_t Counter)
+{
+ float gain{CurrentGain};
+ const float step{(TargetGain-gain) * delta}; /* gain change per sample */
+
+ size_t pos{0};
+ if(!(std::abs(step) > std::numeric_limits<float>::epsilon()))
+ gain = TargetGain; /* step is negligible (or NaN): no stepping, use the target directly */
+ else
+ {
+ float step_count{0.0f};
+ /* Mix with applying gain steps in aligned multiples of 4. */
+ if(size_t todo{min_len >> 2})
+ {
+ const float32x4_t four4{vdupq_n_f32(4.0f)};
+ const float32x4_t step4{vdupq_n_f32(step)};
+ const float32x4_t gain4{vdupq_n_f32(gain)};
+ float32x4_t step_count4{vdupq_n_f32(0.0f)};
+ step_count4 = vsetq_lane_f32(1.0f, step_count4, 1);
+ step_count4 = vsetq_lane_f32(2.0f, step_count4, 2);
+ step_count4 = vsetq_lane_f32(3.0f, step_count4, 3); /* lanes now hold 0, 1, 2, 3 */
+
+ do {
+ const float32x4_t val4 = vld1q_f32(&InSamples[pos]);
+ float32x4_t dry4 = vld1q_f32(&dst[pos]);
+ dry4 = vmlaq_f32(dry4, val4, vmlaq_f32(gain4, step4, step_count4)); /* dry += val * (gain + step*step_count) */
+ step_count4 = vaddq_f32(step_count4, four4);
+ vst1q_f32(&dst[pos], dry4);
+ pos += 4;
+ } while(--todo);
+ /* NOTE: step_count4 now represents the next four counts after the
+ * last four mixed samples, so the lowest element represents the
+ * next step count to apply.
+ */
+ step_count = vgetq_lane_f32(step_count4, 0);
+ }
+ /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
+ for(size_t leftover{min_len&3};leftover;++pos,--leftover)
+ {
+ dst[pos] += InSamples[pos] * (gain + step*step_count);
+ step_count += 1.0f;
+ }
+ if(pos == Counter)
+ gain = TargetGain; /* all Counter steps were applied: land exactly on the target */
+ else
+ gain += step*step_count; /* gain reached after the steps applied so far */
+
+ /* Mix until pos is aligned with 4 or the mix is done. */
+ for(size_t leftover{aligned_len&3};leftover;++pos,--leftover)
+ dst[pos] += InSamples[pos] * gain;
+ }
+ CurrentGain = gain; /* report the held gain back to the caller */
+
+ if(!(std::abs(gain) > GainSilenceThreshold))
+ return; /* held gain is not above the silence threshold: skip the rest */
+ if(size_t todo{(InSamples.size()-pos) >> 2})
+ {
+ const float32x4_t gain4 = vdupq_n_f32(gain);
+ do {
+ const float32x4_t val4 = vld1q_f32(&InSamples[pos]);
+ float32x4_t dry4 = vld1q_f32(&dst[pos]);
+ dry4 = vmlaq_f32(dry4, val4, gain4); /* dry += val * gain */
+ vst1q_f32(&dst[pos], dry4);
+ pos += 4;
+ } while(--todo);
+ }
+ for(size_t leftover{(InSamples.size()-pos)&3};leftover;++pos,--leftover)
+ dst[pos] += InSamples[pos] * gain;
+}
+
+} // namespace
+/* Linear-interpolation resampler, NEON specialization.
+ *
+ * Writes dst.size() samples. Each output blends src[pos] and src[pos+1] by
+ * frac/MixerFracOne, after which the fixed-point position advances by
+ * increment (MixerFracBits fractional bits). The vector loop produces four
+ * outputs per iteration; the remaining dst.size()&3 outputs are produced by
+ * the scalar loop at the end. The InterpState argument is unused.
+ */
+template<>
+void Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+
+ const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4)); /* each lane moves four outputs ahead per iteration */
+ const float32x4_t fracOne4 = vdupq_n_f32(1.0f/MixerFracOne);
+ const int32x4_t fracMask4 = vdupq_n_s32(MixerFracMask);
+ alignas(16) uint pos_[4], frac_[4];
+ int32x4_t pos4, frac4;
+
+ InitPosArrays(frac, increment, frac_, pos_); /* presumably fills the per-lane start frac/pos for outputs 0..3 - confirm in defs.h */
+ frac4 = vld1q_s32(reinterpret_cast<int*>(frac_));
+ pos4 = vld1q_s32(reinterpret_cast<int*>(pos_));
+
+ auto dst_iter = dst.begin();
+ for(size_t todo{dst.size()>>2};todo;--todo)
+ {
+ const int pos0{vgetq_lane_s32(pos4, 0)};
+ const int pos1{vgetq_lane_s32(pos4, 1)};
+ const int pos2{vgetq_lane_s32(pos4, 2)};
+ const int pos3{vgetq_lane_s32(pos4, 3)};
+ const float32x4_t val1{set_f4(src[pos0], src[pos1], src[pos2], src[pos3])};
+ const float32x4_t val2{set_f4(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
+
+ /* val1 + (val2-val1)*mu */
+ const float32x4_t r0{vsubq_f32(val2, val1)};
+ const float32x4_t mu{vmulq_f32(vcvtq_f32_s32(frac4), fracOne4)};
+ const float32x4_t out{vmlaq_f32(val1, mu, r0)};
+
+ vst1q_f32(dst_iter, out);
+ dst_iter += 4;
+
+ frac4 = vaddq_s32(frac4, increment4);
+ pos4 = vaddq_s32(pos4, vshrq_n_s32(frac4, MixerFracBits)); /* carry whole samples into the position */
+ frac4 = vandq_s32(frac4, fracMask4); /* keep only the fractional bits */
+ }
+
+ if(size_t todo{dst.size()&3})
+ {
+ src += static_cast<uint>(vgetq_lane_s32(pos4, 0)); /* lane 0 is the next position to resample */
+ frac = static_cast<uint>(vgetq_lane_s32(frac4, 0));
+
+ do {
+ *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne));
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ } while(--todo);
+ }
+}
+/* Cubic resampler, NEON specialization.
+ *
+ * Each output is the dot product of four consecutive source samples,
+ * starting one sample before the current position, with a 4-tap filter. The
+ * taps are state->cubic.filter[pi].mCoeffs plus pf times the matching
+ * mDeltas, where pi is the top bits of frac and pf is the remaining low bits
+ * scaled to [0,1).
+ */
+template<>
+void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+
+ const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+
+ src -= 1; /* the 4-sample window starts one sample before the current position */
+ for(float &out_sample : dst)
+ {
+ const uint pi{frac >> CubicPhaseDiffBits};
+ const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+ const float32x4_t pf4{vdupq_n_f32(pf)};
+
+ /* Apply the phase interpolated filter. */
+
+ /* f = fil + pf*phd */
+ const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4,
+ vld1q_f32(filter[pi].mDeltas));
+ /* r = f*src */
+ float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))};
+
+ r4 = vaddq_f32(r4, vrev64q_f32(r4)); /* each 64-bit half now holds the sum of its two lanes */
+ out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); /* low + high half: sum of all four lanes */
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+/* BSinc resampler, NEON specialization.
+ *
+ * Each output sample is the dot product of m source samples, starting
+ * state->bsinc.l samples before the current position, with a filter row that
+ * is adjusted by the scale factor state->bsinc.sf and by the phase factor pf.
+ * pi (the top bits of frac) selects the row and pf is the remaining low bits
+ * scaled to [0,1).
+ *
+ * The inner do/while runs m>>2 times and always at least once, so m is
+ * presumably a nonzero multiple of 4 - confirm against the filter tables.
+ */
+template<>
+void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ const float *const filter{state->bsinc.filter};
+ const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)};
+ const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
+ ASSUME(frac < MixerFracOne);
+
+ src -= state->bsinc.l; /* move back to the first sample covered by the filter */
+ for(float &out_sample : dst)
+ {
+ // Calculate the phase index and factor.
+ const uint pi{frac >> BSincPhaseDiffBits};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
+
+ // Apply the scale and phase interpolated filter.
+ float32x4_t r4{vdupq_n_f32(0.0f)};
+ {
+ const float32x4_t pf4{vdupq_n_f32(pf)};
+ const float *RESTRICT fil{filter + m*pi*2}; /* each phase occupies 2*m floats: fil then phd */
+ const float *RESTRICT phd{fil + m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; /* scd/spd sit one full fil/phd table further on */
+ const float *RESTRICT spd{scd + m};
+ size_t td{m >> 2};
+ size_t j{0u};
+
+ do {
+ /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
+ const float32x4_t f4 = vmlaq_f32(
+ vmlaq_f32(vld1q_f32(&fil[j]), sf4, vld1q_f32(&scd[j])),
+ pf4, vmlaq_f32(vld1q_f32(&phd[j]), sf4, vld1q_f32(&spd[j])));
+ /* r += f*src */
+ r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
+ j += 4;
+ } while(--td);
+ }
+ r4 = vaddq_f32(r4, vrev64q_f32(r4)); /* each 64-bit half now holds the sum of its two lanes */
+ out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); /* low + high half: sum of all four lanes */
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+/* Fast BSinc resampler, NEON specialization.
+ *
+ * Each output sample is the dot product of m source samples, starting
+ * state->bsinc.l samples before the current position, with a filter row that
+ * is adjusted by the phase factor only (state->bsinc.sf is not read here).
+ * pi (the top bits of frac) selects the row and pf is the remaining low bits
+ * scaled to [0,1).
+ *
+ * The inner do/while runs m>>2 times and always at least once, so m is
+ * presumably a nonzero multiple of 4 - confirm against the filter tables.
+ */
+template<>
+void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ const float *const filter{state->bsinc.filter};
+ const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
+ ASSUME(frac < MixerFracOne);
+
+ src -= state->bsinc.l; /* move back to the first sample covered by the filter */
+ for(float &out_sample : dst)
+ {
+ // Calculate the phase index and factor.
+ const uint pi{frac >> BSincPhaseDiffBits};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
+
+ // Apply the phase interpolated filter.
+ float32x4_t r4{vdupq_n_f32(0.0f)};
+ {
+ const float32x4_t pf4{vdupq_n_f32(pf)};
+ const float *RESTRICT fil{filter + m*pi*2}; /* each phase occupies 2*m floats: fil then phd */
+ const float *RESTRICT phd{fil + m};
+ size_t td{m >> 2};
+ size_t j{0u};
+
+ do {
+ /* f = fil + pf*phd */
+ const float32x4_t f4 = vmlaq_f32(vld1q_f32(&fil[j]), pf4, vld1q_f32(&phd[j]));
+ /* r += f*src */
+ r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
+ j += 4;
+ } while(--td);
+ }
+ r4 = vaddq_f32(r4, vrev64q_f32(r4)); /* each 64-bit half now holds the sum of its two lanes */
+ out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); /* low + high half: sum of all four lanes */
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+
+
+/* NEONTag specialization of the HRTF mix: passes every argument straight
+ * through to MixHrtfBase, instantiated with this file's ApplyCoeffs.
+ */
+template<>
+void MixHrtf_<NEONTag>(
+    const float *InSamples,
+    float2 *AccumSamples,
+    const uint IrSize,
+    const MixHrtfFilter *hrtfparams,
+    const size_t BufferSize)
+{
+    MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize);
+}
+
+/* NEONTag specialization of the blended HRTF mix: passes every argument
+ * straight through to MixHrtfBlendBase, instantiated with this file's
+ * ApplyCoeffs.
+ */
+template<>
+void MixHrtfBlend_<NEONTag>(
+    const float *InSamples,
+    float2 *AccumSamples,
+    const uint IrSize,
+    const HrtfFilter *oldparams,
+    const MixHrtfFilter *newparams,
+    const size_t BufferSize)
+{
+    MixHrtfBlendBase<ApplyCoeffs>(
+        InSamples, AccumSamples, IrSize, oldparams, newparams, BufferSize);
+}
+
+/* NEONTag specialization of the direct HRTF mix: passes every argument
+ * straight through to MixDirectHrtfBase, instantiated with this file's
+ * ApplyCoeffs.
+ */
+template<>
+void MixDirectHrtf_<NEONTag>(
+    const FloatBufferSpan LeftOut,
+    const FloatBufferSpan RightOut,
+    const al::span<const FloatBufferLine> InSamples,
+    float2 *AccumSamples,
+    float *TempBuf,
+    HrtfChannelState *ChanState,
+    const size_t IrSize,
+    const size_t BufferSize)
+{
+    MixDirectHrtfBase<ApplyCoeffs>(
+        LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState, IrSize, BufferSize);
+}
+
+
+/* NEONTag mixer for a set of output lines: InSamples is mixed into each line
+ * of OutBuffer (starting at OutPos) with that line's own current/target gain.
+ */
+template<>
+void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer,
+    float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos)
+{
+    const auto total = InSamples.size();
+    /* Number of samples that get a stepping gain, limited to the input size. */
+    const auto ramp_len = minz(Counter, total);
+    /* Distance from the end of the stepped part up to the next multiple of 4,
+     * without going past the end of the input.
+     */
+    const auto ramp_end_aligned = minz((ramp_len+3) & ~size_t{3}, total);
+    const auto align_len = ramp_end_aligned - ramp_len;
+
+    /* Reciprocal of the counter; left at zero when there is no counter. */
+    float gain_scale{0.0f};
+    if(Counter > 0)
+        gain_scale = 1.0f / static_cast<float>(Counter);
+
+    size_t chan{0};
+    for(FloatBufferLine &line : OutBuffer)
+    {
+        const auto dst = al::assume_aligned<16>(line.data()+OutPos);
+        MixLine(InSamples, dst, CurrentGains[chan], TargetGains[chan], gain_scale, ramp_len,
+            align_len, Counter);
+        ++chan;
+    }
+}
+
+/* NEONTag mixer for a single output buffer: InSamples is mixed into OutBuffer
+ * while CurrentGain is moved toward TargetGain.
+ */
+template<>
+void Mix_<NEONTag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+    const float TargetGain, const size_t Counter)
+{
+    const auto total = InSamples.size();
+    /* Number of samples that get a stepping gain, limited to the input size. */
+    const auto ramp_len = minz(Counter, total);
+    /* Distance from the end of the stepped part up to the next multiple of 4,
+     * without going past the end of the input.
+     */
+    const auto ramp_end_aligned = minz((ramp_len+3) & ~size_t{3}, total);
+    const auto align_len = ramp_end_aligned - ramp_len;
+
+    /* Reciprocal of the counter; left at zero when there is no counter. */
+    float gain_scale{0.0f};
+    if(Counter > 0)
+        gain_scale = 1.0f / static_cast<float>(Counter);
+
+    const auto dst = al::assume_aligned<16>(OutBuffer);
+    MixLine(InSamples, dst, CurrentGain, TargetGain, gain_scale, ramp_len, align_len, Counter);
+}
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
new file mode 100644
index 00000000..0aa5d5fb
--- /dev/null
+++ b/core/mixer/mixer_sse.cpp
@@ -0,0 +1,327 @@
+#include "config.h"
+
+#include <xmmintrin.h>
+
+#include <cmath>
+#include <limits>
+
+#include "alnumeric.h"
+#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
+#include "defs.h"
+#include "hrtfbase.h"
+
+struct SSETag;
+struct CubicTag;
+struct BSincTag;
+struct FastBSincTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE__)
+#pragma GCC target("sse")
+#endif
+
+namespace {
+
+/* Fraction bits left below the BSinc phase index, with the matching unit
+ * value and bit mask.
+ */
+constexpr uint BSincPhaseDiffBits = MixerFracBits - BSincPhaseBits;
+constexpr uint BSincPhaseDiffOne = 1u << BSincPhaseDiffBits;
+constexpr uint BSincPhaseDiffMask = BSincPhaseDiffOne - 1u;
+
+/* Fraction bits left below the cubic phase index, with the matching unit
+ * value and bit mask.
+ */
+constexpr uint CubicPhaseDiffBits = MixerFracBits - CubicPhaseBits;
+constexpr uint CubicPhaseDiffOne = 1u << CubicPhaseDiffBits;
+constexpr uint CubicPhaseDiffMask = CubicPhaseDiffOne - 1u;
+
+/* Multiply-add on four packed floats: x + y*z. */
+#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
+/* Adds an impulse response, scaled by a left and a right value, into Values.
+ *
+ * Each 128-bit load covers two consecutive entries, so with lrlr holding
+ * {left, right, left, right} the first float of every entry is scaled by left
+ * and the second by right (this assumes a float2 is two packed floats -
+ * confirm against its declaration).
+ *
+ * Values - accumulation buffer, updated in place.
+ * IrSize - number of entries to process; they are handled two at a time.
+ * Coeffs - impulse response coefficients, read with aligned loads.
+ * left, right - scale factors for the two floats of each entry.
+ */
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
+ const float left, const float right)
+{
+ const __m128 lrlr{_mm_setr_ps(left, right, left, right)};
+
+ ASSUME(IrSize >= MinIrLength);
+ /* This isn't technically correct to test alignment, but it's true for
+ * systems that support SSE, which is the only one that needs to know the
+ * alignment of Values (which alternates between 8- and 16-byte aligned).
+ */
+ if(!(reinterpret_cast<uintptr_t>(Values)&15))
+ {
+ for(size_t i{0};i < IrSize;i += 2)
+ {
+ const __m128 coeffs{_mm_load_ps(Coeffs[i].data())};
+ __m128 vals{_mm_load_ps(Values[i].data())};
+ vals = MLA4(vals, lrlr, coeffs); /* vals += lrlr * coeffs */
+ _mm_store_ps(Values[i].data(), vals);
+ }
+ }
+ else
+ {
+ __m128 imp0, imp1;
+ __m128 coeffs{_mm_load_ps(Coeffs[0].data())};
+ __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(Values[0].data()))}; /* Values[0] alone, in the low half */
+ imp0 = _mm_mul_ps(lrlr, coeffs);
+ vals = _mm_add_ps(imp0, vals);
+ _mm_storel_pi(reinterpret_cast<__m64*>(Values[0].data()), vals); /* only the low half is written back */
+ size_t td{((IrSize+1)>>1) - 1};
+ size_t i{1};
+ do {
+ coeffs = _mm_load_ps(Coeffs[i+1].data());
+ vals = _mm_load_ps(Values[i].data()); /* aligned load starting at an odd entry */
+ imp1 = _mm_mul_ps(lrlr, coeffs);
+ imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); /* high half of the previous product, then low half of the new one */
+ vals = _mm_add_ps(imp0, vals);
+ _mm_store_ps(Values[i].data(), vals);
+ imp0 = imp1;
+ i += 2;
+ } while(--td);
+ vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(Values[i].data()));
+ imp0 = _mm_movehl_ps(imp0, imp0); /* bring the still unused high half of the last product down */
+ vals = _mm_add_ps(imp0, vals);
+ _mm_storel_pi(reinterpret_cast<__m64*>(Values[i].data()), vals); /* only the low half is written back */
+ }
+}
+/* Accumulates InSamples into dst, scaled by a gain that is stepped from
+ * CurrentGain toward TargetGain and then held.
+ *
+ * InSamples   - input samples; dst is indexed in step with it. The 4-wide
+ *               loops read it with aligned loads.
+ * dst         - accumulation buffer (dst[i] += InSamples[i] * gain), also
+ *               accessed with aligned loads and stores in the 4-wide loops.
+ * CurrentGain - in/out: the starting gain on entry, overwritten with the
+ *               gain reached once the stepped part is done.
+ * TargetGain  - gain the steps head toward.
+ * delta       - scales (TargetGain-CurrentGain) into the per-sample step.
+ * min_len     - number of leading samples mixed with the stepping gain.
+ * aligned_len - its low two bits give the number of samples mixed one at a
+ *               time right after the stepped part.
+ * Counter     - when pos equals this after the stepped part, the gain is set
+ *               to TargetGain instead of the computed value.
+ *
+ * The final constant-gain loops are skipped when the held gain is not above
+ * GainSilenceThreshold.
+ */
+force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst,
+ float &CurrentGain, const float TargetGain, const float delta, const size_t min_len,
+ const size_t aligned_len, size_t Counter)
+{
+ float gain{CurrentGain};
+ const float step{(TargetGain-gain) * delta}; /* gain change per sample */
+
+ size_t pos{0};
+ if(!(std::abs(step) > std::numeric_limits<float>::epsilon()))
+ gain = TargetGain; /* step is negligible (or NaN): no stepping, use the target directly */
+ else
+ {
+ float step_count{0.0f};
+ /* Mix with applying gain steps in aligned multiples of 4. */
+ if(size_t todo{min_len >> 2})
+ {
+ const __m128 four4{_mm_set1_ps(4.0f)};
+ const __m128 step4{_mm_set1_ps(step)};
+ const __m128 gain4{_mm_set1_ps(gain)};
+ __m128 step_count4{_mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f)};
+ do {
+ const __m128 val4{_mm_load_ps(&InSamples[pos])};
+ __m128 dry4{_mm_load_ps(&dst[pos])};
+
+ /* dry += val * (gain + step*step_count) */
+ dry4 = MLA4(dry4, val4, MLA4(gain4, step4, step_count4));
+
+ _mm_store_ps(&dst[pos], dry4);
+ step_count4 = _mm_add_ps(step_count4, four4);
+ pos += 4;
+ } while(--todo);
+ /* NOTE: step_count4 now represents the next four counts after the
+ * last four mixed samples, so the lowest element represents the
+ * next step count to apply.
+ */
+ step_count = _mm_cvtss_f32(step_count4);
+ }
+ /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
+ for(size_t leftover{min_len&3};leftover;++pos,--leftover)
+ {
+ dst[pos] += InSamples[pos] * (gain + step*step_count);
+ step_count += 1.0f;
+ }
+ if(pos == Counter)
+ gain = TargetGain; /* all Counter steps were applied: land exactly on the target */
+ else
+ gain += step*step_count; /* gain reached after the steps applied so far */
+
+ /* Mix until pos is aligned with 4 or the mix is done. */
+ for(size_t leftover{aligned_len&3};leftover;++pos,--leftover)
+ dst[pos] += InSamples[pos] * gain;
+ }
+ CurrentGain = gain; /* report the held gain back to the caller */
+
+ if(!(std::abs(gain) > GainSilenceThreshold))
+ return; /* held gain is not above the silence threshold: skip the rest */
+ if(size_t todo{(InSamples.size()-pos) >> 2})
+ {
+ const __m128 gain4{_mm_set1_ps(gain)};
+ do {
+ const __m128 val4{_mm_load_ps(&InSamples[pos])};
+ __m128 dry4{_mm_load_ps(&dst[pos])};
+ dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); /* dry += val * gain */
+ _mm_store_ps(&dst[pos], dry4);
+ pos += 4;
+ } while(--todo);
+ }
+ for(size_t leftover{(InSamples.size()-pos)&3};leftover;++pos,--leftover)
+ dst[pos] += InSamples[pos] * gain;
+}
+
+} // namespace
+/* Cubic resampler, SSE specialization.
+ *
+ * Each output is the dot product of four consecutive source samples,
+ * starting one sample before the current position, with a 4-tap filter. The
+ * taps are state->cubic.filter[pi].mCoeffs plus pf times the matching
+ * mDeltas, where pi is the top bits of frac and pf is the remaining low bits
+ * scaled to [0,1).
+ */
+template<>
+void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+
+ const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+
+ src -= 1; /* the 4-sample window starts one sample before the current position */
+ for(float &out_sample : dst)
+ {
+ const uint pi{frac >> CubicPhaseDiffBits};
+ const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+ const __m128 pf4{_mm_set1_ps(pf)};
+
+ /* Apply the phase interpolated filter. */
+
+ /* f = fil + pf*phd */
+ const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4,
+ _mm_load_ps(filter[pi].mDeltas));
+ /* r = f*src */
+ __m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))};
+
+ r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); /* add the lane-reversed copy */
+ r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); /* lane 0 now holds the sum of all four lanes */
+ out_sample = _mm_cvtss_f32(r4);
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+/* BSinc resampler, SSE specialization.
+ *
+ * Each output sample is the dot product of m source samples, starting
+ * state->bsinc.l samples before the current position, with a filter row that
+ * is adjusted by the scale factor state->bsinc.sf and by the phase factor pf.
+ * pi (the top bits of frac) selects the row and pf is the remaining low bits
+ * scaled to [0,1). Filter data is read with aligned loads, source samples
+ * with unaligned loads.
+ *
+ * The inner do/while runs m>>2 times and always at least once, so m is
+ * presumably a nonzero multiple of 4 - confirm against the filter tables.
+ */
+template<>
+void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ const float *const filter{state->bsinc.filter};
+ const __m128 sf4{_mm_set1_ps(state->bsinc.sf)};
+ const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
+ ASSUME(frac < MixerFracOne);
+
+ src -= state->bsinc.l; /* move back to the first sample covered by the filter */
+ for(float &out_sample : dst)
+ {
+ // Calculate the phase index and factor.
+ const uint pi{frac >> BSincPhaseDiffBits};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
+
+ // Apply the scale and phase interpolated filter.
+ __m128 r4{_mm_setzero_ps()};
+ {
+ const __m128 pf4{_mm_set1_ps(pf)};
+ const float *RESTRICT fil{filter + m*pi*2}; /* each phase occupies 2*m floats: fil then phd */
+ const float *RESTRICT phd{fil + m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; /* scd/spd sit one full fil/phd table further on */
+ const float *RESTRICT spd{scd + m};
+ size_t td{m >> 2};
+ size_t j{0u};
+
+ do {
+ /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
+ const __m128 f4 = MLA4(
+ MLA4(_mm_load_ps(&fil[j]), sf4, _mm_load_ps(&scd[j])),
+ pf4, MLA4(_mm_load_ps(&phd[j]), sf4, _mm_load_ps(&spd[j])));
+ /* r += f*src */
+ r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j]));
+ j += 4;
+ } while(--td);
+ }
+ r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); /* add the lane-reversed copy */
+ r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); /* lane 0 now holds the sum of all four lanes */
+ out_sample = _mm_cvtss_f32(r4);
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+/* Fast BSinc resampler, SSE specialization.
+ *
+ * Each output sample is the dot product of m source samples, starting
+ * state->bsinc.l samples before the current position, with a filter row that
+ * is adjusted by the phase factor only (state->bsinc.sf is not read here).
+ * pi (the top bits of frac) selects the row and pf is the remaining low bits
+ * scaled to [0,1). Filter data is read with aligned loads, source samples
+ * with unaligned loads.
+ *
+ * The inner do/while runs m>>2 times and always at least once, so m is
+ * presumably a nonzero multiple of 4 - confirm against the filter tables.
+ */
+template<>
+void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ const float *const filter{state->bsinc.filter};
+ const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
+ ASSUME(frac < MixerFracOne);
+
+ src -= state->bsinc.l; /* move back to the first sample covered by the filter */
+ for(float &out_sample : dst)
+ {
+ // Calculate the phase index and factor.
+ const uint pi{frac >> BSincPhaseDiffBits};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
+
+ // Apply the phase interpolated filter.
+ __m128 r4{_mm_setzero_ps()};
+ {
+ const __m128 pf4{_mm_set1_ps(pf)};
+ const float *RESTRICT fil{filter + m*pi*2}; /* each phase occupies 2*m floats: fil then phd */
+ const float *RESTRICT phd{fil + m};
+ size_t td{m >> 2};
+ size_t j{0u};
+
+ do {
+ /* f = fil + pf*phd */
+ const __m128 f4 = MLA4(_mm_load_ps(&fil[j]), pf4, _mm_load_ps(&phd[j]));
+ /* r += f*src */
+ r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j]));
+ j += 4;
+ } while(--td);
+ }
+ r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); /* add the lane-reversed copy */
+ r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); /* lane 0 now holds the sum of all four lanes */
+ out_sample = _mm_cvtss_f32(r4);
+
+ frac += increment;
+ src += frac>>MixerFracBits; /* advance by the whole-sample part */
+ frac &= MixerFracMask;
+ }
+}
+
+
+/* SSETag specialization of the HRTF mix: passes every argument straight
+ * through to MixHrtfBase, instantiated with this file's ApplyCoeffs.
+ */
+template<>
+void MixHrtf_<SSETag>(
+    const float *InSamples,
+    float2 *AccumSamples,
+    const uint IrSize,
+    const MixHrtfFilter *hrtfparams,
+    const size_t BufferSize)
+{
+    MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize);
+}
+
+/* SSETag specialization of the blended HRTF mix: passes every argument
+ * straight through to MixHrtfBlendBase, instantiated with this file's
+ * ApplyCoeffs.
+ */
+template<>
+void MixHrtfBlend_<SSETag>(
+    const float *InSamples,
+    float2 *AccumSamples,
+    const uint IrSize,
+    const HrtfFilter *oldparams,
+    const MixHrtfFilter *newparams,
+    const size_t BufferSize)
+{
+    MixHrtfBlendBase<ApplyCoeffs>(
+        InSamples, AccumSamples, IrSize, oldparams, newparams, BufferSize);
+}
+
+/* SSETag specialization of the direct HRTF mix: passes every argument
+ * straight through to MixDirectHrtfBase, instantiated with this file's
+ * ApplyCoeffs.
+ */
+template<>
+void MixDirectHrtf_<SSETag>(
+    const FloatBufferSpan LeftOut,
+    const FloatBufferSpan RightOut,
+    const al::span<const FloatBufferLine> InSamples,
+    float2 *AccumSamples,
+    float *TempBuf,
+    HrtfChannelState *ChanState,
+    const size_t IrSize,
+    const size_t BufferSize)
+{
+    MixDirectHrtfBase<ApplyCoeffs>(
+        LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState, IrSize, BufferSize);
+}
+
+
+/* SSETag mixer for a set of output lines: InSamples is mixed into each line
+ * of OutBuffer (starting at OutPos) with that line's own current/target gain.
+ */
+template<>
+void Mix_<SSETag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer,
+    float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos)
+{
+    const auto total = InSamples.size();
+    /* Number of samples that get a stepping gain, limited to the input size. */
+    const auto ramp_len = minz(Counter, total);
+    /* Distance from the end of the stepped part up to the next multiple of 4,
+     * without going past the end of the input.
+     */
+    const auto ramp_end_aligned = minz((ramp_len+3) & ~size_t{3}, total);
+    const auto align_len = ramp_end_aligned - ramp_len;
+
+    /* Reciprocal of the counter; left at zero when there is no counter. */
+    float gain_scale{0.0f};
+    if(Counter > 0)
+        gain_scale = 1.0f / static_cast<float>(Counter);
+
+    size_t chan{0};
+    for(FloatBufferLine &line : OutBuffer)
+    {
+        const auto dst = al::assume_aligned<16>(line.data()+OutPos);
+        MixLine(InSamples, dst, CurrentGains[chan], TargetGains[chan], gain_scale, ramp_len,
+            align_len, Counter);
+        ++chan;
+    }
+}
+
+/* SSETag mixer for a single output buffer: InSamples is mixed into OutBuffer
+ * while CurrentGain is moved toward TargetGain.
+ */
+template<>
+void Mix_<SSETag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+    const float TargetGain, const size_t Counter)
+{
+    const auto total = InSamples.size();
+    /* Number of samples that get a stepping gain, limited to the input size. */
+    const auto ramp_len = minz(Counter, total);
+    /* Distance from the end of the stepped part up to the next multiple of 4,
+     * without going past the end of the input.
+     */
+    const auto ramp_end_aligned = minz((ramp_len+3) & ~size_t{3}, total);
+    const auto align_len = ramp_end_aligned - ramp_len;
+
+    /* Reciprocal of the counter; left at zero when there is no counter. */
+    float gain_scale{0.0f};
+    if(Counter > 0)
+        gain_scale = 1.0f / static_cast<float>(Counter);
+
+    const auto dst = al::assume_aligned<16>(OutBuffer);
+    MixLine(InSamples, dst, CurrentGain, TargetGain, gain_scale, ramp_len, align_len, Counter);
+}
diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp
new file mode 100644
index 00000000..edaaf7a1
--- /dev/null
+++ b/core/mixer/mixer_sse2.cpp
@@ -0,0 +1,90 @@
+/**
+ * OpenAL cross platform audio library
+ * Copyright (C) 2014 by Timothy Arceri <[email protected]>.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * Or go to http://www.gnu.org/copyleft/lgpl.html
+ */
+
+#include "config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "alnumeric.h"
+#include "defs.h"
+
+struct SSE2Tag;
+struct LerpTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE2__)
+#pragma GCC target("sse2")
+#endif
+/* Linear-interpolation resampler, SSE2 specialization.
+ *
+ * Writes dst.size() samples. Each output blends src[pos] and src[pos+1] by
+ * frac/MixerFracOne, after which the fixed-point position advances by
+ * increment (MixerFracBits fractional bits). The vector loop produces four
+ * outputs per iteration and writes them with an aligned store; the remaining
+ * dst.size()&3 outputs are produced by the scalar loop at the end. The
+ * InterpState argument is unused.
+ */
+template<>
+void Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+
+ const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; /* each lane moves four outputs ahead per iteration */
+ const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
+ const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
+
+ alignas(16) uint pos_[4], frac_[4];
+ InitPosArrays(frac, increment, frac_, pos_); /* presumably fills the per-lane start frac/pos for outputs 0..3 - confirm in defs.h */
+ __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+ static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+ __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+ static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
+
+ auto dst_iter = dst.begin();
+ for(size_t todo{dst.size()>>2};todo;--todo)
+ {
+ const int pos0{_mm_cvtsi128_si32(pos4)};
+ const int pos1{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 4))}; /* byte shifts bring lanes 1..3 down to lane 0 */
+ const int pos2{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 8))};
+ const int pos3{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 12))};
+ const __m128 val1{_mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ])};
+ const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
+
+ /* val1 + (val2-val1)*mu */
+ const __m128 r0{_mm_sub_ps(val2, val1)};
+ const __m128 mu{_mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4)};
+ const __m128 out{_mm_add_ps(val1, _mm_mul_ps(mu, r0))};
+
+ _mm_store_ps(dst_iter, out);
+ dst_iter += 4;
+
+ frac4 = _mm_add_epi32(frac4, increment4);
+ pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, MixerFracBits)); /* carry whole samples into the position */
+ frac4 = _mm_and_si128(frac4, fracMask4); /* keep only the fractional bits */
+ }
+
+ if(size_t todo{dst.size()&3})
+ {
+ src += static_cast<uint>(_mm_cvtsi128_si32(pos4)); /* lane 0 is the next position to resample */
+ frac = static_cast<uint>(_mm_cvtsi128_si32(frac4));
+
+ do {
+ *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne));
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ } while(--todo);
+ }
+}
diff --git a/core/mixer/mixer_sse3.cpp b/core/mixer/mixer_sse3.cpp
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/core/mixer/mixer_sse3.cpp
diff --git a/core/mixer/mixer_sse41.cpp b/core/mixer/mixer_sse41.cpp
new file mode 100644
index 00000000..8ccd9fd3
--- /dev/null
+++ b/core/mixer/mixer_sse41.cpp
@@ -0,0 +1,95 @@
+/**
+ * OpenAL cross platform audio library
+ * Copyright (C) 2014 by Timothy Arceri <[email protected]>.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * Or go to http://www.gnu.org/copyleft/lgpl.html
+ */
+
+#include "config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "alnumeric.h"
+#include "defs.h"
+
+struct SSE4Tag;
+struct LerpTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE4_1__)
+#pragma GCC target("sse4.1")
+#endif
+/* Linear-interpolation resampler, SSE4.1 specialization.
+ *
+ * Writes dst.size() samples. Each output blends src[pos] and src[pos+1] by
+ * frac/MixerFracOne, after which the fixed-point position advances by
+ * increment (MixerFracBits fractional bits). The vector loop produces four
+ * outputs per iteration, reading the lane positions with _mm_extract_epi32
+ * and writing with an aligned store; the remaining dst.size()&3 outputs are
+ * produced by the scalar loop at the end. The InterpState argument is unused.
+ */
+template<>
+void Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, uint frac,
+ const uint increment, const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+
+ const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; /* each lane moves four outputs ahead per iteration */
+ const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
+ const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
+
+ alignas(16) uint pos_[4], frac_[4];
+ InitPosArrays(frac, increment, frac_, pos_); /* presumably fills the per-lane start frac/pos for outputs 0..3 - confirm in defs.h */
+ __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+ static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+ __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+ static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
+
+ auto dst_iter = dst.begin();
+ for(size_t todo{dst.size()>>2};todo;--todo)
+ {
+ const int pos0{_mm_extract_epi32(pos4, 0)};
+ const int pos1{_mm_extract_epi32(pos4, 1)};
+ const int pos2{_mm_extract_epi32(pos4, 2)};
+ const int pos3{_mm_extract_epi32(pos4, 3)};
+ const __m128 val1{_mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ])};
+ const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
+
+ /* val1 + (val2-val1)*mu */
+ const __m128 r0{_mm_sub_ps(val2, val1)};
+ const __m128 mu{_mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4)};
+ const __m128 out{_mm_add_ps(val1, _mm_mul_ps(mu, r0))};
+
+ _mm_store_ps(dst_iter, out);
+ dst_iter += 4;
+
+ frac4 = _mm_add_epi32(frac4, increment4);
+ pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, MixerFracBits)); /* carry whole samples into the position */
+ frac4 = _mm_and_si128(frac4, fracMask4); /* keep only the fractional bits */
+ }
+
+ if(size_t todo{dst.size()&3})
+ {
+ /* NOTE: These four elements represent the position *after* the last
+ * four samples, so the lowest element is the next position to
+ * resample.
+ */
+ src += static_cast<uint>(_mm_cvtsi128_si32(pos4));
+ frac = static_cast<uint>(_mm_cvtsi128_si32(frac4));
+
+ do {
+ *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne));
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ } while(--todo);
+ }
+}
diff --git a/core/resampler_limits.h b/core/resampler_limits.h
new file mode 100644
index 00000000..9d4cefda
--- /dev/null
+++ b/core/resampler_limits.h
@@ -0,0 +1,12 @@
+#ifndef CORE_RESAMPLER_LIMITS_H
+#define CORE_RESAMPLER_LIMITS_H
+
+/* Maximum number of samples to pad on the ends of a buffer for resampling.
+ * Note that the padding is symmetric (half at the beginning and half at the
+ * end)!
+ */
+constexpr int MaxResamplerPadding = 48;
+
+/* Padding on one end of the buffer: half of the total. */
+constexpr int MaxResamplerEdge = MaxResamplerPadding / 2;
+
+#endif /* CORE_RESAMPLER_LIMITS_H */
diff --git a/core/rtkit.cpp b/core/rtkit.cpp
new file mode 100644
index 00000000..ff944ebf
--- /dev/null
+++ b/core/rtkit.cpp
@@ -0,0 +1,236 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+/***
+ Copyright 2009 Lennart Poettering
+ Copyright 2010 David Henningsson <[email protected]>
+ Copyright 2021 Chris Robinson
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+***/
+
+#include "config.h"
+
+#include "rtkit.h"
+
+#include <errno.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <memory>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#elif defined(__FreeBSD__)
+#include <sys/thr.h>
+#endif
+
+
+ namespace dbus {
+
+ /* D-Bus argument type codes, written as their one-character signatures.
+  * TypeInvalid ends the argument lists passed to dbus_message_append_args in
+  * this file and marks the end of an iterator.
+  */
+ constexpr int TypeString = 's';
+ constexpr int TypeVariant = 'v';
+ constexpr int TypeInt32 = 'i';
+ constexpr int TypeUInt32 = 'u';
+ constexpr int TypeInt64 = 'x';
+ constexpr int TypeUInt64 = 't';
+ constexpr int TypeInvalid = '\0';
+
+ /* Deleter that drops one reference on a DBusMessage. */
+ struct MessageDeleter {
+     void operator()(DBusMessage *msg) { dbus_message_unref(msg); }
+ };
+ /* Owning handle that unrefs its message when destroyed. */
+ using MessagePtr = std::unique_ptr<DBusMessage,MessageDeleter>;
+
+ } // namespace dbus
+
+namespace {
+
+ /* Returns the kernel thread id of the calling thread, or 0 on platforms
+  * where this file has no way to query it.
+  */
+ inline pid_t _gettid()
+ {
+ #if defined(__linux__)
+     const auto tid = syscall(SYS_gettid);
+     return static_cast<pid_t>(tid);
+ #elif defined(__FreeBSD__)
+     long tid{};
+     thr_self(&tid);
+     return static_cast<pid_t>(tid);
+ #else
+ #warning gettid not available
+     return 0;
+ #endif
+ }
+
+int translate_error(const char *name)
+{
+ if(strcmp(name, DBUS_ERROR_NO_MEMORY) == 0)
+ return -ENOMEM;
+ if(strcmp(name, DBUS_ERROR_SERVICE_UNKNOWN) == 0
+ || strcmp(name, DBUS_ERROR_NAME_HAS_NO_OWNER) == 0)
+ return -ENOENT;
+ if(strcmp(name, DBUS_ERROR_ACCESS_DENIED) == 0
+ || strcmp(name, DBUS_ERROR_AUTH_FAILED) == 0)
+ return -EACCES;
+ return -EIO;
+}
+
+ /* Reads an integer property of the RealtimeKit service through the
+  * org.freedesktop.DBus.Properties "Get" method.
+  *
+  * On success returns 0 and stores the value in *propval. Otherwise returns a
+  * negative errno value: -ENOMEM if the request could not be built, the
+  * translated D-Bus error if the call failed, or -EBADMSG if the reply held
+  * no 32- or 64-bit integer inside a variant.
+  */
+ int rtkit_get_int_property(DBusConnection *connection, const char *propname, long long *propval)
+ {
+     dbus::MessagePtr request{dbus_message_new_method_call(RTKIT_SERVICE_NAME,
+         RTKIT_OBJECT_PATH, "org.freedesktop.DBus.Properties", "Get")};
+     if(!request) return -ENOMEM;
+
+     const char *interfacestr = RTKIT_SERVICE_NAME;
+     if(!dbus_message_append_args(request.get(),
+         dbus::TypeString, &interfacestr,
+         dbus::TypeString, &propname,
+         dbus::TypeInvalid))
+         return -ENOMEM;
+
+     dbus::Error error;
+     dbus::MessagePtr reply{dbus_connection_send_with_reply_and_block(connection,
+         request.get(), -1, &error.get())};
+     if(!reply) return translate_error(error->name);
+
+     /* A reply can itself be an error message; turn that into an errno too. */
+     if(dbus_set_error_from_message(&error.get(), reply.get()))
+         return translate_error(error->name);
+
+     int ret{-EBADMSG};
+     DBusMessageIter iter{};
+     dbus_message_iter_init(reply.get(), &iter);
+     for(int argtype{};(argtype=dbus_message_iter_get_arg_type(&iter)) != dbus::TypeInvalid;
+         dbus_message_iter_next(&iter))
+     {
+         /* Only variants are inspected; other top-level arguments are skipped. */
+         if(argtype != dbus::TypeVariant)
+             continue;
+
+         DBusMessageIter subiter{};
+         dbus_message_iter_recurse(&iter, &subiter);
+         for(int valtype{};
+             (valtype=dbus_message_iter_get_arg_type(&subiter)) != dbus::TypeInvalid;
+             dbus_message_iter_next(&subiter))
+         {
+             switch(valtype)
+             {
+             case dbus::TypeInt32:
+                 {
+                     dbus_int32_t i32{};
+                     dbus_message_iter_get_basic(&subiter, &i32);
+                     *propval = i32;
+                     ret = 0;
+                 }
+                 break;
+
+             case dbus::TypeInt64:
+                 {
+                     dbus_int64_t i64{};
+                     dbus_message_iter_get_basic(&subiter, &i64);
+                     *propval = i64;
+                     ret = 0;
+                 }
+                 break;
+
+             default:
+                 break;
+             }
+         }
+     }
+
+     return ret;
+ }
+
+} // namespace
+
+ /* Queries the "MaxRealtimePriority" property. Returns the value narrowed to
+  * int, or the negative errno value from the property lookup.
+  */
+ int rtkit_get_max_realtime_priority(DBusConnection *connection)
+ {
+     long long value{};
+     const int status{rtkit_get_int_property(connection, "MaxRealtimePriority", &value)};
+     if(status < 0)
+         return status;
+     return static_cast<int>(value);
+ }
+
+ /* Queries the "MinNiceLevel" property. On success the value is stored in
+  * *min_nice_level; on failure the output is left untouched. The status of
+  * the property lookup is returned either way.
+  */
+ int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level)
+ {
+     long long value{};
+     const int status{rtkit_get_int_property(connection, "MinNiceLevel", &value)};
+     if(status < 0)
+         return status;
+
+     *min_nice_level = static_cast<int>(value);
+     return status;
+ }
+
+ /* Queries the "RTTimeUSecMax" property. Returns the value, or the negative
+  * errno value from the property lookup.
+  */
+ long long rtkit_get_rttime_usec_max(DBusConnection *connection)
+ {
+     long long value{};
+     const int status{rtkit_get_int_property(connection, "RTTimeUSecMax", &value)};
+     if(status < 0)
+         return status;
+     return value;
+ }
+
+ /* Asks RealtimeKit to run 'thread' with the given realtime priority via the
+  * "MakeThreadRealtime" method. A 'thread' of 0 means the calling thread.
+  * Returns 0 on success or a negative errno value (-ENOTSUP when the calling
+  * thread's id cannot be determined).
+  */
+ int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority)
+ {
+     /* Resolve "current thread" now; a second 0 means it can't be queried. */
+     if(thread == 0 && (thread=_gettid()) == 0)
+         return -ENOTSUP;
+
+     dbus::MessagePtr request{dbus_message_new_method_call(RTKIT_SERVICE_NAME,
+         RTKIT_OBJECT_PATH, "org.freedesktop.RealtimeKit1", "MakeThreadRealtime")};
+     if(!request) return -ENOMEM;
+
+     auto tid = static_cast<dbus_uint64_t>(thread);
+     auto prio = static_cast<dbus_uint32_t>(priority);
+     if(!dbus_message_append_args(request.get(),
+         dbus::TypeUInt64, &tid,
+         dbus::TypeUInt32, &prio,
+         dbus::TypeInvalid))
+         return -ENOMEM;
+
+     dbus::Error error;
+     dbus::MessagePtr reply{dbus_connection_send_with_reply_and_block(connection,
+         request.get(), -1, &error.get())};
+     if(!reply) return translate_error(error->name);
+
+     /* The reply itself may carry an error. */
+     if(dbus_set_error_from_message(&error.get(), reply.get()))
+         return translate_error(error->name);
+
+     return 0;
+ }
+
+ /* Asks RealtimeKit to give 'thread' the requested nice level via the
+  * "MakeThreadHighPriority" method. A 'thread' of 0 means the calling thread.
+  * Returns 0 on success or a negative errno value (-ENOTSUP when the calling
+  * thread's id cannot be determined).
+  */
+ int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level)
+ {
+     /* Resolve "current thread" now; a second 0 means it can't be queried. */
+     if(thread == 0 && (thread=_gettid()) == 0)
+         return -ENOTSUP;
+
+     dbus::MessagePtr request{dbus_message_new_method_call(RTKIT_SERVICE_NAME,
+         RTKIT_OBJECT_PATH, "org.freedesktop.RealtimeKit1", "MakeThreadHighPriority")};
+     if(!request) return -ENOMEM;
+
+     auto tid = static_cast<dbus_uint64_t>(thread);
+     auto nice = static_cast<dbus_int32_t>(nice_level);
+     if(!dbus_message_append_args(request.get(),
+         dbus::TypeUInt64, &tid,
+         dbus::TypeInt32, &nice,
+         dbus::TypeInvalid))
+         return -ENOMEM;
+
+     dbus::Error error;
+     dbus::MessagePtr reply{dbus_connection_send_with_reply_and_block(connection,
+         request.get(), -1, &error.get())};
+     if(!reply) return translate_error(error->name);
+
+     /* The reply itself may carry an error. */
+     if(dbus_set_error_from_message(&error.get(), reply.get()))
+         return translate_error(error->name);
+
+     return 0;
+ }
diff --git a/core/rtkit.h b/core/rtkit.h
new file mode 100644
index 00000000..d4994e27
--- /dev/null
+++ b/core/rtkit.h
@@ -0,0 +1,71 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+#ifndef foortkithfoo
+#define foortkithfoo
+
+/***
+ Copyright 2009 Lennart Poettering
+ Copyright 2010 David Henningsson <[email protected]>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+***/
+
+#include <sys/types.h>
+
+#include "dbus_wrap.h"
+
+ /* This is the reference implementation for a client for
+ * RealtimeKit. You don't have to use this, but if you do, just copy these
+ * sources into your repository */
+
+#define RTKIT_SERVICE_NAME "org.freedesktop.RealtimeKit1"
+#define RTKIT_OBJECT_PATH "/org/freedesktop/RealtimeKit1"
+
+/* This is mostly equivalent to sched_setparam(thread, SCHED_RR, {
+ * .sched_priority = priority }). 'thread' needs to be a kernel thread
+ * id as returned by gettid(), not a pthread_t! If 'thread' is 0 the
+ * current thread is used. The returned value is a negative errno
+ * style error code, or 0 on success. */
+int rtkit_make_realtime(DBusConnection *system_bus, pid_t thread, int priority);
+
+/* This is mostly equivalent to setpriority(PRIO_PROCESS, thread,
+ * nice_level). 'thread' needs to be a kernel thread id as returned by
+ * gettid(), not a pthread_t! If 'thread' is 0 the current thread is
+ * used. The returned value is a negative errno style error code, or 0
+ * on success.*/
+int rtkit_make_high_priority(DBusConnection *system_bus, pid_t thread, int nice_level);
+
+/* Return the maximum value of realtime priority available. Realtime requests
+ * above this value will fail. A negative value is an errno style error code.
+ */
+int rtkit_get_max_realtime_priority(DBusConnection *system_bus);
+
+ /* Retrieve the minimum value of nice level available. High prio requests
+ * below this value will fail. The returned value is a negative errno
+ * style error code, or 0 on success.*/
+int rtkit_get_min_nice_level(DBusConnection *system_bus, int *min_nice_level);
+
+/* Return the maximum value of RLIMIT_RTTIME to set before attempting a
+ * realtime request. A negative value is an errno style error code.
+ */
+long long rtkit_get_rttime_usec_max(DBusConnection *system_bus);
+
+#endif
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
new file mode 100644
index 00000000..df50956a
--- /dev/null
+++ b/core/uhjfilter.cpp
@@ -0,0 +1,539 @@
+
+#include "config.h"
+
+#include "uhjfilter.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "alcomplex.h"
+#include "alnumeric.h"
+#include "opthelpers.h"
+#include "phase_shifter.h"
+
+
+ /* Global quality selections for UHJ decoding and encoding. Both start out as
+ * UhjQualityType::Default.
+ */
+ UhjQualityType UhjDecodeQuality{UhjQualityType::Default};
+ UhjQualityType UhjEncodeQuality{UhjQualityType::Default};
+
+
+ namespace {
+
+ /* One shared phase shifter instance per supported FIR length. */
+ const PhaseShifterT<UhjLength256> PShiftLq{};
+ const PhaseShifterT<UhjLength512> PShiftHq{};
+
+ /* Maps a filter length N to the matching shared phase shifter above. Only
+ * the two lengths specialized below are supported; any other N fails to
+ * compile because the primary template is left undefined.
+ */
+ template<size_t N>
+ struct GetPhaseShifter;
+ template<>
+ struct GetPhaseShifter<UhjLength256> { static auto& Get() noexcept { return PShiftLq; } };
+ template<>
+ struct GetPhaseShifter<UhjLength512> { static auto& Get() noexcept { return PShiftHq; } };
+
+
+ /* Compile-time x*x, used to build the coefficient tables below. */
+ constexpr float square(float x) noexcept
+ { return x*x; }
+
+ /* Filter coefficients for the 'base' all-pass IIR, which applies a frequency-
+ * dependent phase-shift of N degrees. The output of the filter requires a 1-
+ * sample delay.
+ */
+ constexpr std::array<float,4> Filter1Coeff{{
+ square(0.6923878f), square(0.9360654322959f), square(0.9882295226860f),
+ square(0.9987488452737f)
+ }};
+ /* Filter coefficients for the offset all-pass IIR, which applies a frequency-
+ * dependent phase-shift of N+90 degrees.
+ */
+ constexpr std::array<float,4> Filter2Coeff{{
+ square(0.4021921162426f), square(0.8561710882420f), square(0.9722909545651f),
+ square(0.9952884791278f)
+ }};
+
+ } // namespace
+
+ /* Runs every sample of src through the four chained all-pass sections given
+  * by coeffs and writes one output sample per input to dst. The filter state
+  * is only stored back into mState when updateState is true, so a call with
+  * updateState false leaves the filter as it was.
+  */
+ void UhjAllPassFilter::process(const al::span<const float,4> coeffs,
+     const al::span<const float> src, const bool updateState, float *RESTRICT dst)
+ {
+     /* Work on a copy so the member state is only committed on request. */
+     auto state = mState;
+
+     for(const float input : src)
+     {
+         float x{input};
+         for(size_t stage{0};stage < 4;++stage)
+         {
+             auto &z = state[stage].z;
+             const float c{coeffs[stage]};
+
+             const float y{x*c + z[0]};
+             z[0] = z[1];
+             z[1] = y*c - x;
+             /* The output of this section feeds the next one. */
+             x = y;
+         }
+         *(dst++) = x;
+     }
+     if(updateState) LIKELY mState = state;
+ }
+
+
+/* Encoding UHJ from B-Format is done as:
+ *
+ * S = 0.9396926*W + 0.1855740*X
+ * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y
+ *
+ * Left = (S + D)/2.0
+ * Right = (S - D)/2.0
+ * T = j(-0.1432*W + 0.6512*X) - 0.7071068*Y
+ * Q = 0.9772*Z
+ *
+ * where j is a wide-band +90 degree phase shift. 3-channel UHJ excludes Q,
+ * while 2-channel excludes Q and T.
+ *
+ * The phase shift is done using a linear FIR filter derived from an FFT'd
+ * impulse with the desired shift.
+ */
+
+ /* FIR-based 2-channel UHJ encode of one block of SamplesToDo samples.
+ *
+ * InSamples[0..2] are read as the W, X and Y inputs, and all buffers are
+ * treated as 16-byte aligned. LeftOut and RightOut are both read and
+ * written: their existing contents are delayed by sFilterDelay samples and
+ * the encoded signal is then added on top. SamplesToDo must be non-zero.
+ */
+ template<size_t N>
+ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
+ const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
+ {
+ const auto &PShift = GetPhaseShifter<N>::Get();
+
+ ASSUME(SamplesToDo > 0);
+
+ const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0])};
+ const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1])};
+ const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2])};
+
+ /* Append the new input after the sFilterDelay samples held over from the
+ * previous call, so index i below addresses the delayed signal.
+ */
+ std::copy_n(winput, SamplesToDo, mW.begin()+sFilterDelay);
+ std::copy_n(xinput, SamplesToDo, mX.begin()+sFilterDelay);
+ std::copy_n(yinput, SamplesToDo, mY.begin()+sFilterDelay);
+
+ /* S = 0.9396926*W + 0.1855740*X */
+ for(size_t i{0};i < SamplesToDo;++i)
+ mS[i] = 0.9396926f*mW[i] + 0.1855740f*mX[i];
+
+ /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mD. */
+ /* The new mix goes in at sWXInOffset, after the history kept in mWX. */
+ std::transform(winput, winput+SamplesToDo, xinput, mWX.begin() + sWXInOffset,
+ [](const float w, const float x) noexcept -> float
+ { return -0.3420201f*w + 0.5098604f*x; });
+ PShift.process({mD.data(), SamplesToDo}, mWX.data());
+
+ /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
+ for(size_t i{0};i < SamplesToDo;++i)
+ mD[i] = mD[i] + 0.6554516f*mY[i];
+
+ /* Copy the future samples to the front for next time. */
+ std::copy(mW.cbegin()+SamplesToDo, mW.cbegin()+SamplesToDo+sFilterDelay, mW.begin());
+ std::copy(mX.cbegin()+SamplesToDo, mX.cbegin()+SamplesToDo+sFilterDelay, mX.begin());
+ std::copy(mY.cbegin()+SamplesToDo, mY.cbegin()+SamplesToDo+sFilterDelay, mY.begin());
+ std::copy(mWX.cbegin()+SamplesToDo, mWX.cbegin()+SamplesToDo+sWXInOffset, mWX.begin());
+
+ /* Apply a delay to the existing output to align with the input delay. */
+ auto *delayBuffer = mDirectDelay.data();
+ for(float *buffer : {LeftOut, RightOut})
+ {
+ float *distbuf{al::assume_aligned<16>(delayBuffer->data())};
+ ++delayBuffer;
+
+ float *inout{al::assume_aligned<16>(buffer)};
+ auto inout_end = inout + SamplesToDo;
+ if(SamplesToDo >= sFilterDelay) LIKELY
+ {
+ /* Move the newest sFilterDelay samples to the front of the block,
+ * then exchange them with the samples held over from last time.
+ */
+ auto delay_end = std::rotate(inout, inout_end - sFilterDelay, inout_end);
+ std::swap_ranges(inout, delay_end, distbuf);
+ }
+ else
+ {
+ /* The block is shorter than the delay: exchange the whole block with
+ * the oldest held samples, then rotate the delay buffer so the
+ * samples still waiting come first.
+ */
+ auto delay_start = std::swap_ranges(inout, inout_end, distbuf);
+ std::rotate(distbuf, delay_start, distbuf + sFilterDelay);
+ }
+ }
+
+ /* Combine the direct signal with the produced output. */
+
+ /* Left = (S + D)/2.0 */
+ float *RESTRICT left{al::assume_aligned<16>(LeftOut)};
+ for(size_t i{0};i < SamplesToDo;i++)
+ left[i] += (mS[i] + mD[i]) * 0.5f;
+ /* Right = (S - D)/2.0 */
+ float *RESTRICT right{al::assume_aligned<16>(RightOut)};
+ for(size_t i{0};i < SamplesToDo;i++)
+ right[i] += (mS[i] - mD[i]) * 0.5f;
+ }
+
+/* This encoding implementation uses two sets of four chained IIR filters to
+ * produce the desired relative phase shift. The first filter chain produces a
+ * phase shift of varying degrees over a wide range of frequencies, while the
+ * second filter chain produces a phase shift 90 degrees ahead of the first
+ * over the same range. Further details are described here:
+ *
+ * https://web.archive.org/web/20060708031958/http://www.biochem.oulu.fi/~oniemita/dsp/hilbert/
+ *
+ * 2-channel UHJ output requires the use of three filter chains. The S channel
+ * output uses a Filter1 chain on the W and X channel mix, while the D channel
+ * output uses a Filter1 chain on the Y channel plus a Filter2 chain on the W
+ * and X channel mix. This results in the W and X input mix on the D channel
+ * output having the required +90 degree phase shift relative to the other
+ * inputs.
+ */
+ /* IIR-based 2-channel UHJ encode of one block of SamplesToDo samples.
+  *
+  * InSamples[0..2] are read as the W, X and Y inputs, and all buffers are
+  * treated as 16-byte aligned. LeftOut and RightOut are both read and
+  * written: their existing contents are run through the base (Filter1)
+  * all-pass with its 1-sample delay, and the result is summed with the
+  * encoded signal. SamplesToDo must be non-zero. Every filter commits its
+  * state on each call.
+  */
+ void UhjEncoderIIR::encode(float *LeftOut, float *RightOut,
+     const al::span<const float *const, 3> InSamples, const size_t SamplesToDo)
+ {
+     ASSUME(SamplesToDo > 0);
+
+     const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0])};
+     const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1])};
+     const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2])};
+
+     /* S = 0.9396926*W + 0.1855740*X */
+     std::transform(winput, winput+SamplesToDo, xinput, mTemp.begin(),
+         [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });
+     /* Filter1 output is written one slot late; the sample carried over from
+      * the previous block fills slot 0 and the last one is saved for next time.
+      */
+     mFilter1WX.process(Filter1Coeff, {mTemp.data(), SamplesToDo}, true, mS.data()+1);
+     mS[0] = mDelayWX; mDelayWX = mS[SamplesToDo];
+
+     /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mWX. */
+     std::transform(winput, winput+SamplesToDo, xinput, mTemp.begin(),
+         [](const float w, const float x) noexcept { return -0.3420201f*w + 0.5098604f*x; });
+     mFilter2WX.process(Filter2Coeff, {mTemp.data(), SamplesToDo}, true, mWX.data());
+
+     /* Apply filter1 to Y and store in mD. The update flag is spelled out as
+      * 'true' like the sibling calls, not as an implicitly converted sample
+      * count.
+      */
+     mFilter1Y.process(Filter1Coeff, {yinput, SamplesToDo}, true, mD.data()+1);
+     mD[0] = mDelayY; mDelayY = mD[SamplesToDo];
+
+     /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
+     for(size_t i{0};i < SamplesToDo;++i)
+         mD[i] = mWX[i] + 0.6554516f*mD[i];
+
+     /* Apply the base filter to the existing output to align with the processed
+      * signal.
+      */
+     mFilter1Direct[0].process(Filter1Coeff, {LeftOut, SamplesToDo}, true, mTemp.data()+1);
+     mTemp[0] = mDirectDelay[0]; mDirectDelay[0] = mTemp[SamplesToDo];
+
+     /* Left = (S + D)/2.0 */
+     float *RESTRICT left{al::assume_aligned<16>(LeftOut)};
+     for(size_t i{0};i < SamplesToDo;i++)
+         left[i] = (mS[i] + mD[i])*0.5f + mTemp[i];
+
+     mFilter1Direct[1].process(Filter1Coeff, {RightOut, SamplesToDo}, true, mTemp.data()+1);
+     mTemp[0] = mDirectDelay[1]; mDirectDelay[1] = mTemp[SamplesToDo];
+
+     /* Right = (S - D)/2.0 */
+     float *RESTRICT right{al::assume_aligned<16>(RightOut)};
+     for(size_t i{0};i < SamplesToDo;i++)
+         right[i] = (mS[i] - mD[i])*0.5f + mTemp[i];
+ }
+
+
+/* Decoding UHJ is done as:
+ *
+ * S = Left + Right
+ * D = Left - Right
+ *
+ * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
+ * X = 0.418496*S - j(0.828331*D + 0.767820*T)
+ * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
+ * Z = 1.023332*Q
+ *
+ * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
+ * channel excludes Q and T.
+ */
+ /* FIR-based UHJ decode, done in place on the channel buffers.
+ *
+ * On entry samples[0..2] are read as Left, Right and T; on return they hold
+ * W, X and Y. If a fourth buffer is present it is scaled in place (Q -> Z).
+ * The first three inputs are read for samplesToDo+sInputPadding samples,
+ * while only samplesToDo output samples are written. The history buffers are
+ * only advanced when updateState is true. samplesToDo must be non-zero.
+ */
+ template<size_t N>
+ void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState)
+ {
+ static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
+
+ const auto &PShift = GetPhaseShifter<N>::Get();
+
+ ASSUME(samplesToDo > 0);
+
+ /* Copy the inputs out first, since the same buffers receive the output. */
+ {
+ const float *RESTRICT left{al::assume_aligned<16>(samples[0])};
+ const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
+ const float *RESTRICT t{al::assume_aligned<16>(samples[2])};
+
+ /* S = Left + Right */
+ for(size_t i{0};i < samplesToDo+sInputPadding;++i)
+ mS[i] = left[i] + right[i];
+
+ /* D = Left - Right */
+ for(size_t i{0};i < samplesToDo+sInputPadding;++i)
+ mD[i] = left[i] - right[i];
+
+ /* T */
+ for(size_t i{0};i < samplesToDo+sInputPadding;++i)
+ mT[i] = t[i];
+ }
+
+ float *RESTRICT woutput{al::assume_aligned<16>(samples[0])};
+ float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])};
+ float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
+
+ /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
+ /* mTemp = saved history followed by the new mix. */
+ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
+ std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sInputPadding, mT.cbegin(), tmpiter,
+ [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
+ /* Keep the samples starting samplesToDo into mTemp as the next history. */
+ if(updateState) LIKELY
+ std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
+ PShift.process({xoutput, samplesToDo}, mTemp.data());
+
+ /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
+ for(size_t i{0};i < samplesToDo;++i)
+ woutput[i] = 0.981532f*mS[i] + 0.197484f*xoutput[i];
+ /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
+ for(size_t i{0};i < samplesToDo;++i)
+ xoutput[i] = 0.418496f*mS[i] - xoutput[i];
+
+ /* Precompute j*S and store in youtput. */
+ tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
+ std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
+ if(updateState) LIKELY
+ std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
+ PShift.process({youtput, samplesToDo}, mTemp.data());
+
+ /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
+ for(size_t i{0};i < samplesToDo;++i)
+ youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i];
+
+ if(samples.size() > 3)
+ {
+ float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])};
+ /* Z = 1.023332*Q */
+ for(size_t i{0};i < samplesToDo;++i)
+ zoutput[i] = 1.023332f*zoutput[i];
+ }
+ }
+
+ /* IIR-based UHJ decode, done in place on the channel buffers.
+ *
+ * On entry samples[0..2] are read as Left, Right and T; on return they hold
+ * W, X and Y. If a fourth buffer is present it is filtered and scaled in
+ * place (Q -> Z). Exactly samplesToDo samples are read and written per
+ * channel. Filter state and the 1-sample delay values are only stored when
+ * updateState is true. samplesToDo must be non-zero.
+ */
+ void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState)
+ {
+ static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
+
+ ASSUME(samplesToDo > 0);
+
+ {
+ const float *RESTRICT left{al::assume_aligned<16>(samples[0])};
+ const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
+
+ /* S = Left + Right */
+ for(size_t i{0};i < samplesToDo;++i)
+ mS[i] = left[i] + right[i];
+
+ /* D = Left - Right */
+ for(size_t i{0};i < samplesToDo;++i)
+ mD[i] = left[i] - right[i];
+ }
+
+ float *RESTRICT woutput{al::assume_aligned<16>(samples[0])};
+ float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])};
+ float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
+
+ /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
+ /* T is still untouched in youtput (samples[2]) at this point. */
+ std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, mTemp.begin(),
+ [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
+ mFilter2DT.process(Filter2Coeff, {mTemp.data(), samplesToDo}, updateState, xoutput);
+
+ /* Apply filter1 to S and store in mTemp. */
+ /* The output goes in one slot late; slot 0 is the sample carried over from
+ * the previous call.
+ */
+ mTemp[0] = mDelayS;
+ mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1);
+ if(updateState) LIKELY mDelayS = mTemp[samplesToDo];
+
+ /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
+ for(size_t i{0};i < samplesToDo;++i)
+ woutput[i] = 0.981532f*mTemp[i] + 0.197484f*xoutput[i];
+ /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
+ for(size_t i{0};i < samplesToDo;++i)
+ xoutput[i] = 0.418496f*mTemp[i] - xoutput[i];
+
+
+ /* Apply filter1 to (0.795968*D - 0.676392*T) and store in mTemp. */
+ /* This overwrites T in youtput; it must come after the D/T mix above. */
+ std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, youtput,
+ [](const float d, const float t) noexcept { return 0.795968f*d - 0.676392f*t; });
+ mTemp[0] = mDelayDT;
+ mFilter1DT.process(Filter1Coeff, {youtput, samplesToDo}, updateState, mTemp.data()+1);
+ if(updateState) LIKELY mDelayDT = mTemp[samplesToDo];
+
+ /* Precompute j*S and store in youtput. */
+ mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput);
+
+ /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
+ for(size_t i{0};i < samplesToDo;++i)
+ youtput[i] = mTemp[i] + 0.186633f*youtput[i];
+
+
+ if(samples.size() > 3)
+ {
+ float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])};
+
+ /* Apply filter1 to Q and store in mTemp. */
+ mTemp[0] = mDelayQ;
+ mFilter1Q.process(Filter1Coeff, {zoutput, samplesToDo}, updateState, mTemp.data()+1);
+ if(updateState) LIKELY mDelayQ = mTemp[samplesToDo];
+
+ /* Z = 1.023332*Q */
+ for(size_t i{0};i < samplesToDo;++i)
+ zoutput[i] = 1.023332f*mTemp[i];
+ }
+ }
+
+
+/* Super Stereo processing is done as:
+ *
+ * S = Left + Right
+ * D = Left - Right
+ *
+ * W = 0.6098637*S - 0.6896511*j*w*D
+ * X = 0.8624776*S + 0.7626955*j*w*D
+ * Y = 1.6822415*w*D - 0.2156194*j*S
+ *
+ * where j is a +90 degree phase shift. w is a variable control for the
+ * resulting stereo width, with the range 0 <= w <= 0.7.
+ */
+ /* FIR-based Super Stereo decode, done in place on the channel buffers.
+ *
+ * On entry samples[0..1] are read as Left and Right; on return samples[0..2]
+ * hold W, X and Y. The inputs are read for samplesToDo+sInputPadding
+ * samples, while only samplesToDo output samples are written. The history
+ * buffers are only advanced when updateState is true. samplesToDo must be
+ * non-zero.
+ */
+ template<size_t N>
+ void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState)
+ {
+ static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
+
+ const auto &PShift = GetPhaseShifter<N>::Get();
+
+ ASSUME(samplesToDo > 0);
+
+ {
+ const float *RESTRICT left{al::assume_aligned<16>(samples[0])};
+ const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
+
+ for(size_t i{0};i < samplesToDo+sInputPadding;++i)
+ mS[i] = left[i] + right[i];
+
+ /* Pre-apply the width factor to the difference signal D. Smoothly
+ * interpolate when it changes.
+ */
+ const float wtarget{mWidthControl};
+ /* A negative mCurrentWidth is treated as "not set yet" and adopts the
+ * target directly.
+ */
+ const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
+ if(wtarget == wcurrent || !updateState)
+ {
+ for(size_t i{0};i < samplesToDo+sInputPadding;++i)
+ mD[i] = (left[i] - right[i]) * wcurrent;
+ /* NOTE: this is stored even when updateState is false. */
+ mCurrentWidth = wcurrent;
+ }
+ else
+ {
+ /* Ramp linearly from the current width to the target over the block;
+ * the padding samples past the block use the target width.
+ */
+ const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
+ float fi{0.0f};
+ for(size_t i{0};i < samplesToDo;++i)
+ {
+ mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi);
+ fi += 1.0f;
+ }
+ for(size_t i{samplesToDo};i < samplesToDo+sInputPadding;++i)
+ mD[i] = (left[i] - right[i]) * wtarget;
+ mCurrentWidth = wtarget;
+ }
+ }
+
+ float *RESTRICT woutput{al::assume_aligned<16>(samples[0])};
+ float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])};
+ float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
+
+ /* Precompute j*D and store in xoutput. */
+ /* mTemp = saved history followed by the new width-scaled D. */
+ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
+ std::copy_n(mD.cbegin(), samplesToDo+sInputPadding, tmpiter);
+ if(updateState) LIKELY
+ std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
+ PShift.process({xoutput, samplesToDo}, mTemp.data());
+
+ /* W = 0.6098637*S - 0.6896511*j*w*D */
+ for(size_t i{0};i < samplesToDo;++i)
+ woutput[i] = 0.6098637f*mS[i] - 0.6896511f*xoutput[i];
+ /* X = 0.8624776*S + 0.7626955*j*w*D */
+ for(size_t i{0};i < samplesToDo;++i)
+ xoutput[i] = 0.8624776f*mS[i] + 0.7626955f*xoutput[i];
+
+ /* Precompute j*S and store in youtput. */
+ tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
+ std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
+ if(updateState) LIKELY
+ std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
+ PShift.process({youtput, samplesToDo}, mTemp.data());
+
+ /* Y = 1.6822415*w*D - 0.2156194*j*S */
+ for(size_t i{0};i < samplesToDo;++i)
+ youtput[i] = 1.6822415f*mD[i] - 0.2156194f*youtput[i];
+ }
+
+ /* IIR-based Super Stereo decode, done in place on the channel buffers.
+ *
+ * On entry samples[0..1] are read as Left and Right; on return samples[0..2]
+ * hold W, X and Y. Exactly samplesToDo samples are read and written per
+ * channel. Filter state and the 1-sample delay values are only stored when
+ * updateState is true. samplesToDo must be non-zero.
+ */
+ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState)
+ {
+ static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
+
+ ASSUME(samplesToDo > 0);
+
+ {
+ const float *RESTRICT left{al::assume_aligned<16>(samples[0])};
+ const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
+
+ for(size_t i{0};i < samplesToDo;++i)
+ mS[i] = left[i] + right[i];
+
+ /* Pre-apply the width factor to the difference signal D. Smoothly
+ * interpolate when it changes.
+ */
+ const float wtarget{mWidthControl};
+ /* A negative mCurrentWidth is treated as "not set yet" and adopts the
+ * target directly.
+ */
+ const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
+ if(wtarget == wcurrent || !updateState)
+ {
+ for(size_t i{0};i < samplesToDo;++i)
+ mD[i] = (left[i] - right[i]) * wcurrent;
+ /* NOTE: this is stored even when updateState is false. */
+ mCurrentWidth = wcurrent;
+ }
+ else
+ {
+ /* Ramp linearly from the current width to the target over the block. */
+ const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
+ float fi{0.0f};
+ for(size_t i{0};i < samplesToDo;++i)
+ {
+ mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi);
+ fi += 1.0f;
+ }
+ mCurrentWidth = wtarget;
+ }
+ }
+
+ float *RESTRICT woutput{al::assume_aligned<16>(samples[0])};
+ float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])};
+ float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
+
+ /* Apply filter1 to S and store in mTemp. */
+ /* The output goes in one slot late; slot 0 is the sample carried over from
+ * the previous call.
+ */
+ mTemp[0] = mDelayS;
+ mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1);
+ if(updateState) LIKELY mDelayS = mTemp[samplesToDo];
+
+ /* Precompute j*D and store in xoutput. */
+ mFilter2D.process(Filter2Coeff, {mD.data(), samplesToDo}, updateState, xoutput);
+
+ /* W = 0.6098637*S - 0.6896511*j*w*D */
+ for(size_t i{0};i < samplesToDo;++i)
+ woutput[i] = 0.6098637f*mTemp[i] - 0.6896511f*xoutput[i];
+ /* X = 0.8624776*S + 0.7626955*j*w*D */
+ for(size_t i{0};i < samplesToDo;++i)
+ xoutput[i] = 0.8624776f*mTemp[i] + 0.7626955f*xoutput[i];
+
+ /* Precompute j*S and store in youtput. */
+ mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput);
+
+ /* Apply filter1 to D and store in mTemp. */
+ /* mTemp is reused here; the filtered S it held is no longer needed. */
+ mTemp[0] = mDelayD;
+ mFilter1D.process(Filter1Coeff, {mD.data(), samplesToDo}, updateState, mTemp.data()+1);
+ if(updateState) LIKELY mDelayD = mTemp[samplesToDo];
+
+ /* Y = 1.6822415*w*D - 0.2156194*j*S */
+ for(size_t i{0};i < samplesToDo;++i)
+ youtput[i] = 1.6822415f*mTemp[i] - 0.2156194f*youtput[i];
+ }
+
+
+template struct UhjEncoder<UhjLength256>;
+template struct UhjDecoder<UhjLength256>;
+template struct UhjStereoDecoder<UhjLength256>;
+
+template struct UhjEncoder<UhjLength512>;
+template struct UhjDecoder<UhjLength512>;
+template struct UhjStereoDecoder<UhjLength512>;
diff --git a/core/uhjfilter.h b/core/uhjfilter.h
new file mode 100644
index 00000000..df308094
--- /dev/null
+++ b/core/uhjfilter.h
@@ -0,0 +1,234 @@
+#ifndef CORE_UHJFILTER_H
+#define CORE_UHJFILTER_H
+
+#include <array>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "bufferline.h"
+
+
+/* Filter lengths used as the template argument of UhjEncoder, UhjDecoder and
+ * UhjStereoDecoder.
+ */
+static constexpr size_t UhjLength256{256};
+static constexpr size_t UhjLength512{512};
+
+/* Selects which UHJ filter implementation to use. Default is an alias of IIR.
+ * NOTE(review): FIR256/FIR512 presumably map to the UhjLength256/UhjLength512
+ * template instances - confirm where these values are consumed.
+ */
+enum class UhjQualityType : uint8_t {
+ IIR = 0,
+ FIR256,
+ FIR512,
+ Default = IIR
+};
+
+/* Global quality selections for decoding and encoding; defined elsewhere. */
+extern UhjQualityType UhjDecodeQuality;
+extern UhjQualityType UhjEncodeQuality;
+
+
+/* All-pass filter holding four two-element states, processed with a set of
+ * four coefficients.
+ */
+struct UhjAllPassFilter {
+ struct AllPassState {
+ /* Last two delayed components for direct form II. */
+ float z[2];
+ };
+ /* NOTE(review): presumably one state per coefficient in coeffs - confirm
+ * against the definition of process().
+ */
+ std::array<AllPassState,4> mState;
+
+ /* Filters src into dst using the four given coefficients. dst must not
+ * alias other accessed storage (RESTRICT).
+ * NOTE(review): update presumably controls whether mState is stored back
+ * after processing - confirm in the implementation.
+ */
+ void process(const al::span<const float,4> coeffs, const al::span<const float> src,
+ const bool update, float *RESTRICT dst);
+};
+
+
+/* Abstract interface implemented by UhjEncoder<N> and UhjEncoderIIR. */
+struct UhjEncoderBase {
+ virtual ~UhjEncoderBase() = default;
+
+ /* Returns the implementation's sFilterDelay value. */
+ virtual size_t getDelay() noexcept = 0;
+
+ /**
+ * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input
+ * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa
+ * with an additional +3dB boost).
+ */
+ virtual void encode(float *LeftOut, float *RightOut,
+ const al::span<const float*const,3> InSamples, const size_t SamplesToDo) = 0;
+};
+
+/* UHJ encoder templated on the filter length N. It reports a delay of N/2
+ * through getDelay().
+ */
+template<size_t N>
+struct UhjEncoder final : public UhjEncoderBase {
+ static constexpr size_t sFilterDelay{N/2};
+
+ /* Delays and processing storage for the input signal. */
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mW{};
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mX{};
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mY{};
+
+ /* One buffer line each of working storage.
+ * NOTE(review): presumably the S (sum) and D (difference) signals - confirm
+ * in encode().
+ */
+ alignas(16) std::array<float,BufferLineSize> mS{};
+ alignas(16) std::array<float,BufferLineSize> mD{};
+
+ /* History and temp storage for the FIR filter. New samples should be
+ * written to index sFilterDelay*2 - 1.
+ */
+ static constexpr size_t sWXInOffset{sFilterDelay*2 - 1};
+ alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mWX{};
+
+ /* Two delay lines of sFilterDelay samples each.
+ * NOTE(review): presumably for the direct left/right output - confirm in
+ * encode().
+ */
+ alignas(16) std::array<std::array<float,sFilterDelay>,2> mDirectDelay{};
+
+ size_t getDelay() noexcept override { return sFilterDelay; }
+
+ /**
+ * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input
+ * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa
+ * with an additional +3dB boost).
+ */
+ void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples,
+ const size_t SamplesToDo) override;
+
+ DEF_NEWDEL(UhjEncoder)
+};
+
+/* UHJ encoder built from UhjAllPassFilter instances. It reports a delay of one
+ * sample through getDelay().
+ */
+struct UhjEncoderIIR final : public UhjEncoderBase {
+ static constexpr size_t sFilterDelay{1};
+
+ /* Processing storage for the input signal. */
+ alignas(16) std::array<float,BufferLineSize+1> mS{};
+ alignas(16) std::array<float,BufferLineSize+1> mD{};
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mWX{};
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mTemp{};
+ /* NOTE(review): presumably the last filtered sample carried over between
+ * calls to provide the one-sample delay - confirm in encode().
+ */
+ float mDelayWX{}, mDelayY{};
+
+ UhjAllPassFilter mFilter1WX;
+ UhjAllPassFilter mFilter2WX;
+ UhjAllPassFilter mFilter1Y;
+
+ /* One filter and one delayed sample per array entry.
+ * NOTE(review): presumably for the two direct channels - confirm in
+ * encode().
+ */
+ std::array<UhjAllPassFilter,2> mFilter1Direct;
+ std::array<float,2> mDirectDelay{};
+
+ size_t getDelay() noexcept override { return sFilterDelay; }
+
+ /**
+ * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input
+ * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa
+ * with an additional +3dB boost).
+ */
+ void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples,
+ const size_t SamplesToDo) override;
+
+ DEF_NEWDEL(UhjEncoderIIR)
+};
+
+
+/* Abstract interface shared by the UHJ and Super Stereo decoders. */
+struct DecoderBase {
+ /* Equals the sInputPadding (N/2) of the templated decoders when N is
+ * UhjLength512.
+ */
+ static constexpr size_t sMaxPadding{256};
+
+ /* For 2-channel UHJ, shelf filters should use these LF responses. */
+ static constexpr float sWLFScale{0.661f};
+ static constexpr float sXYLFScale{1.293f};
+
+ virtual ~DecoderBase() = default;
+
+ /* Decodes samplesToDo sample frames in place in the given channel buffers.
+ * The visible IIR implementation only stores its carried-over delay
+ * samples when updateState is true.
+ */
+ virtual void decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState) = 0;
+
+ /**
+ * The width factor for Super Stereo processing. Can be changed in between
+ * calls to decode, with valid values being between 0...0.7.
+ */
+ float mWidthControl{0.593f};
+};
+
+/* UHJ decoder templated on the filter length N. It needs N/2 extra input
+ * sample frames (sInputPadding).
+ */
+template<size_t N>
+struct UhjDecoder final : public DecoderBase {
+ /* The number of extra sample frames needed for input. */
+ static constexpr size_t sInputPadding{N/2};
+
+ /* Working storage sized for one buffer line plus the input padding. */
+ alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{};
+ alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{};
+ alignas(16) std::array<float,BufferLineSize+sInputPadding> mT{};
+
+ /* History storage of sInputPadding-1 samples each.
+ * NOTE(review): presumably filter input history kept between calls -
+ * confirm in decode().
+ */
+ alignas(16) std::array<float,sInputPadding-1> mDTHistory{};
+ alignas(16) std::array<float,sInputPadding-1> mSHistory{};
+
+ alignas(16) std::array<float,BufferLineSize + sInputPadding*2> mTemp{};
+
+ /**
+ * Decodes a 3- or 4-channel UHJ signal into a B-Format signal with FuMa
+ * channel ordering and UHJ scaling. For 3-channel, the 3rd channel may be
+ * attenuated by 'n', where 0 <= n <= 1. So to decode 2-channel UHJ, supply
+ * 3 channels with the 3rd channel silent (n=0). The B-Format signal
+ * reconstructed from 2-channel UHJ should not be run through a normal
+ * B-Format decoder, as it needs different shelf filters.
+ */
+ void decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState) override;
+
+ DEF_NEWDEL(UhjDecoder)
+};
+
+/* UHJ decoder built from UhjAllPassFilter instances; needs no extra input
+ * samples.
+ */
+struct UhjDecoderIIR final : public DecoderBase {
+ /* FIXME: These IIR decoder filters actually have a 1-sample delay on the
+ * non-filtered components, which is not reflected in the source latency
+ * value. sInputPadding is 0, however, because it doesn't need any extra
+ * input samples.
+ */
+ static constexpr size_t sInputPadding{0};
+
+ alignas(16) std::array<float,BufferLineSize> mS{};
+ alignas(16) std::array<float,BufferLineSize> mD{};
+ /* One sample larger than a buffer line.
+ * NOTE(review): presumably index 0 holds a delayed sample carried over
+ * from the previous call, as in UhjStereoDecoderIIR - confirm in decode().
+ */
+ alignas(16) std::array<float,BufferLineSize+1> mTemp{};
+ float mDelayS{}, mDelayDT{}, mDelayQ{};
+
+ UhjAllPassFilter mFilter1S;
+ UhjAllPassFilter mFilter2DT;
+ UhjAllPassFilter mFilter1DT;
+ UhjAllPassFilter mFilter2S;
+ UhjAllPassFilter mFilter1Q;
+
+ void decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState) override;
+
+ DEF_NEWDEL(UhjDecoderIIR)
+};
+
+/* Super Stereo decoder templated on the filter length N. It needs N/2 extra
+ * input sample frames (sInputPadding).
+ */
+template<size_t N>
+struct UhjStereoDecoder final : public DecoderBase {
+ static constexpr size_t sInputPadding{N/2};
+
+ /* NOTE(review): presumably the width factor applied by the previous
+ * decode() call, with the negative initial value marking "not yet set" -
+ * confirm in decode().
+ */
+ float mCurrentWidth{-1.0f};
+
+ alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{};
+ alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{};
+
+ alignas(16) std::array<float,sInputPadding-1> mDTHistory{};
+ alignas(16) std::array<float,sInputPadding-1> mSHistory{};
+
+ alignas(16) std::array<float,BufferLineSize + sInputPadding*2> mTemp{};
+
+ /**
+ * Applies Super Stereo processing on a stereo signal to create a B-Format
+ * signal with FuMa channel ordering and UHJ scaling. The samples span
+ * should contain 3 channels, the first two being the left and right stereo
+ * channels, and the third left empty.
+ */
+ void decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState) override;
+
+ DEF_NEWDEL(UhjStereoDecoder)
+};
+
+/* Super Stereo decoder built from UhjAllPassFilter instances; needs no extra
+ * input samples.
+ */
+struct UhjStereoDecoderIIR final : public DecoderBase {
+ static constexpr size_t sInputPadding{0};
+
+ /* Set to the target width at the end of a width ramp in decode(). */
+ float mCurrentWidth{-1.0f};
+
+ alignas(16) std::array<float,BufferLineSize> mS{};
+ alignas(16) std::array<float,BufferLineSize> mD{};
+ /* decode() puts the carried-over delay sample at index 0 and writes the
+ * filter output starting at index 1, hence the extra element.
+ */
+ alignas(16) std::array<float,BufferLineSize+1> mTemp{};
+ /* Last filter1 output of S and D, saved by decode() when updateState is
+ * true and placed in mTemp[0] on the next call.
+ */
+ float mDelayS{}, mDelayD{};
+
+ UhjAllPassFilter mFilter1S;
+ UhjAllPassFilter mFilter2D;
+ UhjAllPassFilter mFilter1D;
+ UhjAllPassFilter mFilter2S;
+
+ void decode(const al::span<float*> samples, const size_t samplesToDo,
+ const bool updateState) override;
+
+ DEF_NEWDEL(UhjStereoDecoderIIR)
+};
+
+#endif /* CORE_UHJFILTER_H */
diff --git a/core/uiddefs.cpp b/core/uiddefs.cpp
new file mode 100644
index 00000000..244c01a5
--- /dev/null
+++ b/core/uiddefs.cpp
@@ -0,0 +1,37 @@
+
+#include "config.h"
+
+
+/* Provides the GUID and property key definitions listed below, unless
+ * AL_NO_UID_DEFS is defined. INITGUID is defined before including the GUID
+ * headers so that the DEFINE_GUID family of macros emit definitions rather
+ * than extern declarations.
+ */
+#ifndef AL_NO_UID_DEFS
+
+#if defined(HAVE_GUIDDEF_H) || defined(HAVE_INITGUID_H)
+#define INITGUID
+#include <windows.h>
+#ifdef HAVE_GUIDDEF_H
+#include <guiddef.h>
+#else
+#include <initguid.h>
+#endif
+
+/* Wave format subtype identifiers. */
+DEFINE_GUID(KSDATAFORMAT_SUBTYPE_PCM, 0x00000001, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71);
+DEFINE_GUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, 0x00000003, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71);
+
+DEFINE_GUID(IID_IDirectSoundNotify, 0xb0210783, 0x89cd, 0x11d0, 0xaf,0x08, 0x00,0xa0,0xc9,0x25,0xcd,0x16);
+
+DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e,0x3d, 0xc4,0x57,0x92,0x91,0x69,0x2e);
+DEFINE_GUID(IID_IMMDeviceEnumerator, 0xa95664d2, 0x9614, 0x4f35, 0xa7,0x46, 0xde,0x8d,0xb6,0x36,0x17,0xe6);
+DEFINE_GUID(IID_IAudioClient, 0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1,0x78, 0xc2,0xf5,0x68,0xa7,0x03,0xb2);
+DEFINE_GUID(IID_IAudioRenderClient, 0xf294acfc, 0x3146, 0x4483, 0xa7,0xbf, 0xad,0xdc,0xa7,0xc2,0x60,0xe2);
+DEFINE_GUID(IID_IAudioCaptureClient, 0xc8adbd64, 0xe71e, 0x48a0, 0xa4,0xde, 0x18,0x5c,0x39,0x5c,0xd3,0x17);
+
+/* Property keys are only defined when building with WASAPI support. */
+#ifdef HAVE_WASAPI
+#include <wtypes.h>
+#include <devpropdef.h>
+#include <propkeydef.h>
+DEFINE_DEVPROPKEY(DEVPKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80,0x20, 0x67,0xd1,0x46,0xa8,0x50,0xe0, 14);
+DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_FormFactor, 0x1da5d803, 0xd492, 0x4edd, 0x8c,0x23, 0xe0,0xc0,0xff,0xee,0x7f,0x0e, 0);
+DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_GUID, 0x1da5d803, 0xd492, 0x4edd, 0x8c, 0x23,0xe0, 0xc0,0xff,0xee,0x7f,0x0e, 4 );
+#endif
+#endif
+
+#endif /* AL_NO_UID_DEFS */
diff --git a/core/voice.cpp b/core/voice.cpp
new file mode 100644
index 00000000..e8fbcccd
--- /dev/null
+++ b/core/voice.cpp
@@ -0,0 +1,1304 @@
+
+#include "config.h"
+
+#include "voice.h"
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <new>
+#include <stdlib.h>
+#include <utility>
+#include <vector>
+
+#include "albyte.h"
+#include "alnumeric.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "alstring.h"
+#include "ambidefs.h"
+#include "async_event.h"
+#include "buffer_storage.h"
+#include "context.h"
+#include "cpu_caps.h"
+#include "devformat.h"
+#include "device.h"
+#include "filters/biquad.h"
+#include "filters/nfc.h"
+#include "filters/splitter.h"
+#include "fmt_traits.h"
+#include "logging.h"
+#include "mixer.h"
+#include "mixer/defs.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "resampler_limits.h"
+#include "ringbuffer.h"
+#include "vector.h"
+#include "voice_change.h"
+
+/* Tag types used as the template argument that picks a mixer implementation
+ * (see the Select* helpers below). The SSE and NEON tags only exist when the
+ * corresponding support is compiled in.
+ */
+struct CTag;
+#ifdef HAVE_SSE
+struct SSETag;
+#endif
+#ifdef HAVE_NEON
+struct NEONTag;
+#endif
+
+
+/* Compile-time sanity checks on buffer sizes and fixed-point limits. */
+static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15),
+ "DeviceBase::MixerBufferLine must be a multiple of 16 bytes");
+static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4");
+
+static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
+static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
+ "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
+
+/* Default resampler type. Voice::InitMixer overwrites it when given a valid
+ * resampler name.
+ */
+Resampler ResamplerDefault{Resampler::Cubic};
+
+namespace {
+
+using uint = unsigned int;
+using namespace std::chrono;
+
+/* Signature of the HRTF mixing routine using a single set of parameters. */
+using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const MixHrtfFilter *hrtfparams, const size_t BufferSize);
+/* Signature of the HRTF mixing routine that blends from old to new
+ * parameters.
+ */
+using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples,
+ const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
+ const size_t BufferSize);
+
+/* Active HRTF mixers. They start as the C implementations and are replaced by
+ * Voice::InitMixer with the result of the Select* helpers.
+ */
+HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
+HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
+
+/* Picks the buffer-line mixing routine for the running CPU: NEON when compiled
+ * in and flagged, otherwise SSE when compiled in and flagged, otherwise the
+ * plain C implementation.
+ */
+inline MixerOutFunc SelectMixer()
+{
+    MixerOutFunc chosen = Mix_<CTag>;
+#ifdef HAVE_SSE
+    if((CPUCapFlags&CPU_CAP_SSE) != 0)
+        chosen = Mix_<SSETag>;
+#endif
+#ifdef HAVE_NEON
+    /* Checked last so that NEON takes precedence over SSE. */
+    if((CPUCapFlags&CPU_CAP_NEON) != 0)
+        chosen = Mix_<NEONTag>;
+#endif
+    return chosen;
+}
+
+/* Picks the single-output mixing routine for the running CPU: NEON when
+ * compiled in and flagged, otherwise SSE when compiled in and flagged,
+ * otherwise the plain C implementation.
+ */
+inline MixerOneFunc SelectMixerOne()
+{
+    MixerOneFunc chosen = Mix_<CTag>;
+#ifdef HAVE_SSE
+    if((CPUCapFlags&CPU_CAP_SSE) != 0)
+        chosen = Mix_<SSETag>;
+#endif
+#ifdef HAVE_NEON
+    /* Checked last so that NEON takes precedence over SSE. */
+    if((CPUCapFlags&CPU_CAP_NEON) != 0)
+        chosen = Mix_<NEONTag>;
+#endif
+    return chosen;
+}
+
+/* Picks the HRTF mixing routine for the running CPU: NEON when compiled in and
+ * flagged, otherwise SSE when compiled in and flagged, otherwise the plain C
+ * implementation.
+ */
+inline HrtfMixerFunc SelectHrtfMixer()
+{
+    HrtfMixerFunc chosen = MixHrtf_<CTag>;
+#ifdef HAVE_SSE
+    if((CPUCapFlags&CPU_CAP_SSE) != 0)
+        chosen = MixHrtf_<SSETag>;
+#endif
+#ifdef HAVE_NEON
+    /* Checked last so that NEON takes precedence over SSE. */
+    if((CPUCapFlags&CPU_CAP_NEON) != 0)
+        chosen = MixHrtf_<NEONTag>;
+#endif
+    return chosen;
+}
+
+/* Picks the blending HRTF mixing routine for the running CPU: NEON when
+ * compiled in and flagged, otherwise SSE when compiled in and flagged,
+ * otherwise the plain C implementation.
+ */
+inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
+{
+    HrtfMixerBlendFunc chosen = MixHrtfBlend_<CTag>;
+#ifdef HAVE_SSE
+    if((CPUCapFlags&CPU_CAP_SSE) != 0)
+        chosen = MixHrtfBlend_<SSETag>;
+#endif
+#ifdef HAVE_NEON
+    /* Checked last so that NEON takes precedence over SSE. */
+    if((CPUCapFlags&CPU_CAP_NEON) != 0)
+        chosen = MixHrtfBlend_<NEONTag>;
+#endif
+    return chosen;
+}
+
+} // namespace
+
+/* Applies the optional resampler name to ResamplerDefault, then selects the
+ * mixer implementations for the running CPU. Names are matched without regard
+ * to case; an unrecognized name is reported and leaves the default unchanged.
+ */
+void Voice::InitMixer(al::optional<std::string> resampler)
+{
+    if(resampler)
+    {
+        struct ResamplerEntry {
+            const char name[16];
+            const Resampler resampler;
+        };
+        constexpr ResamplerEntry ResamplerList[]{
+            { "none", Resampler::Point },
+            { "point", Resampler::Point },
+            { "linear", Resampler::Linear },
+            { "cubic", Resampler::Cubic },
+            { "bsinc12", Resampler::BSinc12 },
+            { "fast_bsinc12", Resampler::FastBSinc12 },
+            { "bsinc24", Resampler::BSinc24 },
+            { "fast_bsinc24", Resampler::FastBSinc24 },
+        };
+
+        /* Deprecated spellings are remapped, with a warning, before the
+         * table lookup.
+         */
+        const char *wanted{resampler->c_str()};
+        if(al::strcasecmp(wanted, "bsinc") == 0)
+        {
+            WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", wanted);
+            wanted = "bsinc12";
+        }
+        else if(al::strcasecmp(wanted, "sinc4") == 0 || al::strcasecmp(wanted, "sinc8") == 0)
+        {
+            WARN("Resampler option \"%s\" is deprecated, using cubic\n", wanted);
+            wanted = "cubic";
+        }
+
+        const ResamplerEntry *match{nullptr};
+        for(const ResamplerEntry &candidate : ResamplerList)
+        {
+            if(al::strcasecmp(wanted, candidate.name) == 0)
+            {
+                match = &candidate;
+                break;
+            }
+        }
+        if(match)
+            ResamplerDefault = match->resampler;
+        else
+            ERR("Invalid resampler: %s\n", wanted);
+    }
+
+    MixSamplesOut = SelectMixer();
+    MixSamplesOne = SelectMixerOne();
+    MixHrtfBlendSamples = SelectHrtfBlendMixer();
+    MixHrtfSamples = SelectHrtfMixer();
+}
+
+
+namespace {
+
+/* IMA ADPCM Stepsize table. Indexed by the decoder's step index, which is
+ * clamped to the valid range of this table before use.
+ */
+constexpr int IMAStep_size[89] = {
+ 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19,
+ 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55,
+ 60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157,
+ 173, 190, 209, 230, 253, 279, 307, 337, 371, 408, 449,
+ 494, 544, 598, 658, 724, 796, 876, 963, 1060, 1166, 1282,
+ 1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660,
+ 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630, 9493,10442,
+ 11487,12635,13899,15289,16818,18500,20350,22358,24633,27086,29794,
+ 32767
+};
+
+/* IMA4 ADPCM Codeword decode table. Indexed by the 4-bit nibble; the value is
+ * multiplied by the current step size and divided by 8 to get the sample
+ * delta.
+ */
+constexpr int IMA4Codeword[16] = {
+ 1, 3, 5, 7, 9, 11, 13, 15,
+ -1,-3,-5,-7,-9,-11,-13,-15,
+};
+
+/* IMA4 ADPCM Step index adjust decode table. Indexed by the 4-bit nibble; the
+ * value is added to the step index after each decoded sample.
+ */
+constexpr int IMA4Index_adjust[16] = {
+ -1,-1,-1,-1, 2, 4, 6, 8,
+ -1,-1,-1,-1, 2, 4, 6, 8
+};
+
+/* MSADPCM Adaption table. Indexed by the 4-bit nibble; scales the delta by
+ * value/256 after each decoded sample.
+ */
+constexpr int MSADPCMAdaption[16] = {
+ 230, 230, 230, 230, 307, 409, 512, 614,
+ 768, 614, 512, 409, 307, 230, 230, 230
+};
+
+/* MSADPCM Adaption Coefficient tables. Indexed by the block predictor (limited
+ * to 6 by the decoder); each pair weights the two history samples, in units
+ * of 1/256.
+ */
+constexpr int MSADPCMAdaptionCoeff[7][2] = {
+ { 256, 0 },
+ { 512, -256 },
+ { 0, 0 },
+ { 192, 64 },
+ { 240, 0 },
+ { 460, -208 },
+ { 392, -232 }
+};
+
+
+/* Queues a source-state-change event reporting that source `id` has stopped.
+ * The event is silently dropped when the context's async event ring buffer
+ * has no room for it.
+ */
+void SendSourceStoppedEvent(ContextBase *context, uint id)
+{
+    RingBuffer *const events{context->mAsyncEvents.get()};
+    const auto writable = events->getWriteVector();
+    if(writable.first.len >= 1)
+    {
+        /* Construct the event in place in the first writable slot. */
+        auto *slot = reinterpret_cast<AsyncEvent*>(writable.first.buf);
+        AsyncEvent *event{al::construct_at(slot, AsyncEvent::SourceStateChange)};
+        event->u.srcstate.id = id;
+        event->u.srcstate.state = AsyncEvent::SrcState::Stop;
+
+        events->writeAdvance(1);
+    }
+}
+
+
+/* Runs src through the low-pass and/or high-pass filter selected by `type`,
+ * writing to dst, and returns a pointer to the resulting samples. A filter
+ * that is not part of the selected type has its state cleared. When no filter
+ * applies, src's data is returned and dst is left untouched.
+ */
+const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst,
+    const al::span<const float> src, int type)
+{
+    switch(type)
+    {
+    case AF_BandPass:
+        DualBiquad{lpfilter, hpfilter}.process(src, dst);
+        return dst;
+
+    case AF_HighPass:
+        lpfilter.clear();
+        hpfilter.process(src, dst);
+        return dst;
+
+    case AF_LowPass:
+        lpfilter.process(src, dst);
+        hpfilter.clear();
+        return dst;
+
+    case AF_None:
+        lpfilter.clear();
+        hpfilter.clear();
+        break;
+    }
+    /* Unfiltered: hand back the input itself. */
+    return src.data();
+}
+
+
+/* Generic loader for the non-block formats: converts samplesToLoad samples of
+ * channel srcChan, starting at sample frame srcOffset, to float. srcStep is
+ * the number of samples between consecutive frames of the same channel. The
+ * block size parameter is unused here.
+ */
+template<FmtType Type>
+inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const size_t srcChan,
+    const size_t srcOffset, const size_t srcStep, const size_t /*samplesPerBlock*/,
+    const size_t samplesToLoad) noexcept
+{
+    using SampleType = typename al::FmtTypeTraits<Type>::Type;
+
+    /* Index of the first wanted sample within the interleaved data. */
+    const size_t firstSample{srcOffset*srcStep + srcChan};
+    const al::byte *start{src + firstSample*sizeof(SampleType)};
+
+    al::LoadSampleArray<Type>(dstSamples, start, srcStep, samplesToLoad);
+}
+
+/* IMA4 ADPCM specialization. Decodes samplesToLoad samples of channel srcChan,
+ * starting at sample srcOffset, into dstSamples as floats (the decoded 16-bit
+ * value divided by 32768). srcStep is used as the channel count when working
+ * out block sizes and nibble positions, and samplesPerBlock is the number of
+ * samples held by each ADPCM block.
+ *
+ * NOTE(review): the loop only exits once wrote equals samplesToLoad, so a
+ * request for 0 samples starting on a block boundary would not terminate;
+ * callers presumably always ask for at least one sample - confirm.
+ */
+template<>
+inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src,
+ const size_t srcChan, const size_t srcOffset, const size_t srcStep,
+ const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
+{
+ /* Per channel: a 4-byte header plus one nibble for each remaining sample. */
+ const size_t blockBytes{((samplesPerBlock-1)/2 + 4)*srcStep};
+
+ /* Skip to the ADPCM block containing the srcOffset sample. */
+ src += srcOffset/samplesPerBlock*blockBytes;
+ /* Calculate how many samples need to be skipped in the block. */
+ size_t skip{srcOffset % samplesPerBlock};
+
+ /* NOTE: This could probably be optimized better. */
+ size_t wrote{0};
+ do {
+ /* Each IMA4 block starts with a signed 16-bit sample, and a signed
+ * 16-bit table index. The table index needs to be clamped.
+ */
+ int sample{src[srcChan*4] | (src[srcChan*4 + 1] << 8)};
+ int index{src[srcChan*4 + 2] | (src[srcChan*4 + 3] << 8)};
+
+ /* Sign-extend the 16-bit little-endian values. */
+ sample = (sample^0x8000) - 32768;
+ index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1);
+
+ /* The header sample counts as the block's first sample. */
+ if(skip == 0)
+ {
+ dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ }
+ else
+ --skip;
+
+ /* Advances the predictor state by one nibble and returns the new
+ * sample value.
+ */
+ auto decode_sample = [&sample,&index](const uint nibble)
+ {
+ sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8;
+ sample = clampi(sample, -32768, 32767);
+
+ index += IMA4Index_adjust[nibble];
+ index = clampi(index, 0, al::size(IMAStep_size)-1);
+
+ return sample;
+ };
+
+ /* The rest of the block is arranged as a series of nibbles, contained
+ * in 4 *bytes* per channel interleaved. So every 8 nibbles we need to
+ * skip 4 bytes per channel to get the next nibbles for this channel.
+ *
+ * First, decode the samples that we need to skip in the block (will
+ * always be less than the block size). They need to be decoded despite
+ * being ignored for proper state on the remaining samples.
+ */
+ const al::byte *nibbleData{src + (srcStep+srcChan)*4};
+ size_t nibbleOffset{0};
+ /* Position within the block of the first sample written below. */
+ const size_t startOffset{skip + 1};
+ for(;skip;--skip)
+ {
+ const size_t byteShift{(nibbleOffset&1) * 4};
+ const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
+ const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
+ ++nibbleOffset;
+
+ std::ignore = decode_sample((nibbleData[byteOffset]>>byteShift) & 15u);
+ }
+
+ /* Second, decode the rest of the block and write to the output, until
+ * the end of the block or the end of output.
+ */
+ const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
+ for(size_t i{0};i < todo;++i)
+ {
+ const size_t byteShift{(nibbleOffset&1) * 4};
+ const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
+ const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
+ ++nibbleOffset;
+
+ const int result{decode_sample((nibbleData[byteOffset]>>byteShift) & 15u)};
+ dstSamples[wrote++] = static_cast<float>(result) / 32768.0f;
+ }
+ if(wrote == samplesToLoad)
+ return;
+
+ /* Continue with the next block; skip is 0 from here on. */
+ src += blockBytes;
+ } while(true);
+}
+
+/* MS ADPCM specialization. Decodes samplesToLoad samples of channel srcChan,
+ * starting at sample srcOffset, into dstSamples as floats (the decoded 16-bit
+ * value divided by 32768). srcStep is used as the channel count when working
+ * out block sizes and nibble positions, and samplesPerBlock is the number of
+ * samples held by each ADPCM block.
+ *
+ * NOTE(review): the loop only exits once wrote equals samplesToLoad, so
+ * callers presumably always ask for at least one sample - confirm.
+ */
+template<>
+inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *src,
+ const size_t srcChan, const size_t srcOffset, const size_t srcStep,
+ const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
+{
+ /* Per channel: a 7-byte header plus one nibble for each remaining sample. */
+ const size_t blockBytes{((samplesPerBlock-2)/2 + 7)*srcStep};
+
+ /* Skip to the block containing srcOffset, and work out how many samples
+ * into that block the first wanted sample is.
+ */
+ src += srcOffset/samplesPerBlock*blockBytes;
+ size_t skip{srcOffset % samplesPerBlock};
+
+ size_t wrote{0};
+ do {
+ /* Each MS ADPCM block starts with an 8-bit block predictor, used to
+ * dictate how the two sample history values are mixed with the decoded
+ * sample, and an initial signed 16-bit delta value which scales the
+ * nibble sample value. This is followed by the two initial 16-bit
+ * sample history values.
+ */
+ const al::byte *input{src};
+ /* Limit the predictor to the last entry of the coefficient table. */
+ const uint8_t blockpred{std::min(input[srcChan], uint8_t{6})};
+ input += srcStep;
+ int delta{input[2*srcChan + 0] | (input[2*srcChan + 1] << 8)};
+ input += srcStep*2;
+
+ int sampleHistory[2]{};
+ sampleHistory[0] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
+ input += srcStep*2;
+ sampleHistory[1] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
+ input += srcStep*2;
+
+ const auto coeffs = al::as_span(MSADPCMAdaptionCoeff[blockpred]);
+ /* Sign-extend the 16-bit little-endian values. */
+ delta = (delta^0x8000) - 32768;
+ sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768;
+ sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768;
+
+ /* The second history sample is "older", so it's the first to be
+ * written out.
+ */
+ if(skip == 0)
+ {
+ dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ }
+ else if(skip == 1)
+ {
+ --skip;
+ dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ }
+ else
+ skip -= 2;
+
+ /* Advances the predictor state by one nibble and returns the new
+ * sample value.
+ */
+ auto decode_sample = [&sampleHistory,&delta,coeffs](const int nibble)
+ {
+ int pred{(sampleHistory[0]*coeffs[0] + sampleHistory[1]*coeffs[1]) / 256};
+ /* The nibble is sign-extended from 4 bits before scaling. */
+ pred += ((nibble^0x08) - 0x08) * delta;
+ pred = clampi(pred, -32768, 32767);
+
+ sampleHistory[1] = sampleHistory[0];
+ sampleHistory[0] = pred;
+
+ delta = (MSADPCMAdaption[nibble] * delta) / 256;
+ delta = maxi(16, delta);
+
+ return pred;
+ };
+
+ /* The rest of the block is a series of nibbles, interleaved per-
+ * channel. First, skip samples.
+ */
+ /* Position within the block of the first sample written below. */
+ const size_t startOffset{skip + 2};
+ size_t nibbleOffset{srcChan};
+ for(;skip;--skip)
+ {
+ const size_t byteOffset{nibbleOffset>>1};
+ /* Even nibble offsets use the high half of the byte. */
+ const size_t byteShift{((nibbleOffset&1)^1) * 4};
+ nibbleOffset += srcStep;
+
+ std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15);
+ }
+
+ /* Now decode the rest of the block, until the end of the block or the
+ * dst buffer is filled.
+ */
+ const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
+ for(size_t j{0};j < todo;++j)
+ {
+ const size_t byteOffset{nibbleOffset>>1};
+ const size_t byteShift{((nibbleOffset&1)^1) * 4};
+ nibbleOffset += srcStep;
+
+ const int sample{decode_sample((input[byteOffset]>>byteShift) & 15)};
+ dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
+ }
+ if(wrote == samplesToLoad)
+ return;
+
+ /* Continue with the next block; skip is 0 from here on. */
+ src += blockBytes;
+ } while(true);
+}
+
+/* Runtime dispatcher: forwards to the LoadSamples specialization matching
+ * srcType. All other arguments are passed through unchanged.
+ */
+void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
+    const size_t srcOffset, const FmtType srcType, const size_t srcStep,
+    const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
+{
+    switch(srcType)
+    {
+    case FmtUByte:
+        LoadSamples<FmtUByte>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtShort:
+        LoadSamples<FmtShort>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtFloat:
+        LoadSamples<FmtFloat>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtDouble:
+        LoadSamples<FmtDouble>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtMulaw:
+        LoadSamples<FmtMulaw>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtAlaw:
+        LoadSamples<FmtAlaw>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtIMA4:
+        LoadSamples<FmtIMA4>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    case FmtMSADPCM:
+        LoadSamples<FmtMSADPCM>(dstSamples, src, srcChan, srcOffset, srcStep, samplesPerBlock,
+            samplesToLoad);
+        return;
+    }
+}
+
+/* Fills voiceSamples from index samplesLoaded up to samplesToLoad with samples
+ * of channel srcChannel from a static buffer, starting at sample position
+ * dataPosInt.
+ *
+ * Without a loop item, whatever remains of the buffer is loaded and the rest
+ * of the output is padded by repeating the sample just before the padding.
+ * With a loop item, the remainder of the current loop iteration is loaded,
+ * followed by as many repeats of the loop region as needed.
+ *
+ * NOTE(review): the padding reads voiceSamples[samplesLoaded-1], so when
+ * nothing was loaded and samplesLoaded is 0 it relies on the caller providing
+ * a valid sample before voiceSamples - confirm against the caller.
+ */
+void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
+    const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
+    const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
+    float *voiceSamples)
+{
+    if(!bufferLoopItem)
+    {
+        /* Load what's left to play from the buffer */
+        if(buffer->mSampleLen > dataPosInt) LIKELY
+        {
+            const size_t buffer_remaining{buffer->mSampleLen - dataPosInt};
+            const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer_remaining)};
+            LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
+                sampleType, srcStep, buffer->mBlockAlign, remaining);
+            samplesLoaded += remaining;
+        }
+
+        /* Pad the rest by repeating the last available sample. */
+        if(const size_t toFill{samplesToLoad - samplesLoaded})
+        {
+            auto srcsamples = voiceSamples + samplesLoaded;
+            std::fill_n(srcsamples, toFill, *(srcsamples-1));
+        }
+    }
+    else
+    {
+        const size_t loopStart{buffer->mLoopStart};
+        const size_t loopEnd{buffer->mLoopEnd};
+        ASSUME(loopEnd > loopStart);
+        const size_t loopSize{loopEnd - loopStart};
+
+        /* Wrap a position at or past the loop end back into the loop. */
+        const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt
+            : (((dataPosInt-loopStart)%loopSize) + loopStart)};
+
+        /* Load what's left of this loop iteration. The count is measured from
+         * the wrapped position, since loopEnd-dataPosInt would underflow when
+         * dataPosInt is at or past the loop end.
+         */
+        const size_t remaining{minz(samplesToLoad-samplesLoaded, loopEnd-intPos)};
+        LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, intPos, sampleType,
+            srcStep, buffer->mBlockAlign, remaining);
+        samplesLoaded += remaining;
+
+        /* Load repeats of the loop to fill the buffer. */
+        while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)})
+        {
+            LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, loopStart,
+                sampleType, srcStep, buffer->mBlockAlign, toFill);
+            samplesLoaded += toFill;
+        }
+    }
+}
+
+/* Fills voiceSamples from index samplesLoaded up to samplesToLoad with samples
+ * of channel srcChannel from a callback buffer that currently holds
+ * numCallbackSamples samples, starting at position dataPosInt. Any shortfall
+ * is padded by repeating the sample just before the padding.
+ */
+void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt,
+    const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel,
+    const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, float *voiceSamples)
+{
+    /* Take as much as is wanted and available from the buffer. */
+    if(dataPosInt < numCallbackSamples) LIKELY
+    {
+        const size_t wanted{samplesToLoad - samplesLoaded};
+        const size_t available{numCallbackSamples - dataPosInt};
+        const size_t count{minz(wanted, available)};
+        LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
+            sampleType, srcStep, buffer->mBlockAlign, count);
+        samplesLoaded += count;
+    }
+
+    const size_t padCount{samplesToLoad - samplesLoaded};
+    if(padCount > 0)
+    {
+        float *const padStart{voiceSamples + samplesLoaded};
+        std::fill_n(padStart, padCount, padStart[-1]);
+    }
+}
+
+/* Fills voiceSamples from index samplesLoaded up to samplesToLoad by walking
+ * the buffer queue from `buffer`, starting dataPosInt samples in. When the
+ * queue ends, the walk continues at bufferLoopItem if there is one. Any
+ * shortfall is padded by repeating the sample just before the padding.
+ */
+void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
+    size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
+    const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
+    float *voiceSamples)
+{
+    while(buffer && samplesLoaded != samplesToLoad)
+    {
+        const auto bufferLen = buffer->mSampleLen;
+        if(dataPosInt < bufferLen)
+        {
+            /* This queue entry has data at the current position. */
+            const size_t count{minz(samplesToLoad-samplesLoaded, bufferLen-dataPosInt)};
+            LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
+                sampleType, srcStep, buffer->mBlockAlign, count);
+
+            samplesLoaded += count;
+            if(samplesLoaded == samplesToLoad)
+                break;
+
+            /* Later entries are read from their beginning. */
+            dataPosInt = 0;
+        }
+        else
+        {
+            /* The position lies past this entry; skip it entirely. */
+            dataPosInt -= bufferLen;
+        }
+
+        VoiceBufferItem *following{buffer->mNext.load(std::memory_order_acquire)};
+        buffer = following ? following : bufferLoopItem;
+    }
+
+    const size_t padCount{samplesToLoad - samplesLoaded};
+    if(padCount > 0)
+    {
+        float *const padStart{voiceSamples + samplesLoaded};
+        std::fill_n(padStart, padCount, padStart[-1]);
+    }
+}
+
+
+/* Mixes DstBufferSize input samples through the voice's HRTF filter into the
+ * device's HrtfAccumData, starting at OutPos.
+ *
+ * When Counter is non-zero and OutPos is 0, the first
+ * min(DstBufferSize, Counter) samples are blended from the old filter to the
+ * target filter. Any remaining samples are mixed with the target filter
+ * alone, ramping the gain from the old gain towards TargetGain. The voice's
+ * HRTF history is only updated when IsPlaying is true, and parms.Hrtf.Old is
+ * updated to reflect what was applied.
+ */
+void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms,
+ const float TargetGain, const uint Counter, uint OutPos, const bool IsPlaying,
+ DeviceBase *Device)
+{
+ const uint IrSize{Device->mIrSize};
+ auto &HrtfSamples = Device->HrtfSourceData;
+ auto &AccumSamples = Device->HrtfAccumData;
+
+ /* Copy the HRTF history and new input samples into a temp buffer. */
+ auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
+ std::begin(HrtfSamples));
+ std::copy_n(samples, DstBufferSize, src_iter);
+ /* Copy the last used samples back into the history buffer for later. */
+ if(IsPlaying) LIKELY
+ std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(),
+ parms.Hrtf.History.begin());
+
+ /* If fading and this is the first mixing pass, fade between the IRs. */
+ uint fademix{0u};
+ if(Counter && OutPos == 0)
+ {
+ fademix = minu(DstBufferSize, Counter);
+
+ float gain{TargetGain};
+
+ /* The new coefficients need to fade in completely since they're
+ * replacing the old ones. To keep the gain fading consistent,
+ * interpolate between the old and new target gains given how much of
+ * the fade time this mix handles.
+ */
+ if(Counter > fademix)
+ {
+ const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
+ gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
+ }
+
+ /* The new filter starts at zero gain and steps up to `gain` over the
+ * fademix samples.
+ */
+ MixHrtfFilter hrtfparams{
+ parms.Hrtf.Target.Coeffs,
+ parms.Hrtf.Target.Delay,
+ 0.0f, gain / static_cast<float>(fademix)};
+ MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams,
+ fademix);
+
+ /* Update the old parameters with the result. */
+ parms.Hrtf.Old = parms.Hrtf.Target;
+ parms.Hrtf.Old.Gain = gain;
+ OutPos += fademix;
+ }
+
+ /* Mix whatever was not covered by the blend above. */
+ if(fademix < DstBufferSize)
+ {
+ const uint todo{DstBufferSize - fademix};
+ float gain{TargetGain};
+
+ /* Interpolate the target gain if the gain fading lasts longer than
+ * this mix.
+ */
+ if(Counter > DstBufferSize)
+ {
+ const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
+ gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
+ }
+
+ /* Start from the current gain and step linearly to `gain` over the
+ * todo samples.
+ */
+ MixHrtfFilter hrtfparams{
+ parms.Hrtf.Target.Coeffs,
+ parms.Hrtf.Target.Delay,
+ parms.Hrtf.Old.Gain,
+ (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
+ MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo);
+
+ /* Store the now-current gain for next time. */
+ parms.Hrtf.Old.Gain = gain;
+ }
+}
+
+/* Mixes the input to the output channels with per-order near-field control
+ * filtering. The first output channel receives the unfiltered input. For each
+ * following order with a non-zero channel count on the device, the input is
+ * run through that order's NFC filter and mixed to that order's channels.
+ */
+void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms,
+    const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device)
+{
+    using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*);
+    static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{
+        nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3};
+
+    /* The first channel takes the input as-is. */
+    float *const currentGains{parms.Gains.Current.data()};
+    MixSamples(samples, {OutBuffer, 1u}, currentGains, TargetGains, Counter, OutPos);
+
+    const al::span<float> filtered{Device->NfcSampleData, samples.size()};
+    /* Index of the first output channel and gain for the current order. */
+    size_t firstChan{1};
+    for(size_t order{1};order <= MaxAmbiOrder;++order)
+    {
+        const size_t chancount{Device->NumChannelsPerOrder[order]};
+        if(chancount == 0)
+            break;
+
+        (parms.NFCtrlFilter.*NfcProcess[order])(samples, filtered.data());
+        MixSamples(filtered, {OutBuffer+firstChan, chancount}, currentGains+firstChan,
+            TargetGains+firstChan, Counter, OutPos);
+        firstChan += chancount;
+    }
+}
+
+} // namespace
+
+void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime,
+ const uint SamplesToDo)
+{ /* Renders this voice for one update: loads and resamples the source data
+ * for each channel, applies the optional decoder and ambisonic band
+ * scaling, filters and mixes the result into the direct and send output
+ * buffers, then advances the playback position and posts any buffer
+ * completed or source stopped events.
+ *
+ * vstate: Playing or Stopping. When not Playing, the target gains are
+ * silent; when Stopping, the voice is marked Stopped after mixing
+ * without advancing its position.
+ * Context: provides the device, the enabled event mask, and the async
+ * event ring buffer.
+ * deviceTime: compared against mStartTime to handle a delayed start.
+ * SamplesToDo: number of output samples in this update (assumed > 0).
+ */
+ static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{};
+
+ ASSUME(SamplesToDo > 0);
+
+ DeviceBase *Device{Context->mDevice};
+ const uint NumSends{Device->NumAuxSends};
+
+ /* Get voice info */
+ int DataPosInt{mPosition.load(std::memory_order_relaxed)};
+ uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
+ VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
+ VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
+ const uint increment{mStep}; /* Fixed-point source step per output sample. */
+ if(increment < 1) UNLIKELY
+ {
+ /* If the voice is supposed to be stopping but can't be mixed, just
+ * stop it before bailing.
+ */
+ if(vstate == Stopping)
+ mPlayState.store(Stopped, std::memory_order_release);
+ return;
+ }
+
+ /* If the static voice's current position is beyond the buffer loop end
+ * position, disable looping.
+ */
+ if(mFlags.test(VoiceIsStatic) && BufferLoopItem)
+ {
+ if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd)
+ BufferLoopItem = nullptr;
+ }
+
+ uint OutPos{0u}; /* Output sample offset at which mixing begins. */
+
+ /* Check if we're doing a delayed start, and we start in this update. */
+ if(mStartTime > deviceTime) UNLIKELY
+ {
+ /* If the voice is supposed to be stopping but hasn't actually started
+ * yet, make sure it's stopped.
+ */
+ if(vstate == Stopping)
+ {
+ mPlayState.store(Stopped, std::memory_order_release);
+ return;
+ }
+
+ /* If the start time is too far ahead, don't bother. */
+ auto diff = mStartTime - deviceTime;
+ if(diff >= seconds{1})
+ return;
+
+ /* Get the number of samples ahead of the current time that output
+ * should start at. Skip this update if it's beyond the output sample
+ * count.
+ *
+ * Round the start position to a multiple of 4, which some mixers want.
+ * This makes the start time accurate to 4 samples. This could be made
+ * sample-accurate by forcing non-SIMD functions on the first run.
+ */
+ seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()};
+ sampleOffset = (sampleOffset+2) & ~seconds::rep{3};
+ if(sampleOffset >= SamplesToDo)
+ return;
+
+ OutPos = static_cast<uint>(sampleOffset);
+ }
+
+ /* Calculate the number of samples to mix, and the number of (resampled)
+ * samples that need to be loaded (mixing samples and decoder padding).
+ */
+ const uint samplesToMix{SamplesToDo - OutPos};
+ const uint samplesToLoad{samplesToMix + mDecoderPadding};
+
+ /* Get a span of pointers to hold the floating point, deinterlaced,
+ * resampled buffer data to be mixed. The pointers refer to the last
+ * mChans.size() lines of the device's mSampleData.
+ */
+ std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers;
+ const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()};
+ auto get_bufferline = [](DeviceBase::MixerBufferLine &bufline) noexcept -> float*
+ { return bufline.data(); };
+ std::transform(Device->mSampleData.end() - mChans.size(), Device->mSampleData.end(),
+ MixingSamples.begin(), get_bufferline);
+
+ /* If there's a matching sample step and no phase offset, use a simple copy
+ * for resampling.
+ */
+ const ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0)
+ ? ResamplerFunc{[](const InterpState*, const float *RESTRICT src, uint, const uint,
+ const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }}
+ : mResampler};
+
+ /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels
+ * (3rd channel is generated from decoding).
+ */
+ const size_t realChannels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u
+ : MixingSamples.size()};
+ for(size_t chan{0};chan < realChannels;++chan)
+ {
+ using ResBufType = decltype(DeviceBase::mResampleData);
+ static constexpr uint srcSizeMax{static_cast<uint>(ResBufType{}.size()-MaxResamplerEdge)};
+
+ /* Seed the front of the resample buffer with the previous mix's sample
+ * history. resampleBuffer then points MaxResamplerEdge samples before
+ * the end of that history.
+ */
+ const auto prevSamples = al::as_span(mPrevSamples[chan]);
+ const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(),
+ Device->mResampleData.begin()) - MaxResamplerEdge;
+ int intPos{DataPosInt};
+ uint fracPos{DataPosFrac};
+
+ /* Load samples for this channel from the available buffer(s), with
+ * resampling.
+ */
+ for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;)
+ {
+ /* Calculate the number of dst samples that can be loaded this
+ * iteration, given the available resampler buffer size, and the
+ * number of src samples that are needed to load it. Returns the
+ * pair (dst count, src count).
+ */
+ auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize)
+ {
+ /* If ext=true, calculate the last written dst pos from the dst
+ * count, convert to the last read src pos, then add one to get
+ * the src count.
+ *
+ * If ext=false, convert the dst count to src count directly.
+ *
+ * Without this, the src count could be short by one when
+ * increment < 1.0, or not have a full src at the end when
+ * increment > 1.0.
+ */
+ const bool ext{increment <= MixerFracOne};
+ uint64_t dataSize64{dstBufferSize - ext};
+ dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits;
+ /* Also include resampler padding. */
+ dataSize64 += ext + MaxResamplerEdge;
+
+ if(dataSize64 <= srcSizeMax)
+ return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64));
+
+ /* If the source size got saturated, we can't fill the desired
+ * dst size. Figure out how many dst samples we can fill.
+ */
+ dataSize64 = srcSizeMax - MaxResamplerEdge;
+ dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment;
+ if(dataSize64 < dstBufferSize)
+ {
+ /* Some resamplers require the destination being 16-byte
+ * aligned, so limit to a multiple of 4 samples to maintain
+ * alignment if we need to do another iteration after this.
+ */
+ dstBufferSize = static_cast<uint>(dataSize64) & ~3u;
+ }
+ return std::make_pair(dstBufferSize, srcSizeMax);
+ };
+ const auto bufferSizes = calc_buffer_sizes(samplesToLoad - samplesLoaded);
+ const auto dstBufferSize = bufferSizes.first;
+ const auto srcBufferSize = bufferSizes.second;
+
+ /* Load the necessary samples from the given buffer(s). */
+ if(!BufferListItem)
+ {
+ const uint avail{minu(srcBufferSize, MaxResamplerEdge)};
+ const uint tofill{maxu(srcBufferSize, MaxResamplerEdge)};
+
+ /* When loading from a voice that ended prematurely, only take
+ * the samples that get closest to 0 amplitude. This helps
+ * certain sounds fade out better. The rest of the source
+ * range is filled with that sample's value.
+ */
+ auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool
+ { return std::abs(lhs) < std::abs(rhs); };
+ auto srciter = std::min_element(resampleBuffer, resampleBuffer+avail, abs_lt);
+
+ std::fill(srciter+1, resampleBuffer+tofill, *srciter);
+ }
+ else
+ {
+ size_t srcSampleDelay{0};
+ if(intPos < 0) UNLIKELY
+ {
+ /* If the current position is negative, there's that many
+ * silent samples to load before using the buffer.
+ */
+ srcSampleDelay = static_cast<uint>(-intPos);
+ if(srcSampleDelay >= srcBufferSize)
+ {
+ /* If the number of silent source samples exceeds the
+ * number to load, the output will be silent.
+ */
+ std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f);
+ std::fill_n(resampleBuffer, srcBufferSize, 0.0f);
+ goto skip_resample;
+ }
+
+ std::fill_n(resampleBuffer, srcSampleDelay, 0.0f);
+ }
+ const uint uintPos{static_cast<uint>(maxi(intPos, 0))};
+
+ if(mFlags.test(VoiceIsStatic))
+ LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
+ mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
+ else if(mFlags.test(VoiceIsCallback))
+ {
+ /* Work out how many whole blocks are needed to cover this
+ * load, and ask the callback for any that aren't already
+ * held in the buffer.
+ */
+ const uint callbackBase{mCallbackBlockBase * mSamplesPerBlock};
+ const size_t bufferOffset{uintPos - callbackBase};
+ const size_t needSamples{bufferOffset + srcBufferSize - srcSampleDelay};
+ const size_t needBlocks{(needSamples + mSamplesPerBlock-1) / mSamplesPerBlock};
+ if(!mFlags.test(VoiceCallbackStopped) && needBlocks > mNumCallbackBlocks)
+ {
+ const size_t byteOffset{mNumCallbackBlocks*mBytesPerBlock};
+ const size_t needBytes{(needBlocks-mNumCallbackBlocks)*mBytesPerBlock};
+
+ /* A negative or short return marks the callback as
+ * stopped, so it won't be called again for this voice.
+ */
+ const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
+ &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
+ if(gotBytes < 0)
+ mFlags.set(VoiceCallbackStopped);
+ else if(static_cast<uint>(gotBytes) < needBytes)
+ {
+ mFlags.set(VoiceCallbackStopped);
+ mNumCallbackBlocks += static_cast<uint>(gotBytes) / mBytesPerBlock;
+ }
+ else
+ mNumCallbackBlocks = static_cast<uint>(needBlocks);
+ }
+ const size_t numSamples{uint{mNumCallbackBlocks} * mSamplesPerBlock};
+ LoadBufferCallback(BufferListItem, bufferOffset, numSamples, mFmtType, chan,
+ mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
+ }
+ else
+ LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
+ mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
+ }
+
+ Resample(&mResampleState, al::to_address(resampleBuffer), fracPos, increment,
+ {MixingSamples[chan]+samplesLoaded, dstBufferSize});
+
+ /* Store the last source samples used for next time. */
+ if(vstate == Playing) LIKELY
+ {
+ /* Only store samples for the end of the mix, excluding what
+ * gets loaded for decoder padding.
+ */
+ const uint loadEnd{samplesLoaded + dstBufferSize};
+ if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) LIKELY
+ {
+ const size_t dstOffset{samplesToMix - samplesLoaded};
+ const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits};
+ std::copy_n(resampleBuffer-MaxResamplerEdge+srcOffset, prevSamples.size(),
+ prevSamples.begin());
+ }
+ }
+
+ skip_resample:
+ samplesLoaded += dstBufferSize;
+ if(samplesLoaded < samplesToLoad)
+ {
+ fracPos += dstBufferSize*increment;
+ const uint srcOffset{fracPos >> MixerFracBits};
+ fracPos &= MixerFracMask;
+ intPos += srcOffset;
+
+ /* If more samples need to be loaded, copy the back of the
+ * resampleBuffer to the front to reuse it. prevSamples isn't
+ * reliable since it's only updated for the end of the mix.
+ */
+ std::copy(resampleBuffer-MaxResamplerEdge+srcOffset,
+ resampleBuffer+MaxResamplerEdge+srcOffset, resampleBuffer-MaxResamplerEdge);
+ }
+ }
+ }
+ for(auto &samples : MixingSamples.subspan(realChannels)) /* Clear channels not loaded from the buffer. */
+ std::fill_n(samples, samplesToLoad, 0.0f);
+
+ if(mDecoder)
+ mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing));
+
+ if(mFlags.test(VoiceIsAmbisonic))
+ {
+ auto voiceSamples = MixingSamples.begin();
+ for(auto &chandata : mChans)
+ {
+ chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix},
+ chandata.mAmbiHFScale, chandata.mAmbiLFScale);
+ ++voiceSamples;
+ }
+ }
+
+ const uint Counter{mFlags.test(VoiceIsFading) ? minu(samplesToMix, 64u) : 0u}; /* Fade length in samples; 0 means no fade. */
+ if(!Counter)
+ {
+ /* No fading, just overwrite the old/current params. */
+ for(auto &chandata : mChans)
+ {
+ {
+ DirectParams &parms = chandata.mDryParams;
+ if(!mFlags.test(VoiceHasHrtf))
+ parms.Gains.Current = parms.Gains.Target;
+ else
+ parms.Hrtf.Old = parms.Hrtf.Target;
+ }
+ for(uint send{0};send < NumSends;++send)
+ {
+ if(mSend[send].Buffer.empty())
+ continue;
+
+ SendParams &parms = chandata.mWetParams[send];
+ parms.Gains.Current = parms.Gains.Target;
+ }
+ }
+ }
+
+ auto voiceSamples = MixingSamples.begin();
+ for(auto &chandata : mChans)
+ {
+ /* Now filter and mix to the appropriate outputs. */
+ const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
+ {
+ DirectParams &parms = chandata.mDryParams;
+ const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
+ {*voiceSamples, samplesToMix}, mDirect.FilterType)};
+
+ if(mFlags.test(VoiceHasHrtf))
+ {
+ const float TargetGain{parms.Hrtf.Target.Gain * (vstate == Playing)};
+ DoHrtfMix(samples, samplesToMix, parms, TargetGain, Counter, OutPos,
+ (vstate == Playing), Device);
+ }
+ else
+ {
+ const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
+ : SilentTarget.data()};
+ if(mFlags.test(VoiceHasNfc))
+ DoNfcMix({samples, samplesToMix}, mDirect.Buffer.data(), parms,
+ TargetGains, Counter, OutPos, Device);
+ else
+ MixSamples({samples, samplesToMix}, mDirect.Buffer,
+ parms.Gains.Current.data(), TargetGains, Counter, OutPos);
+ }
+ }
+
+ for(uint send{0};send < NumSends;++send)
+ {
+ if(mSend[send].Buffer.empty())
+ continue;
+
+ SendParams &parms = chandata.mWetParams[send];
+ const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
+ {*voiceSamples, samplesToMix}, mSend[send].FilterType)};
+
+ const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
+ : SilentTarget.data()};
+ MixSamples({samples, samplesToMix}, mSend[send].Buffer,
+ parms.Gains.Current.data(), TargetGains, Counter, OutPos);
+ }
+
+ ++voiceSamples;
+ }
+
+ mFlags.set(VoiceIsFading); /* Subsequent updates fade instead of overwriting the params. */
+
+ /* Don't update positions and buffers if we were stopping. */
+ if(vstate == Stopping) UNLIKELY
+ {
+ mPlayState.store(Stopped, std::memory_order_release);
+ return;
+ }
+
+ /* Update voice positions and buffers as needed. */
+ DataPosFrac += increment*samplesToMix;
+ const uint SrcSamplesDone{DataPosFrac>>MixerFracBits};
+ DataPosInt += SrcSamplesDone;
+ DataPosFrac &= MixerFracMask;
+
+ uint buffers_done{0u};
+ if(BufferListItem && DataPosInt >= 0) LIKELY
+ {
+ if(mFlags.test(VoiceIsStatic))
+ {
+ if(BufferLoopItem)
+ {
+ /* Handle looping static source */
+ const uint LoopStart{BufferListItem->mLoopStart};
+ const uint LoopEnd{BufferListItem->mLoopEnd};
+ uint DataPosUInt{static_cast<uint>(DataPosInt)};
+ if(DataPosUInt >= LoopEnd)
+ {
+ assert(LoopEnd > LoopStart);
+ DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
+ DataPosInt = static_cast<int>(DataPosUInt);
+ }
+ }
+ else
+ {
+ /* Handle non-looping static source */
+ if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen)
+ BufferListItem = nullptr;
+ }
+ }
+ else if(mFlags.test(VoiceIsCallback))
+ {
+ /* Handle callback buffer source. Blocks that are fully consumed
+ * are dropped by moving the remaining data to the front of the
+ * buffer.
+ */
+ const uint currentBlock{static_cast<uint>(DataPosInt) / mSamplesPerBlock};
+ const uint blocksDone{currentBlock - mCallbackBlockBase};
+ if(blocksDone < mNumCallbackBlocks)
+ {
+ const size_t byteOffset{blocksDone*mBytesPerBlock};
+ const size_t byteEnd{mNumCallbackBlocks*mBytesPerBlock};
+ al::byte *data{BufferListItem->mSamples};
+ std::copy(data+byteOffset, data+byteEnd, data);
+ mNumCallbackBlocks -= blocksDone;
+ mCallbackBlockBase += blocksDone;
+ }
+ else
+ {
+ BufferListItem = nullptr;
+ mNumCallbackBlocks = 0;
+ mCallbackBlockBase += blocksDone;
+ }
+ }
+ else
+ {
+ /* Handle streaming source. Step through the queue, counting each
+ * buffer the position has moved past.
+ */
+ do {
+ if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt))
+ break;
+
+ DataPosInt -= BufferListItem->mSampleLen;
+
+ ++buffers_done;
+ BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
+ if(!BufferListItem) BufferListItem = BufferLoopItem;
+ } while(BufferListItem);
+ }
+ }
+
+ /* Capture the source ID in case it gets reset for stopping. */
+ const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
+
+ /* Update voice info */
+ mPosition.store(DataPosInt, std::memory_order_relaxed);
+ mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
+ mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
+ if(!BufferListItem)
+ {
+ mLoopBuffer.store(nullptr, std::memory_order_relaxed);
+ mSourceID.store(0u, std::memory_order_relaxed);
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+
+ /* Send any events now, after the position/buffer info was updated. */
+ const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
+ if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted))
+ {
+ RingBuffer *ring{Context->mAsyncEvents.get()};
+ auto evt_vec = ring->getWriteVector();
+ if(evt_vec.first.len > 0) /* The event is dropped if the ring has no space. */
+ {
+ AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
+ AsyncEvent::BufferCompleted)};
+ evt->u.bufcomp.id = SourceID;
+ evt->u.bufcomp.count = buffers_done;
+ ring->writeAdvance(1);
+ }
+ }
+
+ if(!BufferListItem)
+ {
+ /* If the voice just ended, set it to Stopping so the next render
+ * ensures any residual noise fades to 0 amplitude.
+ */
+ mPlayState.store(Stopping, std::memory_order_release);
+ if(enabledevt.test(AsyncEvent::SourceStateChange))
+ SendSourceStoppedEvent(Context, SourceID);
+ }
+}
+
+void Voice::prepare(DeviceBase *device)
+{
+ /* Sets the voice up for mixing on the given device: sizes the per-channel
+ * storage, selects the UHJ/SuperStereo decoder (if any), clears the step
+ * and sample history, and resets the per-channel mixing parameters,
+ * including the ambisonic band-split scaling when it's needed.
+ */
+
+ /* Even if storing really high order ambisonics, we only mix channels for
+ * orders up to the device order. The rest are simply dropped.
+ */
+ uint num_channels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 3 :
+ ChannelsFromFmt(mFmtChannels, minu(mAmbiOrder, device->mAmbiOrder))};
+ if(num_channels > device->mSampleData.size()) UNLIKELY
+ {
+ /* mAmbiOrder is unsigned, so it's printed with %u. */
+ ERR("Unexpected channel count: %u (limit: %zu, %d:%u)\n", num_channels,
+ device->mSampleData.size(), mFmtChannels, mAmbiOrder);
+ num_channels = static_cast<uint>(device->mSampleData.size());
+ }
+ /* Release oversized storage when fewer channels are needed than are
+ * currently allocated (a capacity of 2 or less is always kept).
+ */
+ if(mChans.capacity() > 2 && num_channels < mChans.capacity())
+ {
+ decltype(mChans){}.swap(mChans);
+ decltype(mPrevSamples){}.swap(mPrevSamples);
+ }
+ mChans.reserve(maxu(2, num_channels));
+ mChans.resize(num_channels);
+ mPrevSamples.reserve(maxu(2, num_channels));
+ mPrevSamples.resize(num_channels);
+
+ /* Pick the decoder and its input padding according to the buffer format
+ * and the configured UHJ decode quality. Other formats use no decoder.
+ */
+ mDecoder = nullptr;
+ mDecoderPadding = 0;
+ if(mFmtChannels == FmtSuperStereo)
+ {
+ switch(UhjDecodeQuality)
+ {
+ case UhjQualityType::IIR:
+ mDecoder = std::make_unique<UhjStereoDecoderIIR>();
+ mDecoderPadding = UhjStereoDecoderIIR::sInputPadding;
+ break;
+ case UhjQualityType::FIR256:
+ mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>();
+ mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding;
+ break;
+ case UhjQualityType::FIR512:
+ mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>();
+ mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding;
+ break;
+ }
+ }
+ else if(IsUHJ(mFmtChannels))
+ {
+ switch(UhjDecodeQuality)
+ {
+ case UhjQualityType::IIR:
+ mDecoder = std::make_unique<UhjDecoderIIR>();
+ mDecoderPadding = UhjDecoderIIR::sInputPadding;
+ break;
+ case UhjQualityType::FIR256:
+ mDecoder = std::make_unique<UhjDecoder<UhjLength256>>();
+ mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding;
+ break;
+ case UhjQualityType::FIR512:
+ mDecoder = std::make_unique<UhjDecoder<UhjLength512>>();
+ mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding;
+ break;
+ }
+ }
+
+ /* Clear the stepping value explicitly so the mixer knows not to mix this
+ * until the update gets applied.
+ */
+ mStep = 0;
+
+ /* Make sure the sample history is cleared. */
+ std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{});
+
+ if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder)
+ {
+ /* 2-channel UHJ needs different shelf filters. However, we can't just
+ * use different shelf filters after mixing it, given any old speaker
+ * setup the user has. To make this work, we apply the expected shelf
+ * filters for decoding UHJ2 to quad (only needs LF scaling), and act
+ * as if those 4 quad channels are encoded right back into B-Format.
+ *
+ * This isn't perfect, but without an entirely separate and limited
+ * UHJ2 path, it's better than nothing.
+ *
+ * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is
+ * identity, so don't mess with it).
+ */
+ const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
+ for(auto &chandata : mChans)
+ {
+ chandata.mAmbiHFScale = 1.0f;
+ chandata.mAmbiLFScale = 1.0f;
+ chandata.mAmbiSplitter = splitter;
+ chandata.mDryParams = DirectParams{};
+ chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
+ std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
+ }
+ /* UHJ2 always has 3 mixing channels (W, X, Y). */
+ mChans[0].mAmbiLFScale = DecoderBase::sWLFScale;
+ mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale;
+ mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale;
+ mFlags.set(VoiceIsAmbisonic);
+ }
+ /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher
+ * order than the voice. No HF scaling is necessary to mix it.
+ */
+ else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
+ {
+ /* Each channel gets the HF scale for its ambisonic order. */
+ const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ?
+ AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
+ const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
+ device->m2DMixing);
+
+ const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
+ for(auto &chandata : mChans)
+ {
+ chandata.mAmbiHFScale = scales[*(OrderFromChan++)];
+ chandata.mAmbiLFScale = 1.0f;
+ chandata.mAmbiSplitter = splitter;
+ chandata.mDryParams = DirectParams{};
+ chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
+ std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
+ }
+ mFlags.set(VoiceIsAmbisonic);
+ }
+ else
+ {
+ /* No band-split scaling; just reset the mixing parameters. */
+ for(auto &chandata : mChans)
+ {
+ chandata.mDryParams = DirectParams{};
+ chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
+ std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
+ }
+ mFlags.reset(VoiceIsAmbisonic);
+ }
+}
diff --git a/core/voice.h b/core/voice.h
new file mode 100644
index 00000000..57ee7b01
--- /dev/null
+++ b/core/voice.h
@@ -0,0 +1,280 @@
+#ifndef CORE_VOICE_H
+#define CORE_VOICE_H
+
+#include <array>
+#include <atomic>
+#include <bitset>
+#include <chrono>
+#include <memory>
+#include <stddef.h>
+#include <string>
+
+#include "albyte.h"
+#include "almalloc.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "bufferline.h"
+#include "buffer_storage.h"
+#include "devformat.h"
+#include "filters/biquad.h"
+#include "filters/nfc.h"
+#include "filters/splitter.h"
+#include "mixer/defs.h"
+#include "mixer/hrtfdefs.h"
+#include "resampler_limits.h"
+#include "uhjfilter.h"
+#include "vector.h"
+
+struct ContextBase;
+struct DeviceBase;
+struct EffectSlot;
+enum class DistanceModel : unsigned char;
+
+using uint = unsigned int;
+
+
+#define MAX_SENDS 6 /* Size of the per-voice auxiliary send arrays (VoiceProps::Send, Voice::mSend, ChannelData::mWetParams). */
+
+
+enum class SpatializeMode : unsigned char { /* Value type of VoiceProps::mSpatializeMode. */
+ Off,
+ On,
+ Auto
+};
+
+enum class DirectMode : unsigned char { /* Value type of VoiceProps::DirectChannels. */
+ Off,
+ DropMismatch, /* NOTE(review): presumably drops channels with no matching output - confirm. */
+ RemixMismatch /* NOTE(review): presumably remixes channels with no matching output - confirm. */
+};
+
+
+constexpr uint MaxPitch{10}; /* NOTE(review): presumably the upper limit for the pitch multiplier; not used in this header - confirm. */
+
+
+enum { /* Filter type bit flags (NOTE(review): presumably the values of Voice::TargetData::FilterType - confirm). */
+ AF_None = 0,
+ AF_LowPass = 1,
+ AF_HighPass = 2,
+ AF_BandPass = AF_LowPass | AF_HighPass /* Both the low-pass and high-pass bits. */
+};
+
+
+struct DirectParams { /* Direct (dry) path mixing state for one voice channel; see Voice::ChannelData::mDryParams. */
+ BiquadFilter LowPass;
+ BiquadFilter HighPass;
+
+ NfcFilter NFCtrlFilter; /* Copied from the device's mNFCtrlFilter by Voice::prepare. */
+
+ struct { /* Used instead of Gains when the voice has the VoiceHasHrtf flag. */
+ HrtfFilter Old; /* Overwritten with Target when the mix isn't fading. */
+ HrtfFilter Target;
+ alignas(16) std::array<float,HrtfHistoryLength> History;
+ } Hrtf;
+
+ struct {
+ std::array<float,MAX_OUTPUT_CHANNELS> Current; /* Overwritten with Target when the mix isn't fading. */
+ std::array<float,MAX_OUTPUT_CHANNELS> Target;
+ } Gains;
+};
+
+struct SendParams { /* Mixing state for one auxiliary send of a voice channel; see Voice::ChannelData::mWetParams. */
+ BiquadFilter LowPass;
+ BiquadFilter HighPass;
+
+ struct {
+ std::array<float,MaxAmbiChannels> Current; /* Overwritten with Target when the mix isn't fading. */
+ std::array<float,MaxAmbiChannels> Target;
+ } Gains;
+};
+
+
+struct VoiceBufferItem { /* One entry in a voice's buffer list, linked through mNext. */
+ std::atomic<VoiceBufferItem*> mNext{nullptr};
+
+ CallbackType mCallback{nullptr}; /* Called by Voice::mix to fetch more data for callback voices. */
+ void *mUserData{nullptr}; /* Passed as the first argument to mCallback. */
+
+ uint mBlockAlign{0u};
+ uint mSampleLen{0u}; /* Compared against the voice's sample position to detect the end of the buffer. */
+ uint mLoopStart{0u}; /* Loop points, in the same units as the voice's sample position. */
+ uint mLoopEnd{0u};
+
+ al::byte *mSamples{nullptr}; /* Raw sample storage. */
+};
+
+
+struct VoiceProps { /* Plain set of voice properties; stored in Voice::mProps and extended by VoicePropsItem. */
+ float Pitch;
+ float Gain;
+ float OuterGain;
+ float MinGain;
+ float MaxGain;
+ float InnerAngle;
+ float OuterAngle;
+ float RefDistance;
+ float MaxDistance;
+ float RolloffFactor;
+ std::array<float,3> Position;
+ std::array<float,3> Velocity;
+ std::array<float,3> Direction;
+ std::array<float,3> OrientAt;
+ std::array<float,3> OrientUp;
+ bool HeadRelative;
+ DistanceModel mDistanceModel;
+ Resampler mResampler;
+ DirectMode DirectChannels;
+ SpatializeMode mSpatializeMode;
+
+ bool DryGainHFAuto;
+ bool WetGainAuto;
+ bool WetGainHFAuto;
+ float OuterGainHF;
+
+ float AirAbsorptionFactor;
+ float RoomRolloffFactor;
+ float DopplerFactor;
+
+ std::array<float,2> StereoPan;
+
+ float Radius;
+ float EnhWidth;
+
+ /** Direct filter and auxiliary send info. */
+ struct {
+ float Gain;
+ float GainHF;
+ float HFReference;
+ float GainLF;
+ float LFReference;
+ } Direct;
+ struct SendData { /* Same fields as Direct, plus the target effect slot. */
+ EffectSlot *Slot;
+ float Gain;
+ float GainHF;
+ float HFReference;
+ float GainLF;
+ float LFReference;
+ } Send[MAX_SENDS];
+};
+
+struct VoicePropsItem : public VoiceProps { /* A VoiceProps with an atomic link pointer so items can be chained; Voice::mUpdate points to one. */
+ std::atomic<VoicePropsItem*> next{nullptr};
+
+ DEF_NEWDEL(VoicePropsItem)
+};
+
+enum : uint { /* Bit indices for Voice::mFlags. */
+ VoiceIsStatic, /* Voice::mix loads with LoadBufferStatic. */
+ VoiceIsCallback, /* Voice::mix loads with LoadBufferCallback. */
+ VoiceIsAmbisonic, /* Voice::mix applies the per-channel band-split scaling. */
+ VoiceCallbackStopped, /* Set when the callback returns an error or less data than requested. */
+ VoiceIsFading, /* Voice::mix fades the mixing params instead of overwriting them. */
+ VoiceHasHrtf, /* Direct path is mixed with DoHrtfMix. */
+ VoiceHasNfc, /* Direct path is mixed with DoNfcMix. */
+
+ VoiceFlagCount /* Number of flags; sizes the mFlags bitset. */
+};
+
+struct Voice { /* Mixer-side state for one playing voice: properties, playback position, buffer list, and per-channel mixing parameters. Non-copyable. */
+ enum State {
+ Stopped,
+ Playing,
+ Stopping, /* Voice::mix mixes toward silence and then sets Stopped. */
+ Pending
+ };
+
+ std::atomic<VoicePropsItem*> mUpdate{nullptr};
+
+ VoiceProps mProps;
+
+ std::atomic<uint> mSourceID{0u}; /* Reset to 0 by Voice::mix when the buffer list runs out. */
+ std::atomic<State> mPlayState{Stopped};
+ std::atomic<bool> mPendingChange{false};
+
+ /**
+ * Source offset in samples, relative to the currently playing buffer, NOT
+ * the whole queue. May be negative, in which case Voice::mix loads that
+ * many silent samples before the buffer data.
+ */
+ std::atomic<int> mPosition;
+ /** Fractional (fixed-point) offset to the next sample. */
+ std::atomic<uint> mPositionFrac;
+
+ /* Current buffer queue item being played. */
+ std::atomic<VoiceBufferItem*> mCurrentBuffer;
+
+ /* Buffer queue item to loop to at end of queue (will be NULL for non-
+ * looping voices).
+ */
+ std::atomic<VoiceBufferItem*> mLoopBuffer;
+
+ std::chrono::nanoseconds mStartTime{}; /* Device time at which output starts; Voice::mix delays mixing until then. */
+
+ /* Properties for the attached buffer(s). */
+ FmtChannels mFmtChannels;
+ FmtType mFmtType;
+ uint mFrequency;
+ uint mFrameStep; /**< In steps of the sample type size. */
+ uint mBytesPerBlock; /**< Or for PCM formats, BytesPerFrame. */
+ uint mSamplesPerBlock; /**< Always 1 for PCM formats. */
+ AmbiLayout mAmbiLayout;
+ AmbiScaling mAmbiScaling;
+ uint mAmbiOrder;
+
+ std::unique_ptr<DecoderBase> mDecoder; /* Set by Voice::prepare for UHJ and SuperStereo formats, otherwise null. */
+ uint mDecoderPadding{}; /**< Extra samples Voice::mix loads beyond the mix count for the decoder. */
+
+ /** Current target parameters used for mixing. */
+ uint mStep{0}; /**< Fixed-point source step per output sample; 0 keeps Voice::mix from mixing. */
+
+ ResamplerFunc mResampler;
+
+ InterpState mResampleState;
+
+ std::bitset<VoiceFlagCount> mFlags{};
+ uint mNumCallbackBlocks{0}; /**< Number of blocks currently held in the callback buffer. */
+ uint mCallbackBlockBase{0}; /**< Block index corresponding to the start of the callback buffer. */
+
+ struct TargetData { /* Output buffers and filter selection for the direct path or one send. */
+ int FilterType; /* Passed to DoFilters by Voice::mix. */
+ al::span<FloatBufferLine> Buffer; /* Sends with an empty Buffer are skipped by Voice::mix. */
+ };
+ TargetData mDirect;
+ std::array<TargetData,MAX_SENDS> mSend;
+
+ /* The first MaxResamplerPadding/2 elements are the sample history from the
+ * previous mix, with an additional MaxResamplerPadding/2 elements that are
+ * now current (which may be overwritten if the buffer data is still
+ * available).
+ */
+ using HistoryLine = std::array<float,MaxResamplerPadding>;
+ al::vector<HistoryLine,16> mPrevSamples{2};
+
+ struct ChannelData { /* Per-channel mixing state. */
+ float mAmbiHFScale, mAmbiLFScale; /* Band scales applied through mAmbiSplitter when VoiceIsAmbisonic is set. */
+ BandSplitter mAmbiSplitter;
+
+ DirectParams mDryParams;
+ std::array<SendParams,MAX_SENDS> mWetParams;
+ };
+ al::vector<ChannelData> mChans{2};
+
+ Voice() = default;
+ ~Voice() = default;
+
+ Voice(const Voice&) = delete;
+ Voice& operator=(const Voice&) = delete;
+
+ void mix(const State vstate, ContextBase *Context, const std::chrono::nanoseconds deviceTime,
+ const uint SamplesToDo);
+
+ void prepare(DeviceBase *device);
+
+ static void InitMixer(al::optional<std::string> resampler);
+
+ DEF_NEWDEL(Voice)
+};
+
+extern Resampler ResamplerDefault; /* Defined outside this header. NOTE(review): presumably the resampler used when none is specified - confirm. */
+
+#endif /* CORE_VOICE_H */
diff --git a/core/voice_change.h b/core/voice_change.h
new file mode 100644
index 00000000..ddc6186f
--- /dev/null
+++ b/core/voice_change.h
@@ -0,0 +1,31 @@
+#ifndef VOICE_CHANGE_H
+#define VOICE_CHANGE_H
+
+#include <atomic>
+
+#include "almalloc.h"
+
+struct Voice;
+
+using uint = unsigned int;
+
+
+enum class VChangeState { /* Value type of VoiceChange::mState. */
+ Reset,
+ Stop,
+ Play,
+ Pause,
+ Restart
+};
+struct VoiceChange { /* One voice state change record; records are chained through the atomic mNext pointer. */
+ Voice *mOldVoice{nullptr};
+ Voice *mVoice{nullptr};
+ uint mSourceID{0};
+ VChangeState mState{}; /* Value-initialized, i.e. VChangeState::Reset. */
+
+ std::atomic<VoiceChange*> mNext{nullptr};
+
+ DEF_NEWDEL(VoiceChange)
+};
+
+#endif /* VOICE_CHANGE_H */