diff options
Diffstat (limited to 'core')
74 files changed, 12481 insertions, 0 deletions
diff --git a/core/ambdec.cpp b/core/ambdec.cpp new file mode 100644 index 00000000..8ca182c4 --- /dev/null +++ b/core/ambdec.cpp @@ -0,0 +1,306 @@ + +#include "config.h" + +#include "ambdec.h" + +#include <algorithm> +#include <cctype> +#include <cstdarg> +#include <cstddef> +#include <cstdio> +#include <iterator> +#include <sstream> +#include <string> + +#include "albit.h" +#include "alfstream.h" +#include "alspan.h" +#include "opthelpers.h" + + +namespace { + +std::string read_word(std::istream &f) +{ + std::string ret; + f >> ret; + return ret; +} + +bool is_at_end(const std::string &buffer, std::size_t endpos) +{ + while(endpos < buffer.length() && std::isspace(buffer[endpos])) + ++endpos; + return !(endpos < buffer.length() && buffer[endpos] != '#'); +} + + +enum class ReaderScope { + Global, + Speakers, + LFMatrix, + HFMatrix, +}; + +#ifdef __USE_MINGW_ANSI_STDIO +[[gnu::format(gnu_printf,2,3)]] +#else +[[gnu::format(printf,2,3)]] +#endif +al::optional<std::string> make_error(size_t linenum, const char *fmt, ...) 
+{ + al::optional<std::string> ret; + auto &str = ret.emplace(); + + str.resize(256); + int printed{std::snprintf(const_cast<char*>(str.data()), str.length(), "Line %zu: ", linenum)}; + if(printed < 0) printed = 0; + auto plen = std::min(static_cast<size_t>(printed), str.length()); + + std::va_list args, args2; + va_start(args, fmt); + va_copy(args2, args); + const int msglen{std::vsnprintf(&str[plen], str.size()-plen, fmt, args)}; + if(msglen >= 0 && static_cast<size_t>(msglen) >= str.size()-plen) + { + str.resize(static_cast<size_t>(msglen) + plen + 1u); + std::vsnprintf(&str[plen], str.size()-plen, fmt, args2); + } + va_end(args2); + va_end(args); + + return ret; +} + +} // namespace + +AmbDecConf::~AmbDecConf() = default; + + +al::optional<std::string> AmbDecConf::load(const char *fname) noexcept +{ + al::ifstream f{fname}; + if(!f.is_open()) + return std::string("Failed to open file \"")+fname+"\""; + + ReaderScope scope{ReaderScope::Global}; + size_t speaker_pos{0}; + size_t lfmatrix_pos{0}; + size_t hfmatrix_pos{0}; + size_t linenum{0}; + + std::string buffer; + while(f.good() && std::getline(f, buffer)) + { + ++linenum; + + std::istringstream istr{buffer}; + std::string command{read_word(istr)}; + if(command.empty() || command[0] == '#') + continue; + + if(command == "/}") + { + if(scope == ReaderScope::Global) + return make_error(linenum, "Unexpected /} in global scope"); + scope = ReaderScope::Global; + continue; + } + + if(scope == ReaderScope::Speakers) + { + if(command == "add_spkr") + { + if(speaker_pos == NumSpeakers) + return make_error(linenum, "Too many speakers specified"); + + AmbDecConf::SpeakerConf &spkr = Speakers[speaker_pos++]; + istr >> spkr.Name; + istr >> spkr.Distance; + istr >> spkr.Azimuth; + istr >> spkr.Elevation; + istr >> spkr.Connection; + } + else + return make_error(linenum, "Unexpected speakers command: %s", command.c_str()); + } + else if(scope == ReaderScope::LFMatrix || scope == ReaderScope::HFMatrix) + { + auto &gains = 
(scope == ReaderScope::LFMatrix) ? LFOrderGain : HFOrderGain; + auto *matrix = (scope == ReaderScope::LFMatrix) ? LFMatrix : HFMatrix; + auto &pos = (scope == ReaderScope::LFMatrix) ? lfmatrix_pos : hfmatrix_pos; + + if(command == "order_gain") + { + size_t toread{(ChanMask > Ambi3OrderMask) ? 5u : 4u}; + std::size_t curgain{0u}; + float value{}; + while(toread) + { + --toread; + istr >> value; + if(curgain < al::size(gains)) + gains[curgain++] = value; + } + } + else if(command == "add_row") + { + if(pos == NumSpeakers) + return make_error(linenum, "Too many matrix rows specified"); + + unsigned int mask{ChanMask}; + + AmbDecConf::CoeffArray &mtxrow = matrix[pos++]; + mtxrow.fill(0.0f); + + float value{}; + while(mask) + { + auto idx = static_cast<unsigned>(al::countr_zero(mask)); + mask &= ~(1u << idx); + + istr >> value; + if(idx < mtxrow.size()) + mtxrow[idx] = value; + } + } + else + return make_error(linenum, "Unexpected matrix command: %s", command.c_str()); + } + // Global scope commands + else if(command == "/description") + { + while(istr.good() && std::isspace(istr.peek())) + istr.ignore(); + std::getline(istr, Description); + while(!Description.empty() && std::isspace(Description.back())) + Description.pop_back(); + } + else if(command == "/version") + { + if(Version) + return make_error(linenum, "Duplicate version definition"); + istr >> Version; + if(Version != 3) + return make_error(linenum, "Unsupported version: %d", Version); + } + else if(command == "/dec/chan_mask") + { + if(ChanMask) + return make_error(linenum, "Duplicate chan_mask definition"); + istr >> std::hex >> ChanMask >> std::dec; + + if(!ChanMask || ChanMask > Ambi4OrderMask) + return make_error(linenum, "Invalid chan_mask: 0x%x", ChanMask); + if(ChanMask > Ambi3OrderMask && CoeffScale == AmbDecScale::FuMa) + return make_error(linenum, "FuMa not compatible with over third-order"); + } + else if(command == "/dec/freq_bands") + { + if(FreqBands) + return make_error(linenum, "Duplicate 
freq_bands"); + istr >> FreqBands; + if(FreqBands != 1 && FreqBands != 2) + return make_error(linenum, "Invalid freq_bands: %u", FreqBands); + } + else if(command == "/dec/speakers") + { + if(NumSpeakers) + return make_error(linenum, "Duplicate speakers"); + istr >> NumSpeakers; + if(!NumSpeakers) + return make_error(linenum, "Invalid speakers: %zu", NumSpeakers); + Speakers = std::make_unique<SpeakerConf[]>(NumSpeakers); + } + else if(command == "/dec/coeff_scale") + { + if(CoeffScale != AmbDecScale::Unset) + return make_error(linenum, "Duplicate coeff_scale"); + + std::string scale{read_word(istr)}; + if(scale == "n3d") CoeffScale = AmbDecScale::N3D; + else if(scale == "sn3d") CoeffScale = AmbDecScale::SN3D; + else if(scale == "fuma") CoeffScale = AmbDecScale::FuMa; + else + return make_error(linenum, "Unexpected coeff_scale: %s", scale.c_str()); + + if(ChanMask > Ambi3OrderMask && CoeffScale == AmbDecScale::FuMa) + return make_error(linenum, "FuMa not compatible with over third-order"); + } + else if(command == "/opt/xover_freq") + { + istr >> XOverFreq; + } + else if(command == "/opt/xover_ratio") + { + istr >> XOverRatio; + } + else if(command == "/opt/input_scale" || command == "/opt/nfeff_comp" + || command == "/opt/delay_comp" || command == "/opt/level_comp") + { + /* Unused */ + read_word(istr); + } + else if(command == "/speakers/{") + { + if(!NumSpeakers) + return make_error(linenum, "Speakers defined without a count"); + scope = ReaderScope::Speakers; + } + else if(command == "/lfmatrix/{" || command == "/hfmatrix/{" || command == "/matrix/{") + { + if(!NumSpeakers) + return make_error(linenum, "Matrix defined without a speaker count"); + if(!ChanMask) + return make_error(linenum, "Matrix defined without a channel mask"); + + if(!Matrix) + { + Matrix = std::make_unique<CoeffArray[]>(NumSpeakers * FreqBands); + LFMatrix = Matrix.get(); + HFMatrix = LFMatrix + NumSpeakers*(FreqBands-1); + } + + if(FreqBands == 1) + { + if(command != "/matrix/{") + return 
make_error(linenum, "Unexpected \"%s\" for a single-band decoder", + command.c_str()); + scope = ReaderScope::HFMatrix; + } + else + { + if(command == "/lfmatrix/{") + scope = ReaderScope::LFMatrix; + else if(command == "/hfmatrix/{") + scope = ReaderScope::HFMatrix; + else + return make_error(linenum, "Unexpected \"%s\" for a dual-band decoder", + command.c_str()); + } + } + else if(command == "/end") + { + const auto endpos = static_cast<std::size_t>(istr.tellg()); + if(!is_at_end(buffer, endpos)) + return make_error(linenum, "Extra junk on end: %s", buffer.substr(endpos).c_str()); + + if(speaker_pos < NumSpeakers || hfmatrix_pos < NumSpeakers + || (FreqBands == 2 && lfmatrix_pos < NumSpeakers)) + return make_error(linenum, "Incomplete decoder definition"); + if(CoeffScale == AmbDecScale::Unset) + return make_error(linenum, "No coefficient scaling defined"); + + return al::nullopt; + } + else + return make_error(linenum, "Unexpected command: %s", command.c_str()); + + istr.clear(); + const auto endpos = static_cast<std::size_t>(istr.tellg()); + if(!is_at_end(buffer, endpos)) + return make_error(linenum, "Extra junk on line: %s", buffer.substr(endpos).c_str()); + buffer.clear(); + } + return make_error(linenum, "Unexpected end of file"); +} diff --git a/core/ambdec.h b/core/ambdec.h new file mode 100644 index 00000000..7f739781 --- /dev/null +++ b/core/ambdec.h @@ -0,0 +1,55 @@ +#ifndef CORE_AMBDEC_H +#define CORE_AMBDEC_H + +#include <array> +#include <memory> +#include <string> + +#include "aloptional.h" +#include "core/ambidefs.h" + +/* Helpers to read .ambdec configuration files. 
*/ + +enum class AmbDecScale { + Unset, + N3D, + SN3D, + FuMa, +}; +struct AmbDecConf { + std::string Description; + int Version{0}; /* Must be 3 */ + + unsigned int ChanMask{0u}; + unsigned int FreqBands{0u}; /* Must be 1 or 2 */ + AmbDecScale CoeffScale{AmbDecScale::Unset}; + + float XOverFreq{0.0f}; + float XOverRatio{0.0f}; + + struct SpeakerConf { + std::string Name; + float Distance{0.0f}; + float Azimuth{0.0f}; + float Elevation{0.0f}; + std::string Connection; + }; + size_t NumSpeakers{0}; + std::unique_ptr<SpeakerConf[]> Speakers; + + using CoeffArray = std::array<float,MaxAmbiChannels>; + std::unique_ptr<CoeffArray[]> Matrix; + + /* Unused when FreqBands == 1 */ + float LFOrderGain[MaxAmbiOrder+1]{}; + CoeffArray *LFMatrix; + + float HFOrderGain[MaxAmbiOrder+1]{}; + CoeffArray *HFMatrix; + + ~AmbDecConf(); + + al::optional<std::string> load(const char *fname) noexcept; +}; + +#endif /* CORE_AMBDEC_H */ diff --git a/core/ambidefs.cpp b/core/ambidefs.cpp new file mode 100644 index 00000000..70d6f356 --- /dev/null +++ b/core/ambidefs.cpp @@ -0,0 +1,308 @@ + +#include "config.h" + +#include "ambidefs.h" + +#include "alnumbers.h" + + +namespace { + +using AmbiChannelFloatArray = std::array<float,MaxAmbiChannels>; + +constexpr auto inv_sqrt2f = static_cast<float>(1.0/al::numbers::sqrt2); +constexpr auto inv_sqrt3f = static_cast<float>(1.0/al::numbers::sqrt3); + + +/* These HF gains are derived from the same 32-point speaker array. The scale + * factor between orders represents the same scale factors for any (regular) + * speaker array decoder. e.g. Given a first-order source and second-order + * output, applying an HF scale of HFScales[1][0] / HFScales[2][0] to channel 0 + * will result in that channel being subsequently decoded for second-order as + * if it was a first-order decoder for that same speaker array. 
+ */ +constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales{{ + {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }}, + {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }}, + {{ 2.981423970e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }}, + {{ 2.359168820e+00f, 2.031565936e+00f, 1.444598386e+00f, 7.189495850e-01f }}, + /* 1.947005434e+00f, 1.764337084e+00f, 1.424707344e+00f, 9.755104127e-01f, 4.784482742e-01f */ +}}; + +/* Same as above, but using a 10-point horizontal-only speaker array. Should + * only be used when the device is mixing in 2D B-Format for horizontal-only + * output. + */ +constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales2D{{ + {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }}, + {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }}, + {{ 1.825741858e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }}, + {{ 1.581138830e+00f, 1.460781803e+00f, 1.118033989e+00f, 6.050756345e-01f }}, + /* 1.414213562e+00f, 1.344997024e+00f, 1.144122806e+00f, 8.312538756e-01f, 4.370160244e-01f */ +}}; + + +/* This calculates a first-order "upsampler" matrix. It combines a first-order + * decoder matrix with a max-order encoder matrix, creating a matrix that + * behaves as if the B-Format input signal is first decoded to a speaker array + * at first-order, then those speaker feeds are encoded to a higher-order + * signal. While not perfect, this should accurately encode a lower-order + * signal into a higher-order signal. 
+ */ +constexpr std::array<std::array<float,4>,8> FirstOrderDecoder{{ + {{ 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, }}, + {{ 1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, }}, + {{ 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, 1.250000000e-01f, }}, + {{ 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, }}, + {{ 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, }}, + {{ 1.250000000e-01f, 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }}, + {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, 1.250000000e-01f, }}, + {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,8> FirstOrderEncoder{{ + CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs(-inv_sqrt3f, inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs(-inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f), +}}; +static_assert(FirstOrderDecoder.size() == FirstOrderEncoder.size(), "First-order mismatch"); + +/* This calculates a 2D first-order "upsampler" matrix. Same as the first-order + * matrix, just using a more optimized speaker array for horizontal-only + * content. 
+ */ +constexpr std::array<std::array<float,4>,4> FirstOrder2DDecoder{{ + {{ 2.500000000e-01f, 2.041241452e-01f, 0.0f, 2.041241452e-01f, }}, + {{ 2.500000000e-01f, 2.041241452e-01f, 0.0f, -2.041241452e-01f, }}, + {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f, 2.041241452e-01f, }}, + {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f, -2.041241452e-01f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,4> FirstOrder2DEncoder{{ + CalcAmbiCoeffs( inv_sqrt2f, 0.0f, inv_sqrt2f), + CalcAmbiCoeffs( inv_sqrt2f, 0.0f, -inv_sqrt2f), + CalcAmbiCoeffs(-inv_sqrt2f, 0.0f, inv_sqrt2f), + CalcAmbiCoeffs(-inv_sqrt2f, 0.0f, -inv_sqrt2f), +}}; +static_assert(FirstOrder2DDecoder.size() == FirstOrder2DEncoder.size(), "First-order 2D mismatch"); + + +/* This calculates a second-order "upsampler" matrix. Same as the first-order + * matrix, just using a slightly more dense speaker array suitable for second- + * order content. + */ +constexpr std::array<std::array<float,9>,12> SecondOrderDecoder{{ + {{ 8.333333333e-02f, 0.000000000e+00f, -7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f, 1.167715449e-01f, }}, + {{ 8.333333333e-02f, -1.227808683e-01f, 0.000000000e+00f, 7.588274978e-02f, -1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }}, + {{ 8.333333333e-02f, -7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }}, + {{ 8.333333333e-02f, 0.000000000e+00f, 7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, 1.443375673e-01f, 1.167715449e-01f, }}, + {{ 8.333333333e-02f, -1.227808683e-01f, 0.000000000e+00f, -7.588274978e-02f, 1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }}, + {{ 8.333333333e-02f, 7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.443375673e-01f, 1.090847495e-01f, 
0.000000000e+00f, -4.460276122e-02f, }}, + {{ 8.333333333e-02f, 0.000000000e+00f, -7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, 1.443375673e-01f, 1.167715449e-01f, }}, + {{ 8.333333333e-02f, 1.227808683e-01f, 0.000000000e+00f, -7.588274978e-02f, -1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }}, + {{ 8.333333333e-02f, 7.588274978e-02f, 1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, 1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }}, + {{ 8.333333333e-02f, 0.000000000e+00f, 7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f, 1.167715449e-01f, }}, + {{ 8.333333333e-02f, 1.227808683e-01f, 0.000000000e+00f, 7.588274978e-02f, 1.443375673e-01f, 0.000000000e+00f, -9.316949906e-02f, 0.000000000e+00f, -7.216878365e-02f, }}, + {{ 8.333333333e-02f, -7.588274978e-02f, -1.227808683e-01f, 0.000000000e+00f, 0.000000000e+00f, 1.443375673e-01f, 1.090847495e-01f, 0.000000000e+00f, -4.460276122e-02f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,12> SecondOrderEncoder{{ + CalcAmbiCoeffs( 0.000000000e+00f, -5.257311121e-01f, 8.506508084e-01f), + CalcAmbiCoeffs(-8.506508084e-01f, 0.000000000e+00f, 5.257311121e-01f), + CalcAmbiCoeffs(-5.257311121e-01f, 8.506508084e-01f, 0.000000000e+00f), + CalcAmbiCoeffs( 0.000000000e+00f, 5.257311121e-01f, 8.506508084e-01f), + CalcAmbiCoeffs(-8.506508084e-01f, 0.000000000e+00f, -5.257311121e-01f), + CalcAmbiCoeffs( 5.257311121e-01f, -8.506508084e-01f, 0.000000000e+00f), + CalcAmbiCoeffs( 0.000000000e+00f, -5.257311121e-01f, -8.506508084e-01f), + CalcAmbiCoeffs( 8.506508084e-01f, 0.000000000e+00f, -5.257311121e-01f), + CalcAmbiCoeffs( 5.257311121e-01f, 8.506508084e-01f, 0.000000000e+00f), + CalcAmbiCoeffs( 0.000000000e+00f, 5.257311121e-01f, -8.506508084e-01f), + CalcAmbiCoeffs( 8.506508084e-01f, 0.000000000e+00f, 5.257311121e-01f), + 
CalcAmbiCoeffs(-5.257311121e-01f, -8.506508084e-01f, 0.000000000e+00f), +}}; +static_assert(SecondOrderDecoder.size() == SecondOrderEncoder.size(), "Second-order mismatch"); + +/* This calculates a 2D second-order "upsampler" matrix. Same as the second- + * order matrix, just using a more optimized speaker array for horizontal-only + * content. + */ +constexpr std::array<std::array<float,9>,6> SecondOrder2DDecoder{{ + {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f, 1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }}, + {{ 1.666666667e-01f, -1.924500897e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }}, + {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f, -1.666666667e-01f, 1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }}, + {{ 1.666666667e-01f, 9.622504486e-02f, 0.0f, -1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }}, + {{ 1.666666667e-01f, 1.924500897e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }}, + {{ 1.666666667e-01f, 9.622504486e-02f, 0.0f, 1.666666667e-01f, 1.490711985e-01f, 0.0f, 0.0f, 0.0f, 8.606629658e-02f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,6> SecondOrder2DEncoder{{ + CalcAmbiCoeffs(-0.50000000000f, 0.0f, 0.86602540379f), + CalcAmbiCoeffs(-1.00000000000f, 0.0f, 0.00000000000f), + CalcAmbiCoeffs(-0.50000000000f, 0.0f, -0.86602540379f), + CalcAmbiCoeffs( 0.50000000000f, 0.0f, -0.86602540379f), + CalcAmbiCoeffs( 1.00000000000f, 0.0f, 0.00000000000f), + CalcAmbiCoeffs( 0.50000000000f, 0.0f, 0.86602540379f), +}}; +static_assert(SecondOrder2DDecoder.size() == SecondOrder2DEncoder.size(), + "Second-order 2D mismatch"); + + +/* This calculates a third-order "upsampler" matrix. Same as the first-order + * matrix, just using a more dense speaker array suitable for third-order + * content. 
+ */ +constexpr std::array<std::array<float,16>,20> ThirdOrderDecoder{{ + {{ 5.000000000e-02f, 3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, 6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f, 0.000000000e+00f, 1.126112056e-01f, 7.944389175e-02f, 0.000000000e+00f, 2.421151497e-02f, 0.000000000e+00f, }}, + {{ 5.000000000e-02f, -3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, 1.256118221e-01f, 0.000000000e+00f, -1.126112056e-01f, 7.944389175e-02f, 0.000000000e+00f, 2.421151497e-02f, 0.000000000e+00f, }}, + {{ 5.000000000e-02f, 3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f, 0.000000000e+00f, 1.126112056e-01f, -7.944389175e-02f, 0.000000000e+00f, -2.421151497e-02f, 0.000000000e+00f, }}, + {{ 5.000000000e-02f, -3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, 6.454972244e-02f, 9.045084972e-02f, 0.000000000e+00f, -1.232790000e-02f, 1.256118221e-01f, 0.000000000e+00f, -1.126112056e-01f, -7.944389175e-02f, 0.000000000e+00f, -2.421151497e-02f, 0.000000000e+00f, }}, + {{ 5.000000000e-02f, 8.090169944e-02f, 0.000000000e+00f, 3.090169944e-02f, 6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f, 0.000000000e+00f, -2.950836627e-02f, 0.000000000e+00f, -1.497759251e-01f, 0.000000000e+00f, -7.763237543e-02f, }}, + {{ 5.000000000e-02f, 8.090169944e-02f, 0.000000000e+00f, -3.090169944e-02f, -6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f, 0.000000000e+00f, -2.950836627e-02f, 0.000000000e+00f, 1.497759251e-01f, 0.000000000e+00f, 7.763237543e-02f, }}, + {{ 5.000000000e-02f, -8.090169944e-02f, 0.000000000e+00f, 3.090169944e-02f, -6.454972244e-02f, 0.000000000e+00f, 
-5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, 7.763237543e-02f, 0.000000000e+00f, 2.950836627e-02f, 0.000000000e+00f, -1.497759251e-01f, 0.000000000e+00f, -7.763237543e-02f, }}, + {{ 5.000000000e-02f, -8.090169944e-02f, 0.000000000e+00f, -3.090169944e-02f, 6.454972244e-02f, 0.000000000e+00f, -5.590169944e-02f, 0.000000000e+00f, -7.216878365e-02f, 7.763237543e-02f, 0.000000000e+00f, 2.950836627e-02f, 0.000000000e+00f, 1.497759251e-01f, 0.000000000e+00f, 7.763237543e-02f, }}, + {{ 5.000000000e-02f, 0.000000000e+00f, 3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, 6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, 3.034486645e-02f, -6.779013272e-02f, 1.659481923e-01f, 4.797944664e-02f, }}, + {{ 5.000000000e-02f, 0.000000000e+00f, 3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, 3.034486645e-02f, 6.779013272e-02f, 1.659481923e-01f, -4.797944664e-02f, }}, + {{ 5.000000000e-02f, 0.000000000e+00f, -3.090169944e-02f, 8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, -3.034486645e-02f, -6.779013272e-02f, -1.659481923e-01f, 4.797944664e-02f, }}, + {{ 5.000000000e-02f, 0.000000000e+00f, -3.090169944e-02f, -8.090169944e-02f, 0.000000000e+00f, 0.000000000e+00f, -3.454915028e-02f, 6.454972244e-02f, 8.449668365e-02f, 0.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f, -3.034486645e-02f, 6.779013272e-02f, -1.659481923e-01f, -4.797944664e-02f, }}, + {{ 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, 6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, 6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, 1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }}, + 
{{ 5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f, 1.482646439e-02f, }}, + {{ 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, 5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f, 1.092600649e-02f, -7.364853795e-02f, 1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }}, + {{ 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, 6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, 6.338656910e-02f, 1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f, 1.482646439e-02f, }}, + {{ 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, 6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, 7.364853795e-02f, 1.011266756e-01f, 7.086833869e-02f, -1.482646439e-02f, }}, + {{ 5.000000000e-02f, 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, 1.016220987e-01f, 6.338656910e-02f, -1.092600649e-02f, 7.364853795e-02f, -1.011266756e-01f, 7.086833869e-02f, 1.482646439e-02f, }}, + {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, 5.000000000e-02f, -6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, -6.454972244e-02f, 0.000000000e+00f, -1.016220987e-01f, 6.338656910e-02f, 1.092600649e-02f, 7.364853795e-02f, 1.011266756e-01f, 7.086833869e-02f, -1.482646439e-02f, }}, + {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, 6.454972244e-02f, 6.454972244e-02f, 0.000000000e+00f, 6.454972244e-02f, 0.000000000e+00f, 
-1.016220987e-01f, -6.338656910e-02f, 1.092600649e-02f, 7.364853795e-02f, -1.011266756e-01f, 7.086833869e-02f, 1.482646439e-02f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,20> ThirdOrderEncoder{{ + CalcAmbiCoeffs( 0.35682208976f, 0.93417235897f, 0.00000000000f), + CalcAmbiCoeffs(-0.35682208976f, 0.93417235897f, 0.00000000000f), + CalcAmbiCoeffs( 0.35682208976f, -0.93417235897f, 0.00000000000f), + CalcAmbiCoeffs(-0.35682208976f, -0.93417235897f, 0.00000000000f), + CalcAmbiCoeffs( 0.93417235897f, 0.00000000000f, 0.35682208976f), + CalcAmbiCoeffs( 0.93417235897f, 0.00000000000f, -0.35682208976f), + CalcAmbiCoeffs(-0.93417235897f, 0.00000000000f, 0.35682208976f), + CalcAmbiCoeffs(-0.93417235897f, 0.00000000000f, -0.35682208976f), + CalcAmbiCoeffs( 0.00000000000f, 0.35682208976f, 0.93417235897f), + CalcAmbiCoeffs( 0.00000000000f, 0.35682208976f, -0.93417235897f), + CalcAmbiCoeffs( 0.00000000000f, -0.35682208976f, 0.93417235897f), + CalcAmbiCoeffs( 0.00000000000f, -0.35682208976f, -0.93417235897f), + CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs( -inv_sqrt3f, inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( -inv_sqrt3f, inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f), + CalcAmbiCoeffs( -inv_sqrt3f, -inv_sqrt3f, inv_sqrt3f), + CalcAmbiCoeffs( -inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f), +}}; +static_assert(ThirdOrderDecoder.size() == ThirdOrderEncoder.size(), "Third-order mismatch"); + +/* This calculates a 2D third-order "upsampler" matrix. Same as the third-order + * matrix, just using a more optimized speaker array for horizontal-only + * content. 
+ */ +constexpr std::array<std::array<float,16>,8> ThirdOrder2DDecoder{{ + {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f, 1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 4.573941867e-02f, }}, + {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f, 5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, 4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }}, + {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f, -5.523559567e-02f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, 4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.104247249e-01f, }}, + {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f, -1.333505242e-01f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }}, + {{ 1.250000000e-01f, 5.523559567e-02f, 0.0f, -1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, 1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }}, + {{ 1.250000000e-01f, 1.333505242e-01f, 0.0f, -5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.104247249e-01f, }}, + {{ 1.250000000e-01f, 1.333505242e-01f, 0.0f, 5.523559567e-02f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }}, + {{ 1.250000000e-01f, 5.523559567e-02f, 0.0f, 1.333505242e-01f, 9.128709292e-02f, 0.0f, 0.0f, 0.0f, 9.128709292e-02f, 1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 4.573941867e-02f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,8> ThirdOrder2DEncoder{{ + CalcAmbiCoeffs(-0.38268343237f, 0.0f, 0.92387953251f), + CalcAmbiCoeffs(-0.92387953251f, 0.0f, 0.38268343237f), + CalcAmbiCoeffs(-0.92387953251f, 0.0f, -0.38268343237f), + CalcAmbiCoeffs(-0.38268343237f, 0.0f, -0.92387953251f), + CalcAmbiCoeffs( 0.38268343237f, 0.0f, -0.92387953251f), + CalcAmbiCoeffs( 
0.92387953251f, 0.0f, -0.38268343237f), + CalcAmbiCoeffs( 0.92387953251f, 0.0f, 0.38268343237f), + CalcAmbiCoeffs( 0.38268343237f, 0.0f, 0.92387953251f), +}}; +static_assert(ThirdOrder2DDecoder.size() == ThirdOrder2DEncoder.size(), "Third-order 2D mismatch"); + + +/* This calculates a 2D fourth-order "upsampler" matrix. There is no 3D fourth- + * order upsampler since fourth-order is the max order we'll be supporting for + * the foreseeable future. This is only necessary for mixing horizontal-only + * fourth-order content to 3D. + */ +constexpr std::array<std::array<float,25>,10> FourthOrder2DDecoder{{ + {{ 1.000000000e-01f, 3.568220898e-02f, 0.0f, 1.098185471e-01f, 6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, 7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.620301997e-02f, 8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }}, + {{ 1.000000000e-01f, 9.341723590e-02f, 0.0f, 6.787159473e-02f, 9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, 2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }}, + {{ 1.000000000e-01f, 1.154700538e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, -9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.014978717e-02f, }}, + {{ 1.000000000e-01f, 9.341723590e-02f, 0.0f, -6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, 2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.093839659e-02f, 5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }}, + {{ 1.000000000e-01f, 3.568220898e-02f, 0.0f, -1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, 7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }}, + {{ 1.000000000e-01f, 
-3.568220898e-02f, 0.0f, -1.098185471e-01f, 6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, 8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }}, + {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f, -6.787159473e-02f, 9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }}, + {{ 1.000000000e-01f, -1.154700538e-01f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, 9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.000000000e+00f, 0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.014978717e-02f, }}, + {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f, 6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, 5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }}, + {{ 1.000000000e-01f, -3.568220898e-02f, 0.0f, 1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f, 8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.785781628e-02f, }}, +}}; +constexpr std::array<AmbiChannelFloatArray,10> FourthOrder2DEncoder{{ + CalcAmbiCoeffs( 3.090169944e-01f, 0.000000000e+00f, 9.510565163e-01f), + CalcAmbiCoeffs( 8.090169944e-01f, 0.000000000e+00f, 5.877852523e-01f), + CalcAmbiCoeffs( 1.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f), + CalcAmbiCoeffs( 8.090169944e-01f, 0.000000000e+00f, -5.877852523e-01f), + CalcAmbiCoeffs( 3.090169944e-01f, 0.000000000e+00f, -9.510565163e-01f), + CalcAmbiCoeffs(-3.090169944e-01f, 0.000000000e+00f, -9.510565163e-01f), + CalcAmbiCoeffs(-8.090169944e-01f, 0.000000000e+00f, -5.877852523e-01f), + CalcAmbiCoeffs(-1.000000000e+00f, 0.000000000e+00f, 0.000000000e+00f), + 
CalcAmbiCoeffs(-8.090169944e-01f, 0.000000000e+00f, 5.877852523e-01f), + CalcAmbiCoeffs(-3.090169944e-01f, 0.000000000e+00f, 9.510565163e-01f), +}}; +static_assert(FourthOrder2DDecoder.size() == FourthOrder2DEncoder.size(), "Fourth-order 2D mismatch"); + + +template<size_t N, size_t M> +auto CalcAmbiUpsampler(const std::array<std::array<float,N>,M> &decoder, + const std::array<AmbiChannelFloatArray,M> &encoder) +{ + std::array<AmbiChannelFloatArray,N> res{}; + + for(size_t i{0};i < decoder[0].size();++i) + { + for(size_t j{0};j < encoder[0].size();++j) + { + double sum{0.0}; + for(size_t k{0};k < decoder.size();++k) + sum += double{decoder[k][i]} * encoder[k][j]; + res[i][j] = static_cast<float>(sum); + } + } + + return res; +} + +} // namespace + +const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrderUp{CalcAmbiUpsampler(FirstOrderDecoder, FirstOrderEncoder)}; +const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrder2DUp{CalcAmbiUpsampler(FirstOrder2DDecoder, FirstOrder2DEncoder)}; +const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrderUp{CalcAmbiUpsampler(SecondOrderDecoder, SecondOrderEncoder)}; +const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrder2DUp{CalcAmbiUpsampler(SecondOrder2DDecoder, SecondOrder2DEncoder)}; +const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrderUp{CalcAmbiUpsampler(ThirdOrderDecoder, ThirdOrderEncoder)}; +const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrder2DUp{CalcAmbiUpsampler(ThirdOrder2DDecoder, ThirdOrder2DEncoder)}; +const std::array<AmbiChannelFloatArray,25> AmbiScale::FourthOrder2DUp{CalcAmbiUpsampler(FourthOrder2DDecoder, FourthOrder2DEncoder)}; + + +std::array<float,MaxAmbiOrder+1> AmbiScale::GetHFOrderScales(const uint src_order, + const uint dev_order, const bool horizontalOnly) noexcept +{ + std::array<float,MaxAmbiOrder+1> res{}; + + if(!horizontalOnly) + { + for(size_t i{0};i < MaxAmbiOrder+1;++i) + res[i] = HFScales[src_order][i] / HFScales[dev_order][i]; + } + 
else + { + for(size_t i{0};i < MaxAmbiOrder+1;++i) + res[i] = HFScales2D[src_order][i] / HFScales2D[dev_order][i]; + } + + return res; +} diff --git a/core/ambidefs.h b/core/ambidefs.h new file mode 100644 index 00000000..b7d2bcd1 --- /dev/null +++ b/core/ambidefs.h @@ -0,0 +1,250 @@ +#ifndef CORE_AMBIDEFS_H +#define CORE_AMBIDEFS_H + +#include <array> +#include <stddef.h> +#include <stdint.h> + +#include "alnumbers.h" + + +using uint = unsigned int; + +/* The maximum number of Ambisonics channels. For a given order (o), the size + * needed will be (o+1)**2, thus zero-order has 1, first-order has 4, second- + * order has 9, third-order has 16, and fourth-order has 25. + */ +constexpr uint8_t MaxAmbiOrder{3}; +constexpr inline size_t AmbiChannelsFromOrder(size_t order) noexcept +{ return (order+1) * (order+1); } +constexpr size_t MaxAmbiChannels{AmbiChannelsFromOrder(MaxAmbiOrder)}; + +/* A bitmask of ambisonic channels for 0 to 4th order. This only specifies up + * to 4th order, which is the highest order a 32-bit mask value can specify (a + * 64-bit mask could handle up to 7th order). + */ +constexpr uint Ambi0OrderMask{0x00000001}; +constexpr uint Ambi1OrderMask{0x0000000f}; +constexpr uint Ambi2OrderMask{0x000001ff}; +constexpr uint Ambi3OrderMask{0x0000ffff}; +constexpr uint Ambi4OrderMask{0x01ffffff}; + +/* A bitmask of ambisonic channels with height information. If none of these + * channels are used/needed, there's no height (e.g. with most surround sound + * speaker setups). This is ACN ordering, with bit 0 being ACN 0, etc. + */ +constexpr uint AmbiPeriphonicMask{0xfe7ce4}; + +/* The maximum number of ambisonic channels for 2D (non-periphonic) + * representation. This is 2 per each order above zero-order, plus 1 for zero- + * order. Or simply, o*2 + 1. 
+ */ +constexpr inline size_t Ambi2DChannelsFromOrder(size_t order) noexcept +{ return order*2 + 1; } +constexpr size_t MaxAmbi2DChannels{Ambi2DChannelsFromOrder(MaxAmbiOrder)}; + + +/* NOTE: These are scale factors as applied to Ambisonics content. Decoder + * coefficients should be divided by these values to get proper scalings. + */ +struct AmbiScale { + static auto& FromN3D() noexcept + { + static constexpr const std::array<float,MaxAmbiChannels> ret{{ + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f + }}; + return ret; + } + static auto& FromSN3D() noexcept + { + static constexpr const std::array<float,MaxAmbiChannels> ret{{ + 1.000000000f, /* ACN 0, sqrt(1) */ + 1.732050808f, /* ACN 1, sqrt(3) */ + 1.732050808f, /* ACN 2, sqrt(3) */ + 1.732050808f, /* ACN 3, sqrt(3) */ + 2.236067978f, /* ACN 4, sqrt(5) */ + 2.236067978f, /* ACN 5, sqrt(5) */ + 2.236067978f, /* ACN 6, sqrt(5) */ + 2.236067978f, /* ACN 7, sqrt(5) */ + 2.236067978f, /* ACN 8, sqrt(5) */ + 2.645751311f, /* ACN 9, sqrt(7) */ + 2.645751311f, /* ACN 10, sqrt(7) */ + 2.645751311f, /* ACN 11, sqrt(7) */ + 2.645751311f, /* ACN 12, sqrt(7) */ + 2.645751311f, /* ACN 13, sqrt(7) */ + 2.645751311f, /* ACN 14, sqrt(7) */ + 2.645751311f, /* ACN 15, sqrt(7) */ + }}; + return ret; + } + static auto& FromFuMa() noexcept + { + static constexpr const std::array<float,MaxAmbiChannels> ret{{ + 1.414213562f, /* ACN 0 (W), sqrt(2) */ + 1.732050808f, /* ACN 1 (Y), sqrt(3) */ + 1.732050808f, /* ACN 2 (Z), sqrt(3) */ + 1.732050808f, /* ACN 3 (X), sqrt(3) */ + 1.936491673f, /* ACN 4 (V), sqrt(15)/2 */ + 1.936491673f, /* ACN 5 (T), sqrt(15)/2 */ + 2.236067978f, /* ACN 6 (R), sqrt(5) */ + 1.936491673f, /* ACN 7 (S), sqrt(15)/2 */ + 1.936491673f, /* ACN 8 (U), sqrt(15)/2 */ + 2.091650066f, /* ACN 9 (Q), sqrt(35/8) */ + 1.972026594f, /* ACN 10 (O), sqrt(35)/3 */ + 2.231093404f, /* ACN 11 (M), sqrt(224/45) */ + 2.645751311f, /* ACN 12 (K), sqrt(7) */ + 2.231093404f, /* ACN 13 
(L), sqrt(224/45) */ + 1.972026594f, /* ACN 14 (N), sqrt(35)/3 */ + 2.091650066f, /* ACN 15 (P), sqrt(35/8) */ + }}; + return ret; + } + static auto& FromUHJ() noexcept + { + static constexpr const std::array<float,MaxAmbiChannels> ret{{ + 1.000000000f, /* ACN 0 (W), sqrt(1) */ + 1.224744871f, /* ACN 1 (Y), sqrt(3/2) */ + 1.224744871f, /* ACN 2 (Z), sqrt(3/2) */ + 1.224744871f, /* ACN 3 (X), sqrt(3/2) */ + /* Higher orders not relevant for UHJ. */ + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + }}; + return ret; + } + + /* Retrieves per-order HF scaling factors for "upsampling" ambisonic data. */ + static std::array<float,MaxAmbiOrder+1> GetHFOrderScales(const uint src_order, + const uint dev_order, const bool horizontalOnly) noexcept; + + static const std::array<std::array<float,MaxAmbiChannels>,4> FirstOrderUp; + static const std::array<std::array<float,MaxAmbiChannels>,4> FirstOrder2DUp; + static const std::array<std::array<float,MaxAmbiChannels>,9> SecondOrderUp; + static const std::array<std::array<float,MaxAmbiChannels>,9> SecondOrder2DUp; + static const std::array<std::array<float,MaxAmbiChannels>,16> ThirdOrderUp; + static const std::array<std::array<float,MaxAmbiChannels>,16> ThirdOrder2DUp; + static const std::array<std::array<float,MaxAmbiChannels>,25> FourthOrder2DUp; +}; + +struct AmbiIndex { + static auto& FromFuMa() noexcept + { + static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{ + 0, /* W */ + 3, /* X */ + 1, /* Y */ + 2, /* Z */ + 6, /* R */ + 7, /* S */ + 5, /* T */ + 8, /* U */ + 4, /* V */ + 12, /* K */ + 13, /* L */ + 11, /* M */ + 14, /* N */ + 10, /* O */ + 15, /* P */ + 9, /* Q */ + }}; + return ret; + } + static auto& FromFuMa2D() noexcept + { + static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{ + 0, /* W */ + 3, /* X */ + 1, /* Y */ + 8, /* U */ + 4, /* V */ + 15, /* P */ + 9, /* Q */ + }}; + return ret; + } + + static auto& FromACN() noexcept + { + static constexpr const 
std::array<uint8_t,MaxAmbiChannels> ret{{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + }}; + return ret; + } + static auto& FromACN2D() noexcept + { + static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{ + 0, 1,3, 4,8, 9,15 + }}; + return ret; + } + + static auto& OrderFromChannel() noexcept + { + static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{ + 0, 1,1,1, 2,2,2,2,2, 3,3,3,3,3,3,3, + }}; + return ret; + } + static auto& OrderFrom2DChannel() noexcept + { + static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{ + 0, 1,1, 2,2, 3,3, + }}; + return ret; + } +}; + + +/** + * Calculates ambisonic encoder coefficients using the X, Y, and Z direction + * components, which must represent a normalized (unit length) vector. + * + * NOTE: The components use ambisonic coordinates. As a result: + * + * Ambisonic Y = OpenAL -X + * Ambisonic Z = OpenAL Y + * Ambisonic X = OpenAL -Z + * + * The components are ordered such that OpenAL's X, Y, and Z are the first, + * second, and third parameters respectively -- simply negate X and Z. 
+ */ +constexpr auto CalcAmbiCoeffs(const float y, const float z, const float x) +{ + const float xx{x*x}, yy{y*y}, zz{z*z}, xy{x*y}, yz{y*z}, xz{x*z}; + + return std::array<float,MaxAmbiChannels>{{ + /* Zeroth-order */ + 1.0f, /* ACN 0 = 1 */ + /* First-order */ + al::numbers::sqrt3_v<float> * y, /* ACN 1 = sqrt(3) * Y */ + al::numbers::sqrt3_v<float> * z, /* ACN 2 = sqrt(3) * Z */ + al::numbers::sqrt3_v<float> * x, /* ACN 3 = sqrt(3) * X */ + /* Second-order */ + 3.872983346e+00f * xy, /* ACN 4 = sqrt(15) * X * Y */ + 3.872983346e+00f * yz, /* ACN 5 = sqrt(15) * Y * Z */ + 1.118033989e+00f * (3.0f*zz - 1.0f), /* ACN 6 = sqrt(5)/2 * (3*Z*Z - 1) */ + 3.872983346e+00f * xz, /* ACN 7 = sqrt(15) * X * Z */ + 1.936491673e+00f * (xx - yy), /* ACN 8 = sqrt(15)/2 * (X*X - Y*Y) */ + /* Third-order */ + 2.091650066e+00f * (y*(3.0f*xx - yy)), /* ACN 9 = sqrt(35/8) * Y * (3*X*X - Y*Y) */ + 1.024695076e+01f * (z*xy), /* ACN 10 = sqrt(105) * Z * X * Y */ + 1.620185175e+00f * (y*(5.0f*zz - 1.0f)), /* ACN 11 = sqrt(21/8) * Y * (5*Z*Z - 1) */ + 1.322875656e+00f * (z*(5.0f*zz - 3.0f)), /* ACN 12 = sqrt(7)/2 * Z * (5*Z*Z - 3) */ + 1.620185175e+00f * (x*(5.0f*zz - 1.0f)), /* ACN 13 = sqrt(21/8) * X * (5*Z*Z - 1) */ + 5.123475383e+00f * (z*(xx - yy)), /* ACN 14 = sqrt(105)/2 * Z * (X*X - Y*Y) */ + 2.091650066e+00f * (x*(xx - 3.0f*yy)), /* ACN 15 = sqrt(35/8) * X * (X*X - 3*Y*Y) */ + /* Fourth-order */ + /* ACN 16 = sqrt(35)*3/2 * X * Y * (X*X - Y*Y) */ + /* ACN 17 = sqrt(35/2)*3/2 * (3*X*X - Y*Y) * Y * Z */ + /* ACN 18 = sqrt(5)*3/2 * X * Y * (7*Z*Z - 1) */ + /* ACN 19 = sqrt(5/2)*3/2 * Y * Z * (7*Z*Z - 3) */ + /* ACN 20 = 3/8 * (35*Z*Z*Z*Z - 30*Z*Z + 3) */ + /* ACN 21 = sqrt(5/2)*3/2 * X * Z * (7*Z*Z - 3) */ + /* ACN 22 = sqrt(5)*3/4 * (X*X - Y*Y) * (7*Z*Z - 1) */ + /* ACN 23 = sqrt(35/2)*3/2 * (X*X - 3*Y*Y) * X * Z */ + /* ACN 24 = sqrt(35)*3/8 * (X*X*X*X - 6*X*X*Y*Y + Y*Y*Y*Y) */ + }}; +} + +#endif /* CORE_AMBIDEFS_H */ diff --git a/core/async_event.h b/core/async_event.h new file 
mode 100644 index 00000000..5a2f5f91 --- /dev/null +++ b/core/async_event.h @@ -0,0 +1,55 @@ +#ifndef CORE_EVENT_H +#define CORE_EVENT_H + +#include "almalloc.h" + +struct EffectState; + +using uint = unsigned int; + + +struct AsyncEvent { + enum : uint { + /* User event types. */ + SourceStateChange, + BufferCompleted, + Disconnected, + UserEventCount, + + /* Internal events, always processed. */ + ReleaseEffectState = 128, + + /* End event thread processing. */ + KillThread, + }; + + enum class SrcState { + Reset, + Stop, + Play, + Pause + }; + + const uint EnumType; + union { + char dummy; + struct { + uint id; + SrcState state; + } srcstate; + struct { + uint id; + uint count; + } bufcomp; + struct { + char msg[244]; + } disconnect; + EffectState *mEffectState; + } u{}; + + constexpr AsyncEvent(uint type) noexcept : EnumType{type} { } + + DISABLE_ALLOC() +}; + +#endif diff --git a/core/bformatdec.cpp b/core/bformatdec.cpp new file mode 100644 index 00000000..129b9976 --- /dev/null +++ b/core/bformatdec.cpp @@ -0,0 +1,170 @@ + +#include "config.h" + +#include "bformatdec.h" + +#include <algorithm> +#include <array> +#include <cmath> +#include <utility> + +#include "almalloc.h" +#include "alnumbers.h" +#include "filters/splitter.h" +#include "front_stablizer.h" +#include "mixer.h" +#include "opthelpers.h" + + +BFormatDec::BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs, + const al::span<const ChannelDec> coeffslf, const float xover_f0norm, + std::unique_ptr<FrontStablizer> stablizer) + : mStablizer{std::move(stablizer)}, mDualBand{!coeffslf.empty()}, mChannelDec{inchans} +{ + if(!mDualBand) + { + for(size_t j{0};j < mChannelDec.size();++j) + { + float *outcoeffs{mChannelDec[j].mGains.Single}; + for(const ChannelDec &incoeffs : coeffs) + *(outcoeffs++) = incoeffs[j]; + } + } + else + { + mChannelDec[0].mXOver.init(xover_f0norm); + for(size_t j{1};j < mChannelDec.size();++j) + mChannelDec[j].mXOver = mChannelDec[0].mXOver; + + for(size_t 
j{0};j < mChannelDec.size();++j) + { + float *outcoeffs{mChannelDec[j].mGains.Dual[sHFBand]}; + for(const ChannelDec &incoeffs : coeffs) + *(outcoeffs++) = incoeffs[j]; + + outcoeffs = mChannelDec[j].mGains.Dual[sLFBand]; + for(const ChannelDec &incoeffs : coeffslf) + *(outcoeffs++) = incoeffs[j]; + } + } +} + + +void BFormatDec::process(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t SamplesToDo) +{ + ASSUME(SamplesToDo > 0); + + if(mDualBand) + { + const al::span<float> hfSamples{mSamples[sHFBand].data(), SamplesToDo}; + const al::span<float> lfSamples{mSamples[sLFBand].data(), SamplesToDo}; + for(auto &chandec : mChannelDec) + { + chandec.mXOver.process({InSamples->data(), SamplesToDo}, hfSamples.data(), + lfSamples.data()); + MixSamples(hfSamples, OutBuffer, chandec.mGains.Dual[sHFBand], + chandec.mGains.Dual[sHFBand], 0, 0); + MixSamples(lfSamples, OutBuffer, chandec.mGains.Dual[sLFBand], + chandec.mGains.Dual[sLFBand], 0, 0); + ++InSamples; + } + } + else + { + for(auto &chandec : mChannelDec) + { + MixSamples({InSamples->data(), SamplesToDo}, OutBuffer, chandec.mGains.Single, + chandec.mGains.Single, 0, 0); + ++InSamples; + } + } +} + +void BFormatDec::processStablize(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx, + const size_t SamplesToDo) +{ + ASSUME(SamplesToDo > 0); + + /* Move the existing direct L/R signal out so it doesn't get processed by + * the stablizer. 
+ */ + float *RESTRICT mid{al::assume_aligned<16>(mStablizer->MidDirect.data())}; + float *RESTRICT side{al::assume_aligned<16>(mStablizer->Side.data())}; + for(size_t i{0};i < SamplesToDo;++i) + { + mid[i] = OutBuffer[lidx][i] + OutBuffer[ridx][i]; + side[i] = OutBuffer[lidx][i] - OutBuffer[ridx][i]; + } + std::fill_n(OutBuffer[lidx].begin(), SamplesToDo, 0.0f); + std::fill_n(OutBuffer[ridx].begin(), SamplesToDo, 0.0f); + + /* Decode the B-Format input to OutBuffer. */ + process(OutBuffer, InSamples, SamplesToDo); + + /* Include the decoded side signal with the direct side signal. */ + for(size_t i{0};i < SamplesToDo;++i) + side[i] += OutBuffer[lidx][i] - OutBuffer[ridx][i]; + + /* Get the decoded mid signal and band-split it. */ + std::transform(OutBuffer[lidx].cbegin(), OutBuffer[lidx].cbegin()+SamplesToDo, + OutBuffer[ridx].cbegin(), mStablizer->Temp.begin(), + [](const float l, const float r) noexcept { return l + r; }); + + mStablizer->MidFilter.process({mStablizer->Temp.data(), SamplesToDo}, mStablizer->MidHF.data(), + mStablizer->MidLF.data()); + + /* Apply an all-pass to all channels to match the band-splitter's phase + * shift. This is to keep the phase synchronized between the existing + * signal and the split mid signal. + */ + const size_t NumChannels{OutBuffer.size()}; + for(size_t i{0u};i < NumChannels;i++) + { + /* Skip the left and right channels, which are going to get overwritten, + * and substitute the direct mid signal and direct+decoded side signal. + */ + if(i == lidx) + mStablizer->ChannelFilters[i].processAllPass({mid, SamplesToDo}); + else if(i == ridx) + mStablizer->ChannelFilters[i].processAllPass({side, SamplesToDo}); + else + mStablizer->ChannelFilters[i].processAllPass({OutBuffer[i].data(), SamplesToDo}); + } + + /* This pans the separate low- and high-frequency signals between being on + * the center channel and the left+right channels. 
The low-frequency signal + * is panned 1/3rd toward center and the high-frequency signal is panned + * 1/4th toward center. These values can be tweaked. + */ + const float cos_lf{std::cos(1.0f/3.0f * (al::numbers::pi_v<float>*0.5f))}; + const float cos_hf{std::cos(1.0f/4.0f * (al::numbers::pi_v<float>*0.5f))}; + const float sin_lf{std::sin(1.0f/3.0f * (al::numbers::pi_v<float>*0.5f))}; + const float sin_hf{std::sin(1.0f/4.0f * (al::numbers::pi_v<float>*0.5f))}; + for(size_t i{0};i < SamplesToDo;i++) + { + /* Add the direct mid signal to the processed mid signal so it can be + * properly combined with the direct+decoded side signal. + */ + const float m{mStablizer->MidLF[i]*cos_lf + mStablizer->MidHF[i]*cos_hf + mid[i]}; + const float c{mStablizer->MidLF[i]*sin_lf + mStablizer->MidHF[i]*sin_hf}; + const float s{side[i]}; + + /* The generated center channel signal adds to the existing signal, + * while the modified left and right channels replace. + */ + OutBuffer[lidx][i] = (m + s) * 0.5f; + OutBuffer[ridx][i] = (m - s) * 0.5f; + OutBuffer[cidx][i] += c * 0.5f; + } +} + + +std::unique_ptr<BFormatDec> BFormatDec::Create(const size_t inchans, + const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf, + const float xover_f0norm, std::unique_ptr<FrontStablizer> stablizer) +{ + return std::make_unique<BFormatDec>(inchans, coeffs, coeffslf, xover_f0norm, + std::move(stablizer)); +} diff --git a/core/bformatdec.h b/core/bformatdec.h new file mode 100644 index 00000000..7a27a5a4 --- /dev/null +++ b/core/bformatdec.h @@ -0,0 +1,71 @@ +#ifndef CORE_BFORMATDEC_H +#define CORE_BFORMATDEC_H + +#include <array> +#include <cstddef> +#include <memory> + +#include "almalloc.h" +#include "alspan.h" +#include "ambidefs.h" +#include "bufferline.h" +#include "devformat.h" +#include "filters/splitter.h" +#include "vector.h" + +struct FrontStablizer; + + +using ChannelDec = std::array<float,MaxAmbiChannels>; + +class BFormatDec { + static constexpr size_t 
sHFBand{0}; + static constexpr size_t sLFBand{1}; + static constexpr size_t sNumBands{2}; + + struct ChannelDecoder { + union MatrixU { + float Dual[sNumBands][MAX_OUTPUT_CHANNELS]; + float Single[MAX_OUTPUT_CHANNELS]; + } mGains{}; + + /* NOTE: BandSplitter filter is unused with single-band decoding. */ + BandSplitter mXOver; + }; + + alignas(16) std::array<FloatBufferLine,2> mSamples; + + const std::unique_ptr<FrontStablizer> mStablizer; + const bool mDualBand{false}; + + /* TODO: This should ideally be a FlexArray, since ChannelDecoder is rather + * small and only a few are needed (3, 4, 5, 7, typically). But that can + * only be used in a standard layout struct, and a std::unique_ptr member + * (mStablizer) causes GCC and Clang to warn it's not. + */ + al::vector<ChannelDecoder> mChannelDec; + +public: + BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs, + const al::span<const ChannelDec> coeffslf, const float xover_f0norm, + std::unique_ptr<FrontStablizer> stablizer); + + bool hasStablizer() const noexcept { return mStablizer != nullptr; } + + /* Decodes the ambisonic input to the given output channels. */ + void process(const al::span<FloatBufferLine> OutBuffer, const FloatBufferLine *InSamples, + const size_t SamplesToDo); + + /* Decodes the ambisonic input to the given output channels with stablization. 
*/ + void processStablize(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx, + const size_t SamplesToDo); + + static std::unique_ptr<BFormatDec> Create(const size_t inchans, + const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf, + const float xover_f0norm, std::unique_ptr<FrontStablizer> stablizer); + + DEF_NEWDEL(BFormatDec) +}; + +#endif /* CORE_BFORMATDEC_H */ diff --git a/core/bs2b.cpp b/core/bs2b.cpp new file mode 100644 index 00000000..303bf9bd --- /dev/null +++ b/core/bs2b.cpp @@ -0,0 +1,183 @@ +/*- + * Copyright (c) 2005 Boris Mikhaylov + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "config.h" + +#include <algorithm> +#include <cmath> +#include <iterator> + +#include "alnumbers.h" +#include "bs2b.h" + + +/* Set up all data. 
*/
static void init(struct bs2b *bs2b)
{
    /* Cut-off frequencies (Hz) and linear gains for the low-pass and
     * high-boost filters of the selected crossfeed level.
     */
    float cutoff_lo{};
    float cutoff_hi{};
    float gain_lo{};
    float gain_hi{};

    switch(bs2b->level)
    {
    case BS2B_LOW_CLEVEL: /* Low crossfeed level */
        cutoff_lo = 360.0f;
        cutoff_hi = 501.0f;
        gain_lo = 0.398107170553497f;
        gain_hi = 0.205671765275719f;
        break;

    case BS2B_MIDDLE_CLEVEL: /* Middle crossfeed level */
        cutoff_lo = 500.0f;
        cutoff_hi = 711.0f;
        gain_lo = 0.459726988530872f;
        gain_hi = 0.228208484414988f;
        break;

    case BS2B_HIGH_CLEVEL: /* High crossfeed level (virtual speakers are closer together) */
        cutoff_lo = 700.0f;
        cutoff_hi = 1021.0f;
        gain_lo = 0.530884444230988f;
        gain_hi = 0.250105790667544f;
        break;

    case BS2B_LOW_ECLEVEL: /* Low easy crossfeed level */
        cutoff_lo = 360.0f;
        cutoff_hi = 494.0f;
        gain_lo = 0.316227766016838f;
        gain_hi = 0.168236228897329f;
        break;

    case BS2B_MIDDLE_ECLEVEL: /* Middle easy crossfeed level */
        cutoff_lo = 500.0f;
        cutoff_hi = 689.0f;
        gain_lo = 0.354813389233575f;
        gain_hi = 0.187169483835901f;
        break;

    default: /* High easy crossfeed level */
        /* Any unrecognized level is replaced by the high easy level, and the
         * stored level is rewritten so bs2b_get_level reports what is in use.
         */
        bs2b->level = BS2B_HIGH_ECLEVEL;

        cutoff_lo = 700.0f;
        cutoff_hi = 975.0f;
        gain_lo = 0.398107170553497f;
        gain_hi = 0.205671765275719f;
        break;
    } /* switch */

    const float srate{static_cast<float>(bs2b->srate)};

    /* Common gain applied to both filters. */
    const float norm{1.0f / (1.0f - gain_hi + gain_lo)};

    /* $fc = $Fc / $s;
     * $d  = 1 / 2 / pi / $fc;
     * $x  = exp(-1 / $d);
     */
    const float pole_lo{std::exp(-al::numbers::pi_v<float>*2.0f*cutoff_lo/srate)};
    bs2b->b1_lo = pole_lo;
    bs2b->a0_lo = gain_lo * (1.0f - pole_lo) * norm;

    const float pole_hi{std::exp(-al::numbers::pi_v<float>*2.0f*cutoff_hi/srate)};
    bs2b->b1_hi = pole_hi;
    bs2b->a0_hi = (1.0f - gain_hi * (1.0f - pole_hi)) * norm;
    bs2b->a1_hi = -pole_hi * norm;
} /* init */


/* Exported functions. 
+ * See descriptions in "bs2b.h" + */ + +void bs2b_set_params(struct bs2b *bs2b, int level, int srate) +{ + if(srate <= 0) srate = 1; + + bs2b->level = level; + bs2b->srate = srate; + init(bs2b); +} /* bs2b_set_params */ + +int bs2b_get_level(struct bs2b *bs2b) +{ + return bs2b->level; +} /* bs2b_get_level */ + +int bs2b_get_srate(struct bs2b *bs2b) +{ + return bs2b->srate; +} /* bs2b_get_srate */ + +void bs2b_clear(struct bs2b *bs2b) +{ + std::fill(std::begin(bs2b->history), std::end(bs2b->history), bs2b::t_last_sample{}); +} /* bs2b_clear */ + +void bs2b_cross_feed(struct bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo) +{ + const float a0_lo{bs2b->a0_lo}; + const float b1_lo{bs2b->b1_lo}; + const float a0_hi{bs2b->a0_hi}; + const float a1_hi{bs2b->a1_hi}; + const float b1_hi{bs2b->b1_hi}; + float lsamples[128][2]; + float rsamples[128][2]; + + for(size_t base{0};base < SamplesToDo;) + { + const size_t todo{std::min<size_t>(128, SamplesToDo-base)}; + + /* Process left input */ + float z_lo{bs2b->history[0].lo}; + float z_hi{bs2b->history[0].hi}; + for(size_t i{0};i < todo;i++) + { + lsamples[i][0] = a0_lo*Left[i] + z_lo; + z_lo = b1_lo*lsamples[i][0]; + + lsamples[i][1] = a0_hi*Left[i] + z_hi; + z_hi = a1_hi*Left[i] + b1_hi*lsamples[i][1]; + } + bs2b->history[0].lo = z_lo; + bs2b->history[0].hi = z_hi; + + /* Process right input */ + z_lo = bs2b->history[1].lo; + z_hi = bs2b->history[1].hi; + for(size_t i{0};i < todo;i++) + { + rsamples[i][0] = a0_lo*Right[i] + z_lo; + z_lo = b1_lo*rsamples[i][0]; + + rsamples[i][1] = a0_hi*Right[i] + z_hi; + z_hi = a1_hi*Right[i] + b1_hi*rsamples[i][1]; + } + bs2b->history[1].lo = z_lo; + bs2b->history[1].hi = z_hi; + + /* Crossfeed */ + for(size_t i{0};i < todo;i++) + *(Left++) = lsamples[i][1] + rsamples[i][0]; + for(size_t i{0};i < todo;i++) + *(Right++) = rsamples[i][1] + lsamples[i][0]; + + base += todo; + } +} /* bs2b_cross_feed */ diff --git a/core/bs2b.h b/core/bs2b.h new file mode 100644 index 
00000000..4d0b9dd8 --- /dev/null +++ b/core/bs2b.h @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2005 Boris Mikhaylov + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CORE_BS2B_H +#define CORE_BS2B_H + +#include "almalloc.h" + +/* Number of crossfeed levels */ +#define BS2B_CLEVELS 3 + +/* Normal crossfeed levels */ +#define BS2B_HIGH_CLEVEL 3 +#define BS2B_MIDDLE_CLEVEL 2 +#define BS2B_LOW_CLEVEL 1 + +/* Easy crossfeed levels */ +#define BS2B_HIGH_ECLEVEL BS2B_HIGH_CLEVEL + BS2B_CLEVELS +#define BS2B_MIDDLE_ECLEVEL BS2B_MIDDLE_CLEVEL + BS2B_CLEVELS +#define BS2B_LOW_ECLEVEL BS2B_LOW_CLEVEL + BS2B_CLEVELS + +/* Default crossfeed levels */ +#define BS2B_DEFAULT_CLEVEL BS2B_HIGH_ECLEVEL +/* Default sample rate (Hz) */ +#define BS2B_DEFAULT_SRATE 44100 + +struct bs2b { + int level; /* Crossfeed level */ + int srate; /* Sample rate (Hz) */ + + /* Lowpass IIR filter coefficients */ + float a0_lo; + float b1_lo; + + /* Highboost IIR filter coefficients */ + float a0_hi; + float a1_hi; + float b1_hi; + + /* Buffer of filter history + * [0] - first channel, [1] - second channel + */ + struct t_last_sample { + float lo; + float hi; + } history[2]; + + DEF_NEWDEL(bs2b) +}; + +/* Clear buffers and set new coefficients with new crossfeed level and sample + * rate values. + * level - crossfeed level of *LEVEL values. + * srate - sample rate by Hz. + */ +void bs2b_set_params(bs2b *bs2b, int level, int srate); + +/* Return current crossfeed level value */ +int bs2b_get_level(bs2b *bs2b); + +/* Return current sample rate value */ +int bs2b_get_srate(bs2b *bs2b); + +/* Clear buffer */ +void bs2b_clear(bs2b *bs2b); + +void bs2b_cross_feed(bs2b *bs2b, float *Left, float *Right, size_t SamplesToDo); + +#endif /* CORE_BS2B_H */ diff --git a/core/bsinc_defs.h b/core/bsinc_defs.h new file mode 100644 index 00000000..01bd3c29 --- /dev/null +++ b/core/bsinc_defs.h @@ -0,0 +1,12 @@ +#ifndef CORE_BSINC_DEFS_H +#define CORE_BSINC_DEFS_H + +/* The number of distinct scale and phase intervals within the bsinc filter + * tables. 
+ */ +constexpr unsigned int BSincScaleBits{4}; +constexpr unsigned int BSincScaleCount{1 << BSincScaleBits}; +constexpr unsigned int BSincPhaseBits{5}; +constexpr unsigned int BSincPhaseCount{1 << BSincPhaseBits}; + +#endif /* CORE_BSINC_DEFS_H */ diff --git a/core/bsinc_tables.cpp b/core/bsinc_tables.cpp new file mode 100644 index 00000000..693645f4 --- /dev/null +++ b/core/bsinc_tables.cpp @@ -0,0 +1,295 @@ + +#include "bsinc_tables.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <cmath> +#include <limits> +#include <memory> +#include <stdexcept> + +#include "alnumbers.h" +#include "core/mixer/defs.h" + + +namespace { + +using uint = unsigned int; + + +/* This is the normalized cardinal sine (sinc) function. + * + * sinc(x) = { 1, x = 0 + * { sin(pi x) / (pi x), otherwise. + */ +constexpr double Sinc(const double x) +{ + constexpr double epsilon{std::numeric_limits<double>::epsilon()}; + if(!(x > epsilon || x < -epsilon)) + return 1.0; + return std::sin(al::numbers::pi*x) / (al::numbers::pi*x); +} + +/* The zero-order modified Bessel function of the first kind, used for the + * Kaiser window. + * + * I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k) + * = sum_{k=0}^inf ((x / 2)^k / k!)^2 + */ +constexpr double BesselI_0(const double x) noexcept +{ + /* Start at k=1 since k=0 is trivial. */ + const double x2{x / 2.0}; + double term{1.0}; + double sum{1.0}; + double last_sum{}; + int k{1}; + + /* Let the integration converge until the term of the sum is no longer + * significant. + */ + do { + const double y{x2 / k}; + ++k; + last_sum = sum; + term *= y * y; + sum += term; + } while(sum != last_sum); + + return sum; +} + +/* Calculate a Kaiser window from the given beta value and a normalized k + * [-1, 1]. + * + * w(k) = { I_0(B sqrt(1 - k^2)) / I_0(B), -1 <= k <= 1 + * { 0, elsewhere. + * + * Where k can be calculated as: + * + * k = i / l, where -l <= i <= l. + * + * or: + * + * k = 2 i / M - 1, where 0 <= i <= M. 
+ */ +constexpr double Kaiser(const double beta, const double k, const double besseli_0_beta) +{ + if(!(k >= -1.0 && k <= 1.0)) + return 0.0; + return BesselI_0(beta * std::sqrt(1.0 - k*k)) / besseli_0_beta; +} + +/* Calculates the (normalized frequency) transition width of the Kaiser window. + * Rejection is in dB. + */ +constexpr double CalcKaiserWidth(const double rejection, const uint order) noexcept +{ + if(rejection > 21.19) + return (rejection - 7.95) / (2.285 * al::numbers::pi*2.0 * order); + /* This enforces a minimum rejection of just above 21.18dB */ + return 5.79 / (al::numbers::pi*2.0 * order); +} + +/* Calculates the beta value of the Kaiser window. Rejection is in dB. */ +constexpr double CalcKaiserBeta(const double rejection) +{ + if(rejection > 50.0) + return 0.1102 * (rejection-8.7); + else if(rejection >= 21.0) + return (0.5842 * std::pow(rejection-21.0, 0.4)) + (0.07886 * (rejection-21.0)); + return 0.0; +} + + +struct BSincHeader { + double width{}; + double beta{}; + double scaleBase{}; + double scaleRange{}; + double besseli_0_beta{}; + + uint a[BSincScaleCount]{}; + uint total_size{}; + + constexpr BSincHeader(uint Rejection, uint Order) noexcept + { + width = CalcKaiserWidth(Rejection, Order); + beta = CalcKaiserBeta(Rejection); + scaleBase = width / 2.0; + scaleRange = 1.0 - scaleBase; + besseli_0_beta = BesselI_0(beta); + + uint num_points{Order+1}; + for(uint si{0};si < BSincScaleCount;++si) + { + const double scale{scaleBase + (scaleRange * (si+1) / BSincScaleCount)}; + const uint a_{std::min(static_cast<uint>(num_points / 2.0 / scale), num_points)}; + const uint m{2 * a_}; + + a[si] = a_; + total_size += 4 * BSincPhaseCount * ((m+3) & ~3u); + } + } +}; + +/* 11th and 23rd order filters (12 and 24-point respectively) with a 60dB drop + * at nyquist. Each filter will scale up the order when downsampling, to 23rd + * and 47th order respectively. 
+ */
+constexpr BSincHeader bsinc12_hdr{60, 11}; /* {Rejection in dB, Order} */
+constexpr BSincHeader bsinc24_hdr{60, 23};
+
+
+/* NOTE: GCC 5 has an issue with BSincHeader objects being in an anonymous
+ * namespace while also being used as non-type template parameters. For that
+ * compiler, BSincFilterArray is instead templated on the table size and takes
+ * the header as a constructor argument; both variants share the constructor
+ * body that follows the #endif.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
+
+/* The number of sample points is double the a value (rounded up to a multiple
+ * of 4), and scale index 0 includes the doubling for downsampling. bsinc24 is
+ * currently the highest quality filter, and will use the most sample points.
+ */
+constexpr uint BSincPointsMax{(bsinc24_hdr.a[0]*2 + 3) & ~3u};
+static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+
+template<size_t total_size>
+struct BSincFilterArray {
+    alignas(16) std::array<float, total_size> mTable;
+    const BSincHeader &hdr;
+
+    BSincFilterArray(const BSincHeader &hdr_) : hdr{hdr_}
+    {
+#else
+template<const BSincHeader &hdr>
+struct BSincFilterArray {
+    alignas(16) std::array<float, hdr.total_size> mTable{};
+
+    BSincFilterArray()
+    {
+        constexpr uint BSincPointsMax{(hdr.a[0]*2 + 3) & ~3u};
+        static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+#endif
+        /* Temporary double-precision coefficients indexed [scale][phase][point],
+         * with one extra phase row. make_unique value-initializes the array, so
+         * points that are never written below remain 0.
+         */
+        using filter_type = double[BSincPhaseCount+1][BSincPointsMax];
+        auto filter = std::make_unique<filter_type[]>(BSincScaleCount);
+
+        /* Calculate the Kaiser-windowed Sinc filter coefficients for each
+         * scale and phase index.
+         */
+        for(uint si{0};si < BSincScaleCount;++si)
+        {
+            const uint m{hdr.a[si] * 2};
+            const size_t o{(BSincPointsMax-m) / 2}; /* centres the m points in the row */
+            const double scale{hdr.scaleBase + (hdr.scaleRange * (si+1) / BSincScaleCount)};
+            const double cutoff{scale - (hdr.scaleBase * std::max(1.0, scale*2.0))};
+            const auto a = static_cast<double>(hdr.a[si]);
+            const double l{a - 1.0/BSincPhaseCount};
+
+            /* Do one extra phase index so that the phase delta has a proper
+             * target for its last index.
+             */
+            for(uint pi{0};pi <= BSincPhaseCount;++pi)
+            {
+                const double phase{std::floor(l) + (pi/double{BSincPhaseCount})};
+
+                for(uint i{0};i < m;++i)
+                {
+                    const double x{i - phase}; /* distance of point i from the filter centre */
+                    filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, hdr.besseli_0_beta) * cutoff *
+                        Sinc(cutoff*x);
+                }
+            }
+        }
+
+        /* Flatten the coefficients into mTable. For every scale index the
+         * layout is: BSincPhaseCount x (filter row, phase-delta row), followed
+         * by BSincPhaseCount x (scale-delta row, scale-phase-delta row).
+         */
+        size_t idx{0};
+        for(size_t si{0};si < BSincScaleCount;++si)
+        {
+            const size_t m{((hdr.a[si]*2) + 3) & ~3u}; /* row length, padded to a multiple of 4 */
+            const size_t o{(BSincPointsMax-m) / 2};
+
+            /* Write out each phase index's filter and phase delta for this
+             * quality scale.
+             */
+            for(size_t pi{0};pi < BSincPhaseCount;++pi)
+            {
+                for(size_t i{0};i < m;++i)
+                    mTable[idx++] = static_cast<float>(filter[si][pi][o+i]);
+
+                /* Linear interpolation between phases is simplified by pre-
+                 * calculating the delta (b - a) in: x = a + f (b - a)
+                 */
+                for(size_t i{0};i < m;++i)
+                {
+                    const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]};
+                    mTable[idx++] = static_cast<float>(phDelta);
+                }
+            }
+            /* Calculate and write out each phase index's filter quality scale
+             * deltas. The last scale index doesn't have any scale or scale-
+             * phase deltas.
+             */
+            if(si == BSincScaleCount-1)
+            {
+                /* Zero-fill both delta rows for every phase of the last scale. */
+                for(size_t i{0};i < BSincPhaseCount*m*2;++i)
+                    mTable[idx++] = 0.0f;
+            }
+            else for(size_t pi{0};pi < BSincPhaseCount;++pi)
+            {
+                /* Linear interpolation between scales is also simplified.
+                 *
+                 * Given a difference in the number of points between scales,
+                 * the destination points will be 0, thus: x = a + f (-a)
+                 */
+                for(size_t i{0};i < m;++i)
+                {
+                    const double scDelta{filter[si+1][pi][o+i] - filter[si][pi][o+i]};
+                    mTable[idx++] = static_cast<float>(scDelta);
+                }
+
+                /* This last simplification is done to complete the bilinear
+                 * equation for the combination of phase and scale.
+                 */
+                for(size_t i{0};i < m;++i)
+                {
+                    /* (phase delta at the next scale) - (phase delta at this scale) */
+                    const double spDelta{(filter[si+1][pi+1][o+i] - filter[si+1][pi][o+i]) -
+                        (filter[si][pi+1][o+i] - filter[si][pi][o+i])};
+                    mTable[idx++] = static_cast<float>(spDelta);
+                }
+            }
+        }
+        /* Every slot counted by the header must have been written exactly once. */
+        assert(idx == hdr.total_size);
+    }
+
+    constexpr const BSincHeader &getHeader() const noexcept { return hdr; }
+    constexpr const float *getTable() const noexcept { return &mTable.front(); }
+};
+
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
+const BSincFilterArray<bsinc12_hdr.total_size> bsinc12_filter{bsinc12_hdr};
+const BSincFilterArray<bsinc24_hdr.total_size> bsinc24_filter{bsinc24_hdr};
+#else
+const BSincFilterArray<bsinc12_hdr> bsinc12_filter{};
+const BSincFilterArray<bsinc24_hdr> bsinc24_filter{};
+#endif
+/* Builds the BSincTable descriptor for a generated filter array. Note that
+ * BSincTable::scaleRange receives the reciprocal of the header's scaleRange,
+ * and filterOffset[i] is the float offset of scale i's data within Tab (each
+ * scale occupies m[i]*4*BSincPhaseCount floats).
+ */
+template<typename T>
+constexpr BSincTable GenerateBSincTable(const T &filter)
+{
+    BSincTable ret{};
+    const BSincHeader &hdr = filter.getHeader();
+    ret.scaleBase = static_cast<float>(hdr.scaleBase);
+    ret.scaleRange = static_cast<float>(1.0 / hdr.scaleRange);
+    for(size_t i{0};i < BSincScaleCount;++i)
+        ret.m[i] = ((hdr.a[i]*2) + 3) & ~3u;
+    ret.filterOffset[0] = 0;
+    for(size_t i{1};i < BSincScaleCount;++i)
+        ret.filterOffset[i] = ret.filterOffset[i-1] + ret.m[i-1]*4*BSincPhaseCount;
+    ret.Tab = filter.getTable();
+    return ret;
+}
+
+} // namespace
+
+const BSincTable gBSinc12{GenerateBSincTable(bsinc12_filter)};
+const BSincTable gBSinc24{GenerateBSincTable(bsinc24_filter)};
diff --git a/core/bsinc_tables.h b/core/bsinc_tables.h
new file mode 100644
index 00000000..aca4b274
--- /dev/null
+++ b/core/bsinc_tables.h
@@ -0,0 +1,17 @@
+#ifndef CORE_BSINC_TABLES_H
+#define CORE_BSINC_TABLES_H
+
+#include "bsinc_defs.h"
+
+/* Descriptor for a generated bsinc filter table (filled in by bsinc_tables.cpp). */
+struct BSincTable {
+    float scaleBase, scaleRange; /* scaleRange holds 1/(1 - scaleBase) */
+    unsigned int m[BSincScaleCount]; /* padded point count for each scale index */
+    unsigned int filterOffset[BSincScaleCount]; /* float offset into Tab for each scale index */
+    const float *Tab; /* the coefficient and delta data */
+};
+
+extern const BSincTable gBSinc12;
+extern const BSincTable gBSinc24;
+
+#endif /* CORE_BSINC_TABLES_H */
diff --git
a/core/buffer_storage.cpp b/core/buffer_storage.cpp
new file mode 100644
index 00000000..98ca2c1b
--- /dev/null
+++ b/core/buffer_storage.cpp
@@ -0,0 +1,81 @@
+
+#include "config.h"
+
+#include "buffer_storage.h"
+
+#include <stdint.h>
+
+/* Returns a printable name for a sample storage type, or "<internal error>"
+ * for a value that matches no enumerator.
+ */
+const char *NameFromFormat(FmtType type) noexcept
+{
+    switch(type)
+    {
+    case FmtUByte: return "UInt8";
+    case FmtShort: return "Int16";
+    case FmtFloat: return "Float";
+    case FmtDouble: return "Double";
+    case FmtMulaw: return "muLaw";
+    case FmtAlaw: return "aLaw";
+    case FmtIMA4: return "IMA4 ADPCM";
+    case FmtMSADPCM: return "MS ADPCM";
+    }
+    return "<internal error>";
+}
+/* Returns a printable name for a channel configuration, or "<internal error>"
+ * for a value that matches no enumerator.
+ */
+const char *NameFromFormat(FmtChannels channels) noexcept
+{
+    switch(channels)
+    {
+    case FmtMono: return "Mono";
+    case FmtStereo: return "Stereo";
+    case FmtRear: return "Rear";
+    case FmtQuad: return "Quadraphonic";
+    case FmtX51: return "Surround 5.1";
+    case FmtX61: return "Surround 6.1";
+    case FmtX71: return "Surround 7.1";
+    case FmtBFormat2D: return "B-Format 2D";
+    case FmtBFormat3D: return "B-Format 3D";
+    case FmtUHJ2: return "UHJ2";
+    case FmtUHJ3: return "UHJ3";
+    case FmtUHJ4: return "UHJ4";
+    case FmtSuperStereo: return "Super Stereo";
+    }
+    return "<internal error>";
+}
+/* Returns the size in bytes of one sample of the given type. FmtIMA4 and
+ * FmtMSADPCM return 0 here; their storage size is computed per block by
+ * BufferStorage::blockSizeFromFmt instead.
+ */
+uint BytesFromFmt(FmtType type) noexcept
+{
+    switch(type)
+    {
+    case FmtUByte: return sizeof(uint8_t);
+    case FmtShort: return sizeof(int16_t);
+    case FmtFloat: return sizeof(float);
+    case FmtDouble: return sizeof(double);
+    case FmtMulaw: return sizeof(uint8_t);
+    case FmtAlaw: return sizeof(uint8_t);
+    case FmtIMA4: break;
+    case FmtMSADPCM: break;
+    }
+    return 0;
+}
+/* Returns the number of channels in a channel configuration. ambiorder is
+ * only used by the B-Format layouts: 2D has 2*order + 1 channels and 3D has
+ * (order+1)^2. Returns 0 for a value that matches no enumerator.
+ */
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept
+{
+    switch(chans)
+    {
+    case FmtMono: return 1;
+    case FmtStereo: return 2;
+    case FmtRear: return 2;
+    case FmtQuad: return 4;
+    case FmtX51: return 6;
+    case FmtX61: return 7;
+    case FmtX71: return 8;
+    case FmtBFormat2D: return (ambiorder*2) + 1;
+    case FmtBFormat3D: return (ambiorder+1) * (ambiorder+1);
+    case FmtUHJ2: return
2;
+    case FmtUHJ3: return 3;
+    case FmtUHJ4: return 4;
+    case FmtSuperStereo: return 2;
+    }
+    return 0;
+}
diff --git a/core/buffer_storage.h b/core/buffer_storage.h
new file mode 100644
index 00000000..282d5b53
--- /dev/null
+++ b/core/buffer_storage.h
@@ -0,0 +1,115 @@
+#ifndef CORE_BUFFER_STORAGE_H
+#define CORE_BUFFER_STORAGE_H
+
+#include <atomic>
+
+#include "albyte.h"
+#include "alnumeric.h"
+#include "alspan.h"
+#include "ambidefs.h"
+
+
+using uint = unsigned int;
+
+/* Storable formats: the sample types (FmtType) and the channel
+ * configurations (FmtChannels) that buffer data can be stored in.
+ */
+enum FmtType : unsigned char {
+    FmtUByte,
+    FmtShort,
+    FmtFloat,
+    FmtDouble,
+    FmtMulaw,
+    FmtAlaw,
+    FmtIMA4,
+    FmtMSADPCM,
+};
+enum FmtChannels : unsigned char {
+    FmtMono,
+    FmtStereo,
+    FmtRear,
+    FmtQuad,
+    FmtX51, /* (WFX order) */
+    FmtX61, /* (WFX order) */
+    FmtX71, /* (WFX order) */
+    FmtBFormat2D,
+    FmtBFormat3D,
+    FmtUHJ2, /* 2-channel UHJ, aka "BHJ", stereo-compatible */
+    FmtUHJ3, /* 3-channel UHJ, aka "THJ" */
+    FmtUHJ4, /* 4-channel UHJ, aka "PHJ" */
+    FmtSuperStereo, /* Stereo processed with Super Stereo. */
+};
+
+enum class AmbiLayout : unsigned char {
+    FuMa,
+    ACN,
+};
+enum class AmbiScaling : unsigned char {
+    FuMa,
+    SN3D,
+    N3D,
+    UHJ,
+};
+
+const char *NameFromFormat(FmtType type) noexcept;
+const char *NameFromFormat(FmtChannels channels) noexcept;
+
+uint BytesFromFmt(FmtType type) noexcept;
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept;
+/* Bytes in one frame (one sample for every channel). Evaluates to 0 for the
+ * types BytesFromFmt reports as 0 (FmtIMA4 and FmtMSADPCM).
+ */
+inline uint FrameSizeFromFmt(FmtChannels chans, FmtType type, uint ambiorder) noexcept
+{ return ChannelsFromFmt(chans, ambiorder) * BytesFromFmt(type); }
+
+constexpr bool IsBFormat(FmtChannels chans) noexcept
+{ return chans == FmtBFormat2D || chans == FmtBFormat3D; }
+
+/* Super Stereo is considered part of the UHJ family here, since it goes
+ * through similar processing as UHJ, both result in a B-Format signal, and
+ * needs the same consideration as BHJ (three channel result with only two
+ * channel input).
+ */
+constexpr bool IsUHJ(FmtChannels chans) noexcept
+{ return chans == FmtUHJ2 || chans == FmtUHJ3 || chans == FmtUHJ4 || chans == FmtSuperStereo; }
+
+/** Ambisonic formats are either B-Format or UHJ formats. */
+constexpr bool IsAmbisonic(FmtChannels chans) noexcept
+{ return IsBFormat(chans) || IsUHJ(chans); }
+/* True for FmtBFormat2D, FmtUHJ2, FmtUHJ3 and FmtSuperStereo. The remaining
+ * ambisonic formats, FmtBFormat3D and FmtUHJ4, are not considered 2D.
+ */
+constexpr bool Is2DAmbisonic(FmtChannels chans) noexcept
+{
+    return chans == FmtBFormat2D || chans == FmtUHJ2 || chans == FmtUHJ3
+        || chans == FmtSuperStereo;
+}
+
+/* Signature of the function stored in BufferStorage::mCallback.
+ * NOTE(review): the meaning of the three arguments is not visible in this
+ * header; presumably (user data, destination buffer, byte count) - confirm.
+ */
+using CallbackType = int(*)(void*, void*, int);
+/* Describes a block of stored sample data: the data span, its sample rate,
+ * channel configuration, sample type, and ambisonic layout/scaling/order.
+ */
+struct BufferStorage {
+    CallbackType mCallback{nullptr};
+    void *mUserData{nullptr};
+
+    al::span<al::byte> mData;
+
+    uint mSampleRate{0u};
+    FmtChannels mChannels{FmtMono};
+    FmtType mType{FmtShort};
+    uint mSampleLen{0u};
+    uint mBlockAlign{0u};
+
+    AmbiLayout mAmbiLayout{AmbiLayout::FuMa};
+    AmbiScaling mAmbiScaling{AmbiScaling::FuMa};
+    uint mAmbiOrder{0u};
+
+    inline uint bytesFromFmt() const noexcept { return BytesFromFmt(mType); }
+    inline uint channelsFromFmt() const noexcept
+    { return ChannelsFromFmt(mChannels, mAmbiOrder); }
+    inline uint frameSizeFromFmt() const noexcept { return channelsFromFmt() * bytesFromFmt(); }
+
+    /* Storage size of one block: for the two ADPCM types it is derived from
+     * mBlockAlign and the channel count; for every other type a block is a
+     * single frame. NOTE(review): the formulas suggest mBlockAlign counts
+     * samples per block - confirm against where it is set.
+     */
+    inline uint blockSizeFromFmt() const noexcept
+    {
+        if(mType == FmtIMA4) return ((mBlockAlign-1)/2 + 4) * channelsFromFmt();
+        if(mType == FmtMSADPCM) return ((mBlockAlign-2)/2 + 7) * channelsFromFmt();
+        return frameSizeFromFmt();
+    };
+
+    inline bool isBFormat() const noexcept { return IsBFormat(mChannels); }
+};
+
+#endif /* CORE_BUFFER_STORAGE_H */
diff --git a/core/bufferline.h b/core/bufferline.h
new file mode 100644
index 00000000..8b445f3f
--- /dev/null
+++ b/core/bufferline.h
@@ -0,0 +1,17 @@
+#ifndef CORE_BUFFERLINE_H
+#define CORE_BUFFERLINE_H
+
+#include <array>
+
+#include "alspan.h"
+
+/* Size for temporary storage of buffer data, in floats. Larger values need
+ * more memory and are harder on cache, while smaller values may need more
+ * iterations for mixing.
+ */
+constexpr int BufferLineSize{1024};
+
+using FloatBufferLine = std::array<float,BufferLineSize>;
+using FloatBufferSpan = al::span<float,BufferLineSize>;
+
+#endif /* CORE_BUFFERLINE_H */
diff --git a/core/context.cpp b/core/context.cpp
new file mode 100644
index 00000000..d68d8327
--- /dev/null
+++ b/core/context.cpp
@@ -0,0 +1,164 @@
+
+#include "config.h"
+
+#include <cassert>
+#include <memory> /* NOTE(review): std::numeric_limits and std::runtime_error are used further down in this file, but <limits> and <stdexcept> are not included here directly. */
+
+#include "async_event.h"
+#include "context.h"
+#include "device.h"
+#include "effectslot.h"
+#include "logging.h"
+#include "ringbuffer.h"
+#include "voice.h"
+#include "voice_change.h"
+
+
+#ifdef __cpp_lib_atomic_is_always_lock_free
+static_assert(std::atomic<ContextBase::AsyncEventBitset>::is_always_lock_free, "atomic<bitset> isn't lock-free");
+#endif
+/* Stores the owning device. Builds with assertions enabled also check that
+ * the atomic enabled-events bitset is lock-free.
+ */
+ContextBase::ContextBase(DeviceBase *device) : mDevice{device}
+{ assert(mEnabledEvts.is_lock_free()); }
+/* Releases what the context still owns: the pending context update, the free
+ * lists of context and effect slot property objects, the active effect slot
+ * array, the voice pointer array, and any events still queued in
+ * mAsyncEvents.
+ */
+ContextBase::~ContextBase()
+{
+    size_t count{0};
+    ContextProps *cprops{mParams.ContextUpdate.exchange(nullptr, std::memory_order_relaxed)};
+    if(cprops)
+    {
+        ++count;
+        delete cprops;
+    }
+    cprops = mFreeContextProps.exchange(nullptr, std::memory_order_acquire);
+    while(cprops)
+    {
+        /* Taking ownership frees each node as soon as its successor is read. */
+        std::unique_ptr<ContextProps> old{cprops};
+        cprops = old->next.load(std::memory_order_relaxed);
+        ++count;
+    }
+    TRACE("Freed %zu context property object%s\n", count, (count==1)?"":"s");
+
+    count = 0;
+    EffectSlotProps *eprops{mFreeEffectslotProps.exchange(nullptr, std::memory_order_acquire)};
+    while(eprops)
+    {
+        std::unique_ptr<EffectSlotProps> old{eprops};
+        eprops = old->next.load(std::memory_order_relaxed);
+        ++count;
+    }
+    TRACE("Freed %zu AuxiliaryEffectSlot property object%s\n", count, (count==1)?"":"s");
+
+    if(EffectSlotArray *curarray{mActiveAuxSlots.exchange(nullptr, std::memory_order_relaxed)})
+    {
+        /* NOTE(review): this destroys size() elements starting at end(), i.e.
+         * beyond the range the array reports. Presumably the array is
+         * allocated with extra storage that is constructed past end() -
+         * confirm against the code that creates it.
+         */
+        al::destroy_n(curarray->end(), curarray->size());
+        delete curarray;
+    }
+
+    delete mVoices.exchange(nullptr, std::memory_order_relaxed);
+
+    if(mAsyncEvents)
+    {
+        count = 0;
+        auto evt_vec =
mAsyncEvents->getReadVector();
+        /* The read vector comes as two segments; destroy the events in both. */
+        if(evt_vec.first.len > 0)
+        {
+            al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf), evt_vec.first.len);
+            count += evt_vec.first.len;
+        }
+        if(evt_vec.second.len > 0)
+        {
+            al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.second.buf), evt_vec.second.len);
+            count += evt_vec.second.len;
+        }
+        if(count > 0)
+            TRACE("Destructed %zu orphaned event%s\n", count, (count==1)?"":"s");
+        mAsyncEvents->readAdvance(count);
+    }
+}
+
+/* Allocates a cluster of 128 VoiceChange objects, links them into a singly
+ * linked list whose last element points at the previous tail, and makes the
+ * first element of the new cluster the new mVoiceChangeTail.
+ */
+void ContextBase::allocVoiceChanges()
+{
+    constexpr size_t clustersize{128};
+
+    VoiceChangeCluster cluster{std::make_unique<VoiceChange[]>(clustersize)};
+    for(size_t i{1};i < clustersize;++i)
+        cluster[i-1].mNext.store(std::addressof(cluster[i]), std::memory_order_relaxed);
+    cluster[clustersize-1].mNext.store(mVoiceChangeTail, std::memory_order_relaxed);
+
+    mVoiceChangeClusters.emplace_back(std::move(cluster));
+    mVoiceChangeTail = mVoiceChangeClusters.back().get();
+}
+/* Allocates a cluster of 32 VoicePropsItem objects, chains them together, and
+ * pushes the whole chain onto the front of the mFreeVoiceProps list.
+ */
+void ContextBase::allocVoiceProps()
+{
+    constexpr size_t clustersize{32};
+
+    TRACE("Increasing allocated voice properties to %zu\n",
+        (mVoicePropClusters.size()+1) * clustersize);
+
+    VoicePropsCluster cluster{std::make_unique<VoicePropsItem[]>(clustersize)};
+    for(size_t i{1};i < clustersize;++i)
+        cluster[i-1].next.store(std::addressof(cluster[i]), std::memory_order_relaxed);
+    mVoicePropClusters.emplace_back(std::move(cluster));
+
+    /* Compare-and-swap loop: point the cluster's last item at the current
+     * head, then try to install the cluster's first item as the new head. On
+     * failure oldhead is refreshed with the current head and the loop retries.
+     */
+    VoicePropsItem *oldhead{mFreeVoiceProps.load(std::memory_order_acquire)};
+    do {
+        mVoicePropClusters.back()[clustersize-1].next.store(oldhead, std::memory_order_relaxed);
+    } while(mFreeVoiceProps.compare_exchange_weak(oldhead, mVoicePropClusters.back().get(),
+        std::memory_order_acq_rel, std::memory_order_acquire) == false);
+}
+/* Grows voice storage by at least addcount voices (rounded up to whole
+ * clusters of 32) and publishes a new array of pointers to every voice.
+ * Throws std::runtime_error if the cluster count would grow too large.
+ */
+void ContextBase::allocVoices(size_t addcount)
+{
+    constexpr size_t clustersize{32};
+    /* Convert element count to cluster count.
*/
+    addcount = (addcount+(clustersize-1)) / clustersize;
+
+    /* Refuse to grow once the cluster count would reach INT_MAX/clustersize. */
+    if(addcount >= std::numeric_limits<int>::max()/clustersize - mVoiceClusters.size())
+        throw std::runtime_error{"Allocating too many voices"};
+    const size_t totalcount{(mVoiceClusters.size()+addcount) * clustersize};
+    TRACE("Increasing allocated voices to %zu\n", totalcount);
+
+    auto newarray = VoiceArray::Create(totalcount);
+    while(addcount)
+    {
+        mVoiceClusters.emplace_back(std::make_unique<Voice[]>(clustersize));
+        --addcount;
+    }
+
+    /* Fill the new pointer array with the address of every voice, old and new. */
+    auto voice_iter = newarray->begin();
+    for(VoiceCluster &cluster : mVoiceClusters)
+    {
+        for(size_t i{0};i < clustersize;++i)
+            *(voice_iter++) = &cluster[i];
+    }
+
+    /* Publish the new array. If one was already installed, wait for the
+     * device's waitForMix() to return before deleting it.
+     */
+    if(auto *oldvoices = mVoices.exchange(newarray.release(), std::memory_order_acq_rel))
+    {
+        mDevice->waitForMix();
+        delete oldvoices;
+    }
+}
+
+/* Returns the first effect slot that is not marked InUse. If every slot is
+ * in use, a new cluster of EffectSlotClusterSize slots is allocated and the
+ * search is repeated. Throws std::runtime_error if the cluster count would
+ * grow too large.
+ */
+EffectSlot *ContextBase::getEffectSlot()
+{
+    for(auto& cluster : mEffectSlotClusters)
+    {
+        for(size_t i{0};i < EffectSlotClusterSize;++i)
+        {
+            if(!cluster[i].InUse)
+                return &cluster[i];
+        }
+    }
+
+    if(1 >= std::numeric_limits<int>::max()/EffectSlotClusterSize - mEffectSlotClusters.size())
+        throw std::runtime_error{"Allocating too many effect slots"};
+    const size_t totalcount{(mEffectSlotClusters.size()+1) * EffectSlotClusterSize};
+    TRACE("Increasing allocated effect slots to %zu\n", totalcount);
+
+    mEffectSlotClusters.emplace_back(std::make_unique<EffectSlot[]>(EffectSlotClusterSize));
+    return getEffectSlot(); /* the retry will search the cluster that was just added */
+}
diff --git a/core/context.h b/core/context.h
new file mode 100644
index 00000000..9723eac3
--- /dev/null
+++ b/core/context.h
@@ -0,0 +1,171 @@
+#ifndef CORE_CONTEXT_H
+#define CORE_CONTEXT_H
+
+#include <array>
+#include <atomic>
+#include <bitset>
+#include <cstddef>
+#include <memory>
+#include <thread>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "async_event.h"
+#include "atomic.h"
+#include "bufferline.h"
+#include "threads.h"
+#include "vecmat.h"
+#include "vector.h"
+
+struct DeviceBase;
+struct EffectSlot;
+struct
EffectSlotProps;
+struct RingBuffer;
+struct Voice;
+struct VoiceChange;
+struct VoicePropsItem;
+
+using uint = unsigned int;
+
+
+constexpr float SpeedOfSoundMetersPerSec{343.3f};
+
+constexpr float AirAbsorbGainHF{0.99426f}; /* -0.05dB */
+
+enum class DistanceModel : unsigned char {
+    Disable,
+    Inverse, InverseClamped,
+    Linear, LinearClamped,
+    Exponent, ExponentClamped,
+
+    Default = InverseClamped
+};
+
+/* A set of context property values. The `next` pointer chains instances
+ * together, as on the ContextBase::mFreeContextProps list.
+ */
+struct ContextProps {
+    std::array<float,3> Position;
+    std::array<float,3> Velocity;
+    std::array<float,3> OrientAt;
+    std::array<float,3> OrientUp;
+    float Gain;
+    float MetersPerUnit;
+    float AirAbsorptionGainHF;
+
+    float DopplerFactor;
+    float DopplerVelocity;
+    float SpeedOfSound;
+    bool SourceDistanceModel;
+    DistanceModel mDistanceModel;
+
+    std::atomic<ContextProps*> next;
+
+    DEF_NEWDEL(ContextProps)
+};
+
+struct ContextParams {
+    /* Pointer to the most recent property values that are awaiting an update. */
+    std::atomic<ContextProps*> ContextUpdate{nullptr};
+
+    alu::Vector Position{};
+    alu::Matrix Matrix{alu::Matrix::Identity()};
+    alu::Vector Velocity{};
+
+    float Gain{1.0f};
+    float MetersPerUnit{1.0f};
+    float AirAbsorptionGainHF{AirAbsorbGainHF};
+
+    float DopplerFactor{1.0f};
+    float SpeedOfSound{SpeedOfSoundMetersPerSec}; /* in units per sec! */
+
+    bool SourceDistanceModel{false};
+    DistanceModel mDistanceModel{};
+};
+
+struct ContextBase {
+    DeviceBase *const mDevice;
+
+    /* Counter for the pre-mixing updates, in 31.1 fixed point (lowest bit
+     * indicates if updates are currently happening).
+     */
+    RefCount mUpdateCount{0u};
+    std::atomic<bool> mHoldUpdates{false};
+    std::atomic<bool> mStopVoicesOnDisconnect{true};
+
+    float mGainBoost{1.0f};
+
+    /* Linked lists of unused property containers, free to use for future
+     * updates.
+     */
+    std::atomic<ContextProps*> mFreeContextProps{nullptr};
+    std::atomic<VoicePropsItem*> mFreeVoiceProps{nullptr};
+    std::atomic<EffectSlotProps*> mFreeEffectslotProps{nullptr};
+
+    /* The voice change tail is the beginning of the "free" elements, up to and
+     * *excluding* the current. If tail==current, there are no free elements
+     * and new ones need to be allocated. The current voice change is the
+     * element last processed, and any after are pending.
+     */
+    VoiceChange *mVoiceChangeTail{};
+    std::atomic<VoiceChange*> mCurrentVoiceChange{};
+
+    void allocVoiceChanges();
+    void allocVoiceProps();
+
+
+    ContextParams mParams;
+
+    using VoiceArray = al::FlexArray<Voice*>;
+    std::atomic<VoiceArray*> mVoices{};
+    std::atomic<size_t> mActiveVoiceCount{};
+
+    void allocVoices(size_t addcount);
+    /* A view of the first mActiveVoiceCount entries of the current voice
+     * array, read with relaxed loads.
+     */
+    al::span<Voice*> getVoicesSpan() const noexcept
+    {
+        return {mVoices.load(std::memory_order_relaxed)->data(),
+            mActiveVoiceCount.load(std::memory_order_relaxed)};
+    }
+    /* Same view as getVoicesSpan(), but read with acquire loads. */
+    al::span<Voice*> getVoicesSpanAcquired() const noexcept
+    {
+        return {mVoices.load(std::memory_order_acquire)->data(),
+            mActiveVoiceCount.load(std::memory_order_acquire)};
+    }
+
+
+    using EffectSlotArray = al::FlexArray<EffectSlot*>;
+    std::atomic<EffectSlotArray*> mActiveAuxSlots{nullptr};
+
+    std::thread mEventThread;
+    al::semaphore mEventSem;
+    std::unique_ptr<RingBuffer> mAsyncEvents;
+    using AsyncEventBitset = std::bitset<AsyncEvent::UserEventCount>;
+    std::atomic<AsyncEventBitset> mEnabledEvts{0u};
+
+    /* Asynchronous voice change actions are processed as a linked list of
+     * VoiceChange objects by the mixer, which is atomically appended to.
+     * However, to avoid allocating each object individually, they're allocated
+     * in clusters that are stored in a vector for easy automatic cleanup.
+     */
+    using VoiceChangeCluster = std::unique_ptr<VoiceChange[]>;
+    al::vector<VoiceChangeCluster> mVoiceChangeClusters;
+
+    using VoiceCluster = std::unique_ptr<Voice[]>;
+    al::vector<VoiceCluster> mVoiceClusters;
+
+    using VoicePropsCluster = std::unique_ptr<VoicePropsItem[]>;
+    al::vector<VoicePropsCluster> mVoicePropClusters;
+
+
+    static constexpr size_t EffectSlotClusterSize{4}; /* slots allocated per cluster */
+    EffectSlot *getEffectSlot();
+
+    using EffectSlotCluster = std::unique_ptr<EffectSlot[]>;
+    al::vector<EffectSlotCluster> mEffectSlotClusters;
+
+
+    ContextBase(DeviceBase *device);
+    ContextBase(const ContextBase&) = delete;
+    ContextBase& operator=(const ContextBase&) = delete;
+    ~ContextBase();
+};
+
+#endif /* CORE_CONTEXT_H */
diff --git a/core/converter.cpp b/core/converter.cpp
new file mode 100644
index 00000000..a5141448
--- /dev/null
+++ b/core/converter.cpp
@@ -0,0 +1,346 @@
+
+#include "config.h"
+
+#include "converter.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <limits.h> /* for INT_MAX. NOTE(review): std::numeric_limits is used in SampleConverter::availableOut, but <limits> is not included here directly. */
+
+#include "albit.h"
+#include "albyte.h"
+#include "alnumeric.h"
+#include "fpu_ctrl.h"
+
+
+namespace {
+
+/* Upper bound on the resampling step, as a multiple of MixerFracOne (see the
+ * clamp in SampleConverter::Create).
+ */
+constexpr uint MaxPitch{10};
+
+static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
+static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
+    "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
+
+/* Base template left undefined. Should be marked =delete, but Clang 3.8.1
+ * chokes on that given the inline specializations.
+ */
+template<DevFmtType T>
+inline float LoadSample(DevFmtType_t<T> val) noexcept;
+
+/* Signed integer types are scaled so the most negative value maps to -1.0. */
+template<> inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
+{ return val * (1.0f/128.0f); }
+template<> inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
+{ return val * (1.0f/32768.0f); }
+template<> inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
+{ return static_cast<float>(val) * (1.0f/2147483648.0f); }
+template<> inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
+{ return val; }
+/* Unsigned types are re-biased to signed and then use the signed conversions. */
+template<> inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
+{ return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); }
+template<> inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
+{ return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); }
+template<> inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
+{ return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); }
+
+/* Converts `samples` values of type T to float, reading every srcstep-th
+ * element of src and writing the results contiguously to dst.
+ */
+template<DevFmtType T>
+inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep,
+    const size_t samples) noexcept
+{
+    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
+    for(size_t i{0u};i < samples;i++)
+        dst[i] = LoadSample<T>(ssrc[i*srcstep]);
+}
+/* Runtime dispatch to LoadSampleArray<T> for the given source sample type. */
+void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype,
+    const size_t samples) noexcept
+{
+#define HANDLE_FMT(T) \
+    case T: LoadSampleArray<T>(dst, src, srcstep, samples); break
+    switch(srctype)
+    {
+    HANDLE_FMT(DevFmtByte);
+    HANDLE_FMT(DevFmtUByte);
+    HANDLE_FMT(DevFmtShort);
+    HANDLE_FMT(DevFmtUShort);
+    HANDLE_FMT(DevFmtInt);
+    HANDLE_FMT(DevFmtUInt);
+    HANDLE_FMT(DevFmtFloat);
+    }
+#undef HANDLE_FMT
+}
+
+/* Converts one float sample to the output type T. Like LoadSample, the base
+ * template is only declared; each type has an explicit specialization.
+ */
+template<DevFmtType T>
+inline DevFmtType_t<T> StoreSample(float) noexcept;
+
+template<> inline float StoreSample<DevFmtFloat>(float val) noexcept
+{ return val;
}
+/* The integer variants scale, clamp to the type's range, then convert with fastf2i. The 32-bit upper clamp, 2147483520, is the largest float below 2^31. */
+template<> inline int32_t StoreSample<DevFmtInt>(float val) noexcept
+{ return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)); }
+template<> inline int16_t StoreSample<DevFmtShort>(float val) noexcept
+{ return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }
+template<> inline int8_t StoreSample<DevFmtByte>(float val) noexcept
+{ return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }
+
+/* Define unsigned output variations. Each converts with the signed variant
+ * of the same width and then adds the bias that shifts it into the unsigned
+ * range.
+ */
+template<> inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
+{ return static_cast<uint32_t>(StoreSample<DevFmtInt>(val)) + 2147483648u; }
+template<> inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
+{ return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768); }
+template<> inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
+{ return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128); }
+/* Converts `samples` contiguous floats from src to type T, writing to every
+ * dststep-th element of dst.
+ */
+template<DevFmtType T>
+inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep,
+    const size_t samples) noexcept
+{
+    DevFmtType_t<T> *sdst = static_cast<DevFmtType_t<T>*>(dst);
+    for(size_t i{0u};i < samples;i++)
+        sdst[i*dststep] = StoreSample<T>(src[i]);
+}
+
+/* Runtime dispatch to StoreSampleArray<T> for the given destination type. */
+void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype,
+    const size_t samples) noexcept
+{
+#define HANDLE_FMT(T) \
+    case T: StoreSampleArray<T>(dst, src, dststep, samples); break
+    switch(dsttype)
+    {
+    HANDLE_FMT(DevFmtByte);
+    HANDLE_FMT(DevFmtUByte);
+    HANDLE_FMT(DevFmtShort);
+    HANDLE_FMT(DevFmtUShort);
+    HANDLE_FMT(DevFmtInt);
+    HANDLE_FMT(DevFmtUInt);
+    HANDLE_FMT(DevFmtFloat);
+    }
+#undef HANDLE_FMT
+}
+
+/* Expands mono input to interleaved stereo floats: each source sample is
+ * converted to float, multiplied by 0.707106781187 (approximately 1/sqrt(2)),
+ * and written to both output channels.
+ */
+template<DevFmtType T>
+void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept
+{
+    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
+    for(size_t i{0u};i < frames;i++)
+        dst[i*2 + 1] = dst[i*2 + 0] = LoadSample<T>(ssrc[i]) * 0.707106781187f;
+}
+/* Mixes selected channels of interleaved input down to mono floats. Bit c of
+ * chanmask selects channel c of each step-wide source frame; the selected
+ * channels are summed into dst and the sum is multiplied by scale.
+ */
+template<DevFmtType T>
+void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,
+    const void *src, const size_t frames) noexcept
+{
+    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
+    std::fill_n(dst, frames, 0.0f);
+    for(size_t c{0};chanmask;++c) /* stops once no selected channels remain */
+    {
+        if((chanmask&1)) LIKELY
+        {
+            for(size_t i{0u};i < frames;i++)
+                dst[i] += LoadSample<T>(ssrc[i*step + c]);
+        }
+        chanmask >>= 1;
+    }
+    for(size_t i{0u};i < frames;i++)
+        dst[i] *= scale;
+}
+
+} // namespace
+/* Creates a converter for numchans interleaved channels between the given
+ * sample types and rates. Returns nullptr if numchans, srcRate or dstRate is
+ * zero. The fixed-point step is srcRate/dstRate rounded to the nearest
+ * 1/MixerFracOne and limited to the range [1, MaxPitch*MixerFracOne]; a step
+ * of exactly MixerFracOne installs a plain copy in place of a resampler.
+ */
+SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,
+    uint srcRate, uint dstRate, Resampler resampler)
+{
+    if(numchans < 1 || srcRate < 1 || dstRate < 1)
+        return nullptr;
+
+    SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};
+    converter->mSrcType = srcType;
+    converter->mDstType = dstType;
+    converter->mSrcTypeSize = BytesFromDevFmt(srcType);
+    converter->mDstTypeSize = BytesFromDevFmt(dstType);
+
+    /* Start with a full set of prep samples, all of them silence. */
+    converter->mSrcPrepCount = MaxResamplerPadding;
+    converter->mFracOffset = 0;
+    for(auto &chan : converter->mChan)
+    {
+        const al::span<float> buffer{chan.PrevSamples};
+        std::fill(buffer.begin(), buffer.end(), 0.0f);
+    }
+
+    /* Have to set the mixer FPU mode since that's what the resampler code expects. */
+    FPUCtl mixer_mode{};
+    auto step = static_cast<uint>(
+        mind(srcRate*double{MixerFracOne}/dstRate + 0.5, MaxPitch*MixerFracOne));
+    converter->mIncrement = maxu(step, 1);
+    if(converter->mIncrement == MixerFracOne)
+        converter->mResample = [](const InterpState*, const float *RESTRICT src, uint, const uint,
+            const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); };
+    else
+        converter->mResample = PrepareResampler(resampler, converter->mIncrement,
+            &converter->mState);
+
+    return converter;
+}
+/* Returns the number of output frames that srcframes input frames would
+ * produce given the converter's current prep count and fractional offset.
+ */
+uint SampleConverter::availableOut(uint srcframes) const
+{
+    if(srcframes < 1)
+    {
+        /* No output samples if there's no input samples.
*/
+        return 0;
+    }
+
+    const uint prepcount{mSrcPrepCount};
+    if(prepcount < MaxResamplerPadding && MaxResamplerPadding - prepcount >= srcframes)
+    {
+        /* Not enough input samples to generate an output sample. */
+        return 0;
+    }
+
+    /* Source span available past the padding, in fixed point, less the
+     * fractional offset already consumed.
+     */
+    uint64_t DataSize64{prepcount};
+    DataSize64 += srcframes;
+    DataSize64 -= MaxResamplerPadding;
+    DataSize64 <<= MixerFracBits;
+    DataSize64 -= mFracOffset;
+
+    /* If we have a full prep, we can generate at least one sample. */
+    return static_cast<uint>(clampu64((DataSize64 + mIncrement-1)/mIncrement, 1,
+        std::numeric_limits<int>::max()));
+}
+/* Converts and resamples interleaved input into dst, producing at most
+ * dstframes output frames. On return *src and *srcframes describe the input
+ * that remains; the return value is the number of output frames written.
+ */
+uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
+{
+    const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};
+    const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};
+    const uint increment{mIncrement};
+    auto SamplesIn = static_cast<const al::byte*>(*src);
+    uint NumSrcSamples{*srcframes};
+
+    FPUCtl mixer_mode{};
+    uint pos{0};
+    while(pos < dstframes && NumSrcSamples > 0)
+    {
+        const uint prepcount{mSrcPrepCount};
+        /* Limit each pass to what fits in the BufferLineSize work buffer. */
+        const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};
+
+        if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
+        {
+            /* Not enough input samples to generate an output sample. Store
+             * what we're given for later.
+             */
+            for(size_t chan{0u};chan < mChan.size();chan++)
+                LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,
+                    mChan.size(), mSrcType, readable);
+
+            mSrcPrepCount = prepcount + readable;
+            NumSrcSamples = 0; /* reported as fully consumed; SamplesIn itself is not advanced on this path */
+            break;
+        }
+
+        float *RESTRICT SrcData{mSrcSamples};
+        float *RESTRICT DstData{mDstSamples};
+        uint DataPosFrac{mFracOffset};
+        uint64_t DataSize64{prepcount};
+        DataSize64 += readable;
+        DataSize64 -= MaxResamplerPadding;
+        DataSize64 <<= MixerFracBits;
+        DataSize64 -= DataPosFrac;
+
+        /* If we have a full prep, we can generate at least one sample.
*/
+        auto DstSize = static_cast<uint>(
+            clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
+        DstSize = minu(DstSize, dstframes-pos);
+
+        /* Fixed-point source position after generating DstSize outputs, and
+         * the whole-sample part of that position.
+         */
+        const uint DataPosEnd{DstSize*increment + DataPosFrac};
+        const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
+
+        assert(prepcount+readable >= SrcDataEnd);
+        const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};
+
+        /* Channels are processed one at a time, sharing the same work buffers. */
+        for(size_t chan{0u};chan < mChan.size();chan++)
+        {
+            const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
+            al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan;
+
+            /* Load the previous samples into the source data first, then the
+             * new samples from the input buffer.
+             */
+            std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
+            LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, readable);
+
+            /* Store as many prep samples for next time as possible, given the
+             * number of output samples being generated. Any remaining prep
+             * slots are zeroed.
+             */
+            std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
+            std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
+                std::end(mChan[chan].PrevSamples), 0.0f);
+
+            /* Now resample, and store the result in the output buffer. */
+            mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,
+                {DstData, DstSize});
+
+            StoreSamples(DstSamples, DstData, mChan.size(), mDstType, DstSize);
+        }
+
+        /* Update the number of prep samples still available, as well as the
+         * fractional offset.
+         */
+        mSrcPrepCount = nextprep;
+        mFracOffset = DataPosEnd & MixerFracMask;
+
+        /* Update the src and dst pointers in case there's still more to do.
*/ + const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)}; + SamplesIn += SrcFrameSize*srcread; + NumSrcSamples -= srcread; + + dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize; + pos += DstSize; + } + + *src = SamplesIn; + *srcframes = NumSrcSamples; + + return pos; +} + + +void ChannelConverter::convert(const void *src, float *dst, uint frames) const +{ + if(mDstChans == DevFmtMono) + { + const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))}; + switch(mSrcType) + { +#define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, dst, src, frames); break + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); +#undef HANDLE_FMT + } + } + else if(mChanMask == 0x1 && mDstChans == DevFmtStereo) + { + switch(mSrcType) + { +#define HANDLE_FMT(T) case T: Mono2Stereo<T>(dst, src, frames); break + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); +#undef HANDLE_FMT + } + } +} diff --git a/core/converter.h b/core/converter.h new file mode 100644 index 00000000..01becea2 --- /dev/null +++ b/core/converter.h @@ -0,0 +1,66 @@ +#ifndef CORE_CONVERTER_H +#define CORE_CONVERTER_H + +#include <chrono> +#include <cstddef> +#include <memory> + +#include "almalloc.h" +#include "devformat.h" +#include "mixer/defs.h" + +using uint = unsigned int; + + +struct SampleConverter { + DevFmtType mSrcType{}; + DevFmtType mDstType{}; + uint mSrcTypeSize{}; + uint mDstTypeSize{}; + + uint mSrcPrepCount{}; + + uint mFracOffset{}; + uint mIncrement{}; + InterpState mState{}; + ResamplerFunc mResample{}; + + alignas(16) float mSrcSamples[BufferLineSize]{}; + alignas(16) float mDstSamples[BufferLineSize]{}; + + struct ChanSamples { + alignas(16) float 
/* ---- core/converter.h ---- */
#ifndef CORE_CONVERTER_H
#define CORE_CONVERTER_H

#include <chrono>
#include <cstddef>
#include <memory>

#include "almalloc.h"
#include "devformat.h"
#include "mixer/defs.h"

using uint = unsigned int;


/* Sample type converter and resampler for interleaved multi-channel audio.
 * Allocated with a trailing per-channel array (mChan), see DEF_FAM_NEWDEL.
 */
struct SampleConverter {
    DevFmtType mSrcType{};
    DevFmtType mDstType{};
    /* Size in bytes of one source/destination sample. */
    uint mSrcTypeSize{};
    uint mDstTypeSize{};

    /* Number of samples currently held in each channel's PrevSamples. */
    uint mSrcPrepCount{};

    /* Fixed-point (MixerFracBits) fractional read offset and step. */
    uint mFracOffset{};
    uint mIncrement{};
    InterpState mState{};
    ResamplerFunc mResample{};

    /* Scratch lines for one channel's converted input and resampled output. */
    alignas(16) float mSrcSamples[BufferLineSize]{};
    alignas(16) float mDstSamples[BufferLineSize]{};

    struct ChanSamples {
        alignas(16) float PrevSamples[MaxResamplerPadding];
    };
    al::FlexArray<ChanSamples> mChan;

    SampleConverter(size_t numchans) : mChan{numchans} { }

    /* Converts up to dstframes frames; updates *src and *srcframes to reflect
     * the input consumed and returns the number of frames written to dst.
     */
    uint convert(const void **src, uint *srcframes, void *dst, uint dstframes);
    /* Number of output frames that srcframes more input frames can produce. */
    uint availableOut(uint srcframes) const;

    using SampleOffset = std::chrono::duration<int64_t, std::ratio<1,MixerFracOne>>;
    /* Buffered input (prep count less MaxResamplerEdge) plus the fractional
     * offset, in units of 1/MixerFracOne of a sample.
     */
    SampleOffset currentInputDelay() const noexcept
    {
        const int64_t prep{int64_t{mSrcPrepCount} - MaxResamplerEdge};
        return SampleOffset{(prep<<MixerFracBits) + mFracOffset};
    }

    static std::unique_ptr<SampleConverter> Create(DevFmtType srcType, DevFmtType dstType,
        size_t numchans, uint srcRate, uint dstRate, Resampler resampler);

    DEF_FAM_NEWDEL(SampleConverter, mChan)
};
using SampleConverterPtr = std::unique_ptr<SampleConverter>;

/* Channel remixer to float output: mono downmix or mono-to-stereo expansion. */
struct ChannelConverter {
    DevFmtType mSrcType{};
    uint mSrcStep{};
    uint mChanMask{};
    DevFmtChannels mDstChans{};

    /* A zero channel mask means no conversion is configured. */
    bool is_active() const noexcept { return mChanMask != 0; }

    void convert(const void *src, float *dst, uint frames) const;
};

#endif /* CORE_CONVERTER_H */
&ret[2], &ret[3]); + return ret; +} +#define CAN_GET_CPUID +#elif defined(HAVE_CPUID_INTRINSIC) \ + && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) +using reg_type = int; +inline std::array<reg_type,4> get_cpuid(unsigned int f) +{ + std::array<reg_type,4> ret{}; + (__cpuid)(ret.data(), f); + return ret; +} +#define CAN_GET_CPUID +#endif + +} // namespace + +al::optional<CPUInfo> GetCPUInfo() +{ + CPUInfo ret; + +#ifdef CAN_GET_CPUID + auto cpuregs = get_cpuid(0); + if(cpuregs[0] == 0) + return al::nullopt; + + const reg_type maxfunc{cpuregs[0]}; + + cpuregs = get_cpuid(0x80000000); + const reg_type maxextfunc{cpuregs[0]}; + + ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[1]), 4); + ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[3]), 4); + ret.mVendor.append(reinterpret_cast<char*>(&cpuregs[2]), 4); + auto iter_end = std::remove(ret.mVendor.begin(), ret.mVendor.end(), '\0'); + iter_end = std::unique(ret.mVendor.begin(), iter_end, + [](auto&& c0, auto&& c1) { return std::isspace(c0) && std::isspace(c1); }); + ret.mVendor.erase(iter_end, ret.mVendor.end()); + if(!ret.mVendor.empty() && std::isspace(ret.mVendor.back())) + ret.mVendor.pop_back(); + if(!ret.mVendor.empty() && std::isspace(ret.mVendor.front())) + ret.mVendor.erase(ret.mVendor.begin()); + + if(maxextfunc >= 0x80000004) + { + cpuregs = get_cpuid(0x80000002); + ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16); + cpuregs = get_cpuid(0x80000003); + ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16); + cpuregs = get_cpuid(0x80000004); + ret.mName.append(reinterpret_cast<char*>(cpuregs.data()), 16); + iter_end = std::remove(ret.mName.begin(), ret.mName.end(), '\0'); + iter_end = std::unique(ret.mName.begin(), iter_end, + [](auto&& c0, auto&& c1) { return std::isspace(c0) && std::isspace(c1); }); + ret.mName.erase(iter_end, ret.mName.end()); + if(!ret.mName.empty() && std::isspace(ret.mName.back())) + ret.mName.pop_back(); + 
if(!ret.mName.empty() && std::isspace(ret.mName.front())) + ret.mName.erase(ret.mName.begin()); + } + + if(maxfunc >= 1) + { + cpuregs = get_cpuid(1); + if((cpuregs[3]&(1<<25))) + ret.mCaps |= CPU_CAP_SSE; + if((ret.mCaps&CPU_CAP_SSE) && (cpuregs[3]&(1<<26))) + ret.mCaps |= CPU_CAP_SSE2; + if((ret.mCaps&CPU_CAP_SSE2) && (cpuregs[2]&(1<<0))) + ret.mCaps |= CPU_CAP_SSE3; + if((ret.mCaps&CPU_CAP_SSE3) && (cpuregs[2]&(1<<19))) + ret.mCaps |= CPU_CAP_SSE4_1; + } + +#else + + /* Assume support for whatever's supported if we can't check for it */ +#if defined(HAVE_SSE4_1) +#warning "Assuming SSE 4.1 run-time support!" + ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3 | CPU_CAP_SSE4_1; +#elif defined(HAVE_SSE3) +#warning "Assuming SSE 3 run-time support!" + ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3; +#elif defined(HAVE_SSE2) +#warning "Assuming SSE 2 run-time support!" + ret.mCaps |= CPU_CAP_SSE | CPU_CAP_SSE2; +#elif defined(HAVE_SSE) +#warning "Assuming SSE run-time support!" + ret.mCaps |= CPU_CAP_SSE; +#endif +#endif /* CAN_GET_CPUID */ + +#ifdef HAVE_NEON +#ifdef __ARM_NEON + ret.mCaps |= CPU_CAP_NEON; +#elif defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64)) + if(IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + ret.mCaps |= CPU_CAP_NEON; +#else +#warning "Assuming NEON run-time support!" 
+ ret.mCaps |= CPU_CAP_NEON; +#endif +#endif + + return ret; +} diff --git a/core/cpu_caps.h b/core/cpu_caps.h new file mode 100644 index 00000000..ffd671d0 --- /dev/null +++ b/core/cpu_caps.h @@ -0,0 +1,26 @@ +#ifndef CORE_CPU_CAPS_H +#define CORE_CPU_CAPS_H + +#include <string> + +#include "aloptional.h" + + +extern int CPUCapFlags; +enum { + CPU_CAP_SSE = 1<<0, + CPU_CAP_SSE2 = 1<<1, + CPU_CAP_SSE3 = 1<<2, + CPU_CAP_SSE4_1 = 1<<3, + CPU_CAP_NEON = 1<<4, +}; + +struct CPUInfo { + std::string mVendor; + std::string mName; + int mCaps{0}; +}; + +al::optional<CPUInfo> GetCPUInfo(); + +#endif /* CORE_CPU_CAPS_H */ diff --git a/core/cubic_defs.h b/core/cubic_defs.h new file mode 100644 index 00000000..33751c97 --- /dev/null +++ b/core/cubic_defs.h @@ -0,0 +1,13 @@ +#ifndef CORE_CUBIC_DEFS_H +#define CORE_CUBIC_DEFS_H + +/* The number of distinct phase intervals within the cubic filter tables. */ +constexpr unsigned int CubicPhaseBits{5}; +constexpr unsigned int CubicPhaseCount{1 << CubicPhaseBits}; + +struct CubicCoefficients { + float mCoeffs[4]; + float mDeltas[4]; +}; + +#endif /* CORE_CUBIC_DEFS_H */ diff --git a/core/cubic_tables.cpp b/core/cubic_tables.cpp new file mode 100644 index 00000000..73ec6b3f --- /dev/null +++ b/core/cubic_tables.cpp @@ -0,0 +1,59 @@ + +#include "cubic_tables.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <cmath> +#include <limits> +#include <memory> +#include <stdexcept> + +#include "alnumbers.h" +#include "core/mixer/defs.h" + + +namespace { + +using uint = unsigned int; + +struct SplineFilterArray { + alignas(16) CubicCoefficients mTable[CubicPhaseCount]{}; + + constexpr SplineFilterArray() + { + /* Fill in the main coefficients. 
*/ + for(size_t pi{0};pi < CubicPhaseCount;++pi) + { + const double mu{static_cast<double>(pi) / CubicPhaseCount}; + const double mu2{mu*mu}, mu3{mu2*mu}; + mTable[pi].mCoeffs[0] = static_cast<float>(-0.5*mu3 + mu2 + -0.5*mu); + mTable[pi].mCoeffs[1] = static_cast<float>( 1.5*mu3 + -2.5*mu2 + 1.0); + mTable[pi].mCoeffs[2] = static_cast<float>(-1.5*mu3 + 2.0*mu2 + 0.5*mu); + mTable[pi].mCoeffs[3] = static_cast<float>( 0.5*mu3 + -0.5*mu2); + } + + /* Fill in the coefficient deltas. */ + for(size_t pi{0};pi < CubicPhaseCount-1;++pi) + { + mTable[pi].mDeltas[0] = mTable[pi+1].mCoeffs[0] - mTable[pi].mCoeffs[0]; + mTable[pi].mDeltas[1] = mTable[pi+1].mCoeffs[1] - mTable[pi].mCoeffs[1]; + mTable[pi].mDeltas[2] = mTable[pi+1].mCoeffs[2] - mTable[pi].mCoeffs[2]; + mTable[pi].mDeltas[3] = mTable[pi+1].mCoeffs[3] - mTable[pi].mCoeffs[3]; + } + + const size_t pi{CubicPhaseCount - 1}; + mTable[pi].mDeltas[0] = -mTable[pi].mCoeffs[0]; + mTable[pi].mDeltas[1] = -mTable[pi].mCoeffs[1]; + mTable[pi].mDeltas[2] = 1.0f - mTable[pi].mCoeffs[2]; + mTable[pi].mDeltas[3] = -mTable[pi].mCoeffs[3]; + } + + constexpr auto getTable() const noexcept { return al::as_span(mTable); } +}; + +constexpr SplineFilterArray SplineFilter{}; + +} // namespace + +const CubicTable gCubicSpline{SplineFilter.getTable()}; diff --git a/core/cubic_tables.h b/core/cubic_tables.h new file mode 100644 index 00000000..88097ae2 --- /dev/null +++ b/core/cubic_tables.h @@ -0,0 +1,17 @@ +#ifndef CORE_CUBIC_TABLES_H +#define CORE_CUBIC_TABLES_H + +#include "alspan.h" +#include "cubic_defs.h" + + +struct CubicTable { + al::span<const CubicCoefficients,CubicPhaseCount> Tab; +}; + +/* A Catmull-Rom spline. The spline passes through the center two samples, + * ensuring no discontinuity while moving through a series of samples. 
/* ---- core/dbus_wrap.cpp ---- */

#include "config.h"

#include "dbus_wrap.h"

#ifdef HAVE_DYNLOAD

#include <mutex>
#include <type_traits>

#include "logging.h"


/* Handle of the dynamically loaded D-Bus library; null when not loaded. */
void *dbus_handle{nullptr};
/* Define one function pointer (p<name>) per entry in DBUS_FUNCTIONS. */
#define DECL_FUNC(x) decltype(p##x) p##x{};
DBUS_FUNCTIONS(DECL_FUNC)
#undef DECL_FUNC

/* Loads the D-Bus library and resolves every function in DBUS_FUNCTIONS. If
 * the library or any one symbol can't be loaded, a warning is logged, the
 * library is closed, and dbus_handle is left null.
 */
void PrepareDBus()
{
    static constexpr char libname[] = "libdbus-1.so.3";

    /* Looks up `name` in the loaded library and stores it in f, cast to f's
     * own pointer type.
     */
    auto load_func = [](auto &f, const char *name) -> void
    { f = reinterpret_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); };
    /* Resolves p<x>; on failure unloads the library and returns from
     * PrepareDBus.
     */
#define LOAD_FUNC(x) do {                         \
    load_func(p##x, #x);                          \
    if(!p##x)                                     \
    {                                             \
        WARN("Failed to load function %s\n", #x); \
        CloseLib(dbus_handle);                    \
        dbus_handle = nullptr;                    \
        return;                                   \
    }                                             \
} while(0);

    dbus_handle = LoadLib(libname);
    if(!dbus_handle)
    {
        WARN("Failed to load %s\n", libname);
        return;
    }

DBUS_FUNCTIONS(LOAD_FUNC)
#undef LOAD_FUNC
}
#endif
/* ---- core/dbus_wrap.h ---- */
#ifndef CORE_DBUS_WRAP_H
#define CORE_DBUS_WRAP_H

#include <memory>

#include <dbus/dbus.h>

#include "dynload.h"

#ifdef HAVE_DYNLOAD

#include <mutex>

/* The list of D-Bus functions that get loaded at run-time. MAGIC is applied
 * to each function name.
 */
#define DBUS_FUNCTIONS(MAGIC) \
MAGIC(dbus_error_init) \
MAGIC(dbus_error_free) \
MAGIC(dbus_bus_get) \
MAGIC(dbus_connection_set_exit_on_disconnect) \
MAGIC(dbus_connection_unref) \
MAGIC(dbus_connection_send_with_reply_and_block) \
MAGIC(dbus_message_unref) \
MAGIC(dbus_message_new_method_call) \
MAGIC(dbus_message_append_args) \
MAGIC(dbus_message_iter_init) \
MAGIC(dbus_message_iter_next) \
MAGIC(dbus_message_iter_recurse) \
MAGIC(dbus_message_iter_get_arg_type) \
MAGIC(dbus_message_iter_get_basic) \
MAGIC(dbus_set_error_from_message)

extern void *dbus_handle;
/* Declare a function pointer p<name> for each listed function. */
#define DECL_FUNC(x) extern decltype(x) *p##x;
DBUS_FUNCTIONS(DECL_FUNC)
#undef DECL_FUNC

/* Redirect calls of the plain function names through the loaded pointers. */
#ifndef IN_IDE_PARSER
#define dbus_error_init (*pdbus_error_init)
#define dbus_error_free (*pdbus_error_free)
#define dbus_bus_get (*pdbus_bus_get)
#define dbus_connection_set_exit_on_disconnect (*pdbus_connection_set_exit_on_disconnect)
#define dbus_connection_unref (*pdbus_connection_unref)
#define dbus_connection_send_with_reply_and_block (*pdbus_connection_send_with_reply_and_block)
#define dbus_message_unref (*pdbus_message_unref)
#define dbus_message_new_method_call (*pdbus_message_new_method_call)
#define dbus_message_append_args (*pdbus_message_append_args)
#define dbus_message_iter_init (*pdbus_message_iter_init)
#define dbus_message_iter_next (*pdbus_message_iter_next)
#define dbus_message_iter_recurse (*pdbus_message_iter_recurse)
#define dbus_message_iter_get_arg_type (*pdbus_message_iter_get_arg_type)
#define dbus_message_iter_get_basic (*pdbus_message_iter_get_basic)
#define dbus_set_error_from_message (*pdbus_set_error_from_message)
#endif

void PrepareDBus();

/* Runs PrepareDBus() once, on first use, and returns the library handle
 * (null if loading failed).
 */
inline auto HasDBus()
{
    static std::once_flag init_dbus{};
    std::call_once(init_dbus, []{ PrepareDBus(); });
    return dbus_handle;
}

#else

/* Without dynamic loading there is nothing to load at run-time. */
constexpr bool HasDBus() noexcept { return true; }
#endif /* HAVE_DYNLOAD */


namespace dbus {

/* Owning wrapper for a DBusError: initialized on construction and freed on
 * destruction.
 */
struct Error {
    Error() { dbus_error_init(&mError); }
    ~Error() { dbus_error_free(&mError); }
    DBusError* operator->() { return &mError; }
    DBusError &get() { return mError; }
private:
    DBusError mError{};
};

/* Deleter that releases a connection reference with dbus_connection_unref. */
struct ConnectionDeleter {
    void operator()(DBusConnection *c) { dbus_connection_unref(c); }
};
using ConnectionPtr = std::unique_ptr<DBusConnection,ConnectionDeleter>;

} // namespace dbus

#endif /* CORE_DBUS_WRAP_H */
"config.h" + +#include "devformat.h" + + +uint BytesFromDevFmt(DevFmtType type) noexcept +{ + switch(type) + { + case DevFmtByte: return sizeof(int8_t); + case DevFmtUByte: return sizeof(uint8_t); + case DevFmtShort: return sizeof(int16_t); + case DevFmtUShort: return sizeof(uint16_t); + case DevFmtInt: return sizeof(int32_t); + case DevFmtUInt: return sizeof(uint32_t); + case DevFmtFloat: return sizeof(float); + } + return 0; +} +uint ChannelsFromDevFmt(DevFmtChannels chans, uint ambiorder) noexcept +{ + switch(chans) + { + case DevFmtMono: return 1; + case DevFmtStereo: return 2; + case DevFmtQuad: return 4; + case DevFmtX51: return 6; + case DevFmtX61: return 7; + case DevFmtX71: return 8; + case DevFmtX714: return 12; + case DevFmtX3D71: return 8; + case DevFmtAmbi3D: return (ambiorder+1) * (ambiorder+1); + } + return 0; +} + +const char *DevFmtTypeString(DevFmtType type) noexcept +{ + switch(type) + { + case DevFmtByte: return "Int8"; + case DevFmtUByte: return "UInt8"; + case DevFmtShort: return "Int16"; + case DevFmtUShort: return "UInt16"; + case DevFmtInt: return "Int32"; + case DevFmtUInt: return "UInt32"; + case DevFmtFloat: return "Float32"; + } + return "(unknown type)"; +} +const char *DevFmtChannelsString(DevFmtChannels chans) noexcept +{ + switch(chans) + { + case DevFmtMono: return "Mono"; + case DevFmtStereo: return "Stereo"; + case DevFmtQuad: return "Quadraphonic"; + case DevFmtX51: return "5.1 Surround"; + case DevFmtX61: return "6.1 Surround"; + case DevFmtX71: return "7.1 Surround"; + case DevFmtX714: return "7.1.4 Surround"; + case DevFmtX3D71: return "3D7.1 Surround"; + case DevFmtAmbi3D: return "Ambisonic 3D"; + } + return "(unknown channels)"; +} diff --git a/core/devformat.h b/core/devformat.h new file mode 100644 index 00000000..485826a3 --- /dev/null +++ b/core/devformat.h @@ -0,0 +1,122 @@ +#ifndef CORE_DEVFORMAT_H +#define CORE_DEVFORMAT_H + +#include <cstdint> + + +using uint = unsigned int; + +enum Channel : unsigned char { + 
/* ---- core/devformat.h ---- */
#ifndef CORE_DEVFORMAT_H
#define CORE_DEVFORMAT_H

#include <cstdint>


using uint = unsigned int;

/* Named output channels. MaxChannels is the number of entries. */
enum Channel : unsigned char {
    FrontLeft = 0,
    FrontRight,
    FrontCenter,
    LFE,
    BackLeft,
    BackRight,
    BackCenter,
    SideLeft,
    SideRight,

    TopCenter,
    TopFrontLeft,
    TopFrontCenter,
    TopFrontRight,
    TopBackLeft,
    TopBackCenter,
    TopBackRight,

    Aux0,
    Aux1,
    Aux2,
    Aux3,
    Aux4,
    Aux5,
    Aux6,
    Aux7,
    Aux8,
    Aux9,
    Aux10,
    Aux11,
    Aux12,
    Aux13,
    Aux14,
    Aux15,

    MaxChannels
};


/* Device formats */
enum DevFmtType : unsigned char {
    DevFmtByte,
    DevFmtUByte,
    DevFmtShort,
    DevFmtUShort,
    DevFmtInt,
    DevFmtUInt,
    DevFmtFloat,

    DevFmtTypeDefault = DevFmtFloat
};
enum DevFmtChannels : unsigned char {
    DevFmtMono,
    DevFmtStereo,
    DevFmtQuad,
    DevFmtX51,
    DevFmtX61,
    DevFmtX71,
    DevFmtX714,
    DevFmtX3D71,
    DevFmtAmbi3D,

    DevFmtChannelsDefault = DevFmtStereo
};
#define MAX_OUTPUT_CHANNELS 16

/* DevFmtType traits, providing the type, etc given a DevFmtType. */
template<DevFmtType T>
struct DevFmtTypeTraits { };

template<>
struct DevFmtTypeTraits<DevFmtByte> { using Type = int8_t; };
template<>
struct DevFmtTypeTraits<DevFmtUByte> { using Type = uint8_t; };
template<>
struct DevFmtTypeTraits<DevFmtShort> { using Type = int16_t; };
template<>
struct DevFmtTypeTraits<DevFmtUShort> { using Type = uint16_t; };
template<>
struct DevFmtTypeTraits<DevFmtInt> { using Type = int32_t; };
template<>
struct DevFmtTypeTraits<DevFmtUInt> { using Type = uint32_t; };
template<>
struct DevFmtTypeTraits<DevFmtFloat> { using Type = float; };

/* Shorthand for the C++ sample type matching a DevFmtType value. */
template<DevFmtType T>
using DevFmtType_t = typename DevFmtTypeTraits<T>::Type;


uint BytesFromDevFmt(DevFmtType type) noexcept;
uint ChannelsFromDevFmt(DevFmtChannels chans, uint ambiorder) noexcept;
/* Size in bytes of one frame: channel count times bytes per sample. */
inline uint FrameSizeFromDevFmt(DevFmtChannels chans, DevFmtType type, uint ambiorder) noexcept
{ return ChannelsFromDevFmt(chans, ambiorder) * BytesFromDevFmt(type); }

const char *DevFmtTypeString(DevFmtType type) noexcept;
const char *DevFmtChannelsString(DevFmtChannels chans) noexcept;

enum class DevAmbiLayout : bool {
    FuMa,
    ACN,

    Default = ACN
};

enum class DevAmbiScaling : unsigned char {
    FuMa,
    SN3D,
    N3D,

    Default = SN3D
};

#endif /* CORE_DEVFORMAT_H */

/* ---- core/device.cpp ---- */

#include "config.h"

#include "bformatdec.h"
#include "bs2b.h"
#include "device.h"
#include "front_stablizer.h"
#include "hrtf.h"
#include "mastering.h"


/* The shared, zero-length context array that every new device starts with. */
al::FlexArray<ContextBase*> DeviceBase::sEmptyContextArray{0u};


DeviceBase::DeviceBase(DeviceType type) : Type{type}, mContexts{&sEmptyContextArray}
{
}

DeviceBase::~DeviceBase()
{
    /* Only free the context array if it isn't the shared static one. */
    auto *oldarray = mContexts.exchange(nullptr, std::memory_order_relaxed);
    if(oldarray != &sEmptyContextArray) delete oldarray;
}
/* ---- core/device.h ---- */
#ifndef CORE_DEVICE_H
#define CORE_DEVICE_H

#include <stddef.h>

#include <array>
#include <atomic>
#include <bitset>
#include <chrono>
#include <memory>
#include <mutex>
#include <string>

#include "almalloc.h"
#include "alspan.h"
#include "ambidefs.h"
#include "atomic.h"
#include "bufferline.h"
#include "devformat.h"
#include "filters/nfc.h"
#include "intrusive_ptr.h"
#include "mixer/hrtfdefs.h"
#include "opthelpers.h"
#include "resampler_limits.h"
#include "uhjfilter.h"
#include "vector.h"

class BFormatDec;
struct bs2b;
struct Compressor;
struct ContextBase;
struct DirectHrtfState;
struct HrtfStore;

using uint = unsigned int;


#define MIN_OUTPUT_RATE 8000
#define MAX_OUTPUT_RATE 192000
#define DEFAULT_OUTPUT_RATE 48000

#define DEFAULT_UPDATE_SIZE 960 /* 20ms */
#define DEFAULT_NUM_UPDATES 3


enum class DeviceType : unsigned char {
    Playback,
    Capture,
    Loopback
};


enum class RenderMode : unsigned char {
    Normal,
    Pairwise,
    Hrtf
};

enum class StereoEncoding : unsigned char {
    Basic,
    Uhj,
    Hrtf,

    Default = Basic
};


/* Maps one input channel to a set of target channels with mix gains. */
struct InputRemixMap {
    struct TargetMix { Channel channel; float mix; };

    Channel channel;
    al::span<const TargetMix> targets;
};


struct DistanceComp {
    /* Maximum delay in samples for speaker distance compensation. */
    static constexpr uint MaxDelay{1024};

    struct ChanData {
        float Gain{1.0f};
        uint Length{0u}; /* Valid range is [0...MaxDelay). */
        float *Buffer{nullptr};
    };

    std::array<ChanData,MAX_OUTPUT_CHANNELS> mChannels;
    al::FlexArray<float,16> mSamples;

    DistanceComp(size_t count) : mSamples{count} { }

    /* Allocates a DistanceComp with storage for numsamples floats. */
    static std::unique_ptr<DistanceComp> Create(size_t numsamples)
    { return std::unique_ptr<DistanceComp>{new(FamCount(numsamples)) DistanceComp{numsamples}}; }

    DEF_FAM_NEWDEL(DistanceComp, mSamples)
};


/* Marks a channel that has no output. */
constexpr uint InvalidChannelIndex{~0u};

struct BFChannelConfig {
    float Scale;
    uint Index;
};

struct MixParams {
    /* Coefficient channel mapping for mixing to the buffer. */
    std::array<BFChannelConfig,MaxAmbiChannels> AmbiMap{};

    al::span<FloatBufferLine> Buffer;

    /**
     * Helper to set an identity/pass-through panning for ambisonic mixing. The
     * source is expected to be a 3D ACN/N3D ambisonic buffer, and for each
     * channel [0...count), the given functor is called with the source channel
     * index, destination channel index, and the gain for that channel. If the
     * destination channel is InvalidChannelIndex, the given source channel
     * is not used for output.
     */
    template<typename F>
    void setAmbiMixParams(const MixParams &inmix, const float gainbase, F func) const
    {
        const size_t numIn{inmix.Buffer.size()};
        const size_t numOut{Buffer.size()};
        for(size_t i{0};i < numIn;++i)
        {
            auto idx = InvalidChannelIndex;
            auto gain = 0.0f;

            /* Find the first output channel carrying the same ambisonic
             * channel as input channel i.
             */
            for(size_t j{0};j < numOut;++j)
            {
                if(AmbiMap[j].Index == inmix.AmbiMap[i].Index)
                {
                    idx = static_cast<uint>(j);
                    gain = AmbiMap[j].Scale * gainbase;
                    break;
                }
            }
            func(i, idx, gain);
        }
    }
};

struct RealMixParams {
    al::span<const InputRemixMap> RemixMap;
    std::array<uint,MaxChannels> ChannelIndex{};

    al::span<FloatBufferLine> Buffer;
};

using AmbiRotateMatrix = std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels>;

/* Bit positions within DeviceBase::Flags. */
enum {
    // Frequency was requested by the app or config file
    FrequencyRequest,
    // Channel configuration was requested by the app or config file
    ChannelsRequest,
    // Sample type was requested by the config file
    SampleTypeRequest,

    // Specifies if the DSP is paused at user request
    DevicePaused,
    // Specifies if the device is currently running
    DeviceRunning,

    // Specifies if the output plays directly on/in ears (headphones, headset,
    // ear buds, etc).
    DirectEar,

    DeviceFlagsCount
};

struct DeviceBase {
    /* To avoid extraneous allocations, a 0-sized FlexArray<ContextBase*> is
     * defined globally as a sharable object.
     */
    static al::FlexArray<ContextBase*> sEmptyContextArray;

    std::atomic<bool> Connected{true};
    const DeviceType Type{};

    uint Frequency{};
    uint UpdateSize{};
    uint BufferSize{};

    DevFmtChannels FmtChans{};
    DevFmtType FmtType{};
    uint mAmbiOrder{0};
    float mXOverFreq{400.0f};
    /* If the main device mix is horizontal/2D only. */
    bool m2DMixing{false};
    /* For DevFmtAmbi* output only, specifies the channel order and
     * normalization.
     */
    DevAmbiLayout mAmbiLayout{DevAmbiLayout::Default};
    DevAmbiScaling mAmbiScale{DevAmbiScaling::Default};

    std::string DeviceName;

    // Device flags
    std::bitset<DeviceFlagsCount> Flags{};

    uint NumAuxSends{};

    /* Rendering mode. */
    RenderMode mRenderMode{RenderMode::Normal};

    /* The average speaker distance as determined by the ambdec configuration,
     * HRTF data set, or the NFC-HOA reference delay. Only used for NFC.
     */
    float AvgSpeakerDist{0.0f};

    /* The default NFC filter. Not used directly, but is pre-initialized with
     * the control distance from AvgSpeakerDist.
     */
    NfcFilter mNFCtrlFilter{};

    uint SamplesDone{0u};
    std::chrono::nanoseconds ClockBase{0};
    std::chrono::nanoseconds FixedLatency{0};

    AmbiRotateMatrix mAmbiRotateMatrix{};
    AmbiRotateMatrix mAmbiRotateMatrix2{};

    /* Temp storage used for mixer processing. */
    static constexpr size_t MixerLineSize{BufferLineSize + DecoderBase::sMaxPadding};
    static constexpr size_t MixerChannelsMax{16};
    using MixerBufferLine = std::array<float,MixerLineSize>;
    alignas(16) std::array<MixerBufferLine,MixerChannelsMax> mSampleData;
    alignas(16) std::array<float,MixerLineSize+MaxResamplerPadding> mResampleData;

    alignas(16) float FilteredData[BufferLineSize];
    union {
        alignas(16) float HrtfSourceData[BufferLineSize + HrtfHistoryLength];
        alignas(16) float NfcSampleData[BufferLineSize];
    };

    /* Persistent storage for HRTF mixing. */
    alignas(16) float2 HrtfAccumData[BufferLineSize + HrirLength];

    /* Mixing buffer used by the Dry mix and Real output. */
    al::vector<FloatBufferLine, 16> MixBuffer;

    /* The "dry" path corresponds to the main output. */
    MixParams Dry;
    uint NumChannelsPerOrder[MaxAmbiOrder+1]{};

    /* "Real" output, which will be written to the device buffer. May alias the
     * dry buffer.
     */
    RealMixParams RealOut;

    /* HRTF state and info */
    std::unique_ptr<DirectHrtfState> mHrtfState;
    al::intrusive_ptr<HrtfStore> mHrtf;
    uint mIrSize{0};

    /* Ambisonic-to-UHJ encoder */
    std::unique_ptr<UhjEncoderBase> mUhjEncoder;

    /* Ambisonic decoder for speakers */
    std::unique_ptr<BFormatDec> AmbiDecoder;

    /* Stereo-to-binaural filter */
    std::unique_ptr<bs2b> Bs2b;

    /* Optional post-processing step, one of the Process* methods below. */
    using PostProc = void(DeviceBase::*)(const size_t SamplesToDo);
    PostProc PostProcess{nullptr};

    std::unique_ptr<Compressor> Limiter;

    /* Delay buffers used to compensate for speaker distances. */
    std::unique_ptr<DistanceComp> ChannelDelays;

    /* Dithering control. */
    float DitherDepth{0.0f};
    uint DitherSeed{0u};

    /* Running count of the mixer invocations, in 31.1 fixed point. This
     * actually increments *twice* when mixing, first at the start and then at
     * the end, so the bottom bit indicates if the device is currently mixing
     * and the upper bits indicates how many mixes have been done.
     */
    RefCount MixCount{0u};

    // Contexts created on this device
    std::atomic<al::FlexArray<ContextBase*>*> mContexts{nullptr};


    DeviceBase(DeviceType type);
    DeviceBase(const DeviceBase&) = delete;
    DeviceBase& operator=(const DeviceBase&) = delete;
    ~DeviceBase();

    uint bytesFromFmt() const noexcept { return BytesFromDevFmt(FmtType); }
    uint channelsFromFmt() const noexcept { return ChannelsFromDevFmt(FmtChans, mAmbiOrder); }
    uint frameSizeFromFmt() const noexcept { return bytesFromFmt() * channelsFromFmt(); }

    /* Spins while the low bit of MixCount is set (a mix is in progress), then
     * returns the count that was read.
     */
    uint waitForMix() const noexcept
    {
        uint refcount;
        while((refcount=MixCount.load(std::memory_order_acquire))&1) {
        }
        return refcount;
    }

    void ProcessHrtf(const size_t SamplesToDo);
    void ProcessAmbiDec(const size_t SamplesToDo);
    void ProcessAmbiDecStablized(const size_t SamplesToDo);
    void ProcessUhj(const size_t SamplesToDo);
    void ProcessBs2b(const size_t SamplesToDo);

    /* Runs the configured post-process method, if one is set. */
    inline void postProcess(const size_t SamplesToDo)
    { if(PostProcess) LIKELY (this->*PostProcess)(SamplesToDo); }

    void renderSamples(const al::span<float*> outBuffers, const uint numSamples);
    void renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep);

    /* Caller must lock the device state, and the mixer must not be running. */
#ifdef __USE_MINGW_ANSI_STDIO
    [[gnu::format(gnu_printf,2,3)]]
#else
    [[gnu::format(printf,2,3)]]
#endif
    void handleDisconnect(const char *msg, ...);

    /**
     * Returns the index for the given channel name (e.g. FrontCenter), or
     * InvalidChannelIndex if it doesn't exist.
     */
    uint channelIdxByName(Channel chan) const noexcept
    { return RealOut.ChannelIndex[chan]; }

    DISABLE_ALLOC()

private:
    uint renderSamples(const uint numSamples);
};

/* Must be at most 15 characters (16 including terminating null) for
 * compatibility with pthread_setname_np limitations. */
#define MIXER_THREAD_NAME "alsoft-mixer"

#define RECORD_THREAD_NAME "alsoft-record"

#endif /* CORE_DEVICE_H */
/* ---- core/effects/base.h ---- */
#ifndef CORE_EFFECTS_BASE_H
#define CORE_EFFECTS_BASE_H

#include <stddef.h>

#include "albyte.h"
#include "almalloc.h"
#include "alspan.h"
#include "atomic.h"
#include "core/bufferline.h"
#include "intrusive_ptr.h"

struct BufferStorage;
struct ContextBase;
struct DeviceBase;
struct EffectSlot;
struct MixParams;
struct RealMixParams;


/** Target gain for the reverb decay feedback reaching the decay time. */
constexpr float ReverbDecayGain{0.001f}; /* -60 dB */

constexpr float ReverbMaxReflectionsDelay{0.3f};
constexpr float ReverbMaxLateReverbDelay{0.1f};

enum class ChorusWaveform {
    Sinusoid,
    Triangle
};

constexpr float ChorusMaxDelay{0.016f};
constexpr float FlangerMaxDelay{0.004f};

constexpr float EchoMaxDelay{0.207f};
constexpr float EchoMaxLRDelay{0.404f};

enum class FShifterDirection {
    Down,
    Up,
    Off
};

enum class ModulatorWaveform {
    Sinusoid,
    Sawtooth,
    Square
};

enum class VMorpherPhenome {
    A, E, I, O, U,
    AA, AE, AH, AO, EH, ER, IH, IY, UH, UW,
    B, D, F, G, J, K, L, M, N, P, R, S, T, V, Z
};

enum class VMorpherWaveform {
    Sinusoid,
    Triangle,
    Sawtooth
};

/* Per-effect parameter sets. Being a union, only the member matching the
 * effect in use holds meaningful values.
 */
union EffectProps {
    struct {
        float Density;
        float Diffusion;
        float Gain;
        float GainHF;
        float GainLF;
        float DecayTime;
        float DecayHFRatio;
        float DecayLFRatio;
        float ReflectionsGain;
        float ReflectionsDelay;
        float ReflectionsPan[3];
        float LateReverbGain;
        float LateReverbDelay;
        float LateReverbPan[3];
        float EchoTime;
        float EchoDepth;
        float ModulationTime;
        float ModulationDepth;
        float AirAbsorptionGainHF;
        float HFReference;
        float LFReference;
        float RoomRolloffFactor;
        bool DecayHFLimit;
    } Reverb;

    struct {
        float AttackTime;
        float ReleaseTime;
        float Resonance;
        float PeakGain;
    } Autowah;

    struct {
        ChorusWaveform Waveform;
        int Phase;
        float Rate;
        float Depth;
        float Feedback;
        float Delay;
    } Chorus; /* Also Flanger */

    struct {
        bool OnOff;
    } Compressor;

    struct {
        float Edge;
        float Gain;
        float LowpassCutoff;
        float EQCenter;
        float EQBandwidth;
    } Distortion;

    struct {
        float Delay;
        float LRDelay;

        float Damping;
        float Feedback;

        float Spread;
    } Echo;

    struct {
        float LowCutoff;
        float LowGain;
        float Mid1Center;
        float Mid1Gain;
        float Mid1Width;
        float Mid2Center;
        float Mid2Gain;
        float Mid2Width;
        float HighCutoff;
        float HighGain;
    } Equalizer;

    struct {
        float Frequency;
        FShifterDirection LeftDirection;
        FShifterDirection RightDirection;
    } Fshifter;

    struct {
        float Frequency;
        float HighPassCutoff;
        ModulatorWaveform Waveform;
    } Modulator;

    struct {
        int CoarseTune;
        int FineTune;
    } Pshifter;

    struct {
        float Rate;
        VMorpherPhenome PhonemeA;
        VMorpherPhenome PhonemeB;
        int PhonemeACoarseTuning;
        int PhonemeBCoarseTuning;
        VMorpherWaveform Waveform;
    } Vmorpher;

    struct {
        float Gain;
    } Dedicated;
};


/* The mix targets handed to EffectState::update. */
struct EffectTarget {
    MixParams *Main;
    RealMixParams *RealOut;
};

/* Abstract, reference-counted interface implemented by each effect. */
struct EffectState : public al::intrusive_ref<EffectState> {
    al::span<FloatBufferLine> mOutTarget;


    virtual ~EffectState() = default;

    virtual void deviceUpdate(const DeviceBase *device, const BufferStorage *buffer) = 0;
    virtual void update(const ContextBase *context, const EffectSlot *slot,
        const EffectProps *props, const EffectTarget target) = 0;
    virtual void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
        const al::span<FloatBufferLine> samplesOut) = 0;
};


/* Abstract factory that creates EffectState instances. */
struct EffectStateFactory {
    virtual ~EffectStateFactory() = default;

    virtual al::intrusive_ptr<EffectState> create() = 0;
};

#endif /* CORE_EFFECTS_BASE_H */
*/ diff --git a/core/effectslot.cpp b/core/effectslot.cpp new file mode 100644 index 00000000..db8aa078 --- /dev/null +++ b/core/effectslot.cpp @@ -0,0 +1,19 @@ + +#include "config.h" + +#include "effectslot.h" + +#include <stddef.h> + +#include "almalloc.h" +#include "context.h" + + +EffectSlotArray *EffectSlot::CreatePtrArray(size_t count) noexcept +{ + /* Allocate space for twice as many pointers, so the mixer has scratch + * space to store a sorted list during mixing. + */ + void *ptr{al_calloc(alignof(EffectSlotArray), EffectSlotArray::Sizeof(count*2))}; + return al::construct_at(static_cast<EffectSlotArray*>(ptr), count); +} diff --git a/core/effectslot.h b/core/effectslot.h new file mode 100644 index 00000000..2624ae5f --- /dev/null +++ b/core/effectslot.h @@ -0,0 +1,89 @@ +#ifndef CORE_EFFECTSLOT_H +#define CORE_EFFECTSLOT_H + +#include <atomic> + +#include "almalloc.h" +#include "device.h" +#include "effects/base.h" +#include "intrusive_ptr.h" + +struct EffectSlot; +struct WetBuffer; + +using EffectSlotArray = al::FlexArray<EffectSlot*>; + + +enum class EffectSlotType : unsigned char { + None, + Reverb, + Chorus, + Distortion, + Echo, + Flanger, + FrequencyShifter, + VocalMorpher, + PitchShifter, + RingModulator, + Autowah, + Compressor, + Equalizer, + EAXReverb, + DedicatedLFE, + DedicatedDialog, + Convolution +}; + +struct EffectSlotProps { + float Gain; + bool AuxSendAuto; + EffectSlot *Target; + + EffectSlotType Type; + EffectProps Props; + + al::intrusive_ptr<EffectState> State; + + std::atomic<EffectSlotProps*> next; + + DEF_NEWDEL(EffectSlotProps) +}; + + +struct EffectSlot { + bool InUse{false}; + + std::atomic<EffectSlotProps*> Update{nullptr}; + + /* Wet buffer configuration is ACN channel order with N3D scaling. + * Consequently, effects that only want to work with mono input can use + * channel 0 by itself. Effects that want multichannel can process the + * ambisonics signal and make a B-Format source pan. 
+ */ + MixParams Wet; + + float Gain{1.0f}; + bool AuxSendAuto{true}; + EffectSlot *Target{nullptr}; + + EffectSlotType EffectType{EffectSlotType::None}; + EffectProps mEffectProps{}; + al::intrusive_ptr<EffectState> mEffectState; + + float RoomRolloff{0.0f}; /* Added to the source's room rolloff, not multiplied. */ + float DecayTime{0.0f}; + float DecayLFRatio{0.0f}; + float DecayHFRatio{0.0f}; + bool DecayHFLimit{false}; + float AirAbsorptionGainHF{1.0f}; + + /* Mixing buffer used by the Wet mix. */ + al::vector<FloatBufferLine,16> mWetBuffer; + + + static EffectSlotArray *CreatePtrArray(size_t count) noexcept; + + DEF_NEWDEL(EffectSlot) +}; + +#endif /* CORE_EFFECTSLOT_H */ diff --git a/core/except.cpp b/core/except.cpp new file mode 100644 index 00000000..45fd4eb5 --- /dev/null +++ b/core/except.cpp @@ -0,0 +1,30 @@ + +#include "config.h" + +#include "except.h" + +#include <cstdio> +#include <cstdarg> + +#include "opthelpers.h" + + +namespace al { + +base_exception::~base_exception() = default; + +void base_exception::setMessage(const char* msg, std::va_list args) +{ + std::va_list args2; + va_copy(args2, args); + int msglen{std::vsnprintf(nullptr, 0, msg, args)}; + if(msglen > 0) LIKELY + { + mMessage.resize(static_cast<size_t>(msglen)+1); + std::vsnprintf(const_cast<char*>(mMessage.data()), mMessage.length(), msg, args2); + mMessage.pop_back(); + } + va_end(args2); +} + +} // namespace al diff --git a/core/except.h b/core/except.h new file mode 100644 index 00000000..0e28e9df --- /dev/null +++ b/core/except.h @@ -0,0 +1,31 @@ +#ifndef CORE_EXCEPT_H +#define CORE_EXCEPT_H + +#include <cstdarg> +#include <exception> +#include <string> +#include <utility> + + +namespace al { + +class base_exception : public std::exception { + std::string mMessage; + +protected: + base_exception() = default; + virtual ~base_exception(); + + void setMessage(const char *msg, std::va_list args); + +public: + const char *what() const noexcept override { return mMessage.c_str(); } +}; 
+ +} // namespace al + +#define START_API_FUNC try + +#define END_API_FUNC catch(...) { std::terminate(); } + +#endif /* CORE_EXCEPT_H */ diff --git a/core/filters/biquad.cpp b/core/filters/biquad.cpp new file mode 100644 index 00000000..a0a62eb8 --- /dev/null +++ b/core/filters/biquad.cpp @@ -0,0 +1,168 @@ + +#include "config.h" + +#include "biquad.h" + +#include <algorithm> +#include <cassert> +#include <cmath> + +#include "alnumbers.h" +#include "opthelpers.h" + + +template<typename Real> +void BiquadFilterR<Real>::setParams(BiquadType type, Real f0norm, Real gain, Real rcpQ) +{ + /* HACK: Limit gain to -100dB. This shouldn't ever happen, all callers + * already clamp to minimum of 0.001, or have a limited range of values + * that don't go below 0.126. But it seems to with some callers. This needs + * to be investigated. + */ + gain = std::max(gain, Real(0.00001)); + + const Real w0{al::numbers::pi_v<Real>*2.0f * f0norm}; + const Real sin_w0{std::sin(w0)}; + const Real cos_w0{std::cos(w0)}; + const Real alpha{sin_w0/2.0f * rcpQ}; + + Real sqrtgain_alpha_2; + Real a[3]{ 1.0f, 0.0f, 0.0f }; + Real b[3]{ 1.0f, 0.0f, 0.0f }; + + /* Calculate filter coefficients depending on filter type */ + switch(type) + { + case BiquadType::HighShelf: + sqrtgain_alpha_2 = 2.0f * std::sqrt(gain) * alpha; + b[0] = gain*((gain+1.0f) + (gain-1.0f)*cos_w0 + sqrtgain_alpha_2); + b[1] = -2.0f*gain*((gain-1.0f) + (gain+1.0f)*cos_w0 ); + b[2] = gain*((gain+1.0f) + (gain-1.0f)*cos_w0 - sqrtgain_alpha_2); + a[0] = (gain+1.0f) - (gain-1.0f)*cos_w0 + sqrtgain_alpha_2; + a[1] = 2.0f* ((gain-1.0f) - (gain+1.0f)*cos_w0 ); + a[2] = (gain+1.0f) - (gain-1.0f)*cos_w0 - sqrtgain_alpha_2; + break; + case BiquadType::LowShelf: + sqrtgain_alpha_2 = 2.0f * std::sqrt(gain) * alpha; + b[0] = gain*((gain+1.0f) - (gain-1.0f)*cos_w0 + sqrtgain_alpha_2); + b[1] = 2.0f*gain*((gain-1.0f) - (gain+1.0f)*cos_w0 ); + b[2] = gain*((gain+1.0f) - (gain-1.0f)*cos_w0 - sqrtgain_alpha_2); + a[0] = (gain+1.0f) + 
(gain-1.0f)*cos_w0 + sqrtgain_alpha_2; + a[1] = -2.0f* ((gain-1.0f) + (gain+1.0f)*cos_w0 ); + a[2] = (gain+1.0f) + (gain-1.0f)*cos_w0 - sqrtgain_alpha_2; + break; + case BiquadType::Peaking: + b[0] = 1.0f + alpha * gain; + b[1] = -2.0f * cos_w0; + b[2] = 1.0f - alpha * gain; + a[0] = 1.0f + alpha / gain; + a[1] = -2.0f * cos_w0; + a[2] = 1.0f - alpha / gain; + break; + + case BiquadType::LowPass: + b[0] = (1.0f - cos_w0) / 2.0f; + b[1] = 1.0f - cos_w0; + b[2] = (1.0f - cos_w0) / 2.0f; + a[0] = 1.0f + alpha; + a[1] = -2.0f * cos_w0; + a[2] = 1.0f - alpha; + break; + case BiquadType::HighPass: + b[0] = (1.0f + cos_w0) / 2.0f; + b[1] = -(1.0f + cos_w0); + b[2] = (1.0f + cos_w0) / 2.0f; + a[0] = 1.0f + alpha; + a[1] = -2.0f * cos_w0; + a[2] = 1.0f - alpha; + break; + case BiquadType::BandPass: + b[0] = alpha; + b[1] = 0.0f; + b[2] = -alpha; + a[0] = 1.0f + alpha; + a[1] = -2.0f * cos_w0; + a[2] = 1.0f - alpha; + break; + } + + mA1 = a[1] / a[0]; + mA2 = a[2] / a[0]; + mB0 = b[0] / a[0]; + mB1 = b[1] / a[0]; + mB2 = b[2] / a[0]; +} + +template<typename Real> +void BiquadFilterR<Real>::process(const al::span<const Real> src, Real *dst) +{ + const Real b0{mB0}; + const Real b1{mB1}; + const Real b2{mB2}; + const Real a1{mA1}; + const Real a2{mA2}; + Real z1{mZ1}; + Real z2{mZ2}; + + /* Processing loop is Transposed Direct Form II. This requires less storage + * compared to Direct Form I (only two delay components, instead of a four- + * sample history; the last two inputs and outputs), and works better for + * floating-point which favors summing similarly-sized values while being + * less bothered by overflow. 
+ * + * See: http://www.earlevel.com/main/2003/02/28/biquads/ + */ + auto proc_sample = [b0,b1,b2,a1,a2,&z1,&z2](Real input) noexcept -> Real + { + const Real output{input*b0 + z1}; + z1 = input*b1 - output*a1 + z2; + z2 = input*b2 - output*a2; + return output; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + + mZ1 = z1; + mZ2 = z2; +} + +template<typename Real> +void BiquadFilterR<Real>::dualProcess(BiquadFilterR &other, const al::span<const Real> src, + Real *dst) +{ + const Real b00{mB0}; + const Real b01{mB1}; + const Real b02{mB2}; + const Real a01{mA1}; + const Real a02{mA2}; + const Real b10{other.mB0}; + const Real b11{other.mB1}; + const Real b12{other.mB2}; + const Real a11{other.mA1}; + const Real a12{other.mA2}; + Real z01{mZ1}; + Real z02{mZ2}; + Real z11{other.mZ1}; + Real z12{other.mZ2}; + + auto proc_sample = [b00,b01,b02,a01,a02,b10,b11,b12,a11,a12,&z01,&z02,&z11,&z12](Real input) noexcept -> Real + { + const Real tmpout{input*b00 + z01}; + z01 = input*b01 - tmpout*a01 + z02; + z02 = input*b02 - tmpout*a02; + input = tmpout; + + const Real output{input*b10 + z11}; + z11 = input*b11 - output*a11 + z12; + z12 = input*b12 - output*a12; + return output; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + + mZ1 = z01; + mZ2 = z02; + other.mZ1 = z11; + other.mZ2 = z12; +} + +template class BiquadFilterR<float>; +template class BiquadFilterR<double>; diff --git a/core/filters/biquad.h b/core/filters/biquad.h new file mode 100644 index 00000000..75a4009b --- /dev/null +++ b/core/filters/biquad.h @@ -0,0 +1,144 @@ +#ifndef CORE_FILTERS_BIQUAD_H +#define CORE_FILTERS_BIQUAD_H + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <utility> + +#include "alnumbers.h" +#include "alspan.h" + + +/* Filters implementation is based on the "Cookbook formulae for audio + * EQ biquad filter coefficients" by Robert Bristow-Johnson + * http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt + */ +/* Implementation note: 
For the shelf and peaking filters, the specified gain + * is for the centerpoint of the transition band. This better fits EFX filter + * behavior, which expects the shelf's reference frequency to reach the given + * gain. To set the gain for the shelf or peak itself, use the square root of + * the desired linear gain (or halve the dB gain). + */ + +enum class BiquadType { + /** EFX-style low-pass filter, specifying a gain and reference frequency. */ + HighShelf, + /** EFX-style high-pass filter, specifying a gain and reference frequency. */ + LowShelf, + /** Peaking filter, specifying a gain and reference frequency. */ + Peaking, + + /** Low-pass cut-off filter, specifying a cut-off frequency. */ + LowPass, + /** High-pass cut-off filter, specifying a cut-off frequency. */ + HighPass, + /** Band-pass filter, specifying a center frequency. */ + BandPass, +}; + +template<typename Real> +class BiquadFilterR { + /* Last two delayed components for direct form II. */ + Real mZ1{0}, mZ2{0}; + /* Transfer function coefficients "b" (numerator) */ + Real mB0{1}, mB1{0}, mB2{0}; + /* Transfer function coefficients "a" (denominator; a0 is pre-applied). */ + Real mA1{0}, mA2{0}; + + void setParams(BiquadType type, Real f0norm, Real gain, Real rcpQ); + + /** + * Calculates the rcpQ (i.e. 1/Q) coefficient for shelving filters, using + * the reference gain and shelf slope parameter. + * \param gain 0 < gain + * \param slope 0 < slope <= 1 + */ + static Real rcpQFromSlope(Real gain, Real slope) + { return std::sqrt((gain + Real{1}/gain)*(Real{1}/slope - Real{1}) + Real{2}); } + + /** + * Calculates the rcpQ (i.e. 1/Q) coefficient for filters, using the + * normalized reference frequency and bandwidth. + * \param f0norm 0 < f0norm < 0.5. 
+ * \param bandwidth 0 < bandwidth + */ + static Real rcpQFromBandwidth(Real f0norm, Real bandwidth) + { + const Real w0{al::numbers::pi_v<Real>*Real{2} * f0norm}; + return 2.0f*std::sinh(std::log(Real{2})/Real{2}*bandwidth*w0/std::sin(w0)); + } + +public: + void clear() noexcept { mZ1 = mZ2 = Real{0}; } + + /** + * Sets the filter state for the specified filter type and its parameters. + * + * \param type The type of filter to apply. + * \param f0norm The normalized reference frequency (ref / sample_rate). + * This is the center point for the Shelf, Peaking, and BandPass filter + * types, or the cutoff frequency for the LowPass and HighPass filter + * types. + * \param gain The gain for the reference frequency response. Only used by + * the Shelf and Peaking filter types. + * \param slope Slope steepness of the transition band. + */ + void setParamsFromSlope(BiquadType type, Real f0norm, Real gain, Real slope) + { + gain = std::max<Real>(gain, 0.001f); /* Limit -60dB */ + setParams(type, f0norm, gain, rcpQFromSlope(gain, slope)); + } + + /** + * Sets the filter state for the specified filter type and its parameters. + * + * \param type The type of filter to apply. + * \param f0norm The normalized reference frequency (ref / sample_rate). + * This is the center point for the Shelf, Peaking, and BandPass filter + * types, or the cutoff frequency for the LowPass and HighPass filter + * types. + * \param gain The gain for the reference frequency response. Only used by + * the Shelf and Peaking filter types. + * \param bandwidth Normalized bandwidth of the transition band. 
+ */ + void setParamsFromBandwidth(BiquadType type, Real f0norm, Real gain, Real bandwidth) + { setParams(type, f0norm, gain, rcpQFromBandwidth(f0norm, bandwidth)); } + + void copyParamsFrom(const BiquadFilterR &other) + { + mB0 = other.mB0; + mB1 = other.mB1; + mB2 = other.mB2; + mA1 = other.mA1; + mA2 = other.mA2; + } + + void process(const al::span<const Real> src, Real *dst); + /** Processes this filter and the other at the same time. */ + void dualProcess(BiquadFilterR &other, const al::span<const Real> src, Real *dst); + + /* Rather hacky. It's just here to support "manual" processing. */ + std::pair<Real,Real> getComponents() const noexcept { return {mZ1, mZ2}; } + void setComponents(Real z1, Real z2) noexcept { mZ1 = z1; mZ2 = z2; } + Real processOne(const Real in, Real &z1, Real &z2) const noexcept + { + const Real out{in*mB0 + z1}; + z1 = in*mB1 - out*mA1 + z2; + z2 = in*mB2 - out*mA2; + return out; + } +}; + +template<typename Real> +struct DualBiquadR { + BiquadFilterR<Real> &f0, &f1; + + void process(const al::span<const Real> src, Real *dst) + { f0.dualProcess(f1, src, dst); } +}; + +using BiquadFilter = BiquadFilterR<float>; +using DualBiquad = DualBiquadR<float>; + +#endif /* CORE_FILTERS_BIQUAD_H */ diff --git a/core/filters/nfc.cpp b/core/filters/nfc.cpp new file mode 100644 index 00000000..aa64c613 --- /dev/null +++ b/core/filters/nfc.cpp @@ -0,0 +1,367 @@ + +#include "config.h" + +#include "nfc.h" + +#include <algorithm> + +#include "opthelpers.h" + + +/* Near-field control filters are the basis for handling the near-field effect. + * The near-field effect is a bass-boost present in the directional components + * of a recorded signal, created as a result of the wavefront curvature (itself + * a function of sound distance). Proper reproduction dictates this be + * compensated for using a bass-cut given the playback speaker distance, to + * avoid excessive bass in the playback. 
+ * + * For real-time rendered audio, emulating the near-field effect based on the + * sound source's distance, and subsequently compensating for it at output + * based on the speaker distances, can create a more realistic perception of + * sound distance beyond a simple 1/r attenuation. + * + * These filters do just that. Each one applies a low-shelf filter, created as + * the combination of a bass-boost for a given sound source distance (near- + * field emulation) along with a bass-cut for a given control/speaker distance + * (near-field compensation). + * + * Note that it is necessary to apply a cut along with the boost, since the + * boost alone is unstable in higher-order ambisonics as it causes an infinite + * DC gain (even first-order ambisonics requires there to be no DC offset for + * the boost to work). Consequently, ambisonics requires a control parameter to + * be used to avoid an unstable boost-only filter. NFC-HOA defines this control + * as a reference delay, calculated with: + * + * reference_delay = control_distance / speed_of_sound + * + * This means w0 (for input) or w1 (for output) should be set to: + * + * wN = 1 / (reference_delay * sample_rate) + * + * when dealing with NFC-HOA content. For FOA input content, which does not + * specify a reference_delay variable, w0 should be set to 0 to apply only + * near-field compensation for output. It's important that w1 be a finite, + * positive, non-0 value or else the bass-boost will become unstable again. + * Also, w0 should not be too large compared to w1, to avoid excessively loud + * low frequencies. + */ + +namespace { + +constexpr float B[5][4] = { + { 0.0f }, + { 1.0f }, + { 3.0f, 3.0f }, + { 3.6778f, 6.4595f, 2.3222f }, + { 4.2076f, 11.4877f, 5.7924f, 9.1401f } +}; + +NfcFilter1 NfcFilterCreate1(const float w0, const float w1) noexcept +{ + NfcFilter1 nfc{}; + float b_00, g_0; + float r; + + /* Calculate bass-cut coefficients. 
*/ + r = 0.5f * w1; + b_00 = B[1][0] * r; + g_0 = 1.0f + b_00; + + nfc.base_gain = 1.0f / g_0; + nfc.a1 = 2.0f * b_00 / g_0; + + /* Calculate bass-boost coefficients. */ + r = 0.5f * w0; + b_00 = B[1][0] * r; + g_0 = 1.0f + b_00; + + nfc.gain = nfc.base_gain * g_0; + nfc.b1 = 2.0f * b_00 / g_0; + + return nfc; +} + +void NfcFilterAdjust1(NfcFilter1 *nfc, const float w0) noexcept +{ + const float r{0.5f * w0}; + const float b_00{B[1][0] * r}; + const float g_0{1.0f + b_00}; + + nfc->gain = nfc->base_gain * g_0; + nfc->b1 = 2.0f * b_00 / g_0; +} + + +NfcFilter2 NfcFilterCreate2(const float w0, const float w1) noexcept +{ + NfcFilter2 nfc{}; + float b_10, b_11, g_1; + float r; + + /* Calculate bass-cut coefficients. */ + r = 0.5f * w1; + b_10 = B[2][0] * r; + b_11 = B[2][1] * r * r; + g_1 = 1.0f + b_10 + b_11; + + nfc.base_gain = 1.0f / g_1; + nfc.a1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.a2 = 4.0f * b_11 / g_1; + + /* Calculate bass-boost coefficients. */ + r = 0.5f * w0; + b_10 = B[2][0] * r; + b_11 = B[2][1] * r * r; + g_1 = 1.0f + b_10 + b_11; + + nfc.gain = nfc.base_gain * g_1; + nfc.b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.b2 = 4.0f * b_11 / g_1; + + return nfc; +} + +void NfcFilterAdjust2(NfcFilter2 *nfc, const float w0) noexcept +{ + const float r{0.5f * w0}; + const float b_10{B[2][0] * r}; + const float b_11{B[2][1] * r * r}; + const float g_1{1.0f + b_10 + b_11}; + + nfc->gain = nfc->base_gain * g_1; + nfc->b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc->b2 = 4.0f * b_11 / g_1; +} + + +NfcFilter3 NfcFilterCreate3(const float w0, const float w1) noexcept +{ + NfcFilter3 nfc{}; + float b_10, b_11, g_1; + float b_00, g_0; + float r; + + /* Calculate bass-cut coefficients. 
*/ + r = 0.5f * w1; + b_10 = B[3][0] * r; + b_11 = B[3][1] * r * r; + b_00 = B[3][2] * r; + g_1 = 1.0f + b_10 + b_11; + g_0 = 1.0f + b_00; + + nfc.base_gain = 1.0f / (g_1 * g_0); + nfc.a1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.a2 = 4.0f * b_11 / g_1; + nfc.a3 = 2.0f * b_00 / g_0; + + /* Calculate bass-boost coefficients. */ + r = 0.5f * w0; + b_10 = B[3][0] * r; + b_11 = B[3][1] * r * r; + b_00 = B[3][2] * r; + g_1 = 1.0f + b_10 + b_11; + g_0 = 1.0f + b_00; + + nfc.gain = nfc.base_gain * (g_1 * g_0); + nfc.b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.b2 = 4.0f * b_11 / g_1; + nfc.b3 = 2.0f * b_00 / g_0; + + return nfc; +} + +void NfcFilterAdjust3(NfcFilter3 *nfc, const float w0) noexcept +{ + const float r{0.5f * w0}; + const float b_10{B[3][0] * r}; + const float b_11{B[3][1] * r * r}; + const float b_00{B[3][2] * r}; + const float g_1{1.0f + b_10 + b_11}; + const float g_0{1.0f + b_00}; + + nfc->gain = nfc->base_gain * (g_1 * g_0); + nfc->b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc->b2 = 4.0f * b_11 / g_1; + nfc->b3 = 2.0f * b_00 / g_0; +} + + +NfcFilter4 NfcFilterCreate4(const float w0, const float w1) noexcept +{ + NfcFilter4 nfc{}; + float b_10, b_11, g_1; + float b_00, b_01, g_0; + float r; + + /* Calculate bass-cut coefficients. */ + r = 0.5f * w1; + b_10 = B[4][0] * r; + b_11 = B[4][1] * r * r; + b_00 = B[4][2] * r; + b_01 = B[4][3] * r * r; + g_1 = 1.0f + b_10 + b_11; + g_0 = 1.0f + b_00 + b_01; + + nfc.base_gain = 1.0f / (g_1 * g_0); + nfc.a1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.a2 = 4.0f * b_11 / g_1; + nfc.a3 = (2.0f*b_00 + 4.0f*b_01) / g_0; + nfc.a4 = 4.0f * b_01 / g_0; + + /* Calculate bass-boost coefficients. 
*/ + r = 0.5f * w0; + b_10 = B[4][0] * r; + b_11 = B[4][1] * r * r; + b_00 = B[4][2] * r; + b_01 = B[4][3] * r * r; + g_1 = 1.0f + b_10 + b_11; + g_0 = 1.0f + b_00 + b_01; + + nfc.gain = nfc.base_gain * (g_1 * g_0); + nfc.b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc.b2 = 4.0f * b_11 / g_1; + nfc.b3 = (2.0f*b_00 + 4.0f*b_01) / g_0; + nfc.b4 = 4.0f * b_01 / g_0; + + return nfc; +} + +void NfcFilterAdjust4(NfcFilter4 *nfc, const float w0) noexcept +{ + const float r{0.5f * w0}; + const float b_10{B[4][0] * r}; + const float b_11{B[4][1] * r * r}; + const float b_00{B[4][2] * r}; + const float b_01{B[4][3] * r * r}; + const float g_1{1.0f + b_10 + b_11}; + const float g_0{1.0f + b_00 + b_01}; + + nfc->gain = nfc->base_gain * (g_1 * g_0); + nfc->b1 = (2.0f*b_10 + 4.0f*b_11) / g_1; + nfc->b2 = 4.0f * b_11 / g_1; + nfc->b3 = (2.0f*b_00 + 4.0f*b_01) / g_0; + nfc->b4 = 4.0f * b_01 / g_0; +} + +} // namespace + +void NfcFilter::init(const float w1) noexcept +{ + first = NfcFilterCreate1(0.0f, w1); + second = NfcFilterCreate2(0.0f, w1); + third = NfcFilterCreate3(0.0f, w1); + fourth = NfcFilterCreate4(0.0f, w1); +} + +void NfcFilter::adjust(const float w0) noexcept +{ + NfcFilterAdjust1(&first, w0); + NfcFilterAdjust2(&second, w0); + NfcFilterAdjust3(&third, w0); + NfcFilterAdjust4(&fourth, w0); +} + + +void NfcFilter::process1(const al::span<const float> src, float *RESTRICT dst) +{ + const float gain{first.gain}; + const float b1{first.b1}; + const float a1{first.a1}; + float z1{first.z[0]}; + auto proc_sample = [gain,b1,a1,&z1](const float in) noexcept -> float + { + const float y{in*gain - a1*z1}; + const float out{y + b1*z1}; + z1 += y; + return out; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + first.z[0] = z1; +} + +void NfcFilter::process2(const al::span<const float> src, float *RESTRICT dst) +{ + const float gain{second.gain}; + const float b1{second.b1}; + const float b2{second.b2}; + const float a1{second.a1}; + const float a2{second.a2}; + 
float z1{second.z[0]}; + float z2{second.z[1]}; + auto proc_sample = [gain,b1,b2,a1,a2,&z1,&z2](const float in) noexcept -> float + { + const float y{in*gain - a1*z1 - a2*z2}; + const float out{y + b1*z1 + b2*z2}; + z2 += z1; + z1 += y; + return out; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + second.z[0] = z1; + second.z[1] = z2; +} + +void NfcFilter::process3(const al::span<const float> src, float *RESTRICT dst) +{ + const float gain{third.gain}; + const float b1{third.b1}; + const float b2{third.b2}; + const float b3{third.b3}; + const float a1{third.a1}; + const float a2{third.a2}; + const float a3{third.a3}; + float z1{third.z[0]}; + float z2{third.z[1]}; + float z3{third.z[2]}; + auto proc_sample = [gain,b1,b2,b3,a1,a2,a3,&z1,&z2,&z3](const float in) noexcept -> float + { + float y{in*gain - a1*z1 - a2*z2}; + float out{y + b1*z1 + b2*z2}; + z2 += z1; + z1 += y; + + y = out - a3*z3; + out = y + b3*z3; + z3 += y; + return out; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + third.z[0] = z1; + third.z[1] = z2; + third.z[2] = z3; +} + +void NfcFilter::process4(const al::span<const float> src, float *RESTRICT dst) +{ + const float gain{fourth.gain}; + const float b1{fourth.b1}; + const float b2{fourth.b2}; + const float b3{fourth.b3}; + const float b4{fourth.b4}; + const float a1{fourth.a1}; + const float a2{fourth.a2}; + const float a3{fourth.a3}; + const float a4{fourth.a4}; + float z1{fourth.z[0]}; + float z2{fourth.z[1]}; + float z3{fourth.z[2]}; + float z4{fourth.z[3]}; + auto proc_sample = [gain,b1,b2,b3,b4,a1,a2,a3,a4,&z1,&z2,&z3,&z4](const float in) noexcept -> float + { + float y{in*gain - a1*z1 - a2*z2}; + float out{y + b1*z1 + b2*z2}; + z2 += z1; + z1 += y; + + y = out - a3*z3 - a4*z4; + out = y + b3*z3 + b4*z4; + z4 += z3; + z3 += y; + return out; + }; + std::transform(src.cbegin(), src.cend(), dst, proc_sample); + fourth.z[0] = z1; + fourth.z[1] = z2; + fourth.z[2] = z3; + fourth.z[3] = z4; +} diff --git 
a/core/filters/nfc.h b/core/filters/nfc.h new file mode 100644 index 00000000..33f67a5f --- /dev/null +++ b/core/filters/nfc.h @@ -0,0 +1,63 @@ +#ifndef CORE_FILTERS_NFC_H +#define CORE_FILTERS_NFC_H + +#include <cstddef> + +#include "alspan.h" + + +struct NfcFilter1 { + float base_gain, gain; + float b1, a1; + float z[1]; +}; +struct NfcFilter2 { + float base_gain, gain; + float b1, b2, a1, a2; + float z[2]; +}; +struct NfcFilter3 { + float base_gain, gain; + float b1, b2, b3, a1, a2, a3; + float z[3]; +}; +struct NfcFilter4 { + float base_gain, gain; + float b1, b2, b3, b4, a1, a2, a3, a4; + float z[4]; +}; + +class NfcFilter { + NfcFilter1 first; + NfcFilter2 second; + NfcFilter3 third; + NfcFilter4 fourth; + +public: + /* NOTE: + * w0 = speed_of_sound / (source_distance * sample_rate); + * w1 = speed_of_sound / (control_distance * sample_rate); + * + * Generally speaking, the control distance should be approximately the + * average speaker distance, or based on the reference delay if outputting + * NFC-HOA. It must not be negative, 0, or infinite. The source distance + * should not be too small relative to the control distance. + */ + + void init(const float w1) noexcept; + void adjust(const float w0) noexcept; + + /* Near-field control filter for first-order ambisonic channels (1-3). */ + void process1(const al::span<const float> src, float *RESTRICT dst); + + /* Near-field control filter for second-order ambisonic channels (4-8). */ + void process2(const al::span<const float> src, float *RESTRICT dst); + + /* Near-field control filter for third-order ambisonic channels (9-15). */ + void process3(const al::span<const float> src, float *RESTRICT dst); + + /* Near-field control filter for fourth-order ambisonic channels (16-24). 
*/ + void process4(const al::span<const float> src, float *RESTRICT dst); +}; + +#endif /* CORE_FILTERS_NFC_H */ diff --git a/core/filters/splitter.cpp b/core/filters/splitter.cpp new file mode 100644 index 00000000..983ba36f --- /dev/null +++ b/core/filters/splitter.cpp @@ -0,0 +1,179 @@ + +#include "config.h" + +#include "splitter.h" + +#include <algorithm> +#include <cmath> +#include <limits> + +#include "alnumbers.h" +#include "opthelpers.h" + + +template<typename Real> +void BandSplitterR<Real>::init(Real f0norm) +{ + const Real w{f0norm * (al::numbers::pi_v<Real>*2)}; + const Real cw{std::cos(w)}; + if(cw > std::numeric_limits<float>::epsilon()) + mCoeff = (std::sin(w) - 1.0f) / cw; + else + mCoeff = cw * -0.5f; + + mLpZ1 = 0.0f; + mLpZ2 = 0.0f; + mApZ1 = 0.0f; +} + +template<typename Real> +void BandSplitterR<Real>::process(const al::span<const Real> input, Real *hpout, Real *lpout) +{ + const Real ap_coeff{mCoeff}; + const Real lp_coeff{mCoeff*0.5f + 0.5f}; + Real lp_z1{mLpZ1}; + Real lp_z2{mLpZ2}; + Real ap_z1{mApZ1}; + auto proc_sample = [ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1,&lpout](const Real in) noexcept -> Real + { + /* Low-pass sample processing. */ + Real d{(in - lp_z1) * lp_coeff}; + Real lp_y{lp_z1 + d}; + lp_z1 = lp_y + d; + + d = (lp_y - lp_z2) * lp_coeff; + lp_y = lp_z2 + d; + lp_z2 = lp_y + d; + + *(lpout++) = lp_y; + + /* All-pass sample processing. */ + Real ap_y{in*ap_coeff + ap_z1}; + ap_z1 = in - ap_y*ap_coeff; + + /* High-pass generated from removing low-passed output. 
*/ + return ap_y - lp_y; + }; + std::transform(input.cbegin(), input.cend(), hpout, proc_sample); + mLpZ1 = lp_z1; + mLpZ2 = lp_z2; + mApZ1 = ap_z1; +} + +template<typename Real> +void BandSplitterR<Real>::processHfScale(const al::span<const Real> input, Real *RESTRICT output, + const Real hfscale) +{ + const Real ap_coeff{mCoeff}; + const Real lp_coeff{mCoeff*0.5f + 0.5f}; + Real lp_z1{mLpZ1}; + Real lp_z2{mLpZ2}; + Real ap_z1{mApZ1}; + auto proc_sample = [hfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real + { + /* Low-pass sample processing. */ + Real d{(in - lp_z1) * lp_coeff}; + Real lp_y{lp_z1 + d}; + lp_z1 = lp_y + d; + + d = (lp_y - lp_z2) * lp_coeff; + lp_y = lp_z2 + d; + lp_z2 = lp_y + d; + + /* All-pass sample processing. */ + Real ap_y{in*ap_coeff + ap_z1}; + ap_z1 = in - ap_y*ap_coeff; + + /* High-pass generated by removing the low-passed signal, which is then + * scaled and added back to the low-passed signal. + */ + return (ap_y-lp_y)*hfscale + lp_y; + }; + std::transform(input.begin(), input.end(), output, proc_sample); + mLpZ1 = lp_z1; + mLpZ2 = lp_z2; + mApZ1 = ap_z1; +} + +template<typename Real> +void BandSplitterR<Real>::processHfScale(const al::span<Real> samples, const Real hfscale) +{ + const Real ap_coeff{mCoeff}; + const Real lp_coeff{mCoeff*0.5f + 0.5f}; + Real lp_z1{mLpZ1}; + Real lp_z2{mLpZ2}; + Real ap_z1{mApZ1}; + auto proc_sample = [hfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real + { + /* Low-pass sample processing. */ + Real d{(in - lp_z1) * lp_coeff}; + Real lp_y{lp_z1 + d}; + lp_z1 = lp_y + d; + + d = (lp_y - lp_z2) * lp_coeff; + lp_y = lp_z2 + d; + lp_z2 = lp_y + d; + + /* All-pass sample processing. */ + Real ap_y{in*ap_coeff + ap_z1}; + ap_z1 = in - ap_y*ap_coeff; + + /* High-pass generated by removing the low-passed signal, which is then + * scaled and added back to the low-passed signal. 
+ */ + return (ap_y-lp_y)*hfscale + lp_y; + }; + std::transform(samples.begin(), samples.end(), samples.begin(), proc_sample); + mLpZ1 = lp_z1; + mLpZ2 = lp_z2; + mApZ1 = ap_z1; +} + +template<typename Real> +void BandSplitterR<Real>::processScale(const al::span<Real> samples, const Real hfscale, const Real lfscale) +{ + const Real ap_coeff{mCoeff}; + const Real lp_coeff{mCoeff*0.5f + 0.5f}; + Real lp_z1{mLpZ1}; + Real lp_z2{mLpZ2}; + Real ap_z1{mApZ1}; + auto proc_sample = [hfscale,lfscale,ap_coeff,lp_coeff,&lp_z1,&lp_z2,&ap_z1](const Real in) noexcept -> Real + { + Real d{(in - lp_z1) * lp_coeff}; + Real lp_y{lp_z1 + d}; + lp_z1 = lp_y + d; + + d = (lp_y - lp_z2) * lp_coeff; + lp_y = lp_z2 + d; + lp_z2 = lp_y + d; + + Real ap_y{in*ap_coeff + ap_z1}; + ap_z1 = in - ap_y*ap_coeff; + + /* Apply separate factors to the high and low frequencies. */ + return (ap_y-lp_y)*hfscale + lp_y*lfscale; + }; + std::transform(samples.begin(), samples.end(), samples.begin(), proc_sample); + mLpZ1 = lp_z1; + mLpZ2 = lp_z2; + mApZ1 = ap_z1; +} + +template<typename Real> +void BandSplitterR<Real>::processAllPass(const al::span<Real> samples) +{ + const Real coeff{mCoeff}; + Real z1{mApZ1}; + auto proc_sample = [coeff,&z1](const Real in) noexcept -> Real + { + const Real out{in*coeff + z1}; + z1 = in - out*coeff; + return out; + }; + std::transform(samples.cbegin(), samples.cend(), samples.begin(), proc_sample); + mApZ1 = z1; +} + + +template class BandSplitterR<float>; +template class BandSplitterR<double>; diff --git a/core/filters/splitter.h b/core/filters/splitter.h new file mode 100644 index 00000000..e853eb38 --- /dev/null +++ b/core/filters/splitter.h @@ -0,0 +1,40 @@ +#ifndef CORE_FILTERS_SPLITTER_H +#define CORE_FILTERS_SPLITTER_H + +#include <cstddef> + +#include "alspan.h" + + +/* Band splitter. Splits a signal into two phase-matching frequency bands. 
*/ +template<typename Real> +class BandSplitterR { + Real mCoeff{0.0f}; + Real mLpZ1{0.0f}; + Real mLpZ2{0.0f}; + Real mApZ1{0.0f}; + +public: + BandSplitterR() = default; + BandSplitterR(const BandSplitterR&) = default; + BandSplitterR(Real f0norm) { init(f0norm); } + BandSplitterR& operator=(const BandSplitterR&) = default; + + void init(Real f0norm); + void clear() noexcept { mLpZ1 = mLpZ2 = mApZ1 = 0.0f; } + void process(const al::span<const Real> input, Real *hpout, Real *lpout); + + void processHfScale(const al::span<const Real> input, Real *output, const Real hfscale); + + void processHfScale(const al::span<Real> samples, const Real hfscale); + void processScale(const al::span<Real> samples, const Real hfscale, const Real lfscale); + + /** + * The all-pass portion of the band splitter. Applies the same phase shift + * without splitting or scaling the signal. + */ + void processAllPass(const al::span<Real> samples); +}; +using BandSplitter = BandSplitterR<float>; + +#endif /* CORE_FILTERS_SPLITTER_H */ diff --git a/core/fmt_traits.cpp b/core/fmt_traits.cpp new file mode 100644 index 00000000..054d8766 --- /dev/null +++ b/core/fmt_traits.cpp @@ -0,0 +1,79 @@ + +#include "config.h" + +#include "fmt_traits.h" + + +namespace al { + +const int16_t muLawDecompressionTable[256] = { + -32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956, + -23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764, + -15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412, + -11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316, + -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140, + -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092, + -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004, + -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980, + -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436, + -1372, -1308, -1244, -1180, -1116, -1052, -988, -924, + -876, -844, -812, -780, -748, -716, -684, -652, + -620, -588, -556, -524, -492, -460, -428, -396, + -372, -356, 
-340, -324, -308, -292, -276, -260, + -244, -228, -212, -196, -180, -164, -148, -132, + -120, -112, -104, -96, -88, -80, -72, -64, + -56, -48, -40, -32, -24, -16, -8, 0, + 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956, + 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764, + 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412, + 11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316, + 7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140, + 5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092, + 3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004, + 2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980, + 1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436, + 1372, 1308, 1244, 1180, 1116, 1052, 988, 924, + 876, 844, 812, 780, 748, 716, 684, 652, + 620, 588, 556, 524, 492, 460, 428, 396, + 372, 356, 340, 324, 308, 292, 276, 260, + 244, 228, 212, 196, 180, 164, 148, 132, + 120, 112, 104, 96, 88, 80, 72, 64, + 56, 48, 40, 32, 24, 16, 8, 0 +}; + +const int16_t aLawDecompressionTable[256] = { + -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736, + -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784, + -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368, + -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392, + -22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944, + -30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136, + -11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472, + -15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568, + -344, -328, -376, -360, -280, -264, -312, -296, + -472, -456, -504, -488, -408, -392, -440, -424, + -88, -72, -120, -104, -24, -8, -56, -40, + -216, -200, -248, -232, -152, -136, -184, -168, + -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184, + -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696, + -688, -656, -752, -720, -560, -528, -624, -592, + -944, -912, -1008, -976, -816, -784, -880, -848, + 5504, 5248, 6016, 5760, 4480, 4224, 4992, 4736, + 7552, 7296, 8064, 7808, 6528, 6272, 7040, 6784, + 2752, 2624, 3008, 2880, 2240, 
2112, 2496, 2368, + 3776, 3648, 4032, 3904, 3264, 3136, 3520, 3392, + 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944, + 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136, + 11008, 10496, 12032, 11520, 8960, 8448, 9984, 9472, + 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568, + 344, 328, 376, 360, 280, 264, 312, 296, + 472, 456, 504, 488, 408, 392, 440, 424, + 88, 72, 120, 104, 24, 8, 56, 40, + 216, 200, 248, 232, 152, 136, 184, 168, + 1376, 1312, 1504, 1440, 1120, 1056, 1248, 1184, + 1888, 1824, 2016, 1952, 1632, 1568, 1760, 1696, + 688, 656, 752, 720, 560, 528, 624, 592, + 944, 912, 1008, 976, 816, 784, 880, 848 +}; + +} // namespace al diff --git a/core/fmt_traits.h b/core/fmt_traits.h new file mode 100644 index 00000000..f797f836 --- /dev/null +++ b/core/fmt_traits.h @@ -0,0 +1,81 @@ +#ifndef CORE_FMT_TRAITS_H +#define CORE_FMT_TRAITS_H + +#include <stddef.h> +#include <stdint.h> + +#include "albyte.h" +#include "buffer_storage.h" + + +namespace al { + +extern const int16_t muLawDecompressionTable[256]; +extern const int16_t aLawDecompressionTable[256]; + + +template<FmtType T> +struct FmtTypeTraits { }; + +template<> +struct FmtTypeTraits<FmtUByte> { + using Type = uint8_t; + + template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept + { return val*OutT{1.0/128.0} - OutT{1.0}; } +}; +template<> +struct FmtTypeTraits<FmtShort> { + using Type = int16_t; + + template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept { return val*OutT{1.0/32768.0}; } +}; +template<> +struct FmtTypeTraits<FmtFloat> { + using Type = float; + + template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept { return val; } +}; +template<> +struct FmtTypeTraits<FmtDouble> { + using Type = double; + + template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept { return static_cast<OutT>(val); } +}; +template<> +struct FmtTypeTraits<FmtMulaw> { + using Type = uint8_t; + + 
template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept + { return muLawDecompressionTable[val] * OutT{1.0/32768.0}; } +}; +template<> +struct FmtTypeTraits<FmtAlaw> { + using Type = uint8_t; + + template<typename OutT> + static constexpr inline OutT to(const Type val) noexcept + { return aLawDecompressionTable[val] * OutT{1.0/32768.0}; } +}; + + +template<FmtType SrcType, typename DstT> +inline void LoadSampleArray(DstT *RESTRICT dst, const al::byte *src, const size_t srcstep, + const size_t samples) noexcept +{ + using TypeTraits = FmtTypeTraits<SrcType>; + using SampleType = typename TypeTraits::Type; + + const SampleType *RESTRICT ssrc{reinterpret_cast<const SampleType*>(src)}; + for(size_t i{0u};i < samples;i++) + dst[i] = TypeTraits::template to<DstT>(ssrc[i*srcstep]); +} + +} // namespace al + +#endif /* CORE_FMT_TRAITS_H */ diff --git a/core/fpu_ctrl.cpp b/core/fpu_ctrl.cpp new file mode 100644 index 00000000..0cf0d6e7 --- /dev/null +++ b/core/fpu_ctrl.cpp @@ -0,0 +1,61 @@ + +#include "config.h" + +#include "fpu_ctrl.h" + +#ifdef HAVE_INTRIN_H +#include <intrin.h> +#endif +#ifdef HAVE_SSE_INTRINSICS +#include <emmintrin.h> +#ifndef _MM_DENORMALS_ZERO_MASK +/* Some headers seem to be missing these? 
*/ +#define _MM_DENORMALS_ZERO_MASK 0x0040u +#define _MM_DENORMALS_ZERO_ON 0x0040u +#endif +#endif + +#include "cpu_caps.h" + + +void FPUCtl::enter() noexcept +{ + if(this->in_mode) return; + +#if defined(HAVE_SSE_INTRINSICS) + this->sse_state = _mm_getcsr(); + unsigned int sseState{this->sse_state}; + sseState &= ~(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK); + sseState |= _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON; + _mm_setcsr(sseState); + +#elif defined(__GNUC__) && defined(HAVE_SSE) + + if((CPUCapFlags&CPU_CAP_SSE)) + { + __asm__ __volatile__("stmxcsr %0" : "=m" (*&this->sse_state)); + unsigned int sseState{this->sse_state}; + sseState |= 0x8000; /* set flush-to-zero */ + if((CPUCapFlags&CPU_CAP_SSE2)) + sseState |= 0x0040; /* set denormals-are-zero */ + __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState)); + } +#endif + + this->in_mode = true; +} + +void FPUCtl::leave() noexcept +{ + if(!this->in_mode) return; + +#if defined(HAVE_SSE_INTRINSICS) + _mm_setcsr(this->sse_state); + +#elif defined(__GNUC__) && defined(HAVE_SSE) + + if((CPUCapFlags&CPU_CAP_SSE)) + __asm__ __volatile__("ldmxcsr %0" : : "m" (*&this->sse_state)); +#endif + this->in_mode = false; +} diff --git a/core/fpu_ctrl.h b/core/fpu_ctrl.h new file mode 100644 index 00000000..9554313a --- /dev/null +++ b/core/fpu_ctrl.h @@ -0,0 +1,21 @@ +#ifndef CORE_FPU_CTRL_H +#define CORE_FPU_CTRL_H + +class FPUCtl { +#if defined(HAVE_SSE_INTRINSICS) || (defined(__GNUC__) && defined(HAVE_SSE)) + unsigned int sse_state{}; +#endif + bool in_mode{}; + +public: + FPUCtl() noexcept { enter(); in_mode = true; } + ~FPUCtl() { if(in_mode) leave(); } + + FPUCtl(const FPUCtl&) = delete; + FPUCtl& operator=(const FPUCtl&) = delete; + + void enter() noexcept; + void leave() noexcept; +}; + +#endif /* CORE_FPU_CTRL_H */ diff --git a/core/front_stablizer.h b/core/front_stablizer.h new file mode 100644 index 00000000..6825111a --- /dev/null +++ b/core/front_stablizer.h @@ -0,0 +1,31 @@ +#ifndef CORE_FRONT_STABLIZER_H 
/* Working state for the front stablizer: a few 16-byte aligned scratch lines,
 * one band splitter with its two output lines, and a variable-length array
 * of per-channel band splitters stored directly after the struct.
 *
 * Instances must be made with Create(), which allocates room for the
 * trailing array. ChannelFilters has to stay the last data member.
 */
struct FrontStablizer {
    /* Sizes the trailing filter array for numchans channels. The storage
     * itself comes from the allocation made in Create().
     */
    FrontStablizer(size_t numchans) : ChannelFilters{numchans} { }

    /* Scratch lines of BufferLineSize floats each.
     * NOTE(review): the names suggest mid/side processing buffers plus a
     * general temporary - confirm against the code that uses them.
     */
    alignas(16) std::array<float,BufferLineSize> MidDirect{};
    alignas(16) std::array<float,BufferLineSize> Side{};
    alignas(16) std::array<float,BufferLineSize> Temp{};

    /* NOTE(review): presumably MidFilter splits the mid signal into the
     * MidLF and MidHF lines below - confirm against the caller.
     */
    BandSplitter MidFilter;
    alignas(16) FloatBufferLine MidLF{};
    alignas(16) FloatBufferLine MidHF{};

    /* One band splitter per channel; length fixed at construction. */
    al::FlexArray<BandSplitter,16> ChannelFilters;

    /* Allocates and constructs a FrontStablizer with numchans channel
     * filters, using the sized operator new declared below.
     */
    static std::unique_ptr<FrontStablizer> Create(size_t numchans)
    { return std::unique_ptr<FrontStablizer>{new(FamCount(numchans)) FrontStablizer{numchans}}; }

    /* Declares the matching new/delete operators for the flexible array. */
    DEF_FAM_NEWDEL(FrontStablizer, ChannelFilters)
};
*/ +bool AllowRTTimeLimit{true}; + + +#ifdef _WIN32 + +#include <shlobj.h> + +const PathNamePair &GetProcBinary() +{ + static al::optional<PathNamePair> procbin; + if(procbin) return *procbin; + + auto fullpath = al::vector<WCHAR>(256); + DWORD len{GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size()))}; + while(len == fullpath.size()) + { + fullpath.resize(fullpath.size() << 1); + len = GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size())); + } + if(len == 0) + { + ERR("Failed to get process name: error %lu\n", GetLastError()); + procbin.emplace(); + return *procbin; + } + + fullpath.resize(len); + if(fullpath.back() != 0) + fullpath.push_back(0); + + std::replace(fullpath.begin(), fullpath.end(), '/', '\\'); + auto sep = std::find(fullpath.rbegin()+1, fullpath.rend(), '\\'); + if(sep != fullpath.rend()) + { + *sep = 0; + procbin.emplace(wstr_to_utf8(fullpath.data()), wstr_to_utf8(al::to_address(sep.base()))); + } + else + procbin.emplace(std::string{}, wstr_to_utf8(fullpath.data())); + + TRACE("Got binary: %s, %s\n", procbin->path.c_str(), procbin->fname.c_str()); + return *procbin; +} + +namespace { + +void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results) +{ + std::string pathstr{path}; + pathstr += "\\*"; + pathstr += ext; + TRACE("Searching %s\n", pathstr.c_str()); + + std::wstring wpath{utf8_to_wstr(pathstr.c_str())}; + WIN32_FIND_DATAW fdata; + HANDLE hdl{FindFirstFileW(wpath.c_str(), &fdata)}; + if(hdl == INVALID_HANDLE_VALUE) return; + + const auto base = results->size(); + + do { + results->emplace_back(); + std::string &str = results->back(); + str = path; + str += '\\'; + str += wstr_to_utf8(fdata.cFileName); + } while(FindNextFileW(hdl, &fdata)); + FindClose(hdl); + + const al::span<std::string> newlist{results->data()+base, results->size()-base}; + std::sort(newlist.begin(), newlist.end()); + for(const auto &name : newlist) + TRACE(" got %s\n", 
name.c_str()); +} + +} // namespace + +al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir) +{ + auto is_slash = [](int c) noexcept -> int { return (c == '\\' || c == '/'); }; + + static std::mutex search_lock; + std::lock_guard<std::mutex> _{search_lock}; + + /* If the path is absolute, use it directly. */ + al::vector<std::string> results; + if(isalpha(subdir[0]) && subdir[1] == ':' && is_slash(subdir[2])) + { + std::string path{subdir}; + std::replace(path.begin(), path.end(), '/', '\\'); + DirectorySearch(path.c_str(), ext, &results); + return results; + } + if(subdir[0] == '\\' && subdir[1] == '\\' && subdir[2] == '?' && subdir[3] == '\\') + { + DirectorySearch(subdir, ext, &results); + return results; + } + + std::string path; + + /* Search the app-local directory. */ + if(auto localpath = al::getenv(L"ALSOFT_LOCAL_PATH")) + { + path = wstr_to_utf8(localpath->c_str()); + if(is_slash(path.back())) + path.pop_back(); + } + else if(WCHAR *cwdbuf{_wgetcwd(nullptr, 0)}) + { + path = wstr_to_utf8(cwdbuf); + if(is_slash(path.back())) + path.pop_back(); + free(cwdbuf); + } + else + path = "."; + std::replace(path.begin(), path.end(), '/', '\\'); + DirectorySearch(path.c_str(), ext, &results); + + /* Search the local and global data dirs. 
*/ + static const int ids[2]{ CSIDL_APPDATA, CSIDL_COMMON_APPDATA }; + for(int id : ids) + { + WCHAR buffer[MAX_PATH]; + if(SHGetSpecialFolderPathW(nullptr, buffer, id, FALSE) == FALSE) + continue; + + path = wstr_to_utf8(buffer); + if(!is_slash(path.back())) + path += '\\'; + path += subdir; + std::replace(path.begin(), path.end(), '/', '\\'); + + DirectorySearch(path.c_str(), ext, &results); + } + + return results; +} + +void SetRTPriority(void) +{ + if(RTPrioLevel > 0) + { + if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL)) + ERR("Failed to set priority level for thread\n"); + } +} + +#else + +#include <sys/types.h> +#include <unistd.h> +#include <dirent.h> +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#endif +#ifdef __HAIKU__ +#include <FindDirectory.h> +#endif +#ifdef HAVE_PROC_PIDPATH +#include <libproc.h> +#endif +#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__) +#include <pthread.h> +#include <sched.h> +#endif +#ifdef HAVE_RTKIT +#include <sys/time.h> +#include <sys/resource.h> + +#include "dbus_wrap.h" +#include "rtkit.h" +#ifndef RLIMIT_RTTIME +#define RLIMIT_RTTIME 15 +#endif +#endif + +const PathNamePair &GetProcBinary() +{ + static al::optional<PathNamePair> procbin; + if(procbin) return *procbin; + + al::vector<char> pathname; +#ifdef __FreeBSD__ + size_t pathlen; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; + if(sysctl(mib, 4, nullptr, &pathlen, nullptr, 0) == -1) + WARN("Failed to sysctl kern.proc.pathname: %s\n", strerror(errno)); + else + { + pathname.resize(pathlen + 1); + sysctl(mib, 4, pathname.data(), &pathlen, nullptr, 0); + pathname.resize(pathlen); + } +#endif +#ifdef HAVE_PROC_PIDPATH + if(pathname.empty()) + { + char procpath[PROC_PIDPATHINFO_MAXSIZE]{}; + const pid_t pid{getpid()}; + if(proc_pidpath(pid, procpath, sizeof(procpath)) < 1) + ERR("proc_pidpath(%d, ...) 
failed: %s\n", pid, strerror(errno)); + else + pathname.insert(pathname.end(), procpath, procpath+strlen(procpath)); + } +#endif +#ifdef __HAIKU__ + if(pathname.empty()) + { + char procpath[PATH_MAX]; + if(find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, NULL, procpath, sizeof(procpath)) == B_OK) + pathname.insert(pathname.end(), procpath, procpath+strlen(procpath)); + } +#endif +#ifndef __SWITCH__ + if(pathname.empty()) + { + static const char SelfLinkNames[][32]{ + "/proc/self/exe", + "/proc/self/file", + "/proc/curproc/exe", + "/proc/curproc/file" + }; + + pathname.resize(256); + + const char *selfname{}; + ssize_t len{}; + for(const char *name : SelfLinkNames) + { + selfname = name; + len = readlink(selfname, pathname.data(), pathname.size()); + if(len >= 0 || errno != ENOENT) break; + } + + while(len > 0 && static_cast<size_t>(len) == pathname.size()) + { + pathname.resize(pathname.size() << 1); + len = readlink(selfname, pathname.data(), pathname.size()); + } + if(len <= 0) + { + WARN("Failed to readlink %s: %s\n", selfname, strerror(errno)); + len = 0; + } + + pathname.resize(static_cast<size_t>(len)); + } +#endif + while(!pathname.empty() && pathname.back() == 0) + pathname.pop_back(); + + auto sep = std::find(pathname.crbegin(), pathname.crend(), '/'); + if(sep != pathname.crend()) + procbin.emplace(std::string(pathname.cbegin(), sep.base()-1), + std::string(sep.base(), pathname.cend())); + else + procbin.emplace(std::string{}, std::string(pathname.cbegin(), pathname.cend())); + + TRACE("Got binary: \"%s\", \"%s\"\n", procbin->path.c_str(), procbin->fname.c_str()); + return *procbin; +} + +namespace { + +void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results) +{ + TRACE("Searching %s for *%s\n", path, ext); + DIR *dir{opendir(path)}; + if(!dir) return; + + const auto base = results->size(); + const size_t extlen{strlen(ext)}; + + while(struct dirent *dirent{readdir(dir)}) + { + if(strcmp(dirent->d_name, ".") == 
0 || strcmp(dirent->d_name, "..") == 0) + continue; + + const size_t len{strlen(dirent->d_name)}; + if(len <= extlen) continue; + if(al::strcasecmp(dirent->d_name+len-extlen, ext) != 0) + continue; + + results->emplace_back(); + std::string &str = results->back(); + str = path; + if(str.back() != '/') + str.push_back('/'); + str += dirent->d_name; + } + closedir(dir); + + const al::span<std::string> newlist{results->data()+base, results->size()-base}; + std::sort(newlist.begin(), newlist.end()); + for(const auto &name : newlist) + TRACE(" got %s\n", name.c_str()); +} + +} // namespace + +al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir) +{ + static std::mutex search_lock; + std::lock_guard<std::mutex> _{search_lock}; + + al::vector<std::string> results; + if(subdir[0] == '/') + { + DirectorySearch(subdir, ext, &results); + return results; + } + + /* Search the app-local directory. */ + if(auto localpath = al::getenv("ALSOFT_LOCAL_PATH")) + DirectorySearch(localpath->c_str(), ext, &results); + else + { + al::vector<char> cwdbuf(256); + while(!getcwd(cwdbuf.data(), cwdbuf.size())) + { + if(errno != ERANGE) + { + cwdbuf.clear(); + break; + } + cwdbuf.resize(cwdbuf.size() << 1); + } + if(cwdbuf.empty()) + DirectorySearch(".", ext, &results); + else + { + DirectorySearch(cwdbuf.data(), ext, &results); + cwdbuf.clear(); + } + } + + // Search local data dir + if(auto datapath = al::getenv("XDG_DATA_HOME")) + { + std::string &path = *datapath; + if(path.back() != '/') + path += '/'; + path += subdir; + DirectorySearch(path.c_str(), ext, &results); + } + else if(auto homepath = al::getenv("HOME")) + { + std::string &path = *homepath; + if(path.back() == '/') + path.pop_back(); + path += "/.local/share/"; + path += subdir; + DirectorySearch(path.c_str(), ext, &results); + } + + // Search global data dirs + std::string datadirs{al::getenv("XDG_DATA_DIRS").value_or("/usr/local/share/:/usr/share/")}; + + size_t curpos{0u}; + while(curpos < 
datadirs.size()) + { + size_t nextpos{datadirs.find(':', curpos)}; + + std::string path{(nextpos != std::string::npos) ? + datadirs.substr(curpos, nextpos++ - curpos) : datadirs.substr(curpos)}; + curpos = nextpos; + + if(path.empty()) continue; + if(path.back() != '/') + path += '/'; + path += subdir; + + DirectorySearch(path.c_str(), ext, &results); + } + +#ifdef ALSOFT_INSTALL_DATADIR + // Search the installation data directory + { + std::string path{ALSOFT_INSTALL_DATADIR}; + if(!path.empty()) + { + if(path.back() != '/') + path += '/'; + path += subdir; + DirectorySearch(path.c_str(), ext, &results); + } + } +#endif + + return results; +} + +namespace { + +bool SetRTPriorityPthread(int prio) +{ + int err{ENOTSUP}; +#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__) + /* Get the min and max priority for SCHED_RR. Limit the max priority to + * half, for now, to ensure the thread can't take the highest priority and + * go rogue. + */ + int rtmin{sched_get_priority_min(SCHED_RR)}; + int rtmax{sched_get_priority_max(SCHED_RR)}; + rtmax = (rtmax-rtmin)/2 + rtmin; + + struct sched_param param{}; + param.sched_priority = clampi(prio, rtmin, rtmax); +#ifdef SCHED_RESET_ON_FORK + err = pthread_setschedparam(pthread_self(), SCHED_RR|SCHED_RESET_ON_FORK, ¶m); + if(err == EINVAL) +#endif + err = pthread_setschedparam(pthread_self(), SCHED_RR, ¶m); + if(err == 0) return true; + +#else + + std::ignore = prio; +#endif + WARN("pthread_setschedparam failed: %s (%d)\n", std::strerror(err), err); + return false; +} + +bool SetRTPriorityRTKit(int prio) +{ +#ifdef HAVE_RTKIT + if(!HasDBus()) + { + WARN("D-Bus not available\n"); + return false; + } + dbus::Error error; + dbus::ConnectionPtr conn{dbus_bus_get(DBUS_BUS_SYSTEM, &error.get())}; + if(!conn) + { + WARN("D-Bus connection failed with %s: %s\n", error->name, error->message); + return false; + } + + /* Don't stupidly exit if the connection dies while doing this. 
*/ + dbus_connection_set_exit_on_disconnect(conn.get(), false); + + int nicemin{}; + int err{rtkit_get_min_nice_level(conn.get(), &nicemin)}; + if(err == -ENOENT) + { + err = std::abs(err); + ERR("Could not query RTKit: %s (%d)\n", std::strerror(err), err); + return false; + } + int rtmax{rtkit_get_max_realtime_priority(conn.get())}; + TRACE("Maximum real-time priority: %d, minimum niceness: %d\n", rtmax, nicemin); + + auto limit_rttime = [](DBusConnection *c) -> int + { + using ulonglong = unsigned long long; + long long maxrttime{rtkit_get_rttime_usec_max(c)}; + if(maxrttime <= 0) return static_cast<int>(std::abs(maxrttime)); + const ulonglong umaxtime{static_cast<ulonglong>(maxrttime)}; + + struct rlimit rlim{}; + if(getrlimit(RLIMIT_RTTIME, &rlim) != 0) + return errno; + + TRACE("RTTime max: %llu (hard: %llu, soft: %llu)\n", umaxtime, + static_cast<ulonglong>(rlim.rlim_max), static_cast<ulonglong>(rlim.rlim_cur)); + if(rlim.rlim_max > umaxtime) + { + rlim.rlim_max = static_cast<rlim_t>(std::min<ulonglong>(umaxtime, + std::numeric_limits<rlim_t>::max())); + rlim.rlim_cur = std::min(rlim.rlim_cur, rlim.rlim_max); + if(setrlimit(RLIMIT_RTTIME, &rlim) != 0) + return errno; + } + return 0; + }; + if(rtmax > 0) + { + if(AllowRTTimeLimit) + { + err = limit_rttime(conn.get()); + if(err != 0) + WARN("Failed to set RLIMIT_RTTIME for RTKit: %s (%d)\n", + std::strerror(err), err); + } + + /* Limit the maximum real-time priority to half. */ + rtmax = (rtmax+1)/2; + prio = clampi(prio, 1, rtmax); + + TRACE("Making real-time with priority %d (max: %d)\n", prio, rtmax); + err = rtkit_make_realtime(conn.get(), 0, prio); + if(err == 0) return true; + + err = std::abs(err); + WARN("Failed to set real-time priority: %s (%d)\n", std::strerror(err), err); + } + /* Don't try to set the niceness for non-Linux systems. Standard POSIX has + * niceness as a per-process attribute, while the intent here is for the + * audio processing thread only to get a priority boost. 
Currently only + * Linux is known to have per-thread niceness. + */ +#ifdef __linux__ + if(nicemin < 0) + { + TRACE("Making high priority with niceness %d\n", nicemin); + err = rtkit_make_high_priority(conn.get(), 0, nicemin); + if(err == 0) return true; + + err = std::abs(err); + WARN("Failed to set high priority: %s (%d)\n", std::strerror(err), err); + } +#endif /* __linux__ */ + +#else + + std::ignore = prio; + WARN("D-Bus not supported\n"); +#endif + return false; +} + +} // namespace + +void SetRTPriority() +{ + if(RTPrioLevel <= 0) + return; + + if(SetRTPriorityPthread(RTPrioLevel)) + return; + if(SetRTPriorityRTKit(RTPrioLevel)) + return; +} + +#endif diff --git a/core/helpers.h b/core/helpers.h new file mode 100644 index 00000000..f0bfcf1b --- /dev/null +++ b/core/helpers.h @@ -0,0 +1,18 @@ +#ifndef CORE_HELPERS_H +#define CORE_HELPERS_H + +#include <string> + +#include "vector.h" + + +struct PathNamePair { std::string path, fname; }; +const PathNamePair &GetProcBinary(void); + +extern int RTPrioLevel; +extern bool AllowRTTimeLimit; +void SetRTPriority(void); + +al::vector<std::string> SearchDataFiles(const char *match, const char *subdir); + +#endif /* CORE_HELPERS_H */ diff --git a/core/hrtf.cpp b/core/hrtf.cpp new file mode 100644 index 00000000..d5c7573a --- /dev/null +++ b/core/hrtf.cpp @@ -0,0 +1,1473 @@ + +#include "config.h" + +#include "hrtf.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <cctype> +#include <cmath> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <fstream> +#include <iterator> +#include <memory> +#include <mutex> +#include <numeric> +#include <type_traits> +#include <utility> + +#include "albit.h" +#include "albyte.h" +#include "alfstream.h" +#include "almalloc.h" +#include "alnumbers.h" +#include "alnumeric.h" +#include "aloptional.h" +#include "alspan.h" +#include "ambidefs.h" +#include "filters/splitter.h" +#include "helpers.h" +#include "logging.h" +#include "mixer/hrtfdefs.h" 
/* Pairs a loaded HRTF data set with the file name it is stored under. The
 * struct owns the data set through mEntry and is move-only.
 */
struct LoadedHrtf {
    /* Name the data set is stored under in the loaded list. */
    std::string mFilename;
    /* The owned data set. */
    std::unique_ptr<HrtfStore> mEntry;

    /* Forwards both arguments to the members, so the name and the entry can
     * each be copied or moved in as the caller prefers.
     */
    template<typename T, typename U>
    LoadedHrtf(T&& name, U&& entry)
        : mFilename{std::forward<T>(name)}, mEntry{std::forward<U>(entry)}
    { }
    LoadedHrtf(LoadedHrtf&&) = default;
    /* GCC warns when it tries to inline this. */
    ~LoadedHrtf();

    LoadedHrtf& operator=(LoadedHrtf&&) = default;
};
/* Defined out of line on purpose, see the note on the declaration. */
LoadedHrtf::~LoadedHrtf() = default;
/* Read-only stream buffer over a caller-owned block of memory. The whole
 * block is the get area from the start, so there is never more data to
 * fetch; seeking simply repositions the read pointer within the block. The
 * memory is not copied and must outlive the buffer.
 */
class databuf final : public std::streambuf {
    /* The value every seek returns on failure. */
    static pos_type seek_failure()
    { return traits_type::eof(); }

    /* Reaching the end of the get area means the end of the data. */
    int_type underflow() override
    { return traits_type::eof(); }

    pos_type seekoff(off_type offset, std::ios_base::seekdir whence, std::ios_base::openmode mode) override
    {
        /* Only pure input seeks are supported. */
        const bool readonly{(mode&std::ios_base::in) && !(mode&std::ios_base::out)};
        if(!readonly)
            return seek_failure();

        const auto length = static_cast<off_type>(egptr() - eback());

        /* Express the seek origin as a distance from the start of the data,
         * so all three directions share one bounds check.
         */
        off_type origin;
        if(whence == std::ios_base::beg)
            origin = 0;
        else if(whence == std::ios_base::cur)
            origin = static_cast<off_type>(gptr() - eback());
        else if(whence == std::ios_base::end)
            origin = length;
        else
            return seek_failure();

        /* The target must land within [start, end], both ends included. */
        if(offset < -origin || offset > length-origin)
            return seek_failure();

        char_type *const target{eback() + (origin + offset)};
        setg(eback(), target, egptr());
        return target - eback();
    }

    pos_type seekpos(pos_type pos, std::ios_base::openmode mode) override
    {
        /* Same rules as seekoff from the beginning. */
        const bool readonly{(mode&std::ios_base::in) && !(mode&std::ios_base::out)};
        if(!readonly)
            return seek_failure();

        const auto where = static_cast<off_type>(pos);
        if(where < 0 || where > egptr()-eback())
            return seek_failure();

        setg(eback(), eback() + where, egptr());
        return pos;
    }

public:
    /* Wraps the range [start_, end_). The const is cast away only because
     * the get-area pointers are non-const; the class never writes to it.
     */
    databuf(const char_type *start_, const char_type *end_) noexcept
    {
        char_type *const first{const_cast<char_type*>(start_)};
        char_type *const last{const_cast<char_type*>(end_)};
        setg(first, first, last);
    }
};
polar elevation in radians. This + * will return an index between 0 and (evcount - 1). + */ +IdxBlend CalcEvIndex(uint evcount, float ev) +{ + ev = (al::numbers::pi_v<float>*0.5f + ev) * static_cast<float>(evcount-1) * + al::numbers::inv_pi_v<float>; + uint idx{float2uint(ev)}; + + return IdxBlend{minu(idx, evcount-1), ev-static_cast<float>(idx)}; +} + +/* Calculate the azimuth index given the polar azimuth in radians. This will + * return an index between 0 and (azcount - 1). + */ +IdxBlend CalcAzIndex(uint azcount, float az) +{ + az = (al::numbers::pi_v<float>*2.0f + az) * static_cast<float>(azcount) * + (al::numbers::inv_pi_v<float>*0.5f); + uint idx{float2uint(az)}; + + return IdxBlend{idx%azcount, az-static_cast<float>(idx)}; +} + +} // namespace + + +/* Calculates static HRIR coefficients and delays for the given polar elevation + * and azimuth in radians. The coefficients are normalized. + */ +void HrtfStore::getCoeffs(float elevation, float azimuth, float distance, float spread, + HrirArray &coeffs, const al::span<uint,2> delays) +{ + const float dirfact{1.0f - (al::numbers::inv_pi_v<float>/2.0f * spread)}; + + size_t ebase{0}; + auto match_field = [&ebase,distance](const Field &field) noexcept -> bool + { + if(distance >= field.distance) + return true; + ebase += field.evCount; + return false; + }; + auto field = std::find_if(mFields.begin(), mFields.end()-1, match_field); + + /* Calculate the elevation indices. */ + const auto elev0 = CalcEvIndex(field->evCount, elevation); + const size_t elev1_idx{minu(elev0.idx+1, field->evCount-1)}; + const size_t ir0offset{mElev[ebase + elev0.idx].irOffset}; + const size_t ir1offset{mElev[ebase + elev1_idx].irOffset}; + + /* Calculate azimuth indices. */ + const auto az0 = CalcAzIndex(mElev[ebase + elev0.idx].azCount, azimuth); + const auto az1 = CalcAzIndex(mElev[ebase + elev1_idx].azCount, azimuth); + + /* Calculate the HRIR indices to blend. 
/* Builds the per-channel HRIR coefficients used for direct ambisonic-to-HRTF
 * decoding.
 *
 * For each point in AmbiPoints one HRIR is picked from the first field of
 * Hrtf. Each channel's filter is then the sum of those HRIRs, weighted by
 * AmbiMatrix[point][channel] and shifted by the point's delay. The result is
 * stored in mChannels[...].mCoeffs, and mIrSize is set to the resulting
 * filter length, capped at HrirLength.
 *
 * perHrirMin selects whether delays are taken relative to each HRIR's own
 * smaller ear delay or to the smallest delay over all points. XOverFreq and
 * AmbiOrderHFGain set up each channel's band splitter and HF scale.
 */
void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize, const bool perHrirMin,
    const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels],
    const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain)
{
    using double2 = std::array<double,2>;
    /* The HRIR chosen for one point, with its left and right delays still in
     * fixed-point form (HrirDelayFracBits fractional bits).
     */
    struct ImpulseResponse {
        const ConstHrirSpan hrir;
        uint ldelay, rdelay;
    };

    /* Initialize the splitter once with the crossover frequency normalized
     * by the data set's sample rate, then copy it to every channel along
     * with the HF gain for that channel's ambisonic order.
     */
    const double xover_norm{double{XOverFreq} / Hrtf->mSampleRate};
    mChannels[0].mSplitter.init(static_cast<float>(xover_norm));
    for(size_t i{0};i < mChannels.size();++i)
    {
        const size_t order{AmbiIndex::OrderFromChannel()[i]};
        mChannels[i].mSplitter = mChannels[0].mSplitter;
        mChannels[i].mHfScale = AmbiOrderHFGain[order];
    }

    /* min_delay starts above any valid delay so the first HRIR lowers it. */
    uint min_delay{HrtfHistoryLength*HrirDelayFracOne}, max_delay{0};
    al::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size());
    /* Picks the HRIR for one point and updates the running min/max delay. */
    auto calc_res = [Hrtf,&max_delay,&min_delay](const AngularPoint &pt) -> ImpulseResponse
    {
        /* Only the first field is used here. */
        auto &field = Hrtf->mFields[0];
        const auto elev0 = CalcEvIndex(field.evCount, pt.Elev.value);
        const size_t elev1_idx{minu(elev0.idx+1, field.evCount-1)};
        const size_t ir0offset{Hrtf->mElev[elev0.idx].irOffset};
        const size_t ir1offset{Hrtf->mElev[elev1_idx].irOffset};

        const auto az0 = CalcAzIndex(Hrtf->mElev[elev0.idx].azCount, pt.Azim.value);
        const auto az1 = CalcAzIndex(Hrtf->mElev[elev1_idx].azCount, pt.Azim.value);

        /* The four HRIRs surrounding the point: lower elevation at this and
         * the next azimuth, then upper elevation at this and the next
         * azimuth (azimuths wrap around).
         */
        const size_t idx[4]{
            ir0offset + az0.idx,
            ir0offset + ((az0.idx+1) % Hrtf->mElev[elev0.idx].azCount),
            ir1offset + az1.idx,
            ir1offset + ((az1.idx+1) % Hrtf->mElev[elev1_idx].azCount)
        };

        /* The largest blend factor serves as the closest HRIR. */
        /* NOTE(review): the azimuth choice always uses az1.blend, even when
         * the lower elevation row (whose azimuth blend is az0.blend) is the
         * one selected - confirm this is intended.
         */
        const size_t irOffset{idx[(elev0.blend >= 0.5f)*2 + (az1.blend >= 0.5f)]};
        ImpulseResponse res{Hrtf->mCoeffs[irOffset],
            Hrtf->mDelays[irOffset][0], Hrtf->mDelays[irOffset][1]};

        min_delay = minu(min_delay, minu(res.ldelay, res.rdelay));
        max_delay = maxu(max_delay, maxu(res.ldelay, res.rdelay));

        return res;
    };
    std::transform(AmbiPoints.begin(), AmbiPoints.end(), std::back_inserter(impres), calc_res);
    /* Rounds a fixed-point delay to the nearest whole sample. */
    auto hrir_delay_round = [](const uint d) noexcept -> uint
    { return (d+HrirDelayFracHalf) >> HrirDelayFracBits; };

    TRACE("Min delay: %.2f, max delay: %.2f, FIR length: %u\n",
        min_delay/double{HrirDelayFracOne}, max_delay/double{HrirDelayFracOne}, irSize);

    /* Accumulate in double precision, one zero-initialized left/right
     * response per channel.
     */
    auto tmpres = al::vector<std::array<double2,HrirLength>>(mChannels.size());
    /* From here on max_delay tracks the largest delay remaining after the
     * base delay has been subtracted.
     */
    max_delay = 0;
    for(size_t c{0u};c < AmbiPoints.size();++c)
    {
        const ConstHrirSpan hrir{impres[c].hrir};
        const uint base_delay{perHrirMin ? minu(impres[c].ldelay, impres[c].rdelay) : min_delay};
        const uint ldelay{hrir_delay_round(impres[c].ldelay - base_delay)};
        const uint rdelay{hrir_delay_round(impres[c].rdelay - base_delay)};
        max_delay = maxu(max_delay, maxu(impres[c].ldelay, impres[c].rdelay) - base_delay);

        for(size_t i{0u};i < mChannels.size();++i)
        {
            const double mult{AmbiMatrix[c][i]};
            /* Stop early enough that the more delayed ear stays within
             * HrirLength.
             */
            const size_t numirs{HrirLength - maxz(ldelay, rdelay)};
            size_t lidx{ldelay}, ridx{rdelay};
            for(size_t j{0};j < numirs;++j)
            {
                tmpres[i][lidx++][0] += hrir[j][0] * mult;
                tmpres[i][ridx++][1] += hrir[j][1] * mult;
            }
        }
    }
    impres.clear();

    /* Convert the accumulated responses to the float coefficients stored in
     * each channel.
     */
    for(size_t i{0u};i < mChannels.size();++i)
    {
        auto copy_arr = [](const double2 &in) noexcept -> float2
        { return float2{{static_cast<float>(in[0]), static_cast<float>(in[1])}}; };
        std::transform(tmpres[i].cbegin(), tmpres[i].cend(), mChannels[i].mCoeffs.begin(),
            copy_arr);
    }
    tmpres.clear();

    /* The filter covers the longest remaining delay plus the source IR size,
     * limited to what the coefficient arrays can hold.
     */
    const uint max_length{minu(hrir_delay_round(max_delay) + irSize, HrirLength)};
    TRACE("New max delay: %.2f, FIR length: %u\n", max_delay/double{HrirDelayFracOne},
        max_length);
    mIrSize = max_length;
}
*/ + char *base = reinterpret_cast<char*>(Hrtf.get()); + size_t offset{sizeof(HrtfStore)}; + + offset = RoundUp(offset, alignof(HrtfStore::Field)); /* Align for field infos */ + auto field_ = reinterpret_cast<HrtfStore::Field*>(base + offset); + offset += sizeof(field_[0])*fields.size(); + + offset = RoundUp(offset, alignof(HrtfStore::Elevation)); /* Align for elevation infos */ + auto elev_ = reinterpret_cast<HrtfStore::Elevation*>(base + offset); + offset += sizeof(elev_[0])*elevs.size(); + + offset = RoundUp(offset, 16); /* Align for coefficients using SIMD */ + auto coeffs_ = reinterpret_cast<HrirArray*>(base + offset); + offset += sizeof(coeffs_[0])*irCount; + + auto delays_ = reinterpret_cast<ubyte2*>(base + offset); + offset += sizeof(delays_[0])*irCount; + + if(offset != total) + throw std::runtime_error{"HrtfStore allocation size mismatch"}; + + /* Copy input data to storage. */ + std::uninitialized_copy(fields.cbegin(), fields.cend(), field_); + std::uninitialized_copy(elevs.cbegin(), elevs.cend(), elev_); + std::uninitialized_copy_n(coeffs, irCount, coeffs_); + std::uninitialized_copy_n(delays, irCount, delays_); + + /* Finally, assign the storage pointers. 
/* Sign-extends a value whose low num_bits bits hold a two's complement
 * number, for signed types wider than num_bits. Bits above num_bits are
 * expected to be zero on input.
 *
 * The sign mask is built in T's unsigned counterpart so the shift is done at
 * (at least) T's own width; shifting an unsigned int literal would be
 * undefined once num_bits-1 reaches the width of unsigned int.
 */
template<size_t num_bits, typename T>
constexpr std::enable_if_t<std::is_signed<T>::value && (num_bits < sizeof(T)*8),
T> fixsign(T value) noexcept
{
    static_assert(num_bits > 0, "num_bits must be at least 1");
    using UnsignedT = std::make_unsigned_t<T>;
    constexpr auto signbit = static_cast<T>(UnsignedT{1} << (num_bits-1));
    /* Flip the sign bit then subtract it: values with the bit set go negative. */
    return static_cast<T>((value^signbit) - signbit);
}

/* Unsigned types, and signed types that are exactly num_bits wide, need no
 * adjustment and are returned as-is.
 */
template<size_t num_bits, typename T>
constexpr std::enable_if_t<!std::is_signed<T>::value || num_bits == sizeof(T)*8,
T> fixsign(T value) noexcept
{ return value; }
if(!data.read(reinterpret_cast<char*>(b), num_bits/8)) + return static_cast<T>(EOF); + std::reverse_copy(std::begin(b), std::end(b), reinterpret_cast<al::byte*>(&ret)); + + return fixsign<num_bits>(ret); +} + +template<> +inline uint8_t readle<uint8_t,8>(std::istream &data) +{ return static_cast<uint8_t>(data.get()); } + + +std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename) +{ + uint rate{readle<uint32_t>(data)}; + ushort irCount{readle<uint16_t>(data)}; + ushort irSize{readle<uint16_t>(data)}; + ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount=%d (%d to %d)\n", + evCount, MinEvCount, MaxEvCount); + return nullptr; + } + + auto elevs = al::vector<HrtfStore::Elevation>(evCount); + for(auto &elev : elevs) + elev.irOffset = readle<uint16_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{1};i < evCount;i++) + { + if(elevs[i].irOffset <= elevs[i-1].irOffset) + { + ERR("Invalid evOffset: evOffset[%zu]=%d (last=%d)\n", i, elevs[i].irOffset, + elevs[i-1].irOffset); + return nullptr; + } + } + if(irCount <= elevs.back().irOffset) + { + ERR("Invalid evOffset: evOffset[%zu]=%d (irCount=%d)\n", + elevs.size()-1, elevs.back().irOffset, irCount); + return nullptr; + } + + for(size_t i{1};i < evCount;i++) + { + elevs[i-1].azCount = static_cast<ushort>(elevs[i].irOffset - elevs[i-1].irOffset); + if(elevs[i-1].azCount < MinAzCount || elevs[i-1].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zd]=%d (%d to %d)\n", + i-1, elevs[i-1].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + elevs.back().azCount = 
static_cast<ushort>(irCount - elevs.back().irOffset); + if(elevs.back().azCount < MinAzCount || elevs.back().azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n", + elevs.size()-1, elevs.back().azCount, MinAzCount, MaxAzCount); + return nullptr; + } + + auto coeffs = al::vector<HrirArray>(irCount, HrirArray{}); + auto delays = al::vector<ubyte2>(irCount); + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irCount;i++) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zd]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. */ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + + const HrtfStore::Field field[1]{{0.0f, evCount}}; + return CreateHrtfStore(rate, static_cast<uint8_t>(irSize), field, {elevs.data(), elevs.size()}, + coeffs.data(), delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename) +{ + uint rate{readle<uint32_t>(data)}; + uint8_t irSize{readle<uint8_t>(data)}; + ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount=%d (%d to %d)\n", + evCount, MinEvCount, MaxEvCount); + return nullptr; + } + + auto elevs = al::vector<HrtfStore::Elevation>(evCount); + for(auto &elev : elevs) + elev.azCount = readle<uint8_t>(data); + 
if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < evCount;++i) + { + if(elevs[i].azCount < MinAzCount || elevs[i].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zd]=%d (%d to %d)\n", i, elevs[i].azCount, + MinAzCount, MaxAzCount); + return nullptr; + } + } + + elevs[0].irOffset = 0; + for(size_t i{1};i < evCount;i++) + elevs[i].irOffset = static_cast<ushort>(elevs[i-1].irOffset + elevs[i-1].azCount); + const ushort irCount{static_cast<ushort>(elevs.back().irOffset + elevs.back().azCount)}; + + auto coeffs = al::vector<HrirArray>(irCount, HrirArray{}); + auto delays = al::vector<ubyte2>(irCount); + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irCount;i++) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zd]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. 
*/ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + + const HrtfStore::Field field[1]{{0.0f, evCount}}; + return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(), + delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename) +{ + constexpr ubyte SampleType_S16{0}; + constexpr ubyte SampleType_S24{1}; + constexpr ubyte ChanType_LeftOnly{0}; + constexpr ubyte ChanType_LeftRight{1}; + + uint rate{readle<uint32_t>(data)}; + ubyte sampleType{readle<uint8_t>(data)}; + ubyte channelType{readle<uint8_t>(data)}; + uint8_t irSize{readle<uint8_t>(data)}; + ubyte fdCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(sampleType > SampleType_S24) + { + ERR("Unsupported sample type: %d\n", sampleType); + return nullptr; + } + if(channelType > ChanType_LeftRight) + { + ERR("Unsupported channel type: %d\n", channelType); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(fdCount < 1 || fdCount > MaxFdCount) + { + ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount, + MaxFdCount); + return nullptr; + } + + auto fields = al::vector<HrtfStore::Field>(fdCount); + auto elevs = al::vector<HrtfStore::Elevation>{}; + for(size_t f{0};f < fdCount;f++) + { + const ushort distance{readle<uint16_t>(data)}; + const ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(distance < MinFdDistance || distance > MaxFdDistance) + { + ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance, + MinFdDistance, MaxFdDistance); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: 
evCount[%zu]=%d (%d to %d)\n", f, evCount, + MinEvCount, MaxEvCount); + return nullptr; + } + + fields[f].distance = distance / 1000.0f; + fields[f].evCount = evCount; + if(f > 0 && fields[f].distance <= fields[f-1].distance) + { + ERR("Field distance[%zu] is not after previous (%f > %f)\n", f, fields[f].distance, + fields[f-1].distance); + return nullptr; + } + + const size_t ebase{elevs.size()}; + elevs.resize(ebase + evCount); + for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount)) + elev.azCount = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t e{0};e < evCount;e++) + { + if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e, + elevs[ebase+e].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + } + + elevs[0].irOffset = 0; + std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset); + + auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{}); + auto delays = al::vector<ubyte2>(irTotal); + if(channelType == ChanType_LeftOnly) + { + if(sampleType == SampleType_S16) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + } + else if(sampleType == SampleType_S24) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; 
+ } + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. */ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + } + else if(channelType == ChanType_LeftRight) + { + if(sampleType == SampleType_S16) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = readle<int16_t>(data) / 32768.0f; + val[1] = readle<int16_t>(data) / 32768.0f; + } + } + } + else if(sampleType == SampleType_S24) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + } + for(auto &val : delays) + { + val[0] = readle<uint8_t>(data); + val[1] = readle<uint8_t>(data); + } + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + if(delays[i][1] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][1]: %d (%d)\n", i, delays[i][1], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + delays[i][1] <<= HrirDelayFracBits; + } + } + + if(fdCount > 1) + { + auto fields_ = al::vector<HrtfStore::Field>(fields.size()); + auto elevs_ = al::vector<HrtfStore::Elevation>(elevs.size()); + auto coeffs_ = al::vector<HrirArray>(coeffs.size()); + auto delays_ = al::vector<ubyte2>(delays.size()); + + /* Simple reverse for the per-field elements. */ + std::reverse_copy(fields.cbegin(), fields.cend(), fields_.begin()); + + /* Each field has a group of elevations, which each have an azimuth + * count. 
Reverse the order of the groups, keeping the relative order + * of per-group azimuth counts. + */ + auto elevs__end = elevs_.end(); + auto copy_azs = [&elevs,&elevs__end](const ptrdiff_t ebase, const HrtfStore::Field &field) + -> ptrdiff_t + { + auto elevs_src = elevs.begin()+ebase; + elevs__end = std::copy_backward(elevs_src, elevs_src+field.evCount, elevs__end); + return ebase + field.evCount; + }; + (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_azs); + assert(elevs_.begin() == elevs__end); + + /* Reestablish the IR offset for each elevation index, given the new + * ordering of elevations. + */ + elevs_[0].irOffset = 0; + std::partial_sum(elevs_.cbegin(), elevs_.cend(), elevs_.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + + /* Reverse the order of each field's group of IRs. */ + auto coeffs_end = coeffs_.end(); + auto delays_end = delays_.end(); + auto copy_irs = [&elevs,&coeffs,&delays,&coeffs_end,&delays_end]( + const ptrdiff_t ebase, const HrtfStore::Field &field) -> ptrdiff_t + { + auto accum_az = [](int count, const HrtfStore::Elevation &elev) noexcept -> int + { return count + elev.azCount; }; + const auto elevs_mid = elevs.cbegin() + ebase; + const auto elevs_end = elevs_mid + field.evCount; + const int abase{std::accumulate(elevs.cbegin(), elevs_mid, 0, accum_az)}; + const int num_azs{std::accumulate(elevs_mid, elevs_end, 0, accum_az)}; + + coeffs_end = std::copy_backward(coeffs.cbegin() + abase, + coeffs.cbegin() + (abase+num_azs), coeffs_end); + delays_end = std::copy_backward(delays.cbegin() + abase, + delays.cbegin() + (abase+num_azs), delays_end); + + return ebase + field.evCount; + }; + (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_irs); + assert(coeffs_.begin() == coeffs_end); + assert(delays_.begin() == delays_end); + + fields 
= std::move(fields_); + elevs = std::move(elevs_); + coeffs = std::move(coeffs_); + delays = std::move(delays_); + } + + return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()}, + {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename) +{ + constexpr ubyte ChanType_LeftOnly{0}; + constexpr ubyte ChanType_LeftRight{1}; + + uint rate{readle<uint32_t>(data)}; + ubyte channelType{readle<uint8_t>(data)}; + uint8_t irSize{readle<uint8_t>(data)}; + ubyte fdCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(channelType > ChanType_LeftRight) + { + ERR("Unsupported channel type: %d\n", channelType); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(fdCount < 1 || fdCount > MaxFdCount) + { + ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount, + MaxFdCount); + return nullptr; + } + + auto fields = al::vector<HrtfStore::Field>(fdCount); + auto elevs = al::vector<HrtfStore::Elevation>{}; + for(size_t f{0};f < fdCount;f++) + { + const ushort distance{readle<uint16_t>(data)}; + const ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(distance < MinFdDistance || distance > MaxFdDistance) + { + ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance, + MinFdDistance, MaxFdDistance); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount, + MinEvCount, MaxEvCount); + return nullptr; + } + + fields[f].distance = distance / 1000.0f; + fields[f].evCount = evCount; + if(f > 0 && fields[f].distance > 
fields[f-1].distance) + { + ERR("Field distance[%zu] is not before previous (%f <= %f)\n", f, fields[f].distance, + fields[f-1].distance); + return nullptr; + } + + const size_t ebase{elevs.size()}; + elevs.resize(ebase + evCount); + for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount)) + elev.azCount = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t e{0};e < evCount;e++) + { + if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e, + elevs[ebase+e].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + } + + elevs[0].irOffset = 0; + std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset); + + auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{}); + auto delays = al::vector<ubyte2>(irTotal); + if(channelType == ChanType_LeftOnly) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][0]: %f (%d)\n", i, + delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + } + + /* Mirror the left ear responses to the right ear. 
*/ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + } + else if(channelType == ChanType_LeftRight) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + for(auto &val : delays) + { + val[0] = readle<uint8_t>(data); + val[1] = readle<uint8_t>(data); + } + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][0]: %f (%d)\n", i, + delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + if(delays[i][1] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][1]: %f (%d)\n", i, + delays[i][1] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + } + } + + return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()}, + {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename); +} + + +bool checkName(const std::string &name) +{ + auto match_name = [&name](const HrtfEntry &entry) -> bool { return name == entry.mDispName; }; + auto &enum_names = EnumeratedHrtfs; + return std::find_if(enum_names.cbegin(), enum_names.cend(), match_name) != enum_names.cend(); +} + +void AddFileEntry(const std::string &filename) +{ + /* Check if this file has already been enumerated. */ + auto enum_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&filename](const HrtfEntry &entry) -> bool + { return entry.mFilename == filename; }); + if(enum_iter != EnumeratedHrtfs.cend()) + { + TRACE("Skipping duplicate file entry %s\n", filename.c_str()); + return; + } + + /* TODO: Get a human-readable name from the HRTF data (possibly coming in a + * format update). 
*/ + size_t namepos{filename.find_last_of('/')+1}; + if(!namepos) namepos = filename.find_last_of('\\')+1; + + size_t extpos{filename.find_last_of('.')}; + if(extpos <= namepos) extpos = std::string::npos; + + const std::string basename{(extpos == std::string::npos) ? + filename.substr(namepos) : filename.substr(namepos, extpos-namepos)}; + std::string newname{basename}; + int count{1}; + while(checkName(newname)) + { + newname = basename; + newname += " #"; + newname += std::to_string(++count); + } + EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename}); + const HrtfEntry &entry = EnumeratedHrtfs.back(); + + TRACE("Adding file entry \"%s\"\n", entry.mFilename.c_str()); +} + +/* Unfortunate that we have to duplicate AddFileEntry to take a memory buffer + * for input instead of opening the given filename. + */ +void AddBuiltInEntry(const std::string &dispname, uint residx) +{ + const std::string filename{'!'+std::to_string(residx)+'_'+dispname}; + + auto enum_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&filename](const HrtfEntry &entry) -> bool + { return entry.mFilename == filename; }); + if(enum_iter != EnumeratedHrtfs.cend()) + { + TRACE("Skipping duplicate file entry %s\n", filename.c_str()); + return; + } + + /* TODO: Get a human-readable name from the HRTF data (possibly coming in a + * format update). 
*/ + + std::string newname{dispname}; + int count{1}; + while(checkName(newname)) + { + newname = dispname; + newname += " #"; + newname += std::to_string(++count); + } + EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename}); + const HrtfEntry &entry = EnumeratedHrtfs.back(); + + TRACE("Adding built-in entry \"%s\"\n", entry.mFilename.c_str()); +} + + +#define IDR_DEFAULT_HRTF_MHR 1 + +#ifndef ALSOFT_EMBED_HRTF_DATA + +al::span<const char> GetResource(int /*name*/) +{ return {}; } + +#else + +constexpr unsigned char hrtf_default[]{ +#include "default_hrtf.txt" +}; + +al::span<const char> GetResource(int name) +{ + if(name == IDR_DEFAULT_HRTF_MHR) + return {reinterpret_cast<const char*>(hrtf_default), sizeof(hrtf_default)}; + return {}; +} +#endif + +} // namespace + + +al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt) +{ + std::lock_guard<std::mutex> _{EnumeratedHrtfLock}; + EnumeratedHrtfs.clear(); + + bool usedefaults{true}; + if(pathopt) + { + const char *pathlist{pathopt->c_str()}; + while(pathlist && *pathlist) + { + const char *next, *end; + + while(isspace(*pathlist) || *pathlist == ',') + pathlist++; + if(*pathlist == '\0') + continue; + + next = strchr(pathlist, ','); + if(next) + end = next++; + else + { + end = pathlist + strlen(pathlist); + usedefaults = false; + } + + while(end != pathlist && isspace(*(end-1))) + --end; + if(end != pathlist) + { + const std::string pname{pathlist, end}; + for(const auto &fname : SearchDataFiles(".mhr", pname.c_str())) + AddFileEntry(fname); + } + + pathlist = next; + } + } + + if(usedefaults) + { + for(const auto &fname : SearchDataFiles(".mhr", "openal/hrtf")) + AddFileEntry(fname); + + if(!GetResource(IDR_DEFAULT_HRTF_MHR).empty()) + AddBuiltInEntry("Built-In HRTF", IDR_DEFAULT_HRTF_MHR); + } + + al::vector<std::string> list; + list.reserve(EnumeratedHrtfs.size()); + for(auto &entry : EnumeratedHrtfs) + list.emplace_back(entry.mDispName); + + return list; +} + +HrtfStorePtr 
GetLoadedHrtf(const std::string &name, const uint devrate) +{ + std::lock_guard<std::mutex> _{EnumeratedHrtfLock}; + auto entry_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&name](const HrtfEntry &entry) -> bool { return entry.mDispName == name; }); + if(entry_iter == EnumeratedHrtfs.cend()) + return nullptr; + const std::string &fname = entry_iter->mFilename; + + std::lock_guard<std::mutex> __{LoadedHrtfLock}; + auto hrtf_lt_fname = [](LoadedHrtf &hrtf, const std::string &filename) -> bool + { return hrtf.mFilename < filename; }; + auto handle = std::lower_bound(LoadedHrtfs.begin(), LoadedHrtfs.end(), fname, hrtf_lt_fname); + while(handle != LoadedHrtfs.end() && handle->mFilename == fname) + { + HrtfStore *hrtf{handle->mEntry.get()}; + if(hrtf && hrtf->mSampleRate == devrate) + { + hrtf->add_ref(); + return HrtfStorePtr{hrtf}; + } + ++handle; + } + + std::unique_ptr<std::istream> stream; + int residx{}; + char ch{}; + if(sscanf(fname.c_str(), "!%d%c", &residx, &ch) == 2 && ch == '_') + { + TRACE("Loading %s...\n", fname.c_str()); + al::span<const char> res{GetResource(residx)}; + if(res.empty()) + { + ERR("Could not get resource %u, %s\n", residx, name.c_str()); + return nullptr; + } + stream = std::make_unique<idstream>(res.begin(), res.end()); + } + else + { + TRACE("Loading %s...\n", fname.c_str()); + auto fstr = std::make_unique<al::ifstream>(fname.c_str(), std::ios::binary); + if(!fstr->is_open()) + { + ERR("Could not open %s\n", fname.c_str()); + return nullptr; + } + stream = std::move(fstr); + } + + std::unique_ptr<HrtfStore> hrtf; + char magic[sizeof(magicMarker03)]; + stream->read(magic, sizeof(magic)); + if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker03))) + ERR("%s data is too short (%zu bytes)\n", name.c_str(), stream->gcount()); + else if(memcmp(magic, magicMarker03, sizeof(magicMarker03)) == 0) + { + TRACE("Detected data set format v3\n"); + hrtf = LoadHrtf03(*stream, name.c_str()); + } + else 
if(memcmp(magic, magicMarker02, sizeof(magicMarker02)) == 0) + { + TRACE("Detected data set format v2\n"); + hrtf = LoadHrtf02(*stream, name.c_str()); + } + else if(memcmp(magic, magicMarker01, sizeof(magicMarker01)) == 0) + { + TRACE("Detected data set format v1\n"); + hrtf = LoadHrtf01(*stream, name.c_str()); + } + else if(memcmp(magic, magicMarker00, sizeof(magicMarker00)) == 0) + { + TRACE("Detected data set format v0\n"); + hrtf = LoadHrtf00(*stream, name.c_str()); + } + else + ERR("Invalid header in %s: \"%.8s\"\n", name.c_str(), magic); + stream.reset(); + + if(!hrtf) + { + ERR("Failed to load %s\n", name.c_str()); + return nullptr; + } + + if(hrtf->mSampleRate != devrate) + { + TRACE("Resampling HRTF %s (%uhz -> %uhz)\n", name.c_str(), hrtf->mSampleRate, devrate); + + /* Calculate the last elevation's index and get the total IR count. */ + const size_t lastEv{std::accumulate(hrtf->mFields.begin(), hrtf->mFields.end(), size_t{0}, + [](const size_t curval, const HrtfStore::Field &field) noexcept -> size_t + { return curval + field.evCount; } + ) - 1}; + const size_t irCount{size_t{hrtf->mElev[lastEv].irOffset} + hrtf->mElev[lastEv].azCount}; + + /* Resample all the IRs. */ + std::array<std::array<double,HrirLength>,2> inout; + PPhaseResampler rs; + rs.init(hrtf->mSampleRate, devrate); + for(size_t i{0};i < irCount;++i) + { + HrirArray &coeffs = const_cast<HrirArray&>(hrtf->mCoeffs[i]); + for(size_t j{0};j < 2;++j) + { + std::transform(coeffs.cbegin(), coeffs.cend(), inout[0].begin(), + [j](const float2 &in) noexcept -> double { return in[j]; }); + rs.process(HrirLength, inout[0].data(), HrirLength, inout[1].data()); + for(size_t k{0};k < HrirLength;++k) + coeffs[k][j] = static_cast<float>(inout[1][k]); + } + } + rs = {}; + + /* Scale the delays for the new sample rate. 
*/ + float max_delay{0.0f}; + auto new_delays = al::vector<float2>(irCount); + const float rate_scale{static_cast<float>(devrate)/static_cast<float>(hrtf->mSampleRate)}; + for(size_t i{0};i < irCount;++i) + { + for(size_t j{0};j < 2;++j) + { + const float new_delay{std::round(hrtf->mDelays[i][j] * rate_scale) / + float{HrirDelayFracOne}}; + max_delay = maxf(max_delay, new_delay); + new_delays[i][j] = new_delay; + } + } + + /* If the new delays exceed the max, scale it down to fit (essentially + * shrinking the head radius; not ideal but better than a per-delay + * clamp). + */ + float delay_scale{HrirDelayFracOne}; + if(max_delay > MaxHrirDelay) + { + WARN("Resampled delay exceeds max (%.2f > %d)\n", max_delay, MaxHrirDelay); + delay_scale *= float{MaxHrirDelay} / max_delay; + } + + for(size_t i{0};i < irCount;++i) + { + ubyte2 &delays = const_cast<ubyte2&>(hrtf->mDelays[i]); + for(size_t j{0};j < 2;++j) + delays[j] = static_cast<ubyte>(float2int(new_delays[i][j]*delay_scale + 0.5f)); + } + + /* Scale the IR size for the new sample rate and update the stored + * sample rate. + */ + const float newIrSize{std::round(static_cast<float>(hrtf->mIrSize) * rate_scale)}; + hrtf->mIrSize = static_cast<uint8_t>(minf(HrirLength, newIrSize)); + hrtf->mSampleRate = devrate; + } + + TRACE("Loaded HRTF %s for sample rate %uhz, %u-sample filter\n", name.c_str(), + hrtf->mSampleRate, hrtf->mIrSize); + handle = LoadedHrtfs.emplace(handle, fname, std::move(hrtf)); + + return HrtfStorePtr{handle->mEntry.get()}; +} + + +void HrtfStore::add_ref() +{ + auto ref = IncrementRef(mRef); + TRACE("HrtfStore %p increasing refcount to %u\n", decltype(std::declval<void*>()){this}, ref); +} + +void HrtfStore::dec_ref() +{ + auto ref = DecrementRef(mRef); + TRACE("HrtfStore %p decreasing refcount to %u\n", decltype(std::declval<void*>()){this}, ref); + if(ref == 0) + { + std::lock_guard<std::mutex> _{LoadedHrtfLock}; + + /* Go through and remove all unused HRTFs. 
*/ + auto remove_unused = [](LoadedHrtf &hrtf) -> bool + { + HrtfStore *entry{hrtf.mEntry.get()}; + if(entry && ReadRef(entry->mRef) == 0) + { + TRACE("Unloading unused HRTF %s\n", hrtf.mFilename.data()); + hrtf.mEntry = nullptr; + return true; + } + return false; + }; + auto iter = std::remove_if(LoadedHrtfs.begin(), LoadedHrtfs.end(), remove_unused); + LoadedHrtfs.erase(iter, LoadedHrtfs.end()); + } +} diff --git a/core/hrtf.h b/core/hrtf.h new file mode 100644 index 00000000..eb18682a --- /dev/null +++ b/core/hrtf.h @@ -0,0 +1,89 @@ +#ifndef CORE_HRTF_H +#define CORE_HRTF_H + +#include <array> +#include <cstddef> +#include <memory> +#include <string> + +#include "almalloc.h" +#include "aloptional.h" +#include "alspan.h" +#include "atomic.h" +#include "ambidefs.h" +#include "bufferline.h" +#include "mixer/hrtfdefs.h" +#include "intrusive_ptr.h" +#include "vector.h" + + +struct HrtfStore { + RefCount mRef; + + uint mSampleRate : 24; + uint mIrSize : 8; + + struct Field { + float distance; + ubyte evCount; + }; + /* NOTE: Fields are stored *backwards*. field[0] is the farthest field, and + * field[fdCount-1] is the nearest. 
+ */ + al::span<const Field> mFields; + + struct Elevation { + ushort azCount; + ushort irOffset; + }; + Elevation *mElev; + const HrirArray *mCoeffs; + const ubyte2 *mDelays; + + void getCoeffs(float elevation, float azimuth, float distance, float spread, HrirArray &coeffs, + const al::span<uint,2> delays); + + void add_ref(); + void dec_ref(); + + DEF_PLACE_NEWDEL() +}; +using HrtfStorePtr = al::intrusive_ptr<HrtfStore>; + + +struct EvRadians { float value; }; +struct AzRadians { float value; }; +struct AngularPoint { + EvRadians Elev; + AzRadians Azim; +}; + + +struct DirectHrtfState { + std::array<float,BufferLineSize> mTemp; + + /* HRTF filter state for dry buffer content */ + uint mIrSize{0}; + al::FlexArray<HrtfChannelState> mChannels; + + DirectHrtfState(size_t numchans) : mChannels{numchans} { } + /** + * Produces HRTF filter coefficients for decoding B-Format, given a set of + * virtual speaker positions, a matching decoding matrix, and per-order + * high-frequency gains for the decoder. The calculated impulse responses + * are ordered and scaled according to the matrix input. 
+ */ + void build(const HrtfStore *Hrtf, const uint irSize, const bool perHrirMin, + const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels], + const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain); + + static std::unique_ptr<DirectHrtfState> Create(size_t num_chans); + + DEF_FAM_NEWDEL(DirectHrtfState, mChannels) +}; + + +al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt); +HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate); + +#endif /* CORE_HRTF_H */ diff --git a/core/logging.cpp b/core/logging.cpp new file mode 100644 index 00000000..34a95e5a --- /dev/null +++ b/core/logging.cpp @@ -0,0 +1,89 @@ + +#include "config.h" + +#include "logging.h" + +#include <cstdarg> +#include <cstdio> +#include <string> + +#include "alspan.h" +#include "strutils.h" +#include "vector.h" + + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#elif defined(__ANDROID__) +#include <android/log.h> +#endif + +void al_print(LogLevel level, FILE *logfile, const char *fmt, ...) +{ + /* Kind of ugly since string literals are const char arrays with a size + * that includes the null terminator, which we want to exclude from the + * span. 
+ */ + auto prefix = al::as_span("[ALSOFT] (--) ").first<14>(); + switch(level) + { + case LogLevel::Disable: break; + case LogLevel::Error: prefix = al::as_span("[ALSOFT] (EE) ").first<14>(); break; + case LogLevel::Warning: prefix = al::as_span("[ALSOFT] (WW) ").first<14>(); break; + case LogLevel::Trace: prefix = al::as_span("[ALSOFT] (II) ").first<14>(); break; + } + + al::vector<char> dynmsg; + std::array<char,256> stcmsg{}; + + char *str{stcmsg.data()}; + auto prefend1 = std::copy_n(prefix.begin(), prefix.size(), stcmsg.begin()); + al::span<char> msg{prefend1, stcmsg.end()}; + + std::va_list args, args2; + va_start(args, fmt); + va_copy(args2, args); + const int msglen{std::vsnprintf(msg.data(), msg.size(), fmt, args)}; + if(msglen >= 0 && static_cast<size_t>(msglen) >= msg.size()) UNLIKELY + { + dynmsg.resize(static_cast<size_t>(msglen)+prefix.size() + 1u); + + str = dynmsg.data(); + auto prefend2 = std::copy_n(prefix.begin(), prefix.size(), dynmsg.begin()); + msg = {prefend2, dynmsg.end()}; + + std::vsnprintf(msg.data(), msg.size(), fmt, args2); + } + va_end(args2); + va_end(args); + + if(gLogLevel >= level) + { + fputs(str, logfile); + fflush(logfile); + } +#if defined(_WIN32) && !defined(NDEBUG) + /* OutputDebugStringW has no 'level' property to distinguish between + * informational, warning, or error debug messages. So only print them for + * non-Release builds. + */ + std::wstring wstr{utf8_to_wstr(str)}; + OutputDebugStringW(wstr.c_str()); +#elif defined(__ANDROID__) + auto android_severity = [](LogLevel l) noexcept + { + switch(l) + { + case LogLevel::Trace: return ANDROID_LOG_DEBUG; + case LogLevel::Warning: return ANDROID_LOG_WARN; + case LogLevel::Error: return ANDROID_LOG_ERROR; + /* Should not happen. 
*/ + case LogLevel::Disable: + break; + } + return ANDROID_LOG_ERROR; + }; + __android_log_print(android_severity(level), "openal", "%s", str); +#endif +} diff --git a/core/logging.h b/core/logging.h new file mode 100644 index 00000000..f4b6ab56 --- /dev/null +++ b/core/logging.h @@ -0,0 +1,51 @@ +#ifndef CORE_LOGGING_H +#define CORE_LOGGING_H + +#include <stdio.h> + +#include "opthelpers.h" + + +enum class LogLevel { + Disable, + Error, + Warning, + Trace +}; +extern LogLevel gLogLevel; + +extern FILE *gLogFile; + +#ifdef __USE_MINGW_ANSI_STDIO +[[gnu::format(gnu_printf,3,4)]] +#else +[[gnu::format(printf,3,4)]] +#endif +void al_print(LogLevel level, FILE *logfile, const char *fmt, ...); + +#if (!defined(_WIN32) || defined(NDEBUG)) && !defined(__ANDROID__) +#define TRACE(...) do { \ + if(gLogLevel >= LogLevel::Trace) UNLIKELY \ + al_print(LogLevel::Trace, gLogFile, __VA_ARGS__); \ +} while(0) + +#define WARN(...) do { \ + if(gLogLevel >= LogLevel::Warning) UNLIKELY \ + al_print(LogLevel::Warning, gLogFile, __VA_ARGS__); \ +} while(0) + +#define ERR(...) do { \ + if(gLogLevel >= LogLevel::Error) UNLIKELY \ + al_print(LogLevel::Error, gLogFile, __VA_ARGS__); \ +} while(0) + +#else + +#define TRACE(...) al_print(LogLevel::Trace, gLogFile, __VA_ARGS__) + +#define WARN(...) al_print(LogLevel::Warning, gLogFile, __VA_ARGS__) + +#define ERR(...) al_print(LogLevel::Error, gLogFile, __VA_ARGS__) +#endif + +#endif /* CORE_LOGGING_H */ diff --git a/core/mastering.cpp b/core/mastering.cpp new file mode 100644 index 00000000..97a4008e --- /dev/null +++ b/core/mastering.cpp @@ -0,0 +1,439 @@ + +#include "config.h" + +#include "mastering.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <functional> +#include <iterator> +#include <limits> +#include <new> + +#include "almalloc.h" +#include "alnumeric.h" +#include "alspan.h" +#include "opthelpers.h" + + +/* These structures assume BufferLineSize is a power of 2. 
*/
+static_assert((BufferLineSize & (BufferLineSize-1)) == 0, "BufferLineSize is not a power of 2");
+/* State for the sliding-window maximum behind the peak hold detector. Both
+ * arrays are used as a ring (indices wrap using BufferLineSize-1 as a mask).
+ * The entries from mUpperIndex through mLowerIndex are live, and the value at
+ * mUpperIndex is the one reported to the caller.
+ */
+struct SlidingHold {
+    alignas(16) float mValues[BufferLineSize];
+    uint mExpiries[BufferLineSize]; /* Sample index at which each value expires. */
+    uint mLowerIndex;
+    uint mUpperIndex;
+    uint mLength; /* Offset added to a sample index to get its expiry. */
+};
+
+
+namespace {
+
+using namespace std::placeholders;
+
+/* This sliding hold follows the input level with an instant attack and a
+ * fixed duration hold before an instant release to the next highest level.
+ * It is a sliding window maximum (descending maxima) implementation based on
+ * Richard Harter's ascending minima algorithm available at:
+ *
+ *   http://www.richardhartersworld.com/cri/2001/slidingmin.html
+ *
+ * Hold is the window state to update, i is the index of the sample being
+ * processed, and in is that sample's value. Returns the value held for sample
+ * i.
+ */
+float UpdateSlidingHold(SlidingHold *Hold, const uint i, const float in)
+{
+    static constexpr uint mask{BufferLineSize - 1};
+    const uint length{Hold->mLength};
+    float (&values)[BufferLineSize] = Hold->mValues;
+    uint (&expiries)[BufferLineSize] = Hold->mExpiries;
+    uint lowerIndex{Hold->mLowerIndex};
+    uint upperIndex{Hold->mUpperIndex};
+    /* The reported entry has reached its expiry, so move on to the next one. */
+    if(i >= expiries[upperIndex])
+        upperIndex = (upperIndex + 1) & mask;
+    /* An input at least as large as the reported entry overwrites it and
+     * becomes the only live entry.
+     */
+    if(in >= values[upperIndex])
+    {
+        values[upperIndex] = in;
+        expiries[upperIndex] = i + length;
+        lowerIndex = upperIndex;
+    }
+    else
+    { /* Otherwise scan back from the newest entry, wrapping from index 0 to
+       * the end of the ring, for the first entry the input is not >= to.
+       */
+        do {
+            do {
+                if(!(in >= values[lowerIndex]))
+                    goto found_place;
+            } while(lowerIndex--);
+            lowerIndex = mask;
+        } while(true);
+    found_place:
+        /* Store the input in the slot after the entry that was found. */
+        lowerIndex = (lowerIndex + 1) & mask;
+        values[lowerIndex] = in;
+        expiries[lowerIndex] = i + length;
+    }
+
+    Hold->mLowerIndex = lowerIndex;
+    Hold->mUpperIndex = upperIndex;
+
+    return values[upperIndex];
+}
+/* Subtracts n from the expiry of every live entry, i.e. those from
+ * mUpperIndex through mLowerIndex inclusive.
+ */
+void ShiftSlidingHold(SlidingHold *Hold, const uint n)
+{
+    auto exp_begin = std::begin(Hold->mExpiries) + Hold->mUpperIndex;
+    auto exp_last = std::begin(Hold->mExpiries) + Hold->mLowerIndex;
+    if(exp_last-exp_begin < 0) /* Live range wraps; handle the tail end first. */
+    {
+        std::transform(exp_begin, std::end(Hold->mExpiries), exp_begin,
+            [n](uint e){ return e - n; });
+        exp_begin = std::begin(Hold->mExpiries);
+    }
+    std::transform(exp_begin, exp_last+1, exp_begin, [n](uint e){ return e - n; });
+}
+
+
+/* Multichannel compression is linked via the absolute maximum of all
+ * channels.
+ *
+ * The SamplesToDo side-chain entries starting at offset mLookAhead are
+ * replaced with, per sample, the largest absolute value found across the
+ * first mNumChans lines of OutBuffer.
+ */
+void LinkChannels(Compressor *Comp, const uint SamplesToDo, const FloatBufferLine *OutBuffer)
+{
+    const size_t numChans{Comp->mNumChans};
+
+    ASSUME(SamplesToDo > 0);
+    ASSUME(numChans > 0);
+
+    auto side_begin = std::begin(Comp->mSideChain) + Comp->mLookAhead;
+    std::fill(side_begin, side_begin+SamplesToDo, 0.0f);
+    /* For one channel: side[k] = maxf(side[k], fabs(input[k])). */
+    auto fill_max = [SamplesToDo,side_begin](const FloatBufferLine &input) -> void
+    {
+        const float *RESTRICT buffer{al::assume_aligned<16>(input.data())};
+        auto max_abs = std::bind(maxf, _1, std::bind(static_cast<float(&)(float)>(std::fabs), _2));
+        std::transform(side_begin, side_begin+SamplesToDo, buffer, side_begin, max_abs);
+    };
+    std::for_each(OutBuffer, OutBuffer+numChans, fill_max);
+}
+
+/* This calculates the squared crest factor of the control signal for the
+ * basic automation of the attack/release times. As suggested by the paper,
+ * it uses an instantaneous squared peak detector and a squared RMS detector
+ * both with 200ms release times.
+ */
+void CrestDetector(Compressor *Comp, const uint SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    const float coeff{Comp->mCrestCoeff};
+    float peakSq{Comp->mLastPeakSq};
+    float rmsSq{Comp->mLastRmsSq};
+
+    /* The input is the side-chain signal starting mLookAhead entries into the
+     * buffer; one crest value is written per input sample.
+     */
+    const float *input{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+    float *crest{std::begin(Comp->mCrestFactor)};
+    for(uint idx{0};idx < SamplesToDo;++idx)
+    {
+        /* Squared level, limited to the range 1e-6...1e+6. */
+        const float levelSq{clampf(input[idx] * input[idx], 0.000001f, 1000000.0f)};
+
+        peakSq = maxf(levelSq, lerpf(levelSq, peakSq, coeff));
+        rmsSq = lerpf(levelSq, rmsSq, coeff);
+        crest[idx] = peakSq / rmsSq;
+    }
+
+    /* Keep the detector state for the next call. */
+    Comp->mLastPeakSq = peakSq;
+    Comp->mLastRmsSq = rmsSq;
+}
+
+/* Plain peak detector: the side-chain already holds the absolute signal
+ * level, so each of the SamplesToDo entries starting at mLookAhead is simply
+ * converted to the log domain in place.
+ */
+void PeakDetector(Compressor *Comp, const uint SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    float *level{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+    for(uint idx{0};idx < SamplesToDo;++idx)
+    {
+        /* Floor the amplitude at near-zero before taking the logarithm. */
+        level[idx] = std::log(maxf(0.000001f, level[idx]));
+    }
+}
+
+/* Peak detector with hold: each log-domain level is run through the sliding
+ * hold before being stored back, which helps catch fast transients. This is
+ * best used when operating as a limiter.
+ */
+void PeakHoldDetector(Compressor *Comp, const uint SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    SlidingHold *const holdState{Comp->mHold};
+    float *level{std::begin(Comp->mSideChain) + Comp->mLookAhead};
+    for(uint idx{0};idx < SamplesToDo;++idx)
+    {
+        const float logLevel{std::log(maxf(0.000001f, level[idx]))};
+        level[idx] = UpdateSlidingHold(holdState, idx, logLevel);
+    }
+
+    /* Rebase the stored expiries now that this block is done. */
+    ShiftSlidingHold(holdState, SamplesToDo);
+}
+
+/* This is the heart of the feed-forward compressor.
It operates in the log
+ * domain (to better match human hearing) and can apply some basic automation
+ * to knee width, attack/release times, make-up/post gain, and clipping
+ * reduction.
+ *
+ * For each of the first SamplesToDo side-chain entries, the control value
+ * mLookAhead entries further on is read, and the entry itself is replaced
+ * with exp(postGain - y_L). The smoothing state is carried between calls in
+ * mLastRelease, mLastAttack and mLastGainDev.
+ */
+void GainCompressor(Compressor *Comp, const uint SamplesToDo)
+{
+    const bool autoKnee{Comp->mAuto.Knee};
+    const bool autoAttack{Comp->mAuto.Attack};
+    const bool autoRelease{Comp->mAuto.Release};
+    const bool autoPostGain{Comp->mAuto.PostGain};
+    const bool autoDeclip{Comp->mAuto.Declip};
+    const uint lookAhead{Comp->mLookAhead};
+    const float threshold{Comp->mThreshold};
+    const float slope{Comp->mSlope};
+    const float attack{Comp->mAttack};
+    const float release{Comp->mRelease};
+    const float c_est{Comp->mGainEstimate};
+    const float a_adp{Comp->mAdaptCoeff};
+    const float *crestFactor{Comp->mCrestFactor};
+    float postGain{Comp->mPostGain};
+    float knee{Comp->mKnee};
+    float t_att{attack};
+    float t_rel{release - attack}; /* NOTE(review): <= 0 when release <= attack, which makes a_rel 0 or greater than 1 -- confirm callers keep release > attack. */
+    float a_att{std::exp(-1.0f / t_att)};
+    float a_rel{std::exp(-1.0f / t_rel)};
+    float y_1{Comp->mLastRelease};
+    float y_L{Comp->mLastAttack};
+    float c_dev{Comp->mLastGainDev};
+
+    ASSUME(SamplesToDo > 0);
+
+    for(float &sideChain : al::span<float>{Comp->mSideChain, SamplesToDo})
+    {
+        if(autoKnee)
+            knee = maxf(0.0f, 2.5f * (c_dev + c_est));
+        const float knee_h{0.5f * knee};
+
+        /* This is the gain computer. It applies a static compression curve
+         * to the control signal.
+         *
+         * x_over is the looked-ahead control value relative to the threshold.
+         * y_G is 0 at or below the bottom of the knee, quadratic inside the
+         * knee, and x_over itself above it.
+         */
+        const float x_over{std::addressof(sideChain)[lookAhead] - threshold};
+        const float y_G{
+            (x_over <= -knee_h) ? 0.0f :
+            (std::fabs(x_over) < knee_h) ? (x_over + knee_h) * (x_over + knee_h) / (2.0f * knee) :
+            x_over};
+        /* When automated, the times are divided by the squared crest factor,
+         * so a larger crest factor gives shorter attack/release times.
+         */
+        const float y2_crest{*(crestFactor++)};
+        if(autoAttack)
+        {
+            t_att = 2.0f*attack/y2_crest;
+            a_att = std::exp(-1.0f / t_att);
+        }
+        if(autoRelease)
+        {
+            t_rel = 2.0f*release/y2_crest - t_att;
+            a_rel = std::exp(-1.0f / t_rel);
+        }
+
+        /* Gain smoothing (ballistics) is done via a smooth decoupled peak
+         * detector. The attack time is subtracted from the release time
+         * above to compensate for the chained operating mode.
+         */
+        const float x_L{-slope * y_G};
+        y_1 = maxf(x_L, lerpf(x_L, y_1, a_rel));
+        y_L = lerpf(y_1, y_L, a_att);
+
+        /* Knee width and make-up gain automation make use of a smoothed
+         * measurement of deviation between the control signal and estimate.
+         * The estimate is also used to bias the measurement to hot-start its
+         * average.
+         */
+        c_dev = lerpf(-(y_L+c_est), c_dev, a_adp);
+
+        if(autoPostGain)
+        {
+            /* Clipping reduction is only viable when make-up gain is being
+             * automated. It modifies the deviation to further attenuate the
+             * control signal when clipping is detected. The adaptation time
+             * is sufficiently long enough to suppress further clipping at the
+             * same output level.
+             */
+            if(autoDeclip)
+                c_dev = maxf(c_dev, sideChain - y_L - threshold - c_est);
+
+            postGain = -(c_dev + c_est);
+        }
+        /* Back to the linear domain: this entry now holds the sample's gain. */
+        sideChain = std::exp(postGain - y_L);
+    }
+
+    Comp->mLastRelease = y_1;
+    Comp->mLastAttack = y_L;
+    Comp->mLastGainDev = c_dev;
+}
+
+/* Combined with the hold time, a look-ahead delay can improve handling of
+ * fast transients by allowing the envelope time to converge prior to
+ * reaching the offending impulse. This is best used when operating as a
+ * limiter.
+ */ +void SignalDelay(Compressor *Comp, const uint SamplesToDo, FloatBufferLine *OutBuffer) +{ + const size_t numChans{Comp->mNumChans}; + const uint lookAhead{Comp->mLookAhead}; + + ASSUME(SamplesToDo > 0); + ASSUME(numChans > 0); + ASSUME(lookAhead > 0); + + for(size_t c{0};c < numChans;c++) + { + float *inout{al::assume_aligned<16>(OutBuffer[c].data())}; + float *delaybuf{al::assume_aligned<16>(Comp->mDelay[c].data())}; + + auto inout_end = inout + SamplesToDo; + if(SamplesToDo >= lookAhead) LIKELY + { + auto delay_end = std::rotate(inout, inout_end - lookAhead, inout_end); + std::swap_ranges(inout, delay_end, delaybuf); + } + else + { + auto delay_start = std::swap_ranges(inout, inout_end, delaybuf); + std::rotate(delaybuf, delay_start, delaybuf + lookAhead); + } + } +} + +} // namespace + + +std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const float SampleRate, + const bool AutoKnee, const bool AutoAttack, const bool AutoRelease, const bool AutoPostGain, + const bool AutoDeclip, const float LookAheadTime, const float HoldTime, const float PreGainDb, + const float PostGainDb, const float ThresholdDb, const float Ratio, const float KneeDb, + const float AttackTime, const float ReleaseTime) +{ + const auto lookAhead = static_cast<uint>( + clampf(std::round(LookAheadTime*SampleRate), 0.0f, BufferLineSize-1)); + const auto hold = static_cast<uint>( + clampf(std::round(HoldTime*SampleRate), 0.0f, BufferLineSize-1)); + + size_t size{sizeof(Compressor)}; + if(lookAhead > 0) + { + size += sizeof(*Compressor::mDelay) * NumChans; + /* The sliding hold implementation doesn't handle a length of 1. A 1- + * sample hold is useless anyway, it would only ever give back what was + * just given to it. 
+ */ + if(hold > 1) + size += sizeof(*Compressor::mHold); + } + + auto Comp = CompressorPtr{al::construct_at(static_cast<Compressor*>(al_calloc(16, size)))}; + Comp->mNumChans = NumChans; + Comp->mAuto.Knee = AutoKnee; + Comp->mAuto.Attack = AutoAttack; + Comp->mAuto.Release = AutoRelease; + Comp->mAuto.PostGain = AutoPostGain; + Comp->mAuto.Declip = AutoPostGain && AutoDeclip; + Comp->mLookAhead = lookAhead; + Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f); + Comp->mPostGain = PostGainDb * std::log(10.0f) / 20.0f; + Comp->mThreshold = ThresholdDb * std::log(10.0f) / 20.0f; + Comp->mSlope = 1.0f / maxf(1.0f, Ratio) - 1.0f; + Comp->mKnee = maxf(0.0f, KneeDb * std::log(10.0f) / 20.0f); + Comp->mAttack = maxf(1.0f, AttackTime * SampleRate); + Comp->mRelease = maxf(1.0f, ReleaseTime * SampleRate); + + /* Knee width automation actually treats the compressor as a limiter. By + * varying the knee width, it can effectively be seen as applying + * compression over a wide range of ratios. + */ + if(AutoKnee) + Comp->mSlope = -1.0f; + + if(lookAhead > 0) + { + if(hold > 1) + { + Comp->mHold = al::construct_at(reinterpret_cast<SlidingHold*>(Comp.get() + 1)); + Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity(); + Comp->mHold->mExpiries[0] = hold; + Comp->mHold->mLength = hold; + Comp->mDelay = reinterpret_cast<FloatBufferLine*>(Comp->mHold + 1); + } + else + Comp->mDelay = reinterpret_cast<FloatBufferLine*>(Comp.get() + 1); + std::uninitialized_fill_n(Comp->mDelay, NumChans, FloatBufferLine{}); + } + + Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms + Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope; + Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s + + return Comp; +} + +Compressor::~Compressor() +{ + if(mHold) + al::destroy_at(mHold); + mHold = nullptr; + if(mDelay) + al::destroy_n(mDelay, mNumChans); + mDelay = nullptr; +} + + +void Compressor::process(const uint SamplesToDo, FloatBufferLine *OutBuffer) 
+{ + const size_t numChans{mNumChans}; + + ASSUME(SamplesToDo > 0); + ASSUME(numChans > 0); + + const float preGain{mPreGain}; + if(preGain != 1.0f) + { + auto apply_gain = [SamplesToDo,preGain](FloatBufferLine &input) noexcept -> void + { + float *buffer{al::assume_aligned<16>(input.data())}; + std::transform(buffer, buffer+SamplesToDo, buffer, + [preGain](float s) { return s * preGain; }); + }; + std::for_each(OutBuffer, OutBuffer+numChans, apply_gain); + } + + LinkChannels(this, SamplesToDo, OutBuffer); + + if(mAuto.Attack || mAuto.Release) + CrestDetector(this, SamplesToDo); + + if(mHold) + PeakHoldDetector(this, SamplesToDo); + else + PeakDetector(this, SamplesToDo); + + GainCompressor(this, SamplesToDo); + + if(mDelay) + SignalDelay(this, SamplesToDo, OutBuffer); + + const float (&sideChain)[BufferLineSize*2] = mSideChain; + auto apply_comp = [SamplesToDo,&sideChain](FloatBufferLine &input) noexcept -> void + { + float *buffer{al::assume_aligned<16>(input.data())}; + const float *gains{al::assume_aligned<16>(&sideChain[0])}; + std::transform(gains, gains+SamplesToDo, buffer, buffer, + [](float g, float s) { return g * s; }); + }; + std::for_each(OutBuffer, OutBuffer+numChans, apply_comp); + + auto side_begin = std::begin(mSideChain) + SamplesToDo; + std::copy(side_begin, side_begin+mLookAhead, std::begin(mSideChain)); +} diff --git a/core/mastering.h b/core/mastering.h new file mode 100644 index 00000000..1a36937c --- /dev/null +++ b/core/mastering.h @@ -0,0 +1,105 @@ +#ifndef CORE_MASTERING_H +#define CORE_MASTERING_H + +#include <memory> + +#include "almalloc.h" +#include "bufferline.h" + +struct SlidingHold; + +using uint = unsigned int; + + +/* General topology and basic automation was based on the following paper: + * + * D. Giannoulis, M. Massberg and J. D. Reiss, + * "Parameter Automation in a Dynamic Range Compressor," + * Journal of the Audio Engineering Society, v61 (10), Oct. 
2013
+ *
+ * Available (along with supplemental reading) at:
+ *
+ *   http://c4dm.eecs.qmul.ac.uk/audioengineering/compressors/
+ */
+struct Compressor {
+    size_t mNumChans{0u};
+    /* Which of the parameters are automated. */
+    struct {
+        bool Knee : 1;
+        bool Attack : 1;
+        bool Release : 1;
+        bool PostGain : 1;
+        bool Declip : 1;
+    } mAuto{};
+    /* Look-ahead length, in samples. */
+    uint mLookAhead{0};
+
+    float mPreGain{0.0f};
+    float mPostGain{0.0f};
+
+    float mThreshold{0.0f};
+    float mSlope{0.0f};
+    float mKnee{0.0f};
+
+    float mAttack{0.0f};
+    float mRelease{0.0f};
+    /* The side-chain is two buffer lines long: each block is written starting
+     * mLookAhead entries in, and process() moves the look-ahead entries back
+     * to the front afterward.
+     */
+    alignas(16) float mSideChain[2*BufferLineSize]{};
+    alignas(16) float mCrestFactor[BufferLineSize]{};
+    /* Optional hold state and per-channel delay lines; null when unused. */
+    SlidingHold *mHold{nullptr};
+    FloatBufferLine *mDelay{nullptr};
+
+    float mCrestCoeff{0.0f};
+    float mGainEstimate{0.0f};
+    float mAdaptCoeff{0.0f};
+    /* Detector and smoothing state carried from one process() call to the next. */
+    float mLastPeakSq{0.0f};
+    float mLastRmsSq{0.0f};
+    float mLastRelease{0.0f};
+    float mLastAttack{0.0f};
+    float mLastGainDev{0.0f};
+
+
+    ~Compressor();
+    void process(const uint SamplesToDo, FloatBufferLine *OutBuffer);
+    int getLookAhead() const noexcept { return static_cast<int>(mLookAhead); }
+
+    DEF_PLACE_NEWDEL()
+
+    /**
+     * The compressor is initialized with the following settings:
+     *
+     * \param NumChans      Number of channels to process.
+     * \param SampleRate    Sample rate to process.
+     * \param AutoKnee      Whether to automate the knee width parameter.
+     * \param AutoAttack    Whether to automate the attack time parameter.
+     * \param AutoRelease   Whether to automate the release time parameter.
+     * \param AutoPostGain  Whether to automate the make-up (post) gain
+     *                      parameter.
+     * \param AutoDeclip    Whether to automate clipping reduction. Ignored
+     *                      when not automating make-up gain.
+     * \param LookAheadTime Look-ahead time (in seconds).
+     * \param HoldTime      Peak hold-time (in seconds).
+     * \param PreGainDb     Gain applied before detection (in dB).
+     * \param PostGainDb    Make-up gain applied after compression (in dB).
+     * \param ThresholdDb   Triggering threshold (in dB).
+     * \param Ratio         Compression ratio (x:1). Set to INFINITY for true
+     *                      limiting. Ignored when automating knee width.
+     * \param KneeDb        Knee width (in dB). Ignored when automating knee
+     *                      width.
+     * \param AttackTime    Attack time (in seconds). Acts as a maximum when
+     *                      automating attack time.
+     * \param ReleaseTime   Release time (in seconds). Acts as a maximum when
+     *                      automating release time.
+     */
+    static std::unique_ptr<Compressor> Create(const size_t NumChans, const float SampleRate,
+        const bool AutoKnee, const bool AutoAttack, const bool AutoRelease,
+        const bool AutoPostGain, const bool AutoDeclip, const float LookAheadTime,
+        const float HoldTime, const float PreGainDb, const float PostGainDb,
+        const float ThresholdDb, const float Ratio, const float KneeDb, const float AttackTime,
+        const float ReleaseTime);
+};
+using CompressorPtr = std::unique_ptr<Compressor>;
+
+#endif /* CORE_MASTERING_H */
diff --git a/core/mixer.cpp b/core/mixer.cpp
new file mode 100644
index 00000000..066c57bd
--- /dev/null
+++ b/core/mixer.cpp
@@ -0,0 +1,95 @@
+
+#include "config.h"
+
+#include "mixer.h"
+
+#include <cmath>
+
+#include "alnumbers.h"
+#include "devformat.h"
+#include "device.h"
+#include "mixer/defs.h"
+
+struct CTag;
+
+
+MixerOutFunc MixSamplesOut{Mix_<CTag>};
+MixerOneFunc MixSamplesOne{Mix_<CTag>};
+
+/* Spread-aware overload: starts from the coefficients of the three-argument
+ * CalcAmbiCoeffs and, when spread is above 0, scales channels 0 through 15
+ * with one factor per ambisonic order (zeroth to third).
+ */
+std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x,
+    const float spread)
+{
+    std::array<float,MaxAmbiChannels> coeffs{CalcAmbiCoeffs(y, z, x)};
+
+    if(spread > 0.0f)
+    {
+        /* Implement the spread by using a spherical source that subtends the
+         * angle spread. See:
+         * http://www.ppsloan.org/publications/StupidSH36.pdf - Appendix A3
+         *
+         * When adjusted for N3D normalization instead of SN3D, these
+         * calculations are:
+         *
+         * ZH0 = -sqrt(pi) * (-1+ca);
+         * ZH1 =  0.5*sqrt(pi) * sa*sa;
+         * ZH2 = -0.5*sqrt(pi) * ca*(-1+ca)*(ca+1);
+         * ZH3 = -0.125*sqrt(pi) * (-1+ca)*(ca+1)*(5*ca*ca - 1);
+         * ZH4 = -0.125*sqrt(pi) * ca*(-1+ca)*(ca+1)*(7*ca*ca - 3);
+         * ZH5 = -0.0625*sqrt(pi) * (-1+ca)*(ca+1)*(21*ca*ca*ca*ca - 14*ca*ca + 1);
+         *
+         * The gain of the source is compensated for size, so that the
+         * loudness doesn't depend on the spread. Thus:
+         *
+         * ZH0 = 1.0f;
+         * ZH1 = 0.5f * (ca+1.0f);
+         * ZH2 = 0.5f * (ca+1.0f)*ca;
+         * ZH3 = 0.125f * (ca+1.0f)*(5.0f*ca*ca - 1.0f);
+         * ZH4 = 0.125f * (ca+1.0f)*(7.0f*ca*ca - 3.0f)*ca;
+         * ZH5 = 0.0625f * (ca+1.0f)*(21.0f*ca*ca*ca*ca - 14.0f*ca*ca + 1.0f);
+         *
+         * Only ZH0 through ZH3 are applied below.
+         */
+        const float ca{std::cos(spread * 0.5f)};
+        /* Increase the source volume by up to +3dB for a full spread. */
+        const float scale{std::sqrt(1.0f + al::numbers::inv_pi_v<float>/2.0f*spread)};
+
+        const float ZH0_norm{scale};
+        const float ZH1_norm{scale * 0.5f * (ca+1.f)};
+        const float ZH2_norm{scale * 0.5f * (ca+1.f)*ca};
+        const float ZH3_norm{scale * 0.125f * (ca+1.f)*(5.f*ca*ca-1.f)};
+
+        /* Zeroth-order */
+        coeffs[0] *= ZH0_norm;
+        /* First-order */
+        coeffs[1] *= ZH1_norm;
+        coeffs[2] *= ZH1_norm;
+        coeffs[3] *= ZH1_norm;
+        /* Second-order */
+        coeffs[4] *= ZH2_norm;
+        coeffs[5] *= ZH2_norm;
+        coeffs[6] *= ZH2_norm;
+        coeffs[7] *= ZH2_norm;
+        coeffs[8] *= ZH2_norm;
+        /* Third-order */
+        coeffs[9] *= ZH3_norm;
+        coeffs[10] *= ZH3_norm;
+        coeffs[11] *= ZH3_norm;
+        coeffs[12] *= ZH3_norm;
+        coeffs[13] *= ZH3_norm;
+        coeffs[14] *= ZH3_norm;
+        coeffs[15] *= ZH3_norm;
+    }
+
+    return coeffs;
+}
+/* For each of the first mix->Buffer.size() entries of mix->AmbiMap, the gain
+ * is Scale * coeffs[Index] * ingain. Any remaining gains are set to 0.
+ */
+void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
+    const al::span<float,MaxAmbiChannels> gains)
+{
+    auto ambimap = mix->AmbiMap.cbegin();
+
+    auto iter = std::transform(ambimap, ambimap+mix->Buffer.size(), gains.begin(),
+        [coeffs,ingain](const BFChannelConfig &chanmap) noexcept -> float
+        { return chanmap.Scale * coeffs[chanmap.Index] * ingain; }
+    );
+    std::fill(iter, gains.end(), 0.0f);
+}
diff --git a/core/mixer.h b/core/mixer.h
new file mode 100644
index 00000000..aa7597bb
--- /dev/null
+++ b/core/mixer.h
@@ -0,0 +1,109 @@
+#ifndef CORE_MIXER_H
+#define CORE_MIXER_H
+
+#include <array>
+#include <cmath>
+#include <stddef.h>
+#include <type_traits>
+
+#include "alspan.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "devformat.h"
+
+struct MixParams;
+
+/* Mixer functions that handle one input and multiple output channels. */
+using MixerOutFunc = void(*)(const al::span<const float> InSamples,
+    const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains,
+    const size_t Counter, const size_t OutPos);
+
+extern MixerOutFunc MixSamplesOut;
+inline void MixSamples(const al::span<const float> InSamples,
+    const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains,
+    const size_t Counter, const size_t OutPos)
+{ MixSamplesOut(InSamples, OutBuffer, CurrentGains, TargetGains, Counter, OutPos); }
+
+/* Mixer functions that handle one input and one output channel. */
+using MixerOneFunc = void(*)(const al::span<const float> InSamples, float *OutBuffer,
+    float &CurrentGain, const float TargetGain, const size_t Counter);
+
+extern MixerOneFunc MixSamplesOne;
+inline void MixSamples(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain,
+    const float TargetGain, const size_t Counter)
+{ MixSamplesOne(InSamples, OutBuffer, CurrentGain, TargetGain, Counter); }
+
+
+/**
+ * Calculates ambisonic encoder coefficients using the X, Y, and Z direction
+ * components, which must represent a normalized (unit length) vector, and the
+ * spread is the angular width of the sound (0...tau).
+ *
+ * NOTE: The components use ambisonic coordinates.
As a result: + * + * Ambisonic Y = OpenAL -X + * Ambisonic Z = OpenAL Y + * Ambisonic X = OpenAL -Z + * + * The components are ordered such that OpenAL's X, Y, and Z are the first, + * second, and third parameters respectively -- simply negate X and Z. + */ +std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x, + const float spread); + +/** + * CalcDirectionCoeffs + * + * Calculates ambisonic coefficients based on an OpenAL direction vector. The + * vector must be normalized (unit length), and the spread is the angular width + * of the sound (0...tau). + */ +inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3], + const float spread) +{ + /* Convert from OpenAL coords to Ambisonics. */ + return CalcAmbiCoeffs(-dir[0], dir[1], -dir[2], spread); +} + +/** + * CalcDirectionCoeffs + * + * Calculates ambisonic coefficients based on an OpenAL direction vector. The + * vector must be normalized (unit length). + */ +constexpr std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3]) +{ + /* Convert from OpenAL coords to Ambisonics. */ + return CalcAmbiCoeffs(-dir[0], dir[1], -dir[2]); +} + +/** + * CalcAngleCoeffs + * + * Calculates ambisonic coefficients based on azimuth and elevation. The + * azimuth and elevation parameters are in radians, going right and up + * respectively. + */ +inline std::array<float,MaxAmbiChannels> CalcAngleCoeffs(const float azimuth, + const float elevation, const float spread) +{ + const float x{-std::sin(azimuth) * std::cos(elevation)}; + const float y{ std::sin(elevation)}; + const float z{ std::cos(azimuth) * std::cos(elevation)}; + + return CalcAmbiCoeffs(x, y, z, spread); +} + + +/** + * ComputePanGains + * + * Computes panning gains using the given channel decoder coefficients and the + * pre-calculated direction or angle coefficients. 
For B-Format sources, the + * coeffs are a 'slice' of a transform matrix for the input channel, used to + * scale and orient the sound samples. + */ +void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain, + const al::span<float,MaxAmbiChannels> gains); + +#endif /* CORE_MIXER_H */ diff --git a/core/mixer/defs.h b/core/mixer/defs.h new file mode 100644 index 00000000..48daca9b --- /dev/null +++ b/core/mixer/defs.h @@ -0,0 +1,109 @@ +#ifndef CORE_MIXER_DEFS_H +#define CORE_MIXER_DEFS_H + +#include <array> +#include <stdlib.h> + +#include "alspan.h" +#include "core/bufferline.h" +#include "core/resampler_limits.h" + +struct CubicCoefficients; +struct HrtfChannelState; +struct HrtfFilter; +struct MixHrtfFilter; + +using uint = unsigned int; +using float2 = std::array<float,2>; + + +constexpr int MixerFracBits{16}; /* Fractional bits of the fixed-point sample position. */ +constexpr int MixerFracOne{1 << MixerFracBits}; /* 1.0 in that fixed-point format. */ +constexpr int MixerFracMask{MixerFracOne - 1}; /* Selects the fractional part. */ +constexpr int MixerFracHalf{MixerFracOne >> 1}; /* 0.5 in that fixed-point format. */ + +constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */ + + +enum class Resampler : uint8_t { /* NOTE(review): uint8_t is used without a direct <cstdint> or <stdint.h> include here; presumably it arrives through another header -- confirm. */ + Point, + Linear, + Cubic, + FastBSinc12, + BSinc12, + FastBSinc24, + BSinc24, + + Max = BSinc24 /* Alias of the last enumerator. */ +}; + +/* Interpolator state. Kind of a misnomer since the interpolator itself is + * stateless. This just keeps it from having to recompute scale-related + * mappings for every sample. + */ +struct BsincState { + float sf; /* Scale interpolation factor. */ + uint m; /* Coefficient count. */ + uint l; /* Left coefficient offset. */ + /* Filter coefficients, followed by the phase, scale, and scale-phase + * delta coefficients. Starting at phase index 0, each subsequent phase + * index follows contiguously. + */ + const float *filter; +}; + +struct CubicState { + /* Filter coefficients, and coefficient deltas. Starting at phase index 0, + * each subsequent phase index follows contiguously. 
+ */ + const CubicCoefficients *filter; +}; + +union InterpState { /* Resampler state: holds either the cubic or the bsinc variant. */ + CubicState cubic; + BsincState bsinc; +}; + +using ResamplerFunc = void(*)(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst); + +ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state); + + +template<typename TypeTag, typename InstTag> /* Primary template, declared only: TypeTag names the interpolation kind and InstTag the implementation, e.g. Resample_<CubicTag,CTag>. */ +void Resample_(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst); + +template<typename InstTag> +void Mix_(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer, + float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos); +template<typename InstTag> +void Mix_(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain, + const float TargetGain, const size_t Counter); + +template<typename InstTag> +void MixHrtf_(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize); +template<typename InstTag> +void MixHrtfBlend_(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize); +template<typename InstTag> +void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, + const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, + float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize); + +/* Vectorized resampler helpers. InitPosArrays fills frac_arr and pos_arr with the fractional offset and the whole-sample offset of N consecutive output samples: entry 0 is (frac, 0), and each later entry advances the previous one by increment, carrying any overflow of the fractional part into pos_arr. */ +template<size_t N> +inline void InitPosArrays(uint frac, uint increment, uint (&frac_arr)[N], uint (&pos_arr)[N]) +{ + pos_arr[0] = 0; + frac_arr[0] = frac; + for(size_t i{1};i < N;i++) + { + const uint frac_tmp{frac_arr[i-1] + increment}; + pos_arr[i] = pos_arr[i-1] + (frac_tmp>>MixerFracBits); + frac_arr[i] = frac_tmp&MixerFracMask; + } +} + +#endif /* CORE_MIXER_DEFS_H */ diff --git 
a/core/mixer/hrtfbase.h b/core/mixer/hrtfbase.h new file mode 100644 index 00000000..36f88e49 --- /dev/null +++ b/core/mixer/hrtfbase.h @@ -0,0 +1,129 @@ +#ifndef CORE_MIXER_HRTFBASE_H +#define CORE_MIXER_HRTFBASE_H + +#include <algorithm> +#include <cmath> + +#include "almalloc.h" +#include "hrtfdefs.h" +#include "opthelpers.h" + + +using uint = unsigned int; + +using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const size_t irSize, + const ConstHrirSpan Coeffs, const float left, const float right); /* Signature of the kernel the templates below are instantiated with. */ + +template<ApplyCoeffsT ApplyCoeffs> /* MixHrtfBase: filters BufferSize input samples through hrtfparams->Coeffs into AccumSamples, with the gain starting at hrtfparams->Gain and changing by GainStep for every sample. */ +inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, const size_t IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize) +{ + ASSUME(BufferSize > 0); + + const ConstHrirSpan Coeffs{hrtfparams->Coeffs}; + const float gainstep{hrtfparams->GainStep}; + const float gain{hrtfparams->Gain}; + + size_t ldelay{HrtfHistoryLength - hrtfparams->Delay[0]}; + size_t rdelay{HrtfHistoryLength - hrtfparams->Delay[1]}; /* Reading starts Delay entries before index HrtfHistoryLength of InSamples; Delay[0] feeds the left value and Delay[1] the right. */ + float stepcount{0.0f}; + for(size_t i{0u};i < BufferSize;++i) + { + const float g{gain + gainstep*stepcount}; + const float left{InSamples[ldelay++] * g}; + const float right{InSamples[rdelay++] * g}; + ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, left, right); + + stepcount += 1.0f; + } +} + +template<ApplyCoeffsT ApplyCoeffs> /* MixHrtfBlendBase: crossfades between two filters over BufferSize samples. The gain of the old filter falls linearly from oldparams->Gain, while the gain of the new filter grows by newparams->GainStep per sample. The old pass runs only if oldparams->Gain exceeds GainSilenceThreshold, the new pass only if newGainStep*BufferSize does. */ +inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSamples, + const size_t IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams, + const size_t BufferSize) +{ + ASSUME(BufferSize > 0); + + const ConstHrirSpan OldCoeffs{oldparams->Coeffs}; + const float oldGainStep{oldparams->Gain / static_cast<float>(BufferSize)}; + const ConstHrirSpan NewCoeffs{newparams->Coeffs}; + const float newGainStep{newparams->GainStep}; + + if(oldparams->Gain > GainSilenceThreshold) LIKELY + { + size_t ldelay{HrtfHistoryLength - oldparams->Delay[0]}; + size_t rdelay{HrtfHistoryLength - oldparams->Delay[1]}; + auto stepcount = 
static_cast<float>(BufferSize); + for(size_t i{0u};i < BufferSize;++i) + { + const float g{oldGainStep*stepcount}; + const float left{InSamples[ldelay++] * g}; + const float right{InSamples[rdelay++] * g}; + ApplyCoeffs(AccumSamples+i, IrSize, OldCoeffs, left, right); + + stepcount -= 1.0f; + } + } + + if(newGainStep*static_cast<float>(BufferSize) > GainSilenceThreshold) LIKELY + { + size_t ldelay{HrtfHistoryLength+1 - newparams->Delay[0]}; + size_t rdelay{HrtfHistoryLength+1 - newparams->Delay[1]}; + float stepcount{1.0f}; /* Sample 0 would get zero gain, so the loop index, the step count and both read offsets start one sample in. */ + for(size_t i{1u};i < BufferSize;++i) + { + const float g{newGainStep*stepcount}; + const float left{InSamples[ldelay++] * g}; + const float right{InSamples[rdelay++] * g}; + ApplyCoeffs(AccumSamples+i, IrSize, NewCoeffs, left, right); + + stepcount += 1.0f; + } + } +} + +template<ApplyCoeffsT ApplyCoeffs> /* MixDirectHrtfBase: for each input line, writes the HF-scaled signal to TempBuf and accumulates the HRIR response of the current ChanState entry into AccumSamples (the same sample feeds both ears). Afterwards the accumulated result is added to LeftOut and RightOut and AccumSamples is shifted down by BufferSize. ChanState is advanced once per input line. */ +inline void MixDirectHrtfBase(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, + const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples, + float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) +{ + ASSUME(BufferSize > 0); + + for(const FloatBufferLine &input : InSamples) + { + /* For dual-band processing, the signal needs extra scaling applied to + * the high frequency response. The band-splitter applies this scaling + * with a consistent phase shift regardless of the scale amount. + */ + ChanState->mSplitter.processHfScale({input.data(), BufferSize}, TempBuf, + ChanState->mHfScale); + + /* Now apply the HRIR coefficients to this channel. */ + const float *RESTRICT tempbuf{al::assume_aligned<16>(TempBuf)}; + const ConstHrirSpan Coeffs{ChanState->mCoeffs}; + for(size_t i{0u};i < BufferSize;++i) + { + const float insample{tempbuf[i]}; + ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, insample, insample); + } + + ++ChanState; + } + + /* Add the HRTF signal to the existing "direct" signal. 
*/ + float *RESTRICT left{al::assume_aligned<16>(LeftOut.data())}; + float *RESTRICT right{al::assume_aligned<16>(RightOut.data())}; + for(size_t i{0u};i < BufferSize;++i) + left[i] += AccumSamples[i][0]; + for(size_t i{0u};i < BufferSize;++i) + right[i] += AccumSamples[i][1]; + + /* Copy the new in-progress accumulation values to the front and clear the + * following samples for the next mix. HrirLength entries are moved down by BufferSize, then the BufferSize entries after them are zeroed. + */ + auto accum_iter = std::copy_n(AccumSamples+BufferSize, HrirLength, AccumSamples); + std::fill_n(accum_iter, BufferSize, float2{}); +} + +#endif /* CORE_MIXER_HRTFBASE_H */ diff --git a/core/mixer/hrtfdefs.h b/core/mixer/hrtfdefs.h new file mode 100644 index 00000000..3c903ed8 --- /dev/null +++ b/core/mixer/hrtfdefs.h @@ -0,0 +1,53 @@ +#ifndef CORE_MIXER_HRTFDEFS_H +#define CORE_MIXER_HRTFDEFS_H + +#include <array> + +#include "alspan.h" +#include "core/ambidefs.h" +#include "core/bufferline.h" +#include "core/filters/splitter.h" + + +using float2 = std::array<float,2>; +using ubyte = unsigned char; +using ubyte2 = std::array<ubyte,2>; +using ushort = unsigned short; +using uint = unsigned int; +using uint2 = std::array<uint,2>; + +constexpr uint HrtfHistoryBits{6}; +constexpr uint HrtfHistoryLength{1 << HrtfHistoryBits}; /* 64 */ +constexpr uint HrtfHistoryMask{HrtfHistoryLength - 1}; + +constexpr uint HrirBits{7}; +constexpr uint HrirLength{1 << HrirBits}; /* 128 */ +constexpr uint HrirMask{HrirLength - 1}; + +constexpr uint MinIrLength{8}; + +using HrirArray = std::array<float2,HrirLength>; +using HrirSpan = al::span<float2,HrirLength>; +using ConstHrirSpan = al::span<const float2,HrirLength>; + +struct MixHrtfFilter { /* Parameters for one HRTF mix: a view of the coefficients, a delay per ear, the starting gain and the per-sample gain step. */ + const ConstHrirSpan Coeffs; + uint2 Delay; + float Gain; + float GainStep; +}; + +struct HrtfFilter { /* Stores its own coefficient array, a delay per ear and a single gain, without a step. */ + alignas(16) HrirArray Coeffs; + uint2 Delay; + float Gain; +}; + + +struct HrtfChannelState { /* Per-channel state read by MixDirectHrtfBase: band splitter, HF scale and HRIR coefficients. */ + BandSplitter mSplitter; + float mHfScale{}; + alignas(16) HrirArray mCoeffs{}; +}; + +#endif /* CORE_MIXER_HRTFDEFS_H */ diff --git a/core/mixer/mixer_c.cpp 
b/core/mixer/mixer_c.cpp new file mode 100644 index 00000000..28a92ef7 --- /dev/null +++ b/core/mixer/mixer_c.cpp @@ -0,0 +1,218 @@ +#include "config.h" + +#include <cassert> +#include <cmath> +#include <limits> + +#include "alnumeric.h" +#include "core/bsinc_defs.h" +#include "core/cubic_defs.h" +#include "defs.h" +#include "hrtfbase.h" + +struct CTag; +struct PointTag; +struct LerpTag; +struct CubicTag; +struct BSincTag; +struct FastBSincTag; + + +namespace { + +constexpr uint BsincPhaseDiffBits{MixerFracBits - BSincPhaseBits}; /* The fraction is split in two: the bits above this count select the phase index, the bits below form the interpolation factor between phases. */ +constexpr uint BsincPhaseDiffOne{1 << BsincPhaseDiffBits}; +constexpr uint BsincPhaseDiffMask{BsincPhaseDiffOne - 1u}; + +constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits}; +constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits}; +constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u}; /* The samplers below each return one interpolated value from the input at vals for the fixed-point fraction frac. */ + +inline float do_point(const InterpState&, const float *RESTRICT vals, const uint) +{ return vals[0]; } +inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac) +{ return lerpf(vals[0], vals[1], static_cast<float>(frac)*(1.0f/MixerFracOne)); } +inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac) +{ + /* Calculate the phase index and factor. */ + const uint pi{frac >> CubicPhaseDiffBits}; + const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)}; + + const float *RESTRICT fil{al::assume_aligned<16>(istate.cubic.filter[pi].mCoeffs)}; + const float *RESTRICT phd{al::assume_aligned<16>(istate.cubic.filter[pi].mDeltas)}; + + /* Apply the phase interpolated filter. */ + return (fil[0] + pf*phd[0])*vals[0] + (fil[1] + pf*phd[1])*vals[1] + + (fil[2] + pf*phd[2])*vals[2] + (fil[3] + pf*phd[3])*vals[3]; +} +inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) +{ + const size_t m{istate.bsinc.m}; + ASSUME(m > 0); + + /* Calculate the phase index and factor. 
*/ + const uint pi{frac >> BsincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)}; + + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; /* Table layout as indexed here: each phase index owns 2*m floats (m coefficients, then m phase deltas); the scale deltas and scale-phase deltas follow the same pattern BSincPhaseCount*2*m floats further on. */ + + /* Apply the scale and phase interpolated filter. */ + float r{0.0f}; + for(size_t j_f{0};j_f < m;j_f++) + r += (fil[j_f] + istate.bsinc.sf*scd[j_f] + pf*(phd[j_f] + istate.bsinc.sf*spd[j_f])) * vals[j_f]; + return r; +} +inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) +{ + const size_t m{istate.bsinc.m}; + ASSUME(m > 0); + + /* Calculate the phase index and factor. */ + const uint pi{frac >> BsincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)}; + + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; /* Unlike do_bsinc, the scale and scale-phase deltas are not used here. */ + + /* Apply the phase interpolated filter. 
*/ + float r{0.0f}; + for(size_t j_f{0};j_f < m;j_f++) + r += (fil[j_f] + pf*phd[j_f]) * vals[j_f]; + return r; +} + +using SamplerT = float(&)(const InterpState&, const float*RESTRICT, const uint); +template<SamplerT Sampler> /* DoResample: fills dst by sampling at the current position, then adding increment to frac and moving src forward by the whole-sample part of the result. */ +void DoResample(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + const InterpState istate{*state}; + ASSUME(frac < MixerFracOne); + for(float &out : dst) + { + out = Sampler(istate, src, frac); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, + const float left, const float right) +{ + ASSUME(IrSize >= MinIrLength); + for(size_t c{0};c < IrSize;++c) + { + Values[c][0] += Coeffs[c][0] * left; + Values[c][1] += Coeffs[c][1] * right; + } +} + +force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst, + float &CurrentGain, const float TargetGain, const float delta, const size_t min_len, + size_t Counter) /* Adds InSamples into dst: the gain is ramped linearly over the first min_len samples, then the rest of InSamples is mixed at a constant gain unless that gain is within GainSilenceThreshold of zero. CurrentGain is updated to the gain reached. */ +{ + float gain{CurrentGain}; + const float step{(TargetGain-gain) * delta}; + + size_t pos{0}; + if(!(std::abs(step) > std::numeric_limits<float>::epsilon())) /* Negligible step: skip the ramp and jump to the target. Also taken when step is NaN, since the comparison is then false. */ + gain = TargetGain; + else + { + float step_count{0.0f}; + for(;pos != min_len;++pos) + { + dst[pos] += InSamples[pos] * (gain + step*step_count); + step_count += 1.0f; + } + if(pos == Counter) /* The whole ramp was consumed, so land exactly on the target; otherwise keep the gain the ramp got to. */ + gain = TargetGain; + else + gain += step*step_count; + } + CurrentGain = gain; + + if(!(std::abs(gain) > GainSilenceThreshold)) + return; + for(;pos != InSamples.size();++pos) + dst[pos] += InSamples[pos] * gain; +} + +} // namespace + +template<> +void Resample_<PointTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ DoResample<do_point>(state, src, frac, increment, dst); } + +template<> +void Resample_<LerpTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const 
uint increment, const al::span<float> dst) +{ DoResample<do_lerp>(state, src, frac, increment, dst); } + +template<> +void Resample_<CubicTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ DoResample<do_cubic>(state, src-1, frac, increment, dst); } /* do_cubic reads vals[0] to vals[3], so starting at src-1 makes the taps cover src[-1] through src[2]. */ + +template<> +void Resample_<BSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); } /* The taps start bsinc.l samples before src. */ + +template<> +void Resample_<FastBSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ DoResample<do_fastbsinc>(state, src-state->bsinc.l, frac, increment, dst); } + + +template<> +void MixHrtf_<CTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize) +{ MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize); } + +template<> +void MixHrtfBlend_<CTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize) +{ + MixHrtfBlendBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, oldparams, newparams, + BufferSize); +} + +template<> +void MixDirectHrtf_<CTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, + const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, + float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState, + IrSize, BufferSize); +} + + +template<> +void Mix_<CTag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer, + float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos) +{ + const float delta{(Counter > 0) ? 
1.0f / static_cast<float>(Counter) : 0.0f}; /* Share of the remaining gain change applied per sample; 0 makes MixLine jump straight to the target gain. */ + const auto min_len = minz(Counter, InSamples.size()); /* Number of samples that get a ramped gain. */ + + for(FloatBufferLine &output : OutBuffer) /* Each output line consumes the next entry of CurrentGains and TargetGains. */ + MixLine(InSamples, al::assume_aligned<16>(output.data()+OutPos), *CurrentGains++, + *TargetGains++, delta, min_len, Counter); +} + +template<> +void Mix_<CTag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain, + const float TargetGain, const size_t Counter) +{ + const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f}; + const auto min_len = minz(Counter, InSamples.size()); + + MixLine(InSamples, al::assume_aligned<16>(OutBuffer), CurrentGain, + TargetGain, delta, min_len, Counter); +} diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp new file mode 100644 index 00000000..ef2936b3 --- /dev/null +++ b/core/mixer/mixer_neon.cpp @@ -0,0 +1,362 @@ +#include "config.h" + +#include <arm_neon.h> + +#include <cmath> +#include <limits> + +#include "alnumeric.h" +#include "core/bsinc_defs.h" +#include "core/cubic_defs.h" +#include "defs.h" +#include "hrtfbase.h" + +struct NEONTag; +struct LerpTag; +struct CubicTag; +struct BSincTag; +struct FastBSincTag; + + +#if defined(__GNUC__) && !defined(__clang__) && !defined(__ARM_NEON) +#pragma GCC target("fpu=neon") +#endif + +namespace { + +constexpr uint BSincPhaseDiffBits{MixerFracBits - BSincPhaseBits}; +constexpr uint BSincPhaseDiffOne{1 << BSincPhaseDiffBits}; +constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u}; + +constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits}; +constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits}; +constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u}; + +inline float32x4_t set_f4(float l0, float l1, float l2, float l3) /* Builds the vector {l0, l1, l2, l3}. */ +{ + float32x4_t ret{vmovq_n_f32(l0)}; + ret = vsetq_lane_f32(l1, ret, 1); + ret = vsetq_lane_f32(l2, ret, 2); + ret = vsetq_lane_f32(l3, ret, 3); + return ret; +} + +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const 
ConstHrirSpan Coeffs, + const float left, const float right) +{ + float32x4_t leftright4; /* Set below to {left, right, left, right}. */ + { + float32x2_t leftright2{vmov_n_f32(left)}; + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + + ASSUME(IrSize >= MinIrLength); + for(size_t c{0};c < IrSize;c += 2) /* Two float2 entries (four floats) per iteration, so an odd IrSize also updates the entry at index IrSize. */ + { + float32x4_t vals = vld1q_f32(&Values[c][0]); + float32x4_t coefs = vld1q_f32(&Coeffs[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + + vst1q_f32(&Values[c][0], vals); + } +} + +force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst, + float &CurrentGain, const float TargetGain, const float delta, const size_t min_len, + const size_t aligned_len, size_t Counter) /* Adds InSamples into dst. When a ramp is applied, the gain is stepped over the first min_len samples (four at a time, then the remainder one by one) and aligned_len further samples are mixed at the resulting gain. Whatever is left is then mixed at constant gain unless that gain is within GainSilenceThreshold of zero. CurrentGain is updated to the gain reached. */ +{ + float gain{CurrentGain}; + const float step{(TargetGain-gain) * delta}; + + size_t pos{0}; + if(!(std::abs(step) > std::numeric_limits<float>::epsilon())) + gain = TargetGain; + else + { + float step_count{0.0f}; + /* Mix with applying gain steps in aligned multiples of 4. */ + if(size_t todo{min_len >> 2}) + { + const float32x4_t four4{vdupq_n_f32(4.0f)}; + const float32x4_t step4{vdupq_n_f32(step)}; + const float32x4_t gain4{vdupq_n_f32(gain)}; + float32x4_t step_count4{vdupq_n_f32(0.0f)}; + step_count4 = vsetq_lane_f32(1.0f, step_count4, 1); + step_count4 = vsetq_lane_f32(2.0f, step_count4, 2); + step_count4 = vsetq_lane_f32(3.0f, step_count4, 3); + + do { + const float32x4_t val4 = vld1q_f32(&InSamples[pos]); + float32x4_t dry4 = vld1q_f32(&dst[pos]); + dry4 = vmlaq_f32(dry4, val4, vmlaq_f32(gain4, step4, step_count4)); + step_count4 = vaddq_f32(step_count4, four4); + vst1q_f32(&dst[pos], dry4); + pos += 4; + } while(--todo); + /* NOTE: step_count4 now represents the next four counts after the + * last four mixed samples, so the lowest element represents the + * next step count to apply. + */ + step_count = vgetq_lane_f32(step_count4, 0); + } + /* Mix with applying left over gain steps that aren't aligned multiples of 4. 
*/ + for(size_t leftover{min_len&3};leftover;++pos,--leftover) + { + dst[pos] += InSamples[pos] * (gain + step*step_count); + step_count += 1.0f; + } + if(pos == Counter) + gain = TargetGain; + else + gain += step*step_count; + + /* Mix until pos is aligned with 4 or the mix is done. These samples are mixed before the silence check below. */ + for(size_t leftover{aligned_len&3};leftover;++pos,--leftover) + dst[pos] += InSamples[pos] * gain; + } + CurrentGain = gain; + + if(!(std::abs(gain) > GainSilenceThreshold)) + return; + if(size_t todo{(InSamples.size()-pos) >> 2}) + { + const float32x4_t gain4 = vdupq_n_f32(gain); + do { + const float32x4_t val4 = vld1q_f32(&InSamples[pos]); + float32x4_t dry4 = vld1q_f32(&dst[pos]); + dry4 = vmlaq_f32(dry4, val4, gain4); + vst1q_f32(&dst[pos], dry4); + pos += 4; + } while(--todo); + } + for(size_t leftover{(InSamples.size()-pos)&3};leftover;++pos,--leftover) + dst[pos] += InSamples[pos] * gain; +} + +} // namespace + +template<> +void Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + ASSUME(frac < MixerFracOne); + + const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4)); /* Each lane advances by four output samples per iteration. */ + const float32x4_t fracOne4 = vdupq_n_f32(1.0f/MixerFracOne); + const int32x4_t fracMask4 = vdupq_n_s32(MixerFracMask); + alignas(16) uint pos_[4], frac_[4]; + int32x4_t pos4, frac4; + + InitPosArrays(frac, increment, frac_, pos_); /* Gives the four lanes the offsets of the first four output samples. */ + frac4 = vld1q_s32(reinterpret_cast<int*>(frac_)); + pos4 = vld1q_s32(reinterpret_cast<int*>(pos_)); + + auto dst_iter = dst.begin(); + for(size_t todo{dst.size()>>2};todo;--todo) + { + const int pos0{vgetq_lane_s32(pos4, 0)}; + const int pos1{vgetq_lane_s32(pos4, 1)}; + const int pos2{vgetq_lane_s32(pos4, 2)}; + const int pos3{vgetq_lane_s32(pos4, 3)}; + const float32x4_t val1{set_f4(src[pos0], src[pos1], src[pos2], src[pos3])}; + const float32x4_t val2{set_f4(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])}; + + /* val1 + (val2-val1)*mu */ + const float32x4_t 
r0{vsubq_f32(val2, val1)}; + const float32x4_t mu{vmulq_f32(vcvtq_f32_s32(frac4), fracOne4)}; + const float32x4_t out{vmlaq_f32(val1, mu, r0)}; + + vst1q_f32(dst_iter, out); + dst_iter += 4; + + frac4 = vaddq_s32(frac4, increment4); + pos4 = vaddq_s32(pos4, vshrq_n_s32(frac4, MixerFracBits)); + frac4 = vandq_s32(frac4, fracMask4); + } + + if(size_t todo{dst.size()&3}) /* Finish the last one to three outputs with scalar code, resuming from the position and fraction held in lane 0. */ + { + src += static_cast<uint>(vgetq_lane_s32(pos4, 0)); + frac = static_cast<uint>(vgetq_lane_s32(frac4, 0)); + + do { + *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne)); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } while(--todo); + } +} + +template<> +void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + ASSUME(frac < MixerFracOne); + + const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter); + + src -= 1; + for(float &out_sample : dst) + { + const uint pi{frac >> CubicPhaseDiffBits}; + const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)}; + const float32x4_t pf4{vdupq_n_f32(pf)}; + + /* Apply the phase interpolated filter. 
*/ + + /* f = fil + pf*phd */ + const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4, + vld1q_f32(filter[pi].mDeltas)); + /* r = f*src */ + float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))}; + + r4 = vaddq_f32(r4, vrev64q_f32(r4)); + out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); /* Together with the line above: horizontal sum of the four lanes of r4. */ + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +template<> +void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + const float *const filter{state->bsinc.filter}; + const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)}; + const size_t m{state->bsinc.m}; + ASSUME(m > 0); + ASSUME(frac < MixerFracOne); + + src -= state->bsinc.l; + for(float &out_sample : dst) + { + // Calculate the phase index and factor. + const uint pi{frac >> BSincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)}; + + // Apply the scale and phase interpolated filter. 
+ float32x4_t r4{vdupq_n_f32(0.0f)}; + { + const float32x4_t pf4{vdupq_n_f32(pf)}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; + size_t td{m >> 2}; /* NOTE(review): the do/while below always runs at least once and consumes four coefficients per pass, so this presumably relies on m being a non-zero multiple of 4 -- confirm. */ + size_t j{0u}; + + do { + /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ + const float32x4_t f4 = vmlaq_f32( + vmlaq_f32(vld1q_f32(&fil[j]), sf4, vld1q_f32(&scd[j])), + pf4, vmlaq_f32(vld1q_f32(&phd[j]), sf4, vld1q_f32(&spd[j]))); + /* r += f*src */ + r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j])); + j += 4; + } while(--td); + } + r4 = vaddq_f32(r4, vrev64q_f32(r4)); + out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +template<> +void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + const float *const filter{state->bsinc.filter}; + const size_t m{state->bsinc.m}; + ASSUME(m > 0); + ASSUME(frac < MixerFracOne); + + src -= state->bsinc.l; + for(float &out_sample : dst) + { + // Calculate the phase index and factor. + const uint pi{frac >> BSincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)}; + + // Apply the phase interpolated filter. 
+ float32x4_t r4{vdupq_n_f32(0.0f)}; + { + const float32x4_t pf4{vdupq_n_f32(pf)}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + size_t td{m >> 2}; /* NOTE(review): as in the bsinc resampler, this presumably relies on m being a non-zero multiple of 4 -- confirm. */ + size_t j{0u}; + + do { + /* f = fil + pf*phd */ + const float32x4_t f4 = vmlaq_f32(vld1q_f32(&fil[j]), pf4, vld1q_f32(&phd[j])); + /* r += f*src */ + r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j])); + j += 4; + } while(--td); + } + r4 = vaddq_f32(r4, vrev64q_f32(r4)); + out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + + +template<> +void MixHrtf_<NEONTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize) +{ MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize); } + +template<> +void MixHrtfBlend_<NEONTag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize) +{ + MixHrtfBlendBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, oldparams, newparams, + BufferSize); +} + +template<> +void MixDirectHrtf_<NEONTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, + const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, + float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState, + IrSize, BufferSize); +} + + +template<> +void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer, + float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos) +{ + const float delta{(Counter > 0) ? 
1.0f / static_cast<float>(Counter) : 0.0f}; + const auto min_len = minz(Counter, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; /* Samples between min_len and the next multiple of 4, capped at the end of the input. */ + + for(FloatBufferLine &output : OutBuffer) + MixLine(InSamples, al::assume_aligned<16>(output.data()+OutPos), *CurrentGains++, + *TargetGains++, delta, min_len, aligned_len, Counter); +} + +template<> +void Mix_<NEONTag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain, + const float TargetGain, const size_t Counter) +{ + const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f}; + const auto min_len = minz(Counter, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; + + MixLine(InSamples, al::assume_aligned<16>(OutBuffer), CurrentGain, TargetGain, delta, min_len, + aligned_len, Counter); +} diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp new file mode 100644 index 00000000..0aa5d5fb --- /dev/null +++ b/core/mixer/mixer_sse.cpp @@ -0,0 +1,327 @@ +#include "config.h" + +#include <xmmintrin.h> + +#include <cmath> +#include <limits> + +#include "alnumeric.h" +#include "core/bsinc_defs.h" +#include "core/cubic_defs.h" +#include "defs.h" +#include "hrtfbase.h" + +struct SSETag; +struct CubicTag; +struct BSincTag; +struct FastBSincTag; + + +#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE__) +#pragma GCC target("sse") +#endif + +namespace { + +constexpr uint BSincPhaseDiffBits{MixerFracBits - BSincPhaseBits}; +constexpr uint BSincPhaseDiffOne{1 << BSincPhaseDiffBits}; +constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u}; + +constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits}; +constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits}; +constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u}; + +#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) /* x + y*z on each of the four lanes. */ + +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const 
ConstHrirSpan Coeffs, + const float left, const float right) +{ + const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; + + ASSUME(IrSize >= MinIrLength); + /* This isn't technically correct to test alignment, but it's true for + * systems that support SSE, which is the only one that needs to know the + * alignment of Values (which alternates between 8- and 16-byte aligned). + */ + if(!(reinterpret_cast<uintptr_t>(Values)&15)) + { + for(size_t i{0};i < IrSize;i += 2) + { + const __m128 coeffs{_mm_load_ps(Coeffs[i].data())}; + __m128 vals{_mm_load_ps(Values[i].data())}; + vals = MLA4(vals, lrlr, coeffs); + _mm_store_ps(Values[i].data(), vals); + } + } + else + { + __m128 imp0, imp1; /* Values is only 8-byte aligned on this path. The first and the last float2 are updated with half-width loads and stores, and the loop in between works on the pairs starting at odd indices, combining the upper half of the previous product with the lower half of the next one. */ + __m128 coeffs{_mm_load_ps(Coeffs[0].data())}; + __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(Values[0].data()))}; + imp0 = _mm_mul_ps(lrlr, coeffs); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi(reinterpret_cast<__m64*>(Values[0].data()), vals); + size_t td{((IrSize+1)>>1) - 1}; + size_t i{1}; + do { + coeffs = _mm_load_ps(Coeffs[i+1].data()); + vals = _mm_load_ps(Values[i].data()); + imp1 = _mm_mul_ps(lrlr, coeffs); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(Values[i].data(), vals); + imp0 = imp1; + i += 2; + } while(--td); + vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(Values[i].data())); + imp0 = _mm_movehl_ps(imp0, imp0); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi(reinterpret_cast<__m64*>(Values[i].data()), vals); + } +} + +force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT dst, + float &CurrentGain, const float TargetGain, const float delta, const size_t min_len, + const size_t aligned_len, size_t Counter) /* Adds InSamples into dst. When a ramp is applied, the gain is stepped over the first min_len samples (four at a time, then the remainder one by one) and aligned_len further samples are mixed at the resulting gain. Whatever is left is then mixed at constant gain unless that gain is within GainSilenceThreshold of zero. CurrentGain is updated to the gain reached. */ +{ + float gain{CurrentGain}; + const float step{(TargetGain-gain) * delta}; + + size_t pos{0}; + if(!(std::abs(step) > std::numeric_limits<float>::epsilon())) + gain = TargetGain; + else + { + float step_count{0.0f}; + /* Mix with applying 
gain steps in aligned multiples of 4. */ + if(size_t todo{min_len >> 2}) + { + const __m128 four4{_mm_set1_ps(4.0f)}; + const __m128 step4{_mm_set1_ps(step)}; + const __m128 gain4{_mm_set1_ps(gain)}; + __m128 step_count4{_mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f)}; + do { + const __m128 val4{_mm_load_ps(&InSamples[pos])}; /* NOTE(review): _mm_load_ps and _mm_store_ps need 16-byte aligned addresses, so this presumably relies on InSamples.data() and dst being 16-byte aligned -- confirm against the callers. */ + __m128 dry4{_mm_load_ps(&dst[pos])}; + + /* dry += val * (gain + step*step_count) */ + dry4 = MLA4(dry4, val4, MLA4(gain4, step4, step_count4)); + + _mm_store_ps(&dst[pos], dry4); + step_count4 = _mm_add_ps(step_count4, four4); + pos += 4; + } while(--todo); + /* NOTE: step_count4 now represents the next four counts after the + * last four mixed samples, so the lowest element represents the + * next step count to apply. + */ + step_count = _mm_cvtss_f32(step_count4); + } + /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ + for(size_t leftover{min_len&3};leftover;++pos,--leftover) + { + dst[pos] += InSamples[pos] * (gain + step*step_count); + step_count += 1.0f; + } + if(pos == Counter) /* The whole ramp was consumed, so land exactly on the target; otherwise keep the gain the ramp got to. */ + gain = TargetGain; + else + gain += step*step_count; + + /* Mix until pos is aligned with 4 or the mix is done. 
These samples are mixed before the silence check below. */ + for(size_t leftover{aligned_len&3};leftover;++pos,--leftover) + dst[pos] += InSamples[pos] * gain; + } + CurrentGain = gain; + + if(!(std::abs(gain) > GainSilenceThreshold)) + return; + if(size_t todo{(InSamples.size()-pos) >> 2}) + { + const __m128 gain4{_mm_set1_ps(gain)}; + do { + const __m128 val4{_mm_load_ps(&InSamples[pos])}; + __m128 dry4{_mm_load_ps(&dst[pos])}; + dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); + _mm_store_ps(&dst[pos], dry4); + pos += 4; + } while(--todo); + } + for(size_t leftover{(InSamples.size()-pos)&3};leftover;++pos,--leftover) + dst[pos] += InSamples[pos] * gain; +} + +} // namespace + +template<> +void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + ASSUME(frac < MixerFracOne); + + const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter); + + src -= 1; + for(float &out_sample : dst) + { + const uint pi{frac >> CubicPhaseDiffBits}; + const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)}; + const __m128 pf4{_mm_set1_ps(pf)}; + + /* Apply the phase interpolated filter. 
*/ + + /* f = fil + pf*phd */ + const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4, + _mm_load_ps(filter[pi].mDeltas)); + /* r = f*src */ + __m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))}; + + r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); + r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); + out_sample = _mm_cvtss_f32(r4); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +template<> +void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + const float *const filter{state->bsinc.filter}; + const __m128 sf4{_mm_set1_ps(state->bsinc.sf)}; + const size_t m{state->bsinc.m}; + ASSUME(m > 0); + ASSUME(frac < MixerFracOne); + + src -= state->bsinc.l; + for(float &out_sample : dst) + { + // Calculate the phase index and factor. + const uint pi{frac >> BSincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)}; + + // Apply the scale and phase interpolated filter. 
+ __m128 r4{_mm_setzero_ps()}; + { + const __m128 pf4{_mm_set1_ps(pf)}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; + size_t td{m >> 2}; + size_t j{0u}; + + do { + /* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */ + const __m128 f4 = MLA4( + MLA4(_mm_load_ps(&fil[j]), sf4, _mm_load_ps(&scd[j])), + pf4, MLA4(_mm_load_ps(&phd[j]), sf4, _mm_load_ps(&spd[j]))); + /* r += f*src */ + r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j])); + j += 4; + } while(--td); + } + r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); + r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); + out_sample = _mm_cvtss_f32(r4); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +template<> +void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac, + const uint increment, const al::span<float> dst) +{ + const float *const filter{state->bsinc.filter}; + const size_t m{state->bsinc.m}; + ASSUME(m > 0); + ASSUME(frac < MixerFracOne); + + src -= state->bsinc.l; + for(float &out_sample : dst) + { + // Calculate the phase index and factor. + const uint pi{frac >> BSincPhaseDiffBits}; + const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)}; + + // Apply the phase interpolated filter. 
+ __m128 r4{_mm_setzero_ps()}; + { + const __m128 pf4{_mm_set1_ps(pf)}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + size_t td{m >> 2}; + size_t j{0u}; + + do { + /* f = fil + pf*phd */ + const __m128 f4 = MLA4(_mm_load_ps(&fil[j]), pf4, _mm_load_ps(&phd[j])); + /* r += f*src */ + r4 = MLA4(r4, f4, _mm_loadu_ps(&src[j])); + j += 4; + } while(--td); + } + r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); + r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); + out_sample = _mm_cvtss_f32(r4); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + + +template<> +void MixHrtf_<SSETag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize) +{ MixHrtfBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, hrtfparams, BufferSize); } + +template<> +void MixHrtfBlend_<SSETag>(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize) +{ + MixHrtfBlendBase<ApplyCoeffs>(InSamples, AccumSamples, IrSize, oldparams, newparams, + BufferSize); +} + +template<> +void MixDirectHrtf_<SSETag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, + const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, + float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) +{ + MixDirectHrtfBase<ApplyCoeffs>(LeftOut, RightOut, InSamples, AccumSamples, TempBuf, ChanState, + IrSize, BufferSize); +} + + +template<> +void Mix_<SSETag>(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer, + float *CurrentGains, const float *TargetGains, const size_t Counter, const size_t OutPos) +{ + const float delta{(Counter > 0) ? 
1.0f / static_cast<float>(Counter) : 0.0f}; + const auto min_len = minz(Counter, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; + + for(FloatBufferLine &output : OutBuffer) + MixLine(InSamples, al::assume_aligned<16>(output.data()+OutPos), *CurrentGains++, + *TargetGains++, delta, min_len, aligned_len, Counter); +} + +template<> +void Mix_<SSETag>(const al::span<const float> InSamples, float *OutBuffer, float &CurrentGain, + const float TargetGain, const size_t Counter) +{ + const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f}; + const auto min_len = minz(Counter, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; + + MixLine(InSamples, al::assume_aligned<16>(OutBuffer), CurrentGain, TargetGain, delta, min_len, + aligned_len, Counter); +} diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp new file mode 100644 index 00000000..edaaf7a1 --- /dev/null +++ b/core/mixer/mixer_sse2.cpp @@ -0,0 +1,90 @@ +/** + * OpenAL cross platform audio library + * Copyright (C) 2014 by Timothy Arceri <[email protected]>. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * Or go to http://www.gnu.org/copyleft/lgpl.html
+ */
+
+#include "config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "alnumeric.h"
+#include "defs.h"
+
+struct SSE2Tag;
+struct LerpTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE2__)
+#pragma GCC target("sse2")
+#endif
+/* Linear-interpolating resampler, SSE2 variant. Fills dst with dst.size()
+ * samples, each being src[pos] + (src[pos+1]-src[pos])*(frac/MixerFracOne).
+ * frac advances by increment per output sample; whatever overflows past
+ * MixerFracBits bits is carried into the source position. The main loop
+ * produces four samples per iteration and the remaining 0-3 samples are done
+ * one at a time. The InterpState parameter is unused.
+ * NOTE(review): the vector loop writes with _mm_store_ps (an aligned store),
+ * so dst is presumably 16-byte aligned by the caller - confirm.
+ */
+template<>
+void Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, uint frac,
+    const uint increment, const al::span<float> dst)
+{
+    ASSUME(frac < MixerFracOne);
+
+    const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; /* per-iteration step of every lane */
+    const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
+    const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
+
+    alignas(16) uint pos_[4], frac_[4];
+    InitPosArrays(frac, increment, frac_, pos_); /* defined elsewhere; presumably
+     * fills in the fraction and source offset of each of the first four
+     * output samples - confirm. */
+    __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+        static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+    __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+        static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
+
+    auto dst_iter = dst.begin();
+    for(size_t todo{dst.size()>>2};todo;--todo)
+    { /* Each lane's position is read by byte-shifting it down to element 0. */
+        const int pos0{_mm_cvtsi128_si32(pos4)};
+        const int pos1{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 4))};
+        const int pos2{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 8))};
+        const int pos3{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 12))};
+        const __m128 val1{_mm_setr_ps(src[pos0  ], src[pos1  ], src[pos2  ], src[pos3  ])};
+        const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
+
+        /* val1 + (val2-val1)*mu */
+        const __m128 r0{_mm_sub_ps(val2, val1)};
+        const __m128 mu{_mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4)};
+        const __m128 out{_mm_add_ps(val1, _mm_mul_ps(mu, r0))};
+
+        _mm_store_ps(dst_iter, out);
+        dst_iter += 4;
+
+        frac4 = _mm_add_epi32(frac4, increment4);
+        pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, MixerFracBits)); /* carry whole samples out of frac */
+        frac4 = _mm_and_si128(frac4, fracMask4);
+    }
+
+    if(size_t todo{dst.size()&3})
+    { /* pos4/frac4 were advanced at the end of each vector iteration, so
+       * element 0 describes the next sample to produce. */
+        src += static_cast<uint>(_mm_cvtsi128_si32(pos4));
+        frac = static_cast<uint>(_mm_cvtsi128_si32(frac4));
+
+        do {
+            *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne));
+
+            frac += increment;
+            src += frac>>MixerFracBits;
+            frac &= MixerFracMask;
+        } while(--todo);
+    }
+}
diff --git a/core/mixer/mixer_sse3.cpp b/core/mixer/mixer_sse3.cpp
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/core/mixer/mixer_sse3.cpp
diff --git a/core/mixer/mixer_sse41.cpp b/core/mixer/mixer_sse41.cpp
new file mode 100644
index 00000000..8ccd9fd3
--- /dev/null
+++ b/core/mixer/mixer_sse41.cpp
@@ -0,0 +1,95 @@
+/**
+ * OpenAL cross platform audio library
+ * Copyright (C) 2014 by Timothy Arceri <[email protected]>.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * Or go to http://www.gnu.org/copyleft/lgpl.html
+ */
+
+#include "config.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "alnumeric.h"
+#include "defs.h"
+
+struct SSE4Tag;
+struct LerpTag;
+
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE4_1__)
+#pragma GCC target("sse4.1")
+#endif
+/* Linear-interpolating resampler, SSE4.1 variant. Fills dst with dst.size()
+ * samples, each being src[pos] + (src[pos+1]-src[pos])*(frac/MixerFracOne).
+ * frac advances by increment per output sample; whatever overflows past
+ * MixerFracBits bits is carried into the source position. The main loop
+ * produces four samples per iteration, reading each lane's position with
+ * _mm_extract_epi32, and the remaining 0-3 samples are done one at a time.
+ * The InterpState parameter is unused.
+ * NOTE(review): the vector loop writes with _mm_store_ps (an aligned store),
+ * so dst is presumably 16-byte aligned by the caller - confirm.
+ */
+template<>
+void Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, uint frac,
+    const uint increment, const al::span<float> dst)
+{
+    ASSUME(frac < MixerFracOne);
+
+    const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; /* per-iteration step of every lane */
+    const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
+    const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
+
+    alignas(16) uint pos_[4], frac_[4];
+    InitPosArrays(frac, increment, frac_, pos_); /* defined elsewhere; presumably
+     * fills in the fraction and source offset of each of the first four
+     * output samples - confirm. */
+    __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+        static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+    __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+        static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
+
+    auto dst_iter = dst.begin();
+    for(size_t todo{dst.size()>>2};todo;--todo)
+    {
+        const int pos0{_mm_extract_epi32(pos4, 0)};
+        const int pos1{_mm_extract_epi32(pos4, 1)};
+        const int pos2{_mm_extract_epi32(pos4, 2)};
+        const int pos3{_mm_extract_epi32(pos4, 3)};
+        const __m128 val1{_mm_setr_ps(src[pos0  ], src[pos1  ], src[pos2  ], src[pos3  ])};
+        const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
+
+        /* val1 + (val2-val1)*mu */
+        const __m128 r0{_mm_sub_ps(val2, val1)};
+        const __m128 mu{_mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4)};
+        const __m128 out{_mm_add_ps(val1, _mm_mul_ps(mu, r0))};
+
+        _mm_store_ps(dst_iter, out);
+        dst_iter += 4;
+
+        frac4 = _mm_add_epi32(frac4, increment4);
+        pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, MixerFracBits)); /* carry whole samples out of frac */
+        frac4 = _mm_and_si128(frac4, fracMask4);
+    }
+
+    if(size_t todo{dst.size()&3})
+    {
+        /* NOTE: These four elements represent the position *after* the last
+         * four samples, so the lowest element is the next position to
+         * resample.
+         */
+        src += static_cast<uint>(_mm_cvtsi128_si32(pos4));
+        frac = static_cast<uint>(_mm_cvtsi128_si32(frac4));
+
+        do {
+            *(dst_iter++) = lerpf(src[0], src[1], static_cast<float>(frac) * (1.0f/MixerFracOne));
+
+            frac += increment;
+            src += frac>>MixerFracBits;
+            frac &= MixerFracMask;
+        } while(--todo);
+    }
+}
diff --git a/core/resampler_limits.h b/core/resampler_limits.h
new file mode 100644
index 00000000..9d4cefda
--- /dev/null
+++ b/core/resampler_limits.h
@@ -0,0 +1,12 @@
+#ifndef CORE_RESAMPLER_LIMITS_H
+#define CORE_RESAMPLER_LIMITS_H
+
+/* Maximum number of samples to pad on the ends of a buffer for resampling.
+ * Note that the padding is symmetric (half at the beginning and half at the
+ * end)!
+ */
+constexpr int MaxResamplerPadding{48};
+/* Padding on one end of the buffer: half of MaxResamplerPadding. */
+constexpr int MaxResamplerEdge{MaxResamplerPadding >> 1};
+
+#endif /* CORE_RESAMPLER_LIMITS_H */
diff --git a/core/rtkit.cpp b/core/rtkit.cpp
new file mode 100644
index 00000000..ff944ebf
--- /dev/null
+++ b/core/rtkit.cpp
@@ -0,0 +1,236 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+/***
+  Copyright 2009 Lennart Poettering
+  Copyright 2010 David Henningsson <[email protected]>
+  Copyright 2021 Chris Robinson
+
+  Permission is hereby granted, free of charge, to any person
+  obtaining a copy of this software and associated documentation files
+  (the "Software"), to deal in the Software without restriction,
+  including without limitation the rights to use, copy, modify, merge,
+  publish, distribute, sublicense, and/or sell copies of the Software,
+  and to permit persons to whom the Software is furnished to do so,
+  subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+***/
+
+#include "config.h"
+
+#include "rtkit.h"
+
+#include <errno.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <memory>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#elif defined(__FreeBSD__)
+#include <sys/thr.h>
+#endif
+
+
+namespace dbus {
+/* Type codes passed to dbus_message_append_args and compared against
+ * dbus_message_iter_get_arg_type results below. Presumably these mirror
+ * libdbus' DBUS_TYPE_* values - confirm against dbus_wrap.h.
+ */
+constexpr int TypeString{'s'};
+constexpr int TypeVariant{'v'};
+constexpr int TypeInt32{'i'};
+constexpr int TypeUInt32{'u'};
+constexpr int TypeInt64{'x'};
+constexpr int TypeUInt64{'t'};
+constexpr int TypeInvalid{'\0'};
+/* unique_ptr deleter that drops a DBusMessage with dbus_message_unref. */
+struct MessageDeleter {
+    void operator()(DBusMessage *m) { dbus_message_unref(m); }
+};
+using MessagePtr = std::unique_ptr<DBusMessage,MessageDeleter>;
+
+} // namespace dbus
+
+namespace {
+/* Returns the calling thread's id as obtained from SYS_gettid (Linux) or
+ * thr_self (FreeBSD), or 0 where neither is available.
+ */
+inline pid_t _gettid()
+{
+#ifdef __linux__
+    return static_cast<pid_t>(syscall(SYS_gettid));
+#elif defined(__FreeBSD__)
+    long pid{};
+    thr_self(&pid);
+    return static_cast<pid_t>(pid);
+#else
+#warning gettid not available
+    return 0;
+#endif
+}
+/* Maps a D-Bus error name to a negative errno value. Names not matched below
+ * become -EIO.
+ */
+int translate_error(const char *name)
+{
+    if(strcmp(name, DBUS_ERROR_NO_MEMORY) == 0)
+        return -ENOMEM;
+    if(strcmp(name, DBUS_ERROR_SERVICE_UNKNOWN) == 0
+        || strcmp(name, DBUS_ERROR_NAME_HAS_NO_OWNER) == 0)
+        return -ENOENT;
+    if(strcmp(name, DBUS_ERROR_ACCESS_DENIED) == 0
+        || strcmp(name, DBUS_ERROR_AUTH_FAILED) == 0)
+        return -EACCES;
+    return -EIO;
+}
+/* Calls org.freedesktop.DBus.Properties.Get for propname on the RealtimeKit
+ * object and blocks for the reply. On success the value is stored in *propval
+ * and 0 is returned. Otherwise a negative errno is returned: -ENOMEM if the
+ * request could not be built, a translated D-Bus error if the call failed,
+ * or -EBADMSG if the reply held no int32/int64 inside a variant.
+ */
+int rtkit_get_int_property(DBusConnection *connection, const char *propname, long long *propval)
+{
+    dbus::MessagePtr m{dbus_message_new_method_call(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+        "org.freedesktop.DBus.Properties", "Get")};
+    if(!m) return -ENOMEM;
+
+    const char *interfacestr = RTKIT_SERVICE_NAME;
+    auto ready = dbus_message_append_args(m.get(),
+        dbus::TypeString, &interfacestr,
+        dbus::TypeString, &propname,
+        dbus::TypeInvalid);
+    if(!ready) return -ENOMEM;
+
+    dbus::Error error;
+    dbus::MessagePtr r{dbus_connection_send_with_reply_and_block(connection, m.get(), -1,
+        &error.get())};
+    if(!r) return translate_error(error->name);
+
+    if(dbus_set_error_from_message(&error.get(), r.get()))
+        return translate_error(error->name);
+
+    int ret{-EBADMSG}; /* stays -EBADMSG unless an integer is found below */
+    DBusMessageIter iter{};
+    dbus_message_iter_init(r.get(), &iter);
+    while(int curtype{dbus_message_iter_get_arg_type(&iter)})
+    {
+        if(curtype == dbus::TypeVariant)
+        {
+            DBusMessageIter subiter{};
+            dbus_message_iter_recurse(&iter, &subiter);
+
+            while((curtype=dbus_message_iter_get_arg_type(&subiter)) != dbus::TypeInvalid)
+            {
+                if(curtype == dbus::TypeInt32)
+                {
+                    dbus_int32_t i32{};
+                    dbus_message_iter_get_basic(&subiter, &i32);
+                    *propval = i32;
+                    ret = 0;
+                }
+
+                if(curtype == dbus::TypeInt64)
+                {
+                    dbus_int64_t i64{};
+                    dbus_message_iter_get_basic(&subiter, &i64);
+                    *propval = i64;
+                    ret = 0;
+                }
+
+                dbus_message_iter_next(&subiter);
+            }
+        }
+        dbus_message_iter_next(&iter);
+    }
+
+    return ret;
+}
+
+} // namespace
+
+int rtkit_get_max_realtime_priority(DBusConnection *connection)
+{
+    long long retval{};
+    int err{rtkit_get_int_property(connection, "MaxRealtimePriority", &retval)};
+    return err < 0 ? err : static_cast<int>(retval);
+}
+
+int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level)
+{
+    long long retval{};
+    int err{rtkit_get_int_property(connection, "MinNiceLevel", &retval)};
+    if(err >= 0) *min_nice_level = static_cast<int>(retval); /* left untouched on failure */
+    return err;
+}
+
+long long rtkit_get_rttime_usec_max(DBusConnection *connection)
+{
+    long long retval{};
+    int err{rtkit_get_int_property(connection, "RTTimeUSecMax", &retval)};
+    return err < 0 ? err : retval;
+}
+
+int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority)
+{
+    if(thread == 0) /* 0 selects the calling thread */
+        thread = _gettid();
+    if(thread == 0) /* _gettid has no implementation on this platform */
+        return -ENOTSUP;
+
+    dbus::MessagePtr m{dbus_message_new_method_call(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+        "org.freedesktop.RealtimeKit1", "MakeThreadRealtime")};
+    if(!m) return -ENOMEM;
+
+    auto u64 = static_cast<dbus_uint64_t>(thread);
+    auto u32 = static_cast<dbus_uint32_t>(priority);
+    auto ready = dbus_message_append_args(m.get(),
+        dbus::TypeUInt64, &u64,
+        dbus::TypeUInt32, &u32,
+        dbus::TypeInvalid);
+    if(!ready) return -ENOMEM;
+
+    dbus::Error error;
+    dbus::MessagePtr r{dbus_connection_send_with_reply_and_block(connection, m.get(), -1,
+        &error.get())};
+    if(!r) return translate_error(error->name);
+
+    if(dbus_set_error_from_message(&error.get(), r.get()))
+        return translate_error(error->name);
+
+    return 0;
+}
+
+int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level)
+{
+    if(thread == 0) /* 0 selects the calling thread */
+        thread = _gettid();
+    if(thread == 0) /* _gettid has no implementation on this platform */
+        return -ENOTSUP;
+
+    dbus::MessagePtr m{dbus_message_new_method_call(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+        "org.freedesktop.RealtimeKit1", "MakeThreadHighPriority")};
+    if(!m) return -ENOMEM;
+
+    auto u64 = static_cast<dbus_uint64_t>(thread);
+    auto s32 = static_cast<dbus_int32_t>(nice_level);
+    auto ready = dbus_message_append_args(m.get(),
+        dbus::TypeUInt64, &u64,
+        dbus::TypeInt32, &s32,
+        dbus::TypeInvalid);
+    if(!ready) return -ENOMEM;
+
+    dbus::Error error;
+    dbus::MessagePtr r{dbus_connection_send_with_reply_and_block(connection, m.get(), -1,
+        &error.get())};
+    if(!r) return translate_error(error->name);
+
+    if(dbus_set_error_from_message(&error.get(), r.get()))
+        return translate_error(error->name);
+
+    return 0;
+}
diff --git a/core/rtkit.h b/core/rtkit.h
new file mode 100644
index 00000000..d4994e27
--- /dev/null
+++ b/core/rtkit.h
@@ -0,0 +1,71 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+#ifndef foortkithfoo
+#define foortkithfoo
+
+/***
+  Copyright 2009 Lennart Poettering
+  Copyright 2010 David Henningsson <[email protected]>
+
+  Permission is hereby granted, free of charge, to any person
+  obtaining a copy of this software and associated documentation files
+  (the "Software"), to deal in the Software without restriction,
+  including without limitation the rights to use, copy, modify, merge,
+  publish, distribute, sublicense, and/or sell copies of the Software,
+  and to permit persons to whom the Software is furnished to do so,
+  subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+***/
+
+#include <sys/types.h>
+
+#include "dbus_wrap.h"
+
+/* This is the reference implementation for a client for
+ * RealtimeKit.
You don't have to use this, but if you do, just copy these + * sources into your repository */ + +#define RTKIT_SERVICE_NAME "org.freedesktop.RealtimeKit1" +#define RTKIT_OBJECT_PATH "/org/freedesktop/RealtimeKit1" + +/* This is mostly equivalent to sched_setparam(thread, SCHED_RR, { + * .sched_priority = priority }). 'thread' needs to be a kernel thread + * id as returned by gettid(), not a pthread_t! If 'thread' is 0 the + * current thread is used. The returned value is a negative errno + * style error code, or 0 on success. */ +int rtkit_make_realtime(DBusConnection *system_bus, pid_t thread, int priority); + +/* This is mostly equivalent to setpriority(PRIO_PROCESS, thread, + * nice_level). 'thread' needs to be a kernel thread id as returned by + * gettid(), not a pthread_t! If 'thread' is 0 the current thread is + * used. The returned value is a negative errno style error code, or 0 + * on success.*/ +int rtkit_make_high_priority(DBusConnection *system_bus, pid_t thread, int nice_level); + +/* Return the maximum value of realtime priority available. Realtime requests + * above this value will fail. A negative value is an errno style error code. + */ +int rtkit_get_max_realtime_priority(DBusConnection *system_bus); + +/* Retrieve the minimum value of nice level available. High prio requests + * below this value will fail. The returned value is a negative errno + * style error code, or 0 on success.*/ +int rtkit_get_min_nice_level(DBusConnection *system_bus, int *min_nice_level); + +/* Return the maximum value of RLIMIT_RTTIME to set before attempting a + * realtime request. A negative value is an errno style error code. 
+ */
+long long rtkit_get_rttime_usec_max(DBusConnection *system_bus);
+
+#endif
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
new file mode 100644
index 00000000..df50956a
--- /dev/null
+++ b/core/uhjfilter.cpp
@@ -0,0 +1,539 @@
+
+#include "config.h"
+
+#include "uhjfilter.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "alcomplex.h"
+#include "alnumeric.h"
+#include "opthelpers.h"
+#include "phase_shifter.h"
+
+
+UhjQualityType UhjDecodeQuality{UhjQualityType::Default};
+UhjQualityType UhjEncodeQuality{UhjQualityType::Default};
+
+
+namespace {
+/* Shared phase-shift filter instances, one per supported filter length. */
+const PhaseShifterT<UhjLength256> PShiftLq{};
+const PhaseShifterT<UhjLength512> PShiftHq{};
+/* Maps a filter length N to the matching shared instance above. The primary
+ * template is only declared, so Get() exists for the two specialised lengths
+ * and any other N fails to compile.
+ */
+template<size_t N>
+struct GetPhaseShifter;
+template<>
+struct GetPhaseShifter<UhjLength256> { static auto& Get() noexcept { return PShiftLq; } };
+template<>
+struct GetPhaseShifter<UhjLength512> { static auto& Get() noexcept { return PShiftHq; } };
+
+/* Returns x*x; constexpr so it can initialise the constexpr tables below. */
+constexpr float square(float x) noexcept
+{ return x*x; }
+
+/* Filter coefficients for the 'base' all-pass IIR, which applies a frequency-
+ * dependent phase-shift of N degrees. The output of the filter requires a 1-
+ * sample delay.
+ */
+constexpr std::array<float,4> Filter1Coeff{{
+    square(0.6923878f), square(0.9360654322959f), square(0.9882295226860f),
+    square(0.9987488452737f)
+}};
+/* Filter coefficients for the offset all-pass IIR, which applies a frequency-
+ * dependent phase-shift of N+90 degrees.
+ */ +constexpr std::array<float,4> Filter2Coeff{{ + square(0.4021921162426f), square(0.8561710882420f), square(0.9722909545651f), + square(0.9952884791278f) +}}; + +} // namespace + +void UhjAllPassFilter::process(const al::span<const float,4> coeffs, + const al::span<const float> src, const bool updateState, float *RESTRICT dst) +{ + auto state = mState; + + auto proc_sample = [&state,coeffs](float x) noexcept -> float + { + for(size_t i{0};i < 4;++i) + { + const float y{x*coeffs[i] + state[i].z[0]}; + state[i].z[0] = state[i].z[1]; + state[i].z[1] = y*coeffs[i] - x; + x = y; + } + return x; + }; + std::transform(src.begin(), src.end(), dst, proc_sample); + if(updateState) LIKELY mState = state; +} + + +/* Encoding UHJ from B-Format is done as: + * + * S = 0.9396926*W + 0.1855740*X + * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y + * + * Left = (S + D)/2.0 + * Right = (S - D)/2.0 + * T = j(-0.1432*W + 0.6512*X) - 0.7071068*Y + * Q = 0.9772*Z + * + * where j is a wide-band +90 degree phase shift. 3-channel UHJ excludes Q, + * while 2-channel excludes Q and T. + * + * The phase shift is done using a linear FIR filter derived from an FFT'd + * impulse with the desired shift. 
+ */
+/* Encodes the W, X and Y channels (InSamples[0..2]) to 2-channel UHJ. The
+ * samples already in LeftOut/RightOut are first delayed by sFilterDelay
+ * samples, then the encoded left/right signal is added to them. The input
+ * and output buffers are treated as 16-byte aligned, and SamplesToDo must be
+ * greater than 0.
+ */
+template<size_t N>
+void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
+    const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
+{
+    const auto &PShift = GetPhaseShifter<N>::Get();
+
+    ASSUME(SamplesToDo > 0);
+
+    const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0])};
+    const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1])};
+    const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2])};
+    /* Append the new input behind the sFilterDelay samples that were carried
+     * over from the previous call. */
+    std::copy_n(winput, SamplesToDo, mW.begin()+sFilterDelay);
+    std::copy_n(xinput, SamplesToDo, mX.begin()+sFilterDelay);
+    std::copy_n(yinput, SamplesToDo, mY.begin()+sFilterDelay);
+
+    /* S = 0.9396926*W + 0.1855740*X */
+    for(size_t i{0};i < SamplesToDo;++i)
+        mS[i] = 0.9396926f*mW[i] + 0.1855740f*mX[i];
+
+    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mD. */
+    std::transform(winput, winput+SamplesToDo, xinput, mWX.begin() + sWXInOffset,
+        [](const float w, const float x) noexcept -> float
+        { return -0.3420201f*w + 0.5098604f*x; });
+    PShift.process({mD.data(), SamplesToDo}, mWX.data());
+
+    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
+    for(size_t i{0};i < SamplesToDo;++i)
+        mD[i] = mD[i] + 0.6554516f*mY[i];
+
+    /* Copy the future samples to the front for next time. */
+    std::copy(mW.cbegin()+SamplesToDo, mW.cbegin()+SamplesToDo+sFilterDelay, mW.begin());
+    std::copy(mX.cbegin()+SamplesToDo, mX.cbegin()+SamplesToDo+sFilterDelay, mX.begin());
+    std::copy(mY.cbegin()+SamplesToDo, mY.cbegin()+SamplesToDo+sFilterDelay, mY.begin());
+    std::copy(mWX.cbegin()+SamplesToDo, mWX.cbegin()+SamplesToDo+sWXInOffset, mWX.begin());
+
+    /* Apply a delay to the existing output to align with the input delay. */
+    auto *delayBuffer = mDirectDelay.data();
+    for(float *buffer : {LeftOut, RightOut})
+    {
+        float *distbuf{al::assume_aligned<16>(delayBuffer->data())};
+        ++delayBuffer;
+
+        float *inout{al::assume_aligned<16>(buffer)};
+        auto inout_end = inout + SamplesToDo;
+        if(SamplesToDo >= sFilterDelay) LIKELY
+        { /* Rotate the last sFilterDelay samples to the front, then swap them
+           * with the samples saved in distbuf by the previous call. */
+            auto delay_end = std::rotate(inout, inout_end - sFilterDelay, inout_end);
+            std::swap_ranges(inout, delay_end, distbuf);
+        }
+        else
+        { /* Fewer samples than the delay: swap them all with the front of
+           * distbuf, then rotate the swapped-in samples to its back. */
+            auto delay_start = std::swap_ranges(inout, inout_end, distbuf);
+            std::rotate(distbuf, delay_start, distbuf + sFilterDelay);
+        }
+    }
+
+    /* Combine the direct signal with the produced output. */
+
+    /* Left = (S + D)/2.0 */
+    float *RESTRICT left{al::assume_aligned<16>(LeftOut)};
+    for(size_t i{0};i < SamplesToDo;i++)
+        left[i] += (mS[i] + mD[i]) * 0.5f;
+    /* Right = (S - D)/2.0 */
+    float *RESTRICT right{al::assume_aligned<16>(RightOut)};
+    for(size_t i{0};i < SamplesToDo;i++)
+        right[i] += (mS[i] - mD[i]) * 0.5f;
+}
+
+/* This encoding implementation uses two sets of four chained IIR filters to
+ * produce the desired relative phase shift. The first filter chain produces a
+ * phase shift of varying degrees over a wide range of frequencies, while the
+ * second filter chain produces a phase shift 90 degrees ahead of the first
+ * over the same range. Further details are described here:
+ *
+ * https://web.archive.org/web/20060708031958/http://www.biochem.oulu.fi/~oniemita/dsp/hilbert/
+ *
+ * 2-channel UHJ output requires the use of three filter chains. The S channel
+ * output uses a Filter1 chain on the W and X channel mix, while the D channel
+ * output uses a Filter1 chain on the Y channel plus a Filter2 chain on the W
+ * and X channel mix. This results in the W and X input mix on the D channel
+ * output having the required +90 degree phase shift relative to the other
+ * inputs.
+ *
+ * LeftOut and RightOut are in/out buffers: what they already hold (the
+ * direct signal) is run through its own Filter1 chain, and the result is
+ * written back with the encoded left/right signal added. InSamples supplies
+ * the W, X and Y channels, in that order. The input and output buffers are
+ * treated as 16-byte aligned, and SamplesToDo must be greater than 0.
+ */
+void UhjEncoderIIR::encode(float *LeftOut, float *RightOut,
+    const al::span<const float *const, 3> InSamples, const size_t SamplesToDo)
+{
+    ASSUME(SamplesToDo > 0);
+
+    const float *RESTRICT winput{al::assume_aligned<16>(InSamples[0])};
+    const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1])};
+    const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2])};
+
+    /* Every Filter1 chain below writes its output one slot late (to index 1
+     * onwards). Slot 0 is then filled with the sample held over from the
+     * previous call, and the last sample produced is held over for the next
+     * call, giving the 1-sample delay the Filter1 output requires.
+     */
+
+    /* S = 0.9396926*W + 0.1855740*X */
+    std::transform(winput, winput+SamplesToDo, xinput, mTemp.begin(),
+        [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });
+    mFilter1WX.process(Filter1Coeff, {mTemp.data(), SamplesToDo}, true, mS.data()+1);
+    mS[0] = mDelayWX; mDelayWX = mS[SamplesToDo];
+
+    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mWX. */
+    std::transform(winput, winput+SamplesToDo, xinput, mTemp.begin(),
+        [](const float w, const float x) noexcept { return -0.3420201f*w + 0.5098604f*x; });
+    mFilter2WX.process(Filter2Coeff, {mTemp.data(), SamplesToDo}, true, mWX.data());
+
+    /* Apply filter1 to Y and store in mD. The third argument is the
+     * updateState flag; pass true like every other chain here, rather than
+     * relying on the sample count converting to bool.
+     */
+    mFilter1Y.process(Filter1Coeff, {yinput, SamplesToDo}, true, mD.data()+1);
+    mD[0] = mDelayY; mDelayY = mD[SamplesToDo];
+
+    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
+    for(size_t i{0};i < SamplesToDo;++i)
+        mD[i] = mWX[i] + 0.6554516f*mD[i];
+
+    /* Apply the base filter to the existing output to align with the processed
+     * signal.
+     */
+    mFilter1Direct[0].process(Filter1Coeff, {LeftOut, SamplesToDo}, true, mTemp.data()+1);
+    mTemp[0] = mDirectDelay[0]; mDirectDelay[0] = mTemp[SamplesToDo];
+
+    /* Left = (S + D)/2.0 */
+    float *RESTRICT left{al::assume_aligned<16>(LeftOut)};
+    for(size_t i{0};i < SamplesToDo;i++)
+        left[i] = (mS[i] + mD[i])*0.5f + mTemp[i];
+
+    mFilter1Direct[1].process(Filter1Coeff, {RightOut, SamplesToDo}, true, mTemp.data()+1);
+    mTemp[0] = mDirectDelay[1]; mDirectDelay[1] = mTemp[SamplesToDo];
+
+    /* Right = (S - D)/2.0 */
+    float *RESTRICT right{al::assume_aligned<16>(RightOut)};
+    for(size_t i{0};i < SamplesToDo;i++)
+        right[i] = (mS[i] - mD[i])*0.5f + mTemp[i];
+}
+
+
+/* Decoding UHJ is done as:
+ *
+ * S = Left + Right
+ * D = Left - Right
+ *
+ * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
+ * X = 0.418496*S - j(0.828331*D + 0.767820*T)
+ * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
+ * Z = 1.023332*Q
+ *
+ * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
+ * channel excludes Q and T.
+ */ +template<size_t N> +void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) +{ + static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large"); + + const auto &PShift = GetPhaseShifter<N>::Get(); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + const float *RESTRICT t{al::assume_aligned<16>(samples[2])}; + + /* S = Left + Right */ + for(size_t i{0};i < samplesToDo+sInputPadding;++i) + mS[i] = left[i] + right[i]; + + /* D = Left - Right */ + for(size_t i{0};i < samplesToDo+sInputPadding;++i) + mD[i] = left[i] - right[i]; + + /* T */ + for(size_t i{0};i < samplesToDo+sInputPadding;++i) + mT[i] = t[i]; + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */ + auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); + std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sInputPadding, mT.cbegin(), tmpiter, + [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; }); + if(updateState) LIKELY + std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin()); + PShift.process({xoutput, samplesToDo}, mTemp.data()); + + /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.981532f*mS[i] + 0.197484f*xoutput[i]; + /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.418496f*mS[i] - xoutput[i]; + + /* Precompute j*S and store in youtput. 
*/ + tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin()); + std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter); + if(updateState) LIKELY + std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin()); + PShift.process({youtput, samplesToDo}, mTemp.data()); + + /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i]; + + if(samples.size() > 3) + { + float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])}; + /* Z = 1.023332*Q */ + for(size_t i{0};i < samplesToDo;++i) + zoutput[i] = 1.023332f*zoutput[i]; + } +} + +void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) +{ + static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large"); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + + /* S = Left + Right */ + for(size_t i{0};i < samplesToDo;++i) + mS[i] = left[i] + right[i]; + + /* D = Left - Right */ + for(size_t i{0};i < samplesToDo;++i) + mD[i] = left[i] - right[i]; + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */ + std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, mTemp.begin(), + [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; }); + mFilter2DT.process(Filter2Coeff, {mTemp.data(), samplesToDo}, updateState, xoutput); + + /* Apply filter1 to S and store in mTemp. 
*/ + mTemp[0] = mDelayS; + mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1); + if(updateState) LIKELY mDelayS = mTemp[samplesToDo]; + + /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.981532f*mTemp[i] + 0.197484f*xoutput[i]; + /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.418496f*mTemp[i] - xoutput[i]; + + + /* Apply filter1 to (0.795968*D - 0.676392*T) and store in mTemp. */ + std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, youtput, + [](const float d, const float t) noexcept { return 0.795968f*d - 0.676392f*t; }); + mTemp[0] = mDelayDT; + mFilter1DT.process(Filter1Coeff, {youtput, samplesToDo}, updateState, mTemp.data()+1); + if(updateState) LIKELY mDelayDT = mTemp[samplesToDo]; + + /* Precompute j*S and store in youtput. */ + mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput); + + /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = mTemp[i] + 0.186633f*youtput[i]; + + + if(samples.size() > 3) + { + float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])}; + + /* Apply filter1 to Q and store in mTemp. */ + mTemp[0] = mDelayQ; + mFilter1Q.process(Filter1Coeff, {zoutput, samplesToDo}, updateState, mTemp.data()+1); + if(updateState) LIKELY mDelayQ = mTemp[samplesToDo]; + + /* Z = 1.023332*Q */ + for(size_t i{0};i < samplesToDo;++i) + zoutput[i] = 1.023332f*mTemp[i]; + } +} + + +/* Super Stereo processing is done as: + * + * S = Left + Right + * D = Left - Right + * + * W = 0.6098637*S - 0.6896511*j*w*D + * X = 0.8624776*S + 0.7626955*j*w*D + * Y = 1.6822415*w*D - 0.2156194*j*S + * + * where j is a +90 degree phase shift. w is a variable control for the + * resulting stereo width, with the range 0 <= w <= 0.7. 
+ */ /* FIR phase-shifter version of the Super Stereo decoder. Reads samplesToDo+sInputPadding frames of left/right input from samples[0] and samples[1], then overwrites the first samplesToDo frames of samples[0], samples[1] and samples[2] with W, X and Y. The S and D history buffers are only advanced when updateState is true. */+template<size_t N> +void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) +{ + static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large"); + + const auto &PShift = GetPhaseShifter<N>::Get(); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + + for(size_t i{0};i < samplesToDo+sInputPadding;++i) + mS[i] = left[i] + right[i]; + + /* Pre-apply the width factor to the difference signal D. Smoothly + * interpolate when it changes. + */ + const float wtarget{mWidthControl}; + const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth}; + if(wtarget == wcurrent || !updateState) + { + for(size_t i{0};i < samplesToDo+sInputPadding;++i) + mD[i] = (left[i] - right[i]) * wcurrent; + mCurrentWidth = wcurrent; + } + else + { /* Ramp the width linearly from wcurrent toward wtarget across the samplesToDo output frames; the trailing padding frames use wtarget directly. */+ const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)}; + float fi{0.0f}; + for(size_t i{0};i < samplesToDo;++i) + { + mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi); + fi += 1.0f; + } + for(size_t i{samplesToDo};i < samplesToDo+sInputPadding;++i) + mD[i] = (left[i] - right[i]) * wtarget; + mCurrentWidth = wtarget; + } + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Precompute j*D and store in xoutput.
*/ + auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); + std::copy_n(mD.cbegin(), samplesToDo+sInputPadding, tmpiter); + if(updateState) LIKELY + std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin()); + PShift.process({xoutput, samplesToDo}, mTemp.data()); + + /* W = 0.6098637*S - 0.6896511*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.6098637f*mS[i] - 0.6896511f*xoutput[i]; + /* X = 0.8624776*S + 0.7626955*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.8624776f*mS[i] + 0.7626955f*xoutput[i]; + + /* Precompute j*S and store in youtput. */ + tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin()); + std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter); + if(updateState) LIKELY + std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin()); + PShift.process({youtput, samplesToDo}, mTemp.data()); + + /* Y = 1.6822415*w*D - 0.2156194*j*S */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 1.6822415f*mD[i] - 0.2156194f*youtput[i]; +} + /* IIR all-pass version of the Super Stereo decoder. Only samplesToDo frames are read from samples[0] and samples[1]; updateState is forwarded to every filter's process() call and gates the saved one-sample delay values. */+void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) +{ + static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large"); + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + + for(size_t i{0};i < samplesToDo;++i) + mS[i] = left[i] + right[i]; + + /* Pre-apply the width factor to the difference signal D. Smoothly + * interpolate when it changes. + */ + const float wtarget{mWidthControl}; + const float wcurrent{(mCurrentWidth < 0.0f) ?
wtarget : mCurrentWidth}; + if(wtarget == wcurrent || !updateState) + { + for(size_t i{0};i < samplesToDo;++i) + mD[i] = (left[i] - right[i]) * wcurrent; + mCurrentWidth = wcurrent; + } + else + { + const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)}; + float fi{0.0f}; + for(size_t i{0};i < samplesToDo;++i) + { + mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi); + fi += 1.0f; + } + mCurrentWidth = wtarget; + } + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; + + /* Apply filter1 to S and store in mTemp. The output is written starting at mTemp[1], with mTemp[0] set to the last filtered sample saved by the previous call, so mTemp[i] below is the filtered S delayed by one sample. */ + mTemp[0] = mDelayS; + mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1); + if(updateState) LIKELY mDelayS = mTemp[samplesToDo]; + + /* Precompute j*D and store in xoutput. */ + mFilter2D.process(Filter2Coeff, {mD.data(), samplesToDo}, updateState, xoutput); + + /* W = 0.6098637*S - 0.6896511*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.6098637f*mTemp[i] - 0.6896511f*xoutput[i]; + /* X = 0.8624776*S + 0.7626955*j*w*D */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.8624776f*mTemp[i] + 0.7626955f*xoutput[i]; + + /* Precompute j*S and store in youtput. */ + mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput); + + /* Apply filter1 to D and store in mTemp.
*/ + mTemp[0] = mDelayD; + mFilter1D.process(Filter1Coeff, {mD.data(), samplesToDo}, updateState, mTemp.data()+1); + if(updateState) LIKELY mDelayD = mTemp[samplesToDo]; + + /* Y = 1.6822415*w*D - 0.2156194*j*S */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 1.6822415f*mTemp[i] - 0.2156194f*youtput[i]; +} + + /* Explicit instantiations of the FIR-based types for both supported filter lengths. */+template struct UhjEncoder<UhjLength256>; +template struct UhjDecoder<UhjLength256>; +template struct UhjStereoDecoder<UhjLength256>; + +template struct UhjEncoder<UhjLength512>; +template struct UhjDecoder<UhjLength512>; +template struct UhjStereoDecoder<UhjLength512>; diff --git a/core/uhjfilter.h b/core/uhjfilter.h new file mode 100644 index 00000000..df308094 --- /dev/null +++ b/core/uhjfilter.h @@ -0,0 +1,234 @@ +#ifndef CORE_UHJFILTER_H +#define CORE_UHJFILTER_H + +#include <array> + +#include "almalloc.h" +#include "alspan.h" +#include "bufferline.h" + + +static constexpr size_t UhjLength256{256}; +static constexpr size_t UhjLength512{512}; + +enum class UhjQualityType : uint8_t { + IIR = 0, + FIR256, + FIR512, + Default = IIR +}; + +extern UhjQualityType UhjDecodeQuality; +extern UhjQualityType UhjEncodeQuality; + + /* State and processing routine for the all-pass filter chains. NOTE(review): mState has no default initializer, so it is only zeroed when the owning object is value-initialized - confirm every owner is created that way. */+struct UhjAllPassFilter { + struct AllPassState { + /* Last two delayed components for direct form II. */ + float z[2]; + }; + std::array<AllPassState,4> mState; + + void process(const al::span<const float,4> coeffs, const al::span<const float> src, + const bool update, float *RESTRICT dst); +}; + + +struct UhjEncoderBase { + virtual ~UhjEncoderBase() = default; + + virtual size_t getDelay() noexcept = 0; + + /** + * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input + * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa + * with an additional +3dB boost).
+ */ + virtual void encode(float *LeftOut, float *RightOut, + const al::span<const float*const,3> InSamples, const size_t SamplesToDo) = 0; +}; + /* FIR-based UHJ encoder. getDelay() reports sFilterDelay, which is N/2 samples. */+template<size_t N> +struct UhjEncoder final : public UhjEncoderBase { + static constexpr size_t sFilterDelay{N/2}; + + /* Delays and processing storage for the input signal. */ + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mW{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mX{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mY{}; + + alignas(16) std::array<float,BufferLineSize> mS{}; + alignas(16) std::array<float,BufferLineSize> mD{}; + + /* History and temp storage for the FIR filter. New samples should be + * written to index sFilterDelay*2 - 1. + */ + static constexpr size_t sWXInOffset{sFilterDelay*2 - 1}; + alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mWX{}; + + alignas(16) std::array<std::array<float,sFilterDelay>,2> mDirectDelay{}; + + size_t getDelay() noexcept override { return sFilterDelay; } + + /** + * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input + * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa + * with an additional +3dB boost). + */ + void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples, + const size_t SamplesToDo) override; + + DEF_NEWDEL(UhjEncoder) +}; + /* IIR-based UHJ encoder. getDelay() reports a fixed sFilterDelay of 1 sample. */+struct UhjEncoderIIR final : public UhjEncoderBase { + static constexpr size_t sFilterDelay{1}; + + /* Processing storage for the input signal.
*/ + alignas(16) std::array<float,BufferLineSize+1> mS{}; + alignas(16) std::array<float,BufferLineSize+1> mD{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mWX{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mTemp{}; + float mDelayWX{}, mDelayY{}; + + UhjAllPassFilter mFilter1WX; + UhjAllPassFilter mFilter2WX; + UhjAllPassFilter mFilter1Y; + + std::array<UhjAllPassFilter,2> mFilter1Direct; + std::array<float,2> mDirectDelay{}; + + size_t getDelay() noexcept override { return sFilterDelay; } + + /** + * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input + * signal. The input must use FuMa channel ordering and UHJ scaling (FuMa + * with an additional +3dB boost). + */ + void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples, + const size_t SamplesToDo) override; + + DEF_NEWDEL(UhjEncoderIIR) +}; + + /* Common base for the UHJ and Super Stereo decoder types below, which each override decode(). */+struct DecoderBase { /* Upper limit for a derived decoder's sInputPadding. NOTE(review): presumably enforced by a static_assert in each decode() implementation - confirm. */+ static constexpr size_t sMaxPadding{256}; + + /* For 2-channel UHJ, shelf filters should use these LF responses. */ + static constexpr float sWLFScale{0.661f}; + static constexpr float sXYLFScale{1.293f}; + + virtual ~DecoderBase() = default; + + virtual void decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) = 0; + + /** + * The width factor for Super Stereo processing. Can be changed in between + * calls to decode, with valid values being between 0...0.7. + */ + float mWidthControl{0.593f}; +}; + +template<size_t N> +struct UhjDecoder final : public DecoderBase { + /* The number of extra sample frames needed for input.
*/ + static constexpr size_t sInputPadding{N/2}; + + alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{}; + alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{}; + alignas(16) std::array<float,BufferLineSize+sInputPadding> mT{}; + + alignas(16) std::array<float,sInputPadding-1> mDTHistory{}; + alignas(16) std::array<float,sInputPadding-1> mSHistory{}; + + alignas(16) std::array<float,BufferLineSize + sInputPadding*2> mTemp{}; + + /** + * Decodes a 3- or 4-channel UHJ signal into a B-Format signal with FuMa + * channel ordering and UHJ scaling. For 3-channel, the 3rd channel may be + * attenuated by 'n', where 0 <= n <= 1. So to decode 2-channel UHJ, supply + * 3 channels with the 3rd channel silent (n=0). The B-Format signal + * reconstructed from 2-channel UHJ should not be run through a normal + * B-Format decoder, as it needs different shelf filters. + */ + void decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) override; + + DEF_NEWDEL(UhjDecoder) +}; + /* IIR-based UHJ decoder. */+struct UhjDecoderIIR final : public DecoderBase { + /* FIXME: These IIR decoder filters actually have a 1-sample delay on the + * non-filtered components, which is not reflected in the source latency + * value. sInputPadding is 0, however, because it doesn't need any extra + * input samples.
+ */ + static constexpr size_t sInputPadding{0}; + + alignas(16) std::array<float,BufferLineSize> mS{}; + alignas(16) std::array<float,BufferLineSize> mD{}; + alignas(16) std::array<float,BufferLineSize+1> mTemp{}; + float mDelayS{}, mDelayDT{}, mDelayQ{}; + + UhjAllPassFilter mFilter1S; + UhjAllPassFilter mFilter2DT; + UhjAllPassFilter mFilter1DT; + UhjAllPassFilter mFilter2S; + UhjAllPassFilter mFilter1Q; + + void decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) override; + + DEF_NEWDEL(UhjDecoderIIR) +}; + /* FIR-based Super Stereo decoder; sInputPadding is N/2 frames. */+template<size_t N> +struct UhjStereoDecoder final : public DecoderBase { + static constexpr size_t sInputPadding{N/2}; + /* Width factor last applied by decode(). NOTE(review): the negative initial value looks like a "nothing applied yet" marker - confirm against decode(). */+ float mCurrentWidth{-1.0f}; + + alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{}; + alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{}; + + alignas(16) std::array<float,sInputPadding-1> mDTHistory{}; + alignas(16) std::array<float,sInputPadding-1> mSHistory{}; + + alignas(16) std::array<float,BufferLineSize + sInputPadding*2> mTemp{}; + + /** + * Applies Super Stereo processing on a stereo signal to create a B-Format + * signal with FuMa channel ordering and UHJ scaling. The samples span + * should contain 3 channels, the first two being the left and right stereo + * channels, and the third left empty.
+ */ + void decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) override; + + DEF_NEWDEL(UhjStereoDecoder) +}; + /* IIR-based Super Stereo decoder; sInputPadding is 0, so no extra input frames are declared. */+struct UhjStereoDecoderIIR final : public DecoderBase { + static constexpr size_t sInputPadding{0}; + + float mCurrentWidth{-1.0f}; + + alignas(16) std::array<float,BufferLineSize> mS{}; + alignas(16) std::array<float,BufferLineSize> mD{}; + alignas(16) std::array<float,BufferLineSize+1> mTemp{}; + float mDelayS{}, mDelayD{}; + + UhjAllPassFilter mFilter1S; + UhjAllPassFilter mFilter2D; + UhjAllPassFilter mFilter1D; + UhjAllPassFilter mFilter2S; + + void decode(const al::span<float*> samples, const size_t samplesToDo, + const bool updateState) override; + + DEF_NEWDEL(UhjStereoDecoderIIR) +}; + +#endif /* CORE_UHJFILTER_H */ diff --git a/core/uiddefs.cpp b/core/uiddefs.cpp new file mode 100644 index 00000000..244c01a5 --- /dev/null +++ b/core/uiddefs.cpp @@ -0,0 +1,37 @@ + +#include "config.h" + + +#ifndef AL_NO_UID_DEFS + +#if defined(HAVE_GUIDDEF_H) || defined(HAVE_INITGUID_H) +#define INITGUID +#include <windows.h> +#ifdef HAVE_GUIDDEF_H +#include <guiddef.h> +#else +#include <initguid.h> +#endif + +DEFINE_GUID(KSDATAFORMAT_SUBTYPE_PCM, 0x00000001, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71); +DEFINE_GUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, 0x00000003, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71); + +DEFINE_GUID(IID_IDirectSoundNotify, 0xb0210783, 0x89cd, 0x11d0, 0xaf,0x08, 0x00,0xa0,0xc9,0x25,0xcd,0x16); + +DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e,0x3d, 0xc4,0x57,0x92,0x91,0x69,0x2e); +DEFINE_GUID(IID_IMMDeviceEnumerator, 0xa95664d2, 0x9614, 0x4f35, 0xa7,0x46, 0xde,0x8d,0xb6,0x36,0x17,0xe6); +DEFINE_GUID(IID_IAudioClient, 0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1,0x78, 0xc2,0xf5,0x68,0xa7,0x03,0xb2); +DEFINE_GUID(IID_IAudioRenderClient, 0xf294acfc, 0x3146, 0x4483, 0xa7,0xbf, 0xad,0xdc,0xa7,0xc2,0x60,0xe2); +DEFINE_GUID(IID_IAudioCaptureClient, 0xc8adbd64, 0xe71e,
0x48a0, 0xa4,0xde, 0x18,0x5c,0x39,0x5c,0xd3,0x17); + +#ifdef HAVE_WASAPI +#include <wtypes.h> +#include <devpropdef.h> +#include <propkeydef.h> +DEFINE_DEVPROPKEY(DEVPKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80,0x20, 0x67,0xd1,0x46,0xa8,0x50,0xe0, 14); +DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_FormFactor, 0x1da5d803, 0xd492, 0x4edd, 0x8c,0x23, 0xe0,0xc0,0xff,0xee,0x7f,0x0e, 0); +DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_GUID, 0x1da5d803, 0xd492, 0x4edd, 0x8c, 0x23,0xe0, 0xc0,0xff,0xee,0x7f,0x0e, 4 ); +#endif +#endif + +#endif /* AL_NO_UID_DEFS */ diff --git a/core/voice.cpp b/core/voice.cpp new file mode 100644 index 00000000..e8fbcccd --- /dev/null +++ b/core/voice.cpp @@ -0,0 +1,1304 @@ + +#include "config.h" + +#include "voice.h" + +#include <algorithm> +#include <array> +#include <atomic> +#include <cassert> +#include <climits> +#include <cstdint> +#include <iterator> +#include <memory> +#include <new> +#include <stdlib.h> +#include <utility> +#include <vector> + +#include "albyte.h" +#include "alnumeric.h" +#include "aloptional.h" +#include "alspan.h" +#include "alstring.h" +#include "ambidefs.h" +#include "async_event.h" +#include "buffer_storage.h" +#include "context.h" +#include "cpu_caps.h" +#include "devformat.h" +#include "device.h" +#include "filters/biquad.h" +#include "filters/nfc.h" +#include "filters/splitter.h" +#include "fmt_traits.h" +#include "logging.h" +#include "mixer.h" +#include "mixer/defs.h" +#include "mixer/hrtfdefs.h" +#include "opthelpers.h" +#include "resampler_limits.h" +#include "ringbuffer.h" +#include "vector.h" +#include "voice_change.h" + /* Tag types naming the mixer implementations; only declared here, and the SSE/NEON tags only when that support is compiled in. */+struct CTag; +#ifdef HAVE_SSE +struct SSETag; +#endif +#ifdef HAVE_NEON +struct NEONTag; +#endif + + +static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15), + "DeviceBase::MixerBufferLine must be a multiple of 16 bytes"); +static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4"); + +static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for
BufferLineSize!"); +static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize, + "MaxPitch and/or BufferLineSize are too large for MixerFracBits!"); + +Resampler ResamplerDefault{Resampler::Cubic}; + +namespace { + +using uint = unsigned int; +using namespace std::chrono; + +using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize); +using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples, + const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams, + const size_t BufferSize); + +HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>}; +HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>}; + /* The Select* helpers below each return the NEON or SSE build of a mixer when that support is both compiled in (HAVE_NEON/HAVE_SSE) and flagged in CPUCapFlags, checking NEON first, and otherwise return the plain C build. */+inline MixerOutFunc SelectMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Mix_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return Mix_<SSETag>; +#endif + return Mix_<CTag>; +} + +inline MixerOneFunc SelectMixerOne() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Mix_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return Mix_<SSETag>; +#endif + return Mix_<CTag>; +} + +inline HrtfMixerFunc SelectHrtfMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtf_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtf_<SSETag>; +#endif + return MixHrtf_<CTag>; +} + +inline HrtfMixerBlendFunc SelectHrtfBlendMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtfBlend_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtfBlend_<SSETag>; +#endif + return MixHrtfBlend_<CTag>; +} + +} // namespace + /* Applies the optional resampler name to ResamplerDefault (matched case-insensitively; deprecated names are remapped with a warning, unknown names are reported and ignored), then selects the mixer implementations. */+void Voice::InitMixer(al::optional<std::string> resampler) +{ + if(resampler) + { + struct ResamplerEntry { + const char name[16]; + const Resampler resampler; + }; + constexpr ResamplerEntry ResamplerList[]{ + { "none", Resampler::Point }, + { "point",
Resampler::Point }, + { "linear", Resampler::Linear }, + { "cubic", Resampler::Cubic }, + { "bsinc12", Resampler::BSinc12 }, + { "fast_bsinc12", Resampler::FastBSinc12 }, + { "bsinc24", Resampler::BSinc24 }, + { "fast_bsinc24", Resampler::FastBSinc24 }, + }; + /* Remap deprecated names to their replacements before the table lookup. */+ const char *str{resampler->c_str()}; + if(al::strcasecmp(str, "bsinc") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str); + str = "bsinc12"; + } + else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using cubic\n", str); + str = "cubic"; + } + + auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList), + [str](const ResamplerEntry &entry) -> bool + { return al::strcasecmp(str, entry.name) == 0; }); + if(iter == std::end(ResamplerList)) + ERR("Invalid resampler: %s\n", str); + else + ResamplerDefault = iter->resampler; + } + /* Pick the mixer implementations matching the detected CPU capabilities. */+ MixSamplesOut = SelectMixer(); + MixSamplesOne = SelectMixerOne(); + MixHrtfBlendSamples = SelectHrtfBlendMixer(); + MixHrtfSamples = SelectHrtfMixer(); +} + + +namespace { + +/* IMA ADPCM Stepsize table. These 89 values must match the standard IMA step table exactly, since encoders produce their nibbles against it; entries 84 and 85 are 22385 and 24623. */ +constexpr int IMAStep_size[89] = { + 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, + 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55, + 60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157, + 173, 190, 209, 230, 253, 279, 307, 337, 371, 408, 449, + 494, 544, 598, 658, 724, 796, 876, 963, 1060, 1166, 1282, + 1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660, + 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630, 9493,10442, + 11487,12635,13899,15289,16818,18500,20350,22385,24623,27086,29794, + 32767 +}; + +/* IMA4 ADPCM Codeword decode table */ +constexpr int IMA4Codeword[16] = { + 1, 3, 5, 7, 9, 11, 13, 15, + -1,-3,-5,-7,-9,-11,-13,-15, +}; + +/* IMA4 ADPCM Step index adjust decode table */ +constexpr int IMA4Index_adjust[16] = { + -1,-1,-1,-1, 2, 4, 6, 8, + -1,-1,-1,-1, 2, 4, 6, 8 +}; + +/* MSADPCM Adaption table */ +constexpr int MSADPCMAdaption[16] = {
+ 230, 230, 230, 230, 307, 409, 512, 614, + 768, 614, 512, 409, 307, 230, 230, 230 +}; + +/* MSADPCM Adaption Coefficient tables */ +constexpr int MSADPCMAdaptionCoeff[7][2] = { + { 256, 0 }, + { 512, -256 }, + { 0, 0 }, + { 192, 64 }, + { 240, 0 }, + { 460, -208 }, + { 392, -232 } +}; + + +void SendSourceStoppedEvent(ContextBase *context, uint id) +{ + RingBuffer *ring{context->mAsyncEvents.get()}; + auto evt_vec = ring->getWriteVector(); + if(evt_vec.first.len < 1) return; + + AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf), + AsyncEvent::SourceStateChange)}; + evt->u.srcstate.id = id; + evt->u.srcstate.state = AsyncEvent::SrcState::Stop; + + ring->writeAdvance(1); +} + + +const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst, + const al::span<const float> src, int type) +{ + switch(type) + { + case AF_None: + lpfilter.clear(); + hpfilter.clear(); + break; + + case AF_LowPass: + lpfilter.process(src, dst); + hpfilter.clear(); + return dst; + case AF_HighPass: + lpfilter.clear(); + hpfilter.process(src, dst); + return dst; + + case AF_BandPass: + DualBiquad{lpfilter, hpfilter}.process(src, dst); + return dst; + } + return src.data(); +} + + +template<FmtType Type> +inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const size_t srcChan, + const size_t srcOffset, const size_t srcStep, const size_t /*samplesPerBlock*/, + const size_t samplesToLoad) noexcept +{ + constexpr size_t sampleSize{sizeof(typename al::FmtTypeTraits<Type>::Type)}; + auto s = src + (srcOffset*srcStep + srcChan)*sampleSize; + + al::LoadSampleArray<Type>(dstSamples, s, srcStep, samplesToLoad); +} + +template<> +inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src, + const size_t srcChan, const size_t srcOffset, const size_t srcStep, + const size_t samplesPerBlock, const size_t samplesToLoad) noexcept +{ + const size_t blockBytes{((samplesPerBlock-1)/2 + 4)*srcStep}; + + /* Skip to the 
ADPCM block containing the srcOffset sample. */ + src += srcOffset/samplesPerBlock*blockBytes; + /* Calculate how many samples need to be skipped in the block. */ + size_t skip{srcOffset % samplesPerBlock}; + + /* NOTE: This could probably be optimized better. */ + size_t wrote{0}; + do { + /* Each IMA4 block starts with a signed 16-bit sample, and a signed + * 16-bit table index. The table index needs to be clamped. + */ + int sample{src[srcChan*4] | (src[srcChan*4 + 1] << 8)}; + int index{src[srcChan*4 + 2] | (src[srcChan*4 + 3] << 8)}; + + sample = (sample^0x8000) - 32768; + index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1); + + if(skip == 0) + { + dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f; + if(wrote == samplesToLoad) return; + } + else + --skip; + + auto decode_sample = [&sample,&index](const uint nibble) + { + sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8; + sample = clampi(sample, -32768, 32767); + + index += IMA4Index_adjust[nibble]; + index = clampi(index, 0, al::size(IMAStep_size)-1); + + return sample; + }; + + /* The rest of the block is arranged as a series of nibbles, contained + * in 4 *bytes* per channel interleaved. So every 8 nibbles we need to + * skip 4 bytes per channel to get the next nibbles for this channel. + * + * First, decode the samples that we need to skip in the block (will + * always be less than the block size). They need to be decoded despite + * being ignored for proper state on the remaining samples. 
+ */ + const al::byte *nibbleData{src + (srcStep+srcChan)*4}; + size_t nibbleOffset{0}; + const size_t startOffset{skip + 1}; + for(;skip;--skip) + { + const size_t byteShift{(nibbleOffset&1) * 4}; + const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}}; + const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)}; + ++nibbleOffset; + + std::ignore = decode_sample((nibbleData[byteOffset]>>byteShift) & 15u); + } + + /* Second, decode the rest of the block and write to the output, until + * the end of the block or the end of output. + */ + const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)}; + for(size_t i{0};i < todo;++i) + { + const size_t byteShift{(nibbleOffset&1) * 4}; + const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}}; + const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)}; + ++nibbleOffset; + + const int result{decode_sample((nibbleData[byteOffset]>>byteShift) & 15u)}; + dstSamples[wrote++] = static_cast<float>(result) / 32768.0f; + } + if(wrote == samplesToLoad) + return; + + src += blockBytes; + } while(true); +} + +template<> +inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *src, + const size_t srcChan, const size_t srcOffset, const size_t srcStep, + const size_t samplesPerBlock, const size_t samplesToLoad) noexcept +{ + const size_t blockBytes{((samplesPerBlock-2)/2 + 7)*srcStep}; + + src += srcOffset/samplesPerBlock*blockBytes; + size_t skip{srcOffset % samplesPerBlock}; + + size_t wrote{0}; + do { + /* Each MS ADPCM block starts with an 8-bit block predictor, used to + * dictate how the two sample history values are mixed with the decoded + * sample, and an initial signed 16-bit delta value which scales the + * nibble sample value. This is followed by the two initial 16-bit + * sample history values. 
+ */ + const al::byte *input{src}; + const uint8_t blockpred{std::min(input[srcChan], uint8_t{6})}; + input += srcStep; + int delta{input[2*srcChan + 0] | (input[2*srcChan + 1] << 8)}; + input += srcStep*2; + + int sampleHistory[2]{}; + sampleHistory[0] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8); + input += srcStep*2; + sampleHistory[1] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8); + input += srcStep*2; + + const auto coeffs = al::as_span(MSADPCMAdaptionCoeff[blockpred]); + delta = (delta^0x8000) - 32768; + sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768; + sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768; + + /* The second history sample is "older", so it's the first to be + * written out. + */ + if(skip == 0) + { + dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f; + if(wrote == samplesToLoad) return; + dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f; + if(wrote == samplesToLoad) return; + } + else if(skip == 1) + { + --skip; + dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f; + if(wrote == samplesToLoad) return; + } + else + skip -= 2; + + auto decode_sample = [&sampleHistory,&delta,coeffs](const int nibble) + { + int pred{(sampleHistory[0]*coeffs[0] + sampleHistory[1]*coeffs[1]) / 256}; + pred += ((nibble^0x08) - 0x08) * delta; + pred = clampi(pred, -32768, 32767); + + sampleHistory[1] = sampleHistory[0]; + sampleHistory[0] = pred; + + delta = (MSADPCMAdaption[nibble] * delta) / 256; + delta = maxi(16, delta); + + return pred; + }; + + /* The rest of the block is a series of nibbles, interleaved per- + * channel. First, skip samples. 
+ */ + const size_t startOffset{skip + 2}; + size_t nibbleOffset{srcChan}; + for(;skip;--skip) + { + const size_t byteOffset{nibbleOffset>>1}; + const size_t byteShift{((nibbleOffset&1)^1) * 4}; + nibbleOffset += srcStep; + + std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15); + } + + /* Now decode the rest of the block, until the end of the block or the + * dst buffer is filled. + */ + const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)}; + for(size_t j{0};j < todo;++j) + { + const size_t byteOffset{nibbleOffset>>1}; + const size_t byteShift{((nibbleOffset&1)^1) * 4}; + nibbleOffset += srcStep; + + const int sample{decode_sample((input[byteOffset]>>byteShift) & 15)}; + dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f; + } + if(wrote == samplesToLoad) + return; + + src += blockBytes; + } while(true); +} + +/* Dispatches on srcType to the matching LoadSamples<T> specialization, + * forwarding every other argument unchanged. + */ +void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan, + const size_t srcOffset, const FmtType srcType, const size_t srcStep, + const size_t samplesPerBlock, const size_t samplesToLoad) noexcept +{ +#define HANDLE_FMT(T) case T: \ + LoadSamples<T>(dstSamples, src, srcChan, srcOffset, srcStep, \ + samplesPerBlock, samplesToLoad); \ + break + + switch(srcType) + { + HANDLE_FMT(FmtUByte); + HANDLE_FMT(FmtShort); + HANDLE_FMT(FmtFloat); + HANDLE_FMT(FmtDouble); + HANDLE_FMT(FmtMulaw); + HANDLE_FMT(FmtAlaw); + HANDLE_FMT(FmtIMA4); + HANDLE_FMT(FmtMSADPCM); + } +#undef HANDLE_FMT +} + +/* Fills voiceSamples[samplesLoaded, samplesToLoad) from a static buffer, + * starting at source sample dataPosInt. + * + * Without bufferLoopItem: loads what is left of the buffer, then pads any + * shortfall by repeating the sample just before the fill point. + * With bufferLoopItem: loads the rest of the current loop iteration, then + * repeats of the loop starting at mLoopStart until the request is filled. + */ +void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem, + const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel, + const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, + float *voiceSamples) +{ + if(!bufferLoopItem) + { + /* Load what's left to play from the buffer */ + if(buffer->mSampleLen > dataPosInt) LIKELY + { + const size_t buffer_remaining{buffer->mSampleLen - dataPosInt}; + const size_t remaining{minz(samplesToLoad-samplesLoaded, 
buffer_remaining)}; + LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt, + sampleType, srcStep, buffer->mBlockAlign, remaining); + samplesLoaded += remaining; + } + + if(const size_t toFill{samplesToLoad - samplesLoaded}) + { + auto srcsamples = voiceSamples + samplesLoaded; + std::fill_n(srcsamples, toFill, *(srcsamples-1)); + } + } + else + { + const size_t loopStart{buffer->mLoopStart}; + const size_t loopEnd{buffer->mLoopEnd}; + ASSUME(loopEnd > loopStart); + + /* Wrap a position at or past the loop end back into the loop. */ + const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt + : (((dataPosInt-loopStart)%(loopEnd-loopStart)) + loopStart)}; + + /* Load what's left of this loop iteration. The count is measured from + * intPos, the wrapped position the load actually starts at: dataPosInt + * may be at or past loopEnd, where loopEnd-dataPosInt would wrap around + * as an unsigned value and let the load run beyond the loop end. + */ + const size_t remaining{minz(samplesToLoad-samplesLoaded, loopEnd-intPos)}; + LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, intPos, sampleType, + srcStep, buffer->mBlockAlign, remaining); + samplesLoaded += remaining; + + /* Load repeats of the loop to fill the buffer. */ + const size_t loopSize{loopEnd - loopStart}; + while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)}) + { + LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, loopStart, + sampleType, srcStep, buffer->mBlockAlign, toFill); + samplesLoaded += toFill; + } + } +} + +/* Fills voiceSamples[samplesLoaded, samplesToLoad) from a callback buffer + * holding numCallbackSamples samples, starting at dataPosInt, then pads any + * shortfall by repeating the sample just before the fill point. + */ +void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt, + const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel, + const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, float *voiceSamples) +{ + /* Load what's left to play from the buffer */ + if(numCallbackSamples > dataPosInt) LIKELY + { + const size_t remaining{minz(samplesToLoad-samplesLoaded, numCallbackSamples-dataPosInt)}; + LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt, + sampleType, srcStep, buffer->mBlockAlign, remaining); + samplesLoaded += remaining; + } + + if(const size_t toFill{samplesToLoad - samplesLoaded}) + { + auto srcsamples = voiceSamples + 
samplesLoaded; + std::fill_n(srcsamples, toFill, *(srcsamples-1)); + } +} +/* Fills voiceSamples[samplesLoaded, samplesToLoad) by walking the buffer + * queue from `buffer`, starting dataPosInt samples into it. Items lying + * wholly before dataPosInt are skipped; when the mNext chain ends the walk + * continues at bufferLoopItem (a null one ends it). Any shortfall is padded + * by repeating the sample just before the fill point. + */ +void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem, + size_t dataPosInt, const FmtType sampleType, const size_t srcChannel, + const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, + float *voiceSamples) +{ + /* Crawl the buffer queue to fill in the temp buffer */ + while(buffer && samplesLoaded != samplesToLoad) + { + if(dataPosInt >= buffer->mSampleLen) + { + dataPosInt -= buffer->mSampleLen; + buffer = buffer->mNext.load(std::memory_order_acquire); + if(!buffer) buffer = bufferLoopItem; + continue; + } + + const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)}; + LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt, + sampleType, srcStep, buffer->mBlockAlign, remaining); + + samplesLoaded += remaining; + if(samplesLoaded == samplesToLoad) + break; + + dataPosInt = 0; + buffer = buffer->mNext.load(std::memory_order_acquire); + if(!buffer) buffer = bufferLoopItem; + } + if(const size_t toFill{samplesToLoad - samplesLoaded}) + { + auto srcsamples = voiceSamples + samplesLoaded; + std::fill_n(srcsamples, toFill, *(srcsamples-1)); + } +} + +/* Mixes DstBufferSize input samples through the voice's HRTF filter into + * Device->HrtfAccumData at OutPos. The input is prefixed with the stored + * history, and the history is refreshed only while IsPlaying. When Counter + * is non-zero and OutPos is 0, the first min(DstBufferSize, Counter) + * samples are blended between the old and target filters; whatever remains + * is mixed with the target filter while the gain ramps from Old.Gain. + */ +void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms, + const float TargetGain, const uint Counter, uint OutPos, const bool IsPlaying, + DeviceBase *Device) +{ + const uint IrSize{Device->mIrSize}; + auto &HrtfSamples = Device->HrtfSourceData; + auto &AccumSamples = Device->HrtfAccumData; + + /* Copy the HRTF history and new input samples into a temp buffer. */ + auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(), + std::begin(HrtfSamples)); + std::copy_n(samples, DstBufferSize, src_iter); + /* Copy the last used samples back into the history buffer for later. 
*/ + if(IsPlaying) LIKELY + std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(), + parms.Hrtf.History.begin()); + + /* If fading and this is the first mixing pass, fade between the IRs. */ + uint fademix{0u}; + if(Counter && OutPos == 0) + { + fademix = minu(DstBufferSize, Counter); + + float gain{TargetGain}; + + /* The new coefficients need to fade in completely since they're + * replacing the old ones. To keep the gain fading consistent, + * interpolate between the old and new target gains given how much of + * the fade time this mix handles. + */ + if(Counter > fademix) + { + const float a{static_cast<float>(fademix) / static_cast<float>(Counter)}; + gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a); + } + + MixHrtfFilter hrtfparams{ + parms.Hrtf.Target.Coeffs, + parms.Hrtf.Target.Delay, + 0.0f, gain / static_cast<float>(fademix)}; + MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams, + fademix); + + /* Update the old parameters with the result. */ + parms.Hrtf.Old = parms.Hrtf.Target; + parms.Hrtf.Old.Gain = gain; + OutPos += fademix; + } + + if(fademix < DstBufferSize) + { + const uint todo{DstBufferSize - fademix}; + float gain{TargetGain}; + + /* Interpolate the target gain if the gain fading lasts longer than + * this mix. + */ + if(Counter > DstBufferSize) + { + const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)}; + gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a); + } + + MixHrtfFilter hrtfparams{ + parms.Hrtf.Target.Coeffs, + parms.Hrtf.Target.Delay, + parms.Hrtf.Old.Gain, + (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)}; + MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo); + + /* Store the now-current gain for next time. 
*/ + parms.Hrtf.Old.Gain = gain; + } +} +/* Mixes `samples` to OutBuffer with near-field control filtering. The first + * output channel receives the unfiltered input. Each following group of + * Device->NumChannelsPerOrder[order] channels receives the input run + * through the NfcFilter process function for that order (1 up to + * MaxAmbiOrder), stopping early at an order that has no channels. + */ +void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms, + const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device) +{ + using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*); + static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{ + nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3}; + + float *CurrentGains{parms.Gains.Current.data()}; + MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos); + ++OutBuffer; + ++CurrentGains; + ++TargetGains; + + const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()}; + size_t order{1}; + while(const size_t chancount{Device->NumChannelsPerOrder[order]}) + { + (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data()); + MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos); + OutBuffer += chancount; + CurrentGains += chancount; + TargetGains += chancount; + if(++order == MaxAmbiOrder+1) + break; + } +} + +} // namespace +/* Renders up to SamplesToDo samples of this voice: loads and resamples each + * channel's source data, filters and mixes it to the direct output and the + * auxiliary sends, then (unless stopping) advances the playback position + * and posts buffer-completed and source-stopped events. + */ +void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime, + const uint SamplesToDo) +{ + static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{}; + + ASSUME(SamplesToDo > 0); + + DeviceBase *Device{Context->mDevice}; + const uint NumSends{Device->NumAuxSends}; + + /* Get voice info */ + int DataPosInt{mPosition.load(std::memory_order_relaxed)}; + uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)}; + VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)}; + VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)}; + const uint increment{mStep}; + if(increment < 1) UNLIKELY + { + /* If the voice is supposed to be stopping but can't be mixed, just + * stop it before bailing. 
+ */ + if(vstate == Stopping) + mPlayState.store(Stopped, std::memory_order_release); + return; + } + + /* If the static voice's current position is beyond the buffer loop end + * position, disable looping. + */ + if(mFlags.test(VoiceIsStatic) && BufferLoopItem) + { + if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd) + BufferLoopItem = nullptr; + } + + uint OutPos{0u}; + + /* Check if we're doing a delayed start, and we start in this update. */ + if(mStartTime > deviceTime) UNLIKELY + { + /* If the voice is supposed to be stopping but hasn't actually started + * yet, make sure its stopped. + */ + if(vstate == Stopping) + { + mPlayState.store(Stopped, std::memory_order_release); + return; + } + + /* If the start time is too far ahead, don't bother. */ + auto diff = mStartTime - deviceTime; + if(diff >= seconds{1}) + return; + + /* Get the number of samples ahead of the current time that output + * should start at. Skip this update if it's beyond the output sample + * count. + * + * Round the start position to a multiple of 4, which some mixers want. + * This makes the start time accurate to 4 samples. This could be made + * sample-accurate by forcing non-SIMD functions on the first run. + */ + seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()}; + sampleOffset = (sampleOffset+2) & ~seconds::rep{3}; + if(sampleOffset >= SamplesToDo) + return; + + OutPos = static_cast<uint>(sampleOffset); + } + + /* Calculate the number of samples to mix, and the number of (resampled) + * samples that need to be loaded (mixing samples and decoder padding). + */ + const uint samplesToMix{SamplesToDo - OutPos}; + const uint samplesToLoad{samplesToMix + mDecoderPadding}; + + /* Get a span of pointers to hold the floating point, deinterlaced, + * resampled buffer data to be mixed. 
+ */ + std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers; + const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()}; + auto get_bufferline = [](DeviceBase::MixerBufferLine &bufline) noexcept -> float* + { return bufline.data(); }; + std::transform(Device->mSampleData.end() - mChans.size(), Device->mSampleData.end(), + MixingSamples.begin(), get_bufferline); + + /* If there's a matching sample step and no phase offset, use a simple copy + * for resampling. + */ + const ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0) + ? ResamplerFunc{[](const InterpState*, const float *RESTRICT src, uint, const uint, + const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }} + : mResampler}; + + /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels + * (3rd channel is generated from decoding). + */ + const size_t realChannels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u + : MixingSamples.size()}; + for(size_t chan{0};chan < realChannels;++chan) + { + using ResBufType = decltype(DeviceBase::mResampleData); + static constexpr uint srcSizeMax{static_cast<uint>(ResBufType{}.size()-MaxResamplerEdge)}; + + const auto prevSamples = al::as_span(mPrevSamples[chan]); + const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(), + Device->mResampleData.begin()) - MaxResamplerEdge; + int intPos{DataPosInt}; + uint fracPos{DataPosFrac}; + + /* Load samples for this channel from the available buffer(s), with + * resampling. + */ + for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;) + { + /* Calculate the number of dst samples that can be loaded this + * iteration, given the available resampler buffer size, and the + * number of src samples that are needed to load it. 
+ */ + auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize) + { + /* If ext=true, calculate the last written dst pos from the dst + * count, convert to the last read src pos, then add one to get + * the src count. + * + * If ext=false, convert the dst count to src count directly. + * + * Without this, the src count could be short by one when + * increment < 1.0, or not have a full src at the end when + * increment > 1.0. + */ + const bool ext{increment <= MixerFracOne}; + uint64_t dataSize64{dstBufferSize - ext}; + dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits; + /* Also include resampler padding. */ + dataSize64 += ext + MaxResamplerEdge; + + if(dataSize64 <= srcSizeMax) + return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64)); + + /* If the source size got saturated, we can't fill the desired + * dst size. Figure out how many dst samples we can fill. + */ + dataSize64 = srcSizeMax - MaxResamplerEdge; + dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment; + if(dataSize64 < dstBufferSize) + { + /* Some resamplers require the destination being 16-byte + * aligned, so limit to a multiple of 4 samples to maintain + * alignment if we need to do another iteration after this. + */ + dstBufferSize = static_cast<uint>(dataSize64) & ~3u; + } + return std::make_pair(dstBufferSize, srcSizeMax); + }; + const auto bufferSizes = calc_buffer_sizes(samplesToLoad - samplesLoaded); + const auto dstBufferSize = bufferSizes.first; + const auto srcBufferSize = bufferSizes.second; + + /* Load the necessary samples from the given buffer(s). */ + if(!BufferListItem) + { + const uint avail{minu(srcBufferSize, MaxResamplerEdge)}; + const uint tofill{maxu(srcBufferSize, MaxResamplerEdge)}; + + /* When loading from a voice that ended prematurely, only take + * the samples that get closest to 0 amplitude. This helps + * certain sounds fade out better. 
+ */ + auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool + { return std::abs(lhs) < std::abs(rhs); }; + auto srciter = std::min_element(resampleBuffer, resampleBuffer+avail, abs_lt); + + std::fill(srciter+1, resampleBuffer+tofill, *srciter); + } + else + { + size_t srcSampleDelay{0}; + if(intPos < 0) UNLIKELY + { + /* If the current position is negative, there's that many + * silent samples to load before using the buffer. + */ + srcSampleDelay = static_cast<uint>(-intPos); + if(srcSampleDelay >= srcBufferSize) + { + /* If the number of silent source samples exceeds the + * number to load, the output will be silent. + */ + std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f); + std::fill_n(resampleBuffer, srcBufferSize, 0.0f); + goto skip_resample; + } + + std::fill_n(resampleBuffer, srcSampleDelay, 0.0f); + } + const uint uintPos{static_cast<uint>(maxi(intPos, 0))}; + + if(mFlags.test(VoiceIsStatic)) + LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan, + mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer)); + else if(mFlags.test(VoiceIsCallback)) + { + const uint callbackBase{mCallbackBlockBase * mSamplesPerBlock}; + const size_t bufferOffset{uintPos - callbackBase}; + const size_t needSamples{bufferOffset + srcBufferSize - srcSampleDelay}; + const size_t needBlocks{(needSamples + mSamplesPerBlock-1) / mSamplesPerBlock}; + if(!mFlags.test(VoiceCallbackStopped) && needBlocks > mNumCallbackBlocks) + { + const size_t byteOffset{mNumCallbackBlocks*mBytesPerBlock}; + const size_t needBytes{(needBlocks-mNumCallbackBlocks)*mBytesPerBlock}; + + const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData, + &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))}; + if(gotBytes < 0) + mFlags.set(VoiceCallbackStopped); + else if(static_cast<uint>(gotBytes) < needBytes) + { + mFlags.set(VoiceCallbackStopped); + mNumCallbackBlocks += static_cast<uint>(gotBytes) / 
mBytesPerBlock; + } + else + mNumCallbackBlocks = static_cast<uint>(needBlocks); + } + const size_t numSamples{uint{mNumCallbackBlocks} * mSamplesPerBlock}; + LoadBufferCallback(BufferListItem, bufferOffset, numSamples, mFmtType, chan, + mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer)); + } + else + LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan, + mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer)); + } + + Resample(&mResampleState, al::to_address(resampleBuffer), fracPos, increment, + {MixingSamples[chan]+samplesLoaded, dstBufferSize}); + + /* Store the last source samples used for next time. */ + if(vstate == Playing) LIKELY + { + /* Only store samples for the end of the mix, excluding what + * gets loaded for decoder padding. + */ + const uint loadEnd{samplesLoaded + dstBufferSize}; + if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) LIKELY + { + const size_t dstOffset{samplesToMix - samplesLoaded}; + const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits}; + std::copy_n(resampleBuffer-MaxResamplerEdge+srcOffset, prevSamples.size(), + prevSamples.begin()); + } + } + + skip_resample: + samplesLoaded += dstBufferSize; + if(samplesLoaded < samplesToLoad) + { + fracPos += dstBufferSize*increment; + const uint srcOffset{fracPos >> MixerFracBits}; + fracPos &= MixerFracMask; + intPos += srcOffset; + + /* If more samples need to be loaded, copy the back of the + * resampleBuffer to the front to reuse it. prevSamples isn't + * reliable since it's only updated for the end of the mix. 
+ */ + std::copy(resampleBuffer-MaxResamplerEdge+srcOffset, + resampleBuffer+MaxResamplerEdge+srcOffset, resampleBuffer-MaxResamplerEdge); + } + } + } + for(auto &samples : MixingSamples.subspan(realChannels)) + std::fill_n(samples, samplesToLoad, 0.0f); + + if(mDecoder) + mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing)); + + if(mFlags.test(VoiceIsAmbisonic)) + { + auto voiceSamples = MixingSamples.begin(); + for(auto &chandata : mChans) + { + chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix}, + chandata.mAmbiHFScale, chandata.mAmbiLFScale); + ++voiceSamples; + } + } + + const uint Counter{mFlags.test(VoiceIsFading) ? minu(samplesToMix, 64u) : 0u}; + if(!Counter) + { + /* No fading, just overwrite the old/current params. */ + for(auto &chandata : mChans) + { + { + DirectParams &parms = chandata.mDryParams; + if(!mFlags.test(VoiceHasHrtf)) + parms.Gains.Current = parms.Gains.Target; + else + parms.Hrtf.Old = parms.Hrtf.Target; + } + for(uint send{0};send < NumSends;++send) + { + if(mSend[send].Buffer.empty()) + continue; + + SendParams &parms = chandata.mWetParams[send]; + parms.Gains.Current = parms.Gains.Target; + } + } + } + + auto voiceSamples = MixingSamples.begin(); + for(auto &chandata : mChans) + { + /* Now filter and mix to the appropriate outputs. */ + const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData}; + { + DirectParams &parms = chandata.mDryParams; + const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(), + {*voiceSamples, samplesToMix}, mDirect.FilterType)}; + + if(mFlags.test(VoiceHasHrtf)) + { + const float TargetGain{parms.Hrtf.Target.Gain * (vstate == Playing)}; + DoHrtfMix(samples, samplesToMix, parms, TargetGain, Counter, OutPos, + (vstate == Playing), Device); + } + else + { + const float *TargetGains{(vstate == Playing) ? 
parms.Gains.Target.data() + : SilentTarget.data()}; + if(mFlags.test(VoiceHasNfc)) + DoNfcMix({samples, samplesToMix}, mDirect.Buffer.data(), parms, + TargetGains, Counter, OutPos, Device); + else + MixSamples({samples, samplesToMix}, mDirect.Buffer, + parms.Gains.Current.data(), TargetGains, Counter, OutPos); + } + } + + for(uint send{0};send < NumSends;++send) + { + if(mSend[send].Buffer.empty()) + continue; + + SendParams &parms = chandata.mWetParams[send]; + const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(), + {*voiceSamples, samplesToMix}, mSend[send].FilterType)}; + + const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data() + : SilentTarget.data()}; + MixSamples({samples, samplesToMix}, mSend[send].Buffer, + parms.Gains.Current.data(), TargetGains, Counter, OutPos); + } + + ++voiceSamples; + } + + mFlags.set(VoiceIsFading); + + /* Don't update positions and buffers if we were stopping. */ + if(vstate == Stopping) UNLIKELY + { + mPlayState.store(Stopped, std::memory_order_release); + return; + } + + /* Update voice positions and buffers as needed. 
*/ + DataPosFrac += increment*samplesToMix; + const uint SrcSamplesDone{DataPosFrac>>MixerFracBits}; + DataPosInt += SrcSamplesDone; + DataPosFrac &= MixerFracMask; + + uint buffers_done{0u}; + if(BufferListItem && DataPosInt >= 0) LIKELY + { + if(mFlags.test(VoiceIsStatic)) + { + if(BufferLoopItem) + { + /* Handle looping static source */ + const uint LoopStart{BufferListItem->mLoopStart}; + const uint LoopEnd{BufferListItem->mLoopEnd}; + uint DataPosUInt{static_cast<uint>(DataPosInt)}; + if(DataPosUInt >= LoopEnd) + { + assert(LoopEnd > LoopStart); + DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart; + DataPosInt = static_cast<int>(DataPosUInt); + } + } + else + { + /* Handle non-looping static source */ + if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen) + BufferListItem = nullptr; + } + } + else if(mFlags.test(VoiceIsCallback)) + { + /* Handle callback buffer source */ + const uint currentBlock{static_cast<uint>(DataPosInt) / mSamplesPerBlock}; + const uint blocksDone{currentBlock - mCallbackBlockBase}; + if(blocksDone < mNumCallbackBlocks) + { + const size_t byteOffset{blocksDone*mBytesPerBlock}; + const size_t byteEnd{mNumCallbackBlocks*mBytesPerBlock}; + al::byte *data{BufferListItem->mSamples}; + std::copy(data+byteOffset, data+byteEnd, data); + mNumCallbackBlocks -= blocksDone; + mCallbackBlockBase += blocksDone; + } + else + { + BufferListItem = nullptr; + mNumCallbackBlocks = 0; + mCallbackBlockBase += blocksDone; + } + } + else + { + /* Handle streaming source */ + do { + if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt)) + break; + + DataPosInt -= BufferListItem->mSampleLen; + + ++buffers_done; + BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed); + if(!BufferListItem) BufferListItem = BufferLoopItem; + } while(BufferListItem); + } + } + + /* Capture the source ID in case it gets reset for stopping. 
*/ + const uint SourceID{mSourceID.load(std::memory_order_relaxed)}; + + /* Update voice info */ + mPosition.store(DataPosInt, std::memory_order_relaxed); + mPositionFrac.store(DataPosFrac, std::memory_order_relaxed); + mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed); + if(!BufferListItem) + { + mLoopBuffer.store(nullptr, std::memory_order_relaxed); + mSourceID.store(0u, std::memory_order_relaxed); + } + std::atomic_thread_fence(std::memory_order_release); + + /* Send any events now, after the position/buffer info was updated. */ + const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire); + if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted)) + { + RingBuffer *ring{Context->mAsyncEvents.get()}; + auto evt_vec = ring->getWriteVector(); + if(evt_vec.first.len > 0) + { + AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf), + AsyncEvent::BufferCompleted)}; + evt->u.bufcomp.id = SourceID; + evt->u.bufcomp.count = buffers_done; + ring->writeAdvance(1); + } + } + + if(!BufferListItem) + { + /* If the voice just ended, set it to Stopping so the next render + * ensures any residual noise fades to 0 amplitude. + */ + mPlayState.store(Stopping, std::memory_order_release); + if(enabledevt.test(AsyncEvent::SourceStateChange)) + SendSourceStoppedEvent(Context, SourceID); + } +} +/* Prepares the voice for mixing on `device`: sizes mChans and mPrevSamples + * to the channel count (3 for UHJ2/SuperStereo, otherwise derived from the + * format and the lower of the voice and device ambisonic orders, capped at + * device->mSampleData.size()), picks the UHJ decoder and its input padding + * when the format needs one, zeroes mStep and the sample history, and + * resets every channel's dry and send parameters. VoiceIsAmbisonic is set + * when band-split scaling is configured and cleared otherwise. + */ +void Voice::prepare(DeviceBase *device) +{ + /* Even if storing really high order ambisonics, we only mix channels for + * orders up to the device order. The rest are simply dropped. + */ + uint num_channels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 
3 : + ChannelsFromFmt(mFmtChannels, minu(mAmbiOrder, device->mAmbiOrder))}; + if(num_channels > device->mSampleData.size()) UNLIKELY + { + ERR("Unexpected channel count: %u (limit: %zu, %d:%d)\n", num_channels, + device->mSampleData.size(), mFmtChannels, mAmbiOrder); + num_channels = static_cast<uint>(device->mSampleData.size()); + } + if(mChans.capacity() > 2 && num_channels < mChans.capacity()) + { + decltype(mChans){}.swap(mChans); + decltype(mPrevSamples){}.swap(mPrevSamples); + } + mChans.reserve(maxu(2, num_channels)); + mChans.resize(num_channels); + mPrevSamples.reserve(maxu(2, num_channels)); + mPrevSamples.resize(num_channels); + + mDecoder = nullptr; + mDecoderPadding = 0; + if(mFmtChannels == FmtSuperStereo) + { + switch(UhjDecodeQuality) + { + case UhjQualityType::IIR: + mDecoder = std::make_unique<UhjStereoDecoderIIR>(); + mDecoderPadding = UhjStereoDecoderIIR::sInputPadding; + break; + case UhjQualityType::FIR256: + mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>(); + mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding; + break; + case UhjQualityType::FIR512: + mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>(); + mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding; + break; + } + } + else if(IsUHJ(mFmtChannels)) + { + switch(UhjDecodeQuality) + { + case UhjQualityType::IIR: + mDecoder = std::make_unique<UhjDecoderIIR>(); + mDecoderPadding = UhjDecoderIIR::sInputPadding; + break; + case UhjQualityType::FIR256: + mDecoder = std::make_unique<UhjDecoder<UhjLength256>>(); + mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding; + break; + case UhjQualityType::FIR512: + mDecoder = std::make_unique<UhjDecoder<UhjLength512>>(); + mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding; + break; + } + } + + /* Clear the stepping value explicitly so the mixer knows not to mix this + * until the update gets applied. + */ + mStep = 0; + + /* Make sure the sample history is cleared. 
*/ + std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{}); + + if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder) + { + /* 2-channel UHJ needs different shelf filters. However, we can't just + * use different shelf filters after mixing it, given any old speaker + * setup the user has. To make this work, we apply the expected shelf + * filters for decoding UHJ2 to quad (only needs LF scaling), and act + * as if those 4 quad channels are encoded right back into B-Format. + * + * This isn't perfect, but without an entirely separate and limited + * UHJ2 path, it's better than nothing. + * + * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is + * identity, so don't mess with it). + */ + const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)}; + for(auto &chandata : mChans) + { + chandata.mAmbiHFScale = 1.0f; + chandata.mAmbiLFScale = 1.0f; + chandata.mAmbiSplitter = splitter; + chandata.mDryParams = DirectParams{}; + chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter; + std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); + } + mChans[0].mAmbiLFScale = DecoderBase::sWLFScale; + mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale; + mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale; + mFlags.set(VoiceIsAmbisonic); + } + /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher + * order than the voice. No HF scaling is necessary to mix it. + */ + else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder) + { + const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ? 
+ AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()}; + const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder, + device->m2DMixing); + + const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)}; + for(auto &chandata : mChans) + { + chandata.mAmbiHFScale = scales[*(OrderFromChan++)]; + chandata.mAmbiLFScale = 1.0f; + chandata.mAmbiSplitter = splitter; + chandata.mDryParams = DirectParams{}; + chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter; + std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); + } + mFlags.set(VoiceIsAmbisonic); + } + else + { + for(auto &chandata : mChans) + { + chandata.mDryParams = DirectParams{}; + chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter; + std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); + } + mFlags.reset(VoiceIsAmbisonic); + } +} diff --git a/core/voice.h b/core/voice.h new file mode 100644 index 00000000..57ee7b01 --- /dev/null +++ b/core/voice.h @@ -0,0 +1,280 @@ +#ifndef CORE_VOICE_H +#define CORE_VOICE_H + +#include <array> +#include <atomic> +#include <bitset> +#include <chrono> +#include <memory> +#include <stddef.h> +#include <string> + +#include "albyte.h" +#include "almalloc.h" +#include "aloptional.h" +#include "alspan.h" +#include "bufferline.h" +#include "buffer_storage.h" +#include "devformat.h" +#include "filters/biquad.h" +#include "filters/nfc.h" +#include "filters/splitter.h" +#include "mixer/defs.h" +#include "mixer/hrtfdefs.h" +#include "resampler_limits.h" +#include "uhjfilter.h" +#include "vector.h" + +struct ContextBase; +struct DeviceBase; +struct EffectSlot; +enum class DistanceModel : unsigned char; + +using uint = unsigned int; + + +#define MAX_SENDS 6 + + +enum class SpatializeMode : unsigned char { + Off, + On, + Auto +}; + +enum class DirectMode : unsigned char { + Off, + DropMismatch, + RemixMismatch +}; + + +constexpr uint MaxPitch{10}; + + +enum { + 
AF_None = 0, + AF_LowPass = 1, + AF_HighPass = 2, + AF_BandPass = AF_LowPass | AF_HighPass +}; + + +struct DirectParams { + BiquadFilter LowPass; + BiquadFilter HighPass; + + NfcFilter NFCtrlFilter; + + struct { + HrtfFilter Old; + HrtfFilter Target; + alignas(16) std::array<float,HrtfHistoryLength> History; + } Hrtf; + + struct { + std::array<float,MAX_OUTPUT_CHANNELS> Current; + std::array<float,MAX_OUTPUT_CHANNELS> Target; + } Gains; +}; + +struct SendParams { + BiquadFilter LowPass; + BiquadFilter HighPass; + + struct { + std::array<float,MaxAmbiChannels> Current; + std::array<float,MaxAmbiChannels> Target; + } Gains; +}; + + +struct VoiceBufferItem { + std::atomic<VoiceBufferItem*> mNext{nullptr}; + + CallbackType mCallback{nullptr}; + void *mUserData{nullptr}; + + uint mBlockAlign{0u}; + uint mSampleLen{0u}; + uint mLoopStart{0u}; + uint mLoopEnd{0u}; + + al::byte *mSamples{nullptr}; +}; + + +struct VoiceProps { + float Pitch; + float Gain; + float OuterGain; + float MinGain; + float MaxGain; + float InnerAngle; + float OuterAngle; + float RefDistance; + float MaxDistance; + float RolloffFactor; + std::array<float,3> Position; + std::array<float,3> Velocity; + std::array<float,3> Direction; + std::array<float,3> OrientAt; + std::array<float,3> OrientUp; + bool HeadRelative; + DistanceModel mDistanceModel; + Resampler mResampler; + DirectMode DirectChannels; + SpatializeMode mSpatializeMode; + + bool DryGainHFAuto; + bool WetGainAuto; + bool WetGainHFAuto; + float OuterGainHF; + + float AirAbsorptionFactor; + float RoomRolloffFactor; + float DopplerFactor; + + std::array<float,2> StereoPan; + + float Radius; + float EnhWidth; + + /** Direct filter and auxiliary send info. 
*/ + struct { + float Gain; + float GainHF; + float HFReference; + float GainLF; + float LFReference; + } Direct; + struct SendData { + EffectSlot *Slot; + float Gain; + float GainHF; + float HFReference; + float GainLF; + float LFReference; + } Send[MAX_SENDS]; +}; + +struct VoicePropsItem : public VoiceProps { + std::atomic<VoicePropsItem*> next{nullptr}; + + DEF_NEWDEL(VoicePropsItem) +}; + +enum : uint { + VoiceIsStatic, + VoiceIsCallback, + VoiceIsAmbisonic, + VoiceCallbackStopped, + VoiceIsFading, + VoiceHasHrtf, + VoiceHasNfc, + + VoiceFlagCount +}; + +struct Voice { + enum State { + Stopped, + Playing, + Stopping, + Pending + }; + + std::atomic<VoicePropsItem*> mUpdate{nullptr}; + + VoiceProps mProps; + + std::atomic<uint> mSourceID{0u}; + std::atomic<State> mPlayState{Stopped}; + std::atomic<bool> mPendingChange{false}; + + /** + * Source offset in samples, relative to the currently playing buffer, NOT + * the whole queue. + */ + std::atomic<int> mPosition; + /** Fractional (fixed-point) offset to the next sample. */ + std::atomic<uint> mPositionFrac; + + /* Current buffer queue item being played. */ + std::atomic<VoiceBufferItem*> mCurrentBuffer; + + /* Buffer queue item to loop to at end of queue (will be NULL for non- + * looping voices). + */ + std::atomic<VoiceBufferItem*> mLoopBuffer; + + std::chrono::nanoseconds mStartTime{}; + + /* Properties for the attached buffer(s). */ + FmtChannels mFmtChannels; + FmtType mFmtType; + uint mFrequency; + uint mFrameStep; /**< In steps of the sample type size. */ + uint mBytesPerBlock; /**< Or for PCM formats, BytesPerFrame. */ + uint mSamplesPerBlock; /**< Always 1 for PCM formats. */ + AmbiLayout mAmbiLayout; + AmbiScaling mAmbiScaling; + uint mAmbiOrder; + + std::unique_ptr<DecoderBase> mDecoder; + uint mDecoderPadding{}; + + /** Current target parameters used for mixing. 
*/ + uint mStep{0}; + + ResamplerFunc mResampler; + + InterpState mResampleState; + + std::bitset<VoiceFlagCount> mFlags{}; + uint mNumCallbackBlocks{0}; + uint mCallbackBlockBase{0}; + + struct TargetData { + int FilterType; + al::span<FloatBufferLine> Buffer; + }; + TargetData mDirect; + std::array<TargetData,MAX_SENDS> mSend; + + /* The first MaxResamplerPadding/2 elements are the sample history from the + * previous mix, with an additional MaxResamplerPadding/2 elements that are + * now current (which may be overwritten if the buffer data is still + * available). + */ + using HistoryLine = std::array<float,MaxResamplerPadding>; + al::vector<HistoryLine,16> mPrevSamples{2}; + + struct ChannelData { + float mAmbiHFScale, mAmbiLFScale; + BandSplitter mAmbiSplitter; + + DirectParams mDryParams; + std::array<SendParams,MAX_SENDS> mWetParams; + }; + al::vector<ChannelData> mChans{2}; + + Voice() = default; + ~Voice() = default; + + Voice(const Voice&) = delete; + Voice& operator=(const Voice&) = delete; + + void mix(const State vstate, ContextBase *Context, const std::chrono::nanoseconds deviceTime, + const uint SamplesToDo); + + void prepare(DeviceBase *device); + + static void InitMixer(al::optional<std::string> resampler); + + DEF_NEWDEL(Voice) +}; + +extern Resampler ResamplerDefault; + +#endif /* CORE_VOICE_H */ diff --git a/core/voice_change.h b/core/voice_change.h new file mode 100644 index 00000000..ddc6186f --- /dev/null +++ b/core/voice_change.h @@ -0,0 +1,31 @@ +#ifndef VOICE_CHANGE_H +#define VOICE_CHANGE_H + +#include <atomic> + +#include "almalloc.h" + +struct Voice; + +using uint = unsigned int; + + +enum class VChangeState { + Reset, + Stop, + Play, + Pause, + Restart +}; +struct VoiceChange { + Voice *mOldVoice{nullptr}; + Voice *mVoice{nullptr}; + uint mSourceID{0}; + VChangeState mState{}; + + std::atomic<VoiceChange*> mNext{nullptr}; + + DEF_NEWDEL(VoiceChange) +}; + +#endif /* VOICE_CHANGE_H */ |