Diffstat (limited to 'core')
42 files changed, 5689 insertions, 324 deletions
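As a point of reference for the new AmbiScale::GetHFOrderScales() helper added in core/ambidefs.cpp below, here is a minimal usage sketch. It assumes the OpenAL Soft source tree is on the include path; the chosen orders and the printout are illustrative only and are not part of this change.

    // Hypothetical example: fetch the per-order HF gain corrections used when
    // feeding first-order ambisonic content to a third-order decoder.
    #include <array>
    #include <cstdio>
    #include "core/ambidefs.h"  // assumed include path within the build tree

    int main()
    {
        // Each entry is the input order's HF decoder scale divided by the
        // output order's; orders above the input order remain zero.
        const std::array<float,MaxAmbiOrder+1> hfscales{
            AmbiScale::GetHFOrderScales(1u, 3u)};
        for(size_t i{0};i < 2;++i)
            std::printf("order %zu HF gain: %f\n", i, static_cast<double>(hfscales[i]));
        return 0;
    }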
diff --git a/core/ambidefs.cpp b/core/ambidefs.cpp new file mode 100644 index 00000000..2725748e --- /dev/null +++ b/core/ambidefs.cpp @@ -0,0 +1,44 @@ + +#include "config.h" + +#include "ambidefs.h" + +#include <cassert> + + +namespace { + +constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale{{ + 1.00000000e+00f, 1.00000000e+00f +}}; +constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale2O{{ + 7.45355990e-01f, 1.00000000e+00f, 1.00000000e+00f +}}; +constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale3O{{ + 5.89792205e-01f, 8.79693856e-01f, 1.00000000e+00f, 1.00000000e+00f +}}; + +inline auto& GetDecoderHFScales(uint order) noexcept +{ + if(order >= 3) return Ambi3DDecoderHFScale3O; + if(order == 2) return Ambi3DDecoderHFScale2O; + return Ambi3DDecoderHFScale; +} + +} // namespace + +auto AmbiScale::GetHFOrderScales(const uint in_order, const uint out_order) noexcept + -> std::array<float,MaxAmbiOrder+1> +{ + std::array<float,MaxAmbiOrder+1> ret{}; + + assert(out_order >= in_order); + + const auto &target = GetDecoderHFScales(out_order); + const auto &input = GetDecoderHFScales(in_order); + + for(size_t i{0};i < in_order+1;++i) + ret[i] = input[i] / target[i]; + + return ret; +} diff --git a/core/ambidefs.h b/core/ambidefs.h index a72f7b78..22739359 100644 --- a/core/ambidefs.h +++ b/core/ambidefs.h @@ -97,6 +97,10 @@ struct AmbiScale { }}; return ret; } + + /* Retrieves per-order HF scaling factors for "upsampling" ambisonic data. */ + static std::array<float,MaxAmbiOrder+1> GetHFOrderScales(const uint in_order, + const uint out_order) noexcept; }; struct AmbiIndex { diff --git a/core/async_event.h b/core/async_event.h new file mode 100644 index 00000000..054f0563 --- /dev/null +++ b/core/async_event.h @@ -0,0 +1,55 @@ +#ifndef CORE_EVENT_H +#define CORE_EVENT_H + +#include "almalloc.h" + +struct EffectState; + +using uint = unsigned int; + + +enum { + /* End event thread processing. */ + EventType_KillThread = 0, + + /* User event types. */ + EventType_SourceStateChange = 1<<0, + EventType_BufferCompleted = 1<<1, + EventType_Disconnected = 1<<2, + + /* Internal events. 
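EventType_ReleaseEffectState passes an old EffectState reference to the event thread so it can be released outside of the mixer.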
*/ + EventType_ReleaseEffectState = 65536, +}; + +struct AsyncEvent { + enum class SrcState { + Reset, + Stop, + Play, + Pause + }; + + uint EnumType{0u}; + union { + char dummy; + struct { + uint id; + SrcState state; + } srcstate; + struct { + uint id; + uint count; + } bufcomp; + struct { + char msg[244]; + } disconnect; + EffectState *mEffectState; + } u{}; + + AsyncEvent() noexcept = default; + constexpr AsyncEvent(uint type) noexcept : EnumType{type} { } + + DISABLE_ALLOC() +}; + +#endif diff --git a/core/bformatdec.cpp b/core/bformatdec.cpp new file mode 100644 index 00000000..6bf85ec9 --- /dev/null +++ b/core/bformatdec.cpp @@ -0,0 +1,263 @@ + +#include "config.h" + +#include "bformatdec.h" + +#include <algorithm> +#include <array> +#include <cmath> +#include <utility> + +#include "almalloc.h" +#include "ambdec.h" +#include "filters/splitter.h" +#include "front_stablizer.h" +#include "math_defs.h" +#include "mixer.h" +#include "opthelpers.h" + + +namespace { + +inline auto& GetAmbiScales(AmbDecScale scaletype) noexcept +{ + if(scaletype == AmbDecScale::FuMa) return AmbiScale::FromFuMa(); + if(scaletype == AmbDecScale::SN3D) return AmbiScale::FromSN3D(); + return AmbiScale::FromN3D(); +} + +} // namespace + + +BFormatDec::BFormatDec(const AmbDecConf *conf, const bool allow_2band, const size_t inchans, + const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS], + std::unique_ptr<FrontStablizer> stablizer) + : mStablizer{std::move(stablizer)}, mDualBand{allow_2band && (conf->FreqBands == 2)} + , mChannelDec{inchans} +{ + const bool periphonic{(conf->ChanMask&AmbiPeriphonicMask) != 0}; + auto&& coeff_scale = GetAmbiScales(conf->CoeffScale); + + if(!mDualBand) + { + for(size_t j{0},k{0};j < mChannelDec.size();++j) + { + const size_t acn{periphonic ? j : AmbiIndex::FromACN2D()[j]}; + if(!(conf->ChanMask&(1u<<acn))) continue; + const size_t order{AmbiIndex::OrderFromChannel()[acn]}; + const float gain{conf->HFOrderGain[order] / coeff_scale[acn]}; + for(size_t i{0u};i < conf->NumSpeakers;++i) + { + const size_t chanidx{chanmap[i]}; + mChannelDec[j].mGains.Single[chanidx] = conf->Matrix[i][k] * gain; + } + ++k; + } + } + else + { + mChannelDec[0].mXOver.init(conf->XOverFreq / static_cast<float>(srate)); + for(size_t j{1};j < mChannelDec.size();++j) + mChannelDec[j].mXOver = mChannelDec[0].mXOver; + + const float ratio{std::pow(10.0f, conf->XOverRatio / 40.0f)}; + for(size_t j{0},k{0};j < mChannelDec.size();++j) + { + const size_t acn{periphonic ? 
j : AmbiIndex::FromACN2D()[j]}; + if(!(conf->ChanMask&(1u<<acn))) continue; + const size_t order{AmbiIndex::OrderFromChannel()[acn]}; + const float hfGain{conf->HFOrderGain[order] * ratio / coeff_scale[acn]}; + const float lfGain{conf->LFOrderGain[order] / ratio / coeff_scale[acn]}; + for(size_t i{0u};i < conf->NumSpeakers;++i) + { + const size_t chanidx{chanmap[i]}; + mChannelDec[j].mGains.Dual[sHFBand][chanidx] = conf->HFMatrix[i][k] * hfGain; + mChannelDec[j].mGains.Dual[sLFBand][chanidx] = conf->LFMatrix[i][k] * lfGain; + } + ++k; + } + } +} + +BFormatDec::BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs, + const al::span<const ChannelDec> coeffslf, std::unique_ptr<FrontStablizer> stablizer) + : mStablizer{std::move(stablizer)}, mDualBand{!coeffslf.empty()}, mChannelDec{inchans} +{ + if(!mDualBand) + { + for(size_t j{0};j < mChannelDec.size();++j) + { + float *outcoeffs{mChannelDec[j].mGains.Single}; + for(const ChannelDec &incoeffs : coeffs) + *(outcoeffs++) = incoeffs[j]; + } + } + else + { + for(size_t j{0};j < mChannelDec.size();++j) + { + float *outcoeffs{mChannelDec[j].mGains.Dual[sHFBand]}; + for(const ChannelDec &incoeffs : coeffs) + *(outcoeffs++) = incoeffs[j]; + + outcoeffs = mChannelDec[j].mGains.Dual[sLFBand]; + for(const ChannelDec &incoeffs : coeffslf) + *(outcoeffs++) = incoeffs[j]; + } + } +} + + +void BFormatDec::process(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t SamplesToDo) +{ + ASSUME(SamplesToDo > 0); + + if(mDualBand) + { + const al::span<float> hfSamples{mSamples[sHFBand].data(), SamplesToDo}; + const al::span<float> lfSamples{mSamples[sLFBand].data(), SamplesToDo}; + for(auto &chandec : mChannelDec) + { + chandec.mXOver.process({InSamples->data(), SamplesToDo}, hfSamples.data(), + lfSamples.data()); + MixSamples(hfSamples, OutBuffer, chandec.mGains.Dual[sHFBand], + chandec.mGains.Dual[sHFBand], 0, 0); + MixSamples(lfSamples, OutBuffer, chandec.mGains.Dual[sLFBand], + chandec.mGains.Dual[sLFBand], 0, 0); + ++InSamples; + } + } + else + { + for(auto &chandec : mChannelDec) + { + MixSamples({InSamples->data(), SamplesToDo}, OutBuffer, chandec.mGains.Single, + chandec.mGains.Single, 0, 0); + ++InSamples; + } + } +} + +void BFormatDec::processStablize(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx, + const size_t SamplesToDo) +{ + ASSUME(SamplesToDo > 0); + + /* Move the existing direct L/R signal out so it doesn't get processed by + * the stablizer. Add a delay to it so it stays aligned with the stablizer + * delay. + */ + float *RESTRICT mid{al::assume_aligned<16>(mStablizer->MidDirect.data())}; + float *RESTRICT side{al::assume_aligned<16>(mStablizer->Side.data())}; + for(size_t i{0};i < SamplesToDo;++i) + { + mid[FrontStablizer::DelayLength+i] = OutBuffer[lidx][i] + OutBuffer[ridx][i]; + side[FrontStablizer::DelayLength+i] = OutBuffer[lidx][i] - OutBuffer[ridx][i]; + } + std::fill_n(OutBuffer[lidx].begin(), SamplesToDo, 0.0f); + std::fill_n(OutBuffer[ridx].begin(), SamplesToDo, 0.0f); + + /* Decode the B-Format input to OutBuffer. */ + process(OutBuffer, InSamples, SamplesToDo); + + /* Apply a delay to all channels, except the front-left and front-right, so + * they maintain correct timing. 
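* The delay length matches FrontStablizer::DelayLength, keeping these channels aligned with the delayed mid/side signal.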
+ */ + const size_t NumChannels{OutBuffer.size()}; + for(size_t i{0u};i < NumChannels;i++) + { + if(i == lidx || i == ridx) + continue; + + auto &DelayBuf = mStablizer->DelayBuf[i]; + auto buffer_end = OutBuffer[i].begin() + SamplesToDo; + if LIKELY(SamplesToDo >= FrontStablizer::DelayLength) + { + auto delay_end = std::rotate(OutBuffer[i].begin(), + buffer_end - FrontStablizer::DelayLength, buffer_end); + std::swap_ranges(OutBuffer[i].begin(), delay_end, DelayBuf.begin()); + } + else + { + auto delay_start = std::swap_ranges(OutBuffer[i].begin(), buffer_end, + DelayBuf.begin()); + std::rotate(DelayBuf.begin(), delay_start, DelayBuf.end()); + } + } + + /* Include the side signal for what was just decoded. */ + for(size_t i{0};i < SamplesToDo;++i) + side[FrontStablizer::DelayLength+i] += OutBuffer[lidx][i] - OutBuffer[ridx][i]; + + /* Combine the delayed mid signal with the decoded mid signal. Note that + * the samples are stored and combined in reverse, so the newest samples + * are at the front and the oldest at the back. + */ + al::span<float> tmpbuf{mStablizer->TempBuf.data(), SamplesToDo+FrontStablizer::DelayLength}; + auto tmpiter = tmpbuf.begin() + SamplesToDo; + std::copy(mStablizer->MidDelay.cbegin(), mStablizer->MidDelay.cend(), tmpiter); + for(size_t i{0};i < SamplesToDo;++i) + *--tmpiter = OutBuffer[lidx][i] + OutBuffer[ridx][i]; + /* Save the newest samples for next time. */ + std::copy_n(tmpbuf.cbegin(), mStablizer->MidDelay.size(), mStablizer->MidDelay.begin()); + + /* Apply an all-pass on the reversed signal, then reverse the samples to + * get the forward signal with a reversed phase shift. The future samples + * are included with the all-pass to reduce the error in the output + * samples (the smaller the delay, the more error is introduced). + */ + mStablizer->MidFilter.applyAllpass(tmpbuf); + tmpbuf = tmpbuf.subspan<FrontStablizer::DelayLength>(); + std::reverse(tmpbuf.begin(), tmpbuf.end()); + + /* Now apply the band-splitter, combining its phase shift with the reversed + * phase shift, restoring the original phase on the split signal. + */ + mStablizer->MidFilter.process(tmpbuf, mStablizer->MidHF.data(), mStablizer->MidLF.data()); + + /* This pans the separate low- and high-frequency signals between being on + * the center channel and the left+right channels. The low-frequency signal + * is panned 1/3rd toward center and the high-frequency signal is panned + * 1/4th toward center. These values can be tweaked. + */ + const float cos_lf{std::cos(1.0f/3.0f * (al::MathDefs<float>::Pi()*0.5f))}; + const float cos_hf{std::cos(1.0f/4.0f * (al::MathDefs<float>::Pi()*0.5f))}; + const float sin_lf{std::sin(1.0f/3.0f * (al::MathDefs<float>::Pi()*0.5f))}; + const float sin_hf{std::sin(1.0f/4.0f * (al::MathDefs<float>::Pi()*0.5f))}; + for(size_t i{0};i < SamplesToDo;i++) + { + const float m{mStablizer->MidLF[i]*cos_lf + mStablizer->MidHF[i]*cos_hf + mid[i]}; + const float c{mStablizer->MidLF[i]*sin_lf + mStablizer->MidHF[i]*sin_hf}; + const float s{side[i]}; + + /* The generated center channel signal adds to the existing signal, + * while the modified left and right channels replace. + */ + OutBuffer[lidx][i] = (m + s) * 0.5f; + OutBuffer[ridx][i] = (m - s) * 0.5f; + OutBuffer[cidx][i] += c * 0.5f; + } + /* Move the delayed mid/side samples to the front for next time. 
*/ + auto mid_end = mStablizer->MidDirect.cbegin() + SamplesToDo; + std::copy(mid_end, mid_end+FrontStablizer::DelayLength, mStablizer->MidDirect.begin()); + auto side_end = mStablizer->Side.cbegin() + SamplesToDo; + std::copy(side_end, side_end+FrontStablizer::DelayLength, mStablizer->Side.begin()); +} + + +std::unique_ptr<BFormatDec> BFormatDec::Create(const AmbDecConf *conf, const bool allow_2band, + const size_t inchans, const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS], + std::unique_ptr<FrontStablizer> stablizer) +{ + return std::unique_ptr<BFormatDec>{new(FamCount(inchans)) + BFormatDec{conf, allow_2band, inchans, srate, chanmap, std::move(stablizer)}}; +} +std::unique_ptr<BFormatDec> BFormatDec::Create(const size_t inchans, + const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf, + std::unique_ptr<FrontStablizer> stablizer) +{ + return std::unique_ptr<BFormatDec>{new(FamCount(inchans)) + BFormatDec{inchans, coeffs, coeffslf, std::move(stablizer)}}; +} diff --git a/core/bformatdec.h b/core/bformatdec.h new file mode 100644 index 00000000..a0ae3f27 --- /dev/null +++ b/core/bformatdec.h @@ -0,0 +1,71 @@ +#ifndef CORE_BFORMATDEC_H +#define CORE_BFORMATDEC_H + +#include <array> +#include <cstddef> +#include <memory> + +#include "almalloc.h" +#include "alspan.h" +#include "ambidefs.h" +#include "bufferline.h" +#include "devformat.h" +#include "filters/splitter.h" + +struct AmbDecConf; +struct FrontStablizer; + + +using ChannelDec = std::array<float,MaxAmbiChannels>; + +class BFormatDec { + static constexpr size_t sHFBand{0}; + static constexpr size_t sLFBand{1}; + static constexpr size_t sNumBands{2}; + + struct ChannelDecoder { + union MatrixU { + float Dual[sNumBands][MAX_OUTPUT_CHANNELS]; + float Single[MAX_OUTPUT_CHANNELS]; + } mGains{}; + + /* NOTE: BandSplitter filter is unused with single-band decoding. */ + BandSplitter mXOver; + }; + + alignas(16) std::array<FloatBufferLine,2> mSamples; + + const std::unique_ptr<FrontStablizer> mStablizer; + const bool mDualBand{false}; + + al::FlexArray<ChannelDecoder> mChannelDec; + +public: + BFormatDec(const AmbDecConf *conf, const bool allow_2band, const size_t inchans, + const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS], + std::unique_ptr<FrontStablizer> stablizer); + BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs, + const al::span<const ChannelDec> coeffslf, std::unique_ptr<FrontStablizer> stablizer); + + bool hasStablizer() const noexcept { return mStablizer != nullptr; }; + + /* Decodes the ambisonic input to the given output channels. */ + void process(const al::span<FloatBufferLine> OutBuffer, const FloatBufferLine *InSamples, + const size_t SamplesToDo); + + /* Decodes the ambisonic input to the given output channels with stablization. 
*/ + void processStablize(const al::span<FloatBufferLine> OutBuffer, + const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx, + const size_t SamplesToDo); + + static std::unique_ptr<BFormatDec> Create(const AmbDecConf *conf, const bool allow_2band, + const size_t inchans, const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS], + std::unique_ptr<FrontStablizer> stablizer); + static std::unique_ptr<BFormatDec> Create(const size_t inchans, + const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf, + std::unique_ptr<FrontStablizer> stablizer); + + DEF_FAM_NEWDEL(BFormatDec, mChannelDec) +}; + +#endif /* CORE_BFORMATDEC_H */ diff --git a/core/bsinc_defs.h b/core/bsinc_defs.h index 43865289..f2958231 100644 --- a/core/bsinc_defs.h +++ b/core/bsinc_defs.h @@ -7,10 +7,4 @@ constexpr unsigned int BSincScaleCount{1 << BSincScaleBits}; constexpr unsigned int BSincPhaseBits{5}; constexpr unsigned int BSincPhaseCount{1 << BSincPhaseBits}; -/* The maximum number of sample points for the bsinc filters. The max points - * includes the doubling for downsampling, so the maximum number of base sample - * points is 24, which is 23rd order. - */ -constexpr unsigned int BSincPointsMax{48}; - #endif /* CORE_BSINC_DEFS_H */ diff --git a/core/bsinc_tables.cpp b/core/bsinc_tables.cpp index 315e1448..ff73c301 100644 --- a/core/bsinc_tables.cpp +++ b/core/bsinc_tables.cpp @@ -9,6 +9,7 @@ #include <memory> #include <stdexcept> +#include "core/mixer/defs.h" #include "math_defs.h" @@ -24,7 +25,8 @@ using uint = unsigned int; */ constexpr double Sinc(const double x) { - if(!(x > 1e-15 || x < -1e-15)) + constexpr double epsilon{std::numeric_limits<double>::epsilon()}; + if(!(x > epsilon || x < -epsilon)) return 1.0; return std::sin(al::MathDefs<double>::Pi()*x) / (al::MathDefs<double>::Pi()*x); } @@ -35,7 +37,7 @@ constexpr double Sinc(const double x) * I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k) * = sum_{k=0}^inf ((x / 2)^k / k!)^2 */ -constexpr double BesselI_0(const double x) +constexpr double BesselI_0(const double x) noexcept { /* Start at k=1 since k=0 is trivial. */ const double x2{x / 2.0}; @@ -82,7 +84,7 @@ constexpr double Kaiser(const double beta, const double k, const double besseli_ /* Calculates the (normalized frequency) transition width of the Kaiser window. * Rejection is in dB. */ -constexpr double CalcKaiserWidth(const double rejection, const uint order) +constexpr double CalcKaiserWidth(const double rejection, const uint order) noexcept { if(rejection > 21.19) return (rejection - 7.95) / (order * 2.285 * al::MathDefs<double>::Tau()); @@ -122,7 +124,7 @@ struct BSincHeader { uint num_points{Order+1}; for(uint si{0};si < BSincScaleCount;++si) { - const double scale{scaleBase + (scaleRange * si / (BSincScaleCount-1))}; + const double scale{scaleBase + (scaleRange * (si+1) / BSincScaleCount)}; const uint a_{std::min(static_cast<uint>(num_points / 2.0 / scale), num_points)}; const uint m{2 * a_}; @@ -144,21 +146,33 @@ constexpr BSincHeader bsinc24_hdr{60, 23}; * namespace while also being used as non-type template parameters. */ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6 + +/* The number of sample points is double the a value (rounded up to a multiple + * of 4), and scale index 0 includes the doubling for downsampling. bsinc24 is + * currently the highest quality filter, and will use the most sample points. 
+ */ +constexpr uint BSincPointsMax{(bsinc24_hdr.a[0]*2 + 3) & ~3u}; +static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small"); + template<size_t total_size> struct BSincFilterArray { alignas(16) std::array<float, total_size> mTable; + const BSincHeader &hdr; - BSincFilterArray(const BSincHeader &hdr) + BSincFilterArray(const BSincHeader &hdr_) : hdr{hdr_} + { #else template<const BSincHeader &hdr> struct BSincFilterArray { - alignas(16) std::array<float, hdr.total_size> mTable; + alignas(16) std::array<float, hdr.total_size> mTable{}; BSincFilterArray() -#endif { - using filter_type = double[][BSincPhaseCount+1][BSincPointsMax]; - auto filter = std::make_unique<filter_type>(BSincScaleCount); + constexpr uint BSincPointsMax{(hdr.a[0]*2 + 3) & ~3u}; + static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small"); +#endif + using filter_type = double[BSincPhaseCount+1][BSincPointsMax]; + auto filter = std::make_unique<filter_type[]>(BSincScaleCount); /* Calculate the Kaiser-windowed Sinc filter coefficients for each * scale and phase index. @@ -167,38 +181,38 @@ struct BSincFilterArray { { const uint m{hdr.a[si] * 2}; const size_t o{(BSincPointsMax-m) / 2}; - const double scale{hdr.scaleBase + (hdr.scaleRange * si / (BSincScaleCount-1))}; - const double cutoff{scale - (hdr.scaleBase * std::max(0.5, scale) * 2.0)}; + const double scale{hdr.scaleBase + (hdr.scaleRange * (si+1) / BSincScaleCount)}; + const double cutoff{scale - (hdr.scaleBase * std::max(1.0, scale*2.0))}; const auto a = static_cast<double>(hdr.a[si]); - const double l{a - 1.0}; + const double l{a - 1.0/BSincPhaseCount}; /* Do one extra phase index so that the phase delta has a proper * target for its last index. */ for(uint pi{0};pi <= BSincPhaseCount;++pi) { - const double phase{l + (pi/double{BSincPhaseCount})}; + const double phase{std::floor(l) + (pi/double{BSincPhaseCount})}; for(uint i{0};i < m;++i) { const double x{i - phase}; - filter[si][pi][o+i] = Kaiser(hdr.beta, x/a, hdr.besseli_0_beta) * cutoff * + filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, hdr.besseli_0_beta) * cutoff * Sinc(cutoff*x); } } } size_t idx{0}; - for(size_t si{0};si < BSincScaleCount-1;++si) + for(size_t si{0};si < BSincScaleCount;++si) { const size_t m{((hdr.a[si]*2) + 3) & ~3u}; const size_t o{(BSincPointsMax-m) / 2}; + /* Write out each phase index's filter and phase delta for this + * quality scale. + */ for(size_t pi{0};pi < BSincPhaseCount;++pi) { - /* Write out the filter. Also calculate and write out the phase - * and scale deltas. - */ for(size_t i{0};i < m;++i) mTable[idx++] = static_cast<float>(filter[si][pi][o+i]); @@ -210,11 +224,22 @@ struct BSincFilterArray { const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]}; mTable[idx++] = static_cast<float>(phDelta); } - + } + /* Calculate and write out each phase index's filter quality scale + * deltas. The last scale index doesn't have any scale or scale- + * phase deltas. + */ + if(si == BSincScaleCount-1) + { + for(size_t i{0};i < BSincPhaseCount*m*2;++i) + mTable[idx++] = 0.0f; + } + else for(size_t pi{0};pi < BSincPhaseCount;++pi) + { /* Linear interpolation between scales is also simplified. 
* - * Given a difference in points between scales, the destination - * points will be 0, thus: x = a + f (-a) + * Given a difference in the number of points between scales, + * the destination points will be 0, thus: x = a + f (-a) */ for(size_t i{0};i < m;++i) { @@ -233,31 +258,11 @@ struct BSincFilterArray { } } } - { - /* The last scale index doesn't have any scale or scale-phase - * deltas. - */ - constexpr size_t si{BSincScaleCount-1}; - const size_t m{((hdr.a[si]*2) + 3) & ~3u}; - const size_t o{(BSincPointsMax-m) / 2}; - - for(size_t pi{0};pi < BSincPhaseCount;++pi) - { - for(size_t i{0};i < m;++i) - mTable[idx++] = static_cast<float>(filter[si][pi][o+i]); - for(size_t i{0};i < m;++i) - { - const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]}; - mTable[idx++] = static_cast<float>(phDelta); - } - for(size_t i{0};i < m;++i) - mTable[idx++] = 0.0f; - for(size_t i{0};i < m;++i) - mTable[idx++] = 0.0f; - } - } assert(idx == hdr.total_size); } + + constexpr const BSincHeader &getHeader() const noexcept { return hdr; } + constexpr const float *getTable() const noexcept { return &mTable.front(); } }; #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6 @@ -268,9 +273,11 @@ const BSincFilterArray<bsinc12_hdr> bsinc12_filter{}; const BSincFilterArray<bsinc24_hdr> bsinc24_filter{}; #endif -constexpr BSincTable GenerateBSincTable(const BSincHeader &hdr, const float *tab) +template<typename T> +constexpr BSincTable GenerateBSincTable(const T &filter) { BSincTable ret{}; + const BSincHeader &hdr = filter.getHeader(); ret.scaleBase = static_cast<float>(hdr.scaleBase); ret.scaleRange = static_cast<float>(1.0 / hdr.scaleRange); for(size_t i{0};i < BSincScaleCount;++i) @@ -278,11 +285,11 @@ constexpr BSincTable GenerateBSincTable(const BSincHeader &hdr, const float *tab ret.filterOffset[0] = 0; for(size_t i{1};i < BSincScaleCount;++i) ret.filterOffset[i] = ret.filterOffset[i-1] + ret.m[i-1]*4*BSincPhaseCount; - ret.Tab = tab; + ret.Tab = filter.getTable(); return ret; } } // namespace -const BSincTable bsinc12{GenerateBSincTable(bsinc12_hdr, &bsinc12_filter.mTable.front())}; -const BSincTable bsinc24{GenerateBSincTable(bsinc24_hdr, &bsinc24_filter.mTable.front())}; +const BSincTable bsinc12{GenerateBSincTable(bsinc12_filter)}; +const BSincTable bsinc24{GenerateBSincTable(bsinc24_filter)}; diff --git a/core/buffer_storage.cpp b/core/buffer_storage.cpp new file mode 100644 index 00000000..5179db13 --- /dev/null +++ b/core/buffer_storage.cpp @@ -0,0 +1,41 @@ + +#include "config.h" + +#include "buffer_storage.h" + +#include <stdint.h> + + +uint BytesFromFmt(FmtType type) noexcept +{ + switch(type) + { + case FmtUByte: return sizeof(uint8_t); + case FmtShort: return sizeof(int16_t); + case FmtFloat: return sizeof(float); + case FmtDouble: return sizeof(double); + case FmtMulaw: return sizeof(uint8_t); + case FmtAlaw: return sizeof(uint8_t); + } + return 0; +} + +uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept +{ + switch(chans) + { + case FmtMono: return 1; + case FmtStereo: return 2; + case FmtRear: return 2; + case FmtQuad: return 4; + case FmtX51: return 6; + case FmtX61: return 7; + case FmtX71: return 8; + case FmtBFormat2D: return (ambiorder*2) + 1; + case FmtBFormat3D: return (ambiorder+1) * (ambiorder+1); + case FmtUHJ2: return 2; + case FmtUHJ3: return 3; + case FmtUHJ4: return 4; + } + return 0; +} diff --git a/core/buffer_storage.h b/core/buffer_storage.h new file mode 100644 index 00000000..59280354 --- /dev/null +++ b/core/buffer_storage.h @@ -0,0 +1,75 
@@ +#ifndef CORE_BUFFER_STORAGE_H +#define CORE_BUFFER_STORAGE_H + +#include <atomic> + +#include "albyte.h" + + +using uint = unsigned int; + +/* Storable formats */ +enum FmtType : unsigned char { + FmtUByte, + FmtShort, + FmtFloat, + FmtDouble, + FmtMulaw, + FmtAlaw, +}; +enum FmtChannels : unsigned char { + FmtMono, + FmtStereo, + FmtRear, + FmtQuad, + FmtX51, /* (WFX order) */ + FmtX61, /* (WFX order) */ + FmtX71, /* (WFX order) */ + FmtBFormat2D, + FmtBFormat3D, + FmtUHJ2, /* 2-channel UHJ, aka "BHJ", stereo-compatible */ + FmtUHJ3, /* 3-channel UHJ, aka "THJ" */ + FmtUHJ4, /* 4-channel UHJ, aka "PHJ" */ +}; + +enum class AmbiLayout : unsigned char { + FuMa, + ACN, +}; +enum class AmbiScaling : unsigned char { + FuMa, + SN3D, + N3D, +}; + +uint BytesFromFmt(FmtType type) noexcept; +uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept; +inline uint FrameSizeFromFmt(FmtChannels chans, FmtType type, uint ambiorder) noexcept +{ return ChannelsFromFmt(chans, ambiorder) * BytesFromFmt(type); } + + +using CallbackType = int(*)(void*, void*, int); + +struct BufferStorage { + CallbackType mCallback{nullptr}; + void *mUserData{nullptr}; + + uint mSampleRate{0u}; + FmtChannels mChannels{FmtMono}; + FmtType mType{FmtShort}; + uint mSampleLen{0u}; + + AmbiLayout mAmbiLayout{AmbiLayout::FuMa}; + AmbiScaling mAmbiScaling{AmbiScaling::FuMa}; + uint mAmbiOrder{0u}; + + inline uint bytesFromFmt() const noexcept { return BytesFromFmt(mType); } + inline uint channelsFromFmt() const noexcept + { return ChannelsFromFmt(mChannels, mAmbiOrder); } + inline uint frameSizeFromFmt() const noexcept { return channelsFromFmt() * bytesFromFmt(); } + + inline bool isBFormat() const noexcept + { return mChannels == FmtBFormat2D || mChannels == FmtBFormat3D; } +}; + +#endif /* CORE_BUFFER_STORAGE_H */ diff --git a/core/bufferline.h b/core/bufferline.h index 503e208d..8b445f3f 100644 --- a/core/bufferline.h +++ b/core/bufferline.h @@ -3,6 +3,8 @@ #include <array> +#include "alspan.h" + /* Size for temporary storage of buffer data, in floats. Larger values need * more memory and are harder on cache, while smaller values may need more * iterations for mixing. 
@@ -10,5 +12,6 @@ constexpr int BufferLineSize{1024}; using FloatBufferLine = std::array<float,BufferLineSize>; +using FloatBufferSpan = al::span<float,BufferLineSize>; #endif /* CORE_BUFFERLINE_H */ diff --git a/core/context.cpp b/core/context.cpp new file mode 100644 index 00000000..f1c310aa --- /dev/null +++ b/core/context.cpp @@ -0,0 +1,5 @@ + +#include "config.h" + +#include "context.h" + diff --git a/core/context.h b/core/context.h new file mode 100644 index 00000000..bf439053 --- /dev/null +++ b/core/context.h @@ -0,0 +1,171 @@ +#ifndef CORE_CONTEXT_H +#define CORE_CONTEXT_H + +#include <array> +#include <atomic> +#include <cstddef> +#include <memory> +#include <thread> + +#include "almalloc.h" +#include "alspan.h" +#include "atomic.h" +#include "core/bufferline.h" +#include "threads.h" +#include "vecmat.h" +#include "vector.h" + +struct DeviceBase; +struct EffectSlot; +struct EffectSlotProps; +struct RingBuffer; +struct Voice; +struct VoiceChange; +struct VoicePropsItem; + +using uint = unsigned int; + + +constexpr float SpeedOfSoundMetersPerSec{343.3f}; + +enum class DistanceModel : unsigned char { + Disable, + Inverse, InverseClamped, + Linear, LinearClamped, + Exponent, ExponentClamped, + + Default = InverseClamped +}; + + +struct WetBuffer { + bool mInUse; + al::FlexArray<FloatBufferLine, 16> mBuffer; + + WetBuffer(size_t count) : mBuffer{count} { } + + DEF_FAM_NEWDEL(WetBuffer, mBuffer) +}; +using WetBufferPtr = std::unique_ptr<WetBuffer>; + + +struct ContextProps { + float DopplerFactor; + float DopplerVelocity; + float SpeedOfSound; + bool SourceDistanceModel; + DistanceModel mDistanceModel; + + std::atomic<ContextProps*> next; + + DEF_NEWDEL(ContextProps) +}; + +struct ListenerProps { + std::array<float,3> Position; + std::array<float,3> Velocity; + std::array<float,3> OrientAt; + std::array<float,3> OrientUp; + float Gain; + float MetersPerUnit; + + std::atomic<ListenerProps*> next; + + DEF_NEWDEL(ListenerProps) +}; + +struct ContextParams { + /* Pointer to the most recent property values that are awaiting an update. */ + std::atomic<ContextProps*> ContextUpdate{nullptr}; + std::atomic<ListenerProps*> ListenerUpdate{nullptr}; + + alu::Matrix Matrix{alu::Matrix::Identity()}; + alu::Vector Velocity{}; + + float Gain{1.0f}; + float MetersPerUnit{1.0f}; + + float DopplerFactor{1.0f}; + float SpeedOfSound{343.3f}; /* in units per sec! */ + + bool SourceDistanceModel{false}; + DistanceModel mDistanceModel{}; +}; + +struct ContextBase { + DeviceBase *const mDevice; + + /* Counter for the pre-mixing updates, in 31.1 fixed point (lowest bit + * indicates if updates are currently happening). + */ + RefCount mUpdateCount{0u}; + std::atomic<bool> mHoldUpdates{false}; + std::atomic<bool> mStopVoicesOnDisconnect{true}; + + float mGainBoost{1.0f}; + + /* Linked lists of unused property containers, free to use for future + * updates. + */ + std::atomic<ContextProps*> mFreeContextProps{nullptr}; + std::atomic<ListenerProps*> mFreeListenerProps{nullptr}; + std::atomic<VoicePropsItem*> mFreeVoiceProps{nullptr}; + std::atomic<EffectSlotProps*> mFreeEffectslotProps{nullptr}; + + /* The voice change tail is the beginning of the "free" elements, up to and + * *excluding* the current. If tail==current, there's no free elements and + * new ones need to be allocated. The current voice change is the element + * last processed, and any after are pending. 
+ */ + VoiceChange *mVoiceChangeTail{}; + std::atomic<VoiceChange*> mCurrentVoiceChange{}; + + void allocVoiceChanges(size_t addcount); + + + ContextParams mParams; + + using VoiceArray = al::FlexArray<Voice*>; + std::atomic<VoiceArray*> mVoices{}; + std::atomic<size_t> mActiveVoiceCount{}; + + void allocVoices(size_t addcount); + al::span<Voice*> getVoicesSpan() const noexcept + { + return {mVoices.load(std::memory_order_relaxed)->data(), + mActiveVoiceCount.load(std::memory_order_relaxed)}; + } + al::span<Voice*> getVoicesSpanAcquired() const noexcept + { + return {mVoices.load(std::memory_order_acquire)->data(), + mActiveVoiceCount.load(std::memory_order_acquire)}; + } + + + using EffectSlotArray = al::FlexArray<EffectSlot*>; + std::atomic<EffectSlotArray*> mActiveAuxSlots{nullptr}; + + std::thread mEventThread; + al::semaphore mEventSem; + std::unique_ptr<RingBuffer> mAsyncEvents; + std::atomic<uint> mEnabledEvts{0u}; + + /* Asynchronous voice change actions are processed as a linked list of + * VoiceChange objects by the mixer, which is atomically appended to. + * However, to avoid allocating each object individually, they're allocated + * in clusters that are stored in a vector for easy automatic cleanup. + */ + using VoiceChangeCluster = std::unique_ptr<VoiceChange[]>; + al::vector<VoiceChangeCluster> mVoiceChangeClusters; + + using VoiceCluster = std::unique_ptr<Voice[]>; + al::vector<VoiceCluster> mVoiceClusters; + + + ContextBase(DeviceBase *device); + ContextBase(const ContextBase&) = delete; + ContextBase& operator=(const ContextBase&) = delete; + ~ContextBase(); +}; + +#endif /* CORE_CONTEXT_H */ diff --git a/core/converter.cpp b/core/converter.cpp new file mode 100644 index 00000000..6a06b464 --- /dev/null +++ b/core/converter.cpp @@ -0,0 +1,371 @@ + +#include "config.h" + +#include "converter.h" + +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <iterator> +#include <limits.h> + +#include "albit.h" +#include "albyte.h" +#include "alnumeric.h" +#include "fpu_ctrl.h" + +struct CTag; +struct CopyTag; + + +namespace { + +constexpr uint MaxPitch{10}; + +static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!"); +static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize, + "MaxPitch and/or BufferLineSize are too large for MixerFracBits!"); + +/* Base template left undefined. Should be marked =delete, but Clang 3.8.1 + * chokes on that given the inline specializations. 
+ */ +template<DevFmtType T> +inline float LoadSample(DevFmtType_t<T> val) noexcept; + +template<> inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept +{ return val * (1.0f/128.0f); } +template<> inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept +{ return val * (1.0f/32768.0f); } +template<> inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept +{ return static_cast<float>(val) * (1.0f/2147483648.0f); } +template<> inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept +{ return val; } + +template<> inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept +{ return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); } +template<> inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept +{ return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); } +template<> inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept +{ return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); } + + +template<DevFmtType T> +inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep, + const size_t samples) noexcept +{ + const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src); + for(size_t i{0u};i < samples;i++) + dst[i] = LoadSample<T>(ssrc[i*srcstep]); +} + +void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype, + const size_t samples) noexcept +{ +#define HANDLE_FMT(T) \ + case T: LoadSampleArray<T>(dst, src, srcstep, samples); break + switch(srctype) + { + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); + } +#undef HANDLE_FMT +} + + +template<DevFmtType T> +inline DevFmtType_t<T> StoreSample(float) noexcept; + +template<> inline float StoreSample<DevFmtFloat>(float val) noexcept +{ return val; } +template<> inline int32_t StoreSample<DevFmtInt>(float val) noexcept +{ return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)); } +template<> inline int16_t StoreSample<DevFmtShort>(float val) noexcept +{ return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); } +template<> inline int8_t StoreSample<DevFmtByte>(float val) noexcept +{ return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); } + +/* Define unsigned output variations. 
*/ +template<> inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept +{ return static_cast<uint32_t>(StoreSample<DevFmtInt>(val)) + 2147483648u; } +template<> inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept +{ return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768); } +template<> inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept +{ return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128); } + +template<DevFmtType T> +inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep, + const size_t samples) noexcept +{ + DevFmtType_t<T> *sdst = static_cast<DevFmtType_t<T>*>(dst); + for(size_t i{0u};i < samples;i++) + sdst[i*dststep] = StoreSample<T>(src[i]); +} + + +void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype, + const size_t samples) noexcept +{ +#define HANDLE_FMT(T) \ + case T: StoreSampleArray<T>(dst, src, dststep, samples); break + switch(dsttype) + { + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); + } +#undef HANDLE_FMT +} + + +template<DevFmtType T> +void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept +{ + const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src); + for(size_t i{0u};i < frames;i++) + dst[i*2 + 1] = dst[i*2 + 0] = LoadSample<T>(ssrc[i]) * 0.707106781187f; +} + +template<DevFmtType T> +void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst, + const void *src, const size_t frames) noexcept +{ + const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src); + std::fill_n(dst, frames, 0.0f); + for(size_t c{0};chanmask;++c) + { + if LIKELY((chanmask&1)) + { + for(size_t i{0u};i < frames;i++) + dst[i] += LoadSample<T>(ssrc[i*step + c]); + } + chanmask >>= 1; + } + for(size_t i{0u};i < frames;i++) + dst[i] *= scale; +} + +} // namespace + +SampleConverterPtr CreateSampleConverter(DevFmtType srcType, DevFmtType dstType, size_t numchans, + uint srcRate, uint dstRate, Resampler resampler) +{ + if(numchans < 1 || srcRate < 1 || dstRate < 1) + return nullptr; + + SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}}; + converter->mSrcType = srcType; + converter->mDstType = dstType; + converter->mSrcTypeSize = BytesFromDevFmt(srcType); + converter->mDstTypeSize = BytesFromDevFmt(dstType); + + converter->mSrcPrepCount = 0; + converter->mFracOffset = 0; + + /* Have to set the mixer FPU mode since that's what the resampler code expects. */ + FPUCtl mixer_mode{}; + auto step = static_cast<uint>( + mind(srcRate*double{MixerFracOne}/dstRate + 0.5, MaxPitch*MixerFracOne)); + converter->mIncrement = maxu(step, 1); + if(converter->mIncrement == MixerFracOne) + converter->mResample = Resample_<CopyTag,CTag>; + else + converter->mResample = PrepareResampler(resampler, converter->mIncrement, + &converter->mState); + + return converter; +} + +uint SampleConverter::availableOut(uint srcframes) const +{ + int prepcount{mSrcPrepCount}; + if(prepcount < 0) + { + /* Negative prepcount means we need to skip that many input samples. */ + if(static_cast<uint>(-prepcount) >= srcframes) + return 0; + srcframes -= static_cast<uint>(-prepcount); + prepcount = 0; + } + + if(srcframes < 1) + { + /* No output samples if there's no input samples. 
*/ + return 0; + } + + if(prepcount < MaxResamplerPadding + && static_cast<uint>(MaxResamplerPadding - prepcount) >= srcframes) + { + /* Not enough input samples to generate an output sample. */ + return 0; + } + + auto DataSize64 = static_cast<uint64_t>(prepcount); + DataSize64 += srcframes; + DataSize64 -= MaxResamplerPadding; + DataSize64 <<= MixerFracBits; + DataSize64 -= mFracOffset; + + /* If we have a full prep, we can generate at least one sample. */ + return static_cast<uint>(clampu64((DataSize64 + mIncrement-1)/mIncrement, 1, + std::numeric_limits<int>::max())); +} + +uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes) +{ + const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize}; + const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize}; + const uint increment{mIncrement}; + auto SamplesIn = static_cast<const al::byte*>(*src); + uint NumSrcSamples{*srcframes}; + + FPUCtl mixer_mode{}; + uint pos{0}; + while(pos < dstframes && NumSrcSamples > 0) + { + int prepcount{mSrcPrepCount}; + if(prepcount < 0) + { + /* Negative prepcount means we need to skip that many input samples. */ + if(static_cast<uint>(-prepcount) >= NumSrcSamples) + { + mSrcPrepCount = static_cast<int>(NumSrcSamples) + prepcount; + NumSrcSamples = 0; + break; + } + SamplesIn += SrcFrameSize*static_cast<uint>(-prepcount); + NumSrcSamples -= static_cast<uint>(-prepcount); + mSrcPrepCount = 0; + continue; + } + const uint toread{minu(NumSrcSamples, BufferLineSize - MaxResamplerPadding)}; + + if(prepcount < MaxResamplerPadding + && static_cast<uint>(MaxResamplerPadding - prepcount) >= toread) + { + /* Not enough input samples to generate an output sample. Store + * what we're given for later. + */ + for(size_t chan{0u};chan < mChan.size();chan++) + LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan, + mChan.size(), mSrcType, toread); + + mSrcPrepCount = prepcount + static_cast<int>(toread); + NumSrcSamples = 0; + break; + } + + float *RESTRICT SrcData{mSrcSamples}; + float *RESTRICT DstData{mDstSamples}; + uint DataPosFrac{mFracOffset}; + auto DataSize64 = static_cast<uint64_t>(prepcount); + DataSize64 += toread; + DataSize64 -= MaxResamplerPadding; + DataSize64 <<= MixerFracBits; + DataSize64 -= DataPosFrac; + + /* If we have a full prep, we can generate at least one sample. */ + auto DstSize = static_cast<uint>( + clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize)); + DstSize = minu(DstSize, dstframes-pos); + + for(size_t chan{0u};chan < mChan.size();chan++) + { + const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan}; + al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan; + + /* Load the previous samples into the source data first, then the + * new samples from the input buffer. + */ + std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData); + LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, toread); + + /* Store as many prep samples for next time as possible, given the + * number of output samples being generated. 
+ */ + uint SrcDataEnd{(DstSize*increment + DataPosFrac)>>MixerFracBits}; + if(SrcDataEnd >= static_cast<uint>(prepcount)+toread) + std::fill(std::begin(mChan[chan].PrevSamples), + std::end(mChan[chan].PrevSamples), 0.0f); + else + { + const size_t len{minz(al::size(mChan[chan].PrevSamples), + static_cast<uint>(prepcount)+toread-SrcDataEnd)}; + std::copy_n(SrcData+SrcDataEnd, len, mChan[chan].PrevSamples); + std::fill(std::begin(mChan[chan].PrevSamples)+len, + std::end(mChan[chan].PrevSamples), 0.0f); + } + + /* Now resample, and store the result in the output buffer. */ + const float *ResampledData{mResample(&mState, SrcData+(MaxResamplerPadding>>1), + DataPosFrac, increment, {DstData, DstSize})}; + + StoreSamples(DstSamples, ResampledData, mChan.size(), mDstType, DstSize); + } + + /* Update the number of prep samples still available, as well as the + * fractional offset. + */ + DataPosFrac += increment*DstSize; + mSrcPrepCount = mini(prepcount + static_cast<int>(toread - (DataPosFrac>>MixerFracBits)), + MaxResamplerPadding); + mFracOffset = DataPosFrac & MixerFracMask; + + /* Update the src and dst pointers in case there's still more to do. */ + SamplesIn += SrcFrameSize*(DataPosFrac>>MixerFracBits); + NumSrcSamples -= minu(NumSrcSamples, (DataPosFrac>>MixerFracBits)); + + dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize; + pos += DstSize; + } + + *src = SamplesIn; + *srcframes = NumSrcSamples; + + return pos; +} + + +void ChannelConverter::convert(const void *src, float *dst, uint frames) const +{ + if(mDstChans == DevFmtMono) + { + const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))}; + switch(mSrcType) + { +#define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, dst, src, frames); break + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); +#undef HANDLE_FMT + } + } + else if(mChanMask == 0x1 && mDstChans == DevFmtStereo) + { + switch(mSrcType) + { +#define HANDLE_FMT(T) case T: Mono2Stereo<T>(dst, src, frames); break + HANDLE_FMT(DevFmtByte); + HANDLE_FMT(DevFmtUByte); + HANDLE_FMT(DevFmtShort); + HANDLE_FMT(DevFmtUShort); + HANDLE_FMT(DevFmtInt); + HANDLE_FMT(DevFmtUInt); + HANDLE_FMT(DevFmtFloat); +#undef HANDLE_FMT + } + } +} diff --git a/core/converter.h b/core/converter.h new file mode 100644 index 00000000..2d22ae38 --- /dev/null +++ b/core/converter.h @@ -0,0 +1,59 @@ +#ifndef CORE_CONVERTER_H +#define CORE_CONVERTER_H + +#include <cstddef> +#include <memory> + +#include "almalloc.h" +#include "devformat.h" +#include "mixer/defs.h" + +using uint = unsigned int; + + +struct SampleConverter { + DevFmtType mSrcType{}; + DevFmtType mDstType{}; + uint mSrcTypeSize{}; + uint mDstTypeSize{}; + + int mSrcPrepCount{}; + + uint mFracOffset{}; + uint mIncrement{}; + InterpState mState{}; + ResamplerFunc mResample{}; + + alignas(16) float mSrcSamples[BufferLineSize]{}; + alignas(16) float mDstSamples[BufferLineSize]{}; + + struct ChanSamples { + alignas(16) float PrevSamples[MaxResamplerPadding]; + }; + al::FlexArray<ChanSamples> mChan; + + SampleConverter(size_t numchans) : mChan{numchans} { } + + uint convert(const void **src, uint *srcframes, void *dst, uint dstframes); + uint availableOut(uint srcframes) const; + + DEF_FAM_NEWDEL(SampleConverter, mChan) +}; +using SampleConverterPtr = std::unique_ptr<SampleConverter>; + +SampleConverterPtr CreateSampleConverter(DevFmtType srcType, DevFmtType dstType, 
size_t numchans, + uint srcRate, uint dstRate, Resampler resampler); + + +struct ChannelConverter { + DevFmtType mSrcType{}; + uint mSrcStep{}; + uint mChanMask{}; + DevFmtChannels mDstChans{}; + + bool is_active() const noexcept { return mChanMask != 0; } + + void convert(const void *src, float *dst, uint frames) const; +}; + +#endif /* CORE_CONVERTER_H */ diff --git a/core/dbus_wrap.cpp b/core/dbus_wrap.cpp new file mode 100644 index 00000000..506dd815 --- /dev/null +++ b/core/dbus_wrap.cpp @@ -0,0 +1,46 @@ + +#include "config.h" + +#include "dbus_wrap.h" + +#ifdef HAVE_DYNLOAD + +#include <mutex> +#include <type_traits> + +#include "logging.h" + + +void *dbus_handle{nullptr}; +#define DECL_FUNC(x) decltype(x) *p##x{}; +DBUS_FUNCTIONS(DECL_FUNC) +#undef DECL_FUNC + +void PrepareDBus() +{ + static constexpr char libname[] = "libdbus-1.so.3"; + + auto load_func = [](auto &f, const char *name) -> void + { f = reinterpret_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); }; +#define LOAD_FUNC(x) do { \ + load_func(p##x, #x); \ + if(!p##x) \ + { \ + WARN("Failed to load function %s\n", #x); \ + CloseLib(dbus_handle); \ + dbus_handle = nullptr; \ + return; \ + } \ +} while(0); + + dbus_handle = LoadLib(libname); + if(!dbus_handle) + { + WARN("Failed to load %s\n", libname); + return; + } + +DBUS_FUNCTIONS(LOAD_FUNC) +#undef LOAD_FUNC +} +#endif diff --git a/core/dbus_wrap.h b/core/dbus_wrap.h new file mode 100644 index 00000000..61dbb971 --- /dev/null +++ b/core/dbus_wrap.h @@ -0,0 +1,75 @@ +#ifndef CORE_DBUS_WRAP_H +#define CORE_DBUS_WRAP_H + +#include <memory> + +#include <dbus/dbus.h> + +#include "dynload.h" + + +#define DBUS_FUNCTIONS(MAGIC) \ +MAGIC(dbus_error_init) \ +MAGIC(dbus_error_free) \ +MAGIC(dbus_bus_get) \ +MAGIC(dbus_connection_set_exit_on_disconnect) \ +MAGIC(dbus_connection_unref) \ +MAGIC(dbus_connection_send_with_reply_and_block) \ +MAGIC(dbus_message_unref) \ +MAGIC(dbus_message_new_method_call) \ +MAGIC(dbus_message_append_args) \ +MAGIC(dbus_message_iter_init) \ +MAGIC(dbus_message_iter_next) \ +MAGIC(dbus_message_iter_recurse) \ +MAGIC(dbus_message_iter_get_arg_type) \ +MAGIC(dbus_message_iter_get_basic) \ +MAGIC(dbus_set_error_from_message) + +#ifdef HAVE_DYNLOAD + +#include <mutex> + +extern void *dbus_handle; +#define DECL_FUNC(x) extern decltype(x) *p##x; +DBUS_FUNCTIONS(DECL_FUNC) +#undef DECL_FUNC + +void PrepareDBus(); + +inline auto HasDBus() +{ + static std::once_flag init_dbus{}; + std::call_once(init_dbus, PrepareDBus); + return dbus_handle; +} + +#else + +#define DECL_FUNC(x) constexpr auto p##x = &x; +DBUS_FUNCTIONS(DECL_FUNC) +#undef DECL_FUNC + +constexpr bool HasDBus() noexcept { return true; } +#endif /* HAVE_DYNLOAD */ + + +namespace dbus { + +struct Error { + Error() { (*pdbus_error_init)(&mError); } + ~Error() { (*pdbus_error_free)(&mError); } + DBusError* operator->() { return &mError; } + DBusError &get() { return mError; } +private: + DBusError mError{}; +}; + +struct ConnectionDeleter { + void operator()(DBusConnection *c) { (*pdbus_connection_unref)(c); } +}; +using ConnectionPtr = std::unique_ptr<DBusConnection,ConnectionDeleter>; + +} // namespace dbus + + +#endif /* CORE_DBUS_WRAP_H */ diff --git a/core/device.cpp b/core/device.cpp new file mode 100644 index 00000000..9705c0ac --- /dev/null +++ b/core/device.cpp @@ -0,0 +1,7 @@ + +#include "config.h" + +#include "device.h" + + +al::FlexArray<ContextBase*> DeviceBase::sEmptyContextArray{0u}; diff --git a/core/device.h b/core/device.h new file mode 100644 index 
00000000..4cc822cc --- /dev/null +++ b/core/device.h @@ -0,0 +1,290 @@ +#ifndef CORE_DEVICE_H +#define CORE_DEVICE_H + +#include <stddef.h> + +#include <array> +#include <atomic> +#include <bitset> +#include <chrono> +#include <memory> +#include <mutex> +#include <string> + +#include "almalloc.h" +#include "alspan.h" +#include "ambidefs.h" +#include "atomic.h" +#include "core/bufferline.h" +#include "devformat.h" +#include "intrusive_ptr.h" +#include "mixer/hrtfdefs.h" +#include "opthelpers.h" +#include "vector.h" + +struct BackendBase; +class BFormatDec; +struct bs2b; +struct Compressor; +struct ContextBase; +struct DirectHrtfState; +struct HrtfStore; +struct UhjEncoder; + +using uint = unsigned int; + + +#define MIN_OUTPUT_RATE 8000 +#define MAX_OUTPUT_RATE 192000 +#define DEFAULT_OUTPUT_RATE 44100 + +#define DEFAULT_UPDATE_SIZE 882 /* 20ms */ +#define DEFAULT_NUM_UPDATES 3 + + +enum class DeviceType : unsigned char { + Playback, + Capture, + Loopback +}; + + +enum class RenderMode : unsigned char { + Normal, + Pairwise, + Hrtf +}; + + +struct InputRemixMap { + struct TargetMix { Channel channel; float mix; }; + + Channel channel; + std::array<TargetMix,2> targets; +}; + + +/* Maximum delay in samples for speaker distance compensation. */ +#define MAX_DELAY_LENGTH 1024 + +struct DistanceComp { + struct ChanData { + float Gain{1.0f}; + uint Length{0u}; /* Valid range is [0...MAX_DELAY_LENGTH). */ + float *Buffer{nullptr}; + }; + + std::array<ChanData,MAX_OUTPUT_CHANNELS> mChannels; + al::FlexArray<float,16> mSamples; + + DistanceComp(size_t count) : mSamples{count} { } + + static std::unique_ptr<DistanceComp> Create(size_t numsamples) + { return std::unique_ptr<DistanceComp>{new(FamCount(numsamples)) DistanceComp{numsamples}}; } + + DEF_FAM_NEWDEL(DistanceComp, mSamples) +}; + + +struct BFChannelConfig { + float Scale; + uint Index; +}; + + +struct MixParams { + /* Coefficient channel mapping for mixing to the buffer. */ + std::array<BFChannelConfig,MAX_OUTPUT_CHANNELS> AmbiMap{}; + + al::span<FloatBufferLine> Buffer; +}; + +struct RealMixParams { + al::span<const InputRemixMap> RemixMap; + std::array<uint,MaxChannels> ChannelIndex{}; + + al::span<FloatBufferLine> Buffer; +}; + +enum { + // Frequency was requested by the app or config file + FrequencyRequest, + // Channel configuration was requested by the config file + ChannelsRequest, + // Sample type was requested by the config file + SampleTypeRequest, + + // Specifies if the DSP is paused at user request + DevicePaused, + // Specifies if the device is currently running + DeviceRunning, + + DeviceFlagsCount +}; + +struct DeviceBase { + /* To avoid extraneous allocations, a 0-sized FlexArray<ContextBase*> is + * defined globally as a sharable object. + */ + static al::FlexArray<ContextBase*> sEmptyContextArray; + + std::atomic<bool> Connected{true}; + const DeviceType Type{}; + + uint Frequency{}; + uint UpdateSize{}; + uint BufferSize{}; + + DevFmtChannels FmtChans{}; + DevFmtType FmtType{}; + bool IsHeadphones{false}; + uint mAmbiOrder{0}; + float mXOverFreq{400.0f}; + /* For DevFmtAmbi* output only, specifies the channel order and + * normalization. + */ + DevAmbiLayout mAmbiLayout{DevAmbiLayout::Default}; + DevAmbiScaling mAmbiScale{DevAmbiScaling::Default}; + + std::string DeviceName; + + // Device flags + std::bitset<DeviceFlagsCount> Flags{}; + + uint NumAuxSends{}; + + /* Rendering mode. 
*/ + RenderMode mRenderMode{RenderMode::Normal}; + + /* The average speaker distance as determined by the ambdec configuration, + * HRTF data set, or the NFC-HOA reference delay. Only used for NFC. + */ + float AvgSpeakerDist{0.0f}; + + uint SamplesDone{0u}; + std::chrono::nanoseconds ClockBase{0}; + std::chrono::nanoseconds FixedLatency{0}; + + /* Temp storage used for mixer processing. */ + alignas(16) float ResampledData[BufferLineSize]; + alignas(16) float FilteredData[BufferLineSize]; + union { + alignas(16) float HrtfSourceData[BufferLineSize + HrtfHistoryLength]; + alignas(16) float NfcSampleData[BufferLineSize]; + }; + + /* Persistent storage for HRTF mixing. */ + alignas(16) float2 HrtfAccumData[BufferLineSize + HrirLength + HrtfDirectDelay]; + + /* Mixing buffer used by the Dry mix and Real output. */ + al::vector<FloatBufferLine, 16> MixBuffer; + + /* The "dry" path corresponds to the main output. */ + MixParams Dry; + uint NumChannelsPerOrder[MaxAmbiOrder+1]{}; + + /* "Real" output, which will be written to the device buffer. May alias the + * dry buffer. + */ + RealMixParams RealOut; + + /* HRTF state and info */ + std::unique_ptr<DirectHrtfState> mHrtfState; + al::intrusive_ptr<HrtfStore> mHrtf; + uint mIrSize{0}; + + /* Ambisonic-to-UHJ encoder */ + std::unique_ptr<UhjEncoder> mUhjEncoder; + + /* Ambisonic decoder for speakers */ + std::unique_ptr<BFormatDec> AmbiDecoder; + + /* Stereo-to-binaural filter */ + std::unique_ptr<bs2b> Bs2b; + + using PostProc = void(DeviceBase::*)(const size_t SamplesToDo); + PostProc PostProcess{nullptr}; + + std::unique_ptr<Compressor> Limiter; + + /* Delay buffers used to compensate for speaker distances. */ + std::unique_ptr<DistanceComp> ChannelDelays; + + /* Dithering control. */ + float DitherDepth{0.0f}; + uint DitherSeed{0u}; + + /* Running count of the mixer invocations, in 31.1 fixed point. This + * actually increments *twice* when mixing, first at the start and then at + * the end, so the bottom bit indicates if the device is currently mixing + * and the upper bits indicates how many mixes have been done. + */ + RefCount MixCount{0u}; + + // Contexts created on this device + std::atomic<al::FlexArray<ContextBase*>*> mContexts{nullptr}; + + /* This lock protects the device state (format, update size, etc) from + * being from being changed in multiple threads, or being accessed while + * being changed. It's also used to serialize calls to the backend. 
+ */ + std::mutex StateLock; + std::unique_ptr<BackendBase> Backend; + + + DeviceBase(DeviceType type); + DeviceBase(const DeviceBase&) = delete; + DeviceBase& operator=(const DeviceBase&) = delete; + ~DeviceBase(); + + uint bytesFromFmt() const noexcept { return BytesFromDevFmt(FmtType); } + uint channelsFromFmt() const noexcept { return ChannelsFromDevFmt(FmtChans, mAmbiOrder); } + uint frameSizeFromFmt() const noexcept { return bytesFromFmt() * channelsFromFmt(); } + + uint waitForMix() const noexcept + { + uint refcount; + while((refcount=MixCount.load(std::memory_order_acquire))&1) { + } + return refcount; + } + + void ProcessHrtf(const size_t SamplesToDo); + void ProcessAmbiDec(const size_t SamplesToDo); + void ProcessAmbiDecStablized(const size_t SamplesToDo); + void ProcessUhj(const size_t SamplesToDo); + void ProcessBs2b(const size_t SamplesToDo); + + inline void postProcess(const size_t SamplesToDo) + { if LIKELY(PostProcess) (this->*PostProcess)(SamplesToDo); } + + void renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep); + + /* Caller must lock the device state, and the mixer must not be running. */ +#ifdef __USE_MINGW_ANSI_STDIO + [[gnu::format(gnu_printf,2,3)]] +#else + [[gnu::format(printf,2,3)]] +#endif + void handleDisconnect(const char *msg, ...); + + DISABLE_ALLOC() +}; + + +/* Must be less than 15 characters (16 including terminating null) for + * compatibility with pthread_setname_np limitations. */ +#define MIXER_THREAD_NAME "alsoft-mixer" + +#define RECORD_THREAD_NAME "alsoft-record" + + +/** + * Returns the index for the given channel name (e.g. FrontCenter), or + * INVALID_CHANNEL_INDEX if it doesn't exist. + */ +inline uint GetChannelIdxByName(const RealMixParams &real, Channel chan) noexcept +{ return real.ChannelIndex[chan]; } +#define INVALID_CHANNEL_INDEX ~0u + +#endif /* CORE_DEVICE_H */ diff --git a/core/front_stablizer.h b/core/front_stablizer.h new file mode 100644 index 00000000..3d328a8d --- /dev/null +++ b/core/front_stablizer.h @@ -0,0 +1,36 @@ +#ifndef CORE_FRONT_STABLIZER_H +#define CORE_FRONT_STABLIZER_H + +#include <array> +#include <memory> + +#include "almalloc.h" +#include "bufferline.h" +#include "filters/splitter.h" + + +struct FrontStablizer { + static constexpr size_t DelayLength{256u}; + + FrontStablizer(size_t numchans) : DelayBuf{numchans} { } + + alignas(16) std::array<float,BufferLineSize + DelayLength> Side{}; + alignas(16) std::array<float,BufferLineSize + DelayLength> MidDirect{}; + alignas(16) std::array<float,DelayLength> MidDelay{}; + + alignas(16) std::array<float,BufferLineSize + DelayLength> TempBuf{}; + + BandSplitter MidFilter; + alignas(16) FloatBufferLine MidLF{}; + alignas(16) FloatBufferLine MidHF{}; + + using DelayLine = std::array<float,DelayLength>; + al::FlexArray<DelayLine,16> DelayBuf; + + static std::unique_ptr<FrontStablizer> Create(size_t numchans) + { return std::unique_ptr<FrontStablizer>{new(FamCount(numchans)) FrontStablizer{numchans}}; } + + DEF_FAM_NEWDEL(FrontStablizer, DelayBuf) +}; + +#endif /* CORE_FRONT_STABLIZER_H */ diff --git a/core/helpers.cpp b/core/helpers.cpp new file mode 100644 index 00000000..dcb785c9 --- /dev/null +++ b/core/helpers.cpp @@ -0,0 +1,514 @@ + +#include "config.h" + +#include "helpers.h" + +#include <algorithm> +#include <cerrno> +#include <cstdarg> +#include <cstdlib> +#include <cstdio> +#include <cstring> +#include <mutex> +#include <limits> +#include <string> + +#include "almalloc.h" +#include "alfstream.h" +#include "aloptional.h" +#include 
"alspan.h" +#include "alstring.h" +#include "logging.h" +#include "strutils.h" +#include "vector.h" + + +/* Mixing thread piority level */ +int RTPrioLevel{1}; + +/* Allow reducing the process's RTTime limit for RTKit. */ +bool AllowRTTimeLimit{true}; + + +#ifdef _WIN32 + +#include <shlobj.h> + +const PathNamePair &GetProcBinary() +{ + static al::optional<PathNamePair> procbin; + if(procbin) return *procbin; + + auto fullpath = al::vector<WCHAR>(256); + DWORD len{GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size()))}; + while(len == fullpath.size()) + { + fullpath.resize(fullpath.size() << 1); + len = GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size())); + } + if(len == 0) + { + ERR("Failed to get process name: error %lu\n", GetLastError()); + procbin = al::make_optional<PathNamePair>(); + return *procbin; + } + + fullpath.resize(len); + if(fullpath.back() != 0) + fullpath.push_back(0); + + auto sep = std::find(fullpath.rbegin()+1, fullpath.rend(), '\\'); + sep = std::find(fullpath.rbegin()+1, sep, '/'); + if(sep != fullpath.rend()) + { + *sep = 0; + procbin = al::make_optional<PathNamePair>(wstr_to_utf8(fullpath.data()), + wstr_to_utf8(&*sep + 1)); + } + else + procbin = al::make_optional<PathNamePair>(std::string{}, wstr_to_utf8(fullpath.data())); + + TRACE("Got binary: %s, %s\n", procbin->path.c_str(), procbin->fname.c_str()); + return *procbin; +} + +namespace { + +void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results) +{ + std::string pathstr{path}; + pathstr += "\\*"; + pathstr += ext; + TRACE("Searching %s\n", pathstr.c_str()); + + std::wstring wpath{utf8_to_wstr(pathstr.c_str())}; + WIN32_FIND_DATAW fdata; + HANDLE hdl{FindFirstFileW(wpath.c_str(), &fdata)}; + if(hdl == INVALID_HANDLE_VALUE) return; + + const auto base = results->size(); + + do { + results->emplace_back(); + std::string &str = results->back(); + str = path; + str += '\\'; + str += wstr_to_utf8(fdata.cFileName); + } while(FindNextFileW(hdl, &fdata)); + FindClose(hdl); + + const al::span<std::string> newlist{results->data()+base, results->size()-base}; + std::sort(newlist.begin(), newlist.end()); + for(const auto &name : newlist) + TRACE(" got %s\n", name.c_str()); +} + +} // namespace + +al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir) +{ + auto is_slash = [](int c) noexcept -> int { return (c == '\\' || c == '/'); }; + + static std::mutex search_lock; + std::lock_guard<std::mutex> _{search_lock}; + + /* If the path is absolute, use it directly. */ + al::vector<std::string> results; + if(isalpha(subdir[0]) && subdir[1] == ':' && is_slash(subdir[2])) + { + std::string path{subdir}; + std::replace(path.begin(), path.end(), '/', '\\'); + DirectorySearch(path.c_str(), ext, &results); + return results; + } + if(subdir[0] == '\\' && subdir[1] == '\\' && subdir[2] == '?' && subdir[3] == '\\') + { + DirectorySearch(subdir, ext, &results); + return results; + } + + std::string path; + + /* Search the app-local directory. */ + if(auto localpath = al::getenv(L"ALSOFT_LOCAL_PATH")) + { + path = wstr_to_utf8(localpath->c_str()); + if(is_slash(path.back())) + path.pop_back(); + } + else if(WCHAR *cwdbuf{_wgetcwd(nullptr, 0)}) + { + path = wstr_to_utf8(cwdbuf); + if(is_slash(path.back())) + path.pop_back(); + free(cwdbuf); + } + else + path = "."; + std::replace(path.begin(), path.end(), '/', '\\'); + DirectorySearch(path.c_str(), ext, &results); + + /* Search the local and global data dirs. 
*/ + static const int ids[2]{ CSIDL_APPDATA, CSIDL_COMMON_APPDATA }; + for(int id : ids) + { + WCHAR buffer[MAX_PATH]; + if(SHGetSpecialFolderPathW(nullptr, buffer, id, FALSE) == FALSE) + continue; + + path = wstr_to_utf8(buffer); + if(!is_slash(path.back())) + path += '\\'; + path += subdir; + std::replace(path.begin(), path.end(), '/', '\\'); + + DirectorySearch(path.c_str(), ext, &results); + } + + return results; +} + +void SetRTPriority(void) +{ + if(RTPrioLevel > 0) + { + if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL)) + ERR("Failed to set priority level for thread\n"); + } +} + +#else + +#include <sys/types.h> +#include <unistd.h> +#include <dirent.h> +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#endif +#ifdef __HAIKU__ +#include <FindDirectory.h> +#endif +#ifdef HAVE_PROC_PIDPATH +#include <libproc.h> +#endif +#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__) +#include <pthread.h> +#include <sched.h> +#endif +#ifdef HAVE_RTKIT +#include <sys/time.h> +#include <sys/resource.h> + +#include "dbus_wrap.h" +#include "rtkit.h" +#ifndef RLIMIT_RTTIME +#define RLIMIT_RTTIME 15 +#endif +#endif + +const PathNamePair &GetProcBinary() +{ + static al::optional<PathNamePair> procbin; + if(procbin) return *procbin; + + al::vector<char> pathname; +#ifdef __FreeBSD__ + size_t pathlen; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; + if(sysctl(mib, 4, nullptr, &pathlen, nullptr, 0) == -1) + WARN("Failed to sysctl kern.proc.pathname: %s\n", strerror(errno)); + else + { + pathname.resize(pathlen + 1); + sysctl(mib, 4, pathname.data(), &pathlen, nullptr, 0); + pathname.resize(pathlen); + } +#endif +#ifdef HAVE_PROC_PIDPATH + if(pathname.empty()) + { + char procpath[PROC_PIDPATHINFO_MAXSIZE]{}; + const pid_t pid{getpid()}; + if(proc_pidpath(pid, procpath, sizeof(procpath)) < 1) + ERR("proc_pidpath(%d, ...) 
failed: %s\n", pid, strerror(errno)); + else + pathname.insert(pathname.end(), procpath, procpath+strlen(procpath)); + } +#endif +#ifdef __HAIKU__ + if(pathname.empty()) + { + char procpath[PATH_MAX]; + if(find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, NULL, procpath, sizeof(procpath)) == B_OK) + pathname.insert(pathname.end(), procpath, procpath+strlen(procpath)); + } +#endif +#ifndef __SWITCH__ + if(pathname.empty()) + { + static const char SelfLinkNames[][32]{ + "/proc/self/exe", + "/proc/self/file", + "/proc/curproc/exe", + "/proc/curproc/file" + }; + + pathname.resize(256); + + const char *selfname{}; + ssize_t len{}; + for(const char *name : SelfLinkNames) + { + selfname = name; + len = readlink(selfname, pathname.data(), pathname.size()); + if(len >= 0 || errno != ENOENT) break; + } + + while(len > 0 && static_cast<size_t>(len) == pathname.size()) + { + pathname.resize(pathname.size() << 1); + len = readlink(selfname, pathname.data(), pathname.size()); + } + if(len <= 0) + { + WARN("Failed to readlink %s: %s\n", selfname, strerror(errno)); + len = 0; + } + + pathname.resize(static_cast<size_t>(len)); + } +#endif + while(!pathname.empty() && pathname.back() == 0) + pathname.pop_back(); + + auto sep = std::find(pathname.crbegin(), pathname.crend(), '/'); + if(sep != pathname.crend()) + procbin = al::make_optional<PathNamePair>(std::string(pathname.cbegin(), sep.base()-1), + std::string(sep.base(), pathname.cend())); + else + procbin = al::make_optional<PathNamePair>(std::string{}, + std::string(pathname.cbegin(), pathname.cend())); + + TRACE("Got binary: \"%s\", \"%s\"\n", procbin->path.c_str(), procbin->fname.c_str()); + return *procbin; +} + +namespace { + +void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results) +{ + TRACE("Searching %s for *%s\n", path, ext); + DIR *dir{opendir(path)}; + if(!dir) return; + + const auto base = results->size(); + const size_t extlen{strlen(ext)}; + + while(struct dirent *dirent{readdir(dir)}) + { + if(strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0) + continue; + + const size_t len{strlen(dirent->d_name)}; + if(len <= extlen) continue; + if(al::strcasecmp(dirent->d_name+len-extlen, ext) != 0) + continue; + + results->emplace_back(); + std::string &str = results->back(); + str = path; + if(str.back() != '/') + str.push_back('/'); + str += dirent->d_name; + } + closedir(dir); + + const al::span<std::string> newlist{results->data()+base, results->size()-base}; + std::sort(newlist.begin(), newlist.end()); + for(const auto &name : newlist) + TRACE(" got %s\n", name.c_str()); +} + +} // namespace + +al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir) +{ + static std::mutex search_lock; + std::lock_guard<std::mutex> _{search_lock}; + + al::vector<std::string> results; + if(subdir[0] == '/') + { + DirectorySearch(subdir, ext, &results); + return results; + } + + /* Search the app-local directory. 
*/
+ if(auto localpath = al::getenv("ALSOFT_LOCAL_PATH"))
+ DirectorySearch(localpath->c_str(), ext, &results);
+ else
+ {
+ al::vector<char> cwdbuf(256);
+ while(!getcwd(cwdbuf.data(), cwdbuf.size()))
+ {
+ if(errno != ERANGE)
+ {
+ cwdbuf.clear();
+ break;
+ }
+ cwdbuf.resize(cwdbuf.size() << 1);
+ }
+ if(cwdbuf.empty())
+ DirectorySearch(".", ext, &results);
+ else
+ {
+ DirectorySearch(cwdbuf.data(), ext, &results);
+ cwdbuf.clear();
+ }
+ }
+
+ // Search local data dir
+ if(auto datapath = al::getenv("XDG_DATA_HOME"))
+ {
+ std::string &path = *datapath;
+ if(path.back() != '/')
+ path += '/';
+ path += subdir;
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+ else if(auto homepath = al::getenv("HOME"))
+ {
+ std::string &path = *homepath;
+ if(path.back() == '/')
+ path.pop_back();
+ path += "/.local/share/";
+ path += subdir;
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+
+ // Search global data dirs
+ std::string datadirs{al::getenv("XDG_DATA_DIRS").value_or("/usr/local/share/:/usr/share/")};
+
+ size_t curpos{0u};
+ while(curpos < datadirs.size())
+ {
+ size_t nextpos{datadirs.find(':', curpos)};
+
+ std::string path{(nextpos != std::string::npos) ?
+ datadirs.substr(curpos, nextpos++ - curpos) : datadirs.substr(curpos)};
+ curpos = nextpos;
+
+ if(path.empty()) continue;
+ if(path.back() != '/')
+ path += '/';
+ path += subdir;
+
+ DirectorySearch(path.c_str(), ext, &results);
+ }
+
+ return results;
+}
+
+void SetRTPriority()
+{
+ if(RTPrioLevel <= 0)
+ return;
+
+ int err{-ENOTSUP};
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
+ struct sched_param param{};
+ /* Use the minimum real-time priority possible for now (on Linux this
+ * should be 1 for SCHED_RR).
+ */
+ param.sched_priority = sched_get_priority_min(SCHED_RR);
+#ifdef SCHED_RESET_ON_FORK
+ err = pthread_setschedparam(pthread_self(), SCHED_RR|SCHED_RESET_ON_FORK, &param);
+ if(err == EINVAL)
+#endif
+ err = pthread_setschedparam(pthread_self(), SCHED_RR, &param);
+ if(err == 0) return;
+
+ WARN("pthread_setschedparam failed: %s (%d)\n", std::strerror(err), err);
+#endif
+#ifdef HAVE_RTKIT
+ if(HasDBus())
+ {
+ dbus::Error error;
+ if(dbus::ConnectionPtr conn{(*pdbus_bus_get)(DBUS_BUS_SYSTEM, &error.get())})
+ {
+ using ulonglong = unsigned long long;
+ auto limit_rttime = [](DBusConnection *c) -> int
+ {
+ long long maxrttime{rtkit_get_rttime_usec_max(c)};
+ if(maxrttime <= 0) return static_cast<int>(std::abs(maxrttime));
+ const ulonglong umaxtime{static_cast<ulonglong>(maxrttime)};
+
+ struct rlimit rlim{};
+ if(getrlimit(RLIMIT_RTTIME, &rlim) != 0)
+ return errno;
+ TRACE("RTTime max: %llu (hard: %llu, soft: %llu)\n", umaxtime,
+ ulonglong{rlim.rlim_max}, ulonglong{rlim.rlim_cur});
+ if(rlim.rlim_max > umaxtime)
+ {
+ rlim.rlim_max = static_cast<rlim_t>(std::min<ulonglong>(umaxtime,
+ std::numeric_limits<rlim_t>::max()));
+ rlim.rlim_cur = std::min(rlim.rlim_cur, rlim.rlim_max);
+ if(setrlimit(RLIMIT_RTTIME, &rlim) != 0)
+ return errno;
+ }
+ return 0;
+ };
+
+ /* Don't stupidly exit if the connection dies while doing this.
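+ * By default, libdbus terminates the process if a shared bus connection
+ * like this one is lost, which is never acceptable from inside a library.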
*/ + (*pdbus_connection_set_exit_on_disconnect)(conn.get(), false); + + int nicemin{}; + err = rtkit_get_min_nice_level(conn.get(), &nicemin); + if(err == -ENOENT) + { + err = std::abs(err); + ERR("Could not query RTKit: %s (%d)\n", std::strerror(err), err); + return; + } + int rtmax{rtkit_get_max_realtime_priority(conn.get())}; + TRACE("Maximum real-time priority: %d, minimum niceness: %d\n", rtmax, nicemin); + + err = EINVAL; + if(rtmax > 0) + { + if(AllowRTTimeLimit) + { + err = limit_rttime(conn.get()); + if(err != 0) + WARN("Failed to set RLIMIT_RTTIME for RTKit: %s (%d)\n", + std::strerror(err), err); + } + + /* Use half the maximum real-time priority allowed. */ + TRACE("Making real-time with priority %d\n", (rtmax+1)/2); + err = rtkit_make_realtime(conn.get(), 0, (rtmax+1)/2); + if(err == 0) return; + + err = std::abs(err); + WARN("Failed to set real-time priority: %s (%d)\n", std::strerror(err), err); + } + if(nicemin < 0) + { + TRACE("Making high priority with niceness %d\n", nicemin); + err = rtkit_make_high_priority(conn.get(), 0, nicemin); + if(err == 0) return; + + err = std::abs(err); + WARN("Failed to set high priority: %s (%d)\n", std::strerror(err), err); + } + } + else + WARN("D-Bus connection failed with %s: %s\n", error->name, error->message); + } + else + WARN("D-Bus not available\n"); +#endif + ERR("Could not set elevated priority: %s (%d)\n", std::strerror(err), err); +} + +#endif diff --git a/core/helpers.h b/core/helpers.h new file mode 100644 index 00000000..f0bfcf1b --- /dev/null +++ b/core/helpers.h @@ -0,0 +1,18 @@ +#ifndef CORE_HELPERS_H +#define CORE_HELPERS_H + +#include <string> + +#include "vector.h" + + +struct PathNamePair { std::string path, fname; }; +const PathNamePair &GetProcBinary(void); + +extern int RTPrioLevel; +extern bool AllowRTTimeLimit; +void SetRTPriority(void); + +al::vector<std::string> SearchDataFiles(const char *match, const char *subdir); + +#endif /* CORE_HELPERS_H */ diff --git a/core/hrtf.cpp b/core/hrtf.cpp new file mode 100644 index 00000000..e0ab8f0a --- /dev/null +++ b/core/hrtf.cpp @@ -0,0 +1,1447 @@ + +#include "config.h" + +#include "hrtf.h" + +#include <algorithm> +#include <array> +#include <cassert> +#include <cctype> +#include <cmath> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <fstream> +#include <iterator> +#include <memory> +#include <mutex> +#include <numeric> +#include <type_traits> +#include <utility> + +#include "albit.h" +#include "albyte.h" +#include "alfstream.h" +#include "almalloc.h" +#include "alnumeric.h" +#include "aloptional.h" +#include "alspan.h" +#include "ambidefs.h" +#include "filters/splitter.h" +#include "helpers.h" +#include "logging.h" +#include "math_defs.h" +#include "mixer/hrtfdefs.h" +#include "opthelpers.h" +#include "polyphase_resampler.h" +#include "vector.h" + + +namespace { + +struct HrtfEntry { + std::string mDispName; + std::string mFilename; +}; + +struct LoadedHrtf { + std::string mFilename; + std::unique_ptr<HrtfStore> mEntry; +}; + +/* Data set limits must be the same as or more flexible than those defined in + * the makemhr utility. 
+ */ +constexpr uint MinFdCount{1}; +constexpr uint MaxFdCount{16}; + +constexpr uint MinFdDistance{50}; +constexpr uint MaxFdDistance{2500}; + +constexpr uint MinEvCount{5}; +constexpr uint MaxEvCount{181}; + +constexpr uint MinAzCount{1}; +constexpr uint MaxAzCount{255}; + +constexpr uint MaxHrirDelay{HrtfHistoryLength - 1}; + +constexpr uint HrirDelayFracBits{2}; +constexpr uint HrirDelayFracOne{1 << HrirDelayFracBits}; +constexpr uint HrirDelayFracHalf{HrirDelayFracOne >> 1}; + +static_assert(MaxHrirDelay*HrirDelayFracOne < 256, "MAX_HRIR_DELAY or DELAY_FRAC too large"); + +constexpr char magicMarker00[8]{'M','i','n','P','H','R','0','0'}; +constexpr char magicMarker01[8]{'M','i','n','P','H','R','0','1'}; +constexpr char magicMarker02[8]{'M','i','n','P','H','R','0','2'}; +constexpr char magicMarker03[8]{'M','i','n','P','H','R','0','3'}; + +/* First value for pass-through coefficients (remaining are 0), used for omni- + * directional sounds. */ +constexpr float PassthruCoeff{0.707106781187f/*sqrt(0.5)*/}; + +std::mutex LoadedHrtfLock; +al::vector<LoadedHrtf> LoadedHrtfs; + +std::mutex EnumeratedHrtfLock; +al::vector<HrtfEntry> EnumeratedHrtfs; + + +class databuf final : public std::streambuf { + int_type underflow() override + { return traits_type::eof(); } + + pos_type seekoff(off_type offset, std::ios_base::seekdir whence, std::ios_base::openmode mode) override + { + if((mode&std::ios_base::out) || !(mode&std::ios_base::in)) + return traits_type::eof(); + + char_type *cur; + switch(whence) + { + case std::ios_base::beg: + if(offset < 0 || offset > egptr()-eback()) + return traits_type::eof(); + cur = eback() + offset; + break; + + case std::ios_base::cur: + if((offset >= 0 && offset > egptr()-gptr()) || + (offset < 0 && -offset > gptr()-eback())) + return traits_type::eof(); + cur = gptr() + offset; + break; + + case std::ios_base::end: + if(offset > 0 || -offset > egptr()-eback()) + return traits_type::eof(); + cur = egptr() + offset; + break; + + default: + return traits_type::eof(); + } + + setg(eback(), cur, egptr()); + return cur - eback(); + } + + pos_type seekpos(pos_type pos, std::ios_base::openmode mode) override + { + // Simplified version of seekoff + if((mode&std::ios_base::out) || !(mode&std::ios_base::in)) + return traits_type::eof(); + + if(pos < 0 || pos > egptr()-eback()) + return traits_type::eof(); + + setg(eback(), eback() + static_cast<size_t>(pos), egptr()); + return pos; + } + +public: + databuf(const char_type *start_, const char_type *end_) noexcept + { + setg(const_cast<char_type*>(start_), const_cast<char_type*>(start_), + const_cast<char_type*>(end_)); + } +}; + +class idstream final : public std::istream { + databuf mStreamBuf; + +public: + idstream(const char *start_, const char *end_) + : std::istream{nullptr}, mStreamBuf{start_, end_} + { init(&mStreamBuf); } +}; + + +struct IdxBlend { uint idx; float blend; }; +/* Calculate the elevation index given the polar elevation in radians. This + * will return an index between 0 and (evcount - 1). + */ +IdxBlend CalcEvIndex(uint evcount, float ev) +{ + ev = (al::MathDefs<float>::Pi()*0.5f + ev) * static_cast<float>(evcount-1) / + al::MathDefs<float>::Pi(); + uint idx{float2uint(ev)}; + + return IdxBlend{minu(idx, evcount-1), ev-static_cast<float>(idx)}; +} + +/* Calculate the azimuth index given the polar azimuth in radians. This will + * return an index between 0 and (azcount - 1). 
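+ * For example, with azcount=4 and an azimuth of +pi/2:
+ *
+ *   az  = (2*pi + pi/2) * 4 / (2*pi) = 5.0
+ *   idx = 5 % 4 = 1,  blend = 5.0 - 5.0 = 0.0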
+ */ +IdxBlend CalcAzIndex(uint azcount, float az) +{ + az = (al::MathDefs<float>::Tau()+az) * static_cast<float>(azcount) / + al::MathDefs<float>::Tau(); + uint idx{float2uint(az)}; + + return IdxBlend{idx%azcount, az-static_cast<float>(idx)}; +} + +} // namespace + + +/* Calculates static HRIR coefficients and delays for the given polar elevation + * and azimuth in radians. The coefficients are normalized. + */ +void GetHrtfCoeffs(const HrtfStore *Hrtf, float elevation, float azimuth, float distance, + float spread, HrirArray &coeffs, const al::span<uint,2> delays) +{ + const float dirfact{1.0f - (spread / al::MathDefs<float>::Tau())}; + + const auto *field = Hrtf->field; + const auto *field_end = field + Hrtf->fdCount-1; + size_t ebase{0}; + while(distance < field->distance && field != field_end) + { + ebase += field->evCount; + ++field; + } + + /* Calculate the elevation indices. */ + const auto elev0 = CalcEvIndex(field->evCount, elevation); + const size_t elev1_idx{minu(elev0.idx+1, field->evCount-1)}; + const size_t ir0offset{Hrtf->elev[ebase + elev0.idx].irOffset}; + const size_t ir1offset{Hrtf->elev[ebase + elev1_idx].irOffset}; + + /* Calculate azimuth indices. */ + const auto az0 = CalcAzIndex(Hrtf->elev[ebase + elev0.idx].azCount, azimuth); + const auto az1 = CalcAzIndex(Hrtf->elev[ebase + elev1_idx].azCount, azimuth); + + /* Calculate the HRIR indices to blend. */ + const size_t idx[4]{ + ir0offset + az0.idx, + ir0offset + ((az0.idx+1) % Hrtf->elev[ebase + elev0.idx].azCount), + ir1offset + az1.idx, + ir1offset + ((az1.idx+1) % Hrtf->elev[ebase + elev1_idx].azCount) + }; + + /* Calculate bilinear blending weights, attenuated according to the + * directional panning factor. + */ + const float blend[4]{ + (1.0f-elev0.blend) * (1.0f-az0.blend) * dirfact, + (1.0f-elev0.blend) * ( az0.blend) * dirfact, + ( elev0.blend) * (1.0f-az1.blend) * dirfact, + ( elev0.blend) * ( az1.blend) * dirfact + }; + + /* Calculate the blended HRIR delays. */ + float d{Hrtf->delays[idx[0]][0]*blend[0] + Hrtf->delays[idx[1]][0]*blend[1] + + Hrtf->delays[idx[2]][0]*blend[2] + Hrtf->delays[idx[3]][0]*blend[3]}; + delays[0] = fastf2u(d * float{1.0f/HrirDelayFracOne}); + d = Hrtf->delays[idx[0]][1]*blend[0] + Hrtf->delays[idx[1]][1]*blend[1] + + Hrtf->delays[idx[2]][1]*blend[2] + Hrtf->delays[idx[3]][1]*blend[3]; + delays[1] = fastf2u(d * float{1.0f/HrirDelayFracOne}); + + /* Calculate the blended HRIR coefficients. 
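+ * The output starts as a pass-through impulse scaled by (1-dirfact), and the
+ * four surrounding HRIRs are then accumulated on top with their bilinear
+ * weights (already attenuated by dirfact above); so a zero spread gives a
+ * pure HRIR blend while a full Tau spread leaves only the omni pass-through.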
*/ + float *coeffout{al::assume_aligned<16>(&coeffs[0][0])}; + coeffout[0] = PassthruCoeff * (1.0f-dirfact); + coeffout[1] = PassthruCoeff * (1.0f-dirfact); + std::fill_n(coeffout+2, size_t{HrirLength-1}*2, 0.0f); + for(size_t c{0};c < 4;c++) + { + const float *srccoeffs{al::assume_aligned<16>(Hrtf->coeffs[idx[c]][0].data())}; + const float mult{blend[c]}; + auto blend_coeffs = [mult](const float src, const float coeff) noexcept -> float + { return src*mult + coeff; }; + std::transform(srccoeffs, srccoeffs + HrirLength*2, coeffout, coeffout, blend_coeffs); + } +} + + +std::unique_ptr<DirectHrtfState> DirectHrtfState::Create(size_t num_chans) +{ return std::unique_ptr<DirectHrtfState>{new(FamCount(num_chans)) DirectHrtfState{num_chans}}; } + +void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize, + const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels], + const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain) +{ + using double2 = std::array<double,2>; + struct ImpulseResponse { + const ConstHrirSpan hrir; + uint ldelay, rdelay; + }; + + const double xover_norm{double{XOverFreq} / Hrtf->sampleRate}; + for(size_t i{0};i < mChannels.size();++i) + { + const size_t order{AmbiIndex::OrderFromChannel()[i]}; + mChannels[i].mSplitter.init(static_cast<float>(xover_norm)); + mChannels[i].mHfScale = AmbiOrderHFGain[order]; + } + + uint min_delay{HrtfHistoryLength*HrirDelayFracOne}, max_delay{0}; + al::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size()); + auto calc_res = [Hrtf,&max_delay,&min_delay](const AngularPoint &pt) -> ImpulseResponse + { + auto &field = Hrtf->field[0]; + const auto elev0 = CalcEvIndex(field.evCount, pt.Elev.value); + const size_t elev1_idx{minu(elev0.idx+1, field.evCount-1)}; + const size_t ir0offset{Hrtf->elev[elev0.idx].irOffset}; + const size_t ir1offset{Hrtf->elev[elev1_idx].irOffset}; + + const auto az0 = CalcAzIndex(Hrtf->elev[elev0.idx].azCount, pt.Azim.value); + const auto az1 = CalcAzIndex(Hrtf->elev[elev1_idx].azCount, pt.Azim.value); + + const size_t idx[4]{ + ir0offset + az0.idx, + ir0offset + ((az0.idx+1) % Hrtf->elev[elev0.idx].azCount), + ir1offset + az1.idx, + ir1offset + ((az1.idx+1) % Hrtf->elev[elev1_idx].azCount) + }; + + const std::array<double,4> blend{{ + (1.0-elev0.blend) * (1.0-az0.blend), + (1.0-elev0.blend) * ( az0.blend), + ( elev0.blend) * (1.0-az1.blend), + ( elev0.blend) * ( az1.blend) + }}; + + /* The largest blend factor serves as the closest HRIR. */ + const size_t irOffset{idx[std::max_element(blend.begin(), blend.end()) - blend.begin()]}; + ImpulseResponse res{Hrtf->coeffs[irOffset], + Hrtf->delays[irOffset][0], Hrtf->delays[irOffset][1]}; + + min_delay = minu(min_delay, minu(res.ldelay, res.rdelay)); + max_delay = maxu(max_delay, maxu(res.ldelay, res.rdelay)); + + return res; + }; + std::transform(AmbiPoints.begin(), AmbiPoints.end(), std::back_inserter(impres), calc_res); + auto hrir_delay_round = [](const uint d) noexcept -> uint + { return (d+HrirDelayFracHalf) >> HrirDelayFracBits; }; + + TRACE("Min delay: %.2f, max delay: %.2f, FIR length: %u\n", + min_delay/double{HrirDelayFracOne}, max_delay/double{HrirDelayFracOne}, irSize); + + const bool per_hrir_min{mChannels.size() > AmbiChannelsFromOrder(1)}; + auto tmpres = al::vector<std::array<double2,HrirLength>>(mChannels.size()); + max_delay = 0; + for(size_t c{0u};c < AmbiPoints.size();++c) + { + const ConstHrirSpan hrir{impres[c].hrir}; + const uint base_delay{per_hrir_min ? 
minu(impres[c].ldelay, impres[c].rdelay) : min_delay}; + const uint ldelay{hrir_delay_round(impres[c].ldelay - base_delay)}; + const uint rdelay{hrir_delay_round(impres[c].rdelay - base_delay)}; + max_delay = maxu(max_delay, maxu(impres[c].ldelay, impres[c].rdelay) - base_delay); + + for(size_t i{0u};i < mChannels.size();++i) + { + const double mult{AmbiMatrix[c][i]}; + const size_t numirs{HrirLength - maxz(ldelay, rdelay)}; + size_t lidx{ldelay}, ridx{rdelay}; + for(size_t j{0};j < numirs;++j) + { + tmpres[i][lidx++][0] += hrir[j][0] * mult; + tmpres[i][ridx++][1] += hrir[j][1] * mult; + } + } + } + impres.clear(); + + for(size_t i{0u};i < mChannels.size();++i) + { + auto copy_arr = [](const double2 &in) noexcept -> float2 + { return float2{{static_cast<float>(in[0]), static_cast<float>(in[1])}}; }; + std::transform(tmpres[i].cbegin(), tmpres[i].cend(), mChannels[i].mCoeffs.begin(), + copy_arr); + } + tmpres.clear(); + + const uint max_length{minu(hrir_delay_round(max_delay) + irSize, HrirLength)}; + TRACE("New max delay: %.2f, FIR length: %u\n", max_delay/double{HrirDelayFracOne}, + max_length); + mIrSize = max_length; +} + + +namespace { + +std::unique_ptr<HrtfStore> CreateHrtfStore(uint rate, ushort irSize, + const al::span<const HrtfStore::Field> fields, + const al::span<const HrtfStore::Elevation> elevs, const HrirArray *coeffs, + const ubyte2 *delays, const char *filename) +{ + std::unique_ptr<HrtfStore> Hrtf; + + const size_t irCount{size_t{elevs.back().azCount} + elevs.back().irOffset}; + size_t total{sizeof(HrtfStore)}; + total = RoundUp(total, alignof(HrtfStore::Field)); /* Align for field infos */ + total += sizeof(HrtfStore::Field)*fields.size(); + total = RoundUp(total, alignof(HrtfStore::Elevation)); /* Align for elevation infos */ + total += sizeof(Hrtf->elev[0])*elevs.size(); + total = RoundUp(total, 16); /* Align for coefficients using SIMD */ + total += sizeof(Hrtf->coeffs[0])*irCount; + total += sizeof(Hrtf->delays[0])*irCount; + + Hrtf.reset(new (al_calloc(16, total)) HrtfStore{}); + if(!Hrtf) + ERR("Out of memory allocating storage for %s.\n", filename); + else + { + InitRef(Hrtf->mRef, 1u); + Hrtf->sampleRate = rate; + Hrtf->irSize = irSize; + Hrtf->fdCount = static_cast<uint>(fields.size()); + + /* Set up pointers to storage following the main HRTF struct. */ + char *base = reinterpret_cast<char*>(Hrtf.get()); + size_t offset{sizeof(HrtfStore)}; + + offset = RoundUp(offset, alignof(HrtfStore::Field)); /* Align for field infos */ + auto field_ = reinterpret_cast<HrtfStore::Field*>(base + offset); + offset += sizeof(field_[0])*fields.size(); + + offset = RoundUp(offset, alignof(HrtfStore::Elevation)); /* Align for elevation infos */ + auto elev_ = reinterpret_cast<HrtfStore::Elevation*>(base + offset); + offset += sizeof(elev_[0])*elevs.size(); + + offset = RoundUp(offset, 16); /* Align for coefficients using SIMD */ + auto coeffs_ = reinterpret_cast<HrirArray*>(base + offset); + offset += sizeof(coeffs_[0])*irCount; + + auto delays_ = reinterpret_cast<ubyte2*>(base + offset); + offset += sizeof(delays_[0])*irCount; + + assert(offset == total); + + /* Copy input data to storage. */ + std::copy(fields.cbegin(), fields.cend(), field_); + std::copy(elevs.cbegin(), elevs.cend(), elev_); + std::copy_n(coeffs, irCount, coeffs_); + std::copy_n(delays, irCount, delays_); + + /* Finally, assign the storage pointers. 
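+ * The resulting single allocation is laid out roughly as:
+ *
+ *   [HrtfStore][Field x fdCount][Elevation x total][HrirArray x irCount][delays x irCount]
+ *
+ * with the padding/alignment computed above.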
*/ + Hrtf->field = field_; + Hrtf->elev = elev_; + Hrtf->coeffs = coeffs_; + Hrtf->delays = delays_; + } + + return Hrtf; +} + +void MirrorLeftHrirs(const al::span<const HrtfStore::Elevation> elevs, HrirArray *coeffs, + ubyte2 *delays) +{ + for(const auto &elev : elevs) + { + const ushort evoffset{elev.irOffset}; + const ushort azcount{elev.azCount}; + for(size_t j{0};j < azcount;j++) + { + const size_t lidx{evoffset + j}; + const size_t ridx{evoffset + ((azcount-j) % azcount)}; + + const size_t irSize{coeffs[ridx].size()}; + for(size_t k{0};k < irSize;k++) + coeffs[ridx][k][1] = coeffs[lidx][k][0]; + delays[ridx][1] = delays[lidx][0]; + } + } +} + + +template<typename T, size_t num_bits=sizeof(T)*8> +inline T readle(std::istream &data) +{ + static_assert((num_bits&7) == 0, "num_bits must be a multiple of 8"); + static_assert(num_bits <= sizeof(T)*8, "num_bits is too large for the type"); + + T ret{}; + if_constexpr(al::endian::native == al::endian::little) + { + if(!data.read(reinterpret_cast<char*>(&ret), num_bits/8)) + return static_cast<T>(EOF); + } + else + { + al::byte b[sizeof(T)]{}; + if(!data.read(reinterpret_cast<char*>(b), num_bits/8)) + return static_cast<T>(EOF); + std::reverse_copy(std::begin(b), std::end(b), reinterpret_cast<al::byte*>(&ret)); + } + + if_constexpr(std::is_signed<T>::value && num_bits < sizeof(T)*8) + { + constexpr auto signbit = static_cast<T>(1u << (num_bits-1)); + return static_cast<T>((ret^signbit) - signbit); + } + return ret; +} + +template<> +inline uint8_t readle<uint8_t,8>(std::istream &data) +{ return static_cast<uint8_t>(data.get()); } + + +std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename) +{ + uint rate{readle<uint32_t>(data)}; + ushort irCount{readle<uint16_t>(data)}; + ushort irSize{readle<uint16_t>(data)}; + ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount=%d (%d to %d)\n", + evCount, MinEvCount, MaxEvCount); + return nullptr; + } + + auto elevs = al::vector<HrtfStore::Elevation>(evCount); + for(auto &elev : elevs) + elev.irOffset = readle<uint16_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{1};i < evCount;i++) + { + if(elevs[i].irOffset <= elevs[i-1].irOffset) + { + ERR("Invalid evOffset: evOffset[%zu]=%d (last=%d)\n", i, elevs[i].irOffset, + elevs[i-1].irOffset); + return nullptr; + } + } + if(irCount <= elevs.back().irOffset) + { + ERR("Invalid evOffset: evOffset[%zu]=%d (irCount=%d)\n", + elevs.size()-1, elevs.back().irOffset, irCount); + return nullptr; + } + + for(size_t i{1};i < evCount;i++) + { + elevs[i-1].azCount = static_cast<ushort>(elevs[i].irOffset - elevs[i-1].irOffset); + if(elevs[i-1].azCount < MinAzCount || elevs[i-1].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zd]=%d (%d to %d)\n", + i-1, elevs[i-1].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + elevs.back().azCount = static_cast<ushort>(irCount - elevs.back().irOffset); + if(elevs.back().azCount < MinAzCount || elevs.back().azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n", + elevs.size()-1, elevs.back().azCount, MinAzCount, MaxAzCount); + return nullptr; + 
} + + auto coeffs = al::vector<HrirArray>(irCount, HrirArray{}); + auto delays = al::vector<ubyte2>(irCount); + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irCount;i++) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zd]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. */ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + + const HrtfStore::Field field[1]{{0.0f, evCount}}; + return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(), + delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename) +{ + uint rate{readle<uint32_t>(data)}; + ushort irSize{readle<uint8_t>(data)}; + ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount=%d (%d to %d)\n", + evCount, MinEvCount, MaxEvCount); + return nullptr; + } + + auto elevs = al::vector<HrtfStore::Elevation>(evCount); + for(auto &elev : elevs) + elev.azCount = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < evCount;++i) + { + if(elevs[i].azCount < MinAzCount || elevs[i].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zd]=%d (%d to %d)\n", i, elevs[i].azCount, + MinAzCount, MaxAzCount); + return nullptr; + } + } + + elevs[0].irOffset = 0; + for(size_t i{1};i < evCount;i++) + elevs[i].irOffset = static_cast<ushort>(elevs[i-1].irOffset + elevs[i-1].azCount); + const ushort irCount{static_cast<ushort>(elevs.back().irOffset + elevs.back().azCount)}; + + auto coeffs = al::vector<HrirArray>(irCount, HrirArray{}); + auto delays = al::vector<ubyte2>(irCount); + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irCount;i++) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zd]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. 
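+ * (Mono data sets only store the left ear; the right ear's response for
+ * azimuth index j is taken from the left ear at the mirrored index
+ * (azcount - j) % azcount of the same elevation.)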
*/ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + + const HrtfStore::Field field[1]{{0.0f, evCount}}; + return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(), + delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename) +{ + constexpr ubyte SampleType_S16{0}; + constexpr ubyte SampleType_S24{1}; + constexpr ubyte ChanType_LeftOnly{0}; + constexpr ubyte ChanType_LeftRight{1}; + + uint rate{readle<uint32_t>(data)}; + ubyte sampleType{readle<uint8_t>(data)}; + ubyte channelType{readle<uint8_t>(data)}; + ushort irSize{readle<uint8_t>(data)}; + ubyte fdCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(sampleType > SampleType_S24) + { + ERR("Unsupported sample type: %d\n", sampleType); + return nullptr; + } + if(channelType > ChanType_LeftRight) + { + ERR("Unsupported channel type: %d\n", channelType); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(fdCount < 1 || fdCount > MaxFdCount) + { + ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount, + MaxFdCount); + return nullptr; + } + + auto fields = al::vector<HrtfStore::Field>(fdCount); + auto elevs = al::vector<HrtfStore::Elevation>{}; + for(size_t f{0};f < fdCount;f++) + { + const ushort distance{readle<uint16_t>(data)}; + const ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(distance < MinFdDistance || distance > MaxFdDistance) + { + ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance, + MinFdDistance, MaxFdDistance); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount, + MinEvCount, MaxEvCount); + return nullptr; + } + + fields[f].distance = distance / 1000.0f; + fields[f].evCount = evCount; + if(f > 0 && fields[f].distance <= fields[f-1].distance) + { + ERR("Field distance[%zu] is not after previous (%f > %f)\n", f, fields[f].distance, + fields[f-1].distance); + return nullptr; + } + + const size_t ebase{elevs.size()}; + elevs.resize(ebase + evCount); + for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount)) + elev.azCount = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t e{0};e < evCount;e++) + { + if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e, + elevs[ebase+e].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + } + + elevs[0].irOffset = 0; + std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset); + + auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{}); + auto delays = al::vector<ubyte2>(irTotal); + if(channelType == ChanType_LeftOnly) + { + if(sampleType == SampleType_S16) + { + for(auto &hrir : coeffs) + { + for(auto &val : 
al::span<float2>{hrir.data(), irSize}) + val[0] = readle<int16_t>(data) / 32768.0f; + } + } + else if(sampleType == SampleType_S24) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + } + + /* Mirror the left ear responses to the right ear. */ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + } + else if(channelType == ChanType_LeftRight) + { + if(sampleType == SampleType_S16) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = readle<int16_t>(data) / 32768.0f; + val[1] = readle<int16_t>(data) / 32768.0f; + } + } + } + else if(sampleType == SampleType_S24) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + } + for(auto &val : delays) + { + val[0] = readle<uint8_t>(data); + val[1] = readle<uint8_t>(data); + } + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay); + return nullptr; + } + if(delays[i][1] > MaxHrirDelay) + { + ERR("Invalid delays[%zu][1]: %d (%d)\n", i, delays[i][1], MaxHrirDelay); + return nullptr; + } + delays[i][0] <<= HrirDelayFracBits; + delays[i][1] <<= HrirDelayFracBits; + } + } + + if(fdCount > 1) + { + auto fields_ = al::vector<HrtfStore::Field>(fields.size()); + auto elevs_ = al::vector<HrtfStore::Elevation>(elevs.size()); + auto coeffs_ = al::vector<HrirArray>(coeffs.size()); + auto delays_ = al::vector<ubyte2>(delays.size()); + + /* Simple reverse for the per-field elements. */ + std::reverse_copy(fields.cbegin(), fields.cend(), fields_.begin()); + + /* Each field has a group of elevations, which each have an azimuth + * count. Reverse the order of the groups, keeping the relative order + * of per-group azimuth counts. + */ + auto elevs__end = elevs_.end(); + auto copy_azs = [&elevs,&elevs__end](const ptrdiff_t ebase, const HrtfStore::Field &field) + -> ptrdiff_t + { + auto elevs_src = elevs.begin()+ebase; + elevs__end = std::copy_backward(elevs_src, elevs_src+field.evCount, elevs__end); + return ebase + field.evCount; + }; + (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_azs); + assert(elevs_.begin() == elevs__end); + + /* Reestablish the IR offset for each elevation index, given the new + * ordering of elevations. + */ + elevs_[0].irOffset = 0; + std::partial_sum(elevs_.cbegin(), elevs_.cend(), elevs_.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + + /* Reverse the order of each field's group of IRs. 
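+ * As with the field and elevation info above, the v2 on-disk order (nearest
+ * field first) is flipped so field[0] ends up farthest, e.g. illustrative
+ * distances of 0.2m then 1.4m on disk become {1.4m, 0.2m} here, with each
+ * field's whole block of IRs moved as one unit.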
*/ + auto coeffs_end = coeffs_.end(); + auto delays_end = delays_.end(); + auto copy_irs = [&elevs,&coeffs,&delays,&coeffs_end,&delays_end]( + const ptrdiff_t ebase, const HrtfStore::Field &field) -> ptrdiff_t + { + auto accum_az = [](int count, const HrtfStore::Elevation &elev) noexcept -> int + { return count + elev.azCount; }; + const auto elevs_mid = elevs.cbegin() + ebase; + const auto elevs_end = elevs_mid + field.evCount; + const int abase{std::accumulate(elevs.cbegin(), elevs_mid, 0, accum_az)}; + const int num_azs{std::accumulate(elevs_mid, elevs_end, 0, accum_az)}; + + coeffs_end = std::copy_backward(coeffs.cbegin() + abase, + coeffs.cbegin() + (abase+num_azs), coeffs_end); + delays_end = std::copy_backward(delays.cbegin() + abase, + delays.cbegin() + (abase+num_azs), delays_end); + + return ebase + field.evCount; + }; + (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_irs); + assert(coeffs_.begin() == coeffs_end); + assert(delays_.begin() == delays_end); + + fields = std::move(fields_); + elevs = std::move(elevs_); + coeffs = std::move(coeffs_); + delays = std::move(delays_); + } + + return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()}, + {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename); +} + +std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename) +{ + constexpr ubyte ChanType_LeftOnly{0}; + constexpr ubyte ChanType_LeftRight{1}; + + uint rate{readle<uint32_t>(data)}; + ubyte channelType{readle<uint8_t>(data)}; + ushort irSize{readle<uint8_t>(data)}; + ubyte fdCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(channelType > ChanType_LeftRight) + { + ERR("Unsupported channel type: %d\n", channelType); + return nullptr; + } + + if(irSize < MinIrLength || irSize > HrirLength) + { + ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength); + return nullptr; + } + if(fdCount < 1 || fdCount > MaxFdCount) + { + ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount, + MaxFdCount); + return nullptr; + } + + auto fields = al::vector<HrtfStore::Field>(fdCount); + auto elevs = al::vector<HrtfStore::Elevation>{}; + for(size_t f{0};f < fdCount;f++) + { + const ushort distance{readle<uint16_t>(data)}; + const ubyte evCount{readle<uint8_t>(data)}; + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + if(distance < MinFdDistance || distance > MaxFdDistance) + { + ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance, + MinFdDistance, MaxFdDistance); + return nullptr; + } + if(evCount < MinEvCount || evCount > MaxEvCount) + { + ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount, + MinEvCount, MaxEvCount); + return nullptr; + } + + fields[f].distance = distance / 1000.0f; + fields[f].evCount = evCount; + if(f > 0 && fields[f].distance > fields[f-1].distance) + { + ERR("Field distance[%zu] is not before previous (%f <= %f)\n", f, fields[f].distance, + fields[f-1].distance); + return nullptr; + } + + const size_t ebase{elevs.size()}; + elevs.resize(ebase + evCount); + for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount)) + elev.azCount = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t e{0};e < evCount;e++) + { + if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > 
MaxAzCount) + { + ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e, + elevs[ebase+e].azCount, MinAzCount, MaxAzCount); + return nullptr; + } + } + } + + elevs[0].irOffset = 0; + std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(), + [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur) + -> HrtfStore::Elevation + { + return HrtfStore::Elevation{cur.azCount, + static_cast<ushort>(last.azCount + last.irOffset)}; + }); + const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset); + + auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{}); + auto delays = al::vector<ubyte2>(irTotal); + if(channelType == ChanType_LeftOnly) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + for(auto &val : delays) + val[0] = readle<uint8_t>(data); + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][0]: %f (%d)\n", i, + delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + } + + /* Mirror the left ear responses to the right ear. */ + MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data()); + } + else if(channelType == ChanType_LeftRight) + { + for(auto &hrir : coeffs) + { + for(auto &val : al::span<float2>{hrir.data(), irSize}) + { + val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f; + } + } + for(auto &val : delays) + { + val[0] = readle<uint8_t>(data); + val[1] = readle<uint8_t>(data); + } + if(!data || data.eof()) + { + ERR("Failed reading %s\n", filename); + return nullptr; + } + + for(size_t i{0};i < irTotal;++i) + { + if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][0]: %f (%d)\n", i, + delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + if(delays[i][1] > MaxHrirDelay<<HrirDelayFracBits) + { + ERR("Invalid delays[%zu][1]: %f (%d)\n", i, + delays[i][1] / float{HrirDelayFracOne}, MaxHrirDelay); + return nullptr; + } + } + } + + return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()}, + {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename); +} + + +bool checkName(const std::string &name) +{ + auto match_name = [&name](const HrtfEntry &entry) -> bool { return name == entry.mDispName; }; + auto &enum_names = EnumeratedHrtfs; + return std::find_if(enum_names.cbegin(), enum_names.cend(), match_name) != enum_names.cend(); +} + +void AddFileEntry(const std::string &filename) +{ + /* Check if this file has already been enumerated. */ + auto enum_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&filename](const HrtfEntry &entry) -> bool + { return entry.mFilename == filename; }); + if(enum_iter != EnumeratedHrtfs.cend()) + { + TRACE("Skipping duplicate file entry %s\n", filename.c_str()); + return; + } + + /* TODO: Get a human-readable name from the HRTF data (possibly coming in a + * format update). */ + size_t namepos{filename.find_last_of('/')+1}; + if(!namepos) namepos = filename.find_last_of('\\')+1; + + size_t extpos{filename.find_last_of('.')}; + if(extpos <= namepos) extpos = std::string::npos; + + const std::string basename{(extpos == std::string::npos) ? 
+ filename.substr(namepos) : filename.substr(namepos, extpos-namepos)}; + std::string newname{basename}; + int count{1}; + while(checkName(newname)) + { + newname = basename; + newname += " #"; + newname += std::to_string(++count); + } + EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename}); + const HrtfEntry &entry = EnumeratedHrtfs.back(); + + TRACE("Adding file entry \"%s\"\n", entry.mFilename.c_str()); +} + +/* Unfortunate that we have to duplicate AddFileEntry to take a memory buffer + * for input instead of opening the given filename. + */ +void AddBuiltInEntry(const std::string &dispname, uint residx) +{ + const std::string filename{'!'+std::to_string(residx)+'_'+dispname}; + + auto enum_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&filename](const HrtfEntry &entry) -> bool + { return entry.mFilename == filename; }); + if(enum_iter != EnumeratedHrtfs.cend()) + { + TRACE("Skipping duplicate file entry %s\n", filename.c_str()); + return; + } + + /* TODO: Get a human-readable name from the HRTF data (possibly coming in a + * format update). */ + + std::string newname{dispname}; + int count{1}; + while(checkName(newname)) + { + newname = dispname; + newname += " #"; + newname += std::to_string(++count); + } + EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename}); + const HrtfEntry &entry = EnumeratedHrtfs.back(); + + TRACE("Adding built-in entry \"%s\"\n", entry.mFilename.c_str()); +} + + +#define IDR_DEFAULT_HRTF_MHR 1 + +#ifndef ALSOFT_EMBED_HRTF_DATA + +al::span<const char> GetResource(int /*name*/) +{ return {}; } + +#else + +#include "hrtf_default.h" + +al::span<const char> GetResource(int name) +{ + if(name == IDR_DEFAULT_HRTF_MHR) + return {reinterpret_cast<const char*>(hrtf_default), sizeof(hrtf_default)}; + return {}; +} +#endif + +} // namespace + + +al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt) +{ + std::lock_guard<std::mutex> _{EnumeratedHrtfLock}; + EnumeratedHrtfs.clear(); + + bool usedefaults{true}; + if(pathopt) + { + const char *pathlist{pathopt->c_str()}; + while(pathlist && *pathlist) + { + const char *next, *end; + + while(isspace(*pathlist) || *pathlist == ',') + pathlist++; + if(*pathlist == '\0') + continue; + + next = strchr(pathlist, ','); + if(next) + end = next++; + else + { + end = pathlist + strlen(pathlist); + usedefaults = false; + } + + while(end != pathlist && isspace(*(end-1))) + --end; + if(end != pathlist) + { + const std::string pname{pathlist, end}; + for(const auto &fname : SearchDataFiles(".mhr", pname.c_str())) + AddFileEntry(fname); + } + + pathlist = next; + } + } + + if(usedefaults) + { + for(const auto &fname : SearchDataFiles(".mhr", "openal/hrtf")) + AddFileEntry(fname); + + if(!GetResource(IDR_DEFAULT_HRTF_MHR).empty()) + AddBuiltInEntry("Built-In HRTF", IDR_DEFAULT_HRTF_MHR); + } + + al::vector<std::string> list; + list.reserve(EnumeratedHrtfs.size()); + for(auto &entry : EnumeratedHrtfs) + list.emplace_back(entry.mDispName); + + return list; +} + +HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate) +{ + std::lock_guard<std::mutex> _{EnumeratedHrtfLock}; + auto entry_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(), + [&name](const HrtfEntry &entry) -> bool { return entry.mDispName == name; }); + if(entry_iter == EnumeratedHrtfs.cend()) + return nullptr; + const std::string &fname = entry_iter->mFilename; + + std::lock_guard<std::mutex> __{LoadedHrtfLock}; + auto hrtf_lt_fname = [](LoadedHrtf &hrtf, const std::string &filename) -> bool 
+ { return hrtf.mFilename < filename; }; + auto handle = std::lower_bound(LoadedHrtfs.begin(), LoadedHrtfs.end(), fname, hrtf_lt_fname); + while(handle != LoadedHrtfs.end() && handle->mFilename == fname) + { + HrtfStore *hrtf{handle->mEntry.get()}; + if(hrtf && hrtf->sampleRate == devrate) + { + hrtf->add_ref(); + return HrtfStorePtr{hrtf}; + } + ++handle; + } + + std::unique_ptr<std::istream> stream; + int residx{}; + char ch{}; + if(sscanf(fname.c_str(), "!%d%c", &residx, &ch) == 2 && ch == '_') + { + TRACE("Loading %s...\n", fname.c_str()); + al::span<const char> res{GetResource(residx)}; + if(res.empty()) + { + ERR("Could not get resource %u, %s\n", residx, name.c_str()); + return nullptr; + } + stream = std::make_unique<idstream>(res.begin(), res.end()); + } + else + { + TRACE("Loading %s...\n", fname.c_str()); + auto fstr = std::make_unique<al::ifstream>(fname.c_str(), std::ios::binary); + if(!fstr->is_open()) + { + ERR("Could not open %s\n", fname.c_str()); + return nullptr; + } + stream = std::move(fstr); + } + + std::unique_ptr<HrtfStore> hrtf; + char magic[sizeof(magicMarker03)]; + stream->read(magic, sizeof(magic)); + if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker03))) + ERR("%s data is too short (%zu bytes)\n", name.c_str(), stream->gcount()); + else if(memcmp(magic, magicMarker03, sizeof(magicMarker03)) == 0) + { + TRACE("Detected data set format v3\n"); + hrtf = LoadHrtf03(*stream, name.c_str()); + } + else if(memcmp(magic, magicMarker02, sizeof(magicMarker02)) == 0) + { + TRACE("Detected data set format v2\n"); + hrtf = LoadHrtf02(*stream, name.c_str()); + } + else if(memcmp(magic, magicMarker01, sizeof(magicMarker01)) == 0) + { + TRACE("Detected data set format v1\n"); + hrtf = LoadHrtf01(*stream, name.c_str()); + } + else if(memcmp(magic, magicMarker00, sizeof(magicMarker00)) == 0) + { + TRACE("Detected data set format v0\n"); + hrtf = LoadHrtf00(*stream, name.c_str()); + } + else + ERR("Invalid header in %s: \"%.8s\"\n", name.c_str(), magic); + stream.reset(); + + if(!hrtf) + { + ERR("Failed to load %s\n", name.c_str()); + return nullptr; + } + + if(hrtf->sampleRate != devrate) + { + TRACE("Resampling HRTF %s (%uhz -> %uhz)\n", name.c_str(), hrtf->sampleRate, devrate); + + /* Calculate the last elevation's index and get the total IR count. */ + const size_t lastEv{std::accumulate(hrtf->field, hrtf->field+hrtf->fdCount, size_t{0}, + [](const size_t curval, const HrtfStore::Field &field) noexcept -> size_t + { return curval + field.evCount; } + ) - 1}; + const size_t irCount{size_t{hrtf->elev[lastEv].irOffset} + hrtf->elev[lastEv].azCount}; + + /* Resample all the IRs. */ + std::array<std::array<double,HrirLength>,2> inout; + PPhaseResampler rs; + rs.init(hrtf->sampleRate, devrate); + for(size_t i{0};i < irCount;++i) + { + HrirArray &coeffs = const_cast<HrirArray&>(hrtf->coeffs[i]); + for(size_t j{0};j < 2;++j) + { + std::transform(coeffs.cbegin(), coeffs.cend(), inout[0].begin(), + [j](const float2 &in) noexcept -> double { return in[j]; }); + rs.process(HrirLength, inout[0].data(), HrirLength, inout[1].data()); + for(size_t k{0};k < HrirLength;++k) + coeffs[k][j] = static_cast<float>(inout[1][k]); + } + } + rs = {}; + + /* Scale the delays for the new sample rate. 
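+ * Delays are stored in 1/4-sample units (HrirDelayFracBits), so e.g. going
+ * from 22050Hz to 44100Hz simply doubles them (10.25 samples -> 20.5), while
+ * non-integer rate ratios are rounded to the nearest 1/4 sample here.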
*/ + float max_delay{0.0f}; + auto new_delays = al::vector<float2>(irCount); + const float rate_scale{static_cast<float>(devrate)/static_cast<float>(hrtf->sampleRate)}; + for(size_t i{0};i < irCount;++i) + { + for(size_t j{0};j < 2;++j) + { + const float new_delay{std::round(hrtf->delays[i][j] * rate_scale) / + float{HrirDelayFracOne}}; + max_delay = maxf(max_delay, new_delay); + new_delays[i][j] = new_delay; + } + } + + /* If the new delays exceed the max, scale it down to fit (essentially + * shrinking the head radius; not ideal but better than a per-delay + * clamp). + */ + float delay_scale{HrirDelayFracOne}; + if(max_delay > MaxHrirDelay) + { + WARN("Resampled delay exceeds max (%.2f > %d)\n", max_delay, MaxHrirDelay); + delay_scale *= float{MaxHrirDelay} / max_delay; + } + + for(size_t i{0};i < irCount;++i) + { + ubyte2 &delays = const_cast<ubyte2&>(hrtf->delays[i]); + for(size_t j{0};j < 2;++j) + delays[j] = static_cast<ubyte>(float2int(new_delays[i][j]*delay_scale + 0.5f)); + } + + /* Scale the IR size for the new sample rate and update the stored + * sample rate. + */ + const float newIrSize{std::round(static_cast<float>(hrtf->irSize) * rate_scale)}; + hrtf->irSize = static_cast<uint>(minf(HrirLength, newIrSize)); + hrtf->sampleRate = devrate; + } + + TRACE("Loaded HRTF %s for sample rate %uhz, %u-sample filter\n", name.c_str(), + hrtf->sampleRate, hrtf->irSize); + handle = LoadedHrtfs.emplace(handle, LoadedHrtf{fname, std::move(hrtf)}); + + return HrtfStorePtr{handle->mEntry.get()}; +} + + +void HrtfStore::add_ref() +{ + auto ref = IncrementRef(mRef); + TRACE("HrtfStore %p increasing refcount to %u\n", decltype(std::declval<void*>()){this}, ref); +} + +void HrtfStore::release() +{ + auto ref = DecrementRef(mRef); + TRACE("HrtfStore %p decreasing refcount to %u\n", decltype(std::declval<void*>()){this}, ref); + if(ref == 0) + { + std::lock_guard<std::mutex> _{LoadedHrtfLock}; + + /* Go through and remove all unused HRTFs. */ + auto remove_unused = [](LoadedHrtf &hrtf) -> bool + { + HrtfStore *entry{hrtf.mEntry.get()}; + if(entry && ReadRef(entry->mRef) == 0) + { + TRACE("Unloading unused HRTF %s\n", hrtf.mFilename.data()); + hrtf.mEntry = nullptr; + return true; + } + return false; + }; + auto iter = std::remove_if(LoadedHrtfs.begin(), LoadedHrtfs.end(), remove_unused); + LoadedHrtfs.erase(iter, LoadedHrtfs.end()); + } +} diff --git a/core/hrtf.h b/core/hrtf.h new file mode 100644 index 00000000..61e5bada --- /dev/null +++ b/core/hrtf.h @@ -0,0 +1,90 @@ +#ifndef CORE_HRTF_H +#define CORE_HRTF_H + +#include <array> +#include <cstddef> +#include <memory> +#include <string> + +#include "almalloc.h" +#include "aloptional.h" +#include "alspan.h" +#include "atomic.h" +#include "ambidefs.h" +#include "bufferline.h" +#include "mixer/hrtfdefs.h" +#include "intrusive_ptr.h" +#include "vector.h" + + +struct HrtfStore { + RefCount mRef; + + uint sampleRate; + uint irSize; + + struct Field { + float distance; + ubyte evCount; + }; + /* NOTE: Fields are stored *backwards*. field[0] is the farthest field, and + * field[fdCount-1] is the nearest. 
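+ * GetHrtfCoeffs depends on this ordering: it steps forward while the source
+ * is closer than the current field, landing on the farthest field whose
+ * distance doesn't exceed the source's, or the nearest field if the source
+ * is closer than all of them.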
+ */ + uint fdCount; + const Field *field; + + struct Elevation { + ushort azCount; + ushort irOffset; + }; + Elevation *elev; + const HrirArray *coeffs; + const ubyte2 *delays; + + void add_ref(); + void release(); + + DEF_PLACE_NEWDEL() +}; +using HrtfStorePtr = al::intrusive_ptr<HrtfStore>; + + +struct EvRadians { float value; }; +struct AzRadians { float value; }; +struct AngularPoint { + EvRadians Elev; + AzRadians Azim; +}; + + +struct DirectHrtfState { + std::array<float,HrtfDirectDelay+BufferLineSize> mTemp; + + /* HRTF filter state for dry buffer content */ + uint mIrSize{0}; + al::FlexArray<HrtfChannelState> mChannels; + + DirectHrtfState(size_t numchans) : mChannels{numchans} { } + /** + * Produces HRTF filter coefficients for decoding B-Format, given a set of + * virtual speaker positions, a matching decoding matrix, and per-order + * high-frequency gains for the decoder. The calculated impulse responses + * are ordered and scaled according to the matrix input. + */ + void build(const HrtfStore *Hrtf, const uint irSize, + const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels], + const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain); + + static std::unique_ptr<DirectHrtfState> Create(size_t num_chans); + + DEF_FAM_NEWDEL(DirectHrtfState, mChannels) +}; + + +al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt); +HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate); + +void GetHrtfCoeffs(const HrtfStore *Hrtf, float elevation, float azimuth, float distance, + float spread, HrirArray &coeffs, const al::span<uint,2> delays); + +#endif /* CORE_HRTF_H */ diff --git a/core/logging.h b/core/logging.h index b931c27e..81465929 100644 --- a/core/logging.h +++ b/core/logging.h @@ -35,7 +35,12 @@ extern FILE *gLogFile; #else -[[gnu::format(printf,3,4)]] void al_print(LogLevel level, FILE *logfile, const char *fmt, ...); +#ifdef __USE_MINGW_ANSI_STDIO +[[gnu::format(gnu_printf,3,4)]] +#else +[[gnu::format(printf,3,4)]] +#endif +void al_print(LogLevel level, FILE *logfile, const char *fmt, ...); #define TRACE(...) 
al_print(LogLevel::Trace, gLogFile, "[ALSOFT] (II) " __VA_ARGS__) diff --git a/core/mixer.cpp b/core/mixer.cpp new file mode 100644 index 00000000..71e48fe3 --- /dev/null +++ b/core/mixer.cpp @@ -0,0 +1,126 @@ + +#include "config.h" + +#include "mixer.h" + +#include <cmath> + +#include "devformat.h" +#include "device.h" +#include "math_defs.h" +#include "mixer/defs.h" + +struct CTag; + + +MixerFunc MixSamples{Mix_<CTag>}; + + +std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x, + const float spread) +{ + std::array<float,MaxAmbiChannels> coeffs; + + /* Zeroth-order */ + coeffs[0] = 1.0f; /* ACN 0 = 1 */ + /* First-order */ + coeffs[1] = 1.732050808f * y; /* ACN 1 = sqrt(3) * Y */ + coeffs[2] = 1.732050808f * z; /* ACN 2 = sqrt(3) * Z */ + coeffs[3] = 1.732050808f * x; /* ACN 3 = sqrt(3) * X */ + /* Second-order */ + const float xx{x*x}, yy{y*y}, zz{z*z}, xy{x*y}, yz{y*z}, xz{x*z}; + coeffs[4] = 3.872983346f * xy; /* ACN 4 = sqrt(15) * X * Y */ + coeffs[5] = 3.872983346f * yz; /* ACN 5 = sqrt(15) * Y * Z */ + coeffs[6] = 1.118033989f * (3.0f*zz - 1.0f); /* ACN 6 = sqrt(5)/2 * (3*Z*Z - 1) */ + coeffs[7] = 3.872983346f * xz; /* ACN 7 = sqrt(15) * X * Z */ + coeffs[8] = 1.936491673f * (xx - yy); /* ACN 8 = sqrt(15)/2 * (X*X - Y*Y) */ + /* Third-order */ + coeffs[9] = 2.091650066f * (y*(3.0f*xx - yy)); /* ACN 9 = sqrt(35/8) * Y * (3*X*X - Y*Y) */ + coeffs[10] = 10.246950766f * (z*xy); /* ACN 10 = sqrt(105) * Z * X * Y */ + coeffs[11] = 1.620185175f * (y*(5.0f*zz - 1.0f)); /* ACN 11 = sqrt(21/8) * Y * (5*Z*Z - 1) */ + coeffs[12] = 1.322875656f * (z*(5.0f*zz - 3.0f)); /* ACN 12 = sqrt(7)/2 * Z * (5*Z*Z - 3) */ + coeffs[13] = 1.620185175f * (x*(5.0f*zz - 1.0f)); /* ACN 13 = sqrt(21/8) * X * (5*Z*Z - 1) */ + coeffs[14] = 5.123475383f * (z*(xx - yy)); /* ACN 14 = sqrt(105)/2 * Z * (X*X - Y*Y) */ + coeffs[15] = 2.091650066f * (x*(xx - 3.0f*yy)); /* ACN 15 = sqrt(35/8) * X * (X*X - 3*Y*Y) */ + /* Fourth-order */ + /* ACN 16 = sqrt(35)*3/2 * X * Y * (X*X - Y*Y) */ + /* ACN 17 = sqrt(35/2)*3/2 * (3*X*X - Y*Y) * Y * Z */ + /* ACN 18 = sqrt(5)*3/2 * X * Y * (7*Z*Z - 1) */ + /* ACN 19 = sqrt(5/2)*3/2 * Y * Z * (7*Z*Z - 3) */ + /* ACN 20 = 3/8 * (35*Z*Z*Z*Z - 30*Z*Z + 3) */ + /* ACN 21 = sqrt(5/2)*3/2 * X * Z * (7*Z*Z - 3) */ + /* ACN 22 = sqrt(5)*3/4 * (X*X - Y*Y) * (7*Z*Z - 1) */ + /* ACN 23 = sqrt(35/2)*3/2 * (X*X - 3*Y*Y) * X * Z */ + /* ACN 24 = sqrt(35)*3/8 * (X*X*X*X - 6*X*X*Y*Y + Y*Y*Y*Y) */ + + if(spread > 0.0f) + { + /* Implement the spread by using a spherical source that subtends the + * angle spread. See: + * http://www.ppsloan.org/publications/StupidSH36.pdf - Appendix A3 + * + * When adjusted for N3D normalization instead of SN3D, these + * calculations are: + * + * ZH0 = -sqrt(pi) * (-1+ca); + * ZH1 = 0.5*sqrt(pi) * sa*sa; + * ZH2 = -0.5*sqrt(pi) * ca*(-1+ca)*(ca+1); + * ZH3 = -0.125*sqrt(pi) * (-1+ca)*(ca+1)*(5*ca*ca - 1); + * ZH4 = -0.125*sqrt(pi) * ca*(-1+ca)*(ca+1)*(7*ca*ca - 3); + * ZH5 = -0.0625*sqrt(pi) * (-1+ca)*(ca+1)*(21*ca*ca*ca*ca - 14*ca*ca + 1); + * + * The gain of the source is compensated for size, so that the + * loudness doesn't depend on the spread. Thus: + * + * ZH0 = 1.0f; + * ZH1 = 0.5f * (ca+1.0f); + * ZH2 = 0.5f * (ca+1.0f)*ca; + * ZH3 = 0.125f * (ca+1.0f)*(5.0f*ca*ca - 1.0f); + * ZH4 = 0.125f * (ca+1.0f)*(7.0f*ca*ca - 3.0f)*ca; + * ZH5 = 0.0625f * (ca+1.0f)*(21.0f*ca*ca*ca*ca - 14.0f*ca*ca + 1.0f); + */ + const float ca{std::cos(spread * 0.5f)}; + /* Increase the source volume by up to +3dB for a full spread. 
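 * As a quick sanity check of these formulas: with spread = 0, ca = cos(0) = 1,
 * so ZH0..ZH3 all evaluate to 1 and the coefficients pass through unchanged
 * (scale = sqrt(1 + 0) = 1). With spread = tau, ca = cos(pi) = -1, so
 * ZH1..ZH3 collapse to 0, leaving only the omnidirectional ACN 0 term,
 * boosted by scale = sqrt(2), i.e. roughly +3dB.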
*/ + const float scale{std::sqrt(1.0f + spread/al::MathDefs<float>::Tau())}; + + const float ZH0_norm{scale}; + const float ZH1_norm{scale * 0.5f * (ca+1.f)}; + const float ZH2_norm{scale * 0.5f * (ca+1.f)*ca}; + const float ZH3_norm{scale * 0.125f * (ca+1.f)*(5.f*ca*ca-1.f)}; + + /* Zeroth-order */ + coeffs[0] *= ZH0_norm; + /* First-order */ + coeffs[1] *= ZH1_norm; + coeffs[2] *= ZH1_norm; + coeffs[3] *= ZH1_norm; + /* Second-order */ + coeffs[4] *= ZH2_norm; + coeffs[5] *= ZH2_norm; + coeffs[6] *= ZH2_norm; + coeffs[7] *= ZH2_norm; + coeffs[8] *= ZH2_norm; + /* Third-order */ + coeffs[9] *= ZH3_norm; + coeffs[10] *= ZH3_norm; + coeffs[11] *= ZH3_norm; + coeffs[12] *= ZH3_norm; + coeffs[13] *= ZH3_norm; + coeffs[14] *= ZH3_norm; + coeffs[15] *= ZH3_norm; + } + + return coeffs; +} + +void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain, + const al::span<float,MAX_OUTPUT_CHANNELS> gains) +{ + auto ambimap = mix->AmbiMap.cbegin(); + + auto iter = std::transform(ambimap, ambimap+mix->Buffer.size(), gains.begin(), + [coeffs,ingain](const BFChannelConfig &chanmap) noexcept -> float + { return chanmap.Scale * coeffs[chanmap.Index] * ingain; } + ); + std::fill(iter, gains.end(), 0.0f); +} diff --git a/core/mixer.h b/core/mixer.h new file mode 100644 index 00000000..309f4224 --- /dev/null +++ b/core/mixer.h @@ -0,0 +1,101 @@ +#ifndef CORE_MIXER_H +#define CORE_MIXER_H + +#include <array> +#include <cmath> +#include <stddef.h> +#include <type_traits> + +#include "alspan.h" +#include "ambidefs.h" +#include "bufferline.h" +#include "devformat.h" + +struct MixParams; + +using MixerFunc = void(*)(const al::span<const float> InSamples, + const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains, + const size_t Counter, const size_t OutPos); + +extern MixerFunc MixSamples; + + +/** + * Calculates ambisonic encoder coefficients using the X, Y, and Z direction + * components, which must represent a normalized (unit length) vector, and the + * spread is the angular width of the sound (0...tau). + * + * NOTE: The components use ambisonic coordinates. As a result: + * + * Ambisonic Y = OpenAL -X + * Ambisonic Z = OpenAL Y + * Ambisonic X = OpenAL -Z + * + * The components are ordered such that OpenAL's X, Y, and Z are the first, + * second, and third parameters respectively -- simply negate X and Z. + */ +std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x, + const float spread); + +/** + * CalcDirectionCoeffs + * + * Calculates ambisonic coefficients based on an OpenAL direction vector. The + * vector must be normalized (unit length), and the spread is the angular width + * of the sound (0...tau). + */ +inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3], + const float spread) +{ + /* Convert from OpenAL coords to Ambisonics. */ + return CalcAmbiCoeffs(-dir[0], dir[1], -dir[2], spread); +} + +/** + * CalcAngleCoeffs + * + * Calculates ambisonic coefficients based on azimuth and elevation. The + * azimuth and elevation parameters are in radians, going right and up + * respectively. 
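 * For illustration, a minimal sketch of panning a mono source straight ahead
 * with no spread (the 'mix' and 'gains' objects here are assumptions, not
 * part of this header):
 *
 *   const auto coeffs = CalcAngleCoeffs(0.0f, 0.0f, 0.0f);
 *   std::array<float,MAX_OUTPUT_CHANNELS> gains{};
 *   ComputePanGains(&mix, coeffs.data(), 1.0f, gains);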
+ */ +inline std::array<float,MaxAmbiChannels> CalcAngleCoeffs(const float azimuth, + const float elevation, const float spread) +{ + const float x{-std::sin(azimuth) * std::cos(elevation)}; + const float y{ std::sin(elevation)}; + const float z{ std::cos(azimuth) * std::cos(elevation)}; + + return CalcAmbiCoeffs(x, y, z, spread); +} + + +/** + * ComputePanGains + * + * Computes panning gains using the given channel decoder coefficients and the + * pre-calculated direction or angle coefficients. For B-Format sources, the + * coeffs are a 'slice' of a transform matrix for the input channel, used to + * scale and orient the sound samples. + */ +void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain, + const al::span<float,MAX_OUTPUT_CHANNELS> gains); + + +/** Helper to set an identity/pass-through panning for ambisonic mixing (3D input). */ +template<typename T, typename I, typename F> +auto SetAmbiPanIdentity(T iter, I count, F func) -> std::enable_if_t<std::is_integral<I>::value> +{ + if(count < 1) return; + + std::array<float,MaxAmbiChannels> coeffs{{1.0f}}; + func(*iter, coeffs); + ++iter; + for(I i{1};i < count;++i,++iter) + { + coeffs[i-1] = 0.0f; + coeffs[i ] = 1.0f; + func(*iter, coeffs); + } +} + +#endif /* CORE_MIXER_H */ diff --git a/core/mixer/defs.h b/core/mixer/defs.h index acf60350..ba304f22 100644 --- a/core/mixer/defs.h +++ b/core/mixer/defs.h @@ -6,6 +6,7 @@ #include "alspan.h" #include "core/bufferline.h" +#include "core/resampler_limits.h" struct HrtfChannelState; struct HrtfFilter; @@ -19,12 +20,6 @@ constexpr int MixerFracBits{12}; constexpr int MixerFracOne{1 << MixerFracBits}; constexpr int MixerFracMask{MixerFracOne - 1}; -/* Maximum number of samples to pad on the ends of a buffer for resampling. - * Note that the padding is symmetric (half at the beginning and half at the - * end)! 
- */ -constexpr int MaxResamplerPadding{48}; - constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */ @@ -80,7 +75,7 @@ template<typename InstTag> void MixHrtfBlend_(const float *InSamples, float2 *AccumSamples, const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize); template<typename InstTag> -void MixDirectHrtf_(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize); diff --git a/core/mixer/hrtfbase.h b/core/mixer/hrtfbase.h index 7419f960..79b09a3d 100644 --- a/core/mixer/hrtfbase.h +++ b/core/mixer/hrtfbase.h @@ -12,7 +12,7 @@ using uint = unsigned int; using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const size_t irSize, - const HrirArray &Coeffs, const float left, const float right); + const ConstHrirSpan Coeffs, const float left, const float right); template<ApplyCoeffsT ApplyCoeffs> inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, const size_t IrSize, @@ -20,7 +20,7 @@ inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, c { ASSUME(BufferSize > 0); - const HrirArray &Coeffs = *hrtfparams->Coeffs; + const ConstHrirSpan Coeffs{hrtfparams->Coeffs}; const float gainstep{hrtfparams->GainStep}; const float gain{hrtfparams->Gain}; @@ -45,9 +45,9 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl { ASSUME(BufferSize > 0); - const auto &OldCoeffs = oldparams->Coeffs; + const ConstHrirSpan OldCoeffs{oldparams->Coeffs}; const float oldGainStep{oldparams->Gain / static_cast<float>(BufferSize)}; - const auto &NewCoeffs = *newparams->Coeffs; + const ConstHrirSpan NewCoeffs{newparams->Coeffs}; const float newGainStep{newparams->GainStep}; if LIKELY(oldparams->Gain > GainSilenceThreshold) @@ -84,7 +84,7 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl } template<ApplyCoeffsT ApplyCoeffs> -inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +inline void MixDirectHrtfBase(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { @@ -133,7 +133,7 @@ inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOu ChanState->mSplitter.processHfScale(tempbuf, ChanState->mHfScale); /* Now apply the HRIR coefficients to this channel. 
*/ - const auto &Coeffs = ChanState->mCoeffs; + const ConstHrirSpan Coeffs{ChanState->mCoeffs}; for(size_t i{0u};i < BufferSize;++i) { const float insample{tempbuf[i]}; diff --git a/core/mixer/hrtfdefs.h b/core/mixer/hrtfdefs.h index 89a9bb8d..7046a31e 100644 --- a/core/mixer/hrtfdefs.h +++ b/core/mixer/hrtfdefs.h @@ -3,6 +3,7 @@ #include <array> +#include "alspan.h" #include "core/ambidefs.h" #include "core/bufferline.h" #include "core/filters/splitter.h" @@ -28,9 +29,11 @@ constexpr uint MinIrLength{8}; constexpr uint HrtfDirectDelay{256}; using HrirArray = std::array<float2,HrirLength>; +using HrirSpan = al::span<float2,HrirLength>; +using ConstHrirSpan = al::span<const float2,HrirLength>; struct MixHrtfFilter { - const HrirArray *Coeffs; + const ConstHrirSpan Coeffs; uint2 Delay; float Gain; float GainStep; diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp index ff9538a4..f82f7dd1 100644 --- a/core/mixer/mixer_c.cpp +++ b/core/mixer/mixer_c.cpp @@ -32,15 +32,16 @@ inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) { const size_t m{istate.bsinc.m}; + ASSUME(m > 0); // Calculate the phase index and factor. const uint pi{frac >> FracPhaseBitDiff}; const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)}; - const float *fil{istate.bsinc.filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; // Apply the scale and phase interpolated filter. float r{0.0f}; @@ -51,13 +52,14 @@ inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, con inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) { const size_t m{istate.bsinc.m}; + ASSUME(m > 0); // Calculate the phase index and factor. const uint pi{frac >> FracPhaseBitDiff}; const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)}; - const float *fil{istate.bsinc.filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; // Apply the phase interpolated filter. 
float r{0.0f}; @@ -83,7 +85,7 @@ float *DoResample(const InterpState *state, float *RESTRICT src, uint frac, uint return dst.data(); } -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { ASSUME(IrSize >= MinIrLength); @@ -149,7 +151,7 @@ void MixHrtfBlend_<CTag>(const float *InSamples, float2 *AccumSamples, const uin } template<> -void MixDirectHrtf_<CTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<CTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp index f3e5f130..a3afdc6b 100644 --- a/core/mixer/mixer_neon.cpp +++ b/core/mixer/mixer_neon.cpp @@ -34,7 +34,7 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3) constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits}; constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff}; -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { float32x4_t leftright4; @@ -118,6 +118,7 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src const float *const filter{state->bsinc.filter}; const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -130,10 +131,10 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -163,6 +164,7 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT { const float *const filter{state->bsinc.filter}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -175,8 +177,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; @@ -213,7 +215,7 @@ void MixHrtfBlend_<NEONTag>(const float *InSamples, float2 *AccumSamples, const } template<> -void MixDirectHrtf_<NEONTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<NEONTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp index c0fd8fa1..3cfb00a5 100644 --- a/core/mixer/mixer_sse.cpp +++ b/core/mixer/mixer_sse.cpp @@ -26,7 +26,7 @@ constexpr uint 
FracPhaseDiffOne{1 << FracPhaseBitDiff}; #define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; @@ -82,6 +82,7 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, const float *const filter{state->bsinc.filter}; const __m128 sf4{_mm_set1_ps(state->bsinc.sf)}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -94,10 +95,10 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -128,6 +129,7 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT { const float *const filter{state->bsinc.filter}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -140,8 +142,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; @@ -179,7 +181,7 @@ void MixHrtfBlend_<SSETag>(const float *InSamples, float2 *AccumSamples, const u } template<> -void MixDirectHrtf_<SSETag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<SSETag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp index f91d5dcd..99d04210 100644 --- a/core/mixer/mixer_sse2.cpp +++ b/core/mixer/mixer_sse2.cpp @@ -52,10 +52,10 @@ float *Resample_<LerpTag,SSE2Tag>(const InterpState*, float *RESTRICT src, uint auto dst_iter = dst.begin(); for(size_t todo{dst.size()>>2};todo;--todo) { - const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))}; - const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))}; - const int pos2{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(2, 2, 2, 2)))}; - const int pos3{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(3, 3, 3, 3)))}; + const int pos0{_mm_cvtsi128_si32(pos4)}; + const int pos1{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 4))}; + const int pos2{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 8))}; + const int pos3{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 12))}; const __m128 val1{_mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ])}; const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])}; diff --git a/core/resampler_limits.h b/core/resampler_limits.h new file mode 100644 index 00000000..9d4cefda --- /dev/null +++ b/core/resampler_limits.h @@ -0,0 +1,12 @@ +#ifndef CORE_RESAMPLER_LIMITS_H +#define CORE_RESAMPLER_LIMITS_H + 
+/* Maximum number of samples to pad on the ends of a buffer for resampling. + * Note that the padding is symmetric (half at the beginning and half at the + * end)! + */ +constexpr int MaxResamplerPadding{48}; + +constexpr int MaxResamplerEdge{MaxResamplerPadding >> 1}; + +#endif /* CORE_RESAMPLER_LIMITS_H */ diff --git a/core/rtkit.cpp b/core/rtkit.cpp new file mode 100644 index 00000000..8b489e71 --- /dev/null +++ b/core/rtkit.cpp @@ -0,0 +1,240 @@ +/*-*- Mode: C; c-basic-offset: 8 -*-*/ + +/*** + Copyright 2009 Lennart Poettering + Copyright 2010 David Henningsson <[email protected]> + Copyright 2021 Chris Robinson + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +***/ + +#include "config.h" + +#include "rtkit.h" + +#include <errno.h> + +#ifdef __linux__ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <memory> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/syscall.h> + + +namespace dbus { + constexpr int TypeString{'s'}; + constexpr int TypeVariant{'v'}; + constexpr int TypeInt32{'i'}; + constexpr int TypeUInt32{'u'}; + constexpr int TypeInt64{'x'}; + constexpr int TypeUInt64{'t'}; + constexpr int TypeInvalid{'\0'}; + + struct MessageDeleter { + void operator()(DBusMessage *m) { (*pdbus_message_unref)(m); } + }; + using MessagePtr = std::unique_ptr<DBusMessage,MessageDeleter>; +} // namespace dbus + +namespace { + +inline pid_t _gettid() +{ return static_cast<pid_t>(syscall(SYS_gettid)); } + +int translate_error(const char *name) +{ + if(strcmp(name, DBUS_ERROR_NO_MEMORY) == 0) + return -ENOMEM; + if(strcmp(name, DBUS_ERROR_SERVICE_UNKNOWN) == 0 + || strcmp(name, DBUS_ERROR_NAME_HAS_NO_OWNER) == 0) + return -ENOENT; + if(strcmp(name, DBUS_ERROR_ACCESS_DENIED) == 0 + || strcmp(name, DBUS_ERROR_AUTH_FAILED) == 0) + return -EACCES; + return -EIO; +} + +int rtkit_get_int_property(DBusConnection *connection, const char *propname, long long *propval) +{ + dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH, + "org.freedesktop.DBus.Properties", "Get")}; + if(!m) return -ENOMEM; + + const char *interfacestr = RTKIT_SERVICE_NAME; + auto ready = (*pdbus_message_append_args)(m.get(), + dbus::TypeString, &interfacestr, + dbus::TypeString, &propname, + dbus::TypeInvalid); + if(!ready) return -ENOMEM; + + dbus::Error error; + dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1, + &error.get())}; + if(!r) return translate_error(error->name); + + 
if((*pdbus_set_error_from_message)(&error.get(), r.get())) + return translate_error(error->name); + + int ret{-EBADMSG}; + DBusMessageIter iter{}; + (*pdbus_message_iter_init)(r.get(), &iter); + while(int curtype{(*pdbus_message_iter_get_arg_type)(&iter)}) + { + if(curtype == dbus::TypeVariant) + { + DBusMessageIter subiter{}; + (*pdbus_message_iter_recurse)(&iter, &subiter); + + while((curtype=(*pdbus_message_iter_get_arg_type)(&subiter)) != dbus::TypeInvalid) + { + if(curtype == dbus::TypeInt32) + { + dbus_int32_t i32{}; + (*pdbus_message_iter_get_basic)(&subiter, &i32); + *propval = i32; + ret = 0; + } + + if(curtype == dbus::TypeInt64) + { + dbus_int64_t i64{}; + (*pdbus_message_iter_get_basic)(&subiter, &i64); + *propval = i64; + ret = 0; + } + + (*pdbus_message_iter_next)(&subiter); + } + } + (*pdbus_message_iter_next)(&iter); + } + + return ret; +} + +} // namespace + +extern "C" { +int rtkit_get_max_realtime_priority(DBusConnection *connection) +{ + long long retval{}; + int err{rtkit_get_int_property(connection, "MaxRealtimePriority", &retval)}; + return err < 0 ? err : static_cast<int>(retval); +} + +int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level) +{ + long long retval{}; + int err{rtkit_get_int_property(connection, "MinNiceLevel", &retval)}; + if(err >= 0) *min_nice_level = static_cast<int>(retval); + return err; +} + +long long rtkit_get_rttime_usec_max(DBusConnection *connection) +{ + long long retval{}; + int err{rtkit_get_int_property(connection, "RTTimeUSecMax", &retval)}; + return err < 0 ? err : retval; +} + +int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority) +{ + if(thread == 0) + thread = _gettid(); + + dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH, + "org.freedesktop.RealtimeKit1", "MakeThreadRealtime")}; + if(!m) return -ENOMEM; + + auto u64 = static_cast<dbus_uint64_t>(thread); + auto u32 = static_cast<dbus_uint32_t>(priority); + auto ready = (*pdbus_message_append_args)(m.get(), + dbus::TypeUInt64, &u64, + dbus::TypeUInt32, &u32, + dbus::TypeInvalid); + if(!ready) return -ENOMEM; + + dbus::Error error; + dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1, + &error.get())}; + if(!r) return translate_error(error->name); + + if((*pdbus_set_error_from_message)(&error.get(), r.get())) + return translate_error(error->name); + + return 0; +} + +int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level) +{ + if(thread == 0) + thread = _gettid(); + + dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH, + "org.freedesktop.RealtimeKit1", "MakeThreadHighPriority")}; + if(!m) return -ENOMEM; + + auto u64 = static_cast<dbus_uint64_t>(thread); + auto s32 = static_cast<dbus_int32_t>(nice_level); + auto ready = (*pdbus_message_append_args)(m.get(), + dbus::TypeUInt64, &u64, + dbus::TypeInt32, &s32, + dbus::TypeInvalid); + if(!ready) return -ENOMEM; + + dbus::Error error; + dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1, + &error.get())}; + if(!r) return translate_error(error->name); + + if((*pdbus_set_error_from_message)(&error.get(), r.get())) + return translate_error(error->name); + + return 0; +} +} // extern "C" + +#else + +extern "C" { +int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority) +{ return -ENOTSUP; } + +int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level) +{ 
return -ENOTSUP; } + +int rtkit_get_max_realtime_priority(DBusConnection *connection) +{ return -ENOTSUP; } + +int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level) +{ return -ENOTSUP; } + +long long rtkit_get_rttime_usec_max(DBusConnection *connection) +{ return -ENOTSUP; } +} // extern "C" + +#endif diff --git a/core/rtkit.h b/core/rtkit.h new file mode 100644 index 00000000..96e81d4a --- /dev/null +++ b/core/rtkit.h @@ -0,0 +1,80 @@ +/*-*- Mode: C; c-basic-offset: 8 -*-*/ + +#ifndef foortkithfoo +#define foortkithfoo + +/*** + Copyright 2009 Lennart Poettering + Copyright 2010 David Henningsson <[email protected]> + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +***/ + +#include <sys/types.h> + +#include "dbus_wrap.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* This is the reference implementation for a client for + * RealtimeKit. You don't have to use this, but if do, just copy these + * sources into your repository */ + +#define RTKIT_SERVICE_NAME "org.freedesktop.RealtimeKit1" +#define RTKIT_OBJECT_PATH "/org/freedesktop/RealtimeKit1" + +/* This is mostly equivalent to sched_setparam(thread, SCHED_RR, { + * .sched_priority = priority }). 'thread' needs to be a kernel thread + * id as returned by gettid(), not a pthread_t! If 'thread' is 0 the + * current thread is used. The returned value is a negative errno + * style error code, or 0 on success. */ +int rtkit_make_realtime(DBusConnection *system_bus, pid_t thread, int priority); + +/* This is mostly equivalent to setpriority(PRIO_PROCESS, thread, + * nice_level). 'thread' needs to be a kernel thread id as returned by + * gettid(), not a pthread_t! If 'thread' is 0 the current thread is + * used. The returned value is a negative errno style error code, or 0 + * on success.*/ +int rtkit_make_high_priority(DBusConnection *system_bus, pid_t thread, int nice_level); + +/* Return the maximum value of realtime priority available. Realtime requests + * above this value will fail. A negative value is an errno style error code. + */ +int rtkit_get_max_realtime_priority(DBusConnection *system_bus); + +/* Retreive the minimum value of nice level available. High prio requests + * below this value will fail. The returned value is a negative errno + * style error code, or 0 on success.*/ +int rtkit_get_min_nice_level(DBusConnection *system_bus, int *min_nice_level); + +/* Return the maximum value of RLIMIT_RTTIME to set before attempting a + * realtime request. A negative value is an errno style error code. 
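 * For illustration, a minimal client sketch combining these calls (it links
 * libdbus directly instead of the loader wrappers used elsewhere in this
 * tree, trims error handling, and the helper name is an assumption):
 *
 *   #include <errno.h>
 *   #include <sys/resource.h>
 *   #include <dbus/dbus.h>
 *   #include "rtkit.h"
 *
 *   static int try_make_realtime(int priority)
 *   {
 *       DBusError error;
 *       dbus_error_init(&error);
 *       DBusConnection *conn = dbus_bus_get(DBUS_BUS_SYSTEM, &error);
 *       if(!conn) { dbus_error_free(&error); return -EIO; }
 *
 *       // RealtimeKit rejects requests unless RLIMIT_RTTIME is capped.
 *       long long maxrttime = rtkit_get_rttime_usec_max(conn);
 *       if(maxrttime > 0)
 *       {
 *           struct rlimit rlim;
 *           rlim.rlim_cur = rlim.rlim_max = (rlim_t)maxrttime;
 *           setrlimit(RLIMIT_RTTIME, &rlim);
 *       }
 *
 *       int maxprio = rtkit_get_max_realtime_priority(conn);
 *       if(maxprio < 0) return maxprio;
 *       if(priority > maxprio) priority = maxprio;
 *
 *       return rtkit_make_realtime(conn, 0, priority); // 0 = current thread
 *   }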
+ */ +long long rtkit_get_rttime_usec_max(DBusConnection *system_bus); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp index 92f35901..f1af4b94 100644 --- a/core/uhjfilter.cpp +++ b/core/uhjfilter.cpp @@ -3,227 +3,49 @@ #include "uhjfilter.h" -#ifdef HAVE_SSE_INTRINSICS -#include <xmmintrin.h> -#elif defined(HAVE_NEON) -#include <arm_neon.h> -#endif - #include <algorithm> #include <iterator> #include "alcomplex.h" #include "alnumeric.h" #include "opthelpers.h" +#include "phase_shifter.h" namespace { -using complex_d = std::complex<double>; - -struct PhaseShifterT { - alignas(16) std::array<float,Uhj2Encoder::sFilterSize> Coeffs; - - /* Some notes on this filter construction. - * - * A wide-band phase-shift filter needs a delay to maintain linearity. A - * dirac impulse in the center of a time-domain buffer represents a filter - * passing all frequencies through as-is with a pure delay. Converting that - * to the frequency domain, adjusting the phase of each frequency bin by - * +90 degrees, then converting back to the time domain, results in a FIR - * filter that applies a +90 degree wide-band phase-shift. - * - * A particularly notable aspect of the time-domain filter response is that - * every other coefficient is 0. This allows doubling the effective size of - * the filter, by storing only the non-0 coefficients and double-stepping - * over the input to apply it. - * - * Additionally, the resulting filter is independent of the sample rate. - * The same filter can be applied regardless of the device's sample rate - * and achieve the same effect. - */ - PhaseShifterT() - { - constexpr size_t fft_size{Uhj2Encoder::sFilterSize * 2}; - constexpr size_t half_size{fft_size / 2}; - - /* Generate a frequency domain impulse with a +90 degree phase offset. - * Reconstruct the mirrored frequencies to convert to the time domain. - */ - auto fftBuffer = std::make_unique<complex_d[]>(fft_size); - std::fill_n(fftBuffer.get(), fft_size, complex_d{}); - fftBuffer[half_size] = 1.0; - - forward_fft({fftBuffer.get(), fft_size}); - for(size_t i{0};i < half_size+1;++i) - fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()}; - for(size_t i{half_size+1};i < fft_size;++i) - fftBuffer[i] = std::conj(fftBuffer[fft_size - i]); - inverse_fft({fftBuffer.get(), fft_size}); - - /* Reverse the filter for simpler processing, and store only the non-0 - * coefficients. 
- */ - auto fftiter = fftBuffer.get() + half_size + (Uhj2Encoder::sFilterSize-1); - for(float &coeff : Coeffs) - { - coeff = static_cast<float>(fftiter->real() / double{fft_size}); - fftiter -= 2; - } - } -}; -const PhaseShifterT PShift{}; - -void allpass_process(al::span<float> dst, const float *RESTRICT src) -{ -#ifdef HAVE_SSE_INTRINSICS - size_t pos{0}; - if(size_t todo{dst.size()>>1}) - { - do { - __m128 r04{_mm_setzero_ps()}; - __m128 r14{_mm_setzero_ps()}; - for(size_t j{0};j < PShift.Coeffs.size();j+=4) - { - const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])}; - const __m128 s0{_mm_loadu_ps(&src[j*2])}; - const __m128 s1{_mm_loadu_ps(&src[j*2 + 4])}; - - __m128 s{_mm_shuffle_ps(s0, s1, _MM_SHUFFLE(2, 0, 2, 0))}; - r04 = _mm_add_ps(r04, _mm_mul_ps(s, coeffs)); - - s = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(3, 1, 3, 1)); - r14 = _mm_add_ps(r14, _mm_mul_ps(s, coeffs)); - } - r04 = _mm_add_ps(r04, _mm_shuffle_ps(r04, r04, _MM_SHUFFLE(0, 1, 2, 3))); - r04 = _mm_add_ps(r04, _mm_movehl_ps(r04, r04)); - dst[pos++] += _mm_cvtss_f32(r04); - - r14 = _mm_add_ps(r14, _mm_shuffle_ps(r14, r14, _MM_SHUFFLE(0, 1, 2, 3))); - r14 = _mm_add_ps(r14, _mm_movehl_ps(r14, r14)); - dst[pos++] += _mm_cvtss_f32(r14); - - src += 2; - } while(--todo); - } - if((dst.size()&1)) - { - __m128 r4{_mm_setzero_ps()}; - for(size_t j{0};j < PShift.Coeffs.size();j+=4) - { - const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])}; - /* NOTE: This could alternatively be done with two unaligned loads - * and a shuffle. Which would be better? - */ - const __m128 s{_mm_setr_ps(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])}; - r4 = _mm_add_ps(r4, _mm_mul_ps(s, coeffs)); - } - r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); - r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); - - dst[pos] += _mm_cvtss_f32(r4); - } - -#elif defined(HAVE_NEON) +static_assert(UhjEncoder::sFilterDelay==UhjDecoder::sFilterDelay, "UHJ filter delays mismatch"); - size_t pos{0}; - if(size_t todo{dst.size()>>1}) - { - /* There doesn't seem to be NEON intrinsics to do this kind of stipple - * shuffling, so there's two custom methods for it. 
- */ - auto shuffle_2020 = [](float32x4_t a, float32x4_t b) - { - float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 0))}; - ret = vsetq_lane_f32(vgetq_lane_f32(a, 2), ret, 1); - ret = vsetq_lane_f32(vgetq_lane_f32(b, 0), ret, 2); - ret = vsetq_lane_f32(vgetq_lane_f32(b, 2), ret, 3); - return ret; - }; - auto shuffle_3131 = [](float32x4_t a, float32x4_t b) - { - float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 1))}; - ret = vsetq_lane_f32(vgetq_lane_f32(a, 3), ret, 1); - ret = vsetq_lane_f32(vgetq_lane_f32(b, 1), ret, 2); - ret = vsetq_lane_f32(vgetq_lane_f32(b, 3), ret, 3); - return ret; - }; - do { - float32x4_t r04{vdupq_n_f32(0.0f)}; - float32x4_t r14{vdupq_n_f32(0.0f)}; - for(size_t j{0};j < PShift.Coeffs.size();j+=4) - { - const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])}; - const float32x4_t s0{vld1q_f32(&src[j*2])}; - const float32x4_t s1{vld1q_f32(&src[j*2 + 4])}; - - r04 = vmlaq_f32(r04, shuffle_2020(s0, s1), coeffs); - r14 = vmlaq_f32(r14, shuffle_3131(s0, s1), coeffs); - } - r04 = vaddq_f32(r04, vrev64q_f32(r04)); - dst[pos++] = vget_lane_f32(vadd_f32(vget_low_f32(r04), vget_high_f32(r04)), 0); - - r14 = vaddq_f32(r14, vrev64q_f32(r14)); - dst[pos++] = vget_lane_f32(vadd_f32(vget_low_f32(r14), vget_high_f32(r14)), 0); - - src += 2; - } while(--todo); - } - if((dst.size()&1)) - { - auto load4 = [](float32_t a, float32_t b, float32_t c, float32_t d) - { - float32x4_t ret{vmovq_n_f32(a)}; - ret = vsetq_lane_f32(b, ret, 1); - ret = vsetq_lane_f32(c, ret, 2); - ret = vsetq_lane_f32(d, ret, 3); - return ret; - }; - float32x4_t r4{vdupq_n_f32(0.0f)}; - for(size_t j{0};j < PShift.Coeffs.size();j+=4) - { - const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])}; - const float32x4_t s{load4(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])}; - r4 = vmlaq_f32(r4, s, coeffs); - } - r4 = vaddq_f32(r4, vrev64q_f32(r4)); - dst[pos] = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); - } - -#else - - for(float &output : dst) - { - float ret{0.0f}; - for(size_t j{0};j < PShift.Coeffs.size();++j) - ret += src[j*2] * PShift.Coeffs[j]; +using complex_d = std::complex<double>; - output += ret; - ++src; - } -#endif -} +const PhaseShifterT<UhjEncoder::sFilterDelay*2> PShift{}; } // namespace -/* Encoding 2-channel UHJ from B-Format is done as: +/* Encoding UHJ from B-Format is done as: * * S = 0.9396926*W + 0.1855740*X * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y * * Left = (S + D)/2.0 * Right = (S - D)/2.0 + * T = j(-0.1432*W + 0.6511746*X) - 0.7071068*Y + * Q = 0.9772*Z * - * where j is a wide-band +90 degree phase shift. + * where j is a wide-band +90 degree phase shift. T is excluded from 2-channel + * output, and Q is excluded from 2- and 3-channel output. * * The phase shift is done using a FIR filter derived from an FFT'd impulse * with the desired shift. */ -void Uhj2Encoder::encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void UhjEncoder::encode(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const FloatBufferLine *InSamples, const size_t SamplesToDo) { + /* Given FuMa input, a +3dB boost is needed for the expected levels. 
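 * The boost is the sqrt2 factor applied below: 20*log10(sqrt(2)) is roughly
 * +3.01dB.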
*/ + static constexpr float sqrt2{1.41421356237f}; + ASSUME(SamplesToDo > 0); float *RESTRICT left{al::assume_aligned<16>(LeftOut.data())}; @@ -233,43 +55,120 @@ void Uhj2Encoder::encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1].data())}; const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2].data())}; - /* Combine the previously delayed mid/side signal with the input. */ + /* Combine the previously delayed S/D signal with the input. Include any + * existing direct signal with it. + */ /* S = 0.9396926*W + 0.1855740*X */ - auto miditer = std::copy(mMidDelay.cbegin(), mMidDelay.cend(), mMid.begin()); + auto miditer = mS.begin() + sFilterDelay; std::transform(winput, winput+SamplesToDo, xinput, miditer, [](const float w, const float x) noexcept -> float - { return 0.9396926f*w + 0.1855740f*x; }); + { return 0.9396926f*sqrt2*w + 0.1855740f*sqrt2*x; }); + for(size_t i{0};i < SamplesToDo;++i,++miditer) + *miditer += left[i] + right[i]; /* D = 0.6554516*Y */ - auto sideiter = std::copy(mSideDelay.cbegin(), mSideDelay.cend(), mSide.begin()); + auto sideiter = mD.begin() + sFilterDelay; std::transform(yinput, yinput+SamplesToDo, sideiter, - [](const float y) noexcept -> float { return 0.6554516f*y; }); - - /* Include any existing direct signal in the mid/side buffers. */ - for(size_t i{0};i < SamplesToDo;++i,++miditer) - *miditer += left[i] + right[i]; + [](const float y) noexcept -> float { return 0.6554516f*sqrt2*y; }); for(size_t i{0};i < SamplesToDo;++i,++sideiter) *sideiter += left[i] - right[i]; - /* Copy the future samples back to the delay buffers for next time. */ - std::copy_n(mMid.cbegin()+SamplesToDo, mMidDelay.size(), mMidDelay.begin()); - std::copy_n(mSide.cbegin()+SamplesToDo, mSideDelay.size(), mSideDelay.begin()); - - /* Now add the all-passed signal into the side signal. */ - /* D += j(-0.3420201*W + 0.5098604*X) */ - auto tmpiter = std::copy(mSideHistory.cbegin(), mSideHistory.cend(), mTemp.begin()); + auto tmpiter = std::copy(mWXHistory.cbegin(), mWXHistory.cend(), mTemp.begin()); std::transform(winput, winput+SamplesToDo, xinput, tmpiter, [](const float w, const float x) noexcept -> float - { return -0.3420201f*w + 0.5098604f*x; }); - std::copy_n(mTemp.cbegin()+SamplesToDo, mSideHistory.size(), mSideHistory.begin()); - allpass_process({mSide.data(), SamplesToDo}, mTemp.data()); + { return -0.3420201f*sqrt2*w + 0.5098604f*sqrt2*x; }); + std::copy_n(mTemp.cbegin()+SamplesToDo, mWXHistory.size(), mWXHistory.begin()); + PShift.processAccum({mD.data(), SamplesToDo}, mTemp.data()); /* Left = (S + D)/2.0 */ for(size_t i{0};i < SamplesToDo;i++) - left[i] = (mMid[i] + mSide[i]) * 0.5f; + left[i] = (mS[i] + mD[i]) * 0.5f; /* Right = (S - D)/2.0 */ for(size_t i{0};i < SamplesToDo;i++) - right[i] = (mMid[i] - mSide[i]) * 0.5f; + right[i] = (mS[i] - mD[i]) * 0.5f; + + /* Copy the future samples to the front for next time. */ + std::copy(mS.cbegin()+SamplesToDo, mS.cbegin()+SamplesToDo+sFilterDelay, mS.begin()); + std::copy(mD.cbegin()+SamplesToDo, mD.cbegin()+SamplesToDo+sFilterDelay, mD.begin()); +} + + +/* Decoding UHJ is done as: + * + * S = Left + Right + * D = Left - Right + * + * W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T) + * X = 0.418504*S - j(0.828347*D + 0.767835*T) + * Y = 0.795954*D - 0.676406*T + j(0.186626*S) + * Z = 1.023332*Q + * + * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2- + * channel excludes Q and T. 
The B-Format signal reconstructed from 2-channel + * UHJ should not be run through a normal B-Format decoder, as it needs + * different shelf filters. + */ +void UhjDecoder::decode(const al::span<BufferLine> samples, const size_t offset, + const size_t samplesToDo, const size_t forwardSamples) +{ + /* A -3dB attenuation is needed for FuMa output. */ + static constexpr float sqrt1_2{0.707106781187f}; + + ASSUME(samplesToDo > 0); + + { + const float *RESTRICT left{al::assume_aligned<16>(samples[0].data() + offset)}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1].data() + offset)}; + const float *RESTRICT t{al::assume_aligned<16>(samples[2].data() + offset)}; + + /* S = Left + Right */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mS[i] = (left[i] + right[i]) * sqrt1_2; + + /* D = Left - Right */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mD[i] = (left[i] - right[i]) * sqrt1_2; + + /* T */ + for(size_t i{0};i < samplesToDo+sFilterDelay;++i) + mT[i] = t[i] * sqrt1_2; + } + + float *RESTRICT woutput{al::assume_aligned<16>(samples[0].data() + offset)}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1].data() + offset)}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2].data() + offset)}; + + /* Precompute j(0.828347*D + 0.767835*T) and store in xoutput. */ + auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); + std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sFilterDelay, mT.cbegin(), tmpiter, + [](const float d, const float t) noexcept { return 0.828347f*d + 0.767835f*t; }); + std::copy_n(mTemp.cbegin()+forwardSamples, mDTHistory.size(), mDTHistory.begin()); + PShift.process({xoutput, samplesToDo}, mTemp.data()); + + /* W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T) */ + for(size_t i{0};i < samplesToDo;++i) + woutput[i] = 0.981530f*mS[i] + 0.197484f*xoutput[i]; + /* X = 0.418504*S - j(0.828347*D + 0.767835*T) */ + for(size_t i{0};i < samplesToDo;++i) + xoutput[i] = 0.418504f*mS[i] - xoutput[i]; + + /* Precompute j*S and store in youtput. */ + tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin()); + std::copy_n(mS.cbegin(), samplesToDo+sFilterDelay, tmpiter); + std::copy_n(mTemp.cbegin()+forwardSamples, mSHistory.size(), mSHistory.begin()); + PShift.process({youtput, samplesToDo}, mTemp.data()); + + /* Y = 0.795954*D - 0.676406*T + j(0.186626*S) */ + for(size_t i{0};i < samplesToDo;++i) + youtput[i] = 0.795954f*mD[i] - 0.676406f*mT[i] + 0.186626f*youtput[i]; + + if(samples.size() > 3) + { + float *RESTRICT zoutput{samples[3].data() + offset}; + /* Z = 1.023332*Q */ + for(size_t i{0};i < samplesToDo;++i) + zoutput[i] = 1.023332f*sqrt1_2*zoutput[i]; + } } diff --git a/core/uhjfilter.h b/core/uhjfilter.h index c2cb8722..c04913b4 100644 --- a/core/uhjfilter.h +++ b/core/uhjfilter.h @@ -5,35 +5,60 @@ #include "almalloc.h" #include "bufferline.h" +#include "resampler_limits.h" -struct Uhj2Encoder { - /* A particular property of the filter allows it to cover nearly twice its - * length, so the filter size is also the effective delay (despite being - * center-aligned). +struct UhjEncoder { + /* The filter delay is half it's effective size, so a delay of 128 has a + * FIR length of 256. */ - constexpr static size_t sFilterSize{128}; + constexpr static size_t sFilterDelay{128}; - /* Delays for the unfiltered signal. 
*/ - alignas(16) std::array<float,sFilterSize> mMidDelay{}; - alignas(16) std::array<float,sFilterSize> mSideDelay{}; - - alignas(16) std::array<float,BufferLineSize+sFilterSize> mMid{}; - alignas(16) std::array<float,BufferLineSize+sFilterSize> mSide{}; + /* Delays and processing storage for the unfiltered signal. */ + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mS{}; + alignas(16) std::array<float,BufferLineSize+sFilterDelay> mD{}; /* History for the FIR filter. */ - alignas(16) std::array<float,sFilterSize*2 - 1> mSideHistory{}; + alignas(16) std::array<float,sFilterDelay*2 - 1> mWXHistory{}; - alignas(16) std::array<float,BufferLineSize + sFilterSize*2> mTemp{}; + alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mTemp{}; /** * Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input * signal. The input must use FuMa channel ordering and scaling. */ - void encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, + void encode(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const FloatBufferLine *InSamples, const size_t SamplesToDo); - DEF_NEWDEL(Uhj2Encoder) + DEF_NEWDEL(UhjEncoder) +}; + + +struct UhjDecoder { + constexpr static size_t sFilterDelay{128}; + + constexpr static size_t sLineSize{BufferLineSize+MaxResamplerPadding+sFilterDelay}; + using BufferLine = std::array<float,sLineSize>; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mS{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mD{}; + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mT{}; + + alignas(16) std::array<float,sFilterDelay-1> mDTHistory{}; + alignas(16) std::array<float,sFilterDelay-1> mSHistory{}; + + alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge + sFilterDelay*2> mTemp{}; + + /** + * Decodes a 3- or 4-channel UHJ signal into a B-Format signal with FuMa + * channel ordering and scaling. For 3-channel, the 3rd channel may be + * attenuated by 'n', where 0 <= n <= 1. So 2-channel UHJ can be decoded by + * leaving the 3rd channel input silent (n=0). 
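 * A rough usage sketch (buffer handling is simplified and the names are
 * assumptions, not part of this header):
 *
 *   UhjDecoder uhj;
 *   std::array<UhjDecoder::BufferLine,3> chans{}; // Left, Right, T (T silent => 2-channel)
 *   // ...copy 'todo' input frames into chans[0] and chans[1]...
 *   uhj.decode({chans.data(), chans.size()}, 0, todo, todo);
 *   // chans[0..2] now hold the FuMa-scaled W, X and Y signals.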
+ */ + void decode(const al::span<BufferLine> samples, const size_t offset, const size_t samplesToDo, + const size_t forwardSamples); + + DEF_NEWDEL(UhjDecoder) }; #endif /* CORE_UHJFILTER_H */ diff --git a/core/uiddefs.cpp b/core/uiddefs.cpp new file mode 100644 index 00000000..244c01a5 --- /dev/null +++ b/core/uiddefs.cpp @@ -0,0 +1,37 @@ + +#include "config.h" + + +#ifndef AL_NO_UID_DEFS + +#if defined(HAVE_GUIDDEF_H) || defined(HAVE_INITGUID_H) +#define INITGUID +#include <windows.h> +#ifdef HAVE_GUIDDEF_H +#include <guiddef.h> +#else +#include <initguid.h> +#endif + +DEFINE_GUID(KSDATAFORMAT_SUBTYPE_PCM, 0x00000001, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71); +DEFINE_GUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, 0x00000003, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71); + +DEFINE_GUID(IID_IDirectSoundNotify, 0xb0210783, 0x89cd, 0x11d0, 0xaf,0x08, 0x00,0xa0,0xc9,0x25,0xcd,0x16); + +DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e,0x3d, 0xc4,0x57,0x92,0x91,0x69,0x2e); +DEFINE_GUID(IID_IMMDeviceEnumerator, 0xa95664d2, 0x9614, 0x4f35, 0xa7,0x46, 0xde,0x8d,0xb6,0x36,0x17,0xe6); +DEFINE_GUID(IID_IAudioClient, 0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1,0x78, 0xc2,0xf5,0x68,0xa7,0x03,0xb2); +DEFINE_GUID(IID_IAudioRenderClient, 0xf294acfc, 0x3146, 0x4483, 0xa7,0xbf, 0xad,0xdc,0xa7,0xc2,0x60,0xe2); +DEFINE_GUID(IID_IAudioCaptureClient, 0xc8adbd64, 0xe71e, 0x48a0, 0xa4,0xde, 0x18,0x5c,0x39,0x5c,0xd3,0x17); + +#ifdef HAVE_WASAPI +#include <wtypes.h> +#include <devpropdef.h> +#include <propkeydef.h> +DEFINE_DEVPROPKEY(DEVPKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80,0x20, 0x67,0xd1,0x46,0xa8,0x50,0xe0, 14); +DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_FormFactor, 0x1da5d803, 0xd492, 0x4edd, 0x8c,0x23, 0xe0,0xc0,0xff,0xee,0x7f,0x0e, 0); +DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_GUID, 0x1da5d803, 0xd492, 0x4edd, 0x8c, 0x23,0xe0, 0xc0,0xff,0xee,0x7f,0x0e, 4 ); +#endif +#endif + +#endif /* AL_NO_UID_DEFS */ diff --git a/core/voice.cpp b/core/voice.cpp new file mode 100644 index 00000000..c764a277 --- /dev/null +++ b/core/voice.cpp @@ -0,0 +1,849 @@ + +#include "config.h" + +#include "voice.h" + +#include <algorithm> +#include <array> +#include <atomic> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <new> +#include <stdlib.h> +#include <utility> +#include <vector> + +#include "albyte.h" +#include "alnumeric.h" +#include "aloptional.h" +#include "alspan.h" +#include "alstring.h" +#include "ambidefs.h" +#include "async_event.h" +#include "buffer_storage.h" +#include "context.h" +#include "cpu_caps.h" +#include "devformat.h" +#include "device.h" +#include "filters/biquad.h" +#include "filters/nfc.h" +#include "filters/splitter.h" +#include "fmt_traits.h" +#include "logging.h" +#include "mixer.h" +#include "mixer/defs.h" +#include "mixer/hrtfdefs.h" +#include "opthelpers.h" +#include "resampler_limits.h" +#include "ringbuffer.h" +#include "vector.h" +#include "voice_change.h" + +struct CTag; +#ifdef HAVE_SSE +struct SSETag; +#endif +#ifdef HAVE_NEON +struct NEONTag; +#endif +struct CopyTag; + + +static_assert(!(sizeof(Voice::BufferLine)&15), "Voice::BufferLine must be a multiple of 16 bytes"); + +Resampler ResamplerDefault{Resampler::Linear}; + +namespace { + +using uint = unsigned int; + +using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize, + const MixHrtfFilter *hrtfparams, const size_t BufferSize); +using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples, + 
const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams, + const size_t BufferSize); + +HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>}; +HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>}; + +inline MixerFunc SelectMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Mix_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return Mix_<SSETag>; +#endif + return Mix_<CTag>; +} + +inline HrtfMixerFunc SelectHrtfMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtf_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtf_<SSETag>; +#endif + return MixHrtf_<CTag>; +} + +inline HrtfMixerBlendFunc SelectHrtfBlendMixer() +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtfBlend_<NEONTag>; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtfBlend_<SSETag>; +#endif + return MixHrtfBlend_<CTag>; +} + +} // namespace + +void Voice::InitMixer(al::optional<std::string> resampler) +{ + if(resampler) + { + struct ResamplerEntry { + const char name[16]; + const Resampler resampler; + }; + constexpr ResamplerEntry ResamplerList[]{ + { "none", Resampler::Point }, + { "point", Resampler::Point }, + { "linear", Resampler::Linear }, + { "cubic", Resampler::Cubic }, + { "bsinc12", Resampler::BSinc12 }, + { "fast_bsinc12", Resampler::FastBSinc12 }, + { "bsinc24", Resampler::BSinc24 }, + { "fast_bsinc24", Resampler::FastBSinc24 }, + }; + + const char *str{resampler->c_str()}; + if(al::strcasecmp(str, "bsinc") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str); + str = "bsinc12"; + } + else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using cubic\n", str); + str = "cubic"; + } + + auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList), + [str](const ResamplerEntry &entry) -> bool + { return al::strcasecmp(str, entry.name) == 0; }); + if(iter == std::end(ResamplerList)) + ERR("Invalid resampler: %s\n", str); + else + ResamplerDefault = iter->resampler; + } + + MixSamples = SelectMixer(); + MixHrtfBlendSamples = SelectHrtfBlendMixer(); + MixHrtfSamples = SelectHrtfMixer(); +} + + +namespace { + +void SendSourceStoppedEvent(ContextBase *context, uint id) +{ + RingBuffer *ring{context->mAsyncEvents.get()}; + auto evt_vec = ring->getWriteVector(); + if(evt_vec.first.len < 1) return; + + AsyncEvent *evt{::new(evt_vec.first.buf) AsyncEvent{EventType_SourceStateChange}}; + evt->u.srcstate.id = id; + evt->u.srcstate.state = AsyncEvent::SrcState::Stop; + + ring->writeAdvance(1); +} + + +const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst, + const al::span<const float> src, int type) +{ + switch(type) + { + case AF_None: + lpfilter.clear(); + hpfilter.clear(); + break; + + case AF_LowPass: + lpfilter.process(src, dst); + hpfilter.clear(); + return dst; + case AF_HighPass: + lpfilter.clear(); + hpfilter.process(src, dst); + return dst; + + case AF_BandPass: + DualBiquad{lpfilter, hpfilter}.process(src, dst); + return dst; + } + return src.data(); +} + + +void LoadSamples(const al::span<Voice::BufferLine> dstSamples, const size_t dstOffset, + const al::byte *src, const size_t srcOffset, const FmtType srctype, const FmtChannels srcchans, + const size_t samples) noexcept +{ +#define HANDLE_FMT(T) case T: \ + { \ + constexpr size_t sampleSize{sizeof(al::FmtTypeTraits<T>::Type)}; \ + if(srcchans == FmtUHJ2) \ + { \ + 
constexpr size_t srcstep{2u}; \ + src += srcOffset*srcstep*sampleSize; \ + al::LoadSampleArray<T>(dstSamples[0].data() + dstOffset, src, \ + srcstep, samples); \ + al::LoadSampleArray<T>(dstSamples[1].data() + dstOffset, \ + src + sampleSize, srcstep, samples); \ + std::fill_n(dstSamples[2].data() + dstOffset, samples, 0.0f); \ + } \ + else \ + { \ + const size_t srcstep{dstSamples.size()}; \ + src += srcOffset*srcstep*sampleSize; \ + for(auto &dst : dstSamples) \ + { \ + al::LoadSampleArray<T>(dst.data() + dstOffset, src, srcstep, \ + samples); \ + src += sampleSize; \ + } \ + } \ + } \ + break + + switch(srctype) + { + HANDLE_FMT(FmtUByte); + HANDLE_FMT(FmtShort); + HANDLE_FMT(FmtFloat); + HANDLE_FMT(FmtDouble); + HANDLE_FMT(FmtMulaw); + HANDLE_FMT(FmtAlaw); + } +#undef HANDLE_FMT +} + +void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem, + const size_t dataPosInt, const FmtType sampleType, const FmtChannels sampleChannels, + const size_t samplesToLoad, const al::span<Voice::BufferLine> voiceSamples) +{ + const uint loopStart{buffer->mLoopStart}; + const uint loopEnd{buffer->mLoopEnd}; + ASSUME(loopEnd > loopStart); + + /* If current pos is beyond the loop range, do not loop */ + if(!bufferLoopItem || dataPosInt >= loopEnd) + { + /* Load what's left to play from the buffer */ + const size_t remaining{minz(samplesToLoad, buffer->mSampleLen-dataPosInt)}; + LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, dataPosInt, sampleType, + sampleChannels, remaining); + + if(const size_t toFill{samplesToLoad - remaining}) + { + for(auto &chanbuffer : voiceSamples) + { + auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + remaining; + std::fill_n(srcsamples + 1, toFill, *srcsamples); + } + } + } + else + { + /* Load what's left of this loop iteration */ + const size_t remaining{minz(samplesToLoad, loopEnd-dataPosInt)}; + LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, dataPosInt, sampleType, + sampleChannels, remaining); + + /* Load repeats of the loop to fill the buffer. 
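A note on the loader above: once the sample type is dispatched, LoadSamples reduces to a stride-based deinterleave, where each destination channel line takes every Nth sample of the interleaved source (N being the interleaved channel count, with 2-channel UHJ also zero-filling a third line for the decoder). A minimal standalone sketch of that access pattern, with illustrative names only:

#include <cstddef>
#include <vector>

// Deinterleave 'frames' frames of interleaved float samples into one
// destination vector per channel: channel c takes src[i*step + c].
void deinterleave(const float *src, std::vector<std::vector<float>> &dst,
    std::size_t frames)
{
    const std::size_t step{dst.size()};
    for(std::size_t c{0};c < step;++c)
    {
        for(std::size_t i{0};i < frames;++i)
            dst[c][i] = src[i*step + c];
    }
}

The real loader does the same walk per source type, converting mulaw/alaw and integer samples to float as it copies.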
*/ + const auto loopSize = static_cast<size_t>(loopEnd - loopStart); + size_t samplesLoaded{remaining}; + while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)}) + { + LoadSamples(voiceSamples, MaxResamplerEdge + samplesLoaded, buffer->mSamples, + loopStart, sampleType, sampleChannels, toFill); + samplesLoaded += toFill; + } + } +} + +void LoadBufferCallback(VoiceBufferItem *buffer, const size_t numCallbackSamples, + const FmtType sampleType, const FmtChannels sampleChannels, const size_t samplesToLoad, + const al::span<Voice::BufferLine> voiceSamples) +{ + /* Load what's left to play from the buffer */ + const size_t remaining{minz(samplesToLoad, numCallbackSamples)}; + LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, 0, sampleType, sampleChannels, + remaining); + + if(const size_t toFill{samplesToLoad - remaining}) + { + for(auto &chanbuffer : voiceSamples) + { + auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + remaining; + std::fill_n(srcsamples + 1, toFill, *srcsamples); + } + } +} + +void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem, + size_t dataPosInt, const FmtType sampleType, const FmtChannels sampleChannels, + const size_t samplesToLoad, const al::span<Voice::BufferLine> voiceSamples) +{ + /* Crawl the buffer queue to fill in the temp buffer */ + size_t samplesLoaded{0}; + while(buffer && samplesLoaded != samplesToLoad) + { + if(dataPosInt >= buffer->mSampleLen) + { + dataPosInt -= buffer->mSampleLen; + buffer = buffer->mNext.load(std::memory_order_acquire); + if(!buffer) buffer = bufferLoopItem; + continue; + } + + const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)}; + LoadSamples(voiceSamples, MaxResamplerEdge+samplesLoaded, buffer->mSamples, dataPosInt, + sampleType, sampleChannels, remaining); + + samplesLoaded += remaining; + if(samplesLoaded == samplesToLoad) + break; + + dataPosInt = 0; + buffer = buffer->mNext.load(std::memory_order_acquire); + if(!buffer) buffer = bufferLoopItem; + } + if(const size_t toFill{samplesToLoad - samplesLoaded}) + { + size_t chanidx{0}; + for(auto &chanbuffer : voiceSamples) + { + auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + samplesLoaded; + std::fill_n(srcsamples + 1, toFill, *srcsamples); + ++chanidx; + } + } +} + + +void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms, + const float TargetGain, const uint Counter, uint OutPos, DeviceBase *Device) +{ + const uint IrSize{Device->mIrSize}; + auto &HrtfSamples = Device->HrtfSourceData; + /* Source HRTF mixing needs to include the direct delay so it remains + * aligned with the direct mix's HRTF filtering. + */ + float2 *AccumSamples{Device->HrtfAccumData + HrtfDirectDelay}; + + /* Copy the HRTF history and new input samples into a temp buffer. */ + auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(), + std::begin(HrtfSamples)); + std::copy_n(samples, DstBufferSize, src_iter); + /* Copy the last used samples back into the history buffer for later. */ + std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(), + parms.Hrtf.History.begin()); + + /* If fading and this is the first mixing pass, fade between the IRs. */ + uint fademix{0u}; + if(Counter && OutPos == 0) + { + fademix = minu(DstBufferSize, Counter); + + float gain{TargetGain}; + + /* The new coefficients need to fade in completely since they're + * replacing the old ones. 
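Looking back at LoadBufferStatic's looping branch above: after copying what remains of the current pass, it keeps re-reading the loop region until the requested sample count is met. A small sketch of that fill pattern on plain vectors, assuming the position is inside the loop, the loop end is past the loop start, and the output is already sized; names are illustrative:

#include <algorithm>
#include <cstddef>
#include <vector>

// Fill 'out' with 'toLoad' samples starting at 'pos' in 'buffer', wrapping
// within the loop region [loopStart, loopEnd) whenever the end is reached.
void load_looped(const std::vector<float> &buffer, std::size_t pos,
    std::size_t loopStart, std::size_t loopEnd, std::vector<float> &out,
    std::size_t toLoad)
{
    // What's left of the current pass through the loop.
    std::size_t loaded{std::min(toLoad, loopEnd - pos)};
    std::copy_n(buffer.data() + pos, loaded, out.data());

    // Whole or partial repeats of the loop region until satisfied.
    const std::size_t loopLen{loopEnd - loopStart};
    while(loaded < toLoad)
    {
        const std::size_t chunk{std::min(toLoad - loaded, loopLen)};
        std::copy_n(buffer.data() + loopStart, chunk, out.data() + loaded);
        loaded += chunk;
    }
}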
To keep the gain fading consistent, + * interpolate between the old and new target gains given how much of + * the fade time this mix handles. + */ + if(Counter > fademix) + { + const float a{static_cast<float>(fademix) / static_cast<float>(Counter)}; + gain = lerp(parms.Hrtf.Old.Gain, TargetGain, a); + } + + MixHrtfFilter hrtfparams{ + parms.Hrtf.Target.Coeffs, + parms.Hrtf.Target.Delay, + 0.0f, gain / static_cast<float>(fademix)}; + MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams, + fademix); + + /* Update the old parameters with the result. */ + parms.Hrtf.Old = parms.Hrtf.Target; + parms.Hrtf.Old.Gain = gain; + OutPos += fademix; + } + + if(fademix < DstBufferSize) + { + const uint todo{DstBufferSize - fademix}; + float gain{TargetGain}; + + /* Interpolate the target gain if the gain fading lasts longer than + * this mix. + */ + if(Counter > DstBufferSize) + { + const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)}; + gain = lerp(parms.Hrtf.Old.Gain, TargetGain, a); + } + + MixHrtfFilter hrtfparams{ + parms.Hrtf.Target.Coeffs, + parms.Hrtf.Target.Delay, + parms.Hrtf.Old.Gain, + (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)}; + MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo); + + /* Store the now-current gain for next time. */ + parms.Hrtf.Old.Gain = gain; + } +} + +void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms, + const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device) +{ + using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*); + static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{ + nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3}; + + float *CurrentGains{parms.Gains.Current.data()}; + MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos); + ++OutBuffer; + ++CurrentGains; + ++TargetGains; + + const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()}; + size_t order{1}; + while(const size_t chancount{Device->NumChannelsPerOrder[order]}) + { + (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data()); + MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos); + OutBuffer += chancount; + CurrentGains += chancount; + TargetGains += chancount; + if(++order == MaxAmbiOrder+1) + break; + } +} + +} // namespace + +void Voice::mix(const State vstate, ContextBase *Context, const uint SamplesToDo) +{ + static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{}; + + ASSUME(SamplesToDo > 0); + + /* Get voice info */ + uint DataPosInt{mPosition.load(std::memory_order_relaxed)}; + uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)}; + VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)}; + VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)}; + const uint increment{mStep}; + if UNLIKELY(increment < 1) + { + /* If the voice is supposed to be stopping but can't be mixed, just + * stop it before bailing. + */ + if(vstate == Stopping) + mPlayState.store(Stopped, std::memory_order_release); + return; + } + + DeviceBase *Device{Context->mDevice}; + const uint NumSends{Device->NumAuxSends}; + + ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0) ? + Resample_<CopyTag,CTag> : mResampler}; + + uint Counter{(mFlags&VoiceIsFading) ? 
SamplesToDo : 0}; + if(!Counter) + { + /* No fading, just overwrite the old/current params. */ + for(auto &chandata : mChans) + { + { + DirectParams &parms = chandata.mDryParams; + if(!(mFlags&VoiceHasHrtf)) + parms.Gains.Current = parms.Gains.Target; + else + parms.Hrtf.Old = parms.Hrtf.Target; + } + for(uint send{0};send < NumSends;++send) + { + if(mSend[send].Buffer.empty()) + continue; + + SendParams &parms = chandata.mWetParams[send]; + parms.Gains.Current = parms.Gains.Target; + } + } + } + else if UNLIKELY(!BufferListItem) + Counter = std::min(Counter, 64u); + + const uint PostPadding{MaxResamplerEdge + + ((mFmtChannels==FmtUHJ2 || mFmtChannels==FmtUHJ3 || mFmtChannels==FmtUHJ4) + ? uint{UhjDecoder::sFilterDelay} : 0u)}; + uint buffers_done{0u}; + uint OutPos{0u}; + do { + /* Figure out how many buffer samples will be needed */ + uint DstBufferSize{SamplesToDo - OutPos}; + uint SrcBufferSize; + + if(increment <= MixerFracOne) + { + /* Calculate the last written dst sample pos. */ + uint64_t DataSize64{DstBufferSize - 1}; + /* Calculate the last read src sample pos. */ + DataSize64 = (DataSize64*increment + DataPosFrac) >> MixerFracBits; + /* +1 to get the src sample count, include padding. */ + DataSize64 += 1 + PostPadding; + + /* Result is guaranteed to be <= BufferLineSize+ResamplerPrePadding + * since we won't use more src samples than dst samples+padding. + */ + SrcBufferSize = static_cast<uint>(DataSize64); + } + else + { + uint64_t DataSize64{DstBufferSize}; + /* Calculate the end src sample pos, include padding. */ + DataSize64 = (DataSize64*increment + DataPosFrac) >> MixerFracBits; + DataSize64 += PostPadding; + + if(DataSize64 <= LineSize - MaxResamplerEdge) + SrcBufferSize = static_cast<uint>(DataSize64); + else + { + /* If the source size got saturated, we can't fill the desired + * dst size. Figure out how many samples we can actually mix. + */ + SrcBufferSize = LineSize - MaxResamplerEdge; + + DataSize64 = SrcBufferSize - PostPadding; + DataSize64 = ((DataSize64<<MixerFracBits) - DataPosFrac) / increment; + if(DataSize64 < DstBufferSize) + { + /* Some mixers require being 16-byte aligned, so also limit + * to a multiple of 4 samples to maintain alignment. + */ + DstBufferSize = static_cast<uint>(DataSize64) & ~3u; + } + ASSUME(DstBufferSize > 0); + } + } + + if((mFlags&(VoiceIsCallback|VoiceCallbackStopped)) == VoiceIsCallback && BufferListItem) + { + if(SrcBufferSize > mNumCallbackSamples) + { + const size_t byteOffset{mNumCallbackSamples*mFrameSize}; + const size_t needBytes{SrcBufferSize*mFrameSize - byteOffset}; + + const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData, + &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))}; + if(gotBytes < 0) + mFlags |= VoiceCallbackStopped; + else if(static_cast<uint>(gotBytes) < needBytes) + { + mFlags |= VoiceCallbackStopped; + mNumCallbackSamples += static_cast<uint>(static_cast<uint>(gotBytes) / + mFrameSize); + } + else + mNumCallbackSamples = SrcBufferSize; + } + } + + if UNLIKELY(!BufferListItem) + { + for(auto &chanbuffer : mVoiceSamples) + { + auto srciter = chanbuffer.data() + MaxResamplerEdge; + auto srcend = chanbuffer.data() + MaxResamplerPadding; + + /* When loading from a voice that ended prematurely, only take + * the samples that get closest to 0 amplitude. This helps + * certain sounds fade out better. 
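The SrcBufferSize math in mix() above works in fixed point: the resampling step 'increment' and the phase 'DataPosFrac' carry MixerFracBits fractional bits, so the index of the last source sample read for a block of output is ((DstBufferSize-1)*increment + DataPosFrac) >> MixerFracBits, and adding one plus the resampler padding gives the number of source samples to load. A small worked sketch, using an assumed 12 fractional bits purely to get concrete numbers:

#include <cstdint>
#include <cstdio>

// Assumed for illustration; the real constant comes from the mixer headers.
constexpr unsigned FracBits{12};
constexpr std::uint64_t FracOne{1u << FracBits};

// Source samples covered (before padding) by 'dstSize' output samples at the
// given fractional step and starting phase.
std::uint64_t src_span(std::uint64_t dstSize, std::uint64_t increment,
    std::uint64_t posFrac)
{
    const std::uint64_t lastIdx{((dstSize-1)*increment + posFrac) >> FracBits};
    return lastIdx + 1;
}

int main()
{
    // A pitch of 1.5x means increment = 1.5*FracOne = 6144; 256 output
    // samples starting at phase 0 read source indices 0..382, so 383
    // source samples are needed before edge padding is added.
    std::printf("%llu\n", static_cast<unsigned long long>(src_span(256, 6144, 0)));
}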
+ */ + auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool + { return std::abs(lhs) < std::abs(rhs); }; + srciter = std::min_element(srciter, srcend, abs_lt); + + SrcBufferSize = SrcBufferSize - PostPadding + MaxResamplerPadding; + std::fill(srciter+1, chanbuffer.data() + SrcBufferSize, *srciter); + } + } + else + { + if((mFlags&VoiceIsStatic)) + LoadBufferStatic(BufferListItem, BufferLoopItem, DataPosInt, mFmtType, mFmtChannels, + SrcBufferSize, mVoiceSamples); + else if((mFlags&VoiceIsCallback)) + LoadBufferCallback(BufferListItem, mNumCallbackSamples, mFmtType, mFmtChannels, + SrcBufferSize, mVoiceSamples); + else + LoadBufferQueue(BufferListItem, BufferLoopItem, DataPosInt, mFmtType, mFmtChannels, + SrcBufferSize, mVoiceSamples); + + if(mDecoder) + { + const size_t srcOffset{(increment*DstBufferSize + DataPosFrac)>>MixerFracBits}; + SrcBufferSize = SrcBufferSize - PostPadding + MaxResamplerEdge; + mDecoder->decode(mVoiceSamples, MaxResamplerEdge, SrcBufferSize, srcOffset); + } + } + + auto voiceSamples = mVoiceSamples.begin(); + for(auto &chandata : mChans) + { + /* Resample, then apply ambisonic upsampling as needed. */ + float *ResampledData{Resample(&mResampleState, + voiceSamples->data() + MaxResamplerEdge, DataPosFrac, increment, + {Device->ResampledData, DstBufferSize})}; + if((mFlags&VoiceIsAmbisonic)) + chandata.mAmbiSplitter.processHfScale({ResampledData, DstBufferSize}, + chandata.mAmbiScale); + + /* Now filter and mix to the appropriate outputs. */ + const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData}; + { + DirectParams &parms = chandata.mDryParams; + const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(), + {ResampledData, DstBufferSize}, mDirect.FilterType)}; + + if((mFlags&VoiceHasHrtf)) + { + const float TargetGain{UNLIKELY(vstate == Stopping) ? 0.0f : + parms.Hrtf.Target.Gain}; + DoHrtfMix(samples, DstBufferSize, parms, TargetGain, Counter, OutPos, Device); + } + else if((mFlags&VoiceHasNfc)) + { + const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data() + : parms.Gains.Target.data()}; + DoNfcMix({samples, DstBufferSize}, mDirect.Buffer.data(), parms, TargetGains, + Counter, OutPos, Device); + } + else + { + const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data() + : parms.Gains.Target.data()}; + MixSamples({samples, DstBufferSize}, mDirect.Buffer, + parms.Gains.Current.data(), TargetGains, Counter, OutPos); + } + } + + for(uint send{0};send < NumSends;++send) + { + if(mSend[send].Buffer.empty()) + continue; + + SendParams &parms = chandata.mWetParams[send]; + const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(), + {ResampledData, DstBufferSize}, mSend[send].FilterType)}; + + const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data() + : parms.Gains.Target.data()}; + MixSamples({samples, DstBufferSize}, mSend[send].Buffer, + parms.Gains.Current.data(), TargetGains, Counter, OutPos); + } + + /* Store the last source samples used for next time. 
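The Counter and OutPos values threaded through the mixing calls above request gain stepping: over the first Counter output samples of the block, each channel's current gain moves linearly toward its target, after which it holds there. The actual mixers live under mixer/ and are not part of this diff; the following is only a conceptual single-channel sketch of that behaviour:

#include <algorithm>
#include <cstddef>

// Accumulate 'in' into 'out', stepping 'curGain' linearly toward 'tgtGain'
// over the next 'counter' samples, then holding at the target.
void mix_one_stepped(const float *in, float *out, std::size_t todo,
    float &curGain, float tgtGain, std::size_t counter)
{
    const float step{counter ? (tgtGain - curGain)/static_cast<float>(counter) : 0.0f};

    std::size_t i{0};
    const std::size_t fadeTodo{std::min(todo, counter)};
    for(;i < fadeTodo;++i)
        out[i] += in[i] * (curGain + step*static_cast<float>(i));

    // Remember how far the fade got so the next call continues from here.
    curGain = (counter > todo) ? curGain + step*static_cast<float>(todo) : tgtGain;

    for(;i < todo;++i)
        out[i] += in[i] * tgtGain;
}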
*/ + const size_t srcOffset{(increment*DstBufferSize + DataPosFrac)>>MixerFracBits}; + std::copy_n(voiceSamples->data()+srcOffset, MaxResamplerPadding, voiceSamples->data()); + ++voiceSamples; + } + /* Update positions */ + DataPosFrac += increment*DstBufferSize; + const uint SrcSamplesDone{DataPosFrac>>MixerFracBits}; + DataPosInt += SrcSamplesDone; + DataPosFrac &= MixerFracMask; + + OutPos += DstBufferSize; + Counter = maxu(DstBufferSize, Counter) - DstBufferSize; + + if UNLIKELY(!BufferListItem) + { + /* Do nothing extra when there's no buffers. */ + } + else if((mFlags&VoiceIsStatic)) + { + if(BufferLoopItem) + { + /* Handle looping static source */ + const uint LoopStart{BufferListItem->mLoopStart}; + const uint LoopEnd{BufferListItem->mLoopEnd}; + if(DataPosInt >= LoopEnd) + { + assert(LoopEnd > LoopStart); + DataPosInt = ((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart; + } + } + else + { + /* Handle non-looping static source */ + if(DataPosInt >= BufferListItem->mSampleLen) + { + BufferListItem = nullptr; + break; + } + } + } + else if((mFlags&VoiceIsCallback)) + { + if(SrcSamplesDone < mNumCallbackSamples) + { + const size_t byteOffset{SrcSamplesDone*mFrameSize}; + const size_t byteEnd{mNumCallbackSamples*mFrameSize}; + al::byte *data{BufferListItem->mSamples}; + std::copy(data+byteOffset, data+byteEnd, data); + mNumCallbackSamples -= SrcSamplesDone; + } + else + { + BufferListItem = nullptr; + mNumCallbackSamples = 0; + } + } + else + { + /* Handle streaming source */ + do { + if(BufferListItem->mSampleLen > DataPosInt) + break; + + DataPosInt -= BufferListItem->mSampleLen; + + ++buffers_done; + BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed); + if(!BufferListItem) BufferListItem = BufferLoopItem; + } while(BufferListItem); + } + } while(OutPos < SamplesToDo); + + mFlags |= VoiceIsFading; + + /* Don't update positions and buffers if we were stopping. */ + if UNLIKELY(vstate == Stopping) + { + mPlayState.store(Stopped, std::memory_order_release); + return; + } + + /* Capture the source ID in case it's reset for stopping. */ + const uint SourceID{mSourceID.load(std::memory_order_relaxed)}; + + /* Update voice info */ + mPosition.store(DataPosInt, std::memory_order_relaxed); + mPositionFrac.store(DataPosFrac, std::memory_order_relaxed); + mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed); + if(!BufferListItem) + { + mLoopBuffer.store(nullptr, std::memory_order_relaxed); + mSourceID.store(0u, std::memory_order_relaxed); + } + std::atomic_thread_fence(std::memory_order_release); + + /* Send any events now, after the position/buffer info was updated. */ + const uint enabledevt{Context->mEnabledEvts.load(std::memory_order_acquire)}; + if(buffers_done > 0 && (enabledevt&EventType_BufferCompleted)) + { + RingBuffer *ring{Context->mAsyncEvents.get()}; + auto evt_vec = ring->getWriteVector(); + if(evt_vec.first.len > 0) + { + AsyncEvent *evt{::new(evt_vec.first.buf) AsyncEvent{EventType_BufferCompleted}}; + evt->u.bufcomp.id = SourceID; + evt->u.bufcomp.count = buffers_done; + ring->writeAdvance(1); + } + } + + if(!BufferListItem) + { + /* If the voice just ended, set it to Stopping so the next render + * ensures any residual noise fades to 0 amplitude. 
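The buffer-completed notification above (like the source-stopped one earlier) is produced by placement-constructing an AsyncEvent directly into a slot reserved from the context's async ring buffer, so the mixer thread never allocates. A minimal sketch of that pattern with a hypothetical fixed-size ring; the names and layout here are not the library's RingBuffer API:

#include <atomic>
#include <cstddef>
#include <new>

struct Event { unsigned type; unsigned id; unsigned count; };

// Single-producer/single-consumer ring of raw slots; the producer constructs
// events in place with placement new, the consumer reads and then advances.
struct EventRing {
    static constexpr std::size_t NumSlots{64};
    alignas(Event) unsigned char storage[NumSlots*sizeof(Event)]{};
    std::atomic<std::size_t> writeIdx{0}, readIdx{0};

    bool post(const Event &evt) noexcept
    {
        const std::size_t w{writeIdx.load(std::memory_order_relaxed)};
        if(w - readIdx.load(std::memory_order_acquire) >= NumSlots)
            return false; // ring full: drop the notification
        ::new(storage + (w%NumSlots)*sizeof(Event)) Event{evt};
        writeIdx.store(w+1, std::memory_order_release);
        return true;
    }
};

Since the event type is trivially destructible, the consumer only needs to read the slot and bump readIdx with a release store; no destructor call is required.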
+ */ + mPlayState.store(Stopping, std::memory_order_release); + if((enabledevt&EventType_SourceStateChange)) + SendSourceStoppedEvent(Context, SourceID); + } +} + +void Voice::prepare(DeviceBase *device) +{ + if((mFmtChannels == FmtUHJ2 || mFmtChannels == FmtUHJ3 || mFmtChannels==FmtUHJ4) && !mDecoder) + mDecoder = std::make_unique<UhjDecoder>(); + else if(mFmtChannels != FmtUHJ2 && mFmtChannels != FmtUHJ3 && mFmtChannels != FmtUHJ4) + mDecoder = nullptr; + + /* Clear the stepping value explicitly so the mixer knows not to mix this + * until the update gets applied. + */ + mStep = 0; + + /* Make sure the sample history is cleared. */ + std::fill(mVoiceSamples.begin(), mVoiceSamples.end(), BufferLine{}); + + /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher + * order than the voice. No HF scaling is necessary to mix it. + */ + if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder) + { + const uint8_t *OrderFromChan{(mFmtChannels == FmtBFormat2D) ? + AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()}; + const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder); + + const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)}; + for(auto &chandata : mChans) + { + chandata.mAmbiScale = scales[*(OrderFromChan++)]; + chandata.mAmbiSplitter = splitter; + chandata.mDryParams = DirectParams{}; + std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); + } + mFlags |= VoiceIsAmbisonic; + } + else + { + for(auto &chandata : mChans) + { + chandata.mDryParams = DirectParams{}; + std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{}); + } + mFlags &= ~VoiceIsAmbisonic; + } + + if(device->AvgSpeakerDist > 0.0f) + { + const float w1{SpeedOfSoundMetersPerSec / + (device->AvgSpeakerDist * static_cast<float>(device->Frequency))}; + for(auto &chandata : mChans) + chandata.mDryParams.NFCtrlFilter.init(w1); + } +} diff --git a/core/voice.h b/core/voice.h new file mode 100644 index 00000000..c3347cda --- /dev/null +++ b/core/voice.h @@ -0,0 +1,270 @@ +#ifndef CORE_VOICE_H +#define CORE_VOICE_H + +#include <array> +#include <atomic> +#include <memory> +#include <stddef.h> +#include <string> + +#include "albyte.h" +#include "almalloc.h" +#include "aloptional.h" +#include "alspan.h" +#include "bufferline.h" +#include "buffer_storage.h" +#include "devformat.h" +#include "filters/biquad.h" +#include "filters/nfc.h" +#include "filters/splitter.h" +#include "mixer/defs.h" +#include "mixer/hrtfdefs.h" +#include "resampler_limits.h" +#include "uhjfilter.h" +#include "vector.h" + +struct ContextBase; +struct DeviceBase; +struct EffectSlot; +enum class DistanceModel : unsigned char; + +using uint = unsigned int; + + +#define MAX_SENDS 6 + + +enum class SpatializeMode : unsigned char { + Off, + On, + Auto +}; + +enum class DirectMode : unsigned char { + Off, + DropMismatch, + RemixMismatch +}; + + +/* Maximum number of extra source samples that may need to be loaded, for + * resampling or conversion purposes. 
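In prepare() above, each channel's near-field control filter is seeded with w1 = SpeedOfSound / (AvgSpeakerDist * SampleRate), the value the NFC filters use to represent the average speaker distance at the output rate. A quick worked sketch with assumed values (roughly 343 m/s, 1.5 m average speaker distance, 48 kHz output):

#include <cstdio>

int main()
{
    // Assumed illustration values, not taken from any particular device.
    const float speedOfSound{343.3f};   // metres per second
    const float avgSpeakerDist{1.5f};   // metres
    const float sampleRate{48000.0f};   // Hz

    const float w1{speedOfSound / (avgSpeakerDist * sampleRate)};
    std::printf("w1 = %f\n", w1); // ~0.004768
}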
+ */ +constexpr uint MaxPostVoiceLoad{MaxResamplerEdge + UhjDecoder::sFilterDelay}; + + +enum { + AF_None = 0, + AF_LowPass = 1, + AF_HighPass = 2, + AF_BandPass = AF_LowPass | AF_HighPass +}; + + +struct DirectParams { + BiquadFilter LowPass; + BiquadFilter HighPass; + + NfcFilter NFCtrlFilter; + + struct { + HrtfFilter Old; + HrtfFilter Target; + alignas(16) std::array<float,HrtfHistoryLength> History; + } Hrtf; + + struct { + std::array<float,MAX_OUTPUT_CHANNELS> Current; + std::array<float,MAX_OUTPUT_CHANNELS> Target; + } Gains; +}; + +struct SendParams { + BiquadFilter LowPass; + BiquadFilter HighPass; + + struct { + std::array<float,MAX_OUTPUT_CHANNELS> Current; + std::array<float,MAX_OUTPUT_CHANNELS> Target; + } Gains; +}; + + +struct VoiceBufferItem { + std::atomic<VoiceBufferItem*> mNext{nullptr}; + + CallbackType mCallback{nullptr}; + void *mUserData{nullptr}; + + uint mSampleLen{0u}; + uint mLoopStart{0u}; + uint mLoopEnd{0u}; + + al::byte *mSamples{nullptr}; +}; + + +struct VoiceProps { + float Pitch; + float Gain; + float OuterGain; + float MinGain; + float MaxGain; + float InnerAngle; + float OuterAngle; + float RefDistance; + float MaxDistance; + float RolloffFactor; + std::array<float,3> Position; + std::array<float,3> Velocity; + std::array<float,3> Direction; + std::array<float,3> OrientAt; + std::array<float,3> OrientUp; + bool HeadRelative; + DistanceModel mDistanceModel; + Resampler mResampler; + DirectMode DirectChannels; + SpatializeMode mSpatializeMode; + + bool DryGainHFAuto; + bool WetGainAuto; + bool WetGainHFAuto; + float OuterGainHF; + + float AirAbsorptionFactor; + float RoomRolloffFactor; + float DopplerFactor; + + std::array<float,2> StereoPan; + + float Radius; + + /** Direct filter and auxiliary send info. */ + struct { + float Gain; + float GainHF; + float HFReference; + float GainLF; + float LFReference; + } Direct; + struct SendData { + EffectSlot *Slot; + float Gain; + float GainHF; + float HFReference; + float GainLF; + float LFReference; + } Send[MAX_SENDS]; +}; + +struct VoicePropsItem : public VoiceProps { + std::atomic<VoicePropsItem*> next{nullptr}; + + DEF_NEWDEL(VoicePropsItem) +}; + +constexpr uint VoiceIsStatic{ 1u<<0}; +constexpr uint VoiceIsCallback{ 1u<<1}; +constexpr uint VoiceIsAmbisonic{ 1u<<2}; /* Needs HF scaling for ambisonic upsampling. */ +constexpr uint VoiceCallbackStopped{1u<<3}; +constexpr uint VoiceIsFading{ 1u<<4}; /* Use gain stepping for smooth transitions. */ +constexpr uint VoiceHasHrtf{ 1u<<5}; +constexpr uint VoiceHasNfc{ 1u<<6}; + +struct Voice { + enum State { + Stopped, + Playing, + Stopping, + Pending + }; + + std::atomic<VoicePropsItem*> mUpdate{nullptr}; + + VoiceProps mProps; + + std::atomic<uint> mSourceID{0u}; + std::atomic<State> mPlayState{Stopped}; + std::atomic<bool> mPendingChange{false}; + + /** + * Source offset in samples, relative to the currently playing buffer, NOT + * the whole queue. + */ + std::atomic<uint> mPosition; + /** Fractional (fixed-point) offset to the next sample. */ + std::atomic<uint> mPositionFrac; + + /* Current buffer queue item being played. */ + std::atomic<VoiceBufferItem*> mCurrentBuffer; + + /* Buffer queue item to loop to at end of queue (will be NULL for non- + * looping voices). + */ + std::atomic<VoiceBufferItem*> mLoopBuffer; + + /* Properties for the attached buffer(s). 
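The VoiceIs... and VoiceHas... constants above are single-bit flags stored together in Voice::mFlags, and mix() tests combinations of them against a mask in one expression (for example, callback-driven and not yet stopped). A tiny sketch of that idiom, repeating the two relevant bit values under shorter names:

using uint = unsigned int;

constexpr uint IsCallback{1u<<1};
constexpr uint CallbackStopped{1u<<3};

// Masking with both bits and comparing against only IsCallback checks that
// the first flag is set and the second is clear in a single expression.
constexpr bool callback_active(uint flags) noexcept
{ return (flags & (IsCallback|CallbackStopped)) == IsCallback; }

static_assert(callback_active(IsCallback), "callback set, not stopped");
static_assert(!callback_active(IsCallback|CallbackStopped), "callback stopped");
static_assert(!callback_active(0u), "not a callback voice");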
*/ + FmtChannels mFmtChannels; + FmtType mFmtType; + uint mFrequency; + uint mFrameSize; + AmbiLayout mAmbiLayout; + AmbiScaling mAmbiScaling; + uint mAmbiOrder; + + std::unique_ptr<UhjDecoder> mDecoder; + + /** Current target parameters used for mixing. */ + uint mStep{0}; + + ResamplerFunc mResampler; + + InterpState mResampleState; + + uint mFlags{}; + uint mNumCallbackSamples{0}; + + struct TargetData { + int FilterType; + al::span<FloatBufferLine> Buffer; + }; + TargetData mDirect; + std::array<TargetData,MAX_SENDS> mSend; + + /* The first MaxResamplerPadding/2 elements are the sample history from the + * previous mix, with an additional MaxResamplerPadding/2 elements that are + * now current (which may be overwritten if the buffer data is still + * available). + */ + static constexpr size_t LineSize{BufferLineSize + MaxResamplerPadding + + UhjDecoder::sFilterDelay}; + using BufferLine = std::array<float,LineSize>; + al::vector<BufferLine,16> mVoiceSamples{2}; + + struct ChannelData { + float mAmbiScale; + BandSplitter mAmbiSplitter; + + DirectParams mDryParams; + std::array<SendParams,MAX_SENDS> mWetParams; + }; + al::vector<ChannelData> mChans{2}; + + Voice() = default; + ~Voice() { delete mUpdate.exchange(nullptr, std::memory_order_acq_rel); } + + Voice(const Voice&) = delete; + Voice& operator=(const Voice&) = delete; + + void mix(const State vstate, ContextBase *Context, const uint SamplesToDo); + + void prepare(DeviceBase *device); + + static void InitMixer(al::optional<std::string> resampler); + + DEF_NEWDEL(Voice) +}; + +extern Resampler ResamplerDefault; + +#endif /* CORE_VOICE_H */ diff --git a/core/voice_change.h b/core/voice_change.h new file mode 100644 index 00000000..ddc6186f --- /dev/null +++ b/core/voice_change.h @@ -0,0 +1,31 @@ +#ifndef VOICE_CHANGE_H +#define VOICE_CHANGE_H + +#include <atomic> + +#include "almalloc.h" + +struct Voice; + +using uint = unsigned int; + + +enum class VChangeState { + Reset, + Stop, + Play, + Pause, + Restart +}; +struct VoiceChange { + Voice *mOldVoice{nullptr}; + Voice *mVoice{nullptr}; + uint mSourceID{0}; + VChangeState mState{}; + + std::atomic<VoiceChange*> mNext{nullptr}; + + DEF_NEWDEL(VoiceChange) +}; + +#endif /* VOICE_CHANGE_H */ |
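A closing note on voice_change.h: play/stop/pause requests are modelled as VoiceChange nodes chained through an atomic mNext pointer, which lets the API thread hand a batch of state changes to the mixer without locking. A generic sketch of that hand-off shape using release/acquire ordering; this is not the library's actual queue protocol, only the basic idea:

#include <atomic>
#include <cstdio>

struct Change {
    int state{};
    std::atomic<Change*> next{nullptr};
};

// Producer: link a fully-constructed node after the current tail. The
// release store makes the node's contents visible to the consumer.
void publish(Change *tail, Change *node) noexcept
{ tail->next.store(node, std::memory_order_release); }

// Consumer: walk the chain with acquire loads and act on each change.
void drain(const Change *head)
{
    for(const Change *cur{head};cur != nullptr;
        cur = cur->next.load(std::memory_order_acquire))
        std::printf("apply change: state %d\n", cur->state);
}

Each real node additionally carries the affected Voice pointers and source ID shown in the struct above.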