about summary refs log tree commit diff stats
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r-- core/ambidefs.cpp 44
-rw-r--r-- core/ambidefs.h 4
-rw-r--r-- core/async_event.h 55
-rw-r--r-- core/bformatdec.cpp 263
-rw-r--r-- core/bformatdec.h 71
-rw-r--r-- core/bsinc_defs.h 6
-rw-r--r-- core/bsinc_tables.cpp 103
-rw-r--r-- core/buffer_storage.cpp 41
-rw-r--r-- core/buffer_storage.h 75
-rw-r--r-- core/bufferline.h 3
-rw-r--r-- core/context.cpp 5
-rw-r--r-- core/context.h 171
-rw-r--r-- core/converter.cpp 371
-rw-r--r-- core/converter.h 59
-rw-r--r-- core/dbus_wrap.cpp 46
-rw-r--r-- core/dbus_wrap.h 75
-rw-r--r-- core/device.cpp 7
-rw-r--r-- core/device.h 290
-rw-r--r-- core/front_stablizer.h 36
-rw-r--r-- core/helpers.cpp 514
-rw-r--r-- core/helpers.h 18
-rw-r--r-- core/hrtf.cpp 1447
-rw-r--r-- core/hrtf.h 90
-rw-r--r-- core/logging.h 7
-rw-r--r-- core/mixer.cpp 126
-rw-r--r-- core/mixer.h 101
-rw-r--r-- core/mixer/defs.h 9
-rw-r--r-- core/mixer/hrtfbase.h 12
-rw-r--r-- core/mixer/hrtfdefs.h 5
-rw-r--r-- core/mixer/mixer_c.cpp 18
-rw-r--r-- core/mixer/mixer_neon.cpp 18
-rw-r--r-- core/mixer/mixer_sse.cpp 18
-rw-r--r-- core/mixer/mixer_sse2.cpp 8
-rw-r--r-- core/resampler_limits.h 12
-rw-r--r-- core/rtkit.cpp 240
-rw-r--r-- core/rtkit.h 80
-rw-r--r-- core/uhjfilter.cpp 323
-rw-r--r-- core/uhjfilter.h 55
-rw-r--r-- core/uiddefs.cpp 37
-rw-r--r-- core/voice.cpp 849
-rw-r--r-- core/voice.h 270
-rw-r--r-- core/voice_change.h 31
42 files changed, 5689 insertions, 324 deletions
diff --git a/core/ambidefs.cpp b/core/ambidefs.cpp
new file mode 100644
index 00000000..2725748e
--- /dev/null
+++ b/core/ambidefs.cpp
@@ -0,0 +1,44 @@
+
+#include "config.h"
+
+#include "ambidefs.h"
+
+#include <cassert>
+
+
+namespace {
+
+constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale{{ /* Table GetDecoderHFScales returns for orders below 2; any entries not listed are zero-initialized. */
+ 1.00000000e+00f, 1.00000000e+00f
+}};
+constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale2O{{ /* Table GetDecoderHFScales returns for order 2. */
+ 7.45355990e-01f, 1.00000000e+00f, 1.00000000e+00f
+}};
+constexpr std::array<float,MaxAmbiOrder+1> Ambi3DDecoderHFScale3O{{ /* Table GetDecoderHFScales returns for order 3 and above. */
+ 5.89792205e-01f, 8.79693856e-01f, 1.00000000e+00f, 1.00000000e+00f
+}};
+
+inline auto& GetDecoderHFScales(uint order) noexcept /* Picks the per-order HF scale table for the given ambisonic order. */
+{
+ if(order >= 3) return Ambi3DDecoderHFScale3O; /* orders above 3 share the third-order table */
+ if(order == 2) return Ambi3DDecoderHFScale2O;
+ return Ambi3DDecoderHFScale; /* orders 0 and 1 */
+}
+
+} // namespace
+
+auto AmbiScale::GetHFOrderScales(const uint in_order, const uint out_order) noexcept /* Returns, per order, the in_order HF scale divided by the out_order HF scale. */
+ -> std::array<float,MaxAmbiOrder+1>
+{
+ std::array<float,MaxAmbiOrder+1> ret{}; /* zero-filled; entries above in_order are left at 0 */
+
+ assert(out_order >= in_order); /* assert-only check: the output order must not be lower than the input order */
+
+ const auto &target = GetDecoderHFScales(out_order);
+ const auto &input = GetDecoderHFScales(in_order);
+
+ for(size_t i{0};i < in_order+1;++i) /* NOTE(review): nothing here bounds in_order by MaxAmbiOrder; presumably callers guarantee it */
+ ret[i] = input[i] / target[i];
+
+ return ret;
+}
diff --git a/core/ambidefs.h b/core/ambidefs.h
index a72f7b78..22739359 100644
--- a/core/ambidefs.h
+++ b/core/ambidefs.h
@@ -97,6 +97,10 @@ struct AmbiScale {
}};
return ret;
}
+
+ /* Retrieves per-order HF scaling factors for "upsampling" ambisonic data from in_order to out_order. */
+ static std::array<float,MaxAmbiOrder+1> GetHFOrderScales(const uint in_order,
+ const uint out_order) noexcept;
};
struct AmbiIndex {
diff --git a/core/async_event.h b/core/async_event.h
new file mode 100644
index 00000000..054f0563
--- /dev/null
+++ b/core/async_event.h
@@ -0,0 +1,55 @@
+#ifndef CORE_EVENT_H
+#define CORE_EVENT_H
+
+#include "almalloc.h"
+
+struct EffectState;
+
+using uint = unsigned int;
+
+
+enum { /* Event type values; the user event types are distinct single bits. */
+ /* End event thread processing. */
+ EventType_KillThread = 0,
+
+ /* User event types. */
+ EventType_SourceStateChange = 1<<0,
+ EventType_BufferCompleted = 1<<1,
+ EventType_Disconnected = 1<<2,
+
+ /* Internal events. */
+ EventType_ReleaseEffectState = 65536, /* equal to 1<<16 */
+};
+
+struct AsyncEvent { /* An event type tag plus a union holding that event's payload. */
+ enum class SrcState {
+ Reset,
+ Stop,
+ Play,
+ Pause
+ };
+
+ uint EnumType{0u}; /* set from the constructor's type argument; presumably one of the EventType_* values */
+ union {
+ char dummy; /* first member, so it is the one u{} initializes */
+ struct {
+ uint id;
+ SrcState state;
+ } srcstate; /* presumably the payload for EventType_SourceStateChange */
+ struct {
+ uint id;
+ uint count;
+ } bufcomp; /* presumably the payload for EventType_BufferCompleted */
+ struct {
+ char msg[244];
+ } disconnect; /* presumably the payload for EventType_Disconnected */
+ EffectState *mEffectState; /* presumably the payload for EventType_ReleaseEffectState */
+ } u{};
+
+ AsyncEvent() noexcept = default;
+ constexpr AsyncEvent(uint type) noexcept : EnumType{type} { } /* leaves the payload union at its default */
+
+ DISABLE_ALLOC()
+};
+
+#endif
diff --git a/core/bformatdec.cpp b/core/bformatdec.cpp
new file mode 100644
index 00000000..6bf85ec9
--- /dev/null
+++ b/core/bformatdec.cpp
@@ -0,0 +1,263 @@
+
+#include "config.h"
+
+#include "bformatdec.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <utility>
+
+#include "almalloc.h"
+#include "ambdec.h"
+#include "filters/splitter.h"
+#include "front_stablizer.h"
+#include "math_defs.h"
+#include "mixer.h"
+#include "opthelpers.h"
+
+
+namespace {
+
+inline auto& GetAmbiScales(AmbDecScale scaletype) noexcept /* Maps the decoder config's coefficient scale type to the matching AmbiScale table. */
+{
+ if(scaletype == AmbDecScale::FuMa) return AmbiScale::FromFuMa();
+ if(scaletype == AmbDecScale::SN3D) return AmbiScale::FromSN3D();
+ return AmbiScale::FromN3D(); /* every other value uses the N3D table */
+}
+
+} // namespace
+
+
+BFormatDec::BFormatDec(const AmbDecConf *conf, const bool allow_2band, const size_t inchans, /* Builds per-channel decoder gains from an AmbDec configuration. */
+ const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS],
+ std::unique_ptr<FrontStablizer> stablizer)
+ : mStablizer{std::move(stablizer)}, mDualBand{allow_2band && (conf->FreqBands == 2)} /* dual-band only if allowed and the config has two bands */
+ , mChannelDec{inchans}
+{
+ const bool periphonic{(conf->ChanMask&AmbiPeriphonicMask) != 0};
+ auto&& coeff_scale = GetAmbiScales(conf->CoeffScale);
+
+ if(!mDualBand)
+ {
+ for(size_t j{0},k{0};j < mChannelDec.size();++j) /* j: decoder input channel; k: matrix column, advanced only for channels present in ChanMask */
+ {
+ const size_t acn{periphonic ? j : AmbiIndex::FromACN2D()[j]}; /* non-periphonic configs look j up in the FromACN2D table */
+ if(!(conf->ChanMask&(1u<<acn))) continue; /* channel not present in the config */
+ const size_t order{AmbiIndex::OrderFromChannel()[acn]};
+ const float gain{conf->HFOrderGain[order] / coeff_scale[acn]}; /* single-band decoding uses the HF order gains */
+ for(size_t i{0u};i < conf->NumSpeakers;++i)
+ {
+ const size_t chanidx{chanmap[i]}; /* output channel index for speaker i */
+ mChannelDec[j].mGains.Single[chanidx] = conf->Matrix[i][k] * gain;
+ }
+ ++k;
+ }
+ }
+ else
+ {
+ mChannelDec[0].mXOver.init(conf->XOverFreq / static_cast<float>(srate)); /* crossover frequency as a fraction of the sample rate */
+ for(size_t j{1};j < mChannelDec.size();++j)
+ mChannelDec[j].mXOver = mChannelDec[0].mXOver; /* copy the initialized splitter to the other channels */
+
+ const float ratio{std::pow(10.0f, conf->XOverRatio / 40.0f)}; /* multiplies the HF gains and divides the LF gains below */
+ for(size_t j{0},k{0};j < mChannelDec.size();++j) /* same j/k walk as the single-band branch */
+ {
+ const size_t acn{periphonic ? j : AmbiIndex::FromACN2D()[j]};
+ if(!(conf->ChanMask&(1u<<acn))) continue;
+ const size_t order{AmbiIndex::OrderFromChannel()[acn]};
+ const float hfGain{conf->HFOrderGain[order] * ratio / coeff_scale[acn]};
+ const float lfGain{conf->LFOrderGain[order] / ratio / coeff_scale[acn]};
+ for(size_t i{0u};i < conf->NumSpeakers;++i)
+ {
+ const size_t chanidx{chanmap[i]};
+ mChannelDec[j].mGains.Dual[sHFBand][chanidx] = conf->HFMatrix[i][k] * hfGain;
+ mChannelDec[j].mGains.Dual[sLFBand][chanidx] = conf->LFMatrix[i][k] * lfGain;
+ }
+ ++k;
+ }
+ }
+}
+
+BFormatDec::BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs, /* Builds the decoder from explicit coefficient arrays; dual-band when coeffslf is non-empty. */
+ const al::span<const ChannelDec> coeffslf, std::unique_ptr<FrontStablizer> stablizer)
+ : mStablizer{std::move(stablizer)}, mDualBand{!coeffslf.empty()}, mChannelDec{inchans}
+{
+ if(!mDualBand)
+ {
+ for(size_t j{0};j < mChannelDec.size();++j)
+ {
+ float *outcoeffs{mChannelDec[j].mGains.Single};
+ for(const ChannelDec &incoeffs : coeffs) /* the n-th span element supplies the n-th gain: Single[n] = coeffs[n][j] */
+ *(outcoeffs++) = incoeffs[j]; /* NOTE(review): the span length is not checked against the gain array size here */
+ }
+ }
+ else
+ {
+ for(size_t j{0};j < mChannelDec.size();++j)
+ {
+ float *outcoeffs{mChannelDec[j].mGains.Dual[sHFBand]}; /* coeffs fills the HF band */
+ for(const ChannelDec &incoeffs : coeffs)
+ *(outcoeffs++) = incoeffs[j];
+
+ outcoeffs = mChannelDec[j].mGains.Dual[sLFBand]; /* coeffslf fills the LF band */
+ for(const ChannelDec &incoeffs : coeffslf)
+ *(outcoeffs++) = incoeffs[j];
+ }
+ }
+}
+
+
+void BFormatDec::process(const al::span<FloatBufferLine> OutBuffer, /* Mixes each input channel into OutBuffer using that channel's decoder gains. */
+ const FloatBufferLine *InSamples, const size_t SamplesToDo)
+{
+ ASSUME(SamplesToDo > 0);
+
+ if(mDualBand)
+ {
+ const al::span<float> hfSamples{mSamples[sHFBand].data(), SamplesToDo};
+ const al::span<float> lfSamples{mSamples[sLFBand].data(), SamplesToDo};
+ for(auto &chandec : mChannelDec)
+ {
+ chandec.mXOver.process({InSamples->data(), SamplesToDo}, hfSamples.data(), /* split this input channel into the HF and LF scratch buffers */
+ lfSamples.data());
+ MixSamples(hfSamples, OutBuffer, chandec.mGains.Dual[sHFBand], /* the same gain array is passed twice; presumably current and target gains */
+ chandec.mGains.Dual[sHFBand], 0, 0);
+ MixSamples(lfSamples, OutBuffer, chandec.mGains.Dual[sLFBand],
+ chandec.mGains.Dual[sLFBand], 0, 0);
+ ++InSamples; /* advance to the next input channel's buffer line */
+ }
+ }
+ else
+ {
+ for(auto &chandec : mChannelDec)
+ {
+ MixSamples({InSamples->data(), SamplesToDo}, OutBuffer, chandec.mGains.Single, /* single band: mix the input line directly */
+ chandec.mGains.Single, 0, 0);
+ ++InSamples;
+ }
+ }
+}
+
+void BFormatDec::processStablize(const al::span<FloatBufferLine> OutBuffer, /* Decodes via process(), then re-pans the decoded mid signal between left/right (lidx, ridx) and center (cidx). */
+ const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx,
+ const size_t SamplesToDo)
+{
+ ASSUME(SamplesToDo > 0);
+
+ /* Move the existing direct L/R signal out so it doesn't get processed by
+ * the stablizer. Add a delay to it so it stays aligned with the stablizer
+ * delay.
+ */
+ float *RESTRICT mid{al::assume_aligned<16>(mStablizer->MidDirect.data())};
+ float *RESTRICT side{al::assume_aligned<16>(mStablizer->Side.data())};
+ for(size_t i{0};i < SamplesToDo;++i)
+ {
+ mid[FrontStablizer::DelayLength+i] = OutBuffer[lidx][i] + OutBuffer[ridx][i]; /* stored after the first DelayLength entries, which hold samples carried over from the previous call */
+ side[FrontStablizer::DelayLength+i] = OutBuffer[lidx][i] - OutBuffer[ridx][i];
+ }
+ std::fill_n(OutBuffer[lidx].begin(), SamplesToDo, 0.0f);
+ std::fill_n(OutBuffer[ridx].begin(), SamplesToDo, 0.0f);
+
+ /* Decode the B-Format input to OutBuffer. */
+ process(OutBuffer, InSamples, SamplesToDo);
+
+ /* Apply a delay to all channels, except the front-left and front-right, so
+ * they maintain correct timing.
+ */
+ const size_t NumChannels{OutBuffer.size()};
+ for(size_t i{0u};i < NumChannels;i++)
+ {
+ if(i == lidx || i == ridx)
+ continue;
+
+ auto &DelayBuf = mStablizer->DelayBuf[i];
+ auto buffer_end = OutBuffer[i].begin() + SamplesToDo;
+ if LIKELY(SamplesToDo >= FrontStablizer::DelayLength) /* at least a full delay's worth of new samples */
+ {
+ auto delay_end = std::rotate(OutBuffer[i].begin(), /* move the newest DelayLength samples to the front... */
+ buffer_end - FrontStablizer::DelayLength, buffer_end);
+ std::swap_ranges(OutBuffer[i].begin(), delay_end, DelayBuf.begin()); /* ...then exchange them with the stored delayed samples */
+ }
+ else /* fewer new samples than the delay length */
+ {
+ auto delay_start = std::swap_ranges(OutBuffer[i].begin(), buffer_end,
+ DelayBuf.begin());
+ std::rotate(DelayBuf.begin(), delay_start, DelayBuf.end()); /* keep the delay buffer ordered oldest-first */
+ }
+ }
+
+ /* Include the side signal for what was just decoded. */
+ for(size_t i{0};i < SamplesToDo;++i)
+ side[FrontStablizer::DelayLength+i] += OutBuffer[lidx][i] - OutBuffer[ridx][i];
+
+ /* Combine the delayed mid signal with the decoded mid signal. Note that
+ * the samples are stored and combined in reverse, so the newest samples
+ * are at the front and the oldest at the back.
+ */
+ al::span<float> tmpbuf{mStablizer->TempBuf.data(), SamplesToDo+FrontStablizer::DelayLength};
+ auto tmpiter = tmpbuf.begin() + SamplesToDo;
+ std::copy(mStablizer->MidDelay.cbegin(), mStablizer->MidDelay.cend(), tmpiter);
+ for(size_t i{0};i < SamplesToDo;++i)
+ *--tmpiter = OutBuffer[lidx][i] + OutBuffer[ridx][i];
+ /* Save the newest samples for next time. */
+ std::copy_n(tmpbuf.cbegin(), mStablizer->MidDelay.size(), mStablizer->MidDelay.begin());
+
+ /* Apply an all-pass on the reversed signal, then reverse the samples to
+ * get the forward signal with a reversed phase shift. The future samples
+ * are included with the all-pass to reduce the error in the output
+ * samples (the smaller the delay, the more error is introduced).
+ */
+ mStablizer->MidFilter.applyAllpass(tmpbuf);
+ tmpbuf = tmpbuf.subspan<FrontStablizer::DelayLength>(); /* drop the first DelayLength (newest) entries, leaving SamplesToDo samples */
+ std::reverse(tmpbuf.begin(), tmpbuf.end());
+
+ /* Now apply the band-splitter, combining its phase shift with the reversed
+ * phase shift, restoring the original phase on the split signal.
+ */
+ mStablizer->MidFilter.process(tmpbuf, mStablizer->MidHF.data(), mStablizer->MidLF.data());
+
+ /* This pans the separate low- and high-frequency signals between being on
+ * the center channel and the left+right channels. The low-frequency signal
+ * is panned 1/3rd toward center and the high-frequency signal is panned
+ * 1/4th toward center. These values can be tweaked.
+ */
+ const float cos_lf{std::cos(1.0f/3.0f * (al::MathDefs<float>::Pi()*0.5f))};
+ const float cos_hf{std::cos(1.0f/4.0f * (al::MathDefs<float>::Pi()*0.5f))};
+ const float sin_lf{std::sin(1.0f/3.0f * (al::MathDefs<float>::Pi()*0.5f))};
+ const float sin_hf{std::sin(1.0f/4.0f * (al::MathDefs<float>::Pi()*0.5f))};
+ for(size_t i{0};i < SamplesToDo;i++)
+ {
+ const float m{mStablizer->MidLF[i]*cos_lf + mStablizer->MidHF[i]*cos_hf + mid[i]}; /* mid[i]/side[i] are read from the buffer start, i.e. DelayLength samples behind what was written above */
+ const float c{mStablizer->MidLF[i]*sin_lf + mStablizer->MidHF[i]*sin_hf};
+ const float s{side[i]};
+
+ /* The generated center channel signal adds to the existing signal,
+ * while the modified left and right channels replace.
+ */
+ OutBuffer[lidx][i] = (m + s) * 0.5f;
+ OutBuffer[ridx][i] = (m - s) * 0.5f;
+ OutBuffer[cidx][i] += c * 0.5f;
+ }
+ /* Move the delayed mid/side samples to the front for next time. */
+ auto mid_end = mStablizer->MidDirect.cbegin() + SamplesToDo;
+ std::copy(mid_end, mid_end+FrontStablizer::DelayLength, mStablizer->MidDirect.begin());
+ auto side_end = mStablizer->Side.cbegin() + SamplesToDo;
+ std::copy(side_end, side_end+FrontStablizer::DelayLength, mStablizer->Side.begin());
+}
+
+
+std::unique_ptr<BFormatDec> BFormatDec::Create(const AmbDecConf *conf, const bool allow_2band, /* Factory for the AmbDec-config constructor; forwards every argument. */
+ const size_t inchans, const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS],
+ std::unique_ptr<FrontStablizer> stablizer)
+{
+ return std::unique_ptr<BFormatDec>{new(FamCount(inchans)) /* FamCount(inchans) presumably sizes the allocation for inchans mChannelDec entries */
+ BFormatDec{conf, allow_2band, inchans, srate, chanmap, std::move(stablizer)}};
+}
+std::unique_ptr<BFormatDec> BFormatDec::Create(const size_t inchans, /* Factory for the coefficient-span constructor; forwards every argument. */
+ const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf,
+ std::unique_ptr<FrontStablizer> stablizer)
+{
+ return std::unique_ptr<BFormatDec>{new(FamCount(inchans)) /* FamCount(inchans) presumably sizes the allocation for inchans mChannelDec entries */
+ BFormatDec{inchans, coeffs, coeffslf, std::move(stablizer)}};
+}
diff --git a/core/bformatdec.h b/core/bformatdec.h
new file mode 100644
index 00000000..a0ae3f27
--- /dev/null
+++ b/core/bformatdec.h
@@ -0,0 +1,71 @@
+#ifndef CORE_BFORMATDEC_H
+#define CORE_BFORMATDEC_H
+
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "devformat.h"
+#include "filters/splitter.h"
+
+struct AmbDecConf;
+struct FrontStablizer;
+
+
+using ChannelDec = std::array<float,MaxAmbiChannels>;
+
+class BFormatDec { /* Decodes ambisonic (B-Format) input to output channels, in one or two frequency bands. */
+ static constexpr size_t sHFBand{0};
+ static constexpr size_t sLFBand{1};
+ static constexpr size_t sNumBands{2};
+
+ struct ChannelDecoder { /* Gains and crossover filter for one input channel. */
+ union MatrixU {
+ float Dual[sNumBands][MAX_OUTPUT_CHANNELS]; /* indexed [sHFBand or sLFBand][output channel] */
+ float Single[MAX_OUTPUT_CHANNELS]; /* indexed [output channel] */
+ } mGains{};
+
+ /* NOTE: BandSplitter filter is unused with single-band decoding. */
+ BandSplitter mXOver;
+ };
+
+ alignas(16) std::array<FloatBufferLine,2> mSamples; /* two scratch buffer lines, indexed by sHFBand/sLFBand */
+
+ const std::unique_ptr<FrontStablizer> mStablizer; /* may be null; see hasStablizer() */
+ const bool mDualBand{false};
+
+ al::FlexArray<ChannelDecoder> mChannelDec; /* one entry per input channel; named in DEF_FAM_NEWDEL below */
+
+public:
+ BFormatDec(const AmbDecConf *conf, const bool allow_2band, const size_t inchans,
+ const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS],
+ std::unique_ptr<FrontStablizer> stablizer);
+ BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs,
+ const al::span<const ChannelDec> coeffslf, std::unique_ptr<FrontStablizer> stablizer);
+
+ bool hasStablizer() const noexcept { return mStablizer != nullptr; }; /* true when a FrontStablizer was supplied at construction */
+
+ /* Decodes the ambisonic input to the given output channels. */
+ void process(const al::span<FloatBufferLine> OutBuffer, const FloatBufferLine *InSamples,
+ const size_t SamplesToDo);
+
+ /* Decodes the ambisonic input to the given output channels with stablization. */
+ void processStablize(const al::span<FloatBufferLine> OutBuffer,
+ const FloatBufferLine *InSamples, const size_t lidx, const size_t ridx, const size_t cidx,
+ const size_t SamplesToDo);
+
+ static std::unique_ptr<BFormatDec> Create(const AmbDecConf *conf, const bool allow_2band, /* factory overloads mirroring the two constructors */
+ const size_t inchans, const uint srate, const uint (&chanmap)[MAX_OUTPUT_CHANNELS],
+ std::unique_ptr<FrontStablizer> stablizer);
+ static std::unique_ptr<BFormatDec> Create(const size_t inchans,
+ const al::span<const ChannelDec> coeffs, const al::span<const ChannelDec> coeffslf,
+ std::unique_ptr<FrontStablizer> stablizer);
+
+ DEF_FAM_NEWDEL(BFormatDec, mChannelDec)
+};
+
+#endif /* CORE_BFORMATDEC_H */
diff --git a/core/bsinc_defs.h b/core/bsinc_defs.h
index 43865289..f2958231 100644
--- a/core/bsinc_defs.h
+++ b/core/bsinc_defs.h
@@ -7,10 +7,4 @@ constexpr unsigned int BSincScaleCount{1 << BSincScaleBits};
constexpr unsigned int BSincPhaseBits{5};
constexpr unsigned int BSincPhaseCount{1 << BSincPhaseBits};
-/* The maximum number of sample points for the bsinc filters. The max points
- * includes the doubling for downsampling, so the maximum number of base sample
- * points is 24, which is 23rd order.
- */
-constexpr unsigned int BSincPointsMax{48};
-
#endif /* CORE_BSINC_DEFS_H */
diff --git a/core/bsinc_tables.cpp b/core/bsinc_tables.cpp
index 315e1448..ff73c301 100644
--- a/core/bsinc_tables.cpp
+++ b/core/bsinc_tables.cpp
@@ -9,6 +9,7 @@
#include <memory>
#include <stdexcept>
+#include "core/mixer/defs.h"
#include "math_defs.h"
@@ -24,7 +25,8 @@ using uint = unsigned int;
*/
constexpr double Sinc(const double x)
{
- if(!(x > 1e-15 || x < -1e-15))
+ constexpr double epsilon{std::numeric_limits<double>::epsilon()};
+ if(!(x > epsilon || x < -epsilon))
return 1.0;
return std::sin(al::MathDefs<double>::Pi()*x) / (al::MathDefs<double>::Pi()*x);
}
@@ -35,7 +37,7 @@ constexpr double Sinc(const double x)
* I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k)
* = sum_{k=0}^inf ((x / 2)^k / k!)^2
*/
-constexpr double BesselI_0(const double x)
+constexpr double BesselI_0(const double x) noexcept
{
/* Start at k=1 since k=0 is trivial. */
const double x2{x / 2.0};
@@ -82,7 +84,7 @@ constexpr double Kaiser(const double beta, const double k, const double besseli_
/* Calculates the (normalized frequency) transition width of the Kaiser window.
* Rejection is in dB.
*/
-constexpr double CalcKaiserWidth(const double rejection, const uint order)
+constexpr double CalcKaiserWidth(const double rejection, const uint order) noexcept
{
if(rejection > 21.19)
return (rejection - 7.95) / (order * 2.285 * al::MathDefs<double>::Tau());
@@ -122,7 +124,7 @@ struct BSincHeader {
uint num_points{Order+1};
for(uint si{0};si < BSincScaleCount;++si)
{
- const double scale{scaleBase + (scaleRange * si / (BSincScaleCount-1))};
+ const double scale{scaleBase + (scaleRange * (si+1) / BSincScaleCount)};
const uint a_{std::min(static_cast<uint>(num_points / 2.0 / scale), num_points)};
const uint m{2 * a_};
@@ -144,21 +146,33 @@ constexpr BSincHeader bsinc24_hdr{60, 23};
* namespace while also being used as non-type template parameters.
*/
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
+
+/* The number of sample points is double the a value (rounded up to a multiple
+ * of 4), and scale index 0 includes the doubling for downsampling. bsinc24 is
+ * currently the highest quality filter, and will use the most sample points.
+ */
+constexpr uint BSincPointsMax{(bsinc24_hdr.a[0]*2 + 3) & ~3u};
+static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+
template<size_t total_size>
struct BSincFilterArray {
alignas(16) std::array<float, total_size> mTable;
+ const BSincHeader &hdr;
- BSincFilterArray(const BSincHeader &hdr)
+ BSincFilterArray(const BSincHeader &hdr_) : hdr{hdr_}
+ {
#else
template<const BSincHeader &hdr>
struct BSincFilterArray {
- alignas(16) std::array<float, hdr.total_size> mTable;
+ alignas(16) std::array<float, hdr.total_size> mTable{};
BSincFilterArray()
-#endif
{
- using filter_type = double[][BSincPhaseCount+1][BSincPointsMax];
- auto filter = std::make_unique<filter_type>(BSincScaleCount);
+ constexpr uint BSincPointsMax{(hdr.a[0]*2 + 3) & ~3u};
+ static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
+#endif
+ using filter_type = double[BSincPhaseCount+1][BSincPointsMax];
+ auto filter = std::make_unique<filter_type[]>(BSincScaleCount);
/* Calculate the Kaiser-windowed Sinc filter coefficients for each
* scale and phase index.
@@ -167,38 +181,38 @@ struct BSincFilterArray {
{
const uint m{hdr.a[si] * 2};
const size_t o{(BSincPointsMax-m) / 2};
- const double scale{hdr.scaleBase + (hdr.scaleRange * si / (BSincScaleCount-1))};
- const double cutoff{scale - (hdr.scaleBase * std::max(0.5, scale) * 2.0)};
+ const double scale{hdr.scaleBase + (hdr.scaleRange * (si+1) / BSincScaleCount)};
+ const double cutoff{scale - (hdr.scaleBase * std::max(1.0, scale*2.0))};
const auto a = static_cast<double>(hdr.a[si]);
- const double l{a - 1.0};
+ const double l{a - 1.0/BSincPhaseCount};
/* Do one extra phase index so that the phase delta has a proper
* target for its last index.
*/
for(uint pi{0};pi <= BSincPhaseCount;++pi)
{
- const double phase{l + (pi/double{BSincPhaseCount})};
+ const double phase{std::floor(l) + (pi/double{BSincPhaseCount})};
for(uint i{0};i < m;++i)
{
const double x{i - phase};
- filter[si][pi][o+i] = Kaiser(hdr.beta, x/a, hdr.besseli_0_beta) * cutoff *
+ filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, hdr.besseli_0_beta) * cutoff *
Sinc(cutoff*x);
}
}
}
size_t idx{0};
- for(size_t si{0};si < BSincScaleCount-1;++si)
+ for(size_t si{0};si < BSincScaleCount;++si)
{
const size_t m{((hdr.a[si]*2) + 3) & ~3u};
const size_t o{(BSincPointsMax-m) / 2};
+ /* Write out each phase index's filter and phase delta for this
+ * quality scale.
+ */
for(size_t pi{0};pi < BSincPhaseCount;++pi)
{
- /* Write out the filter. Also calculate and write out the phase
- * and scale deltas.
- */
for(size_t i{0};i < m;++i)
mTable[idx++] = static_cast<float>(filter[si][pi][o+i]);
@@ -210,11 +224,22 @@ struct BSincFilterArray {
const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]};
mTable[idx++] = static_cast<float>(phDelta);
}
-
+ }
+ /* Calculate and write out each phase index's filter quality scale
+ * deltas. The last scale index doesn't have any scale or scale-
+ * phase deltas.
+ */
+ if(si == BSincScaleCount-1)
+ {
+ for(size_t i{0};i < BSincPhaseCount*m*2;++i)
+ mTable[idx++] = 0.0f;
+ }
+ else for(size_t pi{0};pi < BSincPhaseCount;++pi)
+ {
/* Linear interpolation between scales is also simplified.
*
- * Given a difference in points between scales, the destination
- * points will be 0, thus: x = a + f (-a)
+ * Given a difference in the number of points between scales,
+ * the destination points will be 0, thus: x = a + f (-a)
*/
for(size_t i{0};i < m;++i)
{
@@ -233,31 +258,11 @@ struct BSincFilterArray {
}
}
}
- {
- /* The last scale index doesn't have any scale or scale-phase
- * deltas.
- */
- constexpr size_t si{BSincScaleCount-1};
- const size_t m{((hdr.a[si]*2) + 3) & ~3u};
- const size_t o{(BSincPointsMax-m) / 2};
-
- for(size_t pi{0};pi < BSincPhaseCount;++pi)
- {
- for(size_t i{0};i < m;++i)
- mTable[idx++] = static_cast<float>(filter[si][pi][o+i]);
- for(size_t i{0};i < m;++i)
- {
- const double phDelta{filter[si][pi+1][o+i] - filter[si][pi][o+i]};
- mTable[idx++] = static_cast<float>(phDelta);
- }
- for(size_t i{0};i < m;++i)
- mTable[idx++] = 0.0f;
- for(size_t i{0};i < m;++i)
- mTable[idx++] = 0.0f;
- }
- }
assert(idx == hdr.total_size);
}
+
+ constexpr const BSincHeader &getHeader() const noexcept { return hdr; }
+ constexpr const float *getTable() const noexcept { return &mTable.front(); }
};
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
@@ -268,9 +273,11 @@ const BSincFilterArray<bsinc12_hdr> bsinc12_filter{};
const BSincFilterArray<bsinc24_hdr> bsinc24_filter{};
#endif
-constexpr BSincTable GenerateBSincTable(const BSincHeader &hdr, const float *tab)
+template<typename T>
+constexpr BSincTable GenerateBSincTable(const T &filter)
{
BSincTable ret{};
+ const BSincHeader &hdr = filter.getHeader();
ret.scaleBase = static_cast<float>(hdr.scaleBase);
ret.scaleRange = static_cast<float>(1.0 / hdr.scaleRange);
for(size_t i{0};i < BSincScaleCount;++i)
@@ -278,11 +285,11 @@ constexpr BSincTable GenerateBSincTable(const BSincHeader &hdr, const float *tab
ret.filterOffset[0] = 0;
for(size_t i{1};i < BSincScaleCount;++i)
ret.filterOffset[i] = ret.filterOffset[i-1] + ret.m[i-1]*4*BSincPhaseCount;
- ret.Tab = tab;
+ ret.Tab = filter.getTable();
return ret;
}
} // namespace
-const BSincTable bsinc12{GenerateBSincTable(bsinc12_hdr, &bsinc12_filter.mTable.front())};
-const BSincTable bsinc24{GenerateBSincTable(bsinc24_hdr, &bsinc24_filter.mTable.front())};
+const BSincTable bsinc12{GenerateBSincTable(bsinc12_filter)};
+const BSincTable bsinc24{GenerateBSincTable(bsinc24_filter)};
diff --git a/core/buffer_storage.cpp b/core/buffer_storage.cpp
new file mode 100644
index 00000000..5179db13
--- /dev/null
+++ b/core/buffer_storage.cpp
@@ -0,0 +1,41 @@
+
+#include "config.h"
+
+#include "buffer_storage.h"
+
+#include <stdint.h>
+
+
+uint BytesFromFmt(FmtType type) noexcept /* Size in bytes of one sample of the given storage type. */
+{
+ switch(type)
+ {
+ case FmtUByte: return sizeof(uint8_t);
+ case FmtShort: return sizeof(int16_t);
+ case FmtFloat: return sizeof(float);
+ case FmtDouble: return sizeof(double);
+ case FmtMulaw: return sizeof(uint8_t); /* stored as one byte per sample */
+ case FmtAlaw: return sizeof(uint8_t); /* stored as one byte per sample */
+ }
+ return 0; /* reached only for a value that matches no case above */
+}
+
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept /* Channel count for a channel configuration; ambiorder is only used by the B-Format cases. */
+{
+ switch(chans)
+ {
+ case FmtMono: return 1;
+ case FmtStereo: return 2;
+ case FmtRear: return 2;
+ case FmtQuad: return 4;
+ case FmtX51: return 6;
+ case FmtX61: return 7;
+ case FmtX71: return 8;
+ case FmtBFormat2D: return (ambiorder*2) + 1; /* 2*order + 1 channels */
+ case FmtBFormat3D: return (ambiorder+1) * (ambiorder+1); /* (order+1)^2 channels */
+ case FmtUHJ2: return 2;
+ case FmtUHJ3: return 3;
+ case FmtUHJ4: return 4;
+ }
+ return 0; /* reached only for a value that matches no case above */
+}
diff --git a/core/buffer_storage.h b/core/buffer_storage.h
new file mode 100644
index 00000000..59280354
--- /dev/null
+++ b/core/buffer_storage.h
@@ -0,0 +1,75 @@
+#ifndef CORE_BUFFER_STORAGE_H
+#define CORE_BUFFER_STORAGE_H
+
+#include <atomic>
+
+#include "albyte.h"
+
+
+using uint = unsigned int;
+
+/* Storable formats */
+enum FmtType : unsigned char { /* Sample storage types; BytesFromFmt gives the size of each. */
+ FmtUByte,
+ FmtShort,
+ FmtFloat,
+ FmtDouble,
+ FmtMulaw,
+ FmtAlaw,
+};
+enum FmtChannels : unsigned char { /* Channel configurations; ChannelsFromFmt gives the channel count of each. */
+ FmtMono,
+ FmtStereo,
+ FmtRear,
+ FmtQuad,
+ FmtX51, /* (WFX order) */
+ FmtX61, /* (WFX order) */
+ FmtX71, /* (WFX order) */
+ FmtBFormat2D, /* channel count depends on the ambisonic order */
+ FmtBFormat3D, /* channel count depends on the ambisonic order */
+ FmtUHJ2, /* 2-channel UHJ, aka "BHJ", stereo-compatible */
+ FmtUHJ3, /* 3-channel UHJ, aka "THJ" */
+ FmtUHJ4, /* 4-channel UHJ, aka "PHJ" */
+};
+
+enum class AmbiLayout : unsigned char { /* Ambisonic channel ordering convention (see BufferStorage::mAmbiLayout). */
+ FuMa,
+ ACN,
+};
+enum class AmbiScaling : unsigned char { /* Ambisonic normalization convention (see BufferStorage::mAmbiScaling). */
+ FuMa,
+ SN3D,
+ N3D,
+};
+
+uint BytesFromFmt(FmtType type) noexcept;
+uint ChannelsFromFmt(FmtChannels chans, uint ambiorder) noexcept;
+inline uint FrameSizeFromFmt(FmtChannels chans, FmtType type, uint ambiorder) noexcept /* Bytes per sample frame: channel count times bytes per sample. */
+{ return ChannelsFromFmt(chans, ambiorder) * BytesFromFmt(type); }
+
+
+using CallbackType = int(*)(void*, void*, int);
+
+struct BufferStorage { /* Describes a buffer's sample format, with an optional callback and user pointer. */
+ CallbackType mCallback{nullptr};
+ void *mUserData{nullptr}; /* presumably handed to mCallback; confirm against callers */
+
+ uint mSampleRate{0u};
+ FmtChannels mChannels{FmtMono};
+ FmtType mType{FmtShort};
+ uint mSampleLen{0u};
+
+ AmbiLayout mAmbiLayout{AmbiLayout::FuMa};
+ AmbiScaling mAmbiScaling{AmbiScaling::FuMa};
+ uint mAmbiOrder{0u}; /* only affects the channel count for the B-Format channel types */
+
+ inline uint bytesFromFmt() const noexcept { return BytesFromFmt(mType); } /* member shorthands for the free functions, using this buffer's fields */
+ inline uint channelsFromFmt() const noexcept
+ { return ChannelsFromFmt(mChannels, mAmbiOrder); }
+ inline uint frameSizeFromFmt() const noexcept { return channelsFromFmt() * bytesFromFmt(); }
+
+ inline bool isBFormat() const noexcept /* true for both the 2D and 3D B-Format channel types */
+ { return mChannels == FmtBFormat2D || mChannels == FmtBFormat3D; }
+};
+
+#endif /* CORE_BUFFER_STORAGE_H */
diff --git a/core/bufferline.h b/core/bufferline.h
index 503e208d..8b445f3f 100644
--- a/core/bufferline.h
+++ b/core/bufferline.h
@@ -3,6 +3,8 @@
#include <array>
+#include "alspan.h"
+
/* Size for temporary storage of buffer data, in floats. Larger values need
* more memory and are harder on cache, while smaller values may need more
* iterations for mixing.
@@ -10,5 +12,6 @@
constexpr int BufferLineSize{1024};
using FloatBufferLine = std::array<float,BufferLineSize>;
+using FloatBufferSpan = al::span<float,BufferLineSize>;
#endif /* CORE_BUFFERLINE_H */
diff --git a/core/context.cpp b/core/context.cpp
new file mode 100644
index 00000000..f1c310aa
--- /dev/null
+++ b/core/context.cpp
@@ -0,0 +1,5 @@
+
+#include "config.h"
+
+#include "context.h"
+
diff --git a/core/context.h b/core/context.h
new file mode 100644
index 00000000..bf439053
--- /dev/null
+++ b/core/context.h
@@ -0,0 +1,171 @@
+#ifndef CORE_CONTEXT_H
+#define CORE_CONTEXT_H
+
+#include <array>
+#include <atomic>
+#include <cstddef>
+#include <memory>
+#include <thread>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "atomic.h"
+#include "core/bufferline.h"
+#include "threads.h"
+#include "vecmat.h"
+#include "vector.h"
+
+struct DeviceBase;
+struct EffectSlot;
+struct EffectSlotProps;
+struct RingBuffer;
+struct Voice;
+struct VoiceChange;
+struct VoicePropsItem;
+
+using uint = unsigned int;
+
+
+constexpr float SpeedOfSoundMetersPerSec{343.3f};
+
+enum class DistanceModel : unsigned char { /* Distance attenuation model selector. */
+ Disable,
+ Inverse, InverseClamped,
+ Linear, LinearClamped,
+ Exponent, ExponentClamped,
+
+ Default = InverseClamped /* alias for InverseClamped, not a separate value */
+};
+
+
+struct WetBuffer { /* A variable-length set of float buffer lines plus an in-use flag. */
+ bool mInUse; /* NOTE(review): no default initializer, and the constructor below does not set it */
+ al::FlexArray<FloatBufferLine, 16> mBuffer;
+
+ WetBuffer(size_t count) : mBuffer{count} { } /* count is the number of buffer lines */
+
+ DEF_FAM_NEWDEL(WetBuffer, mBuffer)
+};
+using WetBufferPtr = std::unique_ptr<WetBuffer>;
+
+
+struct ContextProps { /* Container for a pending set of context property values. */
+ float DopplerFactor;
+ float DopplerVelocity;
+ float SpeedOfSound;
+ bool SourceDistanceModel;
+ DistanceModel mDistanceModel;
+
+ std::atomic<ContextProps*> next; /* presumably links containers in ContextBase::mFreeContextProps */
+
+ DEF_NEWDEL(ContextProps)
+};
+
+struct ListenerProps { /* Container for a pending set of listener property values. */
+ std::array<float,3> Position;
+ std::array<float,3> Velocity;
+ std::array<float,3> OrientAt;
+ std::array<float,3> OrientUp;
+ float Gain;
+ float MetersPerUnit;
+
+ std::atomic<ListenerProps*> next; /* presumably links containers in ContextBase::mFreeListenerProps */
+
+ DEF_NEWDEL(ListenerProps)
+};
+
+struct ContextParams { /* Current context/listener parameter values plus pointers to pending updates. */
+ /* Pointer to the most recent property values that are awaiting an update. */
+ std::atomic<ContextProps*> ContextUpdate{nullptr};
+ std::atomic<ListenerProps*> ListenerUpdate{nullptr};
+
+ alu::Matrix Matrix{alu::Matrix::Identity()};
+ alu::Vector Velocity{};
+
+ float Gain{1.0f};
+ float MetersPerUnit{1.0f};
+
+ float DopplerFactor{1.0f};
+ float SpeedOfSound{343.3f}; /* in units per sec! NOTE(review): same literal as SpeedOfSoundMetersPerSec; keep the two in sync. */
+
+ bool SourceDistanceModel{false};
+ DistanceModel mDistanceModel{};
+};
+
+struct ContextBase { /* Per-context state: parameters, voices, effect slots, async events. Not copyable. */
+ DeviceBase *const mDevice; /* the pointer itself cannot be reseated after construction */
+
+ /* Counter for the pre-mixing updates, in 31.1 fixed point (lowest bit
+ * indicates if updates are currently happening).
+ */
+ RefCount mUpdateCount{0u};
+ std::atomic<bool> mHoldUpdates{false};
+ std::atomic<bool> mStopVoicesOnDisconnect{true};
+
+ float mGainBoost{1.0f};
+
+ /* Linked lists of unused property containers, free to use for future
+ * updates.
+ */
+ std::atomic<ContextProps*> mFreeContextProps{nullptr};
+ std::atomic<ListenerProps*> mFreeListenerProps{nullptr};
+ std::atomic<VoicePropsItem*> mFreeVoiceProps{nullptr};
+ std::atomic<EffectSlotProps*> mFreeEffectslotProps{nullptr};
+
+ /* The voice change tail is the beginning of the "free" elements, up to and
+ * *excluding* the current. If tail==current, there's no free elements and
+ * new ones need to be allocated. The current voice change is the element
+ * last processed, and any after are pending.
+ */
+ VoiceChange *mVoiceChangeTail{};
+ std::atomic<VoiceChange*> mCurrentVoiceChange{};
+
+ void allocVoiceChanges(size_t addcount);
+
+
+ ContextParams mParams;
+
+ using VoiceArray = al::FlexArray<Voice*>;
+ std::atomic<VoiceArray*> mVoices{};
+ std::atomic<size_t> mActiveVoiceCount{};
+
+ void allocVoices(size_t addcount);
+ al::span<Voice*> getVoicesSpan() const noexcept /* span over the first mActiveVoiceCount voices, using relaxed loads */
+ {
+ return {mVoices.load(std::memory_order_relaxed)->data(), /* NOTE(review): dereferences mVoices without a null check */
+ mActiveVoiceCount.load(std::memory_order_relaxed)};
+ }
+ al::span<Voice*> getVoicesSpanAcquired() const noexcept /* same as getVoicesSpan(), but with acquire loads */
+ {
+ return {mVoices.load(std::memory_order_acquire)->data(),
+ mActiveVoiceCount.load(std::memory_order_acquire)};
+ }
+
+
+ using EffectSlotArray = al::FlexArray<EffectSlot*>;
+ std::atomic<EffectSlotArray*> mActiveAuxSlots{nullptr};
+
+ std::thread mEventThread;
+ al::semaphore mEventSem;
+ std::unique_ptr<RingBuffer> mAsyncEvents;
+ std::atomic<uint> mEnabledEvts{0u};
+
+ /* Asynchronous voice change actions are processed as a linked list of
+ * VoiceChange objects by the mixer, which is atomically appended to.
+ * However, to avoid allocating each object individually, they're allocated
+ * in clusters that are stored in a vector for easy automatic cleanup.
+ */
+ using VoiceChangeCluster = std::unique_ptr<VoiceChange[]>;
+ al::vector<VoiceChangeCluster> mVoiceChangeClusters;
+
+ using VoiceCluster = std::unique_ptr<Voice[]>;
+ al::vector<VoiceCluster> mVoiceClusters; /* owns Voice objects in array clusters, like the VoiceChange clusters above */
+
+
+ ContextBase(DeviceBase *device);
+ ContextBase(const ContextBase&) = delete;
+ ContextBase& operator=(const ContextBase&) = delete;
+ ~ContextBase();
+};
+
+#endif /* CORE_CONTEXT_H */
diff --git a/core/converter.cpp b/core/converter.cpp
new file mode 100644
index 00000000..6a06b464
--- /dev/null
+++ b/core/converter.cpp
@@ -0,0 +1,371 @@
+
+#include "config.h"
+
+#include "converter.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <limits.h>
+
+#include "albit.h"
+#include "albyte.h"
+#include "alnumeric.h"
+#include "fpu_ctrl.h"
+
+struct CTag;
+struct CopyTag;
+
+
+namespace {
+
+constexpr uint MaxPitch{10};
+
+static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
+static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
+ "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
+
+/* Base template left undefined. Should be marked =delete, but Clang 3.8.1
+ * chokes on that given the inline specializations.
+ */
+template<DevFmtType T>
+inline float LoadSample(DevFmtType_t<T> val) noexcept;
+
+/* Signed integer inputs are scaled by 1/128, 1/32768 and 1/2147483648
+ * respectively; float input is passed through unchanged.
+ */
+template<>
+inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
+{
+    constexpr float scale{1.0f/128.0f};
+    return val * scale;
+}
+template<>
+inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
+{
+    constexpr float scale{1.0f/32768.0f};
+    return val * scale;
+}
+template<>
+inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
+{
+    constexpr float scale{1.0f/2147483648.0f};
+    return static_cast<float>(val) * scale;
+}
+template<>
+inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
+{
+    return val;
+}
+
+/* Unsigned inputs have the mid-point subtracted and are then handled as the
+ * matching signed type.
+ */
+template<>
+inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
+{
+    const auto centered = static_cast<int8_t>(val - 128);
+    return LoadSample<DevFmtByte>(centered);
+}
+template<>
+inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
+{
+    const auto centered = static_cast<int16_t>(val - 32768);
+    return LoadSample<DevFmtShort>(centered);
+}
+template<>
+inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
+{
+    const auto centered = static_cast<int32_t>(val - 2147483648u);
+    return LoadSample<DevFmtInt>(centered);
+}
+
+
+/* Converts `samples` values of format T to float, reading every srcstep'th
+ * element of src and writing the results contiguously to dst.
+ */
+template<DevFmtType T>
+inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep,
+    const size_t samples) noexcept
+{
+    using SampleType = DevFmtType_t<T>;
+    const auto *input = static_cast<const SampleType*>(src);
+    for(size_t idx{0u};idx != samples;++idx)
+    {
+        const SampleType value{input[idx*srcstep]};
+        dst[idx] = LoadSample<T>(value);
+    }
+}
+
+/* Runtime dispatch to LoadSampleArray for the given source sample type. An
+ * unrecognized type leaves dst untouched.
+ */
+void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype,
+    const size_t samples) noexcept
+{
+    switch(srctype)
+    {
+    case DevFmtByte:
+        LoadSampleArray<DevFmtByte>(dst, src, srcstep, samples);
+        return;
+    case DevFmtUByte:
+        LoadSampleArray<DevFmtUByte>(dst, src, srcstep, samples);
+        return;
+    case DevFmtShort:
+        LoadSampleArray<DevFmtShort>(dst, src, srcstep, samples);
+        return;
+    case DevFmtUShort:
+        LoadSampleArray<DevFmtUShort>(dst, src, srcstep, samples);
+        return;
+    case DevFmtInt:
+        LoadSampleArray<DevFmtInt>(dst, src, srcstep, samples);
+        return;
+    case DevFmtUInt:
+        LoadSampleArray<DevFmtUInt>(dst, src, srcstep, samples);
+        return;
+    case DevFmtFloat:
+        LoadSampleArray<DevFmtFloat>(dst, src, srcstep, samples);
+        return;
+    }
+}
+
+
+template<DevFmtType T>
+inline DevFmtType_t<T> StoreSample(float) noexcept;
+
+/* Float output is passed through. Signed integer outputs are scaled to the
+ * type's range, clamped, and converted with fastf2i.
+ */
+template<>
+inline float StoreSample<DevFmtFloat>(float val) noexcept
+{
+    return val;
+}
+template<>
+inline int32_t StoreSample<DevFmtInt>(float val) noexcept
+{
+    /* 2147483520 (2^31 - 128) is the largest float value below 2^31. */
+    const float scaled{clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)};
+    return fastf2i(scaled);
+}
+template<>
+inline int16_t StoreSample<DevFmtShort>(float val) noexcept
+{
+    const float scaled{clampf(val*32768.0f, -32768.0f, 32767.0f)};
+    return static_cast<int16_t>(fastf2i(scaled));
+}
+template<>
+inline int8_t StoreSample<DevFmtByte>(float val) noexcept
+{
+    const float scaled{clampf(val*128.0f, -128.0f, 127.0f)};
+    return static_cast<int8_t>(fastf2i(scaled));
+}
+
+/* Unsigned outputs: convert as the signed type, then add the mid-point. */
+template<>
+inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
+{
+    const auto bits = static_cast<uint32_t>(StoreSample<DevFmtInt>(val));
+    return bits + 2147483648u;
+}
+template<>
+inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
+{
+    const int shifted{StoreSample<DevFmtShort>(val) + 32768};
+    return static_cast<uint16_t>(shifted);
+}
+template<>
+inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
+{
+    const int shifted{StoreSample<DevFmtByte>(val) + 128};
+    return static_cast<uint8_t>(shifted);
+}
+
+/* Converts `samples` contiguous floats from src to format T, writing every
+ * dststep'th element of dst.
+ */
+template<DevFmtType T>
+inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep,
+    const size_t samples) noexcept
+{
+    using SampleType = DevFmtType_t<T>;
+    auto *output = static_cast<SampleType*>(dst);
+    for(size_t idx{0u};idx != samples;++idx)
+    {
+        const float value{src[idx]};
+        output[idx*dststep] = StoreSample<T>(value);
+    }
+}
+
+
+/* Runtime dispatch to StoreSampleArray for the given destination sample type.
+ * An unrecognized type leaves dst untouched.
+ */
+void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype,
+    const size_t samples) noexcept
+{
+    switch(dsttype)
+    {
+    case DevFmtByte:
+        StoreSampleArray<DevFmtByte>(dst, src, dststep, samples);
+        return;
+    case DevFmtUByte:
+        StoreSampleArray<DevFmtUByte>(dst, src, dststep, samples);
+        return;
+    case DevFmtShort:
+        StoreSampleArray<DevFmtShort>(dst, src, dststep, samples);
+        return;
+    case DevFmtUShort:
+        StoreSampleArray<DevFmtUShort>(dst, src, dststep, samples);
+        return;
+    case DevFmtInt:
+        StoreSampleArray<DevFmtInt>(dst, src, dststep, samples);
+        return;
+    case DevFmtUInt:
+        StoreSampleArray<DevFmtUInt>(dst, src, dststep, samples);
+        return;
+    case DevFmtFloat:
+        StoreSampleArray<DevFmtFloat>(dst, src, dststep, samples);
+        return;
+    }
+}
+
+
+/* Duplicates each mono input sample into both channels of an interleaved
+ * stereo float buffer, scaled by 0.707106781187 (about sqrt(1/2)).
+ */
+template<DevFmtType T>
+void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept
+{
+    const auto *input = static_cast<const DevFmtType_t<T>*>(src);
+    for(size_t idx{0u};idx < frames;++idx)
+    {
+        const float sample{LoadSample<T>(input[idx]) * 0.707106781187f};
+        dst[idx*2 + 0] = sample;
+        dst[idx*2 + 1] = sample;
+    }
+}
+
+/* Sums the input channels selected by the bits of chanmask (bit N selects
+ * channel N of each step-sized frame) into dst, then multiplies by scale.
+ */
+template<DevFmtType T>
+void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,
+    const void *src, const size_t frames) noexcept
+{
+    const auto *input = static_cast<const DevFmtType_t<T>*>(src);
+    std::fill(dst, dst+frames, 0.0f);
+    for(size_t chan{0};chanmask != 0;++chan, chanmask >>= 1)
+    {
+        if LIKELY((chanmask&1))
+        {
+            for(size_t idx{0u};idx < frames;++idx)
+                dst[idx] += LoadSample<T>(input[idx*step + chan]);
+        }
+    }
+    std::transform(dst, dst+frames, dst,
+        [scale](const float sum) noexcept -> float { return sum*scale; });
+}
+
+} // namespace
+
+/* Creates a converter between the given sample types for numchans channels,
+ * resampling from srcRate to dstRate. Returns null when the channel count or
+ * either rate is zero.
+ */
+SampleConverterPtr CreateSampleConverter(DevFmtType srcType, DevFmtType dstType, size_t numchans,
+    uint srcRate, uint dstRate, Resampler resampler)
+{
+    if(numchans == 0 || srcRate == 0 || dstRate == 0)
+        return nullptr;
+
+    SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};
+    SampleConverter &conv = *converter;
+    conv.mSrcType = srcType;
+    conv.mDstType = dstType;
+    conv.mSrcTypeSize = BytesFromDevFmt(srcType);
+    conv.mDstTypeSize = BytesFromDevFmt(dstType);
+
+    conv.mSrcPrepCount = 0;
+    conv.mFracOffset = 0;
+
+    /* Have to set the mixer FPU mode since that's what the resampler code expects. */
+    FPUCtl mixer_mode{};
+
+    /* Fixed-point step per output sample, rounded to nearest, capped at
+     * MaxPitch input samples per output sample and never less than 1.
+     */
+    const double scaled{srcRate*double{MixerFracOne}/dstRate + 0.5};
+    const auto step = static_cast<uint>(mind(scaled, MaxPitch*MixerFracOne));
+    conv.mIncrement = maxu(step, 1);
+
+    /* A 1:1 step only needs a plain copy, otherwise use the requested resampler. */
+    if(conv.mIncrement != MixerFracOne)
+        conv.mResample = PrepareResampler(resampler, conv.mIncrement, &conv.mState);
+    else
+        conv.mResample = Resample_<CopyTag,CTag>;
+
+    return converter;
+}
+
+/* Computes how many output frames can be produced from srcframes input frames
+ * given the converter's current state. Returns 0 when the input (after any
+ * pending skip) is not enough to get past the resampler padding.
+ */
+uint SampleConverter::availableOut(uint srcframes) const
+{
+    int prepcount{mSrcPrepCount};
+    if(prepcount < 0)
+    {
+        /* Negative prepcount means we need to skip that many input samples. */
+        const auto skip = static_cast<uint>(-prepcount);
+        if(skip >= srcframes)
+            return 0;
+        srcframes -= skip;
+        prepcount = 0;
+    }
+
+    /* No output samples if there's no input samples. */
+    if(srcframes == 0)
+        return 0;
+
+    if(prepcount < MaxResamplerPadding)
+    {
+        /* Not enough input samples to generate an output sample. */
+        const auto needed = static_cast<uint>(MaxResamplerPadding - prepcount);
+        if(needed >= srcframes)
+            return 0;
+    }
+
+    /* Input length past the padding, in fixed-point, less the current
+     * fractional offset.
+     */
+    const auto samples = static_cast<uint64_t>(prepcount) + srcframes - MaxResamplerPadding;
+    const uint64_t DataSize64{(samples << MixerFracBits) - mFracOffset};
+
+    /* If we have a full prep, we can generate at least one sample. */
+    const uint64_t count{(DataSize64 + mIncrement-1)/mIncrement};
+    return static_cast<uint>(clampu64(count, 1, std::numeric_limits<int>::max()));
+}
+
+uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
+{ /* Converts and resamples interleaved input frames at *src into at most
+   * dstframes interleaved output frames at dst. Returns the number of output
+   * frames written; *src and *srcframes are updated on exit to the read
+   * position and the count of input frames not yet consumed.
+   */
+ const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize}; /* bytes per input frame */
+ const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize}; /* bytes per output frame */
+ const uint increment{mIncrement};
+ auto SamplesIn = static_cast<const al::byte*>(*src);
+ uint NumSrcSamples{*srcframes};
+
+ FPUCtl mixer_mode{};
+ uint pos{0}; /* output frames written so far */
+ while(pos < dstframes && NumSrcSamples > 0)
+ {
+ int prepcount{mSrcPrepCount};
+ if(prepcount < 0)
+ {
+ /* Negative prepcount means we need to skip that many input samples. */
+ if(static_cast<uint>(-prepcount) >= NumSrcSamples)
+ {
+ mSrcPrepCount = static_cast<int>(NumSrcSamples) + prepcount;
+ NumSrcSamples = 0;
+ break;
+ }
+ SamplesIn += SrcFrameSize*static_cast<uint>(-prepcount);
+ NumSrcSamples -= static_cast<uint>(-prepcount);
+ mSrcPrepCount = 0;
+ continue;
+ }
+ const uint toread{minu(NumSrcSamples, BufferLineSize - MaxResamplerPadding)}; /* input frames taken this pass */
+
+ if(prepcount < MaxResamplerPadding
+ && static_cast<uint>(MaxResamplerPadding - prepcount) >= toread)
+ {
+ /* Not enough input samples to generate an output sample. Store
+ * what we're given for later.
+ * NOTE(review): SamplesIn is not advanced on this path, so *src is
+ * left pointing at the frames just stored even though *srcframes
+ * becomes 0 - confirm callers only rely on *srcframes here.
+ */
+ for(size_t chan{0u};chan < mChan.size();chan++)
+ LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,
+ mChan.size(), mSrcType, toread);
+
+ mSrcPrepCount = prepcount + static_cast<int>(toread);
+ NumSrcSamples = 0;
+ break;
+ }
+
+ float *RESTRICT SrcData{mSrcSamples};
+ float *RESTRICT DstData{mDstSamples};
+ uint DataPosFrac{mFracOffset};
+ auto DataSize64 = static_cast<uint64_t>(prepcount);
+ DataSize64 += toread;
+ DataSize64 -= MaxResamplerPadding;
+ DataSize64 <<= MixerFracBits;
+ DataSize64 -= DataPosFrac; /* fixed-point input length past the padding, less the current fraction */
+
+ /* If we have a full prep, we can generate at least one sample. */
+ auto DstSize = static_cast<uint>(
+ clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
+ DstSize = minu(DstSize, dstframes-pos);
+
+ for(size_t chan{0u};chan < mChan.size();chan++)
+ {
+ const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
+ al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan;
+
+ /* Load the previous samples into the source data first, then the
+ * new samples from the input buffer.
+ */
+ std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
+ LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, toread);
+
+ /* Store as many prep samples for next time as possible, given the
+ * number of output samples being generated.
+ */
+ uint SrcDataEnd{(DstSize*increment + DataPosFrac)>>MixerFracBits}; /* whole-sample source position after DstSize outputs */
+ if(SrcDataEnd >= static_cast<uint>(prepcount)+toread)
+ std::fill(std::begin(mChan[chan].PrevSamples),
+ std::end(mChan[chan].PrevSamples), 0.0f);
+ else
+ {
+ const size_t len{minz(al::size(mChan[chan].PrevSamples),
+ static_cast<uint>(prepcount)+toread-SrcDataEnd)};
+ std::copy_n(SrcData+SrcDataEnd, len, mChan[chan].PrevSamples);
+ std::fill(std::begin(mChan[chan].PrevSamples)+len,
+ std::end(mChan[chan].PrevSamples), 0.0f);
+ }
+
+ /* Now resample, and store the result in the output buffer. The
+ * source pointer handed to the resampler is offset by half of
+ * MaxResamplerPadding.
+ */
+ const float *ResampledData{mResample(&mState, SrcData+(MaxResamplerPadding>>1),
+ DataPosFrac, increment, {DstData, DstSize})};
+
+ StoreSamples(DstSamples, ResampledData, mChan.size(), mDstType, DstSize);
+ }
+
+ /* Update the number of prep samples still available, as well as the
+ * fractional offset.
+ */
+ DataPosFrac += increment*DstSize;
+ mSrcPrepCount = mini(prepcount + static_cast<int>(toread - (DataPosFrac>>MixerFracBits)),
+ MaxResamplerPadding);
+ mFracOffset = DataPosFrac & MixerFracMask;
+
+ /* Update the src and dst pointers in case there's still more to do. */
+ SamplesIn += SrcFrameSize*(DataPosFrac>>MixerFracBits);
+ NumSrcSamples -= minu(NumSrcSamples, (DataPosFrac>>MixerFracBits));
+
+ dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize;
+ pos += DstSize;
+ }
+
+ *src = SamplesIn;
+ *srcframes = NumSrcSamples;
+
+ return pos;
+}
+
+
+/* Remixes `frames` input frames from src into float samples at dst. Handles a
+ * mix-down of the channels in mChanMask to mono, and mono (mask 0x1) to
+ * stereo; any other combination writes nothing.
+ */
+void ChannelConverter::convert(const void *src, float *dst, uint frames) const
+{
+    if(mDstChans == DevFmtMono)
+    {
+        /* Scale the sum by sqrt(1/N), where N is the number of mixed channels. */
+        const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};
+        switch(mSrcType)
+        {
+        case DevFmtByte:
+            Multi2Mono<DevFmtByte>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtUByte:
+            Multi2Mono<DevFmtUByte>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtShort:
+            Multi2Mono<DevFmtShort>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtUShort:
+            Multi2Mono<DevFmtUShort>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtInt:
+            Multi2Mono<DevFmtInt>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtUInt:
+            Multi2Mono<DevFmtUInt>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        case DevFmtFloat:
+            Multi2Mono<DevFmtFloat>(mChanMask, mSrcStep, scale, dst, src, frames);
+            break;
+        }
+        return;
+    }
+
+    if(mChanMask != 0x1 || mDstChans != DevFmtStereo)
+        return;
+
+    switch(mSrcType)
+    {
+    case DevFmtByte: Mono2Stereo<DevFmtByte>(dst, src, frames); break;
+    case DevFmtUByte: Mono2Stereo<DevFmtUByte>(dst, src, frames); break;
+    case DevFmtShort: Mono2Stereo<DevFmtShort>(dst, src, frames); break;
+    case DevFmtUShort: Mono2Stereo<DevFmtUShort>(dst, src, frames); break;
+    case DevFmtInt: Mono2Stereo<DevFmtInt>(dst, src, frames); break;
+    case DevFmtUInt: Mono2Stereo<DevFmtUInt>(dst, src, frames); break;
+    case DevFmtFloat: Mono2Stereo<DevFmtFloat>(dst, src, frames); break;
+    }
+}
diff --git a/core/converter.h b/core/converter.h
new file mode 100644
index 00000000..2d22ae38
--- /dev/null
+++ b/core/converter.h
@@ -0,0 +1,59 @@
+#ifndef CORE_CONVERTER_H
+#define CORE_CONVERTER_H
+
+#include <cstddef>
+#include <memory>
+
+#include "almalloc.h"
+#include "devformat.h"
+#include "mixer/defs.h"
+
+using uint = unsigned int;
+
+
+struct SampleConverter { /* Sample type converter and resampler; instances are made by CreateSampleConverter. */
+ DevFmtType mSrcType{};
+ DevFmtType mDstType{};
+ uint mSrcTypeSize{}; /* bytes per source sample (set from BytesFromDevFmt) */
+ uint mDstTypeSize{}; /* bytes per destination sample (set from BytesFromDevFmt) */
+
+ int mSrcPrepCount{}; /* stored prep samples; negative means that many input samples are still to be skipped */
+
+ uint mFracOffset{}; /* fractional source position carried between convert() calls */
+ uint mIncrement{}; /* fixed-point source step per output sample */
+ InterpState mState{};
+ ResamplerFunc mResample{};
+
+ alignas(16) float mSrcSamples[BufferLineSize]{}; /* per-channel scratch: resampler input */
+ alignas(16) float mDstSamples[BufferLineSize]{}; /* per-channel scratch: resampler output */
+
+ struct ChanSamples {
+ alignas(16) float PrevSamples[MaxResamplerPadding]; /* input samples kept from the previous convert() call */
+ };
+ al::FlexArray<ChanSamples> mChan; /* one entry per channel */
+
+ SampleConverter(size_t numchans) : mChan{numchans} { }
+
+ uint convert(const void **src, uint *srcframes, void *dst, uint dstframes); /* returns output frames written */
+ uint availableOut(uint srcframes) const; /* output frames obtainable from srcframes input frames */
+
+ DEF_FAM_NEWDEL(SampleConverter, mChan)
+};
+using SampleConverterPtr = std::unique_ptr<SampleConverter>;
+
+SampleConverterPtr CreateSampleConverter(DevFmtType srcType, DevFmtType dstType, size_t numchans,
+ uint srcRate, uint dstRate, Resampler resampler);
+
+
+struct ChannelConverter { /* Remixes input channels to mono, or mono input to stereo (see convert). */
+ DevFmtType mSrcType{};
+ uint mSrcStep{}; /* element stride between consecutive source frames */
+ uint mChanMask{}; /* bit N set means source channel N takes part in the mix */
+ DevFmtChannels mDstChans{};
+
+ bool is_active() const noexcept { return mChanMask != 0; } /* true when any source channel is selected */
+
+ void convert(const void *src, float *dst, uint frames) const; /* writes float samples to dst */
+};
+
+#endif /* CORE_CONVERTER_H */
diff --git a/core/dbus_wrap.cpp b/core/dbus_wrap.cpp
new file mode 100644
index 00000000..506dd815
--- /dev/null
+++ b/core/dbus_wrap.cpp
@@ -0,0 +1,46 @@
+
+#include "config.h"
+
+#include "dbus_wrap.h"
+
+#ifdef HAVE_DYNLOAD
+
+#include <mutex>
+#include <type_traits>
+
+#include "logging.h"
+
+
+void *dbus_handle{nullptr};
+#define DECL_FUNC(x) decltype(x) *p##x{};
+DBUS_FUNCTIONS(DECL_FUNC)
+#undef DECL_FUNC
+
+void PrepareDBus()
+{ /* Loads libdbus and resolves every function listed in DBUS_FUNCTIONS into
+   * its p<name> pointer. On any failure the library is closed (if it was
+   * opened) and dbus_handle is left null.
+   */
+ static constexpr char libname[] = "libdbus-1.so.3";
+
+ auto load_func = [](auto &f, const char *name) -> void /* looks up `name` and stores it in f, cast to f's own pointer type */
+ { f = reinterpret_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); };
+#define LOAD_FUNC(x) do { \
+ load_func(p##x, #x); \
+ if(!p##x) \
+ { \
+ WARN("Failed to load function %s\n", #x); \
+ CloseLib(dbus_handle); \
+ dbus_handle = nullptr; \
+ return; \
+ } \
+} while(0);
+
+ dbus_handle = LoadLib(libname);
+ if(!dbus_handle)
+ {
+ WARN("Failed to load %s\n", libname);
+ return;
+ }
+
+DBUS_FUNCTIONS(LOAD_FUNC) /* expands to one LOAD_FUNC per function; the first missing symbol returns early */
+#undef LOAD_FUNC
+}
+#endif
diff --git a/core/dbus_wrap.h b/core/dbus_wrap.h
new file mode 100644
index 00000000..61dbb971
--- /dev/null
+++ b/core/dbus_wrap.h
@@ -0,0 +1,75 @@
+#ifndef CORE_DBUS_WRAP_H
+#define CORE_DBUS_WRAP_H
+
+#include <memory>
+
+#include <dbus/dbus.h>
+
+#include "dynload.h"
+
+
+#define DBUS_FUNCTIONS(MAGIC) \
+MAGIC(dbus_error_init) \
+MAGIC(dbus_error_free) \
+MAGIC(dbus_bus_get) \
+MAGIC(dbus_connection_set_exit_on_disconnect) \
+MAGIC(dbus_connection_unref) \
+MAGIC(dbus_connection_send_with_reply_and_block) \
+MAGIC(dbus_message_unref) \
+MAGIC(dbus_message_new_method_call) \
+MAGIC(dbus_message_append_args) \
+MAGIC(dbus_message_iter_init) \
+MAGIC(dbus_message_iter_next) \
+MAGIC(dbus_message_iter_recurse) \
+MAGIC(dbus_message_iter_get_arg_type) \
+MAGIC(dbus_message_iter_get_basic) \
+MAGIC(dbus_set_error_from_message)
+
+#ifdef HAVE_DYNLOAD
+
+#include <mutex>
+
+extern void *dbus_handle;
+#define DECL_FUNC(x) extern decltype(x) *p##x;
+DBUS_FUNCTIONS(DECL_FUNC)
+#undef DECL_FUNC
+
+void PrepareDBus();
+
+/* Returns the libdbus handle, running PrepareDBus once on first use. */
+inline auto HasDBus()
+{
+    static std::once_flag load_once{};
+    std::call_once(load_once, PrepareDBus);
+    return dbus_handle;
+}
+
+#else
+
+#define DECL_FUNC(x) constexpr auto p##x = &x;
+DBUS_FUNCTIONS(DECL_FUNC)
+#undef DECL_FUNC
+
+constexpr bool HasDBus() noexcept { return true; }
+#endif /* HAVE_DYNLOAD */
+
+
+namespace dbus {
+
+struct Error { /* Owns a DBusError: initialized on construction, freed on destruction. */
+ Error() { (*pdbus_error_init)(&mError); }
+ ~Error() { (*pdbus_error_free)(&mError); }
+ DBusError* operator->() { return &mError; } /* member access on the wrapped error */
+ DBusError &get() { return mError; } /* the wrapped error, for passing to libdbus calls */
+private:
+ DBusError mError{};
+};
+
+struct ConnectionDeleter { /* unique_ptr deleter that unrefs the connection */
+ void operator()(DBusConnection *c) { (*pdbus_connection_unref)(c); }
+};
+using ConnectionPtr = std::unique_ptr<DBusConnection,ConnectionDeleter>;
+
+} // namespace dbus
+
+
+#endif /* CORE_DBUS_WRAP_H */
diff --git a/core/device.cpp b/core/device.cpp
new file mode 100644
index 00000000..9705c0ac
--- /dev/null
+++ b/core/device.cpp
@@ -0,0 +1,7 @@
+
+#include "config.h"
+
+#include "device.h"
+
+
+al::FlexArray<ContextBase*> DeviceBase::sEmptyContextArray{0u};
diff --git a/core/device.h b/core/device.h
new file mode 100644
index 00000000..4cc822cc
--- /dev/null
+++ b/core/device.h
@@ -0,0 +1,290 @@
+#ifndef CORE_DEVICE_H
+#define CORE_DEVICE_H
+
+#include <stddef.h>
+
+#include <array>
+#include <atomic>
+#include <bitset>
+#include <chrono>
+#include <memory>
+#include <mutex>
+#include <string>
+
+#include "almalloc.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "atomic.h"
+#include "core/bufferline.h"
+#include "devformat.h"
+#include "intrusive_ptr.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "vector.h"
+
+struct BackendBase;
+class BFormatDec;
+struct bs2b;
+struct Compressor;
+struct ContextBase;
+struct DirectHrtfState;
+struct HrtfStore;
+struct UhjEncoder;
+
+using uint = unsigned int;
+
+
+#define MIN_OUTPUT_RATE 8000
+#define MAX_OUTPUT_RATE 192000
+#define DEFAULT_OUTPUT_RATE 44100
+
+#define DEFAULT_UPDATE_SIZE 882 /* 20ms */
+#define DEFAULT_NUM_UPDATES 3
+
+
+enum class DeviceType : unsigned char {
+ Playback,
+ Capture,
+ Loopback
+};
+
+
+enum class RenderMode : unsigned char {
+ Normal,
+ Pairwise,
+ Hrtf
+};
+
+
+struct InputRemixMap {
+ struct TargetMix { Channel channel; float mix; };
+
+ Channel channel;
+ std::array<TargetMix,2> targets;
+};
+
+
+/* Maximum delay in samples for speaker distance compensation. */
+#define MAX_DELAY_LENGTH 1024
+
+struct DistanceComp { /* Per-channel gain/delay settings plus a trailing flexible array of float samples. */
+ struct ChanData {
+ float Gain{1.0f};
+ uint Length{0u}; /* Valid range is [0...MAX_DELAY_LENGTH). */
+ float *Buffer{nullptr}; /* NOTE(review): presumably points into mSamples - confirm where it is assigned */
+ };
+
+ std::array<ChanData,MAX_OUTPUT_CHANNELS> mChannels;
+ al::FlexArray<float,16> mSamples; /* `count` floats, allocated together with the object */
+
+ DistanceComp(size_t count) : mSamples{count} { }
+
+ static std::unique_ptr<DistanceComp> Create(size_t numsamples) /* allocates the object with room for numsamples floats */
+ { return std::unique_ptr<DistanceComp>{new(FamCount(numsamples)) DistanceComp{numsamples}}; }
+
+ DEF_FAM_NEWDEL(DistanceComp, mSamples)
+};
+
+
+struct BFChannelConfig {
+ float Scale;
+ uint Index;
+};
+
+
+struct MixParams {
+ /* Coefficient channel mapping for mixing to the buffer. */
+ std::array<BFChannelConfig,MAX_OUTPUT_CHANNELS> AmbiMap{};
+
+ al::span<FloatBufferLine> Buffer;
+};
+
+struct RealMixParams {
+ al::span<const InputRemixMap> RemixMap;
+ std::array<uint,MaxChannels> ChannelIndex{};
+
+ al::span<FloatBufferLine> Buffer;
+};
+
+enum {
+ // Frequency was requested by the app or config file
+ FrequencyRequest,
+ // Channel configuration was requested by the config file
+ ChannelsRequest,
+ // Sample type was requested by the config file
+ SampleTypeRequest,
+
+ // Specifies if the DSP is paused at user request
+ DevicePaused,
+ // Specifies if the device is currently running
+ DeviceRunning,
+
+ DeviceFlagsCount
+};
+
+struct DeviceBase {
+ /* To avoid extraneous allocations, a 0-sized FlexArray<ContextBase*> is
+ * defined globally as a sharable object.
+ */
+ static al::FlexArray<ContextBase*> sEmptyContextArray;
+
+ std::atomic<bool> Connected{true};
+ const DeviceType Type{};
+
+ uint Frequency{};
+ uint UpdateSize{};
+ uint BufferSize{};
+
+ DevFmtChannels FmtChans{};
+ DevFmtType FmtType{};
+ bool IsHeadphones{false};
+ uint mAmbiOrder{0};
+ float mXOverFreq{400.0f};
+ /* For DevFmtAmbi* output only, specifies the channel order and
+ * normalization.
+ */
+ DevAmbiLayout mAmbiLayout{DevAmbiLayout::Default};
+ DevAmbiScaling mAmbiScale{DevAmbiScaling::Default};
+
+ std::string DeviceName;
+
+ // Device flags
+ std::bitset<DeviceFlagsCount> Flags{};
+
+ uint NumAuxSends{};
+
+ /* Rendering mode. */
+ RenderMode mRenderMode{RenderMode::Normal};
+
+ /* The average speaker distance as determined by the ambdec configuration,
+ * HRTF data set, or the NFC-HOA reference delay. Only used for NFC.
+ */
+ float AvgSpeakerDist{0.0f};
+
+ uint SamplesDone{0u};
+ std::chrono::nanoseconds ClockBase{0};
+ std::chrono::nanoseconds FixedLatency{0};
+
+ /* Temp storage used for mixer processing. */
+ alignas(16) float ResampledData[BufferLineSize];
+ alignas(16) float FilteredData[BufferLineSize];
+ union {
+ alignas(16) float HrtfSourceData[BufferLineSize + HrtfHistoryLength];
+ alignas(16) float NfcSampleData[BufferLineSize];
+ };
+
+ /* Persistent storage for HRTF mixing. */
+ alignas(16) float2 HrtfAccumData[BufferLineSize + HrirLength + HrtfDirectDelay];
+
+ /* Mixing buffer used by the Dry mix and Real output. */
+ al::vector<FloatBufferLine, 16> MixBuffer;
+
+ /* The "dry" path corresponds to the main output. */
+ MixParams Dry;
+ uint NumChannelsPerOrder[MaxAmbiOrder+1]{};
+
+ /* "Real" output, which will be written to the device buffer. May alias the
+ * dry buffer.
+ */
+ RealMixParams RealOut;
+
+ /* HRTF state and info */
+ std::unique_ptr<DirectHrtfState> mHrtfState;
+ al::intrusive_ptr<HrtfStore> mHrtf;
+ uint mIrSize{0};
+
+ /* Ambisonic-to-UHJ encoder */
+ std::unique_ptr<UhjEncoder> mUhjEncoder;
+
+ /* Ambisonic decoder for speakers */
+ std::unique_ptr<BFormatDec> AmbiDecoder;
+
+ /* Stereo-to-binaural filter */
+ std::unique_ptr<bs2b> Bs2b;
+
+ using PostProc = void(DeviceBase::*)(const size_t SamplesToDo);
+ PostProc PostProcess{nullptr};
+
+ std::unique_ptr<Compressor> Limiter;
+
+ /* Delay buffers used to compensate for speaker distances. */
+ std::unique_ptr<DistanceComp> ChannelDelays;
+
+ /* Dithering control. */
+ float DitherDepth{0.0f};
+ uint DitherSeed{0u};
+
+ /* Running count of the mixer invocations, in 31.1 fixed point. This
+ * actually increments *twice* when mixing, first at the start and then at
+ * the end, so the bottom bit indicates if the device is currently mixing
+ * and the upper bits indicate how many mixes have been done.
+ */
+ RefCount MixCount{0u};
+
+ // Contexts created on this device
+ std::atomic<al::FlexArray<ContextBase*>*> mContexts{nullptr};
+
+ /* This lock protects the device state (format, update size, etc) from
+ * being changed in multiple threads, or being accessed while being
+ * changed. It's also used to serialize calls to the backend.
+ */
+ std::mutex StateLock;
+ std::unique_ptr<BackendBase> Backend;
+
+
+ DeviceBase(DeviceType type);
+ DeviceBase(const DeviceBase&) = delete;
+ DeviceBase& operator=(const DeviceBase&) = delete;
+ ~DeviceBase();
+
+ uint bytesFromFmt() const noexcept { return BytesFromDevFmt(FmtType); }
+ uint channelsFromFmt() const noexcept { return ChannelsFromDevFmt(FmtChans, mAmbiOrder); }
+ uint frameSizeFromFmt() const noexcept { return bytesFromFmt() * channelsFromFmt(); }
+
+    /* Spins until the low bit of MixCount is clear (no mix in progress), then
+     * returns the count value that was observed.
+     */
+    uint waitForMix() const noexcept
+    {
+        for(;;)
+        {
+            const uint count{MixCount.load(std::memory_order_acquire)};
+            if((count&1) == 0)
+                return count;
+        }
+    }
+
+ void ProcessHrtf(const size_t SamplesToDo);
+ void ProcessAmbiDec(const size_t SamplesToDo);
+ void ProcessAmbiDecStablized(const size_t SamplesToDo);
+ void ProcessUhj(const size_t SamplesToDo);
+ void ProcessBs2b(const size_t SamplesToDo);
+
+ inline void postProcess(const size_t SamplesToDo)
+ { if LIKELY(PostProcess) (this->*PostProcess)(SamplesToDo); }
+
+ void renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep);
+
+ /* Caller must lock the device state, and the mixer must not be running. */
+#ifdef __USE_MINGW_ANSI_STDIO
+ [[gnu::format(gnu_printf,2,3)]]
+#else
+ [[gnu::format(printf,2,3)]]
+#endif
+ void handleDisconnect(const char *msg, ...);
+
+ DISABLE_ALLOC()
+};
+
+
+/* Must be less than 15 characters (16 including terminating null) for
+ * compatibility with pthread_setname_np limitations. */
+#define MIXER_THREAD_NAME "alsoft-mixer"
+
+#define RECORD_THREAD_NAME "alsoft-record"
+
+
+/**
+ * Returns the index for the given channel name (e.g. FrontCenter), or
+ * INVALID_CHANNEL_INDEX if it doesn't exist.
+ */
+inline uint GetChannelIdxByName(const RealMixParams &real, Channel chan) noexcept
+{ return real.ChannelIndex[chan]; }
+#define INVALID_CHANNEL_INDEX ~0u
+
+#endif /* CORE_DEVICE_H */
diff --git a/core/front_stablizer.h b/core/front_stablizer.h
new file mode 100644
index 00000000..3d328a8d
--- /dev/null
+++ b/core/front_stablizer.h
@@ -0,0 +1,36 @@
+#ifndef CORE_FRONT_STABLIZER_H
+#define CORE_FRONT_STABLIZER_H
+
+#include <array>
+#include <memory>
+
+#include "almalloc.h"
+#include "bufferline.h"
+#include "filters/splitter.h"
+
+
+struct FrontStablizer { /* Working buffers and per-channel delay lines; allocate with Create(). */
+ static constexpr size_t DelayLength{256u}; /* samples in each delay line */
+
+ FrontStablizer(size_t numchans) : DelayBuf{numchans} { }
+
+ alignas(16) std::array<float,BufferLineSize + DelayLength> Side{};
+ alignas(16) std::array<float,BufferLineSize + DelayLength> MidDirect{};
+ alignas(16) std::array<float,DelayLength> MidDelay{};
+
+ alignas(16) std::array<float,BufferLineSize + DelayLength> TempBuf{};
+
+ BandSplitter MidFilter;
+ alignas(16) FloatBufferLine MidLF{};
+ alignas(16) FloatBufferLine MidHF{};
+
+ using DelayLine = std::array<float,DelayLength>;
+ al::FlexArray<DelayLine,16> DelayBuf; /* numchans delay lines, allocated together with the object */
+
+ static std::unique_ptr<FrontStablizer> Create(size_t numchans) /* allocates the object with room for numchans delay lines */
+ { return std::unique_ptr<FrontStablizer>{new(FamCount(numchans)) FrontStablizer{numchans}}; }
+
+ DEF_FAM_NEWDEL(FrontStablizer, DelayBuf)
+};
+
+#endif /* CORE_FRONT_STABLIZER_H */
diff --git a/core/helpers.cpp b/core/helpers.cpp
new file mode 100644
index 00000000..dcb785c9
--- /dev/null
+++ b/core/helpers.cpp
@@ -0,0 +1,514 @@
+
+#include "config.h"
+
+#include "helpers.h"
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdarg>
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <mutex>
+#include <limits>
+#include <string>
+
+#include "almalloc.h"
+#include "alfstream.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "alstring.h"
+#include "logging.h"
+#include "strutils.h"
+#include "vector.h"
+
+
+/* Mixing thread priority level */
+int RTPrioLevel{1};
+
+/* Allow reducing the process's RTTime limit for RTKit. */
+bool AllowRTTimeLimit{true};
+
+
+#ifdef _WIN32
+
+#include <shlobj.h>
+
+/* Returns the directory and file name of the running executable, as UTF-8.
+ * The result is computed on the first call and cached; both strings are
+ * empty if the module name can't be retrieved.
+ */
+const PathNamePair &GetProcBinary()
+{
+    static al::optional<PathNamePair> procbin;
+    if(procbin) return *procbin;
+
+    /* Query the module path, doubling the buffer for as long as the returned
+     * length fills it completely.
+     */
+    al::vector<WCHAR> namebuf(256);
+    auto query_name = [&namebuf]() -> DWORD
+    { return GetModuleFileNameW(nullptr, namebuf.data(), static_cast<DWORD>(namebuf.size())); };
+
+    DWORD len{query_name()};
+    while(len == namebuf.size())
+    {
+        namebuf.resize(namebuf.size() << 1);
+        len = query_name();
+    }
+    if(len == 0)
+    {
+        ERR("Failed to get process name: error %lu\n", GetLastError());
+        procbin = al::make_optional<PathNamePair>();
+        return *procbin;
+    }
+
+    /* Trim to the returned length and make sure there's a null terminator. */
+    namebuf.resize(len);
+    if(namebuf.back() != 0)
+        namebuf.push_back(0);
+
+    /* Find the last '\\' or '/', not counting the final element. */
+    const auto search_begin = namebuf.rbegin()+1;
+    auto sep = std::find(search_begin, namebuf.rend(), '\\');
+    sep = std::find(search_begin, sep, '/');
+    if(sep == namebuf.rend())
+        procbin = al::make_optional<PathNamePair>(std::string{}, wstr_to_utf8(namebuf.data()));
+    else
+    {
+        /* Split in place: terminate the directory part at the separator. */
+        *sep = 0;
+        procbin = al::make_optional<PathNamePair>(wstr_to_utf8(namebuf.data()),
+            wstr_to_utf8(&*sep + 1));
+    }
+
+    TRACE("Got binary: %s, %s\n", procbin->path.c_str(), procbin->fname.c_str());
+    return *procbin;
+}
+
+namespace {
+
+/* Appends to results the full path of every entry in `path` matching "*ext".
+ * The entries added by this call are sorted among themselves.
+ */
+void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+{
+    const std::string pattern{std::string{path} + "\\*" + ext};
+    TRACE("Searching %s\n", pattern.c_str());
+
+    const std::wstring wpattern{utf8_to_wstr(pattern.c_str())};
+    WIN32_FIND_DATAW fdata;
+    HANDLE hdl{FindFirstFileW(wpattern.c_str(), &fdata)};
+    if(hdl == INVALID_HANDLE_VALUE) return;
+
+    const auto base = results->size();
+
+    do {
+        results->emplace_back(std::string{path} + '\\' + wstr_to_utf8(fdata.cFileName));
+    } while(FindNextFileW(hdl, &fdata));
+    FindClose(hdl);
+
+    /* Sort and log only the entries added above. */
+    std::string *const first{results->data() + base};
+    std::string *const last{results->data() + results->size()};
+    std::sort(first, last);
+    for(const std::string *iter{first};iter != last;++iter)
+        TRACE(" got %s\n", iter->c_str());
+}
+
+} // namespace
+
+/* Collects files matching "*ext". If subdir is an absolute path (drive-letter
+ * form, or prefixed with "\\?\"), only that directory is searched. Otherwise
+ * the app-local directory (ALSOFT_LOCAL_PATH if set, else the current working
+ * directory) is searched, followed by subdir under the per-user and common
+ * application data folders. Calls are serialized with a function-local mutex.
+ */
+al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+{
+    auto is_slash = [](int c) noexcept -> int { return (c == '\\' || c == '/'); };
+
+    static std::mutex search_lock;
+    std::lock_guard<std::mutex> _{search_lock};
+
+    /* If the path is absolute, use it directly. The character is converted to
+     * unsigned char first, since passing a negative value to isalpha (e.g. a
+     * non-ASCII UTF-8 byte where char is signed) is undefined.
+     */
+    al::vector<std::string> results;
+    if(isalpha(static_cast<unsigned char>(subdir[0])) && subdir[1] == ':'
+        && is_slash(subdir[2]))
+    {
+        std::string path{subdir};
+        std::replace(path.begin(), path.end(), '/', '\\');
+        DirectorySearch(path.c_str(), ext, &results);
+        return results;
+    }
+    if(subdir[0] == '\\' && subdir[1] == '\\' && subdir[2] == '?' && subdir[3] == '\\')
+    {
+        DirectorySearch(subdir, ext, &results);
+        return results;
+    }
+
+    std::string path;
+
+    /* Search the app-local directory. The emptiness checks guard back()
+     * against an empty string from a failed UTF-8 conversion.
+     */
+    if(auto localpath = al::getenv(L"ALSOFT_LOCAL_PATH"))
+    {
+        path = wstr_to_utf8(localpath->c_str());
+        if(!path.empty() && is_slash(path.back()))
+            path.pop_back();
+    }
+    else if(WCHAR *cwdbuf{_wgetcwd(nullptr, 0)})
+    {
+        path = wstr_to_utf8(cwdbuf);
+        if(!path.empty() && is_slash(path.back()))
+            path.pop_back();
+        free(cwdbuf);
+    }
+    else
+        path = ".";
+    std::replace(path.begin(), path.end(), '/', '\\');
+    DirectorySearch(path.c_str(), ext, &results);
+
+    /* Search the local and global data dirs. */
+    static const int ids[2]{ CSIDL_APPDATA, CSIDL_COMMON_APPDATA };
+    for(int id : ids)
+    {
+        WCHAR buffer[MAX_PATH];
+        if(SHGetSpecialFolderPathW(nullptr, buffer, id, FALSE) == FALSE)
+            continue;
+
+        path = wstr_to_utf8(buffer);
+        /* Skip the folder if its name couldn't be converted, rather than
+         * searching a path relative to the root of the current drive.
+         */
+        if(path.empty())
+            continue;
+        if(!is_slash(path.back()))
+            path += '\\';
+        path += subdir;
+        std::replace(path.begin(), path.end(), '/', '\\');
+
+        DirectorySearch(path.c_str(), ext, &results);
+    }
+
+    return results;
+}
+
+/* Raises the calling thread to time-critical priority when RTPrioLevel is
+ * greater than 0, logging an error if the request fails.
+ */
+void SetRTPriority(void)
+{
+    if(RTPrioLevel <= 0)
+        return;
+
+    const bool raised{SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL) != 0};
+    if(!raised)
+        ERR("Failed to set priority level for thread\n");
+}
+
+#else
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#endif
+#ifdef __HAIKU__
+#include <FindDirectory.h>
+#endif
+#ifdef HAVE_PROC_PIDPATH
+#include <libproc.h>
+#endif
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
+#include <pthread.h>
+#include <sched.h>
+#endif
+#ifdef HAVE_RTKIT
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "dbus_wrap.h"
+#include "rtkit.h"
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+#endif
+
+const PathNamePair &GetProcBinary()
+{ /* Returns the directory and file name of the running executable. The
+   * result is computed on the first call and cached. Platform-specific
+   * queries are tried in turn, each only if the path is still empty.
+   */
+ static al::optional<PathNamePair> procbin;
+ if(procbin) return *procbin;
+
+ al::vector<char> pathname;
+#ifdef __FreeBSD__
+ size_t pathlen;
+ int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+ if(sysctl(mib, 4, nullptr, &pathlen, nullptr, 0) == -1) /* first call only queries the needed length */
+ WARN("Failed to sysctl kern.proc.pathname: %s\n", strerror(errno));
+ else
+ {
+ pathname.resize(pathlen + 1);
+ sysctl(mib, 4, pathname.data(), &pathlen, nullptr, 0); /* NOTE(review): result unchecked; on failure pathname stays non-empty (all zeros), so the fallbacks below are skipped */
+ pathname.resize(pathlen);
+ }
+#endif
+#ifdef HAVE_PROC_PIDPATH
+ if(pathname.empty())
+ {
+ char procpath[PROC_PIDPATHINFO_MAXSIZE]{};
+ const pid_t pid{getpid()};
+ if(proc_pidpath(pid, procpath, sizeof(procpath)) < 1)
+ ERR("proc_pidpath(%d, ...) failed: %s\n", pid, strerror(errno));
+ else
+ pathname.insert(pathname.end(), procpath, procpath+strlen(procpath));
+ }
+#endif
+#ifdef __HAIKU__
+ if(pathname.empty())
+ {
+ char procpath[PATH_MAX];
+ if(find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, NULL, procpath, sizeof(procpath)) == B_OK)
+ pathname.insert(pathname.end(), procpath, procpath+strlen(procpath));
+ }
+#endif
+#ifndef __SWITCH__
+ if(pathname.empty())
+ {
+ static const char SelfLinkNames[][32]{
+ "/proc/self/exe",
+ "/proc/self/file",
+ "/proc/curproc/exe",
+ "/proc/curproc/file"
+ };
+
+ pathname.resize(256);
+
+ const char *selfname{};
+ ssize_t len{};
+ for(const char *name : SelfLinkNames)
+ {
+ selfname = name;
+ len = readlink(selfname, pathname.data(), pathname.size());
+ if(len >= 0 || errno != ENOENT) break; /* stop at the first link that resolves, or that fails for a reason other than not existing */
+ }
+
+ while(len > 0 && static_cast<size_t>(len) == pathname.size()) /* a completely filled buffer may be truncated; retry with twice the size */
+ {
+ pathname.resize(pathname.size() << 1);
+ len = readlink(selfname, pathname.data(), pathname.size());
+ }
+ if(len <= 0)
+ {
+ WARN("Failed to readlink %s: %s\n", selfname, strerror(errno));
+ len = 0;
+ }
+
+ pathname.resize(static_cast<size_t>(len));
+ }
+#endif
+ while(!pathname.empty() && pathname.back() == 0) /* drop any trailing null characters */
+ pathname.pop_back();
+
+ auto sep = std::find(pathname.crbegin(), pathname.crend(), '/'); /* last '/' splits directory from file name */
+ if(sep != pathname.crend())
+ procbin = al::make_optional<PathNamePair>(std::string(pathname.cbegin(), sep.base()-1),
+ std::string(sep.base(), pathname.cend()));
+ else
+ procbin = al::make_optional<PathNamePair>(std::string{},
+ std::string(pathname.cbegin(), pathname.cend()));
+
+ TRACE("Got binary: \"%s\", \"%s\"\n", procbin->path.c_str(), procbin->fname.c_str());
+ return *procbin;
+}
+
+namespace {
+
+/* Appends to `results` the full path ("<path>/<name>") of every entry in the
+ * directory `path` whose name ends with `ext`, compared case-insensitively.
+ * Names that are not longer than `ext` are skipped. The entries added by this
+ * call are sorted among themselves; anything already in `results` keeps its
+ * position. Nothing is added if the directory can't be opened.
+ */
+void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+{
+    TRACE("Searching %s for *%s\n", path, ext);
+    DIR *handle{opendir(path)};
+    if(!handle) return;
+
+    const auto first_new = results->size();
+    const size_t suffix_len{strlen(ext)};
+
+    for(struct dirent *entry{readdir(handle)};entry != nullptr;entry = readdir(handle))
+    {
+        const char *fname{entry->d_name};
+        if(strcmp(fname, ".") == 0 || strcmp(fname, "..") == 0)
+            continue;
+
+        const size_t name_len{strlen(fname)};
+        if(name_len <= suffix_len || al::strcasecmp(fname+name_len-suffix_len, ext) != 0)
+            continue;
+
+        results->emplace_back(path);
+        std::string &fullpath = results->back();
+        if(fullpath.back() != '/')
+            fullpath.push_back('/');
+        fullpath += fname;
+    }
+    closedir(handle);
+
+    /* Sort and log only what this call contributed. */
+    const al::span<std::string> added{results->data()+first_new, results->size()-first_new};
+    std::sort(added.begin(), added.end());
+    for(const auto &name : added)
+        TRACE(" got %s\n", name.c_str());
+}
+
+} // namespace
+
+/* Collects data files ending in `ext` from the known search locations.
+ *
+ * If `subdir` is an absolute path (starts with '/'), only that directory is
+ * searched. Otherwise the locations are, in order: the directory named by
+ * ALSOFT_LOCAL_PATH (or the current working directory when it is not set),
+ * `subdir` under XDG_DATA_HOME (or under $HOME/.local/share when that is not
+ * set), and `subdir` under each entry of XDG_DATA_DIRS (defaulting to
+ * /usr/local/share/ and /usr/share/). Calls are serialized by a mutex.
+ */
+al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+{
+    static std::mutex search_lock;
+    std::lock_guard<std::mutex> _{search_lock};
+
+    al::vector<std::string> results;
+    if(subdir[0] == '/')
+    {
+        DirectorySearch(subdir, ext, &results);
+        return results;
+    }
+
+    /* Searches `subdir` inside the given base directory, adding a separator
+     * between the two when the base doesn't already end with one.
+     */
+    auto search_in = [ext,subdir,&results](std::string dir) -> void
+    {
+        if(dir.back() != '/')
+            dir += '/';
+        dir += subdir;
+        DirectorySearch(dir.c_str(), ext, &results);
+    };
+
+    /* Search the app-local directory. */
+    if(auto localpath = al::getenv("ALSOFT_LOCAL_PATH"))
+        DirectorySearch(localpath->c_str(), ext, &results);
+    else
+    {
+        /* Grow the buffer until the working directory fits; any other
+         * failure falls back to searching ".".
+         */
+        al::vector<char> cwd(256);
+        const char *searchdir{nullptr};
+        for(;;)
+        {
+            if(getcwd(cwd.data(), cwd.size()))
+            {
+                searchdir = cwd.data();
+                break;
+            }
+            if(errno != ERANGE)
+            {
+                searchdir = ".";
+                break;
+            }
+            cwd.resize(cwd.size() << 1);
+        }
+        DirectorySearch(searchdir, ext, &results);
+    }
+
+    /* Search the user's local data directory. */
+    if(auto datahome = al::getenv("XDG_DATA_HOME"))
+        search_in(*datahome);
+    else if(auto homedir = al::getenv("HOME"))
+    {
+        std::string &base = *homedir;
+        if(base.back() == '/')
+            base.pop_back();
+        base += "/.local/share/";
+        search_in(base);
+    }
+
+    /* Search the global data directories, a ':'-separated list. */
+    std::string datadirs{al::getenv("XDG_DATA_DIRS").value_or("/usr/local/share/:/usr/share/")};
+
+    for(size_t start{0u};start < datadirs.size();)
+    {
+        const size_t colon{datadirs.find(':', start)};
+        std::string dir;
+        if(colon == std::string::npos)
+        {
+            dir = datadirs.substr(start);
+            start = std::string::npos;
+        }
+        else
+        {
+            dir = datadirs.substr(start, colon - start);
+            start = colon + 1;
+        }
+
+        /* Empty list entries are ignored. */
+        if(!dir.empty())
+            search_in(dir);
+    }
+
+    return results;
+}
+
+/* Tries to give the calling thread elevated scheduling priority.
+ *
+ * Does nothing unless RTPrioLevel is greater than 0. When available,
+ * pthread_setschedparam is tried first with the minimum SCHED_RR priority.
+ * If that is unavailable or fails and RTKit support is built in, RTKit is
+ * asked over D-Bus for real-time scheduling, then for a high-priority nice
+ * level. If every attempt fails, the last error is logged.
+ */
+void SetRTPriority()
+{
+    if(RTPrioLevel <= 0)
+        return;
+
+    /* Kept as a positive errno value throughout, since it's handed to
+     * std::strerror in the final error message.
+     */
+    int err{ENOTSUP};
+#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
+    struct sched_param param{};
+    /* Use the minimum real-time priority possible for now (on Linux this
+     * should be 1 for SCHED_RR).
+     */
+    param.sched_priority = sched_get_priority_min(SCHED_RR);
+#ifdef SCHED_RESET_ON_FORK
+    /* Retry without SCHED_RESET_ON_FORK only if the flag itself is rejected. */
+    err = pthread_setschedparam(pthread_self(), SCHED_RR|SCHED_RESET_ON_FORK, &param);
+    if(err == EINVAL)
+#endif
+        err = pthread_setschedparam(pthread_self(), SCHED_RR, &param);
+    if(err == 0) return;
+
+    WARN("pthread_setschedparam failed: %s (%d)\n", std::strerror(err), err);
+#endif
+#ifdef HAVE_RTKIT
+    if(HasDBus())
+    {
+        dbus::Error error;
+        if(dbus::ConnectionPtr conn{(*pdbus_bus_get)(DBUS_BUS_SYSTEM, &error.get())})
+        {
+            using ulonglong = unsigned long long;
+            /* Lowers the process' RLIMIT_RTTIME hard limit (and the soft
+             * limit with it) to the maximum RTKit reports, if it is
+             * currently higher. Returns 0 on success, or a positive errno
+             * value.
+             */
+            auto limit_rttime = [](DBusConnection *c) -> int
+            {
+                long long maxrttime{rtkit_get_rttime_usec_max(c)};
+                if(maxrttime <= 0) return static_cast<int>(std::abs(maxrttime));
+                const ulonglong umaxtime{static_cast<ulonglong>(maxrttime)};
+
+                struct rlimit rlim{};
+                if(getrlimit(RLIMIT_RTTIME, &rlim) != 0)
+                    return errno;
+                TRACE("RTTime max: %llu (hard: %llu, soft: %llu)\n", umaxtime,
+                    ulonglong{rlim.rlim_max}, ulonglong{rlim.rlim_cur});
+                if(rlim.rlim_max > umaxtime)
+                {
+                    rlim.rlim_max = static_cast<rlim_t>(std::min<ulonglong>(umaxtime,
+                        std::numeric_limits<rlim_t>::max()));
+                    rlim.rlim_cur = std::min(rlim.rlim_cur, rlim.rlim_max);
+                    if(setrlimit(RLIMIT_RTTIME, &rlim) != 0)
+                        return errno;
+                }
+                return 0;
+            };
+
+            /* Don't stupidly exit if the connection dies while doing this. */
+            (*pdbus_connection_set_exit_on_disconnect)(conn.get(), false);
+
+            int nicemin{};
+            err = rtkit_get_min_nice_level(conn.get(), &nicemin);
+            if(err == -ENOENT)
+            {
+                err = std::abs(err);
+                ERR("Could not query RTKit: %s (%d)\n", std::strerror(err), err);
+                return;
+            }
+            int rtmax{rtkit_get_max_realtime_priority(conn.get())};
+            TRACE("Maximum real-time priority: %d, minimum niceness: %d\n", rtmax, nicemin);
+
+            err = EINVAL;
+            if(rtmax > 0)
+            {
+                if(AllowRTTimeLimit)
+                {
+                    err = limit_rttime(conn.get());
+                    if(err != 0)
+                        WARN("Failed to set RLIMIT_RTTIME for RTKit: %s (%d)\n",
+                            std::strerror(err), err);
+                }
+
+                /* Use half the maximum real-time priority allowed. */
+                TRACE("Making real-time with priority %d\n", (rtmax+1)/2);
+                err = rtkit_make_realtime(conn.get(), 0, (rtmax+1)/2);
+                if(err == 0) return;
+
+                err = std::abs(err);
+                WARN("Failed to set real-time priority: %s (%d)\n", std::strerror(err), err);
+            }
+            if(nicemin < 0)
+            {
+                TRACE("Making high priority with niceness %d\n", nicemin);
+                err = rtkit_make_high_priority(conn.get(), 0, nicemin);
+                if(err == 0) return;
+
+                err = std::abs(err);
+                WARN("Failed to set high priority: %s (%d)\n", std::strerror(err), err);
+            }
+        }
+        else
+            WARN("D-Bus connection failed with %s: %s\n", error->name, error->message);
+    }
+    else
+        WARN("D-Bus not available\n");
+#endif
+    ERR("Could not set elevated priority: %s (%d)\n", std::strerror(err), err);
+}
+
+#endif
diff --git a/core/helpers.h b/core/helpers.h
new file mode 100644
index 00000000..f0bfcf1b
--- /dev/null
+++ b/core/helpers.h
@@ -0,0 +1,18 @@
+#ifndef CORE_HELPERS_H
+#define CORE_HELPERS_H
+
+#include <string>
+
+#include "vector.h"
+
+
+/* Location of a binary, split into its directory (path) and file name
+ * (fname).
+ */
+struct PathNamePair { std::string path, fname; };
+/* Returns the directory and file name of the running process' executable.
+ * The POSIX implementation computes this once and caches the result.
+ */
+const PathNamePair &GetProcBinary(void);
+
+/* Requested real-time priority level; SetRTPriority does nothing unless this
+ * is greater than 0.
+ */
+extern int RTPrioLevel;
+/* When true, the RTKit path of SetRTPriority first tries to lower
+ * RLIMIT_RTTIME to the maximum RTKit reports before requesting real-time
+ * scheduling.
+ */
+extern bool AllowRTTimeLimit;
+/* Attempts to raise the calling thread's scheduling priority. */
+void SetRTPriority(void);
+
+/* Returns the paths of files whose names end with `match` (compared
+ * case-insensitively), found in `subdir` under the known data search
+ * locations. In the POSIX implementation, a `subdir` that starts with '/'
+ * is searched on its own.
+ */
+al::vector<std::string> SearchDataFiles(const char *match, const char *subdir);
+
+#endif /* CORE_HELPERS_H */
diff --git a/core/hrtf.cpp b/core/hrtf.cpp
new file mode 100644
index 00000000..e0ab8f0a
--- /dev/null
+++ b/core/hrtf.cpp
@@ -0,0 +1,1447 @@
+
+#include "config.h"
+
+#include "hrtf.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <type_traits>
+#include <utility>
+
+#include "albit.h"
+#include "albyte.h"
+#include "alfstream.h"
+#include "almalloc.h"
+#include "alnumeric.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "ambidefs.h"
+#include "filters/splitter.h"
+#include "helpers.h"
+#include "logging.h"
+#include "math_defs.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "polyphase_resampler.h"
+#include "vector.h"
+
+
+namespace {
+
+/* One element of the EnumeratedHrtfs list.
+ * NOTE(review): mDispName is presumably the name presented to the user and
+ * mFilename the file the data set comes from; the code that fills these in
+ * is not visible here, so confirm against it.
+ */
+struct HrtfEntry {
+ std::string mDispName;
+ std::string mFilename;
+};
+
+/* One element of the LoadedHrtfs list: a file name paired with the owned
+ * HrtfStore.
+ * NOTE(review): presumably mEntry holds the store loaded from mFilename;
+ * the code that populates the list is not visible here, so confirm against
+ * it.
+ */
+struct LoadedHrtf {
+ std::string mFilename;
+ std::unique_ptr<HrtfStore> mEntry;
+};
+
+/* Data set limits must be the same as or more flexible than those defined in
+ * the makemhr utility.
+ */
+/* Allowed number of field depths in a data set. */
+constexpr uint MinFdCount{1};
+constexpr uint MaxFdCount{16};
+
+/* Allowed field distance, in millimeters as stored in the file. */
+constexpr uint MinFdDistance{50};
+constexpr uint MaxFdDistance{2500};
+
+/* Allowed number of elevations per field. */
+constexpr uint MinEvCount{5};
+constexpr uint MaxEvCount{181};
+
+/* Allowed number of azimuths per elevation. */
+constexpr uint MinAzCount{1};
+constexpr uint MaxAzCount{255};
+
+/* Largest per-ear delay, in whole samples, accepted from a file. */
+constexpr uint MaxHrirDelay{HrtfHistoryLength - 1};
+
+/* Stored delays are fixed-point values with this many fractional bits; the
+ * loaders shift whole-sample delays left by HrirDelayFracBits.
+ */
+constexpr uint HrirDelayFracBits{2};
+constexpr uint HrirDelayFracOne{1 << HrirDelayFracBits};
+constexpr uint HrirDelayFracHalf{HrirDelayFracOne >> 1};
+
+/* The largest fixed-point delay has to stay below 256. NOTE(review):
+ * presumably so it fits the byte-sized elements of ubyte2; confirm against
+ * that type's definition.
+ */
+static_assert(MaxHrirDelay*HrirDelayFracOne < 256, "MAX_HRIR_DELAY or DELAY_FRAC too large");
+
+/* Eight-character markers, one per format version 00 through 03.
+ * NOTE(review): presumably matched against the start of a data file to pick
+ * a loader; the code that uses them is not visible here.
+ */
+constexpr char magicMarker00[8]{'M','i','n','P','H','R','0','0'};
+constexpr char magicMarker01[8]{'M','i','n','P','H','R','0','1'};
+constexpr char magicMarker02[8]{'M','i','n','P','H','R','0','2'};
+constexpr char magicMarker03[8]{'M','i','n','P','H','R','0','3'};
+
+/* First value for pass-through coefficients (remaining are 0), used for omni-
+ * directional sounds. */
+constexpr float PassthruCoeff{0.707106781187f/*sqrt(0.5)*/};
+
+/* NOTE(review): each mutex presumably guards the list declared right after
+ * it; the locking code is not visible here.
+ */
+std::mutex LoadedHrtfLock;
+al::vector<LoadedHrtf> LoadedHrtfs;
+
+std::mutex EnumeratedHrtfLock;
+al::vector<HrtfEntry> EnumeratedHrtfs;
+
+
+/* Read-only stream buffer over a fixed, caller-owned character range. The
+ * whole range is exposed as the get area up front, so underflow always
+ * reports end-of-file. Seeking is supported for input only and fails for any
+ * position outside the range.
+ */
+class databuf final : public std::streambuf {
+    int_type underflow() override
+    { return traits_type::eof(); }
+
+    pos_type seekoff(off_type offset, std::ios_base::seekdir whence, std::ios_base::openmode mode) override
+    {
+        /* Only pure input positioning is allowed. */
+        if((mode&std::ios_base::out) || !(mode&std::ios_base::in))
+            return traits_type::eof();
+
+        const off_type total{egptr() - eback()};
+        const off_type consumed{gptr() - eback()};
+
+        char_type *target{nullptr};
+        if(whence == std::ios_base::beg)
+        {
+            if(offset < 0 || offset > total)
+                return traits_type::eof();
+            target = eback() + offset;
+        }
+        else if(whence == std::ios_base::cur)
+        {
+            const bool past_end{offset >= 0 && offset > total-consumed};
+            const bool before_start{offset < 0 && -offset > consumed};
+            if(past_end || before_start)
+                return traits_type::eof();
+            target = gptr() + offset;
+        }
+        else if(whence == std::ios_base::end)
+        {
+            if(offset > 0 || -offset > total)
+                return traits_type::eof();
+            target = egptr() + offset;
+        }
+        else
+            return traits_type::eof();
+
+        setg(eback(), target, egptr());
+        return target - eback();
+    }
+
+    pos_type seekpos(pos_type pos, std::ios_base::openmode mode) override
+    {
+        /* Absolute counterpart of seekoff, with the same input-only rule. */
+        if(!(mode&std::ios_base::in) || (mode&std::ios_base::out))
+            return traits_type::eof();
+
+        const auto limit = egptr() - eback();
+        if(pos < 0 || pos > limit)
+            return traits_type::eof();
+
+        char_type *target{eback() + static_cast<size_t>(pos)};
+        setg(eback(), target, egptr());
+        return pos;
+    }
+
+public:
+    /* Exposes [start_, end_) as the readable range. The data is never
+     * written through this buffer; the const_cast only satisfies setg().
+     */
+    databuf(const char_type *start_, const char_type *end_) noexcept
+    {
+        char_type *first{const_cast<char_type*>(start_)};
+        char_type *last{const_cast<char_type*>(end_)};
+        setg(first, first, last);
+    }
+};
+
+/* Input stream that reads from an in-memory character range through an
+ * embedded databuf.
+ */
+class idstream final : public std::istream {
+ databuf mStreamBuf;
+
+public:
+ /* Reads from [start_, end_). The istream base is first constructed with no
+ * buffer and is attached to mStreamBuf through init() in the constructor
+ * body, once the member has been constructed.
+ */
+ idstream(const char *start_, const char *end_)
+ : std::istream{nullptr}, mStreamBuf{start_, end_}
+ { init(&mStreamBuf); }
+};
+
+
+struct IdxBlend { uint idx; float blend; };
+/* Maps a polar elevation in radians onto the elevation table. The result
+ * holds the lower index, clamped to (evcount - 1), and the fractional
+ * distance of the scaled elevation past the unclamped index.
+ */
+IdxBlend CalcEvIndex(uint evcount, float ev)
+{
+    const float scaled{(al::MathDefs<float>::Pi()*0.5f + ev) * static_cast<float>(evcount-1) /
+        al::MathDefs<float>::Pi()};
+    const uint whole{float2uint(scaled)};
+    const float frac{scaled - static_cast<float>(whole)};
+
+    return IdxBlend{minu(whole, evcount-1), frac};
+}
+
+/* Maps a polar azimuth in radians onto the azimuth table. The result holds
+ * the lower index, wrapped into the range 0 to (azcount - 1), and the
+ * fractional distance of the scaled azimuth past the unwrapped index.
+ */
+IdxBlend CalcAzIndex(uint azcount, float az)
+{
+    const float scaled{(al::MathDefs<float>::Tau()+az) * static_cast<float>(azcount) /
+        al::MathDefs<float>::Tau()};
+    const uint whole{float2uint(scaled)};
+    const float frac{scaled - static_cast<float>(whole)};
+
+    return IdxBlend{whole%azcount, frac};
+}
+
+} // namespace
+
+
+/* Calculates static HRIR coefficients and delays for the given polar elevation
+ * and azimuth in radians. The coefficients are normalized.
+ *
+ * The field is chosen from `distance`, then the four HRIRs surrounding the
+ * direction (two elevations, two azimuths each) are blended bilinearly.
+ * `spread` reduces the directional weight and adds a matching pass-through
+ * term to the first coefficient pair. Results are written to `coeffs` and
+ * `delays` (one delay per ear, in whole samples).
+ */
+void GetHrtfCoeffs(const HrtfStore *Hrtf, float elevation, float azimuth, float distance,
+ float spread, HrirArray &coeffs, const al::span<uint,2> delays)
+{
+ /* Directional weight: 1 with no spread, falling to 0 at a spread of tau. */
+ const float dirfact{1.0f - (spread / al::MathDefs<float>::Tau())};
+
+ /* Step through the fields while the given distance is smaller than the
+ * current field's distance, stopping at the last field. ebase tracks the
+ * index of the selected field's first elevation.
+ */
+ const auto *field = Hrtf->field;
+ const auto *field_end = field + Hrtf->fdCount-1;
+ size_t ebase{0};
+ while(distance < field->distance && field != field_end)
+ {
+ ebase += field->evCount;
+ ++field;
+ }
+
+ /* Calculate the elevation indices. */
+ const auto elev0 = CalcEvIndex(field->evCount, elevation);
+ const size_t elev1_idx{minu(elev0.idx+1, field->evCount-1)};
+ const size_t ir0offset{Hrtf->elev[ebase + elev0.idx].irOffset};
+ const size_t ir1offset{Hrtf->elev[ebase + elev1_idx].irOffset};
+
+ /* Calculate azimuth indices. */
+ const auto az0 = CalcAzIndex(Hrtf->elev[ebase + elev0.idx].azCount, azimuth);
+ const auto az1 = CalcAzIndex(Hrtf->elev[ebase + elev1_idx].azCount, azimuth);
+
+ /* Calculate the HRIR indices to blend. */
+ const size_t idx[4]{
+ ir0offset + az0.idx,
+ ir0offset + ((az0.idx+1) % Hrtf->elev[ebase + elev0.idx].azCount),
+ ir1offset + az1.idx,
+ ir1offset + ((az1.idx+1) % Hrtf->elev[ebase + elev1_idx].azCount)
+ };
+
+ /* Calculate bilinear blending weights, attenuated according to the
+ * directional panning factor.
+ */
+ const float blend[4]{
+ (1.0f-elev0.blend) * (1.0f-az0.blend) * dirfact,
+ (1.0f-elev0.blend) * ( az0.blend) * dirfact,
+ ( elev0.blend) * (1.0f-az1.blend) * dirfact,
+ ( elev0.blend) * ( az1.blend) * dirfact
+ };
+
+ /* Calculate the blended HRIR delays. */
+ /* The stored delays are fixed-point; dividing by HrirDelayFracOne yields
+ * whole samples.
+ */
+ float d{Hrtf->delays[idx[0]][0]*blend[0] + Hrtf->delays[idx[1]][0]*blend[1] +
+ Hrtf->delays[idx[2]][0]*blend[2] + Hrtf->delays[idx[3]][0]*blend[3]};
+ delays[0] = fastf2u(d * float{1.0f/HrirDelayFracOne});
+ d = Hrtf->delays[idx[0]][1]*blend[0] + Hrtf->delays[idx[1]][1]*blend[1] +
+ Hrtf->delays[idx[2]][1]*blend[2] + Hrtf->delays[idx[3]][1]*blend[3];
+ delays[1] = fastf2u(d * float{1.0f/HrirDelayFracOne});
+
+ /* Calculate the blended HRIR coefficients. */
+ /* Start from the pass-through response (first pair only, the rest zero),
+ * then accumulate each of the four weighted HRIRs on top.
+ */
+ float *coeffout{al::assume_aligned<16>(&coeffs[0][0])};
+ coeffout[0] = PassthruCoeff * (1.0f-dirfact);
+ coeffout[1] = PassthruCoeff * (1.0f-dirfact);
+ std::fill_n(coeffout+2, size_t{HrirLength-1}*2, 0.0f);
+ for(size_t c{0};c < 4;c++)
+ {
+ const float *srccoeffs{al::assume_aligned<16>(Hrtf->coeffs[idx[c]][0].data())};
+ const float mult{blend[c]};
+ auto blend_coeffs = [mult](const float src, const float coeff) noexcept -> float
+ { return src*mult + coeff; };
+ std::transform(srccoeffs, srccoeffs + HrirLength*2, coeffout, coeffout, blend_coeffs);
+ }
+}
+
+
+/* Allocates a DirectHrtfState for num_chans channels and returns ownership
+ * of it. The count is passed both to the placement-new argument
+ * FamCount(num_chans) and to the constructor.
+ * NOTE(review): FamCount presumably sizes the allocation for a trailing
+ * per-channel array; confirm against the class definition.
+ */
+std::unique_ptr<DirectHrtfState> DirectHrtfState::Create(size_t num_chans)
+{ return std::unique_ptr<DirectHrtfState>{new(FamCount(num_chans)) DirectHrtfState{num_chans}}; }
+
+/* Builds the per-channel HRIR filters for this state.
+ *
+ * For each point in AmbiPoints the nearest HRIR of the store's first field is
+ * selected, and its left/right responses are accumulated into every channel,
+ * scaled by the matching AmbiMatrix entry and shifted by the HRIR's delay.
+ * Each channel also gets its band splitter initialized from XOverFreq and its
+ * high-frequency scale from AmbiOrderHFGain. mIrSize is set to the resulting
+ * filter length.
+ */
+void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize,
+ const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels],
+ const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain)
+{
+ using double2 = std::array<double,2>;
+ struct ImpulseResponse {
+ const ConstHrirSpan hrir;
+ uint ldelay, rdelay;
+ };
+
+ /* Crossover frequency normalized by the data set's sample rate. */
+ const double xover_norm{double{XOverFreq} / Hrtf->sampleRate};
+ for(size_t i{0};i < mChannels.size();++i)
+ {
+ const size_t order{AmbiIndex::OrderFromChannel()[i]};
+ mChannels[i].mSplitter.init(static_cast<float>(xover_norm));
+ mChannels[i].mHfScale = AmbiOrderHFGain[order];
+ }
+
+ /* Delays here are in fixed-point units (HrirDelayFracBits fractional bits). */
+ uint min_delay{HrtfHistoryLength*HrirDelayFracOne}, max_delay{0};
+ al::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size());
+ /* Looks up the HRIR for one point and updates the overall delay range. */
+ auto calc_res = [Hrtf,&max_delay,&min_delay](const AngularPoint &pt) -> ImpulseResponse
+ {
+ auto &field = Hrtf->field[0];
+ const auto elev0 = CalcEvIndex(field.evCount, pt.Elev.value);
+ const size_t elev1_idx{minu(elev0.idx+1, field.evCount-1)};
+ const size_t ir0offset{Hrtf->elev[elev0.idx].irOffset};
+ const size_t ir1offset{Hrtf->elev[elev1_idx].irOffset};
+
+ const auto az0 = CalcAzIndex(Hrtf->elev[elev0.idx].azCount, pt.Azim.value);
+ const auto az1 = CalcAzIndex(Hrtf->elev[elev1_idx].azCount, pt.Azim.value);
+
+ const size_t idx[4]{
+ ir0offset + az0.idx,
+ ir0offset + ((az0.idx+1) % Hrtf->elev[elev0.idx].azCount),
+ ir1offset + az1.idx,
+ ir1offset + ((az1.idx+1) % Hrtf->elev[elev1_idx].azCount)
+ };
+
+ const std::array<double,4> blend{{
+ (1.0-elev0.blend) * (1.0-az0.blend),
+ (1.0-elev0.blend) * ( az0.blend),
+ ( elev0.blend) * (1.0-az1.blend),
+ ( elev0.blend) * ( az1.blend)
+ }};
+
+ /* The largest blend factor serves as the closest HRIR. */
+ const size_t irOffset{idx[std::max_element(blend.begin(), blend.end()) - blend.begin()]};
+ ImpulseResponse res{Hrtf->coeffs[irOffset],
+ Hrtf->delays[irOffset][0], Hrtf->delays[irOffset][1]};
+
+ min_delay = minu(min_delay, minu(res.ldelay, res.rdelay));
+ max_delay = maxu(max_delay, maxu(res.ldelay, res.rdelay));
+
+ return res;
+ };
+ std::transform(AmbiPoints.begin(), AmbiPoints.end(), std::back_inserter(impres), calc_res);
+ /* Rounds a fixed-point delay to the nearest whole sample. */
+ auto hrir_delay_round = [](const uint d) noexcept -> uint
+ { return (d+HrirDelayFracHalf) >> HrirDelayFracBits; };
+
+ TRACE("Min delay: %.2f, max delay: %.2f, FIR length: %u\n",
+ min_delay/double{HrirDelayFracOne}, max_delay/double{HrirDelayFracOne}, irSize);
+
+ /* With more channels than first-order provides, each HRIR's delays are
+ * taken relative to its own smaller ear delay; otherwise they are taken
+ * relative to the smallest delay over all points.
+ */
+ const bool per_hrir_min{mChannels.size() > AmbiChannelsFromOrder(1)};
+ /* Accumulate in double precision before converting to float. */
+ auto tmpres = al::vector<std::array<double2,HrirLength>>(mChannels.size());
+ max_delay = 0;
+ for(size_t c{0u};c < AmbiPoints.size();++c)
+ {
+ const ConstHrirSpan hrir{impres[c].hrir};
+ const uint base_delay{per_hrir_min ? minu(impres[c].ldelay, impres[c].rdelay) : min_delay};
+ const uint ldelay{hrir_delay_round(impres[c].ldelay - base_delay)};
+ const uint rdelay{hrir_delay_round(impres[c].rdelay - base_delay)};
+ max_delay = maxu(max_delay, maxu(impres[c].ldelay, impres[c].rdelay) - base_delay);
+
+ for(size_t i{0u};i < mChannels.size();++i)
+ {
+ const double mult{AmbiMatrix[c][i]};
+ /* Copy only as many taps as still fit after the larger delay shift. */
+ const size_t numirs{HrirLength - maxz(ldelay, rdelay)};
+ size_t lidx{ldelay}, ridx{rdelay};
+ for(size_t j{0};j < numirs;++j)
+ {
+ tmpres[i][lidx++][0] += hrir[j][0] * mult;
+ tmpres[i][ridx++][1] += hrir[j][1] * mult;
+ }
+ }
+ }
+ impres.clear();
+
+ for(size_t i{0u};i < mChannels.size();++i)
+ {
+ auto copy_arr = [](const double2 &in) noexcept -> float2
+ { return float2{{static_cast<float>(in[0]), static_cast<float>(in[1])}}; };
+ std::transform(tmpres[i].cbegin(), tmpres[i].cend(), mChannels[i].mCoeffs.begin(),
+ copy_arr);
+ }
+ tmpres.clear();
+
+ /* The filter grows by the largest remaining delay, capped at HrirLength. */
+ const uint max_length{minu(hrir_delay_round(max_delay) + irSize, HrirLength)};
+ TRACE("New max delay: %.2f, FIR length: %u\n", max_delay/double{HrirDelayFracOne},
+ max_length);
+ mIrSize = max_length;
+}
+
+
+namespace {
+
+/* Creates an HrtfStore holding copies of the given data in one allocation.
+ *
+ * The field, elevation, coefficient and delay arrays are laid out after the
+ * HrtfStore object itself, each at its required alignment (16 bytes for the
+ * coefficients), and the store's pointers are set to them. The number of
+ * HRIRs is taken from the last elevation (irOffset + azCount), so `elevs`
+ * must not be empty and `coeffs`/`delays` must hold that many entries.
+ *
+ * Returns the new store with a reference count of 1, or null if the storage
+ * can't be allocated (`filename` is only used for that error message).
+ */
+std::unique_ptr<HrtfStore> CreateHrtfStore(uint rate, ushort irSize,
+    const al::span<const HrtfStore::Field> fields,
+    const al::span<const HrtfStore::Elevation> elevs, const HrirArray *coeffs,
+    const ubyte2 *delays, const char *filename)
+{
+    std::unique_ptr<HrtfStore> Hrtf;
+
+    const size_t irCount{size_t{elevs.back().azCount} + elevs.back().irOffset};
+    size_t total{sizeof(HrtfStore)};
+    total  = RoundUp(total, alignof(HrtfStore::Field)); /* Align for field infos */
+    total += sizeof(HrtfStore::Field)*fields.size();
+    total  = RoundUp(total, alignof(HrtfStore::Elevation)); /* Align for elevation infos */
+    total += sizeof(Hrtf->elev[0])*elevs.size();
+    total  = RoundUp(total, 16); /* Align for coefficients using SIMD */
+    total += sizeof(Hrtf->coeffs[0])*irCount;
+    total += sizeof(Hrtf->delays[0])*irCount;
+
+    /* Check the allocation before constructing into it; placement new must
+     * not be handed a null pointer.
+     */
+    void *storage{al_calloc(16, total)};
+    if(!storage)
+    {
+        ERR("Out of memory allocating storage for %s.\n", filename);
+        return Hrtf;
+    }
+    Hrtf.reset(new (storage) HrtfStore{});
+
+    InitRef(Hrtf->mRef, 1u);
+    Hrtf->sampleRate = rate;
+    Hrtf->irSize = irSize;
+    Hrtf->fdCount = static_cast<uint>(fields.size());
+
+    /* Set up pointers to storage following the main HRTF struct. */
+    char *base = reinterpret_cast<char*>(Hrtf.get());
+    size_t offset{sizeof(HrtfStore)};
+
+    offset = RoundUp(offset, alignof(HrtfStore::Field)); /* Align for field infos */
+    auto field_ = reinterpret_cast<HrtfStore::Field*>(base + offset);
+    offset += sizeof(field_[0])*fields.size();
+
+    offset = RoundUp(offset, alignof(HrtfStore::Elevation)); /* Align for elevation infos */
+    auto elev_ = reinterpret_cast<HrtfStore::Elevation*>(base + offset);
+    offset += sizeof(elev_[0])*elevs.size();
+
+    offset = RoundUp(offset, 16); /* Align for coefficients using SIMD */
+    auto coeffs_ = reinterpret_cast<HrirArray*>(base + offset);
+    offset += sizeof(coeffs_[0])*irCount;
+
+    auto delays_ = reinterpret_cast<ubyte2*>(base + offset);
+    offset += sizeof(delays_[0])*irCount;
+
+    /* The layout walked here must match the size computed above. */
+    assert(offset == total);
+
+    /* Copy input data to storage. */
+    std::copy(fields.cbegin(), fields.cend(), field_);
+    std::copy(elevs.cbegin(), elevs.cend(), elev_);
+    std::copy_n(coeffs, irCount, coeffs_);
+    std::copy_n(delays, irCount, delays_);
+
+    /* Finally, assign the storage pointers. */
+    Hrtf->field = field_;
+    Hrtf->elev = elev_;
+    Hrtf->coeffs = coeffs_;
+    Hrtf->delays = delays_;
+
+    return Hrtf;
+}
+
+/* Fills in the right-ear data from the left-ear data. Within each elevation,
+ * the left-ear coefficients and delay of azimuth index j are copied to the
+ * right-ear slots of the mirrored azimuth index (azCount - j) % azCount.
+ */
+void MirrorLeftHrirs(const al::span<const HrtfStore::Elevation> elevs, HrirArray *coeffs,
+    ubyte2 *delays)
+{
+    for(const auto &elev : elevs)
+    {
+        const size_t first{elev.irOffset};
+        const size_t count{elev.azCount};
+        for(size_t az{0};az < count;++az)
+        {
+            const size_t srcidx{first + az};
+            const size_t dstidx{first + ((count-az) % count)};
+
+            const auto &left = coeffs[srcidx];
+            auto &mirrored = coeffs[dstidx];
+            for(size_t tap{0};tap < mirrored.size();++tap)
+                mirrored[tap][1] = left[tap][0];
+            delays[dstidx][1] = delays[srcidx][0];
+        }
+    }
+}
+
+
+/* Reads a little-endian integer of num_bits bits from the stream and returns
+ * it as a T. Signed values narrower than T are sign-extended. If the read
+ * fails, EOF converted to T is returned; callers are expected to check the
+ * stream state.
+ */
+template<typename T, size_t num_bits=sizeof(T)*8>
+inline T readle(std::istream &data)
+{
+    static_assert((num_bits&7) == 0, "num_bits must be a multiple of 8");
+    static_assert(num_bits <= sizeof(T)*8, "num_bits is too large for the type");
+
+    T value{};
+    if_constexpr(al::endian::native == al::endian::little)
+    {
+        /* The file's byte order matches the host's; read straight in. */
+        data.read(reinterpret_cast<char*>(&value), num_bits/8);
+        if(!data)
+            return static_cast<T>(EOF);
+    }
+    else
+    {
+        /* Read into a zeroed scratch buffer, then reverse all of it into
+         * the result.
+         */
+        al::byte raw[sizeof(T)]{};
+        data.read(reinterpret_cast<char*>(raw), num_bits/8);
+        if(!data)
+            return static_cast<T>(EOF);
+        std::reverse_copy(std::begin(raw), std::end(raw), reinterpret_cast<al::byte*>(&value));
+    }
+
+    if_constexpr(std::is_signed<T>::value && num_bits < sizeof(T)*8)
+    {
+        /* Sign-extend from num_bits to the full width of T. */
+        constexpr auto sign_mask = static_cast<T>(1u << (num_bits-1));
+        return static_cast<T>((value^sign_mask) - sign_mask);
+    }
+    return value;
+}
+
+/* Single-byte case: takes one character from the stream with get(). */
+template<>
+inline uint8_t readle<uint8_t,8>(std::istream &data)
+{
+    const auto ch = data.get();
+    return static_cast<uint8_t>(ch);
+}
+
+
+/* Loads a version 00 data set from the stream (positioned after the magic
+ * marker is assumed; the caller is not visible here).
+ *
+ * Layout as read: sample rate (u32), HRIR count (u16), HRIR size (u16),
+ * elevation count (u8), one HRIR offset (u16) per elevation, then irSize
+ * 16-bit left-ear samples per HRIR, then one 8-bit left-ear delay per HRIR.
+ * Azimuth counts are derived from consecutive offsets. The right ear is
+ * produced by mirroring the left. Returns null, after logging, on a read
+ * failure or when any value is out of range; `filename` is used for
+ * messages only.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename)
+{
+    uint rate{readle<uint32_t>(data)};
+    ushort irCount{readle<uint16_t>(data)};
+    ushort irSize{readle<uint16_t>(data)};
+    ubyte evCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    if(evCount < MinEvCount || evCount > MaxEvCount)
+    {
+        ERR("Unsupported elevation count: evCount=%d (%d to %d)\n",
+            evCount, MinEvCount, MaxEvCount);
+        return nullptr;
+    }
+
+    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    for(auto &elev : elevs)
+        elev.irOffset = readle<uint16_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    /* Offsets must be strictly increasing and stay below the HRIR count. */
+    for(size_t i{1};i < evCount;i++)
+    {
+        if(elevs[i].irOffset <= elevs[i-1].irOffset)
+        {
+            ERR("Invalid evOffset: evOffset[%zu]=%d (last=%d)\n", i, elevs[i].irOffset,
+                elevs[i-1].irOffset);
+            return nullptr;
+        }
+    }
+    if(irCount <= elevs.back().irOffset)
+    {
+        ERR("Invalid evOffset: evOffset[%zu]=%d (irCount=%d)\n",
+            elevs.size()-1, elevs.back().irOffset, irCount);
+        return nullptr;
+    }
+
+    /* Each elevation's azimuth count is the gap to the next offset; the
+     * last one runs to the end of the HRIR list.
+     */
+    for(size_t i{1};i < evCount;i++)
+    {
+        elevs[i-1].azCount = static_cast<ushort>(elevs[i].irOffset - elevs[i-1].irOffset);
+        if(elevs[i-1].azCount < MinAzCount || elevs[i-1].azCount > MaxAzCount)
+        {
+            ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n",
+                i-1, elevs[i-1].azCount, MinAzCount, MaxAzCount);
+            return nullptr;
+        }
+    }
+    elevs.back().azCount = static_cast<ushort>(irCount - elevs.back().irOffset);
+    if(elevs.back().azCount < MinAzCount || elevs.back().azCount > MaxAzCount)
+    {
+        ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n",
+            elevs.size()-1, elevs.back().azCount, MinAzCount, MaxAzCount);
+        return nullptr;
+    }
+
+    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = al::vector<ubyte2>(irCount);
+    for(auto &hrir : coeffs)
+    {
+        /* Only the left channel is stored; scale 16-bit samples to +/-1. */
+        for(auto &val : al::span<float2>{hrir.data(), irSize})
+            val[0] = readle<int16_t>(data) / 32768.0f;
+    }
+    for(auto &val : delays)
+        val[0] = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < irCount;i++)
+    {
+        if(delays[i][0] > MaxHrirDelay)
+        {
+            ERR("Invalid delays[%zu]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+            return nullptr;
+        }
+        /* Convert whole-sample delays to the fixed-point representation. */
+        delays[i][0] <<= HrirDelayFracBits;
+    }
+
+    /* Mirror the left ear responses to the right ear. */
+    MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+
+    const HrtfStore::Field field[1]{{0.0f, evCount}};
+    return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(),
+        delays.data(), filename);
+}
+
+/* Loads a version 01 data set from the stream (positioned after the magic
+ * marker is assumed; the caller is not visible here).
+ *
+ * Layout as read: sample rate (u32), HRIR size (u8), elevation count (u8),
+ * one azimuth count (u8) per elevation, then irSize 16-bit left-ear samples
+ * per HRIR, then one 8-bit left-ear delay per HRIR. HRIR offsets are the
+ * running sum of the azimuth counts. The right ear is produced by mirroring
+ * the left. Returns null, after logging, on a read failure or when any value
+ * is out of range; `filename` is used for messages only.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename)
+{
+    uint rate{readle<uint32_t>(data)};
+    ushort irSize{readle<uint8_t>(data)};
+    ubyte evCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    if(evCount < MinEvCount || evCount > MaxEvCount)
+    {
+        ERR("Unsupported elevation count: evCount=%d (%d to %d)\n",
+            evCount, MinEvCount, MaxEvCount);
+        return nullptr;
+    }
+
+    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    for(auto &elev : elevs)
+        elev.azCount = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < evCount;++i)
+    {
+        if(elevs[i].azCount < MinAzCount || elevs[i].azCount > MaxAzCount)
+        {
+            ERR("Unsupported azimuth count: azCount[%zu]=%d (%d to %d)\n", i, elevs[i].azCount,
+                MinAzCount, MaxAzCount);
+            return nullptr;
+        }
+    }
+
+    /* Each elevation's HRIRs follow directly after the previous one's. */
+    elevs[0].irOffset = 0;
+    for(size_t i{1};i < evCount;i++)
+        elevs[i].irOffset = static_cast<ushort>(elevs[i-1].irOffset + elevs[i-1].azCount);
+    const ushort irCount{static_cast<ushort>(elevs.back().irOffset + elevs.back().azCount)};
+
+    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = al::vector<ubyte2>(irCount);
+    for(auto &hrir : coeffs)
+    {
+        /* Only the left channel is stored; scale 16-bit samples to +/-1. */
+        for(auto &val : al::span<float2>{hrir.data(), irSize})
+            val[0] = readle<int16_t>(data) / 32768.0f;
+    }
+    for(auto &val : delays)
+        val[0] = readle<uint8_t>(data);
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+    for(size_t i{0};i < irCount;i++)
+    {
+        if(delays[i][0] > MaxHrirDelay)
+        {
+            ERR("Invalid delays[%zu]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+            return nullptr;
+        }
+        /* Convert whole-sample delays to the fixed-point representation. */
+        delays[i][0] <<= HrirDelayFracBits;
+    }
+
+    /* Mirror the left ear responses to the right ear. */
+    MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+
+    const HrtfStore::Field field[1]{{0.0f, evCount}};
+    return CreateHrtfStore(rate, irSize, field, {elevs.data(), elevs.size()}, coeffs.data(),
+        delays.data(), filename);
+}
+
+std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
+{
+ constexpr ubyte SampleType_S16{0};
+ constexpr ubyte SampleType_S24{1};
+ constexpr ubyte ChanType_LeftOnly{0};
+ constexpr ubyte ChanType_LeftRight{1};
+
+ uint rate{readle<uint32_t>(data)};
+ ubyte sampleType{readle<uint8_t>(data)};
+ ubyte channelType{readle<uint8_t>(data)};
+ ushort irSize{readle<uint8_t>(data)};
+ ubyte fdCount{readle<uint8_t>(data)};
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ if(sampleType > SampleType_S24)
+ {
+ ERR("Unsupported sample type: %d\n", sampleType);
+ return nullptr;
+ }
+ if(channelType > ChanType_LeftRight)
+ {
+ ERR("Unsupported channel type: %d\n", channelType);
+ return nullptr;
+ }
+
+ if(irSize < MinIrLength || irSize > HrirLength)
+ {
+ ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+ return nullptr;
+ }
+ if(fdCount < 1 || fdCount > MaxFdCount)
+ {
+ ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount,
+ MaxFdCount);
+ return nullptr;
+ }
+
+ auto fields = al::vector<HrtfStore::Field>(fdCount);
+ auto elevs = al::vector<HrtfStore::Elevation>{};
+ for(size_t f{0};f < fdCount;f++)
+ {
+ const ushort distance{readle<uint16_t>(data)};
+ const ubyte evCount{readle<uint8_t>(data)};
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ if(distance < MinFdDistance || distance > MaxFdDistance)
+ {
+ ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance,
+ MinFdDistance, MaxFdDistance);
+ return nullptr;
+ }
+ if(evCount < MinEvCount || evCount > MaxEvCount)
+ {
+ ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount,
+ MinEvCount, MaxEvCount);
+ return nullptr;
+ }
+
+ fields[f].distance = distance / 1000.0f;
+ fields[f].evCount = evCount;
+ if(f > 0 && fields[f].distance <= fields[f-1].distance)
+ {
+ ERR("Field distance[%zu] is not after previous (%f > %f)\n", f, fields[f].distance,
+ fields[f-1].distance);
+ return nullptr;
+ }
+
+ const size_t ebase{elevs.size()};
+ elevs.resize(ebase + evCount);
+ for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount))
+ elev.azCount = readle<uint8_t>(data);
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ for(size_t e{0};e < evCount;e++)
+ {
+ if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount)
+ {
+ ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e,
+ elevs[ebase+e].azCount, MinAzCount, MaxAzCount);
+ return nullptr;
+ }
+ }
+ }
+
+ elevs[0].irOffset = 0;
+ std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(),
+ [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+ -> HrtfStore::Elevation
+ {
+ return HrtfStore::Elevation{cur.azCount,
+ static_cast<ushort>(last.azCount + last.irOffset)};
+ });
+ const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
+
+ auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
+ auto delays = al::vector<ubyte2>(irTotal);
+ if(channelType == ChanType_LeftOnly)
+ {
+ if(sampleType == SampleType_S16)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ val[0] = readle<int16_t>(data) / 32768.0f;
+ }
+ }
+ else if(sampleType == SampleType_S24)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ }
+ }
+ for(auto &val : delays)
+ val[0] = readle<uint8_t>(data);
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+ for(size_t i{0};i < irTotal;++i)
+ {
+ if(delays[i][0] > MaxHrirDelay)
+ {
+ ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+ return nullptr;
+ }
+ delays[i][0] <<= HrirDelayFracBits;
+ }
+
+ /* Mirror the left ear responses to the right ear. */
+ MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+ }
+ else if(channelType == ChanType_LeftRight)
+ {
+ if(sampleType == SampleType_S16)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ {
+ val[0] = readle<int16_t>(data) / 32768.0f;
+ val[1] = readle<int16_t>(data) / 32768.0f;
+ }
+ }
+ }
+ else if(sampleType == SampleType_S24)
+ {
+ for(auto &hrir : coeffs)
+ {
+ for(auto &val : al::span<float2>{hrir.data(), irSize})
+ {
+ val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+ }
+ }
+ }
+ for(auto &val : delays)
+ {
+ val[0] = readle<uint8_t>(data);
+ val[1] = readle<uint8_t>(data);
+ }
+ if(!data || data.eof())
+ {
+ ERR("Failed reading %s\n", filename);
+ return nullptr;
+ }
+
+ for(size_t i{0};i < irTotal;++i)
+ {
+ if(delays[i][0] > MaxHrirDelay)
+ {
+ ERR("Invalid delays[%zu][0]: %d (%d)\n", i, delays[i][0], MaxHrirDelay);
+ return nullptr;
+ }
+ if(delays[i][1] > MaxHrirDelay)
+ {
+ ERR("Invalid delays[%zu][1]: %d (%d)\n", i, delays[i][1], MaxHrirDelay);
+ return nullptr;
+ }
+ delays[i][0] <<= HrirDelayFracBits;
+ delays[i][1] <<= HrirDelayFracBits;
+ }
+ }
+
+ if(fdCount > 1)
+ {
+ auto fields_ = al::vector<HrtfStore::Field>(fields.size());
+ auto elevs_ = al::vector<HrtfStore::Elevation>(elevs.size());
+ auto coeffs_ = al::vector<HrirArray>(coeffs.size());
+ auto delays_ = al::vector<ubyte2>(delays.size());
+
+ /* Simple reverse for the per-field elements. */
+ std::reverse_copy(fields.cbegin(), fields.cend(), fields_.begin());
+
+ /* Each field has a group of elevations, which each have an azimuth
+ * count. Reverse the order of the groups, keeping the relative order
+ * of per-group azimuth counts.
+ */
+ auto elevs__end = elevs_.end();
+ auto copy_azs = [&elevs,&elevs__end](const ptrdiff_t ebase, const HrtfStore::Field &field)
+ -> ptrdiff_t
+ {
+ auto elevs_src = elevs.begin()+ebase;
+ elevs__end = std::copy_backward(elevs_src, elevs_src+field.evCount, elevs__end);
+ return ebase + field.evCount;
+ };
+ (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_azs);
+ assert(elevs_.begin() == elevs__end);
+
+ /* Reestablish the IR offset for each elevation index, given the new
+ * ordering of elevations.
+ */
+ elevs_[0].irOffset = 0;
+ std::partial_sum(elevs_.cbegin(), elevs_.cend(), elevs_.begin(),
+ [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+ -> HrtfStore::Elevation
+ {
+ return HrtfStore::Elevation{cur.azCount,
+ static_cast<ushort>(last.azCount + last.irOffset)};
+ });
+
+ /* Reverse the order of each field's group of IRs. */
+ auto coeffs_end = coeffs_.end();
+ auto delays_end = delays_.end();
+ auto copy_irs = [&elevs,&coeffs,&delays,&coeffs_end,&delays_end](
+ const ptrdiff_t ebase, const HrtfStore::Field &field) -> ptrdiff_t
+ {
+ auto accum_az = [](int count, const HrtfStore::Elevation &elev) noexcept -> int
+ { return count + elev.azCount; };
+ const auto elevs_mid = elevs.cbegin() + ebase;
+ const auto elevs_end = elevs_mid + field.evCount;
+ const int abase{std::accumulate(elevs.cbegin(), elevs_mid, 0, accum_az)};
+ const int num_azs{std::accumulate(elevs_mid, elevs_end, 0, accum_az)};
+
+ coeffs_end = std::copy_backward(coeffs.cbegin() + abase,
+ coeffs.cbegin() + (abase+num_azs), coeffs_end);
+ delays_end = std::copy_backward(delays.cbegin() + abase,
+ delays.cbegin() + (abase+num_azs), delays_end);
+
+ return ebase + field.evCount;
+ };
+ (void)std::accumulate(fields.cbegin(), fields.cend(), ptrdiff_t{0}, copy_irs);
+ assert(coeffs_.begin() == coeffs_end);
+ assert(delays_.begin() == delays_end);
+
+ fields = std::move(fields_);
+ elevs = std::move(elevs_);
+ coeffs = std::move(coeffs_);
+ delays = std::move(delays_);
+ }
+
+ return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()},
+ {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
+}
+
+/**
+ * Loads a version 3 HRTF data set from the given stream, which GetLoadedHrtf
+ * hands over right after reading the format's magic marker. Returns null,
+ * after logging the reason, if the data is truncated or fails validation.
+ *
+ * In this format the fields must be listed with non-increasing distance,
+ * every coefficient is a 24-bit sample, and the delays are already fixed-point
+ * with HrirDelayFracBits fractional bits.
+ *
+ * \param data      Stream to read the little-endian data set from.
+ * \param filename  Name used in log messages.
+ */
+std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename)
+{
+    constexpr ubyte ChanType_LeftOnly{0};
+    constexpr ubyte ChanType_LeftRight{1};
+    /* Largest IR total that can still be represented by the ushort offsets
+     * computed below.
+     */
+    constexpr size_t MaxIrTotal{static_cast<ushort>(~0u)};
+
+    uint rate{readle<uint32_t>(data)};
+    ubyte channelType{readle<uint8_t>(data)};
+    ushort irSize{readle<uint8_t>(data)};
+    ubyte fdCount{readle<uint8_t>(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(channelType > ChanType_LeftRight)
+    {
+        ERR("Unsupported channel type: %d\n", channelType);
+        return nullptr;
+    }
+
+    if(irSize < MinIrLength || irSize > HrirLength)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MinIrLength, HrirLength);
+        return nullptr;
+    }
+    /* Test against the same lower bound that the message reports. */
+    if(fdCount < MinFdCount || fdCount > MaxFdCount)
+    {
+        ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MinFdCount,
+            MaxFdCount);
+        return nullptr;
+    }
+
+    auto fields = al::vector<HrtfStore::Field>(fdCount);
+    auto elevs = al::vector<HrtfStore::Elevation>{};
+    for(size_t f{0};f < fdCount;f++)
+    {
+        const ushort distance{readle<uint16_t>(data)};
+        const ubyte evCount{readle<uint8_t>(data)};
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        if(distance < MinFdDistance || distance > MaxFdDistance)
+        {
+            ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance,
+                MinFdDistance, MaxFdDistance);
+            return nullptr;
+        }
+        if(evCount < MinEvCount || evCount > MaxEvCount)
+        {
+            ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount,
+                MinEvCount, MaxEvCount);
+            return nullptr;
+        }
+
+        /* The file stores millimeters; the field stores that value / 1000. */
+        fields[f].distance = distance / 1000.0f;
+        fields[f].evCount = evCount;
+        if(f > 0 && fields[f].distance > fields[f-1].distance)
+        {
+            ERR("Field distance[%zu] is not before previous (%f <= %f)\n", f, fields[f].distance,
+                fields[f-1].distance);
+            return nullptr;
+        }
+
+        /* Append this field's elevations, reading an azimuth count for each. */
+        const size_t ebase{elevs.size()};
+        elevs.resize(ebase + evCount);
+        for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount))
+            elev.azCount = readle<uint8_t>(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        for(size_t e{0};e < evCount;e++)
+        {
+            if(elevs[ebase+e].azCount < MinAzCount || elevs[ebase+e].azCount > MaxAzCount)
+            {
+                ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e,
+                    elevs[ebase+e].azCount, MinAzCount, MaxAzCount);
+                return nullptr;
+            }
+        }
+    }
+
+    /* The IR offsets and total below are 16-bit. Sum the azimuth counts at
+     * full width first and reject data sets that would wrap around, since a
+     * wrapped total allocates fewer responses than the earlier elevations'
+     * offsets refer to.
+     */
+    const size_t irCount{std::accumulate(elevs.cbegin(), elevs.cend(), size_t{0},
+        [](const size_t count, const HrtfStore::Elevation &elev) noexcept -> size_t
+        { return count + elev.azCount; })};
+    if(irCount > MaxIrTotal)
+    {
+        ERR("Unsupported total HRIR count: %zu (max %zu)\n", irCount, MaxIrTotal);
+        return nullptr;
+    }
+
+    /* Each elevation's IR offset is the sum of all preceding azimuth counts. */
+    elevs[0].irOffset = 0;
+    std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(),
+        [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+            -> HrtfStore::Elevation
+        {
+            return HrtfStore::Elevation{cur.azCount,
+                static_cast<ushort>(last.azCount + last.irOffset)};
+        });
+    const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
+
+    auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
+    auto delays = al::vector<ubyte2>(irTotal);
+    if(channelType == ChanType_LeftOnly)
+    {
+        for(auto &hrir : coeffs)
+        {
+            for(auto &val : al::span<float2>{hrir.data(), irSize})
+                val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+        }
+        for(auto &val : delays)
+            val[0] = readle<uint8_t>(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits)
+            {
+                ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+                    delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay);
+                return nullptr;
+            }
+        }
+
+        /* Mirror the left ear responses to the right ear. */
+        MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+    }
+    else if(channelType == ChanType_LeftRight)
+    {
+        for(auto &hrir : coeffs)
+        {
+            for(auto &val : al::span<float2>{hrir.data(), irSize})
+            {
+                val[0] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+                val[1] = static_cast<float>(readle<int,24>(data)) / 8388608.0f;
+            }
+        }
+        for(auto &val : delays)
+        {
+            val[0] = readle<uint8_t>(data);
+            val[1] = readle<uint8_t>(data);
+        }
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MaxHrirDelay<<HrirDelayFracBits)
+            {
+                ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+                    delays[i][0] / float{HrirDelayFracOne}, MaxHrirDelay);
+                return nullptr;
+            }
+            if(delays[i][1] > MaxHrirDelay<<HrirDelayFracBits)
+            {
+                ERR("Invalid delays[%zu][1]: %f (%d)\n", i,
+                    delays[i][1] / float{HrirDelayFracOne}, MaxHrirDelay);
+                return nullptr;
+            }
+        }
+    }
+
+    return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()},
+        {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
+}
+
+
+/** Returns true if an enumerated HRTF entry already uses the given display name. */
+bool checkName(const std::string &name)
+{
+    for(const HrtfEntry &entry : EnumeratedHrtfs)
+    {
+        if(name == entry.mDispName)
+            return true;
+    }
+    return false;
+}
+
+/**
+ * Adds the given file to the enumerated HRTF list, unless it's already listed.
+ * The display name is the file name without its directory and extension, with
+ * " #<n>" appended as needed to keep display names unique.
+ */
+void AddFileEntry(const std::string &filename)
+{
+    /* A file that was already enumerated is not listed a second time. */
+    for(const HrtfEntry &listed : EnumeratedHrtfs)
+    {
+        if(listed.mFilename == filename)
+        {
+            TRACE("Skipping duplicate file entry %s\n", filename.c_str());
+            return;
+        }
+    }
+
+    /* TODO: Get a human-readable name from the HRTF data (possibly coming in a
+     * format update). */
+    /* The name starts after the last '/', or after the last '\\' when the
+     * path has no '/' at all.
+     */
+    size_t dirsep{filename.find_last_of('/')};
+    if(dirsep == std::string::npos)
+        dirsep = filename.find_last_of('\\');
+    const size_t namestart{(dirsep == std::string::npos) ? size_t{0} : dirsep+1};
+
+    /* Only a '.' located after the start of the name marks an extension. */
+    const size_t dotpos{filename.find_last_of('.')};
+    const bool has_ext{dotpos != std::string::npos && dotpos > namestart};
+    const std::string basename{has_ext ? filename.substr(namestart, dotpos-namestart)
+        : filename.substr(namestart)};
+
+    /* Try "name", then "name #2", "name #3", ... until one is unused. */
+    std::string newname{basename};
+    for(int count{2};checkName(newname);++count)
+    {
+        newname = basename;
+        newname += " #";
+        newname += std::to_string(count);
+    }
+    EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename});
+    const HrtfEntry &added = EnumeratedHrtfs.back();
+
+    TRACE("Adding file entry \"%s\"\n", added.mFilename.c_str());
+}
+
+/* Counterpart of AddFileEntry for built-in resources. The entry's file name
+ * is synthesized as "!<residx>_<dispname>" (the form GetLoadedHrtf parses to
+ * find the resource index), and dispname is used as the display name, with
+ * " #<n>" appended as needed to keep display names unique.
+ */
+void AddBuiltInEntry(const std::string &dispname, uint residx)
+{
+    std::string resname{"!"};
+    resname += std::to_string(residx);
+    resname += '_';
+    resname += dispname;
+    const std::string filename{std::move(resname)};
+
+    /* A resource that was already enumerated is not listed a second time. */
+    for(const HrtfEntry &listed : EnumeratedHrtfs)
+    {
+        if(listed.mFilename == filename)
+        {
+            TRACE("Skipping duplicate file entry %s\n", filename.c_str());
+            return;
+        }
+    }
+
+    /* TODO: Get a human-readable name from the HRTF data (possibly coming in a
+     * format update). */
+
+    /* Try "name", then "name #2", "name #3", ... until one is unused. */
+    std::string newname{dispname};
+    for(int count{2};checkName(newname);++count)
+    {
+        newname = dispname;
+        newname += " #";
+        newname += std::to_string(count);
+    }
+    EnumeratedHrtfs.emplace_back(HrtfEntry{newname, filename});
+    const HrtfEntry &added = EnumeratedHrtfs.back();
+
+    TRACE("Adding built-in entry \"%s\"\n", added.mFilename.c_str());
+}
+
+
+#define IDR_DEFAULT_HRTF_MHR 1
+
+#ifdef ALSOFT_EMBED_HRTF_DATA
+
+#include "hrtf_default.h"
+
+/* Returns the bytes of the embedded resource with the given ID, or an empty
+ * span for an unknown ID.
+ */
+al::span<const char> GetResource(int name)
+{
+    if(name != IDR_DEFAULT_HRTF_MHR)
+        return {};
+    return {reinterpret_cast<const char*>(hrtf_default), sizeof(hrtf_default)};
+}
+
+#else
+
+/* No HRTF data is embedded in this build, so every lookup comes back empty. */
+al::span<const char> GetResource(int /*name*/)
+{ return {}; }
+
+#endif
+
+} // namespace
+
+
+/**
+ * Rebuilds the list of enumerated HRTFs and returns their display names.
+ *
+ * \param pathopt  Optional comma-separated list of paths to search for .mhr
+ *                 files. Whitespace around each element is ignored, as are
+ *                 empty elements.
+ *
+ * The default "openal/hrtf" data location and the built-in data set (when one
+ * is embedded) are added afterward, unless the list ends with an element that
+ * has no comma after it. Holds EnumeratedHrtfLock for the whole call.
+ */
+al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt)
+{
+    std::lock_guard<std::mutex> _{EnumeratedHrtfLock};
+    EnumeratedHrtfs.clear();
+
+    /* isspace has undefined behavior when given a negative value other than
+     * EOF, which a plain char can be (e.g. the bytes of a UTF-8 path on a
+     * platform with signed char). Convert through unsigned char first.
+     */
+    auto is_space = [](const char ch) noexcept -> bool
+    { return isspace(static_cast<unsigned char>(ch)) != 0; };
+
+    bool usedefaults{true};
+    if(pathopt)
+    {
+        const char *pathlist{pathopt->c_str()};
+        while(pathlist && *pathlist)
+        {
+            const char *next, *end;
+
+            /* Skip leading whitespace and empty elements. */
+            while(is_space(*pathlist) || *pathlist == ',')
+                pathlist++;
+            if(*pathlist == '\0')
+                continue;
+
+            next = strchr(pathlist, ',');
+            if(next)
+                end = next++;
+            else
+            {
+                /* The final element isn't followed by a comma, so the
+                 * defaults are not appended.
+                 */
+                end = pathlist + strlen(pathlist);
+                usedefaults = false;
+            }
+
+            /* Trim trailing whitespace from the element. */
+            while(end != pathlist && is_space(*(end-1)))
+                --end;
+            if(end != pathlist)
+            {
+                const std::string pname{pathlist, end};
+                for(const auto &fname : SearchDataFiles(".mhr", pname.c_str()))
+                    AddFileEntry(fname);
+            }
+
+            pathlist = next;
+        }
+    }
+
+    if(usedefaults)
+    {
+        for(const auto &fname : SearchDataFiles(".mhr", "openal/hrtf"))
+            AddFileEntry(fname);
+
+        if(!GetResource(IDR_DEFAULT_HRTF_MHR).empty())
+            AddBuiltInEntry("Built-In HRTF", IDR_DEFAULT_HRTF_MHR);
+    }
+
+    al::vector<std::string> list;
+    list.reserve(EnumeratedHrtfs.size());
+    for(auto &entry : EnumeratedHrtfs)
+        list.emplace_back(entry.mDispName);
+
+    return list;
+}
+
+/**
+ * Returns the HRTF data set enumerated under the given display name, for the
+ * given device sample rate.
+ *
+ * An already-loaded data set for the same file and rate gets a reference
+ * added and is returned directly. Otherwise the data is loaded from its file
+ * or built-in resource, resampled to devrate if its rate differs, and
+ * inserted into the loaded list (kept ordered by file name). Returns null if
+ * the name isn't enumerated or the data can't be loaded.
+ *
+ * Holds EnumeratedHrtfLock, then LoadedHrtfLock, for the duration of the call.
+ */
+HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate)
+{
+    std::lock_guard<std::mutex> _{EnumeratedHrtfLock};
+    auto entry_iter = std::find_if(EnumeratedHrtfs.cbegin(), EnumeratedHrtfs.cend(),
+        [&name](const HrtfEntry &entry) -> bool { return entry.mDispName == name; });
+    if(entry_iter == EnumeratedHrtfs.cend())
+        return nullptr;
+    const std::string &fname = entry_iter->mFilename;
+
+    /* Look through the loaded entries for this file name. If none matches the
+     * device rate, handle is left where a new entry should be inserted.
+     */
+    std::lock_guard<std::mutex> __{LoadedHrtfLock};
+    auto hrtf_lt_fname = [](LoadedHrtf &hrtf, const std::string &filename) -> bool
+    { return hrtf.mFilename < filename; };
+    auto handle = std::lower_bound(LoadedHrtfs.begin(), LoadedHrtfs.end(), fname, hrtf_lt_fname);
+    while(handle != LoadedHrtfs.end() && handle->mFilename == fname)
+    {
+        HrtfStore *hrtf{handle->mEntry.get()};
+        if(hrtf && hrtf->sampleRate == devrate)
+        {
+            hrtf->add_ref();
+            return HrtfStorePtr{hrtf};
+        }
+        ++handle;
+    }
+
+    /* A file name of the form "!<index>_<name>" refers to a built-in
+     * resource, anything else is opened as a file.
+     */
+    std::unique_ptr<std::istream> stream;
+    int residx{};
+    char ch{};
+    if(sscanf(fname.c_str(), "!%d%c", &residx, &ch) == 2 && ch == '_')
+    {
+        TRACE("Loading %s...\n", fname.c_str());
+        al::span<const char> res{GetResource(residx)};
+        if(res.empty())
+        {
+            /* residx is an int, so it's printed with %d. */
+            ERR("Could not get resource %d, %s\n", residx, name.c_str());
+            return nullptr;
+        }
+        stream = std::make_unique<idstream>(res.begin(), res.end());
+    }
+    else
+    {
+        TRACE("Loading %s...\n", fname.c_str());
+        auto fstr = std::make_unique<al::ifstream>(fname.c_str(), std::ios::binary);
+        if(!fstr->is_open())
+        {
+            ERR("Could not open %s\n", fname.c_str());
+            return nullptr;
+        }
+        stream = std::move(fstr);
+    }
+
+    /* Read the magic marker and hand the stream to the matching loader. */
+    std::unique_ptr<HrtfStore> hrtf;
+    char magic[sizeof(magicMarker03)];
+    stream->read(magic, sizeof(magic));
+    if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker03)))
+    {
+        /* gcount() returns a signed std::streamsize, which doesn't match %zu
+         * as-is. It's never negative, so convert it to size_t.
+         */
+        ERR("%s data is too short (%zu bytes)\n", name.c_str(),
+            static_cast<size_t>(stream->gcount()));
+    }
+    else if(memcmp(magic, magicMarker03, sizeof(magicMarker03)) == 0)
+    {
+        TRACE("Detected data set format v3\n");
+        hrtf = LoadHrtf03(*stream, name.c_str());
+    }
+    else if(memcmp(magic, magicMarker02, sizeof(magicMarker02)) == 0)
+    {
+        TRACE("Detected data set format v2\n");
+        hrtf = LoadHrtf02(*stream, name.c_str());
+    }
+    else if(memcmp(magic, magicMarker01, sizeof(magicMarker01)) == 0)
+    {
+        TRACE("Detected data set format v1\n");
+        hrtf = LoadHrtf01(*stream, name.c_str());
+    }
+    else if(memcmp(magic, magicMarker00, sizeof(magicMarker00)) == 0)
+    {
+        TRACE("Detected data set format v0\n");
+        hrtf = LoadHrtf00(*stream, name.c_str());
+    }
+    else
+        ERR("Invalid header in %s: \"%.8s\"\n", name.c_str(), magic);
+    stream.reset();
+
+    if(!hrtf)
+    {
+        ERR("Failed to load %s\n", name.c_str());
+        return nullptr;
+    }
+
+    if(hrtf->sampleRate != devrate)
+    {
+        TRACE("Resampling HRTF %s (%uhz -> %uhz)\n", name.c_str(), hrtf->sampleRate, devrate);
+
+        /* Calculate the last elevation's index and get the total IR count. */
+        const size_t lastEv{std::accumulate(hrtf->field, hrtf->field+hrtf->fdCount, size_t{0},
+            [](const size_t curval, const HrtfStore::Field &field) noexcept -> size_t
+            { return curval + field.evCount; }
+        ) - 1};
+        const size_t irCount{size_t{hrtf->elev[lastEv].irOffset} + hrtf->elev[lastEv].azCount};
+
+        /* Resample all the IRs. */
+        std::array<std::array<double,HrirLength>,2> inout;
+        PPhaseResampler rs;
+        rs.init(hrtf->sampleRate, devrate);
+        for(size_t i{0};i < irCount;++i)
+        {
+            HrirArray &coeffs = const_cast<HrirArray&>(hrtf->coeffs[i]);
+            for(size_t j{0};j < 2;++j)
+            {
+                /* Pull out one ear's response, resample it, and store the
+                 * result back in place.
+                 */
+                std::transform(coeffs.cbegin(), coeffs.cend(), inout[0].begin(),
+                    [j](const float2 &in) noexcept -> double { return in[j]; });
+                rs.process(HrirLength, inout[0].data(), HrirLength, inout[1].data());
+                for(size_t k{0};k < HrirLength;++k)
+                    coeffs[k][j] = static_cast<float>(inout[1][k]);
+            }
+        }
+        rs = {};
+
+        /* Scale the delays for the new sample rate. */
+        float max_delay{0.0f};
+        auto new_delays = al::vector<float2>(irCount);
+        const float rate_scale{static_cast<float>(devrate)/static_cast<float>(hrtf->sampleRate)};
+        for(size_t i{0};i < irCount;++i)
+        {
+            for(size_t j{0};j < 2;++j)
+            {
+                const float new_delay{std::round(hrtf->delays[i][j] * rate_scale) /
+                    float{HrirDelayFracOne}};
+                max_delay = maxf(max_delay, new_delay);
+                new_delays[i][j] = new_delay;
+            }
+        }
+
+        /* If the new delays exceed the max, scale it down to fit (essentially
+         * shrinking the head radius; not ideal but better than a per-delay
+         * clamp).
+         */
+        float delay_scale{HrirDelayFracOne};
+        if(max_delay > MaxHrirDelay)
+        {
+            WARN("Resampled delay exceeds max (%.2f > %d)\n", max_delay, MaxHrirDelay);
+            delay_scale *= float{MaxHrirDelay} / max_delay;
+        }
+
+        /* Convert the scaled delays back to the stored fixed-point form. */
+        for(size_t i{0};i < irCount;++i)
+        {
+            ubyte2 &delays = const_cast<ubyte2&>(hrtf->delays[i]);
+            for(size_t j{0};j < 2;++j)
+                delays[j] = static_cast<ubyte>(float2int(new_delays[i][j]*delay_scale + 0.5f));
+        }
+
+        /* Scale the IR size for the new sample rate and update the stored
+         * sample rate.
+         */
+        const float newIrSize{std::round(static_cast<float>(hrtf->irSize) * rate_scale)};
+        hrtf->irSize = static_cast<uint>(minf(HrirLength, newIrSize));
+        hrtf->sampleRate = devrate;
+    }
+
+    TRACE("Loaded HRTF %s for sample rate %uhz, %u-sample filter\n", name.c_str(),
+        hrtf->sampleRate, hrtf->irSize);
+    handle = LoadedHrtfs.emplace(handle, LoadedHrtf{fname, std::move(hrtf)});
+
+    return HrtfStorePtr{handle->mEntry.get()};
+}
+
+
+/** Increments the reference count and logs the new value. */
+void HrtfStore::add_ref()
+{
+    const auto newref = IncrementRef(mRef);
+    TRACE("HrtfStore %p increasing refcount to %u\n", static_cast<void*>(this), newref);
+}
+
+/**
+ * Decrements the reference count and logs the new value. When the count
+ * reaches 0, every entry in the loaded list whose count reads 0 is freed and
+ * removed, under LoadedHrtfLock.
+ */
+void HrtfStore::release()
+{
+    const auto newref = DecrementRef(mRef);
+    TRACE("HrtfStore %p decreasing refcount to %u\n", static_cast<void*>(this), newref);
+    if(newref != 0)
+        return;
+
+    std::lock_guard<std::mutex> _{LoadedHrtfLock};
+
+    /* The count is read again for each entry under the lock, and only the
+     * ones that read 0 are dropped.
+     */
+    auto drop_if_unused = [](LoadedHrtf &loaded) -> bool
+    {
+        HrtfStore *store{loaded.mEntry.get()};
+        if(!store || ReadRef(store->mRef) != 0)
+            return false;
+
+        TRACE("Unloading unused HRTF %s\n", loaded.mFilename.data());
+        loaded.mEntry = nullptr;
+        return true;
+    };
+    const auto unused = std::remove_if(LoadedHrtfs.begin(), LoadedHrtfs.end(), drop_if_unused);
+    LoadedHrtfs.erase(unused, LoadedHrtfs.end());
+}
diff --git a/core/hrtf.h b/core/hrtf.h
new file mode 100644
index 00000000..61e5bada
--- /dev/null
+++ b/core/hrtf.h
@@ -0,0 +1,90 @@
+#ifndef CORE_HRTF_H
+#define CORE_HRTF_H
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <string>
+
+#include "almalloc.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "atomic.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "mixer/hrtfdefs.h"
+#include "intrusive_ptr.h"
+#include "vector.h"
+
+
+struct HrtfStore { /* A loaded HRTF data set, reference counted through add_ref() and release(). */
+ RefCount mRef;
+
+ uint sampleRate; /* Sample rate of the stored responses, in hz. */
+ uint irSize; /* Length of the response filters, in samples. */
+
+ struct Field {
+ float distance; /* The loaders store the file's millimeter value divided by 1000. */
+ ubyte evCount; /* Number of elevations belonging to this field. */
+ };
+ /* NOTE: Fields are stored *backwards*. field[0] is the farthest field, and
+ * field[fdCount-1] is the nearest.
+ */
+ uint fdCount;
+ const Field *field;
+
+ struct Elevation {
+ ushort azCount; /* Number of azimuths (one response each) at this elevation. */
+ ushort irOffset; /* Index of this elevation's first response; the sum of all preceding azCounts. */
+ };
+ Elevation *elev; /* The elevations of all fields, in field order. */
+ const HrirArray *coeffs; /* One response per azimuth, indexed from Elevation::irOffset. */
+ const ubyte2 *delays; /* Per-response delay pair, fixed-point with HrirDelayFracBits fractional bits. */
+
+ void add_ref();
+ void release();
+
+ DEF_PLACE_NEWDEL()
+};
+using HrtfStorePtr = al::intrusive_ptr<HrtfStore>;
+
+
+struct EvRadians { float value; }; /* Wrapper type for an elevation angle, in radians per its name. */
+struct AzRadians { float value; }; /* Wrapper type for an azimuth angle, in radians per its name. */
+struct AngularPoint { /* An elevation/azimuth pair; DirectHrtfState::build takes a span of these. */
+ EvRadians Elev;
+ AzRadians Azim;
+};
+
+
+struct DirectHrtfState { /* HRTF state with a trailing flexible array of per-channel filter states. */
+ std::array<float,HrtfDirectDelay+BufferLineSize> mTemp;
+
+ /* HRTF filter state for dry buffer content */
+ uint mIrSize{0};
+ al::FlexArray<HrtfChannelState> mChannels;
+
+ DirectHrtfState(size_t numchans) : mChannels{numchans} { } /* Sizes mChannels for numchans entries. */
+ /**
+ * Produces HRTF filter coefficients for decoding B-Format, given a set of
+ * virtual speaker positions, a matching decoding matrix, and per-order
+ * high-frequency gains for the decoder. The calculated impulse responses
+ * are ordered and scaled according to the matrix input.
+ *
+ * AmbiPoints holds the virtual speaker positions, AmbiMatrix the decoding
+ * matrix rows (MaxAmbiChannels coefficients each), and AmbiOrderHFGain one
+ * gain per ambisonic order (MaxAmbiOrder+1 values).
+ */
+ void build(const HrtfStore *Hrtf, const uint irSize,
+ const al::span<const AngularPoint> AmbiPoints, const float (*AmbiMatrix)[MaxAmbiChannels],
+ const float XOverFreq, const al::span<const float,MaxAmbiOrder+1> AmbiOrderHFGain);
+
+ static std::unique_ptr<DirectHrtfState> Create(size_t num_chans); /* Factory taking the channel count. */
+
+ DEF_FAM_NEWDEL(DirectHrtfState, mChannels)
+};
+
+
+al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt);
+HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate);
+
+void GetHrtfCoeffs(const HrtfStore *Hrtf, float elevation, float azimuth, float distance,
+ float spread, HrirArray &coeffs, const al::span<uint,2> delays);
+
+#endif /* CORE_HRTF_H */
diff --git a/core/logging.h b/core/logging.h
index b931c27e..81465929 100644
--- a/core/logging.h
+++ b/core/logging.h
@@ -35,7 +35,12 @@ extern FILE *gLogFile;
#else
-[[gnu::format(printf,3,4)]] void al_print(LogLevel level, FILE *logfile, const char *fmt, ...);
+#ifdef __USE_MINGW_ANSI_STDIO
+[[gnu::format(gnu_printf,3,4)]]
+#else
+[[gnu::format(printf,3,4)]]
+#endif
+void al_print(LogLevel level, FILE *logfile, const char *fmt, ...);
#define TRACE(...) al_print(LogLevel::Trace, gLogFile, "[ALSOFT] (II) " __VA_ARGS__)
diff --git a/core/mixer.cpp b/core/mixer.cpp
new file mode 100644
index 00000000..71e48fe3
--- /dev/null
+++ b/core/mixer.cpp
@@ -0,0 +1,126 @@
+
+#include "config.h"
+
+#include "mixer.h"
+
+#include <cmath>
+
+#include "devformat.h"
+#include "device.h"
+#include "math_defs.h"
+#include "mixer/defs.h"
+
+struct CTag;
+
+
+MixerFunc MixSamples{Mix_<CTag>};
+
+
+/* Computes the N3D-normalized ambisonic encoder coefficients, in ACN order up
+ * to third order, for the given direction components, and then applies the
+ * per-order scaling for the source spread when spread is above 0.
+ */
+std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x,
+    const float spread)
+{
+    std::array<float,MaxAmbiChannels> coeffs;
+
+    /* Zeroth-order */
+    coeffs[0] = 1.0f; /* ACN 0 = 1 */
+    /* First-order */
+    coeffs[1] = 1.732050808f * y; /* ACN 1 = sqrt(3) * Y */
+    coeffs[2] = 1.732050808f * z; /* ACN 2 = sqrt(3) * Z */
+    coeffs[3] = 1.732050808f * x; /* ACN 3 = sqrt(3) * X */
+    /* Second-order */
+    const float xx{x*x}, yy{y*y}, zz{z*z}, xy{x*y}, yz{y*z}, xz{x*z};
+    coeffs[4] = 3.872983346f * xy; /* ACN 4 = sqrt(15) * X * Y */
+    coeffs[5] = 3.872983346f * yz; /* ACN 5 = sqrt(15) * Y * Z */
+    coeffs[6] = 1.118033989f * (3.0f*zz - 1.0f); /* ACN 6 = sqrt(5)/2 * (3*Z*Z - 1) */
+    coeffs[7] = 3.872983346f * xz; /* ACN 7 = sqrt(15) * X * Z */
+    coeffs[8] = 1.936491673f * (xx - yy); /* ACN 8 = sqrt(15)/2 * (X*X - Y*Y) */
+    /* Third-order */
+    coeffs[9] = 2.091650066f * (y*(3.0f*xx - yy)); /* ACN 9 = sqrt(35/8) * Y * (3*X*X - Y*Y) */
+    coeffs[10] = 10.246950766f * (z*xy); /* ACN 10 = sqrt(105) * Z * X * Y */
+    coeffs[11] = 1.620185175f * (y*(5.0f*zz - 1.0f)); /* ACN 11 = sqrt(21/8) * Y * (5*Z*Z - 1) */
+    coeffs[12] = 1.322875656f * (z*(5.0f*zz - 3.0f)); /* ACN 12 = sqrt(7)/2 * Z * (5*Z*Z - 3) */
+    coeffs[13] = 1.620185175f * (x*(5.0f*zz - 1.0f)); /* ACN 13 = sqrt(21/8) * X * (5*Z*Z - 1) */
+    coeffs[14] = 5.123475383f * (z*(xx - yy)); /* ACN 14 = sqrt(105)/2 * Z * (X*X - Y*Y) */
+    coeffs[15] = 2.091650066f * (x*(xx - 3.0f*yy)); /* ACN 15 = sqrt(35/8) * X * (X*X - 3*Y*Y) */
+    /* Fourth-order (not calculated, listed for reference) */
+    /* ACN 16 = sqrt(35)*3/2 * X * Y * (X*X - Y*Y) */
+    /* ACN 17 = sqrt(35/2)*3/2 * (3*X*X - Y*Y) * Y * Z */
+    /* ACN 18 = sqrt(5)*3/2 * X * Y * (7*Z*Z - 1) */
+    /* ACN 19 = sqrt(5/2)*3/2 * Y * Z * (7*Z*Z - 3) */
+    /* ACN 20 = 3/8 * (35*Z*Z*Z*Z - 30*Z*Z + 3) */
+    /* ACN 21 = sqrt(5/2)*3/2 * X * Z * (7*Z*Z - 3) */
+    /* ACN 22 = sqrt(5)*3/4 * (X*X - Y*Y) * (7*Z*Z - 1) */
+    /* ACN 23 = sqrt(35/2)*3/2 * (X*X - 3*Y*Y) * X * Z */
+    /* ACN 24 = sqrt(35)*3/8 * (X*X*X*X - 6*X*X*Y*Y + Y*Y*Y*Y) */
+
+    /* Without a spread, the point-source coefficients are used as-is. */
+    if(!(spread > 0.0f))
+        return coeffs;
+
+    /* Implement the spread by using a spherical source that subtends the
+     * angle spread. See:
+     * http://www.ppsloan.org/publications/StupidSH36.pdf - Appendix A3
+     *
+     * When adjusted for N3D normalization instead of SN3D, these
+     * calculations are:
+     *
+     * ZH0 = -sqrt(pi) * (-1+ca);
+     * ZH1 = 0.5*sqrt(pi) * sa*sa;
+     * ZH2 = -0.5*sqrt(pi) * ca*(-1+ca)*(ca+1);
+     * ZH3 = -0.125*sqrt(pi) * (-1+ca)*(ca+1)*(5*ca*ca - 1);
+     * ZH4 = -0.125*sqrt(pi) * ca*(-1+ca)*(ca+1)*(7*ca*ca - 3);
+     * ZH5 = -0.0625*sqrt(pi) * (-1+ca)*(ca+1)*(21*ca*ca*ca*ca - 14*ca*ca + 1);
+     *
+     * The gain of the source is compensated for size, so that the
+     * loudness doesn't depend on the spread. Thus:
+     *
+     * ZH0 = 1.0f;
+     * ZH1 = 0.5f * (ca+1.0f);
+     * ZH2 = 0.5f * (ca+1.0f)*ca;
+     * ZH3 = 0.125f * (ca+1.0f)*(5.0f*ca*ca - 1.0f);
+     * ZH4 = 0.125f * (ca+1.0f)*(7.0f*ca*ca - 3.0f)*ca;
+     * ZH5 = 0.0625f * (ca+1.0f)*(21.0f*ca*ca*ca*ca - 14.0f*ca*ca + 1.0f);
+     */
+    const float ca{std::cos(spread * 0.5f)};
+    /* Increase the source volume by up to +3dB for a full spread. */
+    const float scale{std::sqrt(1.0f + spread/al::MathDefs<float>::Tau())};
+
+    /* One gain per ambisonic order, zeroth through third. */
+    const std::array<float,4> order_gain{{
+        scale,
+        scale * 0.5f * (ca+1.f),
+        scale * 0.5f * (ca+1.f)*ca,
+        scale * 0.125f * (ca+1.f)*(5.f*ca*ca-1.f)
+    }};
+
+    /* Order N covers ACN indices N*N up to (N+1)*(N+1) - 1. */
+    size_t acn{0};
+    for(size_t order{0};order < order_gain.size();++order)
+    {
+        const size_t order_end{(order+1) * (order+1)};
+        for(;acn < order_end;++acn)
+            coeffs[acn] *= order_gain[order];
+    }
+
+    return coeffs;
+}
+
+/* Writes one gain for each of the mix's buffer channels, calculated as the
+ * channel map's scale times the mapped coefficient times ingain, and zeroes
+ * the rest of the gains.
+ */
+void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
+    const al::span<float,MAX_OUTPUT_CHANNELS> gains)
+{
+    auto chanmap = mix->AmbiMap.cbegin();
+    const auto chanmap_end = chanmap + mix->Buffer.size();
+
+    auto gain_out = gains.begin();
+    for(;chanmap != chanmap_end;++chanmap,++gain_out)
+        *gain_out = chanmap->Scale * coeffs[chanmap->Index] * ingain;
+
+    /* Outputs beyond the mix's channel count get no signal. */
+    std::fill(gain_out, gains.end(), 0.0f);
+}
diff --git a/core/mixer.h b/core/mixer.h
new file mode 100644
index 00000000..309f4224
--- /dev/null
+++ b/core/mixer.h
@@ -0,0 +1,101 @@
+#ifndef CORE_MIXER_H
+#define CORE_MIXER_H
+
+#include <array>
+#include <cmath>
+#include <stddef.h>
+#include <type_traits>
+
+#include "alspan.h"
+#include "ambidefs.h"
+#include "bufferline.h"
+#include "devformat.h"
+
+struct MixParams;
+
+using MixerFunc = void(*)(const al::span<const float> InSamples,
+ const al::span<FloatBufferLine> OutBuffer, float *CurrentGains, const float *TargetGains,
+ const size_t Counter, const size_t OutPos);
+
+extern MixerFunc MixSamples;
+
+
+/**
+ * Calculates ambisonic encoder coefficients using the X, Y, and Z direction
+ * components, which must represent a normalized (unit length) vector, and the
+ * spread is the angular width of the sound (0...tau).
+ *
+ * NOTE: The components use ambisonic coordinates. As a result:
+ *
+ * Ambisonic Y = OpenAL -X
+ * Ambisonic Z = OpenAL Y
+ * Ambisonic X = OpenAL -Z
+ *
+ * The components are ordered such that OpenAL's X, Y, and Z are the first,
+ * second, and third parameters respectively -- simply negate X and Z.
+ */
+std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, const float x,
+ const float spread);
+
+/**
+ * CalcDirectionCoeffs
+ *
+ * Produces the ambisonic coefficients for an OpenAL direction vector, which
+ * has to be normalized (unit length). The spread is the angular width of the
+ * sound (0...tau).
+ */
+inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3],
+    const float spread)
+{
+    /* OpenAL's X and Z are negated to get ambisonic coordinates. */
+    const float al_x{dir[0]};
+    const float al_y{dir[1]};
+    const float al_z{dir[2]};
+    return CalcAmbiCoeffs(-al_x, al_y, -al_z, spread);
+}
+
+/**
+ * CalcAngleCoeffs
+ *
+ * Produces the ambisonic coefficients for a direction given as azimuth and
+ * elevation angles. Both are in radians, going right and up respectively.
+ */
+inline std::array<float,MaxAmbiChannels> CalcAngleCoeffs(const float azimuth,
+    const float elevation, const float spread)
+{
+    /* The horizontal components are scaled by the cosine of the elevation. */
+    const float cos_ev{std::cos(elevation)};
+    const float first{-std::sin(azimuth) * cos_ev};
+    const float second{ std::sin(elevation)};
+    const float third{ std::cos(azimuth) * cos_ev};
+
+    return CalcAmbiCoeffs(first, second, third, spread);
+}
+
+
+/**
+ * ComputePanGains
+ *
+ * Computes panning gains using the given channel decoder coefficients and the
+ * pre-calculated direction or angle coefficients. For B-Format sources, the
+ * coeffs are a 'slice' of a transform matrix for the input channel, used to
+ * scale and orient the sound samples.
+ */
+void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
+ const al::span<float,MAX_OUTPUT_CHANNELS> gains);
+
+
+/**
+ * Helper to set an identity/pass-through panning for ambisonic mixing (3D
+ * input). For each of the count elements starting at iter, func is called
+ * with the element and a coefficient array that is 1 at that element's index
+ * and 0 at the indices before it.
+ */
+template<typename T, typename I, typename F>
+auto SetAmbiPanIdentity(T iter, I count, F func) -> std::enable_if_t<std::is_integral<I>::value>
+{
+    std::array<float,MaxAmbiChannels> coeffs{};
+    for(I i{0};i < count;++i,++iter)
+    {
+        /* Move the single 1.0 along to this element's slot. */
+        if(i > 0)
+            coeffs[i-1] = 0.0f;
+        coeffs[i] = 1.0f;
+        func(*iter, coeffs);
+    }
+}
+
+#endif /* CORE_MIXER_H */
diff --git a/core/mixer/defs.h b/core/mixer/defs.h
index acf60350..ba304f22 100644
--- a/core/mixer/defs.h
+++ b/core/mixer/defs.h
@@ -6,6 +6,7 @@
#include "alspan.h"
#include "core/bufferline.h"
+#include "core/resampler_limits.h"
struct HrtfChannelState;
struct HrtfFilter;
@@ -19,12 +20,6 @@ constexpr int MixerFracBits{12};
constexpr int MixerFracOne{1 << MixerFracBits};
constexpr int MixerFracMask{MixerFracOne - 1};
-/* Maximum number of samples to pad on the ends of a buffer for resampling.
- * Note that the padding is symmetric (half at the beginning and half at the
- * end)!
- */
-constexpr int MaxResamplerPadding{48};
-
constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */
@@ -80,7 +75,7 @@ template<typename InstTag>
void MixHrtfBlend_(const float *InSamples, float2 *AccumSamples, const uint IrSize,
const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize);
template<typename InstTag>
-void MixDirectHrtf_(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize);
diff --git a/core/mixer/hrtfbase.h b/core/mixer/hrtfbase.h
index 7419f960..79b09a3d 100644
--- a/core/mixer/hrtfbase.h
+++ b/core/mixer/hrtfbase.h
@@ -12,7 +12,7 @@
using uint = unsigned int;
using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const size_t irSize,
- const HrirArray &Coeffs, const float left, const float right);
+ const ConstHrirSpan Coeffs, const float left, const float right);
template<ApplyCoeffsT ApplyCoeffs>
inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, const size_t IrSize,
@@ -20,7 +20,7 @@ inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, c
{
ASSUME(BufferSize > 0);
- const HrirArray &Coeffs = *hrtfparams->Coeffs;
+ const ConstHrirSpan Coeffs{hrtfparams->Coeffs};
const float gainstep{hrtfparams->GainStep};
const float gain{hrtfparams->Gain};
@@ -45,9 +45,9 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl
{
ASSUME(BufferSize > 0);
- const auto &OldCoeffs = oldparams->Coeffs;
+ const ConstHrirSpan OldCoeffs{oldparams->Coeffs};
const float oldGainStep{oldparams->Gain / static_cast<float>(BufferSize)};
- const auto &NewCoeffs = *newparams->Coeffs;
+ const ConstHrirSpan NewCoeffs{newparams->Coeffs};
const float newGainStep{newparams->GainStep};
if LIKELY(oldparams->Gain > GainSilenceThreshold)
@@ -84,7 +84,7 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl
}
template<ApplyCoeffsT ApplyCoeffs>
-inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+inline void MixDirectHrtfBase(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples,
float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
{
@@ -133,7 +133,7 @@ inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOu
ChanState->mSplitter.processHfScale(tempbuf, ChanState->mHfScale);
/* Now apply the HRIR coefficients to this channel. */
- const auto &Coeffs = ChanState->mCoeffs;
+ const ConstHrirSpan Coeffs{ChanState->mCoeffs};
for(size_t i{0u};i < BufferSize;++i)
{
const float insample{tempbuf[i]};
diff --git a/core/mixer/hrtfdefs.h b/core/mixer/hrtfdefs.h
index 89a9bb8d..7046a31e 100644
--- a/core/mixer/hrtfdefs.h
+++ b/core/mixer/hrtfdefs.h
@@ -3,6 +3,7 @@
#include <array>
+#include "alspan.h"
#include "core/ambidefs.h"
#include "core/bufferline.h"
#include "core/filters/splitter.h"
@@ -28,9 +29,11 @@ constexpr uint MinIrLength{8};
constexpr uint HrtfDirectDelay{256};
using HrirArray = std::array<float2,HrirLength>;
+using HrirSpan = al::span<float2,HrirLength>;
+using ConstHrirSpan = al::span<const float2,HrirLength>;
struct MixHrtfFilter {
- const HrirArray *Coeffs;
+ const ConstHrirSpan Coeffs;
uint2 Delay;
float Gain;
float GainStep;
diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp
index ff9538a4..f82f7dd1 100644
--- a/core/mixer/mixer_c.cpp
+++ b/core/mixer/mixer_c.cpp
@@ -32,15 +32,16 @@ inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint
inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
{
const size_t m{istate.bsinc.m};
+ ASSUME(m > 0);
// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
- const float *fil{istate.bsinc.filter + m*pi*4};
- const float *phd{fil + m};
- const float *scd{phd + m};
- const float *spd{scd + m};
+ const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT spd{scd + m};
// Apply the scale and phase interpolated filter.
float r{0.0f};
@@ -51,13 +52,14 @@ inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, con
inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
{
const size_t m{istate.bsinc.m};
+ ASSUME(m > 0);
// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
- const float *fil{istate.bsinc.filter + m*pi*4};
- const float *phd{fil + m};
+ const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
// Apply the phase interpolated filter.
float r{0.0f};
@@ -83,7 +85,7 @@ float *DoResample(const InterpState *state, float *RESTRICT src, uint frac, uint
return dst.data();
}
-inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs,
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
const float left, const float right)
{
ASSUME(IrSize >= MinIrLength);
@@ -149,7 +151,7 @@ void MixHrtfBlend_<CTag>(const float *InSamples, float2 *AccumSamples, const uin
}
template<>
-void MixDirectHrtf_<CTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+void MixDirectHrtf_<CTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
{
diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp
index f3e5f130..a3afdc6b 100644
--- a/core/mixer/mixer_neon.cpp
+++ b/core/mixer/mixer_neon.cpp
@@ -34,7 +34,7 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
-inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs,
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
const float left, const float right)
{
float32x4_t leftright4;
@@ -118,6 +118,7 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src
const float *const filter{state->bsinc.filter};
const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)};
const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
src -= state->bsinc.l;
for(float &out_sample : dst)
@@ -130,10 +131,10 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src
float32x4_t r4{vdupq_n_f32(0.0f)};
{
const float32x4_t pf4{vdupq_n_f32(pf)};
- const float *fil{filter + m*pi*4};
- const float *phd{fil + m};
- const float *scd{phd + m};
- const float *spd{scd + m};
+ const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT spd{scd + m};
size_t td{m >> 2};
size_t j{0u};
@@ -163,6 +164,7 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT
{
const float *const filter{state->bsinc.filter};
const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
src -= state->bsinc.l;
for(float &out_sample : dst)
@@ -175,8 +177,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT
float32x4_t r4{vdupq_n_f32(0.0f)};
{
const float32x4_t pf4{vdupq_n_f32(pf)};
- const float *fil{filter + m*pi*4};
- const float *phd{fil + m};
+ const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
size_t td{m >> 2};
size_t j{0u};
@@ -213,7 +215,7 @@ void MixHrtfBlend_<NEONTag>(const float *InSamples, float2 *AccumSamples, const
}
template<>
-void MixDirectHrtf_<NEONTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+void MixDirectHrtf_<NEONTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
{
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
index c0fd8fa1..3cfb00a5 100644
--- a/core/mixer/mixer_sse.cpp
+++ b/core/mixer/mixer_sse.cpp
@@ -26,7 +26,7 @@ constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
-inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs,
+inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
const float left, const float right)
{
const __m128 lrlr{_mm_setr_ps(left, right, left, right)};
@@ -82,6 +82,7 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src,
const float *const filter{state->bsinc.filter};
const __m128 sf4{_mm_set1_ps(state->bsinc.sf)};
const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
src -= state->bsinc.l;
for(float &out_sample : dst)
@@ -94,10 +95,10 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src,
__m128 r4{_mm_setzero_ps()};
{
const __m128 pf4{_mm_set1_ps(pf)};
- const float *fil{filter + m*pi*4};
- const float *phd{fil + m};
- const float *scd{phd + m};
- const float *spd{scd + m};
+ const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT spd{scd + m};
size_t td{m >> 2};
size_t j{0u};
@@ -128,6 +129,7 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT
{
const float *const filter{state->bsinc.filter};
const size_t m{state->bsinc.m};
+ ASSUME(m > 0);
src -= state->bsinc.l;
for(float &out_sample : dst)
@@ -140,8 +142,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT
__m128 r4{_mm_setzero_ps()};
{
const __m128 pf4{_mm_set1_ps(pf)};
- const float *fil{filter + m*pi*4};
- const float *phd{fil + m};
+ const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT phd{fil + m};
size_t td{m >> 2};
size_t j{0u};
@@ -179,7 +181,7 @@ void MixHrtfBlend_<SSETag>(const float *InSamples, float2 *AccumSamples, const u
}
template<>
-void MixDirectHrtf_<SSETag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+void MixDirectHrtf_<SSETag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples,
float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize)
{
diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp
index f91d5dcd..99d04210 100644
--- a/core/mixer/mixer_sse2.cpp
+++ b/core/mixer/mixer_sse2.cpp
@@ -52,10 +52,10 @@ float *Resample_<LerpTag,SSE2Tag>(const InterpState*, float *RESTRICT src, uint
auto dst_iter = dst.begin();
for(size_t todo{dst.size()>>2};todo;--todo)
{
- const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))};
- const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))};
- const int pos2{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(2, 2, 2, 2)))};
- const int pos3{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(3, 3, 3, 3)))};
+ const int pos0{_mm_cvtsi128_si32(pos4)};
+ const int pos1{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 4))};
+ const int pos2{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 8))};
+ const int pos3{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 12))};
const __m128 val1{_mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ])};
const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])};
diff --git a/core/resampler_limits.h b/core/resampler_limits.h
new file mode 100644
index 00000000..9d4cefda
--- /dev/null
+++ b/core/resampler_limits.h
@@ -0,0 +1,12 @@
+#ifndef CORE_RESAMPLER_LIMITS_H
+#define CORE_RESAMPLER_LIMITS_H
+
+/* Maximum number of samples to pad on the ends of a buffer for resampling.
+ * Note that the padding is symmetric (half at the beginning and half at the
+ * end)!
+ */
+constexpr int MaxResamplerPadding{48};
+
+constexpr int MaxResamplerEdge{MaxResamplerPadding >> 1};
+
+#endif /* CORE_RESAMPLER_LIMITS_H */
diff --git a/core/rtkit.cpp b/core/rtkit.cpp
new file mode 100644
index 00000000..8b489e71
--- /dev/null
+++ b/core/rtkit.cpp
@@ -0,0 +1,240 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+/***
+ Copyright 2009 Lennart Poettering
+ Copyright 2010 David Henningsson <[email protected]>
+ Copyright 2021 Chris Robinson
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+***/
+
+#include "config.h"
+
+#include "rtkit.h"
+
+#include <errno.h>
+
+#ifdef __linux__
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <memory>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+
+namespace dbus {
+ constexpr int TypeString{'s'};
+ constexpr int TypeVariant{'v'};
+ constexpr int TypeInt32{'i'};
+ constexpr int TypeUInt32{'u'};
+ constexpr int TypeInt64{'x'};
+ constexpr int TypeUInt64{'t'};
+ constexpr int TypeInvalid{'\0'};
+
+ struct MessageDeleter {
+ void operator()(DBusMessage *m) { (*pdbus_message_unref)(m); }
+ };
+ using MessagePtr = std::unique_ptr<DBusMessage,MessageDeleter>;
+} // namespace dbus
+
+namespace {
+
+inline pid_t _gettid()
+{ return static_cast<pid_t>(syscall(SYS_gettid)); }
+
+int translate_error(const char *name)
+{
+ if(strcmp(name, DBUS_ERROR_NO_MEMORY) == 0)
+ return -ENOMEM;
+ if(strcmp(name, DBUS_ERROR_SERVICE_UNKNOWN) == 0
+ || strcmp(name, DBUS_ERROR_NAME_HAS_NO_OWNER) == 0)
+ return -ENOENT;
+ if(strcmp(name, DBUS_ERROR_ACCESS_DENIED) == 0
+ || strcmp(name, DBUS_ERROR_AUTH_FAILED) == 0)
+ return -EACCES;
+ return -EIO;
+}
+
+int rtkit_get_int_property(DBusConnection *connection, const char *propname, long long *propval)
+{
+ dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+ "org.freedesktop.DBus.Properties", "Get")};
+ if(!m) return -ENOMEM;
+
+ const char *interfacestr = RTKIT_SERVICE_NAME;
+ auto ready = (*pdbus_message_append_args)(m.get(),
+ dbus::TypeString, &interfacestr,
+ dbus::TypeString, &propname,
+ dbus::TypeInvalid);
+ if(!ready) return -ENOMEM;
+
+ dbus::Error error;
+ dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1,
+ &error.get())};
+ if(!r) return translate_error(error->name);
+
+ if((*pdbus_set_error_from_message)(&error.get(), r.get()))
+ return translate_error(error->name);
+
+ int ret{-EBADMSG};
+ DBusMessageIter iter{};
+ (*pdbus_message_iter_init)(r.get(), &iter);
+ while(int curtype{(*pdbus_message_iter_get_arg_type)(&iter)})
+ {
+ if(curtype == dbus::TypeVariant)
+ {
+ DBusMessageIter subiter{};
+ (*pdbus_message_iter_recurse)(&iter, &subiter);
+
+ while((curtype=(*pdbus_message_iter_get_arg_type)(&subiter)) != dbus::TypeInvalid)
+ {
+ if(curtype == dbus::TypeInt32)
+ {
+ dbus_int32_t i32{};
+ (*pdbus_message_iter_get_basic)(&subiter, &i32);
+ *propval = i32;
+ ret = 0;
+ }
+
+ if(curtype == dbus::TypeInt64)
+ {
+ dbus_int64_t i64{};
+ (*pdbus_message_iter_get_basic)(&subiter, &i64);
+ *propval = i64;
+ ret = 0;
+ }
+
+ (*pdbus_message_iter_next)(&subiter);
+ }
+ }
+ (*pdbus_message_iter_next)(&iter);
+ }
+
+ return ret;
+}
+
+} // namespace
+
+extern "C" {
+int rtkit_get_max_realtime_priority(DBusConnection *connection)
+{
+ long long retval{};
+ int err{rtkit_get_int_property(connection, "MaxRealtimePriority", &retval)};
+ return err < 0 ? err : static_cast<int>(retval);
+}
+
+int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level)
+{
+ long long retval{};
+ int err{rtkit_get_int_property(connection, "MinNiceLevel", &retval)};
+ if(err >= 0) *min_nice_level = static_cast<int>(retval);
+ return err;
+}
+
+long long rtkit_get_rttime_usec_max(DBusConnection *connection)
+{
+ long long retval{};
+ int err{rtkit_get_int_property(connection, "RTTimeUSecMax", &retval)};
+ return err < 0 ? err : retval;
+}
+
+int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority)
+{
+ if(thread == 0)
+ thread = _gettid();
+
+ dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+ "org.freedesktop.RealtimeKit1", "MakeThreadRealtime")};
+ if(!m) return -ENOMEM;
+
+ auto u64 = static_cast<dbus_uint64_t>(thread);
+ auto u32 = static_cast<dbus_uint32_t>(priority);
+ auto ready = (*pdbus_message_append_args)(m.get(),
+ dbus::TypeUInt64, &u64,
+ dbus::TypeUInt32, &u32,
+ dbus::TypeInvalid);
+ if(!ready) return -ENOMEM;
+
+ dbus::Error error;
+ dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1,
+ &error.get())};
+ if(!r) return translate_error(error->name);
+
+ if((*pdbus_set_error_from_message)(&error.get(), r.get()))
+ return translate_error(error->name);
+
+ return 0;
+}
+
+int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level)
+{
+ if(thread == 0)
+ thread = _gettid();
+
+ dbus::MessagePtr m{(*pdbus_message_new_method_call)(RTKIT_SERVICE_NAME, RTKIT_OBJECT_PATH,
+ "org.freedesktop.RealtimeKit1", "MakeThreadHighPriority")};
+ if(!m) return -ENOMEM;
+
+ auto u64 = static_cast<dbus_uint64_t>(thread);
+ auto s32 = static_cast<dbus_int32_t>(nice_level);
+ auto ready = (*pdbus_message_append_args)(m.get(),
+ dbus::TypeUInt64, &u64,
+ dbus::TypeInt32, &s32,
+ dbus::TypeInvalid);
+ if(!ready) return -ENOMEM;
+
+ dbus::Error error;
+ dbus::MessagePtr r{(*pdbus_connection_send_with_reply_and_block)(connection, m.get(), -1,
+ &error.get())};
+ if(!r) return translate_error(error->name);
+
+ if((*pdbus_set_error_from_message)(&error.get(), r.get()))
+ return translate_error(error->name);
+
+ return 0;
+}
+} // extern "C"
+
+#else
+
+extern "C" {
+int rtkit_make_realtime(DBusConnection *connection, pid_t thread, int priority)
+{ return -ENOTSUP; }
+
+int rtkit_make_high_priority(DBusConnection *connection, pid_t thread, int nice_level)
+{ return -ENOTSUP; }
+
+int rtkit_get_max_realtime_priority(DBusConnection *connection)
+{ return -ENOTSUP; }
+
+int rtkit_get_min_nice_level(DBusConnection *connection, int *min_nice_level)
+{ return -ENOTSUP; }
+
+long long rtkit_get_rttime_usec_max(DBusConnection *connection)
+{ return -ENOTSUP; }
+} // extern "C"
+
+#endif
diff --git a/core/rtkit.h b/core/rtkit.h
new file mode 100644
index 00000000..96e81d4a
--- /dev/null
+++ b/core/rtkit.h
@@ -0,0 +1,80 @@
+/*-*- Mode: C; c-basic-offset: 8 -*-*/
+
+#ifndef foortkithfoo
+#define foortkithfoo
+
+/***
+ Copyright 2009 Lennart Poettering
+ Copyright 2010 David Henningsson <[email protected]>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+***/
+
+#include <sys/types.h>
+
+#include "dbus_wrap.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is the reference implementation for a client for
+ * RealtimeKit. You don't have to use this, but if you do, just copy
+ * these sources into your repository. */
+
+#define RTKIT_SERVICE_NAME "org.freedesktop.RealtimeKit1"
+#define RTKIT_OBJECT_PATH "/org/freedesktop/RealtimeKit1"
+
+/* This is mostly equivalent to sched_setscheduler(thread, SCHED_RR, {
+ * .sched_priority = priority }). 'thread' needs to be a kernel thread
+ * id as returned by gettid(), not a pthread_t! If 'thread' is 0 the
+ * current thread is used. The returned value is a negative errno
+ * style error code, or 0 on success. */
+int rtkit_make_realtime(DBusConnection *system_bus, pid_t thread, int priority);
+
+/* This is mostly equivalent to setpriority(PRIO_PROCESS, thread,
+ * nice_level). 'thread' needs to be a kernel thread id as returned by
+ * gettid(), not a pthread_t! If 'thread' is 0 the current thread is
+ * used. The returned value is a negative errno style error code, or 0
+ * on success.*/
+int rtkit_make_high_priority(DBusConnection *system_bus, pid_t thread, int nice_level);
+
+/* Return the maximum value of realtime priority available. Realtime requests
+ * above this value will fail. A negative value is an errno style error code.
+ */
+int rtkit_get_max_realtime_priority(DBusConnection *system_bus);
+
+/* Retrieve the minimum value of nice level available. High prio requests
+ * below this value will fail. The returned value is a negative errno
+ * style error code, or 0 on success.*/
+int rtkit_get_min_nice_level(DBusConnection *system_bus, int *min_nice_level);
+
+/* Return the maximum value of RLIMIT_RTTIME to set before attempting a
+ * realtime request. A negative value is an errno style error code.
+ */
+long long rtkit_get_rttime_usec_max(DBusConnection *system_bus);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
index 92f35901..f1af4b94 100644
--- a/core/uhjfilter.cpp
+++ b/core/uhjfilter.cpp
@@ -3,227 +3,49 @@
#include "uhjfilter.h"
-#ifdef HAVE_SSE_INTRINSICS
-#include <xmmintrin.h>
-#elif defined(HAVE_NEON)
-#include <arm_neon.h>
-#endif
-
#include <algorithm>
#include <iterator>
#include "alcomplex.h"
#include "alnumeric.h"
#include "opthelpers.h"
+#include "phase_shifter.h"
namespace {
-using complex_d = std::complex<double>;
-
-struct PhaseShifterT {
- alignas(16) std::array<float,Uhj2Encoder::sFilterSize> Coeffs;
-
- /* Some notes on this filter construction.
- *
- * A wide-band phase-shift filter needs a delay to maintain linearity. A
- * dirac impulse in the center of a time-domain buffer represents a filter
- * passing all frequencies through as-is with a pure delay. Converting that
- * to the frequency domain, adjusting the phase of each frequency bin by
- * +90 degrees, then converting back to the time domain, results in a FIR
- * filter that applies a +90 degree wide-band phase-shift.
- *
- * A particularly notable aspect of the time-domain filter response is that
- * every other coefficient is 0. This allows doubling the effective size of
- * the filter, by storing only the non-0 coefficients and double-stepping
- * over the input to apply it.
- *
- * Additionally, the resulting filter is independent of the sample rate.
- * The same filter can be applied regardless of the device's sample rate
- * and achieve the same effect.
- */
- PhaseShifterT()
- {
- constexpr size_t fft_size{Uhj2Encoder::sFilterSize * 2};
- constexpr size_t half_size{fft_size / 2};
-
- /* Generate a frequency domain impulse with a +90 degree phase offset.
- * Reconstruct the mirrored frequencies to convert to the time domain.
- */
- auto fftBuffer = std::make_unique<complex_d[]>(fft_size);
- std::fill_n(fftBuffer.get(), fft_size, complex_d{});
- fftBuffer[half_size] = 1.0;
-
- forward_fft({fftBuffer.get(), fft_size});
- for(size_t i{0};i < half_size+1;++i)
- fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
- for(size_t i{half_size+1};i < fft_size;++i)
- fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
- inverse_fft({fftBuffer.get(), fft_size});
-
- /* Reverse the filter for simpler processing, and store only the non-0
- * coefficients.
- */
- auto fftiter = fftBuffer.get() + half_size + (Uhj2Encoder::sFilterSize-1);
- for(float &coeff : Coeffs)
- {
- coeff = static_cast<float>(fftiter->real() / double{fft_size});
- fftiter -= 2;
- }
- }
-};
-const PhaseShifterT PShift{};
-
-void allpass_process(al::span<float> dst, const float *RESTRICT src)
-{
-#ifdef HAVE_SSE_INTRINSICS
- size_t pos{0};
- if(size_t todo{dst.size()>>1})
- {
- do {
- __m128 r04{_mm_setzero_ps()};
- __m128 r14{_mm_setzero_ps()};
- for(size_t j{0};j < PShift.Coeffs.size();j+=4)
- {
- const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])};
- const __m128 s0{_mm_loadu_ps(&src[j*2])};
- const __m128 s1{_mm_loadu_ps(&src[j*2 + 4])};
-
- __m128 s{_mm_shuffle_ps(s0, s1, _MM_SHUFFLE(2, 0, 2, 0))};
- r04 = _mm_add_ps(r04, _mm_mul_ps(s, coeffs));
-
- s = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(3, 1, 3, 1));
- r14 = _mm_add_ps(r14, _mm_mul_ps(s, coeffs));
- }
- r04 = _mm_add_ps(r04, _mm_shuffle_ps(r04, r04, _MM_SHUFFLE(0, 1, 2, 3)));
- r04 = _mm_add_ps(r04, _mm_movehl_ps(r04, r04));
- dst[pos++] += _mm_cvtss_f32(r04);
-
- r14 = _mm_add_ps(r14, _mm_shuffle_ps(r14, r14, _MM_SHUFFLE(0, 1, 2, 3)));
- r14 = _mm_add_ps(r14, _mm_movehl_ps(r14, r14));
- dst[pos++] += _mm_cvtss_f32(r14);
-
- src += 2;
- } while(--todo);
- }
- if((dst.size()&1))
- {
- __m128 r4{_mm_setzero_ps()};
- for(size_t j{0};j < PShift.Coeffs.size();j+=4)
- {
- const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])};
- /* NOTE: This could alternatively be done with two unaligned loads
- * and a shuffle. Which would be better?
- */
- const __m128 s{_mm_setr_ps(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])};
- r4 = _mm_add_ps(r4, _mm_mul_ps(s, coeffs));
- }
- r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
- r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
-
- dst[pos] += _mm_cvtss_f32(r4);
- }
-
-#elif defined(HAVE_NEON)
+static_assert(UhjEncoder::sFilterDelay==UhjDecoder::sFilterDelay, "UHJ filter delays mismatch");
- size_t pos{0};
- if(size_t todo{dst.size()>>1})
- {
- /* There doesn't seem to be NEON intrinsics to do this kind of stipple
- * shuffling, so there's two custom methods for it.
- */
- auto shuffle_2020 = [](float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 0))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 2), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 0), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 2), ret, 3);
- return ret;
- };
- auto shuffle_3131 = [](float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 1))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 3), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 1), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 3), ret, 3);
- return ret;
- };
- do {
- float32x4_t r04{vdupq_n_f32(0.0f)};
- float32x4_t r14{vdupq_n_f32(0.0f)};
- for(size_t j{0};j < PShift.Coeffs.size();j+=4)
- {
- const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])};
- const float32x4_t s0{vld1q_f32(&src[j*2])};
- const float32x4_t s1{vld1q_f32(&src[j*2 + 4])};
-
- r04 = vmlaq_f32(r04, shuffle_2020(s0, s1), coeffs);
- r14 = vmlaq_f32(r14, shuffle_3131(s0, s1), coeffs);
- }
- r04 = vaddq_f32(r04, vrev64q_f32(r04));
- dst[pos++] = vget_lane_f32(vadd_f32(vget_low_f32(r04), vget_high_f32(r04)), 0);
-
- r14 = vaddq_f32(r14, vrev64q_f32(r14));
- dst[pos++] = vget_lane_f32(vadd_f32(vget_low_f32(r14), vget_high_f32(r14)), 0);
-
- src += 2;
- } while(--todo);
- }
- if((dst.size()&1))
- {
- auto load4 = [](float32_t a, float32_t b, float32_t c, float32_t d)
- {
- float32x4_t ret{vmovq_n_f32(a)};
- ret = vsetq_lane_f32(b, ret, 1);
- ret = vsetq_lane_f32(c, ret, 2);
- ret = vsetq_lane_f32(d, ret, 3);
- return ret;
- };
- float32x4_t r4{vdupq_n_f32(0.0f)};
- for(size_t j{0};j < PShift.Coeffs.size();j+=4)
- {
- const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])};
- const float32x4_t s{load4(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])};
- r4 = vmlaq_f32(r4, s, coeffs);
- }
- r4 = vaddq_f32(r4, vrev64q_f32(r4));
- dst[pos] = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0);
- }
-
-#else
-
- for(float &output : dst)
- {
- float ret{0.0f};
- for(size_t j{0};j < PShift.Coeffs.size();++j)
- ret += src[j*2] * PShift.Coeffs[j];
+using complex_d = std::complex<double>;
- output += ret;
- ++src;
- }
-#endif
-}
+const PhaseShifterT<UhjEncoder::sFilterDelay*2> PShift{};
} // namespace
-/* Encoding 2-channel UHJ from B-Format is done as:
+/* Encoding UHJ from B-Format is done as:
*
* S = 0.9396926*W + 0.1855740*X
* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y
*
* Left = (S + D)/2.0
* Right = (S - D)/2.0
+ * T = j(-0.1432*W + 0.6511746*X) - 0.7071068*Y
+ * Q = 0.9772*Z
*
- * where j is a wide-band +90 degree phase shift.
+ * where j is a wide-band +90 degree phase shift. T is excluded from 2-channel
+ * output, and Q is excluded from 2- and 3-channel output.
*
* The phase shift is done using a FIR filter derived from an FFT'd impulse
* with the desired shift.
*/
-void Uhj2Encoder::encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+void UhjEncoder::encode(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const FloatBufferLine *InSamples, const size_t SamplesToDo)
{
+ /* Given FuMa input, a +3dB boost is needed for the expected levels. */
+ static constexpr float sqrt2{1.41421356237f};
+
ASSUME(SamplesToDo > 0);
float *RESTRICT left{al::assume_aligned<16>(LeftOut.data())};
@@ -233,43 +55,120 @@ void Uhj2Encoder::encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
const float *RESTRICT xinput{al::assume_aligned<16>(InSamples[1].data())};
const float *RESTRICT yinput{al::assume_aligned<16>(InSamples[2].data())};
- /* Combine the previously delayed mid/side signal with the input. */
+ /* Combine the previously delayed S/D signal with the input. Include any
+ * existing direct signal with it.
+ */
/* S = 0.9396926*W + 0.1855740*X */
- auto miditer = std::copy(mMidDelay.cbegin(), mMidDelay.cend(), mMid.begin());
+ auto miditer = mS.begin() + sFilterDelay;
std::transform(winput, winput+SamplesToDo, xinput, miditer,
[](const float w, const float x) noexcept -> float
- { return 0.9396926f*w + 0.1855740f*x; });
+ { return 0.9396926f*sqrt2*w + 0.1855740f*sqrt2*x; });
+ for(size_t i{0};i < SamplesToDo;++i,++miditer)
+ *miditer += left[i] + right[i];
/* D = 0.6554516*Y */
- auto sideiter = std::copy(mSideDelay.cbegin(), mSideDelay.cend(), mSide.begin());
+ auto sideiter = mD.begin() + sFilterDelay;
std::transform(yinput, yinput+SamplesToDo, sideiter,
- [](const float y) noexcept -> float { return 0.6554516f*y; });
-
- /* Include any existing direct signal in the mid/side buffers. */
- for(size_t i{0};i < SamplesToDo;++i,++miditer)
- *miditer += left[i] + right[i];
+ [](const float y) noexcept -> float { return 0.6554516f*sqrt2*y; });
for(size_t i{0};i < SamplesToDo;++i,++sideiter)
*sideiter += left[i] - right[i];
- /* Copy the future samples back to the delay buffers for next time. */
- std::copy_n(mMid.cbegin()+SamplesToDo, mMidDelay.size(), mMidDelay.begin());
- std::copy_n(mSide.cbegin()+SamplesToDo, mSideDelay.size(), mSideDelay.begin());
-
- /* Now add the all-passed signal into the side signal. */
-
/* D += j(-0.3420201*W + 0.5098604*X) */
- auto tmpiter = std::copy(mSideHistory.cbegin(), mSideHistory.cend(), mTemp.begin());
+ auto tmpiter = std::copy(mWXHistory.cbegin(), mWXHistory.cend(), mTemp.begin());
std::transform(winput, winput+SamplesToDo, xinput, tmpiter,
[](const float w, const float x) noexcept -> float
- { return -0.3420201f*w + 0.5098604f*x; });
- std::copy_n(mTemp.cbegin()+SamplesToDo, mSideHistory.size(), mSideHistory.begin());
- allpass_process({mSide.data(), SamplesToDo}, mTemp.data());
+ { return -0.3420201f*sqrt2*w + 0.5098604f*sqrt2*x; });
+ std::copy_n(mTemp.cbegin()+SamplesToDo, mWXHistory.size(), mWXHistory.begin());
+ PShift.processAccum({mD.data(), SamplesToDo}, mTemp.data());
/* Left = (S + D)/2.0 */
for(size_t i{0};i < SamplesToDo;i++)
- left[i] = (mMid[i] + mSide[i]) * 0.5f;
+ left[i] = (mS[i] + mD[i]) * 0.5f;
/* Right = (S - D)/2.0 */
for(size_t i{0};i < SamplesToDo;i++)
- right[i] = (mMid[i] - mSide[i]) * 0.5f;
+ right[i] = (mS[i] - mD[i]) * 0.5f;
+
+ /* Copy the future samples to the front for next time. */
+ std::copy(mS.cbegin()+SamplesToDo, mS.cbegin()+SamplesToDo+sFilterDelay, mS.begin());
+ std::copy(mD.cbegin()+SamplesToDo, mD.cbegin()+SamplesToDo+sFilterDelay, mD.begin());
+}
+
+
+/* Decoding UHJ is done as:
+ *
+ * S = Left + Right
+ * D = Left - Right
+ *
+ * W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T)
+ * X = 0.418504*S - j(0.828347*D + 0.767835*T)
+ * Y = 0.795954*D - 0.676406*T + j(0.186626*S)
+ * Z = 1.023332*Q
+ *
+ * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
+ * channel excludes Q and T. The B-Format signal reconstructed from 2-channel
+ * UHJ should not be run through a normal B-Format decoder, as it needs
+ * different shelf filters.
+ *
+ * The decode is done in place: samples[0], samples[1] and samples[2] hold
+ * Left, Right and T on entry and receive W, X and Y, all starting at 'offset'
+ * within each line. When a 4th line is present it is scaled in place from Q
+ * to Z. samplesToDo output samples are written per line, which reads
+ * samplesToDo+sFilterDelay input samples from each of the first three lines.
+ * The filter history kept for the next call is copied out of the assembled
+ * filter input (previous history followed by the new samples) starting at
+ * index forwardSamples.
+ */
+void UhjDecoder::decode(const al::span<BufferLine> samples, const size_t offset,
+ const size_t samplesToDo, const size_t forwardSamples)
+{
+ /* A -3dB attenuation is needed for FuMa output. */
+ static constexpr float sqrt1_2{0.707106781187f};
+
+ ASSUME(samplesToDo > 0);
+
+ { /* The inputs are read from the same lines the outputs below write to. */
+ const float *RESTRICT left{al::assume_aligned<16>(samples[0].data() + offset)};
+ const float *RESTRICT right{al::assume_aligned<16>(samples[1].data() + offset)};
+ const float *RESTRICT t{al::assume_aligned<16>(samples[2].data() + offset)};
+
+ /* S = Left + Right, with the attenuation applied up front. */
+ for(size_t i{0};i < samplesToDo+sFilterDelay;++i)
+ mS[i] = (left[i] + right[i]) * sqrt1_2;
+
+ /* D = Left - Right, attenuated the same way. */
+ for(size_t i{0};i < samplesToDo+sFilterDelay;++i)
+ mD[i] = (left[i] - right[i]) * sqrt1_2;
+
+ /* T, attenuated like S and D. */
+ for(size_t i{0};i < samplesToDo+sFilterDelay;++i)
+ mT[i] = t[i] * sqrt1_2;
+ }
+
+ float *RESTRICT woutput{al::assume_aligned<16>(samples[0].data() + offset)};
+ float *RESTRICT xoutput{al::assume_aligned<16>(samples[1].data() + offset)};
+ float *RESTRICT youtput{al::assume_aligned<16>(samples[2].data() + offset)};
+
+ /* Precompute j(0.828347*D + 0.767835*T) and store in xoutput. The filter
+ * input is the saved history followed by the newly mixed D/T samples.
+ */
+ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
+ std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sFilterDelay, mT.cbegin(), tmpiter,
+ [](const float d, const float t) noexcept { return 0.828347f*d + 0.767835f*t; });
+ std::copy_n(mTemp.cbegin()+forwardSamples, mDTHistory.size(), mDTHistory.begin());
+ PShift.process({xoutput, samplesToDo}, mTemp.data());
+
+ /* W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T) */
+ for(size_t i{0};i < samplesToDo;++i)
+ woutput[i] = 0.981530f*mS[i] + 0.197484f*xoutput[i];
+ /* X = 0.418504*S - j(0.828347*D + 0.767835*T). This overwrites the
+ * precomputed term, so W has to be calculated before it.
+ */
+ for(size_t i{0};i < samplesToDo;++i)
+ xoutput[i] = 0.418504f*mS[i] - xoutput[i];
+
+ /* Precompute j*S and store in youtput. */
+ tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
+ std::copy_n(mS.cbegin(), samplesToDo+sFilterDelay, tmpiter);
+ std::copy_n(mTemp.cbegin()+forwardSamples, mSHistory.size(), mSHistory.begin());
+ PShift.process({youtput, samplesToDo}, mTemp.data());
+
+ /* Y = 0.795954*D - 0.676406*T + j(0.186626*S) */
+ for(size_t i{0};i < samplesToDo;++i)
+ youtput[i] = 0.795954f*mD[i] - 0.676406f*mT[i] + 0.186626f*youtput[i];
+
+ if(samples.size() > 3)
+ {
+ float *RESTRICT zoutput{samples[3].data() + offset};
+ /* Z = 1.023332*Q, including the same attenuation as the other channels. */
+ for(size_t i{0};i < samplesToDo;++i)
+ zoutput[i] = 1.023332f*sqrt1_2*zoutput[i];
+ }
 }
diff --git a/core/uhjfilter.h b/core/uhjfilter.h
index c2cb8722..c04913b4 100644
--- a/core/uhjfilter.h
+++ b/core/uhjfilter.h
@@ -5,35 +5,60 @@
#include "almalloc.h"
#include "bufferline.h"
+#include "resampler_limits.h"
-struct Uhj2Encoder {
- /* A particular property of the filter allows it to cover nearly twice its
- * length, so the filter size is also the effective delay (despite being
- * center-aligned).
+struct UhjEncoder {
+ /* The filter delay is half its effective size, so a delay of 128 has a
+ * FIR length of 256.
*/
- constexpr static size_t sFilterSize{128};
+ constexpr static size_t sFilterDelay{128};
- /* Delays for the unfiltered signal. */
- alignas(16) std::array<float,sFilterSize> mMidDelay{};
- alignas(16) std::array<float,sFilterSize> mSideDelay{};
-
- alignas(16) std::array<float,BufferLineSize+sFilterSize> mMid{};
- alignas(16) std::array<float,BufferLineSize+sFilterSize> mSide{};
+ /* Delays and processing storage for the unfiltered signal. */
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mS{};
+ alignas(16) std::array<float,BufferLineSize+sFilterDelay> mD{};
/* History for the FIR filter. */
- alignas(16) std::array<float,sFilterSize*2 - 1> mSideHistory{};
+ alignas(16) std::array<float,sFilterDelay*2 - 1> mWXHistory{};
- alignas(16) std::array<float,BufferLineSize + sFilterSize*2> mTemp{};
+ alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mTemp{};
/**
* Encodes a 2-channel UHJ (stereo-compatible) signal from a B-Format input
* signal. The input must use FuMa channel ordering and scaling.
*/
- void encode(FloatBufferLine &LeftOut, FloatBufferLine &RightOut,
+ void encode(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
const FloatBufferLine *InSamples, const size_t SamplesToDo);
- DEF_NEWDEL(Uhj2Encoder)
+ DEF_NEWDEL(UhjEncoder)
+};
+
+/* State and scratch storage for decoding UHJ input to B-Format in place. */
+struct UhjDecoder {
+ constexpr static size_t sFilterDelay{128}; /* decode() reads this many input samples past samplesToDo. */
+ /* Length and type of one channel line as passed to decode(). */
+ constexpr static size_t sLineSize{BufferLineSize+MaxResamplerPadding+sFilterDelay};
+ using BufferLine = std::array<float,sLineSize>;
+ /* The S (Left+Right), D (Left-Right) and T signals computed by decode(). */
+ alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mS{};
+ alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mD{};
+ alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge+sFilterDelay> mT{};
+ /* Filter input carried between decode() calls, for the D/T mix and for S. */
+ alignas(16) std::array<float,sFilterDelay-1> mDTHistory{};
+ alignas(16) std::array<float,sFilterDelay-1> mSHistory{};
+ /* Scratch holding the saved history followed by the new filter input. */
+ alignas(16) std::array<float,BufferLineSize+MaxResamplerEdge + sFilterDelay*2> mTemp{};
+
+ /**
+ * Decodes a 3- or 4-channel UHJ signal into a B-Format signal with FuMa
+ * channel ordering and scaling. For 3-channel, the 3rd channel may be
+ * attenuated by 'n', where 0 <= n <= 1. So 2-channel UHJ can be decoded by
+ * leaving the 3rd channel input silent (n=0).
+ */
+ void decode(const al::span<BufferLine> samples, const size_t offset, const size_t samplesToDo,
+ const size_t forwardSamples);
+
+ DEF_NEWDEL(UhjDecoder)
 };
#endif /* CORE_UHJFILTER_H */
diff --git a/core/uiddefs.cpp b/core/uiddefs.cpp
new file mode 100644
index 00000000..244c01a5
--- /dev/null
+++ b/core/uiddefs.cpp
@@ -0,0 +1,37 @@
+
+#include "config.h"
+
+
+#ifndef AL_NO_UID_DEFS
+/* INITGUID is defined ahead of the GUID headers so that the DEFINE_GUID and
+ * related macro uses below presumably emit the actual definitions instead of
+ * extern declarations (confirm against the Windows SDK's guiddef.h). The
+ * whole section can be compiled out by defining AL_NO_UID_DEFS.
+ */
+#if defined(HAVE_GUIDDEF_H) || defined(HAVE_INITGUID_H)
+#define INITGUID
+#include <windows.h>
+#ifdef HAVE_GUIDDEF_H
+#include <guiddef.h>
+#else
+#include <initguid.h>
+#endif
+
+DEFINE_GUID(KSDATAFORMAT_SUBTYPE_PCM, 0x00000001, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71);
+DEFINE_GUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, 0x00000003, 0x0000, 0x0010, 0x80,0x00, 0x00,0xaa,0x00,0x38,0x9b,0x71);
+
+DEFINE_GUID(IID_IDirectSoundNotify, 0xb0210783, 0x89cd, 0x11d0, 0xaf,0x08, 0x00,0xa0,0xc9,0x25,0xcd,0x16);
+
+DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e,0x3d, 0xc4,0x57,0x92,0x91,0x69,0x2e);
+DEFINE_GUID(IID_IMMDeviceEnumerator, 0xa95664d2, 0x9614, 0x4f35, 0xa7,0x46, 0xde,0x8d,0xb6,0x36,0x17,0xe6);
+DEFINE_GUID(IID_IAudioClient, 0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1,0x78, 0xc2,0xf5,0x68,0xa7,0x03,0xb2);
+DEFINE_GUID(IID_IAudioRenderClient, 0xf294acfc, 0x3146, 0x4483, 0xa7,0xbf, 0xad,0xdc,0xa7,0xc2,0x60,0xe2);
+DEFINE_GUID(IID_IAudioCaptureClient, 0xc8adbd64, 0xe71e, 0x48a0, 0xa4,0xde, 0x18,0x5c,0x39,0x5c,0xd3,0x17);
+
+#ifdef HAVE_WASAPI
+#include <wtypes.h>
+#include <devpropdef.h>
+#include <propkeydef.h>
+DEFINE_DEVPROPKEY(DEVPKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80,0x20, 0x67,0xd1,0x46,0xa8,0x50,0xe0, 14);
+DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_FormFactor, 0x1da5d803, 0xd492, 0x4edd, 0x8c,0x23, 0xe0,0xc0,0xff,0xee,0x7f,0x0e, 0);
+DEFINE_PROPERTYKEY(PKEY_AudioEndpoint_GUID, 0x1da5d803, 0xd492, 0x4edd, 0x8c, 0x23,0xe0, 0xc0,0xff,0xee,0x7f,0x0e, 4 );
+#endif /* HAVE_WASAPI */
+#endif /* HAVE_GUIDDEF_H || HAVE_INITGUID_H */
+
+#endif /* AL_NO_UID_DEFS */
diff --git a/core/voice.cpp b/core/voice.cpp
new file mode 100644
index 00000000..c764a277
--- /dev/null
+++ b/core/voice.cpp
@@ -0,0 +1,849 @@
+
+#include "config.h"
+
+#include "voice.h"
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <new>
+#include <stdlib.h>
+#include <utility>
+#include <vector>
+
+#include "albyte.h"
+#include "alnumeric.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "alstring.h"
+#include "ambidefs.h"
+#include "async_event.h"
+#include "buffer_storage.h"
+#include "context.h"
+#include "cpu_caps.h"
+#include "devformat.h"
+#include "device.h"
+#include "filters/biquad.h"
+#include "filters/nfc.h"
+#include "filters/splitter.h"
+#include "fmt_traits.h"
+#include "logging.h"
+#include "mixer.h"
+#include "mixer/defs.h"
+#include "mixer/hrtfdefs.h"
+#include "opthelpers.h"
+#include "resampler_limits.h"
+#include "ringbuffer.h"
+#include "vector.h"
+#include "voice_change.h"
+
+struct CTag;
+#ifdef HAVE_SSE
+struct SSETag;
+#endif
+#ifdef HAVE_NEON
+struct NEONTag;
+#endif
+struct CopyTag;
+
+
+static_assert(!(sizeof(Voice::BufferLine)&15), "Voice::BufferLine must be a multiple of 16 bytes");
+
+Resampler ResamplerDefault{Resampler::Linear};
+
+namespace {
+
+using uint = unsigned int;
+
+using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize,
+ const MixHrtfFilter *hrtfparams, const size_t BufferSize);
+using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples,
+ const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
+ const size_t BufferSize);
+
+HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
+HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
+/* Returns the Mix_ implementation to use. NEON is preferred when it's
+ * compiled in and flagged in CPUCapFlags, then SSE under the same conditions,
+ * with the plain C version as the fallback.
+ */
+inline MixerFunc SelectMixer()
+{
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return Mix_<NEONTag>;
+#endif
+#ifdef HAVE_SSE
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return Mix_<SSETag>;
+#endif
+ return Mix_<CTag>;
+}
+/* Returns the MixHrtf_ implementation to use. NEON is preferred when it's
+ * compiled in and flagged in CPUCapFlags, then SSE under the same conditions,
+ * with the plain C version as the fallback.
+ */
+inline HrtfMixerFunc SelectHrtfMixer()
+{
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return MixHrtf_<NEONTag>;
+#endif
+#ifdef HAVE_SSE
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixHrtf_<SSETag>;
+#endif
+ return MixHrtf_<CTag>;
+}
+/* Returns the MixHrtfBlend_ implementation to use. NEON is preferred when
+ * it's compiled in and flagged in CPUCapFlags, then SSE under the same
+ * conditions, with the plain C version as the fallback.
+ */
+inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
+{
+#ifdef HAVE_NEON
+ if((CPUCapFlags&CPU_CAP_NEON))
+ return MixHrtfBlend_<NEONTag>;
+#endif
+#ifdef HAVE_SSE
+ if((CPUCapFlags&CPU_CAP_SSE))
+ return MixHrtfBlend_<SSETag>;
+#endif
+ return MixHrtfBlend_<CTag>;
+}
+
+} // namespace
+/* Sets the default resampler from an optional name and selects the mixer
+ * functions. The name is matched against ResamplerList with al::strcasecmp.
+ * An unrecognized name is logged as an error and leaves ResamplerDefault as
+ * it was.
+ */
+void Voice::InitMixer(al::optional<std::string> resampler)
+{
+ if(resampler)
+ {
+ struct ResamplerEntry {
+ const char name[16];
+ const Resampler resampler;
+ };
+ constexpr ResamplerEntry ResamplerList[]{
+ { "none", Resampler::Point },
+ { "point", Resampler::Point },
+ { "linear", Resampler::Linear },
+ { "cubic", Resampler::Cubic },
+ { "bsinc12", Resampler::BSinc12 },
+ { "fast_bsinc12", Resampler::FastBSinc12 },
+ { "bsinc24", Resampler::BSinc24 },
+ { "fast_bsinc24", Resampler::FastBSinc24 },
+ };
+ /* Map deprecated names onto their replacements before the lookup. */
+ const char *str{resampler->c_str()};
+ if(al::strcasecmp(str, "bsinc") == 0)
+ {
+ WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str);
+ str = "bsinc12";
+ }
+ else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0)
+ {
+ WARN("Resampler option \"%s\" is deprecated, using cubic\n", str);
+ str = "cubic";
+ }
+
+ auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList),
+ [str](const ResamplerEntry &entry) -> bool
+ { return al::strcasecmp(str, entry.name) == 0; });
+ if(iter == std::end(ResamplerList))
+ ERR("Invalid resampler: %s\n", str);
+ else
+ ResamplerDefault = iter->resampler;
+ }
+ /* The mixer functions are selected whether or not a name was given. */
+ MixSamples = SelectMixer();
+ MixHrtfBlendSamples = SelectHrtfBlendMixer();
+ MixHrtfSamples = SelectHrtfMixer();
+}
+
+
+namespace {
+/* Writes a source state change event, with the given source ID and the Stop
+ * state, to the context's async event ring buffer. If the first write segment
+ * has no room, the event is dropped without any indication.
+ */
+void SendSourceStoppedEvent(ContextBase *context, uint id)
+{
+ RingBuffer *ring{context->mAsyncEvents.get()};
+ auto evt_vec = ring->getWriteVector();
+ if(evt_vec.first.len < 1) return;
+ /* Construct the event directly in the ring buffer's storage. */
+ AsyncEvent *evt{::new(evt_vec.first.buf) AsyncEvent{EventType_SourceStateChange}};
+ evt->u.srcstate.id = id;
+ evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
+
+ ring->writeAdvance(1);
+}
+
+/* Applies the low-pass and/or high-pass filter selected by 'type' to src,
+ * writing to dst, and clears whichever filter isn't used. Returns dst when a
+ * filter was applied, or src.data() when 'type' is AF_None or doesn't match
+ * any case (dst is left untouched then).
+ */
+const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst,
+ const al::span<const float> src, int type)
+{
+ switch(type)
+ {
+ case AF_None:
+ lpfilter.clear();
+ hpfilter.clear();
+ break;
+
+ case AF_LowPass:
+ lpfilter.process(src, dst);
+ hpfilter.clear();
+ return dst;
+ case AF_HighPass:
+ lpfilter.clear();
+ hpfilter.process(src, dst);
+ return dst;
+
+ case AF_BandPass:
+ DualBiquad{lpfilter, hpfilter}.process(src, dst);
+ return dst;
+ }
+ return src.data();
+}
+
+/* Loads 'samples' frames of interleaved source data, starting at frame
+ * srcOffset, into the destination lines at dstOffset (al::LoadSampleArray
+ * presumably converts each sample from srctype to float; confirm against its
+ * definition). For FmtUHJ2 the source is treated as two interleaved channels
+ * that go to the first two lines, and the third line is filled with silence.
+ * Otherwise the source is taken to have one interleaved channel per
+ * destination line. A srctype not listed in the switch loads nothing.
+ */
+void LoadSamples(const al::span<Voice::BufferLine> dstSamples, const size_t dstOffset,
+ const al::byte *src, const size_t srcOffset, const FmtType srctype, const FmtChannels srcchans,
+ const size_t samples) noexcept
+{
+#define HANDLE_FMT(T) case T: \
+ { \
+ constexpr size_t sampleSize{sizeof(al::FmtTypeTraits<T>::Type)}; \
+ if(srcchans == FmtUHJ2) \
+ { \
+ constexpr size_t srcstep{2u}; \
+ src += srcOffset*srcstep*sampleSize; \
+ al::LoadSampleArray<T>(dstSamples[0].data() + dstOffset, src, \
+ srcstep, samples); \
+ al::LoadSampleArray<T>(dstSamples[1].data() + dstOffset, \
+ src + sampleSize, srcstep, samples); \
+ std::fill_n(dstSamples[2].data() + dstOffset, samples, 0.0f); \
+ } \
+ else \
+ { \
+ const size_t srcstep{dstSamples.size()}; \
+ src += srcOffset*srcstep*sampleSize; \
+ for(auto &dst : dstSamples) \
+ { \
+ al::LoadSampleArray<T>(dst.data() + dstOffset, src, srcstep, \
+ samples); \
+ src += sampleSize; \
+ } \
+ } \
+ } \
+ break
+ /* Each case expands the loader above for one source sample type. */
+ switch(srctype)
+ {
+ HANDLE_FMT(FmtUByte);
+ HANDLE_FMT(FmtShort);
+ HANDLE_FMT(FmtFloat);
+ HANDLE_FMT(FmtDouble);
+ HANDLE_FMT(FmtMulaw);
+ HANDLE_FMT(FmtAlaw);
+ }
+#undef HANDLE_FMT
+}
+/* Loads samplesToLoad samples from a single buffer into the voice's sample
+ * lines, starting MaxResamplerEdge samples into each line. Without a loop
+ * item, or when already at or past the loop end, it loads up to the end of
+ * the buffer and pads the rest. Otherwise it loads up to the loop end and
+ * then repeats the loop range until the request is filled.
+ */
+void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
+ const size_t dataPosInt, const FmtType sampleType, const FmtChannels sampleChannels,
+ const size_t samplesToLoad, const al::span<Voice::BufferLine> voiceSamples)
+{
+ const uint loopStart{buffer->mLoopStart};
+ const uint loopEnd{buffer->mLoopEnd};
+ ASSUME(loopEnd > loopStart);
+
+ /* If current pos is beyond the loop range, do not loop */
+ if(!bufferLoopItem || dataPosInt >= loopEnd)
+ {
+ /* Load what's left to play from the buffer */
+ const size_t remaining{minz(samplesToLoad, buffer->mSampleLen-dataPosInt)};
+ LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, dataPosInt, sampleType,
+ sampleChannels, remaining);
+ /* Pad any shortfall by repeating the sample just before the fill position. */
+ if(const size_t toFill{samplesToLoad - remaining})
+ {
+ for(auto &chanbuffer : voiceSamples)
+ {
+ auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + remaining;
+ std::fill_n(srcsamples + 1, toFill, *srcsamples);
+ }
+ }
+ }
+ else
+ {
+ /* Load what's left of this loop iteration */
+ const size_t remaining{minz(samplesToLoad, loopEnd-dataPosInt)};
+ LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, dataPosInt, sampleType,
+ sampleChannels, remaining);
+
+ /* Load repeats of the loop to fill the buffer. The loop ends once
+ * nothing more is needed, since toFill then evaluates to 0.
+ */
+ const auto loopSize = static_cast<size_t>(loopEnd - loopStart);
+ size_t samplesLoaded{remaining};
+ while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)})
+ {
+ LoadSamples(voiceSamples, MaxResamplerEdge + samplesLoaded, buffer->mSamples,
+ loopStart, sampleType, sampleChannels, toFill);
+ samplesLoaded += toFill;
+ }
+ }
+}
+/* Loads up to samplesToLoad samples from the start of a callback buffer into
+ * the voice's sample lines, starting MaxResamplerEdge samples into each line.
+ * Only numCallbackSamples samples are taken as available; any shortfall is
+ * padded.
+ */
+void LoadBufferCallback(VoiceBufferItem *buffer, const size_t numCallbackSamples,
+ const FmtType sampleType, const FmtChannels sampleChannels, const size_t samplesToLoad,
+ const al::span<Voice::BufferLine> voiceSamples)
+{
+ /* Load what's left to play from the buffer */
+ const size_t remaining{minz(samplesToLoad, numCallbackSamples)};
+ LoadSamples(voiceSamples, MaxResamplerEdge, buffer->mSamples, 0, sampleType, sampleChannels,
+ remaining);
+ /* Pad any shortfall by repeating the sample just before the fill position. */
+ if(const size_t toFill{samplesToLoad - remaining})
+ {
+ for(auto &chanbuffer : voiceSamples)
+ {
+ auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + remaining;
+ std::fill_n(srcsamples + 1, toFill, *srcsamples);
+ }
+ }
+}
+
+/* Loads samplesToLoad samples from a queue of buffers into the voice's sample
+ * lines, starting MaxResamplerEdge samples into each line. Loading begins at
+ * dataPosInt within 'buffer' and continues through each following item. When
+ * the queue runs out it continues from bufferLoopItem, or stops if that is
+ * null. Any shortfall is padded by repeating the sample just before the fill
+ * position in each line.
+ */
+void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
+    size_t dataPosInt, const FmtType sampleType, const FmtChannels sampleChannels,
+    const size_t samplesToLoad, const al::span<Voice::BufferLine> voiceSamples)
+{
+    /* Crawl the buffer queue to fill in the temp buffer */
+    size_t samplesLoaded{0};
+    while(buffer && samplesLoaded != samplesToLoad)
+    {
+        /* Skip over items that lie entirely before the read position. */
+        if(dataPosInt >= buffer->mSampleLen)
+        {
+            dataPosInt -= buffer->mSampleLen;
+            buffer = buffer->mNext.load(std::memory_order_acquire);
+            if(!buffer) buffer = bufferLoopItem;
+            continue;
+        }
+
+        const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)};
+        LoadSamples(voiceSamples, MaxResamplerEdge+samplesLoaded, buffer->mSamples, dataPosInt,
+            sampleType, sampleChannels, remaining);
+
+        samplesLoaded += remaining;
+        if(samplesLoaded == samplesToLoad)
+            break;
+
+        /* Later items are read from their start. */
+        dataPosInt = 0;
+        buffer = buffer->mNext.load(std::memory_order_acquire);
+        if(!buffer) buffer = bufferLoopItem;
+    }
+    if(const size_t toFill{samplesToLoad - samplesLoaded})
+    {
+        for(auto &chanbuffer : voiceSamples)
+        {
+            auto srcsamples = chanbuffer.data() + MaxResamplerEdge - 1 + samplesLoaded;
+            std::fill_n(srcsamples + 1, toFill, *srcsamples);
+        }
+    }
+}
+
+/* Mixes DstBufferSize input samples into the device's HRTF accumulation
+ * buffer at OutPos. When a fade is active (Counter is non-zero) and this is
+ * the first pass (OutPos is 0), the first min(DstBufferSize, Counter) samples
+ * are blended between the old and target filters. Anything after that is
+ * mixed with the target filter while the gain steps toward TargetGain. The
+ * history and old-filter state in 'parms' are updated for the next call.
+ */
+void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms,
+ const float TargetGain, const uint Counter, uint OutPos, DeviceBase *Device)
+{
+ const uint IrSize{Device->mIrSize};
+ auto &HrtfSamples = Device->HrtfSourceData;
+ /* Source HRTF mixing needs to include the direct delay so it remains
+ * aligned with the direct mix's HRTF filtering.
+ */
+ float2 *AccumSamples{Device->HrtfAccumData + HrtfDirectDelay};
+
+ /* Copy the HRTF history and new input samples into a temp buffer. */
+ auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
+ std::begin(HrtfSamples));
+ std::copy_n(samples, DstBufferSize, src_iter);
+ /* Copy the last used samples back into the history buffer for later. */
+ std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(),
+ parms.Hrtf.History.begin());
+
+ /* If fading and this is the first mixing pass, fade between the IRs. */
+ uint fademix{0u};
+ if(Counter && OutPos == 0)
+ {
+ fademix = minu(DstBufferSize, Counter);
+
+ float gain{TargetGain};
+
+ /* The new coefficients need to fade in completely since they're
+ * replacing the old ones. To keep the gain fading consistent,
+ * interpolate between the old and new target gains given how much of
+ * the fade time this mix handles.
+ */
+ if(Counter > fademix)
+ {
+ const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
+ gain = lerp(parms.Hrtf.Old.Gain, TargetGain, a);
+ }
+ /* The last two values are presumably the starting gain (zero) and the per-sample step; confirm against MixHrtfFilter. */
+ MixHrtfFilter hrtfparams{
+ parms.Hrtf.Target.Coeffs,
+ parms.Hrtf.Target.Delay,
+ 0.0f, gain / static_cast<float>(fademix)};
+ MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams,
+ fademix);
+
+ /* Update the old parameters with the result. */
+ parms.Hrtf.Old = parms.Hrtf.Target;
+ parms.Hrtf.Old.Gain = gain;
+ OutPos += fademix;
+ }
+ /* Mix whatever remains after the blended part using the target filter alone. */
+ if(fademix < DstBufferSize)
+ {
+ const uint todo{DstBufferSize - fademix};
+ float gain{TargetGain};
+
+ /* Interpolate the target gain if the gain fading lasts longer than
+ * this mix.
+ */
+ if(Counter > DstBufferSize)
+ {
+ const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
+ gain = lerp(parms.Hrtf.Old.Gain, TargetGain, a);
+ }
+
+ MixHrtfFilter hrtfparams{
+ parms.Hrtf.Target.Coeffs,
+ parms.Hrtf.Target.Delay,
+ parms.Hrtf.Old.Gain,
+ (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
+ MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo);
+
+ /* Store the now-current gain for next time. */
+ parms.Hrtf.Old.Gain = gain;
+ }
+}
+/* Mixes the input to the output channels with per-order NFC filtering. The
+ * first output channel gets the unfiltered input. For each order from 1 up,
+ * the input is run through that order's NfcFilter process function and mixed
+ * to the number of channels given by Device->NumChannelsPerOrder[order].
+ */
+void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms,
+ const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device)
+{
+ using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*);
+ static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{
+ nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3};
+ /* The first channel is mixed directly, then the pointers move past it. */
+ float *CurrentGains{parms.Gains.Current.data()};
+ MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos);
+ ++OutBuffer;
+ ++CurrentGains;
+ ++TargetGains;
+ /* Stops at the first order with no channels, or after MaxAmbiOrder. */
+ const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()};
+ size_t order{1};
+ while(const size_t chancount{Device->NumChannelsPerOrder[order]})
+ {
+ (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data());
+ MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos);
+ OutBuffer += chancount;
+ CurrentGains += chancount;
+ TargetGains += chancount;
+ if(++order == MaxAmbiOrder+1)
+ break;
+ }
+}
+
+} // namespace
+
+void Voice::mix(const State vstate, ContextBase *Context, const uint SamplesToDo)
+{
+ static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{};
+
+ ASSUME(SamplesToDo > 0);
+
+ /* Get voice info */
+ uint DataPosInt{mPosition.load(std::memory_order_relaxed)};
+ uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
+ VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
+ VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
+ const uint increment{mStep};
+ if UNLIKELY(increment < 1)
+ {
+ /* If the voice is supposed to be stopping but can't be mixed, just
+ * stop it before bailing.
+ */
+ if(vstate == Stopping)
+ mPlayState.store(Stopped, std::memory_order_release);
+ return;
+ }
+
+ DeviceBase *Device{Context->mDevice};
+ const uint NumSends{Device->NumAuxSends};
+
+ ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0) ?
+ Resample_<CopyTag,CTag> : mResampler};
+
+ uint Counter{(mFlags&VoiceIsFading) ? SamplesToDo : 0};
+ if(!Counter)
+ {
+ /* No fading, just overwrite the old/current params. */
+ for(auto &chandata : mChans)
+ {
+ {
+ DirectParams &parms = chandata.mDryParams;
+ if(!(mFlags&VoiceHasHrtf))
+ parms.Gains.Current = parms.Gains.Target;
+ else
+ parms.Hrtf.Old = parms.Hrtf.Target;
+ }
+ for(uint send{0};send < NumSends;++send)
+ {
+ if(mSend[send].Buffer.empty())
+ continue;
+
+ SendParams &parms = chandata.mWetParams[send];
+ parms.Gains.Current = parms.Gains.Target;
+ }
+ }
+ }
+ else if UNLIKELY(!BufferListItem)
+ Counter = std::min(Counter, 64u);
+
+ const uint PostPadding{MaxResamplerEdge +
+ ((mFmtChannels==FmtUHJ2 || mFmtChannels==FmtUHJ3 || mFmtChannels==FmtUHJ4)
+ ? uint{UhjDecoder::sFilterDelay} : 0u)};
+ uint buffers_done{0u};
+ uint OutPos{0u};
+ do {
+ /* Figure out how many buffer samples will be needed */
+ uint DstBufferSize{SamplesToDo - OutPos};
+ uint SrcBufferSize;
+
+ if(increment <= MixerFracOne)
+ {
+ /* Calculate the last written dst sample pos. */
+ uint64_t DataSize64{DstBufferSize - 1};
+ /* Calculate the last read src sample pos. */
+ DataSize64 = (DataSize64*increment + DataPosFrac) >> MixerFracBits;
+ /* +1 to get the src sample count, include padding. */
+ DataSize64 += 1 + PostPadding;
+
+ /* Result is guaranteed to be <= BufferLineSize+ResamplerPrePadding
+ * since we won't use more src samples than dst samples+padding.
+ */
+ SrcBufferSize = static_cast<uint>(DataSize64);
+ }
+ else
+ {
+ uint64_t DataSize64{DstBufferSize};
+ /* Calculate the end src sample pos, include padding. */
+ DataSize64 = (DataSize64*increment + DataPosFrac) >> MixerFracBits;
+ DataSize64 += PostPadding;
+
+ if(DataSize64 <= LineSize - MaxResamplerEdge)
+ SrcBufferSize = static_cast<uint>(DataSize64);
+ else
+ {
+ /* If the source size got saturated, we can't fill the desired
+ * dst size. Figure out how many samples we can actually mix.
+ */
+ SrcBufferSize = LineSize - MaxResamplerEdge;
+
+ DataSize64 = SrcBufferSize - PostPadding;
+ DataSize64 = ((DataSize64<<MixerFracBits) - DataPosFrac) / increment;
+ if(DataSize64 < DstBufferSize)
+ {
+ /* Some mixers require being 16-byte aligned, so also limit
+ * to a multiple of 4 samples to maintain alignment.
+ */
+ DstBufferSize = static_cast<uint>(DataSize64) & ~3u;
+ }
+ ASSUME(DstBufferSize > 0);
+ }
+ }
+
+ if((mFlags&(VoiceIsCallback|VoiceCallbackStopped)) == VoiceIsCallback && BufferListItem)
+ {
+ if(SrcBufferSize > mNumCallbackSamples)
+ {
+ const size_t byteOffset{mNumCallbackSamples*mFrameSize};
+ const size_t needBytes{SrcBufferSize*mFrameSize - byteOffset};
+
+ const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
+ &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
+ if(gotBytes < 0)
+ mFlags |= VoiceCallbackStopped;
+ else if(static_cast<uint>(gotBytes) < needBytes)
+ {
+ mFlags |= VoiceCallbackStopped;
+ mNumCallbackSamples += static_cast<uint>(static_cast<uint>(gotBytes) /
+ mFrameSize);
+ }
+ else
+ mNumCallbackSamples = SrcBufferSize;
+ }
+ }
+
+ if UNLIKELY(!BufferListItem)
+ {
+ for(auto &chanbuffer : mVoiceSamples)
+ {
+ auto srciter = chanbuffer.data() + MaxResamplerEdge;
+ auto srcend = chanbuffer.data() + MaxResamplerPadding;
+
+ /* When loading from a voice that ended prematurely, only take
+ * the samples that get closest to 0 amplitude. This helps
+ * certain sounds fade out better.
+ */
+ auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool
+ { return std::abs(lhs) < std::abs(rhs); };
+ srciter = std::min_element(srciter, srcend, abs_lt);
+
+ SrcBufferSize = SrcBufferSize - PostPadding + MaxResamplerPadding;
+ std::fill(srciter+1, chanbuffer.data() + SrcBufferSize, *srciter);
+ }
+ }
+ else
+ {
+ if((mFlags&VoiceIsStatic))
+ LoadBufferStatic(BufferListItem, BufferLoopItem, DataPosInt, mFmtType, mFmtChannels,
+ SrcBufferSize, mVoiceSamples);
+ else if((mFlags&VoiceIsCallback))
+ LoadBufferCallback(BufferListItem, mNumCallbackSamples, mFmtType, mFmtChannels,
+ SrcBufferSize, mVoiceSamples);
+ else
+ LoadBufferQueue(BufferListItem, BufferLoopItem, DataPosInt, mFmtType, mFmtChannels,
+ SrcBufferSize, mVoiceSamples);
+
+ if(mDecoder)
+ {
+ const size_t srcOffset{(increment*DstBufferSize + DataPosFrac)>>MixerFracBits};
+ SrcBufferSize = SrcBufferSize - PostPadding + MaxResamplerEdge;
+ mDecoder->decode(mVoiceSamples, MaxResamplerEdge, SrcBufferSize, srcOffset);
+ }
+ }
+
+ auto voiceSamples = mVoiceSamples.begin();
+ for(auto &chandata : mChans)
+ {
+ /* Resample, then apply ambisonic upsampling as needed. */
+ float *ResampledData{Resample(&mResampleState,
+ voiceSamples->data() + MaxResamplerEdge, DataPosFrac, increment,
+ {Device->ResampledData, DstBufferSize})};
+ if((mFlags&VoiceIsAmbisonic))
+ chandata.mAmbiSplitter.processHfScale({ResampledData, DstBufferSize},
+ chandata.mAmbiScale);
+
+ /* Now filter and mix to the appropriate outputs. */
+ const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
+ {
+ DirectParams &parms = chandata.mDryParams;
+ const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
+ {ResampledData, DstBufferSize}, mDirect.FilterType)};
+
+ if((mFlags&VoiceHasHrtf))
+ {
+ const float TargetGain{UNLIKELY(vstate == Stopping) ? 0.0f :
+ parms.Hrtf.Target.Gain};
+ DoHrtfMix(samples, DstBufferSize, parms, TargetGain, Counter, OutPos, Device);
+ }
+ else if((mFlags&VoiceHasNfc))
+ {
+ const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data()
+ : parms.Gains.Target.data()};
+ DoNfcMix({samples, DstBufferSize}, mDirect.Buffer.data(), parms, TargetGains,
+ Counter, OutPos, Device);
+ }
+ else
+ {
+ const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data()
+ : parms.Gains.Target.data()};
+ MixSamples({samples, DstBufferSize}, mDirect.Buffer,
+ parms.Gains.Current.data(), TargetGains, Counter, OutPos);
+ }
+ }
+
+ for(uint send{0};send < NumSends;++send)
+ {
+ if(mSend[send].Buffer.empty())
+ continue;
+
+ SendParams &parms = chandata.mWetParams[send];
+ const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
+ {ResampledData, DstBufferSize}, mSend[send].FilterType)};
+
+ const float *TargetGains{UNLIKELY(vstate == Stopping) ? SilentTarget.data()
+ : parms.Gains.Target.data()};
+ MixSamples({samples, DstBufferSize}, mSend[send].Buffer,
+ parms.Gains.Current.data(), TargetGains, Counter, OutPos);
+ }
+
+ /* Store the last source samples used for next time. */
+ const size_t srcOffset{(increment*DstBufferSize + DataPosFrac)>>MixerFracBits};
+ std::copy_n(voiceSamples->data()+srcOffset, MaxResamplerPadding, voiceSamples->data());
+ ++voiceSamples;
+ }
+ /* Update positions */
+ DataPosFrac += increment*DstBufferSize;
+ const uint SrcSamplesDone{DataPosFrac>>MixerFracBits};
+ DataPosInt += SrcSamplesDone;
+ DataPosFrac &= MixerFracMask;
+
+ OutPos += DstBufferSize;
+ Counter = maxu(DstBufferSize, Counter) - DstBufferSize;
+
+ if UNLIKELY(!BufferListItem)
+ {
+ /* Do nothing extra when there's no buffers. */
+ }
+ else if((mFlags&VoiceIsStatic))
+ {
+ if(BufferLoopItem)
+ {
+ /* Handle looping static source */
+ const uint LoopStart{BufferListItem->mLoopStart};
+ const uint LoopEnd{BufferListItem->mLoopEnd};
+ if(DataPosInt >= LoopEnd)
+ {
+ assert(LoopEnd > LoopStart);
+ DataPosInt = ((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
+ }
+ }
+ else
+ {
+ /* Handle non-looping static source */
+ if(DataPosInt >= BufferListItem->mSampleLen)
+ {
+ BufferListItem = nullptr;
+ break;
+ }
+ }
+ }
+ else if((mFlags&VoiceIsCallback))
+ {
+ if(SrcSamplesDone < mNumCallbackSamples)
+ {
+ const size_t byteOffset{SrcSamplesDone*mFrameSize};
+ const size_t byteEnd{mNumCallbackSamples*mFrameSize};
+ al::byte *data{BufferListItem->mSamples};
+ std::copy(data+byteOffset, data+byteEnd, data);
+ mNumCallbackSamples -= SrcSamplesDone;
+ }
+ else
+ {
+ BufferListItem = nullptr;
+ mNumCallbackSamples = 0;
+ }
+ }
+ else
+ {
+ /* Handle streaming source */
+ do {
+ if(BufferListItem->mSampleLen > DataPosInt)
+ break;
+
+ DataPosInt -= BufferListItem->mSampleLen;
+
+ ++buffers_done;
+ BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
+ if(!BufferListItem) BufferListItem = BufferLoopItem;
+ } while(BufferListItem);
+ }
+ } while(OutPos < SamplesToDo);
+
+ mFlags |= VoiceIsFading;
+
+ /* Don't update positions and buffers if we were stopping. */
+ if UNLIKELY(vstate == Stopping)
+ {
+ mPlayState.store(Stopped, std::memory_order_release);
+ return;
+ }
+
+ /* Capture the source ID in case it's reset for stopping. */
+ const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
+
+ /* Update voice info */
+ mPosition.store(DataPosInt, std::memory_order_relaxed);
+ mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
+ mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
+ if(!BufferListItem)
+ {
+ mLoopBuffer.store(nullptr, std::memory_order_relaxed);
+ mSourceID.store(0u, std::memory_order_relaxed);
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+
+ /* Send any events now, after the position/buffer info was updated. */
+ const uint enabledevt{Context->mEnabledEvts.load(std::memory_order_acquire)};
+ if(buffers_done > 0 && (enabledevt&EventType_BufferCompleted))
+ {
+ RingBuffer *ring{Context->mAsyncEvents.get()};
+ auto evt_vec = ring->getWriteVector();
+ if(evt_vec.first.len > 0)
+ {
+ AsyncEvent *evt{::new(evt_vec.first.buf) AsyncEvent{EventType_BufferCompleted}};
+ evt->u.bufcomp.id = SourceID;
+ evt->u.bufcomp.count = buffers_done;
+ ring->writeAdvance(1);
+ }
+ }
+
+ if(!BufferListItem)
+ {
+ /* If the voice just ended, set it to Stopping so the next render
+ * ensures any residual noise fades to 0 amplitude.
+ */
+ mPlayState.store(Stopping, std::memory_order_release);
+ if((enabledevt&EventType_SourceStateChange))
+ SendSourceStoppedEvent(Context, SourceID);
+ }
+}
+
+/* Readies the voice for mixing on the given device: creates or drops the UHJ
+ * decoder to match the buffer format, zeroes the step value and the sample
+ * history, resets each channel's dry and wet mixing parameters, and sets up
+ * the per-channel NFC filters when the device has a positive average speaker
+ * distance.
+ */
+void Voice::prepare(DeviceBase *device)
+{
+    /* Only the UHJ formats use a decoder. One that already exists is kept. */
+    const bool isUhj{mFmtChannels == FmtUHJ2 || mFmtChannels == FmtUHJ3
+        || mFmtChannels == FmtUHJ4};
+    if(!isUhj)
+        mDecoder = nullptr;
+    else if(!mDecoder)
+        mDecoder = std::make_unique<UhjDecoder>();
+
+    /* A zero step tells the mixer not to mix this voice until the pending
+     * update has been applied.
+     */
+    mStep = 0;
+
+    /* Wipe the sample history of every channel line. */
+    for(auto &line : mVoiceSamples)
+        line.fill(0.0f);
+
+    /* Resets the dry path and the first NumAuxSends wet paths of a channel. */
+    const auto resetParams = [device](ChannelData &chan)
+    {
+        chan.mDryParams = DirectParams{};
+        std::fill_n(chan.mWetParams.begin(), device->NumAuxSends, SendParams{});
+    };
+
+    /* HF scaling (and so the VoiceIsAmbisonic flag) is only needed when the
+     * voice is ambisonic and the device has a higher order than the voice.
+     */
+    const bool needsHFScaling{mAmbiOrder != 0 && device->mAmbiOrder > mAmbiOrder};
+    if(!needsHFScaling)
+    {
+        for(auto &chan : mChans)
+            resetParams(chan);
+        mFlags &= ~VoiceIsAmbisonic;
+    }
+    else
+    {
+        const uint8_t *orderLookup{(mFmtChannels == FmtBFormat2D) ?
+            AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
+        const auto hfScales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder);
+
+        const BandSplitter xover{device->mXOverFreq / static_cast<float>(device->Frequency)};
+        for(auto &chan : mChans)
+        {
+            /* Each channel takes the scale for its own ambisonic order. */
+            chan.mAmbiScale = hfScales[*orderLookup];
+            ++orderLookup;
+            chan.mAmbiSplitter = xover;
+            resetParams(chan);
+        }
+        mFlags |= VoiceIsAmbisonic;
+    }
+
+    /* No NFC filter setup without a positive average speaker distance. */
+    if(!(device->AvgSpeakerDist > 0.0f))
+        return;
+
+    const float w1{SpeedOfSoundMetersPerSec /
+        (device->AvgSpeakerDist * static_cast<float>(device->Frequency))};
+    for(auto &chan : mChans)
+        chan.mDryParams.NFCtrlFilter.init(w1);
+}
diff --git a/core/voice.h b/core/voice.h
new file mode 100644
index 00000000..c3347cda
--- /dev/null
+++ b/core/voice.h
@@ -0,0 +1,270 @@
+#ifndef CORE_VOICE_H
+#define CORE_VOICE_H
+
+#include <array>
+#include <atomic>
+#include <memory>
+#include <stddef.h>
+#include <string>
+
+#include "albyte.h"
+#include "almalloc.h"
+#include "aloptional.h"
+#include "alspan.h"
+#include "bufferline.h"
+#include "buffer_storage.h"
+#include "devformat.h"
+#include "filters/biquad.h"
+#include "filters/nfc.h"
+#include "filters/splitter.h"
+#include "mixer/defs.h"
+#include "mixer/hrtfdefs.h"
+#include "resampler_limits.h"
+#include "uhjfilter.h"
+#include "vector.h"
+
+struct ContextBase;
+struct DeviceBase;
+struct EffectSlot;
+enum class DistanceModel : unsigned char;
+
+using uint = unsigned int;
+
+
+/* Size of the per-voice auxiliary send arrays declared in this header
+ * (VoiceProps::Send, Voice::mSend and Voice::ChannelData::mWetParams).
+ */
+#define MAX_SENDS 6
+
+
+/* Spatialization setting of a voice, stored in VoiceProps::mSpatializeMode.
+ * NOTE(review): what Auto resolves to is not visible in this header; confirm
+ * against the code that consumes the property.
+ */
+enum class SpatializeMode : unsigned char {
+ Off,
+ On,
+ Auto
+};
+
+/* Direct-channels setting of a voice, stored in VoiceProps::DirectChannels.
+ * NOTE(review): DropMismatch/RemixMismatch presumably choose what happens to
+ * input channels that have no matching output channel; confirm with the code
+ * that consumes the property.
+ */
+enum class DirectMode : unsigned char {
+ Off,
+ DropMismatch,
+ RemixMismatch
+};
+
+
+/* Maximum number of extra source samples that may need to be loaded, for
+ * resampling or conversion purposes. This is the resampler edge plus the UHJ
+ * decoder's filter delay.
+ */
+constexpr uint MaxPostVoiceLoad{MaxResamplerEdge + UhjDecoder::sFilterDelay};
+
+
+/* Filter selection bits. AF_BandPass is the combination of the low-pass and
+ * high-pass bits.
+ * NOTE(review): presumably the values held in Voice::TargetData::FilterType;
+ * confirm against the code that sets that field.
+ */
+enum {
+ AF_None = 0,
+ AF_LowPass = 1,
+ AF_HighPass = 2,
+ AF_BandPass = AF_LowPass | AF_HighPass
+};
+
+
+/* Mixing state for the direct (dry) path of one voice channel; held as
+ * Voice::ChannelData::mDryParams.
+ */
+struct DirectParams {
+ /* Low-pass and high-pass filters for the dry path. */
+ BiquadFilter LowPass;
+ BiquadFilter HighPass;
+
+ /* Near-field control filter; Voice::prepare initializes it when the device
+  * has a positive average speaker distance.
+  */
+ NfcFilter NFCtrlFilter;
+
+ /* HRTF mixing state: previous and target filters plus sample history. */
+ struct {
+ HrtfFilter Old;
+ HrtfFilter Target;
+ alignas(16) std::array<float,HrtfHistoryLength> History;
+ } Hrtf;
+
+ /* Per-output-channel gains: the current values and the ones to reach. */
+ struct {
+ std::array<float,MAX_OUTPUT_CHANNELS> Current;
+ std::array<float,MAX_OUTPUT_CHANNELS> Target;
+ } Gains;
+};
+
+/* Mixing state for one auxiliary send (wet) path of a voice channel; held in
+ * Voice::ChannelData::mWetParams, one entry per send.
+ */
+struct SendParams {
+ /* Low-pass and high-pass filters for this send. */
+ BiquadFilter LowPass;
+ BiquadFilter HighPass;
+
+ /* Per-output-channel gains: the current values and the ones to reach. */
+ struct {
+ std::array<float,MAX_OUTPUT_CHANNELS> Current;
+ std::array<float,MAX_OUTPUT_CHANNELS> Target;
+ } Gains;
+};
+
+
+/* One entry of a voice's buffer queue. Entries are chained through mNext, and
+ * Voice::mCurrentBuffer/mLoopBuffer point at entries of this type.
+ */
+struct VoiceBufferItem {
+ /* Next entry in the queue, or null at the end. */
+ std::atomic<VoiceBufferItem*> mNext{nullptr};
+
+ /* NOTE(review): presumably the sample-supplying callback and its user
+  * pointer for callback-driven buffers; confirm with the code that sets them.
+  */
+ CallbackType mCallback{nullptr};
+ void *mUserData{nullptr};
+
+ /* Length of the buffer and its loop points, as sample offsets. */
+ uint mSampleLen{0u};
+ uint mLoopStart{0u};
+ uint mLoopEnd{0u};
+
+ /* Raw sample storage, addressed in bytes. */
+ al::byte *mSamples{nullptr};
+};
+
+
+/* Plain collection of the properties that drive a voice. Voice keeps one copy
+ * in mProps, and VoicePropsItem derives from this struct. No member has a
+ * default initializer, so a default-initialized instance holds indeterminate
+ * values until it is filled in.
+ */
+struct VoiceProps {
+ float Pitch;
+ float Gain;
+ float OuterGain;
+ float MinGain;
+ float MaxGain;
+ float InnerAngle;
+ float OuterAngle;
+ float RefDistance;
+ float MaxDistance;
+ float RolloffFactor;
+ std::array<float,3> Position;
+ std::array<float,3> Velocity;
+ std::array<float,3> Direction;
+ std::array<float,3> OrientAt;
+ std::array<float,3> OrientUp;
+ bool HeadRelative;
+ DistanceModel mDistanceModel;
+ Resampler mResampler;
+ DirectMode DirectChannels;
+ SpatializeMode mSpatializeMode;
+
+ bool DryGainHFAuto;
+ bool WetGainAuto;
+ bool WetGainHFAuto;
+ float OuterGainHF;
+
+ float AirAbsorptionFactor;
+ float RoomRolloffFactor;
+ float DopplerFactor;
+
+ std::array<float,2> StereoPan;
+
+ float Radius;
+
+ /** Direct filter and auxiliary send info. */
+ struct {
+ float Gain;
+ float GainHF;
+ float HFReference;
+ float GainLF;
+ float LFReference;
+ } Direct;
+ /* Same fields as Direct, plus the effect slot the send feeds. */
+ struct SendData {
+ EffectSlot *Slot;
+ float Gain;
+ float GainHF;
+ float HFReference;
+ float GainLF;
+ float LFReference;
+ } Send[MAX_SENDS];
+};
+
+/* A VoiceProps with an atomic link to another item. Voice::mUpdate holds a
+ * pointer to one of these, and the Voice destructor deletes whatever is still
+ * stored there.
+ */
+struct VoicePropsItem : public VoiceProps {
+ std::atomic<VoicePropsItem*> next{nullptr};
+
+ DEF_NEWDEL(VoicePropsItem)
+};
+
+/* Bit flags combined into Voice::mFlags. Each constant is a distinct single
+ * bit, so they can be OR'ed together and tested with a bitwise AND.
+ */
+constexpr uint VoiceIsStatic{ 1u<<0};
+constexpr uint VoiceIsCallback{ 1u<<1};
+constexpr uint VoiceIsAmbisonic{ 1u<<2}; /* Needs HF scaling for ambisonic upsampling. */
+constexpr uint VoiceCallbackStopped{1u<<3};
+constexpr uint VoiceIsFading{ 1u<<4}; /* Use gain stepping for smooth transitions. */
+constexpr uint VoiceHasHrtf{ 1u<<5};
+constexpr uint VoiceHasNfc{ 1u<<6};
+
+/* A single mixing voice: the playback state, buffer queue position, format
+ * description and per-channel mixing parameters needed to render one source.
+ * Non-copyable.
+ */
+struct Voice {
+    /* Playback state. The mixer sets Stopping when a voice runs out of data
+     * so the following mix fades out any residual, and a mix done in the
+     * Stopping state sets Stopped.
+     */
+    enum State {
+        Stopped,
+        Playing,
+        Stopping,
+        Pending
+    };
+
+    /* Pending property update, owned by the voice (deleted in the destructor
+     * if still set).
+     */
+    std::atomic<VoicePropsItem*> mUpdate{nullptr};
+
+    VoiceProps mProps;
+
+    std::atomic<uint> mSourceID{0u};
+    std::atomic<State> mPlayState{Stopped};
+    std::atomic<bool> mPendingChange{false};
+
+    /* The four atomics below are explicitly initialized like the ones above.
+     * A default-constructed std::atomic holds an indeterminate value before
+     * C++20, so a load before the first store would read garbage.
+     */
+
+    /**
+     * Source offset in samples, relative to the currently playing buffer, NOT
+     * the whole queue.
+     */
+    std::atomic<uint> mPosition{0u};
+    /** Fractional (fixed-point) offset to the next sample. */
+    std::atomic<uint> mPositionFrac{0u};
+
+    /* Current buffer queue item being played. */
+    std::atomic<VoiceBufferItem*> mCurrentBuffer{nullptr};
+
+    /* Buffer queue item to loop to at end of queue (will be NULL for non-
+     * looping voices).
+     */
+    std::atomic<VoiceBufferItem*> mLoopBuffer{nullptr};
+
+    /* Properties for the attached buffer(s). */
+    FmtChannels mFmtChannels;
+    FmtType mFmtType;
+    uint mFrequency;
+    uint mFrameSize;
+    AmbiLayout mAmbiLayout;
+    AmbiScaling mAmbiScaling;
+    uint mAmbiOrder;
+
+    /* UHJ decoder; prepare() creates it for the UHJ channel formats and
+     * clears it for every other format.
+     */
+    std::unique_ptr<UhjDecoder> mDecoder;
+
+    /** Current target parameters used for mixing. */
+    uint mStep{0};
+
+    ResamplerFunc mResampler;
+
+    InterpState mResampleState;
+
+    /* Bitwise OR of the Voice* flag constants. */
+    uint mFlags{};
+    /* Number of callback-supplied samples still held for a callback voice. */
+    uint mNumCallbackSamples{0};
+
+    /* A mix destination: the output buffer lines and the filter type to
+     * apply on the way there.
+     */
+    struct TargetData {
+        int FilterType;
+        al::span<FloatBufferLine> Buffer;
+    };
+    TargetData mDirect;
+    std::array<TargetData,MAX_SENDS> mSend;
+
+    /* The first MaxResamplerPadding/2 elements are the sample history from the
+     * previous mix, with an additional MaxResamplerPadding/2 elements that are
+     * now current (which may be overwritten if the buffer data is still
+     * available).
+     */
+    static constexpr size_t LineSize{BufferLineSize + MaxResamplerPadding +
+        UhjDecoder::sFilterDelay};
+    using BufferLine = std::array<float,LineSize>;
+    al::vector<BufferLine,16> mVoiceSamples{2};
+
+    /* Per-channel mixing state: the ambisonic HF scale and band splitter,
+     * plus the dry path parameters and one set of wet parameters per send.
+     */
+    struct ChannelData {
+        float mAmbiScale;
+        BandSplitter mAmbiSplitter;
+
+        DirectParams mDryParams;
+        std::array<SendParams,MAX_SENDS> mWetParams;
+    };
+    al::vector<ChannelData> mChans{2};
+
+    Voice() = default;
+    /* Frees a property update that was queued but never consumed. */
+    ~Voice() { delete mUpdate.exchange(nullptr, std::memory_order_acq_rel); }
+
+    Voice(const Voice&) = delete;
+    Voice& operator=(const Voice&) = delete;
+
+    /* Mixes up to SamplesToDo samples of this voice, given its play state. */
+    void mix(const State vstate, ContextBase *Context, const uint SamplesToDo);
+
+    /* Resets the voice's mixing state for playback on the given device. */
+    void prepare(DeviceBase *device);
+
+    static void InitMixer(al::optional<std::string> resampler);
+
+    DEF_NEWDEL(Voice)
+};
+
+extern Resampler ResamplerDefault;
+
+#endif /* CORE_VOICE_H */
diff --git a/core/voice_change.h b/core/voice_change.h
new file mode 100644
index 00000000..ddc6186f
--- /dev/null
+++ b/core/voice_change.h
@@ -0,0 +1,31 @@
+#ifndef VOICE_CHANGE_H
+#define VOICE_CHANGE_H
+
+#include <atomic>
+
+#include "almalloc.h"
+
+struct Voice;
+
+using uint = unsigned int;
+
+
+/* Kind of state change requested for a voice; stored in VoiceChange::mState. */
+enum class VChangeState {
+ Reset,
+ Stop,
+ Play,
+ Pause,
+ Restart
+};
+/* One requested voice state change. Records are chained through the atomic
+ * mNext pointer.
+ * NOTE(review): presumably queued by the API side and consumed by the mixer;
+ * confirm with the code that walks the list.
+ */
+struct VoiceChange {
+ /* Voices involved in the change; either may be null. */
+ Voice *mOldVoice{nullptr};
+ Voice *mVoice{nullptr};
+ /* ID of the source the change applies to. */
+ uint mSourceID{0};
+ /* Requested change; value-initialized, i.e. VChangeState::Reset. */
+ VChangeState mState{};
+
+ std::atomic<VoiceChange*> mNext{nullptr};
+
+ DEF_NEWDEL(VoiceChange)
+};
+
+#endif /* VOICE_CHANGE_H */