40 files changed, 1080 insertions, 738 deletions
diff --git a/core/ambdec.cpp b/core/ambdec.cpp
index 8ca182c4..f98e1098 100644
--- a/core/ambdec.cpp
+++ b/core/ambdec.cpp
@@ -47,9 +47,9 @@ enum class ReaderScope {
 #else
 [[gnu::format(printf,2,3)]]
 #endif
-al::optional<std::string> make_error(size_t linenum, const char *fmt, ...)
+std::optional<std::string> make_error(size_t linenum, const char *fmt, ...)
 {
-    al::optional<std::string> ret;
+    std::optional<std::string> ret;
     auto &str = ret.emplace();
 
     str.resize(256);
@@ -77,7 +77,7 @@ al::optional<std::string> make_error(size_t linenum, const char *fmt, ...)
 AmbDecConf::~AmbDecConf() = default;
 
 
-al::optional<std::string> AmbDecConf::load(const char *fname) noexcept
+std::optional<std::string> AmbDecConf::load(const char *fname) noexcept
 {
     al::ifstream f{fname};
     if(!f.is_open())
@@ -139,7 +139,7 @@ al::optional<std::string> AmbDecConf::load(const char *fname) noexcept
                 {
                     --toread;
                     istr >> value;
-                    if(curgain < al::size(gains))
+                    if(curgain < std::size(gains))
                         gains[curgain++] = value;
                 }
             }
@@ -291,7 +291,7 @@ al::optional<std::string> AmbDecConf::load(const char *fname) noexcept
             if(CoeffScale == AmbDecScale::Unset)
                 return make_error(linenum, "No coefficient scaling defined");
 
-            return al::nullopt;
+            return std::nullopt;
         }
         else
             return make_error(linenum, "Unexpected command: %s", command.c_str());
diff --git a/core/ambdec.h b/core/ambdec.h
index 7f739781..19f68697 100644
--- a/core/ambdec.h
+++ b/core/ambdec.h
@@ -3,9 +3,9 @@
 
 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 
-#include "aloptional.h"
 #include "core/ambidefs.h"
 
 /* Helpers to read .ambdec configuration files. */
@@ -49,7 +49,7 @@ struct AmbDecConf {
 
     ~AmbDecConf();
 
-    al::optional<std::string> load(const char *fname) noexcept;
+    std::optional<std::string> load(const char *fname) noexcept; /* Yields std::nullopt on success, otherwise an error message. */
 };
 
 #endif /* CORE_AMBDEC_H */
diff --git a/core/ambidefs.cpp b/core/ambidefs.cpp
index 70d6f356..2389ce6b 100644
--- a/core/ambidefs.cpp
+++ b/core/ambidefs.cpp
@@ -21,25 +21,25 @@ constexpr auto inv_sqrt3f = static_cast<float>(1.0/al::numbers::sqrt3);
  * will result in that channel being subsequently decoded for second-order as
  * if it was a first-order decoder for that same speaker array.
  */
-constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales{{
-    {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
-    {{ 4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
-    {{ 2.981423970e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f }},
-    {{ 2.359168820e+00f, 2.031565936e+00f, 1.444598386e+00f, 7.189495850e-01f }},
-    /* 1.947005434e+00f, 1.764337084e+00f, 1.424707344e+00f, 9.755104127e-01f, 4.784482742e-01f */
-}};
+constexpr std::array HFScales{ /* CTAD deduces a 4x4 float table. NOTE(review): no longer sized by MaxAmbiOrder+1 - confirm they still agree. */
+    std::array{4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f},
+    std::array{4.000000000e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f},
+    std::array{2.981423970e+00f, 2.309401077e+00f, 1.192569588e+00f, 7.189495850e-01f},
+    std::array{2.359168820e+00f, 2.031565936e+00f, 1.444598386e+00f, 7.189495850e-01f},
+    /*std::array{1.947005434e+00f, 1.764337084e+00f, 1.424707344e+00f, 9.755104127e-01f, 4.784482742e-01f}, */
+};
 
 /* Same as above, but using a 10-point horizontal-only speaker array. Should
  * only be used when the device is mixing in 2D B-Format for horizontal-only
  * output.
  */
-constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales2D{{
-    {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
-    {{ 2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
-    {{ 1.825741858e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f }},
-    {{ 1.581138830e+00f, 1.460781803e+00f, 1.118033989e+00f, 6.050756345e-01f }},
-    /* 1.414213562e+00f, 1.344997024e+00f, 1.144122806e+00f, 8.312538756e-01f, 4.370160244e-01f */
-}};
+constexpr std::array HFScales2D{ /* CTAD deduces a 4x4 float table. NOTE(review): no longer sized by MaxAmbiOrder+1 - confirm they still agree. */
+    std::array{2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f},
+    std::array{2.236067977e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f},
+    std::array{1.825741858e+00f, 1.581138830e+00f, 9.128709292e-01f, 6.050756345e-01f},
+    std::array{1.581138830e+00f, 1.460781803e+00f, 1.118033989e+00f, 6.050756345e-01f},
+    /*std::array{1.414213562e+00f, 1.344997024e+00f, 1.144122806e+00f, 8.312538756e-01f, 4.370160244e-01f}, */
+};
 
 
 /* This calculates a first-order "upsampler" matrix. It combines a first-order
@@ -49,17 +49,17 @@ constexpr std::array<std::array<float,MaxAmbiOrder+1>,MaxAmbiOrder+1> HFScales2D
  * signal. While not perfect, this should accurately encode a lower-order
  * signal into a higher-order signal.
  */
-constexpr std::array<std::array<float,4>,8> FirstOrderDecoder{{
-    {{ 1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f, }},
-    {{ 1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f, }},
-    {{ 1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f, }},
-    {{ 1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f, }},
-    {{ 1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f, }},
-    {{ 1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }},
-    {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f, }},
-    {{ 1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,8> FirstOrderEncoder{{
+constexpr std::array FirstOrderDecoder{ /* CTAD deduces 8 rows of 4 floats; the row count is static_assert-ed against FirstOrderEncoder. */
+    std::array{1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f},
+    std::array{1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f},
+    std::array{1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f,  1.250000000e-01f},
+    std::array{1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f},
+    std::array{1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f},
+    std::array{1.250000000e-01f,  1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f},
+    std::array{1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f,  1.250000000e-01f},
+    std::array{1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f, -1.250000000e-01f},
+};
+constexpr std::array FirstOrderEncoder{
     CalcAmbiCoeffs( inv_sqrt3f,  inv_sqrt3f,  inv_sqrt3f),
     CalcAmbiCoeffs( inv_sqrt3f,  inv_sqrt3f, -inv_sqrt3f),
     CalcAmbiCoeffs(-inv_sqrt3f,  inv_sqrt3f,  inv_sqrt3f),
@@ -68,25 +68,25 @@ constexpr std::array<AmbiChannelFloatArray,8> FirstOrderEncoder{{
     CalcAmbiCoeffs( inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
     CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f,  inv_sqrt3f),
     CalcAmbiCoeffs(-inv_sqrt3f, -inv_sqrt3f, -inv_sqrt3f),
-}};
+};
 static_assert(FirstOrderDecoder.size() == FirstOrderEncoder.size(), "First-order mismatch");
 
 /* This calculates a 2D first-order "upsampler" matrix. Same as the first-order
  * matrix, just using a more optimized speaker array for horizontal-only
  * content.
  */
-constexpr std::array<std::array<float,4>,4> FirstOrder2DDecoder{{
-    {{ 2.500000000e-01f,  2.041241452e-01f, 0.0f,  2.041241452e-01f, }},
-    {{ 2.500000000e-01f,  2.041241452e-01f, 0.0f, -2.041241452e-01f, }},
-    {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f,  2.041241452e-01f, }},
-    {{ 2.500000000e-01f, -2.041241452e-01f, 0.0f, -2.041241452e-01f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,4> FirstOrder2DEncoder{{
+constexpr std::array FirstOrder2DDecoder{ /* CTAD deduces 4 rows of 4 floats; the row count is static_assert-ed against FirstOrder2DEncoder. */
+    std::array{2.500000000e-01f,  2.041241452e-01f, 0.0f,  2.041241452e-01f},
+    std::array{2.500000000e-01f,  2.041241452e-01f, 0.0f, -2.041241452e-01f},
+    std::array{2.500000000e-01f, -2.041241452e-01f, 0.0f,  2.041241452e-01f},
+    std::array{2.500000000e-01f, -2.041241452e-01f, 0.0f, -2.041241452e-01f},
+};
+constexpr std::array FirstOrder2DEncoder{ /* CTAD deduces 4 entries of CalcAmbiCoeffs' return type (presumably AmbiChannelFloatArray - confirm). */
     CalcAmbiCoeffs( inv_sqrt2f, 0.0f,  inv_sqrt2f),
     CalcAmbiCoeffs( inv_sqrt2f, 0.0f, -inv_sqrt2f),
     CalcAmbiCoeffs(-inv_sqrt2f, 0.0f,  inv_sqrt2f),
     CalcAmbiCoeffs(-inv_sqrt2f, 0.0f, -inv_sqrt2f),
-}};
+};
 static_assert(FirstOrder2DDecoder.size() == FirstOrder2DEncoder.size(), "First-order 2D mismatch");
 
 
@@ -94,21 +94,21 @@ static_assert(FirstOrder2DDecoder.size() == FirstOrder2DEncoder.size(), "First-o
  * matrix, just using a slightly more dense speaker array suitable for second-
  * order content.
  */
-constexpr std::array<std::array<float,9>,12> SecondOrderDecoder{{
-    {{ 8.333333333e-02f,  0.000000000e+00f, -7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f,  1.167715449e-01f, }},
-    {{ 8.333333333e-02f, -1.227808683e-01f,  0.000000000e+00f,  7.588274978e-02f, -1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f, }},
-    {{ 8.333333333e-02f, -7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f, }},
-    {{ 8.333333333e-02f,  0.000000000e+00f,  7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f,  1.443375673e-01f,  1.167715449e-01f, }},
-    {{ 8.333333333e-02f, -1.227808683e-01f,  0.000000000e+00f, -7.588274978e-02f,  1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f, }},
-    {{ 8.333333333e-02f,  7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f, }},
-    {{ 8.333333333e-02f,  0.000000000e+00f, -7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f,  1.443375673e-01f,  1.167715449e-01f, }},
-    {{ 8.333333333e-02f,  1.227808683e-01f,  0.000000000e+00f, -7.588274978e-02f, -1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f, }},
-    {{ 8.333333333e-02f,  7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f,  1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f, }},
-    {{ 8.333333333e-02f,  0.000000000e+00f,  7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f,  1.167715449e-01f, }},
-    {{ 8.333333333e-02f,  1.227808683e-01f,  0.000000000e+00f,  7.588274978e-02f,  1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f, }},
-    {{ 8.333333333e-02f, -7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f,  1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,12> SecondOrderEncoder{{
+constexpr std::array SecondOrderDecoder{ /* CTAD deduces 12 rows of 9 floats; the row count is static_assert-ed against SecondOrderEncoder. */
+    std::array{8.333333333e-02f,  0.000000000e+00f, -7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f,  1.167715449e-01f},
+    std::array{8.333333333e-02f, -1.227808683e-01f,  0.000000000e+00f,  7.588274978e-02f, -1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f},
+    std::array{8.333333333e-02f, -7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f},
+    std::array{8.333333333e-02f,  0.000000000e+00f,  7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f,  1.443375673e-01f,  1.167715449e-01f},
+    std::array{8.333333333e-02f, -1.227808683e-01f,  0.000000000e+00f, -7.588274978e-02f,  1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f},
+    std::array{8.333333333e-02f,  7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f},
+    std::array{8.333333333e-02f,  0.000000000e+00f, -7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f,  1.443375673e-01f,  1.167715449e-01f},
+    std::array{8.333333333e-02f,  1.227808683e-01f,  0.000000000e+00f, -7.588274978e-02f, -1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f},
+    std::array{8.333333333e-02f,  7.588274978e-02f,  1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f,  1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f},
+    std::array{8.333333333e-02f,  0.000000000e+00f,  7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f, -1.591525047e-02f, -1.443375673e-01f,  1.167715449e-01f},
+    std::array{8.333333333e-02f,  1.227808683e-01f,  0.000000000e+00f,  7.588274978e-02f,  1.443375673e-01f,  0.000000000e+00f, -9.316949906e-02f,  0.000000000e+00f, -7.216878365e-02f},
+    std::array{8.333333333e-02f, -7.588274978e-02f, -1.227808683e-01f,  0.000000000e+00f,  0.000000000e+00f,  1.443375673e-01f,  1.090847495e-01f,  0.000000000e+00f, -4.460276122e-02f},
+};
+constexpr std::array SecondOrderEncoder{
     CalcAmbiCoeffs( 0.000000000e+00f, -5.257311121e-01f,  8.506508084e-01f),
     CalcAmbiCoeffs(-8.506508084e-01f,  0.000000000e+00f,  5.257311121e-01f),
     CalcAmbiCoeffs(-5.257311121e-01f,  8.506508084e-01f,  0.000000000e+00f),
@@ -121,29 +121,29 @@ constexpr std::array<AmbiChannelFloatArray,12> SecondOrderEncoder{{
     CalcAmbiCoeffs( 0.000000000e+00f,  5.257311121e-01f, -8.506508084e-01f),
     CalcAmbiCoeffs( 8.506508084e-01f,  0.000000000e+00f,  5.257311121e-01f),
     CalcAmbiCoeffs(-5.257311121e-01f, -8.506508084e-01f,  0.000000000e+00f),
-}};
+};
 static_assert(SecondOrderDecoder.size() == SecondOrderEncoder.size(), "Second-order mismatch");
 
 /* This calculates a 2D second-order "upsampler" matrix. Same as the second-
  * order matrix, just using a more optimized speaker array for horizontal-only
  * content.
  */
-constexpr std::array<std::array<float,9>,6> SecondOrder2DDecoder{{
-    {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f,  1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f, }},
-    {{ 1.666666667e-01f, -1.924500897e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }},
-    {{ 1.666666667e-01f, -9.622504486e-02f, 0.0f, -1.666666667e-01f,  1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f, }},
-    {{ 1.666666667e-01f,  9.622504486e-02f, 0.0f, -1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f, }},
-    {{ 1.666666667e-01f,  1.924500897e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f, }},
-    {{ 1.666666667e-01f,  9.622504486e-02f, 0.0f,  1.666666667e-01f,  1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,6> SecondOrder2DEncoder{{
+constexpr std::array SecondOrder2DDecoder{ /* CTAD deduces 6 rows of 9 floats; the row count is static_assert-ed against SecondOrder2DEncoder. */
+    std::array{1.666666667e-01f, -9.622504486e-02f, 0.0f,  1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f},
+    std::array{1.666666667e-01f, -1.924500897e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f},
+    std::array{1.666666667e-01f, -9.622504486e-02f, 0.0f, -1.666666667e-01f,  1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f},
+    std::array{1.666666667e-01f,  9.622504486e-02f, 0.0f, -1.666666667e-01f, -1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f},
+    std::array{1.666666667e-01f,  1.924500897e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.721325932e-01f},
+    std::array{1.666666667e-01f,  9.622504486e-02f, 0.0f,  1.666666667e-01f,  1.490711985e-01f, 0.0f, 0.0f, 0.0f,  8.606629658e-02f},
+};
+constexpr std::array SecondOrder2DEncoder{ /* CTAD deduces 6 entries of CalcAmbiCoeffs' return type (presumably AmbiChannelFloatArray - confirm). */
     CalcAmbiCoeffs(-0.50000000000f, 0.0f,  0.86602540379f),
     CalcAmbiCoeffs(-1.00000000000f, 0.0f,  0.00000000000f),
     CalcAmbiCoeffs(-0.50000000000f, 0.0f, -0.86602540379f),
     CalcAmbiCoeffs( 0.50000000000f, 0.0f, -0.86602540379f),
     CalcAmbiCoeffs( 1.00000000000f, 0.0f,  0.00000000000f),
     CalcAmbiCoeffs( 0.50000000000f, 0.0f,  0.86602540379f),
-}};
+};
 static_assert(SecondOrder2DDecoder.size() == SecondOrder2DEncoder.size(),
     "Second-order 2D mismatch");
 
@@ -152,29 +152,29 @@ static_assert(SecondOrder2DDecoder.size() == SecondOrder2DEncoder.size(),
  * matrix, just using a more dense speaker array suitable for third-order
  * content.
  */
-constexpr std::array<std::array<float,16>,20> ThirdOrderDecoder{{
-    {{ 5.000000000e-02f,  3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f,  6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f,  0.000000000e+00f,  1.126112056e-01f,  7.944389175e-02f,  0.000000000e+00f,  2.421151497e-02f,  0.000000000e+00f, }},
-    {{ 5.000000000e-02f, -3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f,  1.256118221e-01f,  0.000000000e+00f, -1.126112056e-01f,  7.944389175e-02f,  0.000000000e+00f,  2.421151497e-02f,  0.000000000e+00f, }},
-    {{ 5.000000000e-02f,  3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f,  0.000000000e+00f,  1.126112056e-01f, -7.944389175e-02f,  0.000000000e+00f, -2.421151497e-02f,  0.000000000e+00f, }},
-    {{ 5.000000000e-02f, -3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f,  6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f,  1.256118221e-01f,  0.000000000e+00f, -1.126112056e-01f, -7.944389175e-02f,  0.000000000e+00f, -2.421151497e-02f,  0.000000000e+00f, }},
-    {{ 5.000000000e-02f,  8.090169944e-02f,  0.000000000e+00f,  3.090169944e-02f,  6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f,  0.000000000e+00f, -2.950836627e-02f,  0.000000000e+00f, -1.497759251e-01f,  0.000000000e+00f, -7.763237543e-02f, }},
-    {{ 5.000000000e-02f,  8.090169944e-02f,  0.000000000e+00f, -3.090169944e-02f, -6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f,  0.000000000e+00f, -2.950836627e-02f,  0.000000000e+00f,  1.497759251e-01f,  0.000000000e+00f,  7.763237543e-02f, }},
-    {{ 5.000000000e-02f, -8.090169944e-02f,  0.000000000e+00f,  3.090169944e-02f, -6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f,  7.763237543e-02f,  0.000000000e+00f,  2.950836627e-02f,  0.000000000e+00f, -1.497759251e-01f,  0.000000000e+00f, -7.763237543e-02f, }},
-    {{ 5.000000000e-02f, -8.090169944e-02f,  0.000000000e+00f, -3.090169944e-02f,  6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f,  7.763237543e-02f,  0.000000000e+00f,  2.950836627e-02f,  0.000000000e+00f,  1.497759251e-01f,  0.000000000e+00f,  7.763237543e-02f, }},
-    {{ 5.000000000e-02f,  0.000000000e+00f,  3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f,  6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f,  3.034486645e-02f, -6.779013272e-02f,  1.659481923e-01f,  4.797944664e-02f, }},
-    {{ 5.000000000e-02f,  0.000000000e+00f,  3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f,  3.034486645e-02f,  6.779013272e-02f,  1.659481923e-01f, -4.797944664e-02f, }},
-    {{ 5.000000000e-02f,  0.000000000e+00f, -3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f, -3.034486645e-02f, -6.779013272e-02f, -1.659481923e-01f,  4.797944664e-02f, }},
-    {{ 5.000000000e-02f,  0.000000000e+00f, -3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f,  6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f, -3.034486645e-02f,  6.779013272e-02f, -1.659481923e-01f, -4.797944664e-02f, }},
-    {{ 5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f,  6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f,  6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f,  1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }},
-    {{ 5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f,  1.482646439e-02f, }},
-    {{ 5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f,  1.092600649e-02f, -7.364853795e-02f,  1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f, }},
-    {{ 5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f,  6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f,  6.338656910e-02f,  1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f,  1.482646439e-02f, }},
-    {{ 5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f,  6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f,  7.364853795e-02f,  1.011266756e-01f,  7.086833869e-02f, -1.482646439e-02f, }},
-    {{ 5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f,  6.338656910e-02f, -1.092600649e-02f,  7.364853795e-02f, -1.011266756e-01f,  7.086833869e-02f,  1.482646439e-02f, }},
-    {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f, -6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f,  6.338656910e-02f,  1.092600649e-02f,  7.364853795e-02f,  1.011266756e-01f,  7.086833869e-02f, -1.482646439e-02f, }},
-    {{ 5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f,  6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f,  1.092600649e-02f,  7.364853795e-02f, -1.011266756e-01f,  7.086833869e-02f,  1.482646439e-02f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,20> ThirdOrderEncoder{{
+constexpr std::array ThirdOrderDecoder{ /* CTAD deduces 20 rows of 16 floats; the row count is static_assert-ed against ThirdOrderEncoder. */
+    std::array{5.000000000e-02f,  3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f,  6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f,  0.000000000e+00f,  1.126112056e-01f,  7.944389175e-02f,  0.000000000e+00f,  2.421151497e-02f,  0.000000000e+00f},
+    std::array{5.000000000e-02f, -3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f,  1.256118221e-01f,  0.000000000e+00f, -1.126112056e-01f,  7.944389175e-02f,  0.000000000e+00f,  2.421151497e-02f,  0.000000000e+00f},
+    std::array{5.000000000e-02f,  3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f, -1.256118221e-01f,  0.000000000e+00f,  1.126112056e-01f, -7.944389175e-02f,  0.000000000e+00f, -2.421151497e-02f,  0.000000000e+00f},
+    std::array{5.000000000e-02f, -3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f,  6.454972244e-02f,  9.045084972e-02f,  0.000000000e+00f, -1.232790000e-02f,  1.256118221e-01f,  0.000000000e+00f, -1.126112056e-01f, -7.944389175e-02f,  0.000000000e+00f, -2.421151497e-02f,  0.000000000e+00f},
+    std::array{5.000000000e-02f,  8.090169944e-02f,  0.000000000e+00f,  3.090169944e-02f,  6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f,  0.000000000e+00f, -2.950836627e-02f,  0.000000000e+00f, -1.497759251e-01f,  0.000000000e+00f, -7.763237543e-02f},
+    std::array{5.000000000e-02f,  8.090169944e-02f,  0.000000000e+00f, -3.090169944e-02f, -6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f, -7.763237543e-02f,  0.000000000e+00f, -2.950836627e-02f,  0.000000000e+00f,  1.497759251e-01f,  0.000000000e+00f,  7.763237543e-02f},
+    std::array{5.000000000e-02f, -8.090169944e-02f,  0.000000000e+00f,  3.090169944e-02f, -6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f,  7.763237543e-02f,  0.000000000e+00f,  2.950836627e-02f,  0.000000000e+00f, -1.497759251e-01f,  0.000000000e+00f, -7.763237543e-02f},
+    std::array{5.000000000e-02f, -8.090169944e-02f,  0.000000000e+00f, -3.090169944e-02f,  6.454972244e-02f,  0.000000000e+00f, -5.590169944e-02f,  0.000000000e+00f, -7.216878365e-02f,  7.763237543e-02f,  0.000000000e+00f,  2.950836627e-02f,  0.000000000e+00f,  1.497759251e-01f,  0.000000000e+00f,  7.763237543e-02f},
+    std::array{5.000000000e-02f,  0.000000000e+00f,  3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f,  6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f,  3.034486645e-02f, -6.779013272e-02f,  1.659481923e-01f,  4.797944664e-02f},
+    std::array{5.000000000e-02f,  0.000000000e+00f,  3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f,  3.034486645e-02f,  6.779013272e-02f,  1.659481923e-01f, -4.797944664e-02f},
+    std::array{5.000000000e-02f,  0.000000000e+00f, -3.090169944e-02f,  8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f, -6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f, -3.034486645e-02f, -6.779013272e-02f, -1.659481923e-01f,  4.797944664e-02f},
+    std::array{5.000000000e-02f,  0.000000000e+00f, -3.090169944e-02f, -8.090169944e-02f,  0.000000000e+00f,  0.000000000e+00f, -3.454915028e-02f,  6.454972244e-02f,  8.449668365e-02f,  0.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f, -3.034486645e-02f,  6.779013272e-02f, -1.659481923e-01f, -4.797944664e-02f},
+    std::array{5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f,  6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f,  6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f,  1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f},
+    std::array{5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f,  1.482646439e-02f},
+    std::array{5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f,  5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f,  1.092600649e-02f, -7.364853795e-02f,  1.011266756e-01f, -7.086833869e-02f, -1.482646439e-02f},
+    std::array{5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f,  6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f,  6.338656910e-02f,  1.092600649e-02f, -7.364853795e-02f, -1.011266756e-01f, -7.086833869e-02f,  1.482646439e-02f},
+    std::array{5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f,  6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f, -6.338656910e-02f, -1.092600649e-02f,  7.364853795e-02f,  1.011266756e-01f,  7.086833869e-02f, -1.482646439e-02f},
+    std::array{5.000000000e-02f,  5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -6.454972244e-02f, -6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f,  1.016220987e-01f,  6.338656910e-02f, -1.092600649e-02f,  7.364853795e-02f, -1.011266756e-01f,  7.086833869e-02f,  1.482646439e-02f},
+    std::array{5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f,  5.000000000e-02f, -6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f, -6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f,  6.338656910e-02f,  1.092600649e-02f,  7.364853795e-02f,  1.011266756e-01f,  7.086833869e-02f, -1.482646439e-02f},
+    std::array{5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f, -5.000000000e-02f,  6.454972244e-02f,  6.454972244e-02f,  0.000000000e+00f,  6.454972244e-02f,  0.000000000e+00f, -1.016220987e-01f, -6.338656910e-02f,  1.092600649e-02f,  7.364853795e-02f, -1.011266756e-01f,  7.086833869e-02f,  1.482646439e-02f},
+};
+constexpr std::array ThirdOrderEncoder{
     CalcAmbiCoeffs( 0.35682208976f,  0.93417235897f,  0.00000000000f),
     CalcAmbiCoeffs(-0.35682208976f,  0.93417235897f,  0.00000000000f),
     CalcAmbiCoeffs( 0.35682208976f, -0.93417235897f,  0.00000000000f),
@@ -195,24 +195,24 @@ constexpr std::array<AmbiChannelFloatArray,20> ThirdOrderEncoder{{
     CalcAmbiCoeffs(     inv_sqrt3f,     -inv_sqrt3f,     -inv_sqrt3f),
     CalcAmbiCoeffs(    -inv_sqrt3f,     -inv_sqrt3f,      inv_sqrt3f),
     CalcAmbiCoeffs(    -inv_sqrt3f,     -inv_sqrt3f,     -inv_sqrt3f),
-}};
+};
 static_assert(ThirdOrderDecoder.size() == ThirdOrderEncoder.size(), "Third-order mismatch");
 
 /* This calculates a 2D third-order "upsampler" matrix. Same as the third-order
  * matrix, just using a more optimized speaker array for horizontal-only
  * content.
  */
-constexpr std::array<std::array<float,16>,8> ThirdOrder2DDecoder{{
-    {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f,  1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  4.573941867e-02f, }},
-    {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f,  5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f,  4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }},
-    {{ 1.250000000e-01f, -1.333505242e-01f, 0.0f, -5.523559567e-02f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f,  4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.104247249e-01f, }},
-    {{ 1.250000000e-01f, -5.523559567e-02f, 0.0f, -1.333505242e-01f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }},
-    {{ 1.250000000e-01f,  5.523559567e-02f, 0.0f, -1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f,  1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f, }},
-    {{ 1.250000000e-01f,  1.333505242e-01f, 0.0f, -5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.104247249e-01f, }},
-    {{ 1.250000000e-01f,  1.333505242e-01f, 0.0f,  5.523559567e-02f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f, }},
-    {{ 1.250000000e-01f,  5.523559567e-02f, 0.0f,  1.333505242e-01f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f,  1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  4.573941867e-02f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,8> ThirdOrder2DEncoder{{
+constexpr std::array ThirdOrder2DDecoder{ /* CTAD deduces 8 rows of 16 floats; the row count is static_assert-ed against ThirdOrder2DEncoder. */
+    std::array{1.250000000e-01f, -5.523559567e-02f, 0.0f,  1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  4.573941867e-02f},
+    std::array{1.250000000e-01f, -1.333505242e-01f, 0.0f,  5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f,  4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f},
+    std::array{1.250000000e-01f, -1.333505242e-01f, 0.0f, -5.523559567e-02f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f,  4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.104247249e-01f},
+    std::array{1.250000000e-01f, -5.523559567e-02f, 0.0f, -1.333505242e-01f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f, -1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f},
+    std::array{1.250000000e-01f,  5.523559567e-02f, 0.0f, -1.333505242e-01f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f,  1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -4.573941867e-02f},
+    std::array{1.250000000e-01f,  1.333505242e-01f, 0.0f, -5.523559567e-02f, -9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.104247249e-01f},
+    std::array{1.250000000e-01f,  1.333505242e-01f, 0.0f,  5.523559567e-02f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f, -9.128709292e-02f, -4.573941867e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.104247249e-01f},
+    std::array{1.250000000e-01f,  5.523559567e-02f, 0.0f,  1.333505242e-01f,  9.128709292e-02f, 0.0f, 0.0f, 0.0f,  9.128709292e-02f,  1.104247249e-01f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  4.573941867e-02f},
+};
+constexpr std::array ThirdOrder2DEncoder{
     CalcAmbiCoeffs(-0.38268343237f, 0.0f,  0.92387953251f),
     CalcAmbiCoeffs(-0.92387953251f, 0.0f,  0.38268343237f),
     CalcAmbiCoeffs(-0.92387953251f, 0.0f, -0.38268343237f),
@@ -221,7 +221,7 @@ constexpr std::array<AmbiChannelFloatArray,8> ThirdOrder2DEncoder{{
     CalcAmbiCoeffs( 0.92387953251f, 0.0f, -0.38268343237f),
     CalcAmbiCoeffs( 0.92387953251f, 0.0f,  0.38268343237f),
     CalcAmbiCoeffs( 0.38268343237f, 0.0f,  0.92387953251f),
-}};
+};
 static_assert(ThirdOrder2DDecoder.size() == ThirdOrder2DEncoder.size(), "Third-order 2D mismatch");
 
 
@@ -230,19 +230,19 @@ static_assert(ThirdOrder2DDecoder.size() == ThirdOrder2DEncoder.size(), "Third-o
  * the foreseeable future. This is only necessary for mixing horizontal-only
  * fourth-order content to 3D.
  */
-constexpr std::array<std::array<float,25>,10> FourthOrder2DDecoder{{
-    {{ 1.000000000e-01f,  3.568220898e-02f, 0.0f,  1.098185471e-01f,  6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f,  7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  5.620301997e-02f,  8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f, }},
-    {{ 1.000000000e-01f,  9.341723590e-02f, 0.0f,  6.787159473e-02f,  9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f,  2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
-    {{ 1.000000000e-01f,  1.154700538e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, -9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.014978717e-02f, }},
-    {{ 1.000000000e-01f,  9.341723590e-02f, 0.0f, -6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f,  2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.093839659e-02f,  5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
-    {{ 1.000000000e-01f,  3.568220898e-02f, 0.0f, -1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f,  7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f, }},
-    {{ 1.000000000e-01f, -3.568220898e-02f, 0.0f, -1.098185471e-01f,  6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f,  8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f, }},
-    {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f, -6.787159473e-02f,  9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
-    {{ 1.000000000e-01f, -1.154700538e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f,  9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.014978717e-02f, }},
-    {{ 1.000000000e-01f, -9.341723590e-02f, 0.0f,  6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f,  5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f, }},
-    {{ 1.000000000e-01f, -3.568220898e-02f, 0.0f,  1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f, }},
-}};
-constexpr std::array<AmbiChannelFloatArray,10> FourthOrder2DEncoder{{
+constexpr std::array FourthOrder2DDecoder{
+    std::array{1.000000000e-01f,  3.568220898e-02f, 0.0f,  1.098185471e-01f,  6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f,  7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  5.620301997e-02f,  8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f},
+    std::array{1.000000000e-01f,  9.341723590e-02f, 0.0f,  6.787159473e-02f,  9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f,  2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f},
+    std::array{1.000000000e-01f,  1.154700538e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f, -9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.014978717e-02f},
+    std::array{1.000000000e-01f,  9.341723590e-02f, 0.0f, -6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f,  2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.093839659e-02f,  5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f},
+    std::array{1.000000000e-01f,  3.568220898e-02f, 0.0f, -1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f,  7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f},
+    std::array{1.000000000e-01f, -3.568220898e-02f, 0.0f, -1.098185471e-01f,  6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -5.620301997e-02f,  8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f},
+    std::array{1.000000000e-01f, -9.341723590e-02f, 0.0f, -6.787159473e-02f,  9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.093839659e-02f, -5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f},
+    std::array{1.000000000e-01f, -1.154700538e-01f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, -1.032795559e-01f,  9.561828875e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  0.000000000e+00f,  0.000000000e+00f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  9.014978717e-02f},
+    std::array{1.000000000e-01f, -9.341723590e-02f, 0.0f,  6.787159473e-02f, -9.822469464e-02f, 0.0f, 0.0f, 0.0f, -3.191513794e-02f, -2.954767620e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -9.093839659e-02f,  5.298871540e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -7.293270986e-02f},
+    std::array{1.000000000e-01f, -3.568220898e-02f, 0.0f,  1.098185471e-01f, -6.070619982e-02f, 0.0f, 0.0f, 0.0f,  8.355491589e-02f, -7.735682057e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  5.620301997e-02f, -8.573754253e-02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,  2.785781628e-02f},
+};
+constexpr std::array FourthOrder2DEncoder{
     CalcAmbiCoeffs( 3.090169944e-01f,  0.000000000e+00f,  9.510565163e-01f),
     CalcAmbiCoeffs( 8.090169944e-01f,  0.000000000e+00f,  5.877852523e-01f),
     CalcAmbiCoeffs( 1.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f),
@@ -253,12 +253,12 @@ constexpr std::array<AmbiChannelFloatArray,10> FourthOrder2DEncoder{{
     CalcAmbiCoeffs(-1.000000000e+00f,  0.000000000e+00f,  0.000000000e+00f),
     CalcAmbiCoeffs(-8.090169944e-01f,  0.000000000e+00f,  5.877852523e-01f),
     CalcAmbiCoeffs(-3.090169944e-01f,  0.000000000e+00f,  9.510565163e-01f),
-}};
+};
 static_assert(FourthOrder2DDecoder.size() == FourthOrder2DEncoder.size(), "Fourth-order 2D mismatch");
 
 
 template<size_t N, size_t M>
-auto CalcAmbiUpsampler(const std::array<std::array<float,N>,M> &decoder,
+constexpr auto CalcAmbiUpsampler(const std::array<std::array<float,N>,M> &decoder,
     const std::array<AmbiChannelFloatArray,M> &encoder)
 {
     std::array<AmbiChannelFloatArray,N> res{};
@@ -279,13 +279,13 @@ auto CalcAmbiUpsampler(const std::array<std::array<float,N>,M> &decoder,
 
 } // namespace
 
-const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrderUp{CalcAmbiUpsampler(FirstOrderDecoder, FirstOrderEncoder)};
-const std::array<AmbiChannelFloatArray,4> AmbiScale::FirstOrder2DUp{CalcAmbiUpsampler(FirstOrder2DDecoder, FirstOrder2DEncoder)};
-const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrderUp{CalcAmbiUpsampler(SecondOrderDecoder, SecondOrderEncoder)};
-const std::array<AmbiChannelFloatArray,9> AmbiScale::SecondOrder2DUp{CalcAmbiUpsampler(SecondOrder2DDecoder, SecondOrder2DEncoder)};
-const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrderUp{CalcAmbiUpsampler(ThirdOrderDecoder, ThirdOrderEncoder)};
-const std::array<AmbiChannelFloatArray,16> AmbiScale::ThirdOrder2DUp{CalcAmbiUpsampler(ThirdOrder2DDecoder, ThirdOrder2DEncoder)};
-const std::array<AmbiChannelFloatArray,25> AmbiScale::FourthOrder2DUp{CalcAmbiUpsampler(FourthOrder2DDecoder, FourthOrder2DEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,4> AmbiScale::FirstOrderUp{CalcAmbiUpsampler(FirstOrderDecoder, FirstOrderEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,4> AmbiScale::FirstOrder2DUp{CalcAmbiUpsampler(FirstOrder2DDecoder, FirstOrder2DEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,9> AmbiScale::SecondOrderUp{CalcAmbiUpsampler(SecondOrderDecoder, SecondOrderEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,9> AmbiScale::SecondOrder2DUp{CalcAmbiUpsampler(SecondOrder2DDecoder, SecondOrder2DEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,16> AmbiScale::ThirdOrderUp{CalcAmbiUpsampler(ThirdOrderDecoder, ThirdOrderEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,16> AmbiScale::ThirdOrder2DUp{CalcAmbiUpsampler(ThirdOrder2DDecoder, ThirdOrder2DEncoder)};
+const std::array<std::array<float,MaxAmbiChannels>,25> AmbiScale::FourthOrder2DUp{CalcAmbiUpsampler(FourthOrder2DDecoder, FourthOrder2DEncoder)};
 
 
 std::array<float,MaxAmbiOrder+1> AmbiScale::GetHFOrderScales(const uint src_order,
diff --git a/core/ambidefs.h b/core/ambidefs.h
index b7d2bcd1..bea1a312 100644
--- a/core/ambidefs.h
+++ b/core/ambidefs.h
@@ -14,26 +14,26 @@ using uint = unsigned int;
  * needed will be (o+1)**2, thus zero-order has 1, first-order has 4, second-
  * order has 9, third-order has 16, and fourth-order has 25.
  */
-constexpr uint8_t MaxAmbiOrder{3};
+inline constexpr uint8_t MaxAmbiOrder{3};
 constexpr inline size_t AmbiChannelsFromOrder(size_t order) noexcept
 { return (order+1) * (order+1); }
-constexpr size_t MaxAmbiChannels{AmbiChannelsFromOrder(MaxAmbiOrder)};
+inline constexpr size_t MaxAmbiChannels{AmbiChannelsFromOrder(MaxAmbiOrder)};
 
 /* A bitmask of ambisonic channels for 0 to 4th order. This only specifies up
  * to 4th order, which is the highest order a 32-bit mask value can specify (a
  * 64-bit mask could handle up to 7th order).
  */
-constexpr uint Ambi0OrderMask{0x00000001};
-constexpr uint Ambi1OrderMask{0x0000000f};
-constexpr uint Ambi2OrderMask{0x000001ff};
-constexpr uint Ambi3OrderMask{0x0000ffff};
-constexpr uint Ambi4OrderMask{0x01ffffff};
+inline constexpr uint Ambi0OrderMask{0x00000001};
+inline constexpr uint Ambi1OrderMask{0x0000000f};
+inline constexpr uint Ambi2OrderMask{0x000001ff};
+inline constexpr uint Ambi3OrderMask{0x0000ffff};
+inline constexpr uint Ambi4OrderMask{0x01ffffff};
 
 /* A bitmask of ambisonic channels with height information. If none of these
  * channels are used/needed, there's no height (e.g. with most surround sound
  * speaker setups). This is ACN ordering, with bit 0 being ACN 0, etc.
  */
-constexpr uint AmbiPeriphonicMask{0xfe7ce4};
+inline constexpr uint AmbiPeriphonicMask{0xfe7ce4};
 
 /* The maximum number of ambisonic channels for 2D (non-periphonic)
  * representation. This is 2 per each order above zero-order, plus 1 for zero-
@@ -41,77 +41,61 @@ constexpr uint AmbiPeriphonicMask{0xfe7ce4};
  */
 constexpr inline size_t Ambi2DChannelsFromOrder(size_t order) noexcept
 { return order*2 + 1; }
-constexpr size_t MaxAmbi2DChannels{Ambi2DChannelsFromOrder(MaxAmbiOrder)};
+inline constexpr size_t MaxAmbi2DChannels{Ambi2DChannelsFromOrder(MaxAmbiOrder)};
 
 
 /* NOTE: These are scale factors as applied to Ambisonics content. Decoder
  * coefficients should be divided by these values to get proper scalings.
  */
 struct AmbiScale {
-    static auto& FromN3D() noexcept
-    {
-        static constexpr const std::array<float,MaxAmbiChannels> ret{{
-            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
-        }};
-        return ret;
-    }
-    static auto& FromSN3D() noexcept
-    {
-        static constexpr const std::array<float,MaxAmbiChannels> ret{{
-            1.000000000f, /* ACN  0, sqrt(1) */
-            1.732050808f, /* ACN  1, sqrt(3) */
-            1.732050808f, /* ACN  2, sqrt(3) */
-            1.732050808f, /* ACN  3, sqrt(3) */
-            2.236067978f, /* ACN  4, sqrt(5) */
-            2.236067978f, /* ACN  5, sqrt(5) */
-            2.236067978f, /* ACN  6, sqrt(5) */
-            2.236067978f, /* ACN  7, sqrt(5) */
-            2.236067978f, /* ACN  8, sqrt(5) */
-            2.645751311f, /* ACN  9, sqrt(7) */
-            2.645751311f, /* ACN 10, sqrt(7) */
-            2.645751311f, /* ACN 11, sqrt(7) */
-            2.645751311f, /* ACN 12, sqrt(7) */
-            2.645751311f, /* ACN 13, sqrt(7) */
-            2.645751311f, /* ACN 14, sqrt(7) */
-            2.645751311f, /* ACN 15, sqrt(7) */
-        }};
-        return ret;
-    }
-    static auto& FromFuMa() noexcept
-    {
-        static constexpr const std::array<float,MaxAmbiChannels> ret{{
-            1.414213562f, /* ACN  0 (W), sqrt(2) */
-            1.732050808f, /* ACN  1 (Y), sqrt(3) */
-            1.732050808f, /* ACN  2 (Z), sqrt(3) */
-            1.732050808f, /* ACN  3 (X), sqrt(3) */
-            1.936491673f, /* ACN  4 (V), sqrt(15)/2 */
-            1.936491673f, /* ACN  5 (T), sqrt(15)/2 */
-            2.236067978f, /* ACN  6 (R), sqrt(5) */
-            1.936491673f, /* ACN  7 (S), sqrt(15)/2 */
-            1.936491673f, /* ACN  8 (U), sqrt(15)/2 */
-            2.091650066f, /* ACN  9 (Q), sqrt(35/8) */
-            1.972026594f, /* ACN 10 (O), sqrt(35)/3 */
-            2.231093404f, /* ACN 11 (M), sqrt(224/45) */
-            2.645751311f, /* ACN 12 (K), sqrt(7) */
-            2.231093404f, /* ACN 13 (L), sqrt(224/45) */
-            1.972026594f, /* ACN 14 (N), sqrt(35)/3 */
-            2.091650066f, /* ACN 15 (P), sqrt(35/8) */
-        }};
-        return ret;
-    }
-    static auto& FromUHJ() noexcept
-    {
-        static constexpr const std::array<float,MaxAmbiChannels> ret{{
-            1.000000000f, /* ACN  0 (W), sqrt(1) */
-            1.224744871f, /* ACN  1 (Y), sqrt(3/2) */
-            1.224744871f, /* ACN  2 (Z), sqrt(3/2) */
-            1.224744871f, /* ACN  3 (X), sqrt(3/2) */
-            /* Higher orders not relevant for UHJ. */
-            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
-        }};
-        return ret;
-    }
+    static inline constexpr std::array<float,MaxAmbiChannels> FromN3D{{
+        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
+    }};
+    static inline constexpr std::array<float,MaxAmbiChannels> FromSN3D{{
+        1.000000000f, /* ACN  0, sqrt(1) */
+        1.732050808f, /* ACN  1, sqrt(3) */
+        1.732050808f, /* ACN  2, sqrt(3) */
+        1.732050808f, /* ACN  3, sqrt(3) */
+        2.236067978f, /* ACN  4, sqrt(5) */
+        2.236067978f, /* ACN  5, sqrt(5) */
+        2.236067978f, /* ACN  6, sqrt(5) */
+        2.236067978f, /* ACN  7, sqrt(5) */
+        2.236067978f, /* ACN  8, sqrt(5) */
+        2.645751311f, /* ACN  9, sqrt(7) */
+        2.645751311f, /* ACN 10, sqrt(7) */
+        2.645751311f, /* ACN 11, sqrt(7) */
+        2.645751311f, /* ACN 12, sqrt(7) */
+        2.645751311f, /* ACN 13, sqrt(7) */
+        2.645751311f, /* ACN 14, sqrt(7) */
+        2.645751311f, /* ACN 15, sqrt(7) */
+    }};
+    static inline constexpr std::array<float,MaxAmbiChannels> FromFuMa{{
+        1.414213562f, /* ACN  0 (W), sqrt(2) */
+        1.732050808f, /* ACN  1 (Y), sqrt(3) */
+        1.732050808f, /* ACN  2 (Z), sqrt(3) */
+        1.732050808f, /* ACN  3 (X), sqrt(3) */
+        1.936491673f, /* ACN  4 (V), sqrt(15)/2 */
+        1.936491673f, /* ACN  5 (T), sqrt(15)/2 */
+        2.236067978f, /* ACN  6 (R), sqrt(5) */
+        1.936491673f, /* ACN  7 (S), sqrt(15)/2 */
+        1.936491673f, /* ACN  8 (U), sqrt(15)/2 */
+        2.091650066f, /* ACN  9 (Q), sqrt(35/8) */
+        1.972026594f, /* ACN 10 (O), sqrt(35)/3 */
+        2.231093404f, /* ACN 11 (M), sqrt(224/45) */
+        2.645751311f, /* ACN 12 (K), sqrt(7) */
+        2.231093404f, /* ACN 13 (L), sqrt(224/45) */
+        1.972026594f, /* ACN 14 (N), sqrt(35)/3 */
+        2.091650066f, /* ACN 15 (P), sqrt(35/8) */
+    }};
+    static inline constexpr std::array<float,MaxAmbiChannels> FromUHJ{{
+        1.000000000f, /* ACN  0 (W), sqrt(1) */
+        1.224744871f, /* ACN  1 (Y), sqrt(3/2) */
+        1.224744871f, /* ACN  2 (Z), sqrt(3/2) */
+        1.224744871f, /* ACN  3 (X), sqrt(3/2) */
+        /* Higher orders not relevant for UHJ. */
+        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+    }};
 
     /* Retrieves per-order HF scaling factors for "upsampling" ambisonic data. */
     static std::array<float,MaxAmbiOrder+1> GetHFOrderScales(const uint src_order,
@@ -127,72 +111,49 @@ struct AmbiScale {
 };
 
 struct AmbiIndex {
-    static auto& FromFuMa() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
-            0,  /* W */
-            3,  /* X */
-            1,  /* Y */
-            2,  /* Z */
-            6,  /* R */
-            7,  /* S */
-            5,  /* T */
-            8,  /* U */
-            4,  /* V */
-            12, /* K */
-            13, /* L */
-            11, /* M */
-            14, /* N */
-            10, /* O */
-            15, /* P */
-            9,  /* Q */
-        }};
-        return ret;
-    }
-    static auto& FromFuMa2D() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
-            0,  /* W */
-            3,  /* X */
-            1,  /* Y */
-            8,  /* U */
-            4,  /* V */
-            15, /* P */
-            9,  /* Q */
-        }};
-        return ret;
-    }
-
-    static auto& FromACN() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
-            0,  1,  2,  3,  4,  5,  6,  7,
-            8,  9, 10, 11, 12, 13, 14, 15
-        }};
-        return ret;
-    }
-    static auto& FromACN2D() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
-            0, 1,3, 4,8, 9,15
-        }};
-        return ret;
-    }
-
-    static auto& OrderFromChannel() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbiChannels> ret{{
-            0, 1,1,1, 2,2,2,2,2, 3,3,3,3,3,3,3,
-        }};
-        return ret;
-    }
-    static auto& OrderFrom2DChannel() noexcept
-    {
-        static constexpr const std::array<uint8_t,MaxAmbi2DChannels> ret{{
-            0, 1,1, 2,2, 3,3,
-        }};
-        return ret;
-    }
+    static inline constexpr std::array<uint8_t,MaxAmbiChannels> FromFuMa{{
+        0,  /* W */
+        3,  /* X */
+        1,  /* Y */
+        2,  /* Z */
+        6,  /* R */
+        7,  /* S */
+        5,  /* T */
+        8,  /* U */
+        4,  /* V */
+        12, /* K */
+        13, /* L */
+        11, /* M */
+        14, /* N */
+        10, /* O */
+        15, /* P */
+        9,  /* Q */
+    }};
+    static inline constexpr std::array<uint8_t,MaxAmbi2DChannels> FromFuMa2D{{
+        0,  /* W */
+        3,  /* X */
+        1,  /* Y */
+        8,  /* U */
+        4,  /* V */
+        15, /* P */
+        9,  /* Q */
+    }};
+
+    static inline constexpr std::array<uint8_t,MaxAmbiChannels> FromACN{{
+        0,  1,  2,  3,  4,  5,  6,  7,
+        8,  9, 10, 11, 12, 13, 14, 15
+    }};
+    static inline constexpr std::array<uint8_t,MaxAmbi2DChannels> FromACN2D{{
+        0, 1,3, 4,8, 9,15
+    }};
+
+
+    static inline constexpr std::array<uint8_t,MaxAmbiChannels> OrderFromChannel{{
+        0, 1,1,1, 2,2,2,2,2, 3,3,3,3,3,3,3,
+    }};
+    static inline constexpr std::array<uint8_t,MaxAmbi2DChannels> OrderFrom2DChannel{{
+        0, 1,1, 2,2, 3,3,
+    }};
 };
 
 
diff --git a/core/async_event.h b/core/async_event.h
index 5a2f5f91..f1ca0c7b 100644
--- a/core/async_event.h
+++ b/core/async_event.h
@@ -1,6 +1,9 @@
 #ifndef CORE_EVENT_H
 #define CORE_EVENT_H
 
+#include <stdint.h>
+#include <variant>
+
 #include "almalloc.h"
 
 struct EffectState;
@@ -8,48 +11,53 @@ struct EffectState;
 using uint = unsigned int;
 
 
-struct AsyncEvent {
-    enum : uint {
-        /* User event types. */
-        SourceStateChange,
-        BufferCompleted,
-        Disconnected,
-        UserEventCount,
-
-        /* Internal events, always processed. */
-        ReleaseEffectState = 128,
-
-        /* End event thread processing. */
-        KillThread,
-    };
-
-    enum class SrcState {
-        Reset,
-        Stop,
-        Play,
-        Pause
-    };
-
-    const uint EnumType;
-    union {
-        char dummy;
-        struct {
-            uint id;
-            SrcState state;
-        } srcstate;
-        struct {
-            uint id;
-            uint count;
-        } bufcomp;
-        struct {
-            char msg[244];
-        } disconnect;
-        EffectState *mEffectState;
-    } u{};
-
-    constexpr AsyncEvent(uint type) noexcept : EnumType{type} { }
-
-    DISABLE_ALLOC()
+enum class AsyncEnableBits : uint8_t {
+    SourceState,
+    BufferCompleted,
+    Disconnected,
+    Count
+};
+
+
+enum class AsyncSrcState : uint8_t {
+    Reset,
+    Stop,
+    Play,
+    Pause
+};
+
+using AsyncKillThread = std::monostate; /* No payload: the request to end event processing. */
+
+struct AsyncSourceStateEvent {
+    uint mId;
+    AsyncSrcState mState;
 };
 
+struct AsyncBufferCompleteEvent {
+    uint mId;
+    uint mCount;
+};
+
+struct AsyncDisconnectEvent {
+    char msg[244];
+};
+
+struct AsyncEffectReleaseEvent {
+    EffectState *mEffectState;
+};
+
+using AsyncEvent = std::variant<AsyncKillThread, /* first: the default-constructed state */
+        AsyncSourceStateEvent,
+        AsyncBufferCompleteEvent,
+        AsyncEffectReleaseEvent,
+        AsyncDisconnectEvent>;
+/* Constructs an AsyncEvent holding a T (built from args) in evtbuf and returns that T. */
+template<typename T, typename ...Args>
+auto &InitAsyncEvent(std::byte *evtbuf, Args&& ...args)
+{
+    auto *evt = al::construct_at(reinterpret_cast<AsyncEvent*>(evtbuf), std::in_place_type<T>,
+        std::forward<Args>(args)...);
+    return std::get<T>(*evt);
+}
+
 #endif
diff --git a/core/bformatdec.cpp b/core/bformatdec.cpp
index 129b9976..a308e185 100644
--- a/core/bformatdec.cpp
+++ b/core/bformatdec.cpp
@@ -16,33 +16,45 @@
 #include "opthelpers.h"
 
 
+namespace {
+/* Merges several callables into a single overload set, e.g. for std::visit. */
+template<typename... Ts>
+struct overloaded : Ts... { using Ts::operator()...; };
+/* Deduction guide: overloaded{f, g} deduces overloaded<F, G> from its arguments. */
+template<typename... Ts>
+overloaded(Ts...) -> overloaded<Ts...>;
+
+} // namespace
+
 BFormatDec::BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs,
     const al::span<const ChannelDec> coeffslf, const float xover_f0norm,
     std::unique_ptr<FrontStablizer> stablizer)
-    : mStablizer{std::move(stablizer)}, mDualBand{!coeffslf.empty()}, mChannelDec{inchans}
+    : mStablizer{std::move(stablizer)}
 {
-    if(!mDualBand)
+    if(coeffslf.empty())
     {
-        for(size_t j{0};j < mChannelDec.size();++j)
+        auto &decoder = mChannelDec.emplace<std::vector<ChannelDecoderSingle>>(inchans);
+        for(size_t j{0};j < decoder.size();++j)
         {
-            float *outcoeffs{mChannelDec[j].mGains.Single};
+            float *outcoeffs{decoder[j].mGains};
             for(const ChannelDec &incoeffs : coeffs)
                 *(outcoeffs++) = incoeffs[j];
         }
     }
     else
     {
-        mChannelDec[0].mXOver.init(xover_f0norm);
-        for(size_t j{1};j < mChannelDec.size();++j)
-            mChannelDec[j].mXOver = mChannelDec[0].mXOver;
+        auto &decoder = mChannelDec.emplace<std::vector<ChannelDecoderDual>>(inchans);
+        decoder[0].mXOver.init(xover_f0norm);
+        for(size_t j{1};j < decoder.size();++j)
+            decoder[j].mXOver = decoder[0].mXOver;
 
-        for(size_t j{0};j < mChannelDec.size();++j)
+        for(size_t j{0};j < decoder.size();++j)
         {
-            float *outcoeffs{mChannelDec[j].mGains.Dual[sHFBand]};
+            float *outcoeffs{decoder[j].mGains[sHFBand]};
             for(const ChannelDec &incoeffs : coeffs)
                 *(outcoeffs++) = incoeffs[j];
 
-            outcoeffs = mChannelDec[j].mGains.Dual[sLFBand];
+            outcoeffs = decoder[j].mGains[sLFBand];
             for(const ChannelDec &incoeffs : coeffslf)
                 *(outcoeffs++) = incoeffs[j];
         }
@@ -55,30 +67,32 @@ void BFormatDec::process(const al::span<FloatBufferLine> OutBuffer,
 {
     ASSUME(SamplesToDo > 0);
 
-    if(mDualBand)
+    auto decode_dualband = [=](std::vector<ChannelDecoderDual> &decoder)
     {
+        auto *input = InSamples;
         const al::span<float> hfSamples{mSamples[sHFBand].data(), SamplesToDo};
         const al::span<float> lfSamples{mSamples[sLFBand].data(), SamplesToDo};
-        for(auto &chandec : mChannelDec)
+        for(auto &chandec : decoder)
         {
-            chandec.mXOver.process({InSamples->data(), SamplesToDo}, hfSamples.data(),
+            chandec.mXOver.process({input->data(), SamplesToDo}, hfSamples.data(),
                 lfSamples.data());
-            MixSamples(hfSamples, OutBuffer, chandec.mGains.Dual[sHFBand],
-                chandec.mGains.Dual[sHFBand], 0, 0);
-            MixSamples(lfSamples, OutBuffer, chandec.mGains.Dual[sLFBand],
-                chandec.mGains.Dual[sLFBand], 0, 0);
-            ++InSamples;
+            MixSamples(hfSamples, OutBuffer, chandec.mGains[sHFBand], chandec.mGains[sHFBand],0,0);
+            MixSamples(lfSamples, OutBuffer, chandec.mGains[sLFBand], chandec.mGains[sLFBand],0,0);
+            ++input;
         }
-    }
-    else
+    };
+    auto decode_singleband = [=](std::vector<ChannelDecoderSingle> &decoder)
     {
-        for(auto &chandec : mChannelDec)
+        auto *input = InSamples;
+        for(auto &chandec : decoder)
         {
-            MixSamples({InSamples->data(), SamplesToDo}, OutBuffer, chandec.mGains.Single,
-                chandec.mGains.Single, 0, 0);
-            ++InSamples;
+            MixSamples({input->data(), SamplesToDo}, OutBuffer, chandec.mGains, chandec.mGains,
+                0, 0);
+            ++input;
         }
-    }
+    };
+
+    std::visit(overloaded{decode_dualband, decode_singleband}, mChannelDec);
 }
 
 void BFormatDec::processStablize(const al::span<FloatBufferLine> OutBuffer,
diff --git a/core/bformatdec.h b/core/bformatdec.h
index 7a27a5a4..3bb7f544 100644
--- a/core/bformatdec.h
+++ b/core/bformatdec.h
@@ -4,6 +4,8 @@
 #include <array>
 #include <cstddef>
 #include <memory>
+#include <variant>
+#include <vector>
 
 #include "almalloc.h"
 #include "alspan.h"
@@ -11,7 +13,6 @@
 #include "bufferline.h"
 #include "devformat.h"
 #include "filters/splitter.h"
-#include "vector.h"
 
 struct FrontStablizer;
 
@@ -23,27 +24,20 @@ class BFormatDec {
     static constexpr size_t sLFBand{1};
     static constexpr size_t sNumBands{2};
 
-    struct ChannelDecoder {
-        union MatrixU {
-            float Dual[sNumBands][MAX_OUTPUT_CHANNELS];
-            float Single[MAX_OUTPUT_CHANNELS];
-        } mGains{};
+    struct ChannelDecoderSingle { /* Single-band decoding: one input channel's output gains. */
+        float mGains[MAX_OUTPUT_CHANNELS];
+    };
 
-        /* NOTE: BandSplitter filter is unused with single-band decoding. */
+    struct ChannelDecoderDual { /* Dual-band decoding: one input channel's filter and gains. */
         BandSplitter mXOver;
+        float mGains[sNumBands][MAX_OUTPUT_CHANNELS]; /* Indexed by sHFBand/sLFBand first. */
     };
 
     alignas(16) std::array<FloatBufferLine,2> mSamples;
 
     const std::unique_ptr<FrontStablizer> mStablizer;
-    const bool mDualBand{false};
-
-    /* TODO: This should ideally be a FlexArray, since ChannelDecoder is rather
-     * small and only a few are needed (3, 4, 5, 7, typically). But that can
-     * only be used in a standard layout struct, and a std::unique_ptr member
-     * (mStablizer) causes GCC and Clang to warn it's not.
-     */
-    al::vector<ChannelDecoder> mChannelDec;
+
+    std::variant<std::vector<ChannelDecoderSingle>,std::vector<ChannelDecoderDual>> mChannelDec;
 
 public:
     BFormatDec(const size_t inchans, const al::span<const ChannelDec> coeffs,
diff --git a/core/bsinc_tables.cpp b/core/bsinc_tables.cpp
index 693645f4..41102e9a 100644
--- a/core/bsinc_tables.cpp
+++ b/core/bsinc_tables.cpp
@@ -7,48 +7,50 @@
 #include <cmath>
 #include <limits>
 #include <memory>
+#include <stddef.h>
 #include <stdexcept>
 
 #include "alnumbers.h"
-#include "core/mixer/defs.h"
+#include "alnumeric.h"
+#include "bsinc_defs.h"
+#include "resampler_limits.h"
 
 
 namespace {
 
 using uint = unsigned int;
 
+#if __cpp_lib_math_special_functions >= 201603L
+using std::cyl_bessel_i;
 
-/* This is the normalized cardinal sine (sinc) function.
- *
- *   sinc(x) = { 1,                   x = 0
- *             { sin(pi x) / (pi x),  otherwise.
- */
-constexpr double Sinc(const double x)
-{
-    constexpr double epsilon{std::numeric_limits<double>::epsilon()};
-    if(!(x > epsilon || x < -epsilon))
-        return 1.0;
-    return std::sin(al::numbers::pi*x) / (al::numbers::pi*x);
-}
+#else
 
 /* The zero-order modified Bessel function of the first kind, used for the
  * Kaiser window.
  *
  *   I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k)
  *          = sum_{k=0}^inf ((x / 2)^k / k!)^2
+ *
+ * This implementation only handles nu = 0, and isn't the most precise (it
+ * sums the series terms in order rather than smallest-first, compounding the
+ * rounding and precision error), but it's good enough.
  */
-constexpr double BesselI_0(const double x) noexcept
+template<typename T, typename U>
+U cyl_bessel_i(T nu, U x)
 {
+    if(nu != T{0})
+        throw std::runtime_error{"cyl_bessel_i: nu != 0"};
+
     /* Start at k=1 since k=0 is trivial. */
-    const double x2{x / 2.0};
+    const double x2{x/2.0};
     double term{1.0};
     double sum{1.0};
-    double last_sum{};
     int k{1};
 
     /* Let the integration converge until the term of the sum is no longer
      * significant.
      */
+    double last_sum{};
     do {
         const double y{x2 / k};
         ++k;
@@ -56,8 +58,21 @@ constexpr double BesselI_0(const double x) noexcept
         term *= y * y;
         sum += term;
     } while(sum != last_sum);
+    return static_cast<U>(sum);
+}
+#endif
 
-    return sum;
+/* This is the normalized cardinal sine (sinc) function.
+ *
+ *   sinc(x) = { 1,                   x = 0
+ *             { sin(pi x) / (pi x),  otherwise.
+ */
+constexpr double Sinc(const double x)
+{
+    constexpr double epsilon{std::numeric_limits<double>::epsilon()};
+    if(!(x > epsilon || x < -epsilon))
+        return 1.0;
+    return std::sin(al::numbers::pi*x) / (al::numbers::pi*x);
 }
 
 /* Calculate a Kaiser window from the given beta value and a normalized k
@@ -78,7 +93,7 @@ constexpr double Kaiser(const double beta, const double k, const double besseli_
 {
     if(!(k >= -1.0 && k <= 1.0))
         return 0.0;
-    return BesselI_0(beta * std::sqrt(1.0 - k*k)) / besseli_0_beta;
+    return cyl_bessel_i(0, beta * std::sqrt(1.0 - k*k)) / besseli_0_beta;
 }
 
 /* Calculates the (normalized frequency) transition width of the Kaiser window.
@@ -107,8 +122,6 @@ struct BSincHeader {
     double width{};
     double beta{};
     double scaleBase{};
-    double scaleRange{};
-    double besseli_0_beta{};
 
     uint a[BSincScaleCount]{};
     uint total_size{};
@@ -118,13 +131,11 @@ struct BSincHeader {
         width = CalcKaiserWidth(Rejection, Order);
         beta = CalcKaiserBeta(Rejection);
         scaleBase = width / 2.0;
-        scaleRange = 1.0 - scaleBase;
-        besseli_0_beta = BesselI_0(beta);
 
         uint num_points{Order+1};
         for(uint si{0};si < BSincScaleCount;++si)
         {
-            const double scale{scaleBase + (scaleRange * (si+1) / BSincScaleCount)};
+            const double scale{lerpd(scaleBase, 1.0, (si+1) / double{BSincScaleCount})};
             const uint a_{std::min(static_cast<uint>(num_points / 2.0 / scale), num_points)};
             const uint m{2 * a_};
 
@@ -142,26 +153,6 @@ constexpr BSincHeader bsinc12_hdr{60, 11};
 constexpr BSincHeader bsinc24_hdr{60, 23};
 
 
-/* NOTE: GCC 5 has an issue with BSincHeader objects being in an anonymous
- * namespace while also being used as non-type template parameters.
- */
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
-
-/* The number of sample points is double the a value (rounded up to a multiple
- * of 4), and scale index 0 includes the doubling for downsampling. bsinc24 is
- * currently the highest quality filter, and will use the most sample points.
- */
-constexpr uint BSincPointsMax{(bsinc24_hdr.a[0]*2 + 3) & ~3u};
-static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
-
-template<size_t total_size>
-struct BSincFilterArray {
-    alignas(16) std::array<float, total_size> mTable;
-    const BSincHeader &hdr;
-
-    BSincFilterArray(const BSincHeader &hdr_) : hdr{hdr_}
-    {
-#else
 template<const BSincHeader &hdr>
 struct BSincFilterArray {
     alignas(16) std::array<float, hdr.total_size> mTable{};
@@ -170,10 +161,12 @@ struct BSincFilterArray {
     {
         constexpr uint BSincPointsMax{(hdr.a[0]*2 + 3) & ~3u};
         static_assert(BSincPointsMax <= MaxResamplerPadding, "MaxResamplerPadding is too small");
-#endif
+
         using filter_type = double[BSincPhaseCount+1][BSincPointsMax];
         auto filter = std::make_unique<filter_type[]>(BSincScaleCount);
 
+        const double besseli_0_beta{cyl_bessel_i(0, hdr.beta)};
+
         /* Calculate the Kaiser-windowed Sinc filter coefficients for each
          * scale and phase index.
          */
@@ -181,7 +174,7 @@ struct BSincFilterArray {
         {
             const uint m{hdr.a[si] * 2};
             const size_t o{(BSincPointsMax-m) / 2};
-            const double scale{hdr.scaleBase + (hdr.scaleRange * (si+1) / BSincScaleCount)};
+            const double scale{lerpd(hdr.scaleBase, 1.0, (si+1) / double{BSincScaleCount})};
             const double cutoff{scale - (hdr.scaleBase * std::max(1.0, scale*2.0))};
             const auto a = static_cast<double>(hdr.a[si]);
             const double l{a - 1.0/BSincPhaseCount};
@@ -196,7 +189,7 @@ struct BSincFilterArray {
                 for(uint i{0};i < m;++i)
                 {
                     const double x{i - phase};
-                    filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, hdr.besseli_0_beta) * cutoff *
+                    filter[si][pi][o+i] = Kaiser(hdr.beta, x/l, besseli_0_beta) * cutoff *
                         Sinc(cutoff*x);
                 }
             }
@@ -265,13 +258,8 @@ struct BSincFilterArray {
     constexpr const float *getTable() const noexcept { return &mTable.front(); }
 };
 
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 6
-const BSincFilterArray<bsinc12_hdr.total_size> bsinc12_filter{bsinc12_hdr};
-const BSincFilterArray<bsinc24_hdr.total_size> bsinc24_filter{bsinc24_hdr};
-#else
 const BSincFilterArray<bsinc12_hdr> bsinc12_filter{};
 const BSincFilterArray<bsinc24_hdr> bsinc24_filter{};
-#endif
 
 template<typename T>
 constexpr BSincTable GenerateBSincTable(const T &filter)
@@ -279,7 +267,7 @@ constexpr BSincTable GenerateBSincTable(const T &filter)
     BSincTable ret{};
     const BSincHeader &hdr = filter.getHeader();
     ret.scaleBase = static_cast<float>(hdr.scaleBase);
-    ret.scaleRange = static_cast<float>(1.0 / hdr.scaleRange);
+    ret.scaleRange = static_cast<float>(1.0 / (1.0 - hdr.scaleBase));
     for(size_t i{0};i < BSincScaleCount;++i)
         ret.m[i] = ((hdr.a[i]*2) + 3) & ~3u;
     ret.filterOffset[0] = 0;
diff --git a/core/buffer_storage.cpp b/core/buffer_storage.cpp
index 98ca2c1b..6ffab124 100644
--- a/core/buffer_storage.cpp
+++ b/core/buffer_storage.cpp
@@ -12,6 +12,7 @@ const char *NameFromFormat(FmtType type) noexcept
     {
     case FmtUByte: return "UInt8";
     case FmtShort: return "Int16";
+    case FmtInt: return "Int32";
     case FmtFloat: return "Float";
     case FmtDouble: return "Double";
     case FmtMulaw: return "muLaw";
@@ -49,6 +50,7 @@ uint BytesFromFmt(FmtType type) noexcept
     {
     case FmtUByte: return sizeof(uint8_t);
     case FmtShort: return sizeof(int16_t);
+    case FmtInt: return sizeof(int32_t);
     case FmtFloat: return sizeof(float);
     case FmtDouble: return sizeof(double);
     case FmtMulaw: return sizeof(uint8_t);
diff --git a/core/buffer_storage.h b/core/buffer_storage.h
index 282d5b53..3b581b5e 100644
--- a/core/buffer_storage.h
+++ b/core/buffer_storage.h
@@ -2,8 +2,8 @@
 #define CORE_BUFFER_STORAGE_H
 
 #include <atomic>
+#include <cstddef>
 
-#include "albyte.h"
 #include "alnumeric.h"
 #include "alspan.h"
 #include "ambidefs.h"
@@ -15,6 +15,7 @@ using uint = unsigned int;
 enum FmtType : unsigned char {
     FmtUByte,
     FmtShort,
+    FmtInt,
     FmtFloat,
     FmtDouble,
     FmtMulaw,
@@ -85,7 +86,7 @@ struct BufferStorage {
     CallbackType mCallback{nullptr};
     void *mUserData{nullptr};
 
-    al::span<al::byte> mData;
+    al::span<std::byte> mData;
 
     uint mSampleRate{0u};
     FmtChannels mChannels{FmtMono};
diff --git a/core/context.cpp b/core/context.cpp
index d68d8327..2ebbc7b1 100644
--- a/core/context.cpp
+++ b/core/context.cpp
@@ -2,7 +2,10 @@
 #include "config.h"
 
 #include <cassert>
+#include <limits>
 #include <memory>
+#include <stdexcept>
+#include <utility>
 
 #include "async_event.h"
 #include "context.h"
@@ -51,7 +54,7 @@ ContextBase::~ContextBase()
 
     if(EffectSlotArray *curarray{mActiveAuxSlots.exchange(nullptr, std::memory_order_relaxed)})
     {
-        al::destroy_n(curarray->end(), curarray->size());
+        std::destroy_n(curarray->end(), curarray->size());
         delete curarray;
     }
 
@@ -63,12 +66,14 @@ ContextBase::~ContextBase()
         auto evt_vec = mAsyncEvents->getReadVector();
         if(evt_vec.first.len > 0)
         {
-            al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf), evt_vec.first.len);
+            std::destroy_n(std::launder(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf)),
+                evt_vec.first.len);
             count += evt_vec.first.len;
         }
         if(evt_vec.second.len > 0)
         {
-            al::destroy_n(reinterpret_cast<AsyncEvent*>(evt_vec.second.buf), evt_vec.second.len);
+            std::destroy_n(std::launder(reinterpret_cast<AsyncEvent*>(evt_vec.second.buf)),
+                evt_vec.second.len);
             count += evt_vec.second.len;
         }
         if(count > 0)
diff --git a/core/context.h b/core/context.h
index 9723eac3..ccb7dd3b 100644
--- a/core/context.h
+++ b/core/context.h
@@ -7,15 +7,15 @@
 #include <cstddef>
 #include <memory>
 #include <thread>
+#include <vector>
 
 #include "almalloc.h"
+#include "alsem.h"
 #include "alspan.h"
 #include "async_event.h"
 #include "atomic.h"
-#include "bufferline.h"
-#include "threads.h"
+#include "opthelpers.h"
 #include "vecmat.h"
-#include "vector.h"
 
 struct DeviceBase;
 struct EffectSlot;
@@ -25,8 +25,6 @@ struct Voice;
 struct VoiceChange;
 struct VoicePropsItem;
 
-using uint = unsigned int;
-
 
 constexpr float SpeedOfSoundMetersPerSec{343.3f};
 
@@ -137,7 +135,7 @@ struct ContextBase {
     std::thread mEventThread;
     al::semaphore mEventSem;
     std::unique_ptr<RingBuffer> mAsyncEvents;
-    using AsyncEventBitset = std::bitset<AsyncEvent::UserEventCount>;
+    using AsyncEventBitset = std::bitset<al::to_underlying(AsyncEnableBits::Count)>;
     std::atomic<AsyncEventBitset> mEnabledEvts{0u};
 
     /* Asynchronous voice change actions are processed as a linked list of
@@ -146,20 +144,20 @@ struct ContextBase {
      * in clusters that are stored in a vector for easy automatic cleanup.
      */
     using VoiceChangeCluster = std::unique_ptr<VoiceChange[]>;
-    al::vector<VoiceChangeCluster> mVoiceChangeClusters;
+    std::vector<VoiceChangeCluster> mVoiceChangeClusters;
 
     using VoiceCluster = std::unique_ptr<Voice[]>;
-    al::vector<VoiceCluster> mVoiceClusters;
+    std::vector<VoiceCluster> mVoiceClusters;
 
     using VoicePropsCluster = std::unique_ptr<VoicePropsItem[]>;
-    al::vector<VoicePropsCluster> mVoicePropClusters;
+    std::vector<VoicePropsCluster> mVoicePropClusters;
 
 
     static constexpr size_t EffectSlotClusterSize{4};
     EffectSlot *getEffectSlot();
 
     using EffectSlotCluster = std::unique_ptr<EffectSlot[]>;
-    al::vector<EffectSlotCluster> mEffectSlotClusters;
+    std::vector<EffectSlotCluster> mEffectSlotClusters;
 
 
     ContextBase(DeviceBase *device);
diff --git a/core/converter.cpp b/core/converter.cpp
index a5141448..5b2f3e15 100644
--- a/core/converter.cpp
+++ b/core/converter.cpp
@@ -6,12 +6,12 @@
 #include <algorithm>
 #include <cassert>
 #include <cmath>
+#include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <limits.h>
 
 #include "albit.h"
-#include "albyte.h"
 #include "alnumeric.h"
 #include "fpu_ctrl.h"
 
@@ -219,7 +219,7 @@ uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint
     const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};
     const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};
     const uint increment{mIncrement};
-    auto SamplesIn = static_cast<const al::byte*>(*src);
+    auto SamplesIn = static_cast<const std::byte*>(*src);
     uint NumSrcSamples{*srcframes};
 
     FPUCtl mixer_mode{};
@@ -265,8 +265,8 @@ uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint
 
         for(size_t chan{0u};chan < mChan.size();chan++)
         {
-            const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
-            al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan;
+            const std::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
+            std::byte *DstSamples = static_cast<std::byte*>(dst) + mDstTypeSize*chan;
 
             /* Load the previous samples into the source data first, then the
              * new samples from the input buffer.
@@ -299,7 +299,7 @@ uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint
         SamplesIn += SrcFrameSize*srcread;
         NumSrcSamples -= srcread;
 
-        dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize;
+        dst = static_cast<std::byte*>(dst) + DstFrameSize*DstSize;
         pos += DstSize;
     }
 
@@ -309,6 +309,98 @@ uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint
     return pos;
 }
 
+uint SampleConverter::convertPlanar(const void **src, uint *srcframes, void *const*dst, uint dstframes)
+{
+    const uint increment{mIncrement};
+    uint NumSrcSamples{*srcframes};
+
+    FPUCtl mixer_mode{};
+    uint pos{0};
+    while(pos < dstframes && NumSrcSamples > 0)
+    {
+        const uint prepcount{mSrcPrepCount};
+        const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};
+
+        if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
+        {
+            /* Not enough input samples to generate an output sample. Store
+             * what we're given for later.
+             */
+            for(size_t chan{0u};chan < mChan.size();chan++)
+            {
+                LoadSamples(&mChan[chan].PrevSamples[prepcount],
+                    static_cast<const std::byte*>(src[chan]), 1, mSrcType, readable);
+                src[chan] = static_cast<const std::byte*>(src[chan]) + mSrcTypeSize*readable;
+            }
+
+            mSrcPrepCount = prepcount + readable;
+            NumSrcSamples = 0;
+            break;
+        }
+
+        float *RESTRICT SrcData{mSrcSamples};
+        float *RESTRICT DstData{mDstSamples};
+        uint DataPosFrac{mFracOffset};
+        uint64_t DataSize64{prepcount};
+        DataSize64 += readable;
+        DataSize64 -= MaxResamplerPadding;
+        DataSize64 <<= MixerFracBits;
+        DataSize64 -= DataPosFrac;
+
+        /* If we have a full prep, we can generate at least one sample. */
+        auto DstSize = static_cast<uint>(
+            clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
+        DstSize = minu(DstSize, dstframes-pos);
+
+        const uint DataPosEnd{DstSize*increment + DataPosFrac};
+        const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
+
+        assert(prepcount+readable >= SrcDataEnd);
+        const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};
+
+        for(size_t chan{0u};chan < mChan.size();chan++)
+        {
+            /* Load the previous samples into the source data first, then the
+             * new samples from the input buffer.
+             */
+            std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
+            LoadSamples(SrcData + prepcount, src[chan], 1, mSrcType, readable);
+
+            /* Store as many prep samples for next time as possible, given the
+             * number of output samples being generated.
+             */
+            std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
+            std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
+                std::end(mChan[chan].PrevSamples), 0.0f);
+
+            /* Now resample, and store the result in the output buffer. */
+            mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,
+                {DstData, DstSize});
+
+            std::byte *DstSamples = static_cast<std::byte*>(dst[chan]) + pos*mDstTypeSize;
+            StoreSamples(DstSamples, DstData, 1, mDstType, DstSize);
+        }
+
+        /* Update the number of prep samples still available, as well as the
+         * fractional offset.
+         */
+        mSrcPrepCount = nextprep;
+        mFracOffset = DataPosEnd & MixerFracMask;
+
+        /* Update the src and dst pointers in case there's still more to do. */
+        const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
+        for(size_t chan{0u};chan < mChan.size();chan++)
+            src[chan] = static_cast<const std::byte*>(src[chan]) + mSrcTypeSize*srcread;
+        NumSrcSamples -= srcread;
+
+        pos += DstSize;
+    }
+
+    *srcframes = NumSrcSamples;
+
+    return pos;
+}
+
 
 void ChannelConverter::convert(const void *src, float *dst, uint frames) const
 {
diff --git a/core/converter.h b/core/converter.h
index 01becea2..49ca124d 100644
--- a/core/converter.h
+++ b/core/converter.h
@@ -36,6 +36,7 @@ struct SampleConverter {
     SampleConverter(size_t numchans) : mChan{numchans} { }
 
     uint convert(const void **src, uint *srcframes, void *dst, uint dstframes);
+    uint convertPlanar(const void **src, uint *srcframes, void *const*dst, uint dstframes);
     uint availableOut(uint srcframes) const;
 
     using SampleOffset = std::chrono::duration<int64_t, std::ratio<1,MixerFracOne>>;
diff --git a/core/cpu_caps.cpp b/core/cpu_caps.cpp
index d4b4d86c..1a064cf4 100644
--- a/core/cpu_caps.cpp
+++ b/core/cpu_caps.cpp
@@ -17,6 +17,7 @@
 #include <intrin.h>
 #endif
 
+#include <algorithm>
 #include <array>
 #include <cctype>
 #include <string>
@@ -50,14 +51,14 @@ inline std::array<reg_type,4> get_cpuid(unsigned int f)
 
 } // namespace
 
-al::optional<CPUInfo> GetCPUInfo()
+std::optional<CPUInfo> GetCPUInfo()
 {
     CPUInfo ret;
 
 #ifdef CAN_GET_CPUID
     auto cpuregs = get_cpuid(0);
     if(cpuregs[0] == 0)
-        return al::nullopt;
+        return std::nullopt;
 
     const reg_type maxfunc{cpuregs[0]};
 
diff --git a/core/cpu_caps.h b/core/cpu_caps.h
index ffd671d0..0826a49b 100644
--- a/core/cpu_caps.h
+++ b/core/cpu_caps.h
@@ -1,10 +1,9 @@
 #ifndef CORE_CPU_CAPS_H
 #define CORE_CPU_CAPS_H
 
+#include <optional>
 #include <string>
 
-#include "aloptional.h"
-
 
 extern int CPUCapFlags;
 enum {
@@ -21,6 +20,6 @@ struct CPUInfo {
     int mCaps{0};
 };
 
-al::optional<CPUInfo> GetCPUInfo();
+std::optional<CPUInfo> GetCPUInfo();
 
 #endif /* CORE_CPU_CAPS_H */
diff --git a/core/cubic_tables.cpp b/core/cubic_tables.cpp
index 73ec6b3f..5e7aafad 100644
--- a/core/cubic_tables.cpp
+++ b/core/cubic_tables.cpp
@@ -1,24 +1,16 @@
 
 #include "cubic_tables.h"
 
-#include <algorithm>
 #include <array>
-#include <cassert>
-#include <cmath>
-#include <limits>
-#include <memory>
-#include <stdexcept>
+#include <stddef.h>
 
-#include "alnumbers.h"
-#include "core/mixer/defs.h"
+#include "cubic_defs.h"
 
 
 namespace {
 
-using uint = unsigned int;
-
 struct SplineFilterArray {
-    alignas(16) CubicCoefficients mTable[CubicPhaseCount]{};
+    alignas(16) std::array<CubicCoefficients,CubicPhaseCount> mTable{};
 
     constexpr SplineFilterArray()
     {
@@ -49,7 +41,7 @@ struct SplineFilterArray {
         mTable[pi].mDeltas[3] = -mTable[pi].mCoeffs[3];
     }
 
-    constexpr auto getTable() const noexcept { return al::as_span(mTable); }
+    constexpr auto& getTable() const noexcept { return mTable; }
 };
 
 constexpr SplineFilterArray SplineFilter{};
diff --git a/core/dbus_wrap.cpp b/core/dbus_wrap.cpp
index 7f221706..48419566 100644
--- a/core/dbus_wrap.cpp
+++ b/core/dbus_wrap.cpp
@@ -8,20 +8,16 @@
 #include <mutex>
 #include <type_traits>
 
+#include "albit.h"
 #include "logging.h"
 
 
-void *dbus_handle{nullptr};
-#define DECL_FUNC(x) decltype(p##x) p##x{};
-DBUS_FUNCTIONS(DECL_FUNC)
-#undef DECL_FUNC
-
 void PrepareDBus()
 {
     static constexpr char libname[] = "libdbus-1.so.3";
 
     auto load_func = [](auto &f, const char *name) -> void
-    { f = reinterpret_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); };
+    { f = al::bit_cast<std::remove_reference_t<decltype(f)>>(GetSymbol(dbus_handle, name)); };
 #define LOAD_FUNC(x) do {                         \
     load_func(p##x, #x);                          \
     if(!p##x)                                     \
diff --git a/core/dbus_wrap.h b/core/dbus_wrap.h
index 09eaacf9..65f08942 100644
--- a/core/dbus_wrap.h
+++ b/core/dbus_wrap.h
@@ -28,8 +28,8 @@ MAGIC(dbus_message_iter_get_arg_type) \
 MAGIC(dbus_message_iter_get_basic) \
 MAGIC(dbus_set_error_from_message)
 
-extern void *dbus_handle;
-#define DECL_FUNC(x) extern decltype(x) *p##x;
+inline void *dbus_handle{};
+#define DECL_FUNC(x) inline decltype(x) *p##x{};
 DBUS_FUNCTIONS(DECL_FUNC)
 #undef DECL_FUNC
 
diff --git a/core/device.h b/core/device.h
index 9aaf7adb..b1ffc9ce 100644
--- a/core/device.h
+++ b/core/device.h
@@ -1,14 +1,13 @@
 #ifndef CORE_DEVICE_H
 #define CORE_DEVICE_H
 
-#include <stddef.h>
-
 #include <array>
 #include <atomic>
 #include <bitset>
 #include <chrono>
 #include <memory>
-#include <mutex>
+#include <stddef.h>
+#include <stdint.h>
 #include <string>
 
 #include "almalloc.h"
@@ -43,20 +42,20 @@ using uint = unsigned int;
 #define DEFAULT_NUM_UPDATES  3
 
 
-enum class DeviceType : unsigned char {
+enum class DeviceType : uint8_t {
     Playback,
     Capture,
     Loopback
 };
 
 
-enum class RenderMode : unsigned char {
+enum class RenderMode : uint8_t {
     Normal,
     Pairwise,
     Hrtf
 };
 
-enum class StereoEncoding : unsigned char {
+enum class StereoEncoding : uint8_t {
     Basic,
     Uhj,
     Hrtf,
@@ -95,7 +94,7 @@ struct DistanceComp {
 };
 
 
-constexpr uint InvalidChannelIndex{~0u};
+constexpr uint8_t InvalidChannelIndex{static_cast<uint8_t>(~0u)};
 
 struct BFChannelConfig {
     float Scale;
@@ -113,8 +112,8 @@ struct MixParams {
      * source is expected to be a 3D ACN/N3D ambisonic buffer, and for each
      * channel [0...count), the given functor is called with the source channel
      * index, destination channel index, and the gain for that channel. If the
-     * destination channel is INVALID_CHANNEL_INDEX, the given source channel
-     * is not used for output.
+     * destination channel is InvalidChannelIndex, the given source channel is
+     * not used for output.
      */
     template<typename F>
     void setAmbiMixParams(const MixParams &inmix, const float gainbase, F func) const
@@ -123,14 +122,14 @@ struct MixParams {
         const size_t numOut{Buffer.size()};
         for(size_t i{0};i < numIn;++i)
         {
-            auto idx = InvalidChannelIndex;
-            auto gain = 0.0f;
+            uint8_t idx{InvalidChannelIndex};
+            float gain{0.0f};
 
             for(size_t j{0};j < numOut;++j)
             {
                 if(AmbiMap[j].Index == inmix.AmbiMap[i].Index)
                 {
-                    idx = static_cast<uint>(j);
+                    idx = static_cast<uint8_t>(j);
                     gain = AmbiMap[j].Scale * gainbase;
                     break;
                 }
@@ -142,7 +141,7 @@ struct MixParams {
 
 struct RealMixParams {
     al::span<const InputRemixMap> RemixMap;
-    std::array<uint,MaxChannels> ChannelIndex{};
+    std::array<uint8_t,MaxChannels> ChannelIndex{};
 
     al::span<FloatBufferLine> Buffer;
 };
@@ -166,6 +165,11 @@ enum {
     // ear buds, etc).
     DirectEar,
 
+    /* Specifies if output is using speaker virtualization (e.g. Windows
+     * Spatial Audio).
+     */
+    Virtualization,
+
     DeviceFlagsCount
 };
 
@@ -325,9 +329,9 @@ struct DeviceBase {
 
     /**
      * Returns the index for the given channel name (e.g. FrontCenter), or
-     * INVALID_CHANNEL_INDEX if it doesn't exist.
+     * InvalidChannelIndex if it doesn't exist.
      */
-    uint channelIdxByName(Channel chan) const noexcept
+    uint8_t channelIdxByName(Channel chan) const noexcept
     { return RealOut.ChannelIndex[chan]; }
 
     DISABLE_ALLOC()
diff --git a/core/effects/base.h b/core/effects/base.h
index 4ee19f37..83df7cf0 100644
--- a/core/effects/base.h
+++ b/core/effects/base.h
@@ -1,9 +1,9 @@
 #ifndef CORE_EFFECTS_BASE_H
 #define CORE_EFFECTS_BASE_H
 
+#include <array>
 #include <stddef.h>
 
-#include "albyte.h"
 #include "almalloc.h"
 #include "alspan.h"
 #include "atomic.h"
@@ -166,6 +166,11 @@ union EffectProps {
     struct {
         float Gain;
     } Dedicated;
+
+    struct {
+        std::array<float,3> OrientAt;
+        std::array<float,3> OrientUp;
+    } Convolution;
 };
 
 
diff --git a/core/filters/nfc.h b/core/filters/nfc.h
index 33f67a5f..4b8e68b5 100644
--- a/core/filters/nfc.h
+++ b/core/filters/nfc.h
@@ -39,7 +39,7 @@ public:
      * w1 = speed_of_sound / (control_distance * sample_rate);
      *
      * Generally speaking, the control distance should be approximately the
-     * average speaker distance, or based on the reference delay if outputing
+     * average speaker distance, or based on the reference delay if outputting
      * NFC-HOA. It must not be negative, 0, or infinite. The source distance
      * should not be too small relative to the control distance.
      */
diff --git a/core/fmt_traits.h b/core/fmt_traits.h
index f797f836..02473014 100644
--- a/core/fmt_traits.h
+++ b/core/fmt_traits.h
@@ -1,10 +1,9 @@
 #ifndef CORE_FMT_TRAITS_H
 #define CORE_FMT_TRAITS_H
 
-#include <stddef.h>
+#include <cstddef>
 #include <stdint.h>
 
-#include "albyte.h"
 #include "buffer_storage.h"
 
 
@@ -22,36 +21,43 @@ struct FmtTypeTraits<FmtUByte> {
     using Type = uint8_t;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept
-    { return val*OutT{1.0/128.0} - OutT{1.0}; }
+    static constexpr OutT to(const Type val) noexcept { return val*OutT{1.0/128.0} - OutT{1.0}; }
 };
 template<>
 struct FmtTypeTraits<FmtShort> {
     using Type = int16_t;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept { return val*OutT{1.0/32768.0}; }
+    static constexpr OutT to(const Type val) noexcept { return val*OutT{1.0/32768.0}; }
+};
+template<>
+struct FmtTypeTraits<FmtInt> {
+    using Type = int32_t;
+
+    template<typename OutT>
+    static constexpr OutT to(const Type val) noexcept
+    { return static_cast<OutT>(val)*OutT{1.0/2147483648.0}; }
 };
 template<>
 struct FmtTypeTraits<FmtFloat> {
     using Type = float;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept { return val; }
+    static constexpr OutT to(const Type val) noexcept { return val; }
 };
 template<>
 struct FmtTypeTraits<FmtDouble> {
     using Type = double;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept { return static_cast<OutT>(val); }
+    static constexpr OutT to(const Type val) noexcept { return static_cast<OutT>(val); }
 };
 template<>
 struct FmtTypeTraits<FmtMulaw> {
     using Type = uint8_t;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept
+    static constexpr OutT to(const Type val) noexcept
     { return muLawDecompressionTable[val] * OutT{1.0/32768.0}; }
 };
 template<>
@@ -59,14 +65,14 @@ struct FmtTypeTraits<FmtAlaw> {
     using Type = uint8_t;
 
     template<typename OutT>
-    static constexpr inline OutT to(const Type val) noexcept
+    static constexpr OutT to(const Type val) noexcept
     { return aLawDecompressionTable[val] * OutT{1.0/32768.0}; }
 };
 
 
 template<FmtType SrcType, typename DstT>
-inline void LoadSampleArray(DstT *RESTRICT dst, const al::byte *src, const size_t srcstep,
-    const size_t samples) noexcept
+inline void LoadSampleArray(DstT *RESTRICT dst, const std::byte *src, const std::size_t srcstep,
+    const std::size_t samples) noexcept
 {
     using TypeTraits = FmtTypeTraits<SrcType>;
     using SampleType = typename TypeTraits::Type;
diff --git a/core/fpu_ctrl.cpp b/core/fpu_ctrl.cpp
index 0cf0d6e7..435855ad 100644
--- a/core/fpu_ctrl.cpp
+++ b/core/fpu_ctrl.cpp
@@ -8,38 +8,71 @@
 #endif
 #ifdef HAVE_SSE_INTRINSICS
 #include <emmintrin.h>
-#ifndef _MM_DENORMALS_ZERO_MASK
+#elif defined(HAVE_SSE)
+#include <xmmintrin.h>
+#endif
+
+#if defined(HAVE_SSE) && !defined(_MM_DENORMALS_ZERO_MASK)
 /* Some headers seem to be missing these? */
 #define _MM_DENORMALS_ZERO_MASK 0x0040u
 #define _MM_DENORMALS_ZERO_ON 0x0040u
 #endif
-#endif
 
 #include "cpu_caps.h"
 
+namespace {
 
-void FPUCtl::enter() noexcept
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+[[gnu::target("sse")]]
+#endif
+[[maybe_unused]]
+void disable_denormals(unsigned int *state [[maybe_unused]])
 {
-    if(this->in_mode) return;
-
 #if defined(HAVE_SSE_INTRINSICS)
-    this->sse_state = _mm_getcsr();
-    unsigned int sseState{this->sse_state};
+    *state = _mm_getcsr();
+    unsigned int sseState{*state};
     sseState &= ~(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK);
     sseState |= _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
     _mm_setcsr(sseState);
 
-#elif defined(__GNUC__) && defined(HAVE_SSE)
+#elif defined(HAVE_SSE)
 
-    if((CPUCapFlags&CPU_CAP_SSE))
+    *state = _mm_getcsr();
+    unsigned int sseState{*state};
+    sseState &= ~_MM_FLUSH_ZERO_MASK;
+    sseState |= _MM_FLUSH_ZERO_ON;
+    if((CPUCapFlags&CPU_CAP_SSE2))
     {
-        __asm__ __volatile__("stmxcsr %0" : "=m" (*&this->sse_state));
-        unsigned int sseState{this->sse_state};
-        sseState |= 0x8000; /* set flush-to-zero */
-        if((CPUCapFlags&CPU_CAP_SSE2))
-            sseState |= 0x0040; /* set denormals-are-zero */
-        __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState));
+        sseState &= ~_MM_DENORMALS_ZERO_MASK;
+        sseState |= _MM_DENORMALS_ZERO_ON;
     }
+    _mm_setcsr(sseState);
+#endif
+}
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+[[gnu::target("sse")]]
+#endif
+[[maybe_unused]]
+void reset_fpu(unsigned int state [[maybe_unused]])
+{
+#if defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE)
+    _mm_setcsr(state);
+#endif
+}
+
+} // namespace
+
+
+void FPUCtl::enter() noexcept
+{
+    if(this->in_mode) return;
+
+#if defined(HAVE_SSE_INTRINSICS)
+    disable_denormals(&this->sse_state);
+#elif defined(HAVE_SSE)
+    if((CPUCapFlags&CPU_CAP_SSE))
+        disable_denormals(&this->sse_state);
 #endif
 
     this->in_mode = true;
@@ -50,12 +83,10 @@ void FPUCtl::leave() noexcept
     if(!this->in_mode) return;
 
 #if defined(HAVE_SSE_INTRINSICS)
-    _mm_setcsr(this->sse_state);
-
-#elif defined(__GNUC__) && defined(HAVE_SSE)
-
+    reset_fpu(this->sse_state);
+#elif defined(HAVE_SSE)
     if((CPUCapFlags&CPU_CAP_SSE))
-        __asm__ __volatile__("ldmxcsr %0" : : "m" (*&this->sse_state));
+        reset_fpu(this->sse_state);
 #endif
     this->in_mode = false;
 }
diff --git a/core/helpers.cpp b/core/helpers.cpp
index 99cf009c..5a996eee 100644
--- a/core/helpers.cpp
+++ b/core/helpers.cpp
@@ -3,29 +3,27 @@
 
 #include "helpers.h"
 
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
 #include <algorithm>
-#include <cerrno>
-#include <cstdarg>
 #include <cstdlib>
-#include <cstdio>
 #include <cstring>
-#include <mutex>
 #include <limits>
+#include <mutex>
+#include <optional>
 #include <string>
-#include <tuple>
 
-#include "almalloc.h"
-#include "alfstream.h"
 #include "alnumeric.h"
-#include "aloptional.h"
 #include "alspan.h"
 #include "alstring.h"
 #include "logging.h"
 #include "strutils.h"
-#include "vector.h"
 
 
-/* Mixing thread piority level */
+/* Mixing thread priority level */
 int RTPrioLevel{1};
 
 /* Allow reducing the process's RTTime limit for RTKit. */
@@ -34,14 +32,15 @@ bool AllowRTTimeLimit{true};
 
 #ifdef _WIN32
 
+#include <cctype>
 #include <shlobj.h>
 
 const PathNamePair &GetProcBinary()
 {
-    static al::optional<PathNamePair> procbin;
+    static std::optional<PathNamePair> procbin;
     if(procbin) return *procbin;
-
-    auto fullpath = al::vector<WCHAR>(256);
+#if !defined(ALSOFT_UWP)
+    auto fullpath = std::vector<WCHAR>(256);
     DWORD len{GetModuleFileNameW(nullptr, fullpath.data(), static_cast<DWORD>(fullpath.size()))};
     while(len == fullpath.size())
     {
@@ -58,7 +57,16 @@ const PathNamePair &GetProcBinary()
     fullpath.resize(len);
     if(fullpath.back() != 0)
         fullpath.push_back(0);
-
+#else
+    auto exePath               = __wargv[0];
+    if (!exePath)
+    {
+        ERR("Failed to get process name: error %lu\n", GetLastError());
+        procbin.emplace();
+        return *procbin;
+    }
+    std::vector<WCHAR> fullpath{exePath, exePath + wcslen(exePath) + 1};
+#endif
     std::replace(fullpath.begin(), fullpath.end(), '/', '\\');
     auto sep = std::find(fullpath.rbegin()+1, fullpath.rend(), '\\');
     if(sep != fullpath.rend())
@@ -75,16 +83,16 @@ const PathNamePair &GetProcBinary()
 
 namespace {
 
-void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+void DirectorySearch(const char *path, const char *ext, std::vector<std::string> *const results)
 {
     std::string pathstr{path};
     pathstr += "\\*";
     pathstr += ext;
     TRACE("Searching %s\n", pathstr.c_str());
 
-    std::wstring wpath{utf8_to_wstr(pathstr.c_str())};
+    std::wstring wpath{utf8_to_wstr(pathstr)};
     WIN32_FIND_DATAW fdata;
-    HANDLE hdl{FindFirstFileW(wpath.c_str(), &fdata)};
+    HANDLE hdl{FindFirstFileExW(wpath.c_str(), FindExInfoStandard, &fdata, FindExSearchNameMatch, NULL, 0)};
     if(hdl == INVALID_HANDLE_VALUE) return;
 
     const auto base = results->size();
@@ -97,7 +105,6 @@ void DirectorySearch(const char *path, const char *ext, al::vector<std::string>
         str += wstr_to_utf8(fdata.cFileName);
     } while(FindNextFileW(hdl, &fdata));
     FindClose(hdl);
-
     const al::span<std::string> newlist{results->data()+base, results->size()-base};
     std::sort(newlist.begin(), newlist.end());
     for(const auto &name : newlist)
@@ -106,16 +113,16 @@ void DirectorySearch(const char *path, const char *ext, al::vector<std::string>
 
 } // namespace
 
-al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+std::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
 {
-    auto is_slash = [](int c) noexcept -> int { return (c == '\\' || c == '/'); };
+    auto is_slash = [](int c) noexcept { return (c == '\\' || c == '/'); };
 
     static std::mutex search_lock;
     std::lock_guard<std::mutex> _{search_lock};
 
     /* If the path is absolute, use it directly. */
-    al::vector<std::string> results;
-    if(isalpha(subdir[0]) && subdir[1] == ':' && is_slash(subdir[2]))
+    std::vector<std::string> results;
+    if(std::isalpha(subdir[0]) && subdir[1] == ':' && is_slash(subdir[2]))
     {
         std::string path{subdir};
         std::replace(path.begin(), path.end(), '/', '\\');
@@ -149,9 +156,9 @@ al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
     std::replace(path.begin(), path.end(), '/', '\\');
     DirectorySearch(path.c_str(), ext, &results);
 
+#if !defined(ALSOFT_UWP) && !defined(_GAMING_XBOX)
     /* Search the local and global data dirs. */
-    static const int ids[2]{ CSIDL_APPDATA, CSIDL_COMMON_APPDATA };
-    for(int id : ids)
+    for(auto id : std::array{CSIDL_APPDATA, CSIDL_COMMON_APPDATA})
     {
         WCHAR buffer[MAX_PATH];
         if(SHGetSpecialFolderPathW(nullptr, buffer, id, FALSE) == FALSE)
@@ -165,24 +172,27 @@ al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
 
         DirectorySearch(path.c_str(), ext, &results);
     }
+#endif
 
     return results;
 }
 
 void SetRTPriority(void)
 {
+#if !defined(ALSOFT_UWP) /* With ALSOFT_UWP defined this function does nothing. */
     if(RTPrioLevel > 0)
     {
         if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL))
             ERR("Failed to set priority level for thread\n");
     }
+#endif /* !ALSOFT_UWP */
 }
 
 #else
 
-#include <sys/types.h>
-#include <unistd.h>
+#include <cerrno>
 #include <dirent.h>
+#include <unistd.h>
 #ifdef __FreeBSD__
 #include <sys/sysctl.h>
 #endif
@@ -197,7 +207,6 @@ void SetRTPriority(void)
 #include <sched.h>
 #endif
 #ifdef HAVE_RTKIT
-#include <sys/time.h>
 #include <sys/resource.h>
 
 #include "dbus_wrap.h"
@@ -209,10 +218,10 @@ void SetRTPriority(void)
 
 const PathNamePair &GetProcBinary()
 {
-    static al::optional<PathNamePair> procbin;
+    static std::optional<PathNamePair> procbin;
     if(procbin) return *procbin;
 
-    al::vector<char> pathname;
+    std::vector<char> pathname;
 #ifdef __FreeBSD__
     size_t pathlen;
     int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
@@ -247,7 +256,7 @@ const PathNamePair &GetProcBinary()
 #ifndef __SWITCH__
     if(pathname.empty())
     {
-        static const char SelfLinkNames[][32]{
+        const char *SelfLinkNames[]{
             "/proc/self/exe",
             "/proc/self/file",
             "/proc/curproc/exe",
@@ -295,7 +304,7 @@ const PathNamePair &GetProcBinary()
 
 namespace {
 
-void DirectorySearch(const char *path, const char *ext, al::vector<std::string> *const results)
+void DirectorySearch(const char *path, const char *ext, std::vector<std::string> *const results)
 {
     TRACE("Searching %s for *%s\n", path, ext);
     DIR *dir{opendir(path)};
@@ -331,12 +340,12 @@ void DirectorySearch(const char *path, const char *ext, al::vector<std::string>
 
 } // namespace
 
-al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
+std::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
 {
     static std::mutex search_lock;
     std::lock_guard<std::mutex> _{search_lock};
 
-    al::vector<std::string> results;
+    std::vector<std::string> results;
     if(subdir[0] == '/')
     {
         DirectorySearch(subdir, ext, &results);
@@ -348,7 +357,7 @@ al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
         DirectorySearch(localpath->c_str(), ext, &results);
     else
     {
-        al::vector<char> cwdbuf(256);
+        std::vector<char> cwdbuf(256);
         while(!getcwd(cwdbuf.data(), cwdbuf.size()))
         {
             if(errno != ERANGE)
@@ -425,7 +434,7 @@ al::vector<std::string> SearchDataFiles(const char *ext, const char *subdir)
 
 namespace {
 
-bool SetRTPriorityPthread(int prio)
+bool SetRTPriorityPthread(int prio [[maybe_unused]])
 {
     int err{ENOTSUP};
 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && !defined(__OpenBSD__)
@@ -445,16 +454,12 @@ bool SetRTPriorityPthread(int prio)
 #endif
         err = pthread_setschedparam(pthread_self(), SCHED_RR, &param);
     if(err == 0) return true;
-
-#else
-
-    std::ignore = prio;
 #endif
     WARN("pthread_setschedparam failed: %s (%d)\n", std::strerror(err), err);
     return false;
 }
 
-bool SetRTPriorityRTKit(int prio)
+bool SetRTPriorityRTKit(int prio [[maybe_unused]])
 {
 #ifdef HAVE_RTKIT
     if(!HasDBus())
@@ -547,7 +552,6 @@ bool SetRTPriorityRTKit(int prio)
 
 #else
 
-    std::ignore = prio;
     WARN("D-Bus not supported\n");
 #endif
     return false;
diff --git a/core/helpers.h b/core/helpers.h
index f0bfcf1b..df51c116 100644
--- a/core/helpers.h
+++ b/core/helpers.h
@@ -1,18 +1,26 @@
 #ifndef CORE_HELPERS_H
 #define CORE_HELPERS_H
 
+#include <utility>
 #include <string>
+#include <vector>
 
-#include "vector.h"
 
+struct PathNamePair {
+    std::string path, fname;
 
-struct PathNamePair { std::string path, fname; };
+    PathNamePair() = default;
+    template<typename T, typename U>
+    PathNamePair(T&& path_, U&& fname_)
+        : path{std::forward<T>(path_)}, fname{std::forward<U>(fname_)}
+    { }
+};
 const PathNamePair &GetProcBinary(void);
 
 extern int RTPrioLevel;
 extern bool AllowRTTimeLimit;
 void SetRTPriority(void);
 
-al::vector<std::string> SearchDataFiles(const char *match, const char *subdir);
+std::vector<std::string> SearchDataFiles(const char *match, const char *subdir);
 
 #endif /* CORE_HELPERS_H */
diff --git a/core/hrtf.cpp b/core/hrtf.cpp
index d5c7573a..9a13a004 100644
--- a/core/hrtf.cpp
+++ b/core/hrtf.cpp
@@ -8,6 +8,7 @@
 #include <cassert>
 #include <cctype>
 #include <cmath>
+#include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
@@ -16,16 +17,16 @@
 #include <memory>
 #include <mutex>
 #include <numeric>
+#include <optional>
 #include <type_traits>
 #include <utility>
+#include <vector>
 
 #include "albit.h"
-#include "albyte.h"
 #include "alfstream.h"
 #include "almalloc.h"
 #include "alnumbers.h"
 #include "alnumeric.h"
-#include "aloptional.h"
 #include "alspan.h"
 #include "ambidefs.h"
 #include "filters/splitter.h"
@@ -34,7 +35,6 @@
 #include "mixer/hrtfdefs.h"
 #include "opthelpers.h"
 #include "polyphase_resampler.h"
-#include "vector.h"
 
 
 namespace {
@@ -98,10 +98,10 @@ constexpr char magicMarker03[8]{'M','i','n','P','H','R','0','3'};
 constexpr auto PassthruCoeff = static_cast<float>(1.0/al::numbers::sqrt2);
 
 std::mutex LoadedHrtfLock;
-al::vector<LoadedHrtf> LoadedHrtfs;
+std::vector<LoadedHrtf> LoadedHrtfs;
 
 std::mutex EnumeratedHrtfLock;
-al::vector<HrtfEntry> EnumeratedHrtfs;
+std::vector<HrtfEntry> EnumeratedHrtfs;
 
 
 class databuf final : public std::streambuf {
@@ -289,13 +289,13 @@ void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize, const bool
     mChannels[0].mSplitter.init(static_cast<float>(xover_norm));
     for(size_t i{0};i < mChannels.size();++i)
     {
-        const size_t order{AmbiIndex::OrderFromChannel()[i]};
+        const size_t order{AmbiIndex::OrderFromChannel[i]};
         mChannels[i].mSplitter = mChannels[0].mSplitter;
         mChannels[i].mHfScale = AmbiOrderHFGain[order];
     }
 
     uint min_delay{HrtfHistoryLength*HrirDelayFracOne}, max_delay{0};
-    al::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size());
+    std::vector<ImpulseResponse> impres; impres.reserve(AmbiPoints.size());
     auto calc_res = [Hrtf,&max_delay,&min_delay](const AngularPoint &pt) -> ImpulseResponse
     {
         auto &field = Hrtf->mFields[0];
@@ -331,7 +331,7 @@ void DirectHrtfState::build(const HrtfStore *Hrtf, const uint irSize, const bool
     TRACE("Min delay: %.2f, max delay: %.2f, FIR length: %u\n",
         min_delay/double{HrirDelayFracOne}, max_delay/double{HrirDelayFracOne}, irSize);
 
-    auto tmpres = al::vector<std::array<double2,HrirLength>>(mChannels.size());
+    auto tmpres = std::vector<std::array<double2,HrirLength>>(mChannels.size());
     max_delay = 0;
     for(size_t c{0u};c < AmbiPoints.size();++c)
     {
@@ -393,7 +393,7 @@ std::unique_ptr<HrtfStore> CreateHrtfStore(uint rate, uint8_t irSize,
     {
         Hrtf.reset(al::construct_at(static_cast<HrtfStore*>(ptr)));
         InitRef(Hrtf->mRef, 1u);
-        Hrtf->mSampleRate = rate;
+        Hrtf->mSampleRate = rate & 0xff'ff'ff;
         Hrtf->mIrSize = irSize;
 
         /* Set up pointers to storage following the main HRTF struct. */
@@ -425,7 +425,7 @@ std::unique_ptr<HrtfStore> CreateHrtfStore(uint rate, uint8_t irSize,
         std::uninitialized_copy_n(delays, irCount, delays_);
 
         /* Finally, assign the storage pointers. */
-        Hrtf->mFields = al::as_span(field_, fields.size());
+        Hrtf->mFields = {field_, fields.size()};
         Hrtf->mElev = elev_;
         Hrtf->mCoeffs = coeffs_;
         Hrtf->mDelays = delays_;
@@ -492,10 +492,10 @@ T> readle(std::istream &data)
     static_assert(num_bits <= sizeof(T)*8, "num_bits is too large for the type");
 
     T ret{};
-    al::byte b[sizeof(T)]{};
+    std::byte b[sizeof(T)]{};
     if(!data.read(reinterpret_cast<char*>(b), num_bits/8))
         return static_cast<T>(EOF);
-    std::reverse_copy(std::begin(b), std::end(b), reinterpret_cast<al::byte*>(&ret));
+    std::reverse_copy(std::begin(b), std::end(b), reinterpret_cast<std::byte*>(&ret));
 
     return fixsign<num_bits>(ret);
 }
@@ -529,7 +529,7 @@ std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename)
         return nullptr;
     }
 
-    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    auto elevs = std::vector<HrtfStore::Elevation>(evCount);
     for(auto &elev : elevs)
         elev.irOffset = readle<uint16_t>(data);
     if(!data || data.eof())
@@ -571,8 +571,8 @@ std::unique_ptr<HrtfStore> LoadHrtf00(std::istream &data, const char *filename)
         return nullptr;
     }
 
-    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
-    auto delays = al::vector<ubyte2>(irCount);
+    auto coeffs = std::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = std::vector<ubyte2>(irCount);
     for(auto &hrir : coeffs)
     {
         for(auto &val : al::span<float2>{hrir.data(), irSize})
@@ -626,7 +626,7 @@ std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename)
         return nullptr;
     }
 
-    auto elevs = al::vector<HrtfStore::Elevation>(evCount);
+    auto elevs = std::vector<HrtfStore::Elevation>(evCount);
     for(auto &elev : elevs)
         elev.azCount = readle<uint8_t>(data);
     if(!data || data.eof())
@@ -649,8 +649,8 @@ std::unique_ptr<HrtfStore> LoadHrtf01(std::istream &data, const char *filename)
         elevs[i].irOffset = static_cast<ushort>(elevs[i-1].irOffset + elevs[i-1].azCount);
     const ushort irCount{static_cast<ushort>(elevs.back().irOffset + elevs.back().azCount)};
 
-    auto coeffs = al::vector<HrirArray>(irCount, HrirArray{});
-    auto delays = al::vector<ubyte2>(irCount);
+    auto coeffs = std::vector<HrirArray>(irCount, HrirArray{});
+    auto delays = std::vector<ubyte2>(irCount);
     for(auto &hrir : coeffs)
     {
         for(auto &val : al::span<float2>{hrir.data(), irSize})
@@ -722,8 +722,8 @@ std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
         return nullptr;
     }
 
-    auto fields = al::vector<HrtfStore::Field>(fdCount);
-    auto elevs = al::vector<HrtfStore::Elevation>{};
+    auto fields = std::vector<HrtfStore::Field>(fdCount);
+    auto elevs = std::vector<HrtfStore::Elevation>{};
     for(size_t f{0};f < fdCount;f++)
     {
         const ushort distance{readle<uint16_t>(data)};
@@ -787,8 +787,8 @@ std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
         });
     const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
 
-    auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
-    auto delays = al::vector<ubyte2>(irTotal);
+    auto coeffs = std::vector<HrirArray>(irTotal, HrirArray{});
+    auto delays = std::vector<ubyte2>(irTotal);
     if(channelType == ChanType_LeftOnly)
     {
         if(sampleType == SampleType_S16)
@@ -881,10 +881,10 @@ std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
 
     if(fdCount > 1)
     {
-        auto fields_ = al::vector<HrtfStore::Field>(fields.size());
-        auto elevs_ = al::vector<HrtfStore::Elevation>(elevs.size());
-        auto coeffs_ = al::vector<HrirArray>(coeffs.size());
-        auto delays_ = al::vector<ubyte2>(delays.size());
+        auto fields_ = std::vector<HrtfStore::Field>(fields.size());
+        auto elevs_ = std::vector<HrtfStore::Elevation>(elevs.size());
+        auto coeffs_ = std::vector<HrirArray>(coeffs.size());
+        auto delays_ = std::vector<ubyte2>(delays.size());
 
         /* Simple reverse for the per-field elements. */
         std::reverse_copy(fields.cbegin(), fields.cend(), fields_.begin());
@@ -983,8 +983,8 @@ std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename)
         return nullptr;
     }
 
-    auto fields = al::vector<HrtfStore::Field>(fdCount);
-    auto elevs = al::vector<HrtfStore::Elevation>{};
+    auto fields = std::vector<HrtfStore::Field>(fdCount);
+    auto elevs = std::vector<HrtfStore::Elevation>{};
     for(size_t f{0};f < fdCount;f++)
     {
         const ushort distance{readle<uint16_t>(data)};
@@ -1048,8 +1048,8 @@ std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename)
         });
     const auto irTotal = static_cast<ushort>(elevs.back().azCount + elevs.back().irOffset);
 
-    auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
-    auto delays = al::vector<ubyte2>(irTotal);
+    auto coeffs = std::vector<HrirArray>(irTotal, HrirArray{});
+    auto delays = std::vector<ubyte2>(irTotal);
     if(channelType == ChanType_LeftOnly)
     {
         for(auto &hrir : coeffs)
@@ -1221,7 +1221,7 @@ al::span<const char> GetResource(int name)
 } // namespace
 
 
-al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt)
+std::vector<std::string> EnumerateHrtf(std::optional<std::string> pathopt)
 {
     std::lock_guard<std::mutex> _{EnumeratedHrtfLock};
     EnumeratedHrtfs.clear();
@@ -1270,7 +1270,7 @@ al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt)
             AddBuiltInEntry("Built-In HRTF", IDR_DEFAULT_HRTF_MHR);
     }
 
-    al::vector<std::string> list;
+    std::vector<std::string> list;
     list.reserve(EnumeratedHrtfs.size());
     for(auto &entry : EnumeratedHrtfs)
         list.emplace_back(entry.mDispName);
@@ -1368,7 +1368,7 @@ HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate)
         TRACE("Resampling HRTF %s (%uhz -> %uhz)\n", name.c_str(), hrtf->mSampleRate, devrate);
 
         /* Calculate the last elevation's index and get the total IR count. */
-        const size_t lastEv{std::accumulate(hrtf->mFields.begin(), hrtf->mFields.end(), size_t{0},
+        const size_t lastEv{std::accumulate(hrtf->mFields.begin(), hrtf->mFields.end(), 0_uz,
             [](const size_t curval, const HrtfStore::Field &field) noexcept -> size_t
             { return curval + field.evCount; }
         ) - 1};
@@ -1394,7 +1394,7 @@ HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate)
 
         /* Scale the delays for the new sample rate. */
         float max_delay{0.0f};
-        auto new_delays = al::vector<float2>(irCount);
+        auto new_delays = std::vector<float2>(irCount);
         const float rate_scale{static_cast<float>(devrate)/static_cast<float>(hrtf->mSampleRate)};
         for(size_t i{0};i < irCount;++i)
         {
@@ -1430,7 +1430,7 @@ HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate)
          */
         const float newIrSize{std::round(static_cast<float>(hrtf->mIrSize) * rate_scale)};
         hrtf->mIrSize = static_cast<uint8_t>(minf(HrirLength, newIrSize));
-        hrtf->mSampleRate = devrate;
+        hrtf->mSampleRate = devrate & 0xff'ff'ff;
     }
 
     TRACE("Loaded HRTF %s for sample rate %uhz, %u-sample filter\n", name.c_str(),
diff --git a/core/hrtf.h b/core/hrtf.h
index eb18682a..5e6e09a8 100644
--- a/core/hrtf.h
+++ b/core/hrtf.h
@@ -4,17 +4,17 @@
 #include <array>
 #include <cstddef>
 #include <memory>
+#include <optional>
 #include <string>
+#include <vector>
 
 #include "almalloc.h"
-#include "aloptional.h"
 #include "alspan.h"
 #include "atomic.h"
 #include "ambidefs.h"
 #include "bufferline.h"
 #include "mixer/hrtfdefs.h"
 #include "intrusive_ptr.h"
-#include "vector.h"
 
 
 struct HrtfStore {
@@ -83,7 +83,7 @@ struct DirectHrtfState {
 };
 
 
-al::vector<std::string> EnumerateHrtf(al::optional<std::string> pathopt);
+std::vector<std::string> EnumerateHrtf(std::optional<std::string> pathopt);
 HrtfStorePtr GetLoadedHrtf(const std::string &name, const uint devrate);
 
 #endif /* CORE_HRTF_H */
diff --git a/core/logging.cpp b/core/logging.cpp
index 34a95e5a..56ad0a0d 100644
--- a/core/logging.cpp
+++ b/core/logging.cpp
@@ -3,13 +3,17 @@
 
 #include "logging.h"
 
+#include <cctype>
 #include <cstdarg>
 #include <cstdio>
+#include <cstring>
+#include <mutex>
+#include <optional>
 #include <string>
+#include <vector>
 
 #include "alspan.h"
 #include "strutils.h"
-#include "vector.h"
 
 
 #if defined(_WIN32)
@@ -19,22 +23,74 @@
 #include <android/log.h>
 #endif
 
-void al_print(LogLevel level, FILE *logfile, const char *fmt, ...)
+
+FILE *gLogFile{stderr};
+#ifdef _DEBUG
+LogLevel gLogLevel{LogLevel::Warning};
+#else
+LogLevel gLogLevel{LogLevel::Error};
+#endif
+
+
+namespace {
+
+enum class LogState : uint8_t {
+    FirstRun, /* Initial value; whether to use a callback is not yet decided. */
+    Ready, /* Set by al_set_log_callback unless the environment opts out. */
+    Disable /* Callback delivery is off; nothing visible here re-enables it. */
+};
+
+std::mutex LogCallbackMutex;
+LogState gLogState{LogState::FirstRun};
+
+LogCallbackFunc gLogCallback{};
+void *gLogCallbackPtr{};
+
+constexpr std::optional<char> GetLevelCode(LogLevel level)
+{ /* Maps a log level to the one-character code given to the log callback. */
+    switch(level)
+    {
+    case LogLevel::Disable: break; /* no code for this level */
+    case LogLevel::Error: return 'E';
+    case LogLevel::Warning: return 'W';
+    case LogLevel::Trace: return 'I';
+    }
+    return std::nullopt; /* Disable, or a value outside the enumerators */
+}
+
+} // namespace
+
+/* Installs (or clears, when null) the callback that receives log messages. */
+void al_set_log_callback(LogCallbackFunc callback, void *userptr)
+{
+    const std::lock_guard<std::mutex> guard{LogCallbackMutex};
+    gLogCallback = callback;
+    gLogCallbackPtr = callback ? userptr : nullptr;
+    if(gLogState != LogState::FirstRun)
+        return;
+
+    /* First call only: the environment variable can turn callback logging off. */
+    const auto disableopt = al::getenv("ALSOFT_DISABLE_LOG_CALLBACK");
+    const bool disabled{disableopt && *disableopt == "1"};
+    gLogState = disabled ? LogState::Disable : LogState::Ready;
+}
+
+void al_print(LogLevel level, const char *fmt, ...)
 {
     /* Kind of ugly since string literals are const char arrays with a size
      * that includes the null terminator, which we want to exclude from the
      * span.
      */
-    auto prefix = al::as_span("[ALSOFT] (--) ").first<14>();
+    auto prefix = al::span{"[ALSOFT] (--) "}.first<14>();
     switch(level)
     {
     case LogLevel::Disable: break;
-    case LogLevel::Error: prefix = al::as_span("[ALSOFT] (EE) ").first<14>(); break;
-    case LogLevel::Warning: prefix = al::as_span("[ALSOFT] (WW) ").first<14>(); break;
-    case LogLevel::Trace: prefix = al::as_span("[ALSOFT] (II) ").first<14>(); break;
+    case LogLevel::Error: prefix = al::span{"[ALSOFT] (EE) "}.first<14>(); break;
+    case LogLevel::Warning: prefix = al::span{"[ALSOFT] (WW) "}.first<14>(); break;
+    case LogLevel::Trace: prefix = al::span{"[ALSOFT] (II) "}.first<14>(); break;
     }
 
-    al::vector<char> dynmsg;
+    std::vector<char> dynmsg;
     std::array<char,256> stcmsg{};
 
     char *str{stcmsg.data()};
@@ -45,21 +101,28 @@ void al_print(LogLevel level, FILE *logfile, const char *fmt, ...)
     va_start(args, fmt);
     va_copy(args2, args);
     const int msglen{std::vsnprintf(msg.data(), msg.size(), fmt, args)};
-    if(msglen >= 0 && static_cast<size_t>(msglen) >= msg.size()) UNLIKELY
+    if(msglen >= 0)
     {
-        dynmsg.resize(static_cast<size_t>(msglen)+prefix.size() + 1u);
+        if(static_cast<size_t>(msglen) >= msg.size()) UNLIKELY
+        {
+            dynmsg.resize(static_cast<size_t>(msglen)+prefix.size() + 1u);
 
-        str = dynmsg.data();
-        auto prefend2 = std::copy_n(prefix.begin(), prefix.size(), dynmsg.begin());
-        msg = {prefend2, dynmsg.end()};
+            str = dynmsg.data();
+            auto prefend2 = std::copy_n(prefix.begin(), prefix.size(), dynmsg.begin());
+            msg = {prefend2, dynmsg.end()};
 
-        std::vsnprintf(msg.data(), msg.size(), fmt, args2);
+            std::vsnprintf(msg.data(), msg.size(), fmt, args2);
+        }
+        msg = msg.first(static_cast<size_t>(msglen));
     }
+    else
+        msg = {msg.data(), std::strlen(msg.data())};
     va_end(args2);
     va_end(args);
 
     if(gLogLevel >= level)
     {
+        auto logfile = gLogFile;
         fputs(str, logfile);
         fflush(logfile);
     }
@@ -86,4 +149,21 @@ void al_print(LogLevel level, FILE *logfile, const char *fmt, ...)
     };
     __android_log_print(android_severity(level), "openal", "%s", str);
 #endif
+
+    auto cblock = std::lock_guard{LogCallbackMutex}; /* held while the callback runs */
+    if(gLogState != LogState::Disable)
+    {
+        while(!msg.empty() && std::isspace(static_cast<unsigned char>(msg.back())))
+        {   /* The cast avoids undefined behavior in isspace for negative chars. */
+            msg.back() = '\0'; /* keep the trimmed text null-terminated */
+            msg = msg.first(msg.size()-1);
+        }
+        if(auto logcode = GetLevelCode(level); logcode && !msg.empty())
+        {
+            if(gLogCallback)
+                gLogCallback(gLogCallbackPtr, *logcode, msg.data(), static_cast<int>(msg.size()));
+            else if(gLogState == LogState::FirstRun)
+                gLogState = LogState::Disable; /* no callback set by the first message */
+        }
+    }
 }
diff --git a/core/logging.h b/core/logging.h
index f4b6ab56..06b7cdde 100644
--- a/core/logging.h
+++ b/core/logging.h
@@ -16,36 +16,23 @@ extern LogLevel gLogLevel;
 
 extern FILE *gLogFile;
 
-#ifdef __USE_MINGW_ANSI_STDIO
-[[gnu::format(gnu_printf,3,4)]]
-#else
-[[gnu::format(printf,3,4)]]
-#endif
-void al_print(LogLevel level, FILE *logfile, const char *fmt, ...);
 
-#if (!defined(_WIN32) || defined(NDEBUG)) && !defined(__ANDROID__)
-#define TRACE(...) do {                                                       \
-    if(gLogLevel >= LogLevel::Trace) UNLIKELY                                 \
-        al_print(LogLevel::Trace, gLogFile, __VA_ARGS__);                     \
-} while(0)
+using LogCallbackFunc = void(*)(void *userptr, char level, const char *message, int length) noexcept;
 
-#define WARN(...) do {                                                        \
-    if(gLogLevel >= LogLevel::Warning) UNLIKELY                               \
-        al_print(LogLevel::Warning, gLogFile, __VA_ARGS__);                   \
-} while(0)
+void al_set_log_callback(LogCallbackFunc callback, void *userptr);
 
-#define ERR(...) do {                                                         \
-    if(gLogLevel >= LogLevel::Error) UNLIKELY                                 \
-        al_print(LogLevel::Error, gLogFile, __VA_ARGS__);                     \
-} while(0)
 
+#ifdef __USE_MINGW_ANSI_STDIO
+[[gnu::format(gnu_printf,2,3)]]
 #else
+[[gnu::format(printf,2,3)]]
+#endif
+void al_print(LogLevel level, const char *fmt, ...);
 
-#define TRACE(...) al_print(LogLevel::Trace, gLogFile, __VA_ARGS__)
+#define TRACE(...) al_print(LogLevel::Trace, __VA_ARGS__)
 
-#define WARN(...) al_print(LogLevel::Warning, gLogFile, __VA_ARGS__)
+#define WARN(...) al_print(LogLevel::Warning, __VA_ARGS__)
 
-#define ERR(...) al_print(LogLevel::Error, gLogFile, __VA_ARGS__)
-#endif
+#define ERR(...) al_print(LogLevel::Error, __VA_ARGS__)
 
 #endif /* CORE_LOGGING_H */
diff --git a/core/mastering.cpp b/core/mastering.cpp
index 97a4008e..4445719b 100644
--- a/core/mastering.cpp
+++ b/core/mastering.cpp
@@ -382,10 +382,10 @@ std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const floa
 Compressor::~Compressor()
 {
     if(mHold)
-        al::destroy_at(mHold);
+        std::destroy_at(mHold);
     mHold = nullptr;
     if(mDelay)
-        al::destroy_n(mDelay, mNumChans);
+        std::destroy_n(mDelay, mNumChans);
     mDelay = nullptr;
 }
 
diff --git a/core/mixer.cpp b/core/mixer.cpp
index 066c57bd..806ac8b8 100644
--- a/core/mixer.cpp
+++ b/core/mixer.cpp
@@ -82,14 +82,13 @@ std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, c
     return coeffs;
 }
 
-void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
-    const al::span<float,MaxAmbiChannels> gains)
+void ComputePanGains(const MixParams *mix, const al::span<const float,MaxAmbiChannels> coeffs,
+    const float ingain, const al::span<float,MaxAmbiChannels> gains)
 {
     auto ambimap = mix->AmbiMap.cbegin();
 
     auto iter = std::transform(ambimap, ambimap+mix->Buffer.size(), gains.begin(),
         [coeffs,ingain](const BFChannelConfig &chanmap) noexcept -> float
-        { return chanmap.Scale * coeffs[chanmap.Index] * ingain; }
-    );
+        { return chanmap.Scale * coeffs[chanmap.Index] * ingain; });
     std::fill(iter, gains.end(), 0.0f);
 }
diff --git a/core/mixer.h b/core/mixer.h
index aa7597bb..9062ebac 100644
--- a/core/mixer.h
+++ b/core/mixer.h
@@ -58,7 +58,7 @@ std::array<float,MaxAmbiChannels> CalcAmbiCoeffs(const float y, const float z, c
  * vector must be normalized (unit length), and the spread is the angular width
  * of the sound (0...tau).
  */
-inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3],
+inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const al::span<const float,3> dir,
     const float spread)
 {
     /* Convert from OpenAL coords to Ambisonics. */
@@ -71,7 +71,7 @@ inline std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[
  * Calculates ambisonic coefficients based on an OpenAL direction vector. The
  * vector must be normalized (unit length).
  */
-constexpr std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const float (&dir)[3])
+constexpr std::array<float,MaxAmbiChannels> CalcDirectionCoeffs(const al::span<const float,3> dir)
 {
     /* Convert from OpenAL coords to Ambisonics. */
     return CalcAmbiCoeffs(-dir[0], dir[1], -dir[2]);
@@ -103,7 +103,7 @@ inline std::array<float,MaxAmbiChannels> CalcAngleCoeffs(const float azimuth,
  * coeffs are a 'slice' of a transform matrix for the input channel, used to
  * scale and orient the sound samples.
  */
-void ComputePanGains(const MixParams *mix, const float*RESTRICT coeffs, const float ingain,
-    const al::span<float,MaxAmbiChannels> gains);
+void ComputePanGains(const MixParams *mix, const al::span<const float,MaxAmbiChannels> coeffs,
+    const float ingain, const al::span<float,MaxAmbiChannels> gains);
 
 #endif /* CORE_MIXER_H */
diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp
index ef2936b3..ead775af 100644
--- a/core/mixer/mixer_neon.cpp
+++ b/core/mixer/mixer_neon.cpp
@@ -342,7 +342,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
 {
     const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f};
     const auto min_len = minz(Counter, InSamples.size());
-    const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len;
+    const auto aligned_len = minz((min_len+3) & ~3_uz, InSamples.size()) - min_len;
 
     for(FloatBufferLine &output : OutBuffer)
         MixLine(InSamples, al::assume_aligned<16>(output.data()+OutPos), *CurrentGains++,
@@ -355,7 +355,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, float *OutBuffer, floa
 {
     const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f};
     const auto min_len = minz(Counter, InSamples.size());
-    const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len;
+    const auto aligned_len = minz((min_len+3) & ~3_uz, InSamples.size()) - min_len;
 
     MixLine(InSamples, al::assume_aligned<16>(OutBuffer), CurrentGain, TargetGain, delta, min_len,
         aligned_len, Counter);
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
index 0aa5d5fb..70f77c14 100644
--- a/core/mixer/mixer_sse.cpp
+++ b/core/mixer/mixer_sse.cpp
@@ -307,7 +307,7 @@ void Mix_<SSETag>(const al::span<const float> InSamples, const al::span<FloatBuf
 {
     const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f};
     const auto min_len = minz(Counter, InSamples.size());
-    const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len;
+    const auto aligned_len = minz((min_len+3) & ~3_uz, InSamples.size()) - min_len;
 
     for(FloatBufferLine &output : OutBuffer)
         MixLine(InSamples, al::assume_aligned<16>(output.data()+OutPos), *CurrentGains++,
@@ -320,7 +320,7 @@ void Mix_<SSETag>(const al::span<const float> InSamples, float *OutBuffer, float
 {
     const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f};
     const auto min_len = minz(Counter, InSamples.size());
-    const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len;
+    const auto aligned_len = minz((min_len+3) & ~3_uz, InSamples.size()) - min_len;
 
     MixLine(InSamples, al::assume_aligned<16>(OutBuffer), CurrentGain, TargetGain, delta, min_len,
         aligned_len, Counter);
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
index df50956a..28999e09 100644
--- a/core/uhjfilter.cpp
+++ b/core/uhjfilter.cpp
@@ -9,7 +9,9 @@
 #include "alcomplex.h"
 #include "alnumeric.h"
 #include "opthelpers.h"
+#include "pffft.h"
 #include "phase_shifter.h"
+#include "vector.h"
 
 
 UhjQualityType UhjDecodeQuality{UhjQualityType::Default};
@@ -18,38 +20,141 @@ UhjQualityType UhjEncodeQuality{UhjQualityType::Default};
 
 namespace {
 
-const PhaseShifterT<UhjLength256> PShiftLq{};
-const PhaseShifterT<UhjLength512> PShiftHq{};
+struct PFFFTSetupDeleter {
+    void operator()(PFFFT_Setup *ptr) { pffft_destroy_setup(ptr); }
+};
+using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>;
 
+/* Convolution is implemented using a segmented overlap-add method. The filter
+ * response is broken up into multiple segments of 128 samples, and each
+ * segment has an FFT applied with a 256-sample buffer (the latter half left
+ * silent) to get its frequency-domain response.
+ *
+ * Input samples are similarly broken up into 128-sample segments, with a 256-
+ * sample FFT applied to each new incoming segment to get its frequency-domain
+ * response. A history of FFT'd input segments is maintained, equal to the
+ * number of filter response segments.
+ *
+ * To apply the convolution, each filter response segment is convolved with its
+ * paired input segment (using complex multiplies, far cheaper than time-domain
+ * FIRs), accumulating into an FFT buffer. The input history is then shifted to
+ * align with later filter response segments for the next input segment.
+ *
+ * An inverse FFT is then applied to the accumulated FFT buffer to get a 256-
+ * sample time-domain response for output, which is split in two halves. The
+ * first half is the 128-sample output, and the second half is a 128-sample
+ * (really, 127) delayed extension, which gets added to the output next time.
+ * Convolving two time-domain responses of length N results in a time-domain
+ * signal of length N*2 - 1, and this holds true regardless of the convolution
+ * being applied in the frequency domain, so these "overflow" samples need to
+ * be accounted for.
+ */
 template<size_t N>
-struct GetPhaseShifter;
-template<>
-struct GetPhaseShifter<UhjLength256> { static auto& Get() noexcept { return PShiftLq; } };
-template<>
-struct GetPhaseShifter<UhjLength512> { static auto& Get() noexcept { return PShiftHq; } };
+struct SegmentedFilter {
+    static constexpr size_t sFftLength{256};
+    static constexpr size_t sSampleLength{sFftLength / 2};
+    static constexpr size_t sNumSegments{N/sSampleLength};
+    static_assert(N >= sFftLength);
+    static_assert((N % sSampleLength) == 0);
 
+    PFFFTSetupPtr mFft;
+    alignas(16) std::array<float,sFftLength*sNumSegments> mFilterData;
+
+    SegmentedFilter()
+    {
+        mFft = PFFFTSetupPtr{pffft_new_setup(sFftLength, PFFFT_REAL)};
+
+        using complex_d = std::complex<double>;
+        constexpr size_t fft_size{N};
+        constexpr size_t half_size{fft_size / 2};
+
+        /* To set up the filter, we need to generate the desired response.
+         * Start with a pure delay that passes all frequencies through.
+         */
+        auto fftBuffer = std::make_unique<complex_d[]>(fft_size);
+        std::fill_n(fftBuffer.get(), fft_size, complex_d{});
+        fftBuffer[half_size] = 1.0;
+
+        /* Convert to the frequency domain, shift the phase of each bin by +90
+         * degrees, then convert back to the time domain.
+         *
+         * NOTE: The 0- and half-frequency are always real for a real signal.
+         * To maintain that and their phase (0 or pi), they're heavily
+         * attenuated instead of shifted like the others.
+         */
+        forward_fft(al::span{fftBuffer.get(), fft_size});
+        fftBuffer[0] *= std::numeric_limits<double>::epsilon();
+        for(size_t i{1};i < half_size;++i)
+            fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
+        fftBuffer[half_size] *= std::numeric_limits<double>::epsilon();
+        for(size_t i{half_size+1};i < fft_size;++i)
+            fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
+        inverse_fft(al::span{fftBuffer.get(), fft_size});
+
+        /* The segments of the filter are converted back to the frequency
+         * domain, each on their own (0 stuffed).
+         */
+        auto fftBuffer2 = std::make_unique<complex_d[]>(sFftLength);
+        auto fftTmp = al::vector<float,16>(sFftLength);
+        float *filter{mFilterData.data()};
+        for(size_t s{0};s < sNumSegments;++s)
+        {
+            for(size_t i{0};i < sSampleLength;++i)
+                fftBuffer2[i] = fftBuffer[sSampleLength*s + i].real() / double{fft_size};
+            std::fill_n(fftBuffer2.get()+sSampleLength, sSampleLength, complex_d{});
+            forward_fft(al::span{fftBuffer2.get(), sFftLength});
+
+            /* Convert to z-domain data for PFFFT, scaled by the FFT length so
+             * the iFFT result will be normalized.
+             */
+            for(size_t i{0};i < sSampleLength;++i)
+            {
+                fftTmp[i*2 + 0] = static_cast<float>(fftBuffer2[i].real()) / float{sFftLength};
+                fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer2[sSampleLength].real()
+                    : fftBuffer2[i].imag()) / float{sFftLength};
+            }
+            pffft_zreorder(mFft.get(), fftTmp.data(), filter, PFFFT_BACKWARD);
+            filter += sFftLength;
+        }
+    }
+};
+
+template<size_t N>
+const SegmentedFilter<N> gSegmentedFilter;
+
+template<size_t N>
+const PhaseShifterT<N> PShifter;
 
-constexpr float square(float x) noexcept
-{ return x*x; }
 
 /* Filter coefficients for the 'base' all-pass IIR, which applies a frequency-
  * dependent phase-shift of N degrees. The output of the filter requires a 1-
  * sample delay.
  */
 constexpr std::array<float,4> Filter1Coeff{{
-    square(0.6923878f), square(0.9360654322959f), square(0.9882295226860f),
-    square(0.9987488452737f)
+    0.479400865589f, 0.876218493539f, 0.976597589508f, 0.997499255936f
 }};
 /* Filter coefficients for the offset all-pass IIR, which applies a frequency-
  * dependent phase-shift of N+90 degrees.
  */
 constexpr std::array<float,4> Filter2Coeff{{
-    square(0.4021921162426f), square(0.8561710882420f), square(0.9722909545651f),
-    square(0.9952884791278f)
+    0.161758498368f, 0.733028932341f, 0.945349700329f, 0.990599156684f
 }};
 
 } // namespace
 
+void UhjAllPassFilter::processOne(const al::span<const float, 4> coeffs, float x)
+{
+    auto state = mState;
+    for(size_t i{0};i < 4;++i)
+    {
+        const float y{x*coeffs[i] + state[i].z[0]};
+        state[i].z[0] = state[i].z[1];
+        state[i].z[1] = y*coeffs[i] - x;
+        x = y;
+    }
+    mState = state;
+}
+
 void UhjAllPassFilter::process(const al::span<const float,4> coeffs,
     const al::span<const float> src, const bool updateState, float *RESTRICT dst)
 {
@@ -92,7 +197,10 @@ template<size_t N>
 void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
     const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
 {
-    const auto &PShift = GetPhaseShifter<N>::Get();
+    static constexpr auto &Filter = gSegmentedFilter<N>;
+    static_assert(sFftLength == Filter.sFftLength);
+    static_assert(sSegmentSize == Filter.sSampleLength);
+    static_assert(sNumSegments == Filter.sNumSegments);
 
     ASSUME(SamplesToDo > 0);
 
@@ -109,10 +217,71 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
         mS[i] = 0.9396926f*mW[i] + 0.1855740f*mX[i];
 
     /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mD. */
-    std::transform(winput, winput+SamplesToDo, xinput, mWX.begin() + sWXInOffset,
-        [](const float w, const float x) noexcept -> float
-        { return -0.3420201f*w + 0.5098604f*x; });
-    PShift.process({mD.data(), SamplesToDo}, mWX.data());
+    size_t curseg{mCurrentSegment};
+    for(size_t base{0};base < SamplesToDo;)
+    {
+        const size_t todo{minz(sSegmentSize-mFifoPos, SamplesToDo-base)};
+
+        /* Copy out the samples that were previously processed by the FFT. */
+        std::copy_n(mWXInOut.begin()+mFifoPos, todo, mD.begin()+base);
+
+        /* Transform the non-delayed input and store in the front half of the
+         * filter input.
+         */
+        std::transform(winput+base, winput+base+todo, xinput+base, mWXInOut.begin()+mFifoPos,
+            [](const float w, const float x) noexcept -> float
+            { return -0.3420201f*w + 0.5098604f*x; });
+
+        mFifoPos += todo;
+        base += todo;
+
+        /* Check whether the input buffer is filled with new samples. */
+        if(mFifoPos < sSegmentSize) break;
+        mFifoPos = 0;
+
+        /* Copy the new input to the next history segment, clearing the back
+         * half of the segment, and convert to the frequency domain.
+         */
+        float *input{mWXHistory.data() + curseg*sFftLength};
+        std::copy_n(mWXInOut.begin(), sSegmentSize, input);
+        std::fill_n(input+sSegmentSize, sSegmentSize, 0.0f);
+
+        pffft_transform(Filter.mFft.get(), input, input, mWorkData.data(), PFFFT_FORWARD);
+
+        /* Convolve each input segment with its IR filter counterpart (aligned
+         * in time, from newest to oldest).
+         */
+        mFftBuffer.fill(0.0f);
+        const float *filter{Filter.mFilterData.data()};
+        for(size_t s{curseg};s < sNumSegments;++s)
+        {
+            pffft_zconvolve_accumulate(Filter.mFft.get(), input, filter, mFftBuffer.data());
+            input += sFftLength;
+            filter += sFftLength;
+        }
+        input = mWXHistory.data();
+        for(size_t s{0};s < curseg;++s)
+        {
+            pffft_zconvolve_accumulate(Filter.mFft.get(), input, filter, mFftBuffer.data());
+            input += sFftLength;
+            filter += sFftLength;
+        }
+
+        /* Convert back to samples, writing to the output and storing the extra
+         * for next time.
+         */
+        pffft_transform(Filter.mFft.get(), mFftBuffer.data(), mFftBuffer.data(),
+            mWorkData.data(), PFFFT_BACKWARD);
+
+        for(size_t i{0};i < sSegmentSize;++i)
+            mWXInOut[i] = mFftBuffer[i] + mWXInOut[sSegmentSize+i];
+        for(size_t i{0};i < sSegmentSize;++i)
+            mWXInOut[sSegmentSize+i] = mFftBuffer[sSegmentSize+i];
+
+        /* Shift the input history. */
+        curseg = curseg ? (curseg-1) : (sNumSegments-1);
+    }
+    mCurrentSegment = curseg;
 
     /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
     for(size_t i{0};i < SamplesToDo;++i)
@@ -122,7 +291,6 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
     std::copy(mW.cbegin()+SamplesToDo, mW.cbegin()+SamplesToDo+sFilterDelay, mW.begin());
     std::copy(mX.cbegin()+SamplesToDo, mX.cbegin()+SamplesToDo+sFilterDelay, mX.begin());
     std::copy(mY.cbegin()+SamplesToDo, mY.cbegin()+SamplesToDo+sFilterDelay, mY.begin());
-    std::copy(mWX.cbegin()+SamplesToDo, mWX.cbegin()+SamplesToDo+sWXInOffset, mWX.begin());
 
     /* Apply a delay to the existing output to align with the input delay. */
     auto *delayBuffer = mDirectDelay.data();
@@ -133,7 +301,7 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
 
         float *inout{al::assume_aligned<16>(buffer)};
         auto inout_end = inout + SamplesToDo;
-        if(SamplesToDo >= sFilterDelay) LIKELY
+        if(SamplesToDo >= sFilterDelay)
         {
             auto delay_end = std::rotate(inout, inout_end - sFilterDelay, inout_end);
             std::swap_ranges(inout, delay_end, distbuf);
@@ -240,7 +408,7 @@ void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesT
 {
     static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
 
-    const auto &PShift = GetPhaseShifter<N>::Get();
+    constexpr auto &PShift = PShifter<N>;
 
     ASSUME(samplesToDo > 0);
 
@@ -313,11 +481,11 @@ void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesT
         const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
 
         /* S = Left + Right */
-        for(size_t i{0};i < samplesToDo;++i)
+        for(size_t i{0};i < samplesToDo+sInputPadding;++i)
             mS[i] = left[i] + right[i];
 
         /* D = Left - Right */
-        for(size_t i{0};i < samplesToDo;++i)
+        for(size_t i{0};i < samplesToDo+sInputPadding;++i)
             mD[i] = left[i] - right[i];
     }
 
@@ -326,14 +494,13 @@ void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesT
     float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
 
     /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
-    std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, mTemp.begin(),
+    std::transform(mD.cbegin(), mD.cbegin()+sInputPadding+samplesToDo, youtput, mTemp.begin(),
         [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
-    mFilter2DT.process(Filter2Coeff, {mTemp.data(), samplesToDo}, updateState, xoutput);
+    if(mFirstRun) mFilter2DT.processOne(Filter2Coeff, mTemp[0]);
+    mFilter2DT.process(Filter2Coeff, {mTemp.data()+1, samplesToDo}, updateState, xoutput);
 
     /* Apply filter1 to S and store in mTemp. */
-    mTemp[0] = mDelayS;
-    mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1);
-    if(updateState) LIKELY mDelayS = mTemp[samplesToDo];
+    mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data());
 
     /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
     for(size_t i{0};i < samplesToDo;++i)
@@ -346,12 +513,11 @@ void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesT
     /* Apply filter1 to (0.795968*D - 0.676392*T) and store in mTemp. */
     std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput, youtput,
         [](const float d, const float t) noexcept { return 0.795968f*d - 0.676392f*t; });
-    mTemp[0] = mDelayDT;
-    mFilter1DT.process(Filter1Coeff, {youtput, samplesToDo}, updateState, mTemp.data()+1);
-    if(updateState) LIKELY mDelayDT = mTemp[samplesToDo];
+    mFilter1DT.process(Filter1Coeff, {youtput, samplesToDo}, updateState, mTemp.data());
 
     /* Precompute j*S and store in youtput. */
-    mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput);
+    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
+    mFilter2S.process(Filter2Coeff, {mS.data()+1, samplesToDo}, updateState, youtput);
 
     /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
     for(size_t i{0};i < samplesToDo;++i)
@@ -363,14 +529,14 @@ void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesT
         float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])};
 
         /* Apply filter1 to Q and store in mTemp. */
-        mTemp[0] = mDelayQ;
-        mFilter1Q.process(Filter1Coeff, {zoutput, samplesToDo}, updateState, mTemp.data()+1);
-        if(updateState) LIKELY mDelayQ = mTemp[samplesToDo];
+        mFilter1Q.process(Filter1Coeff, {zoutput, samplesToDo}, updateState, mTemp.data());
 
         /* Z = 1.023332*Q */
         for(size_t i{0};i < samplesToDo;++i)
             zoutput[i] = 1.023332f*mTemp[i];
     }
+
+    mFirstRun = false;
 }
 
 
@@ -392,7 +558,7 @@ void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t sa
 {
     static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");
 
-    const auto &PShift = GetPhaseShifter<N>::Get();
+    constexpr auto &PShift = PShifter<N>;
 
     ASSUME(samplesToDo > 0);
 
@@ -470,7 +636,7 @@ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t sa
         const float *RESTRICT left{al::assume_aligned<16>(samples[0])};
         const float *RESTRICT right{al::assume_aligned<16>(samples[1])};
 
-        for(size_t i{0};i < samplesToDo;++i)
+        for(size_t i{0};i < samplesToDo+sInputPadding;++i)
             mS[i] = left[i] + right[i];
 
         /* Pre-apply the width factor to the difference signal D. Smoothly
@@ -480,7 +646,7 @@ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t sa
         const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
         if(wtarget == wcurrent || !updateState)
         {
-            for(size_t i{0};i < samplesToDo;++i)
+            for(size_t i{0};i < samplesToDo+sInputPadding;++i)
                 mD[i] = (left[i] - right[i]) * wcurrent;
             mCurrentWidth = wcurrent;
         }
@@ -493,6 +659,8 @@ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t sa
                 mD[i] = (left[i] - right[i]) * (wcurrent + wstep*fi);
                 fi += 1.0f;
             }
+            for(size_t i{samplesToDo};i < samplesToDo+sInputPadding;++i)
+                mD[i] = (left[i] - right[i]) * wtarget;
             mCurrentWidth = wtarget;
         }
     }
@@ -502,12 +670,11 @@ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t sa
     float *RESTRICT youtput{al::assume_aligned<16>(samples[2])};
 
     /* Apply filter1 to S and store in mTemp. */
-    mTemp[0] = mDelayS;
-    mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data()+1);
-    if(updateState) LIKELY mDelayS = mTemp[samplesToDo];
+    mFilter1S.process(Filter1Coeff, {mS.data(), samplesToDo}, updateState, mTemp.data());
 
     /* Precompute j*D and store in xoutput. */
-    mFilter2D.process(Filter2Coeff, {mD.data(), samplesToDo}, updateState, xoutput);
+    if(mFirstRun) mFilter2D.processOne(Filter2Coeff, mD[0]);
+    mFilter2D.process(Filter2Coeff, {mD.data()+1, samplesToDo}, updateState, xoutput);
 
     /* W = 0.6098637*S - 0.6896511*j*w*D */
     for(size_t i{0};i < samplesToDo;++i)
@@ -517,16 +684,17 @@ void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t sa
         xoutput[i] = 0.8624776f*mTemp[i] + 0.7626955f*xoutput[i];
 
     /* Precompute j*S and store in youtput. */
-    mFilter2S.process(Filter2Coeff, {mS.data(), samplesToDo}, updateState, youtput);
+    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
+    mFilter2S.process(Filter2Coeff, {mS.data()+1, samplesToDo}, updateState, youtput);
 
     /* Apply filter1 to D and store in mTemp. */
-    mTemp[0] = mDelayD;
-    mFilter1D.process(Filter1Coeff, {mD.data(), samplesToDo}, updateState, mTemp.data()+1);
-    if(updateState) LIKELY mDelayD = mTemp[samplesToDo];
+    mFilter1D.process(Filter1Coeff, {mD.data(), samplesToDo}, updateState, mTemp.data());
 
     /* Y = 1.6822415*w*D - 0.2156194*j*S */
     for(size_t i{0};i < samplesToDo;++i)
         youtput[i] = 1.6822415f*mTemp[i] - 0.2156194f*youtput[i];
+
+    mFirstRun = false;
 }
 
 
diff --git a/core/uhjfilter.h b/core/uhjfilter.h
index df308094..348dc7e1 100644
--- a/core/uhjfilter.h
+++ b/core/uhjfilter.h
@@ -29,6 +29,7 @@ struct UhjAllPassFilter {
     };
     std::array<AllPassState,4> mState;
 
+    void processOne(const al::span<const float,4> coeffs, float x);
     void process(const al::span<const float,4> coeffs, const al::span<const float> src,
         const bool update, float *RESTRICT dst);
 };
@@ -50,7 +51,10 @@ struct UhjEncoderBase {
 
 template<size_t N>
 struct UhjEncoder final : public UhjEncoderBase {
-    static constexpr size_t sFilterDelay{N/2};
+    static constexpr size_t sFftLength{256};
+    static constexpr size_t sSegmentSize{sFftLength/2};
+    static constexpr size_t sNumSegments{N/sSegmentSize};
+    static constexpr size_t sFilterDelay{N/2 + sSegmentSize};
 
     /* Delays and processing storage for the input signal. */
     alignas(16) std::array<float,BufferLineSize+sFilterDelay> mW{};
@@ -60,11 +64,12 @@ struct UhjEncoder final : public UhjEncoderBase {
     alignas(16) std::array<float,BufferLineSize> mS{};
     alignas(16) std::array<float,BufferLineSize> mD{};
 
-    /* History and temp storage for the FIR filter. New samples should be
-     * written to index sFilterDelay*2 - 1.
-     */
-    static constexpr size_t sWXInOffset{sFilterDelay*2 - 1};
-    alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mWX{};
+    /* History and temp storage for the convolution filter. */
+    size_t mFifoPos{}, mCurrentSegment{};
+    alignas(16) std::array<float,sFftLength> mWXInOut{};
+    alignas(16) std::array<float,sFftLength> mFftBuffer{};
+    alignas(16) std::array<float,sFftLength> mWorkData{};
+    alignas(16) std::array<float,sFftLength*sNumSegments> mWXHistory{};
 
     alignas(16) std::array<std::array<float,sFilterDelay>,2> mDirectDelay{};
 
@@ -77,8 +82,6 @@ struct UhjEncoder final : public UhjEncoderBase {
      */
     void encode(float *LeftOut, float *RightOut, const al::span<const float*const,3> InSamples,
         const size_t SamplesToDo) override;
-
-    DEF_NEWDEL(UhjEncoder)
 };
 
 struct UhjEncoderIIR final : public UhjEncoderBase {
@@ -160,17 +163,18 @@ struct UhjDecoder final : public DecoderBase {
 };
 
 struct UhjDecoderIIR final : public DecoderBase {
-    /* FIXME: These IIR decoder filters actually have a 1-sample delay on the
-     * non-filtered components, which is not reflected in the source latency
-     * value. sInputPadding is 0, however, because it doesn't need any extra
-     * input samples.
+    /* These IIR decoder filters normally have a 1-sample delay on the non-
+     * filtered components. However, the filtered components are made to skip
+     * the first output sample and take one future sample, which puts them ahead
+     * by one sample. The first filtered output sample is cut to align it with
+     * the first non-filtered sample, similar to the FIR filters.
      */
-    static constexpr size_t sInputPadding{0};
+    static constexpr size_t sInputPadding{1};
 
-    alignas(16) std::array<float,BufferLineSize> mS{};
-    alignas(16) std::array<float,BufferLineSize> mD{};
-    alignas(16) std::array<float,BufferLineSize+1> mTemp{};
-    float mDelayS{}, mDelayDT{}, mDelayQ{};
+    bool mFirstRun{true};
+    alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{};
+    alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{};
+    alignas(16) std::array<float,BufferLineSize+sInputPadding> mTemp{};
 
     UhjAllPassFilter mFilter1S;
     UhjAllPassFilter mFilter2DT;
@@ -211,14 +215,14 @@ struct UhjStereoDecoder final : public DecoderBase {
 };
 
 struct UhjStereoDecoderIIR final : public DecoderBase {
-    static constexpr size_t sInputPadding{0};
+    static constexpr size_t sInputPadding{1};
 
+    bool mFirstRun{true};
     float mCurrentWidth{-1.0f};
 
-    alignas(16) std::array<float,BufferLineSize> mS{};
-    alignas(16) std::array<float,BufferLineSize> mD{};
-    alignas(16) std::array<float,BufferLineSize+1> mTemp{};
-    float mDelayS{}, mDelayD{};
+    alignas(16) std::array<float,BufferLineSize+sInputPadding> mS{};
+    alignas(16) std::array<float,BufferLineSize+sInputPadding> mD{};
+    alignas(16) std::array<float,BufferLineSize> mTemp{};
 
     UhjAllPassFilter mFilter1S;
     UhjAllPassFilter mFilter2D;
diff --git a/core/uiddefs.cpp b/core/uiddefs.cpp
index 244c01a5..9471bba5 100644
--- a/core/uiddefs.cpp
+++ b/core/uiddefs.cpp
@@ -19,12 +19,8 @@ DEFINE_GUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, 0x00000003, 0x0000, 0x0010, 0x80,0x
 DEFINE_GUID(IID_IDirectSoundNotify,   0xb0210783, 0x89cd, 0x11d0, 0xaf,0x08, 0x00,0xa0,0xc9,0x25,0xcd,0x16);
 
 DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e,0x3d, 0xc4,0x57,0x92,0x91,0x69,0x2e);
-DEFINE_GUID(IID_IMMDeviceEnumerator,  0xa95664d2, 0x9614, 0x4f35, 0xa7,0x46, 0xde,0x8d,0xb6,0x36,0x17,0xe6);
-DEFINE_GUID(IID_IAudioClient,         0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1,0x78, 0xc2,0xf5,0x68,0xa7,0x03,0xb2);
-DEFINE_GUID(IID_IAudioRenderClient,   0xf294acfc, 0x3146, 0x4483, 0xa7,0xbf, 0xad,0xdc,0xa7,0xc2,0x60,0xe2);
-DEFINE_GUID(IID_IAudioCaptureClient,  0xc8adbd64, 0xe71e, 0x48a0, 0xa4,0xde, 0x18,0x5c,0x39,0x5c,0xd3,0x17);
 
-#ifdef HAVE_WASAPI
+#if defined(HAVE_WASAPI) && !defined(ALSOFT_UWP)
 #include <wtypes.h>
 #include <devpropdef.h>
 #include <propkeydef.h>
diff --git a/core/voice.cpp b/core/voice.cpp
index e8fbcccd..3889c42d 100644
--- a/core/voice.cpp
+++ b/core/voice.cpp
@@ -12,13 +12,12 @@
 #include <iterator>
 #include <memory>
 #include <new>
+#include <optional>
 #include <stdlib.h>
 #include <utility>
 #include <vector>
 
-#include "albyte.h"
 #include "alnumeric.h"
-#include "aloptional.h"
 #include "alspan.h"
 #include "alstring.h"
 #include "ambidefs.h"
@@ -129,7 +128,7 @@ inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
 
 } // namespace
 
-void Voice::InitMixer(al::optional<std::string> resampler)
+void Voice::InitMixer(std::optional<std::string> resampler)
 {
     if(resampler)
     {
@@ -227,10 +226,9 @@ void SendSourceStoppedEvent(ContextBase *context, uint id)
     auto evt_vec = ring->getWriteVector();
     if(evt_vec.first.len < 1) return;
 
-    AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
-        AsyncEvent::SourceStateChange)};
-    evt->u.srcstate.id = id;
-    evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
+    auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec.first.buf);
+    evt.mId = id;
+    evt.mState = AsyncSrcState::Stop;
 
     ring->writeAdvance(1);
 }
@@ -264,7 +262,7 @@ const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *ds
 
 
 template<FmtType Type>
-inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const size_t srcChan,
+inline void LoadSamples(float *RESTRICT dstSamples, const std::byte *src, const size_t srcChan,
     const size_t srcOffset, const size_t srcStep, const size_t /*samplesPerBlock*/,
     const size_t samplesToLoad) noexcept
 {
@@ -275,7 +273,7 @@ inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const s
 }
 
 template<>
-inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src,
+inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const std::byte *src,
     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 {
@@ -289,14 +287,15 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
     /* NOTE: This could probably be optimized better. */
     size_t wrote{0};
     do {
+        static constexpr int MaxStepIndex{static_cast<int>(std::size(IMAStep_size)) - 1};
         /* Each IMA4 block starts with a signed 16-bit sample, and a signed
          * 16-bit table index. The table index needs to be clamped.
          */
-        int sample{src[srcChan*4] | (src[srcChan*4 + 1] << 8)};
-        int index{src[srcChan*4 + 2] | (src[srcChan*4 + 3] << 8)};
+        int sample{int(src[srcChan*4]) | (int(src[srcChan*4 + 1]) << 8)};
+        int index{int(src[srcChan*4 + 2]) | (int(src[srcChan*4 + 3]) << 8)};
 
         sample = (sample^0x8000) - 32768;
-        index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1);
+        index = clampi((index^0x8000) - 32768, 0, MaxStepIndex);
 
         if(skip == 0)
         {
@@ -312,7 +311,7 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
             sample = clampi(sample, -32768, 32767);
 
             index += IMA4Index_adjust[nibble];
-            index = clampi(index, 0, al::size(IMAStep_size)-1);
+            index = clampi(index, 0, MaxStepIndex);
 
             return sample;
         };
@@ -325,17 +324,17 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
          * always be less than the block size). They need to be decoded despite
          * being ignored for proper state on the remaining samples.
          */
-        const al::byte *nibbleData{src + (srcStep+srcChan)*4};
+        const std::byte *nibbleData{src + (srcStep+srcChan)*4};
         size_t nibbleOffset{0};
         const size_t startOffset{skip + 1};
         for(;skip;--skip)
         {
             const size_t byteShift{(nibbleOffset&1) * 4};
-            const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
+            const size_t wordOffset{(nibbleOffset>>1) & ~3_uz};
             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
             ++nibbleOffset;
 
-            std::ignore = decode_sample((nibbleData[byteOffset]>>byteShift) & 15u);
+            std::ignore = decode_sample(uint(nibbleData[byteOffset]>>byteShift) & 15u);
         }
 
         /* Second, decode the rest of the block and write to the output, until
@@ -345,11 +344,11 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
         for(size_t i{0};i < todo;++i)
         {
             const size_t byteShift{(nibbleOffset&1) * 4};
-            const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
+            const size_t wordOffset{(nibbleOffset>>1) & ~3_uz};
             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
             ++nibbleOffset;
 
-            const int result{decode_sample((nibbleData[byteOffset]>>byteShift) & 15u)};
+            const int result{decode_sample(uint(nibbleData[byteOffset]>>byteShift) & 15u)};
             dstSamples[wrote++] = static_cast<float>(result) / 32768.0f;
         }
         if(wrote == samplesToLoad)
@@ -360,7 +359,7 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
 }
 
 template<>
-inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *src,
+inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const std::byte *src,
     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 {
@@ -377,19 +376,19 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
          * nibble sample value. This is followed by the two initial 16-bit
          * sample history values.
          */
-        const al::byte *input{src};
-        const uint8_t blockpred{std::min(input[srcChan], uint8_t{6})};
+        const std::byte *input{src};
+        const uint8_t blockpred{std::min(uint8_t(input[srcChan]), uint8_t{6})};
         input += srcStep;
-        int delta{input[2*srcChan + 0] | (input[2*srcChan + 1] << 8)};
+        int delta{int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1]) << 8)};
         input += srcStep*2;
 
         int sampleHistory[2]{};
-        sampleHistory[0] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
+        sampleHistory[0] = int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1])<<8);
         input += srcStep*2;
-        sampleHistory[1] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
+        sampleHistory[1] = int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1])<<8);
         input += srcStep*2;
 
-        const auto coeffs = al::as_span(MSADPCMAdaptionCoeff[blockpred]);
+        const al::span coeffs{MSADPCMAdaptionCoeff[blockpred]};
         delta = (delta^0x8000) - 32768;
         sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768;
         sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768;
@@ -439,7 +438,7 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
             const size_t byteShift{((nibbleOffset&1)^1) * 4};
             nibbleOffset += srcStep;
 
-            std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15);
+            std::ignore = decode_sample(int(input[byteOffset]>>byteShift) & 15);
         }
 
         /* Now decode the rest of the block, until the end of the block or the
@@ -452,7 +451,7 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
             const size_t byteShift{((nibbleOffset&1)^1) * 4};
             nibbleOffset += srcStep;
 
-            const int sample{decode_sample((input[byteOffset]>>byteShift) & 15)};
+            const int sample{decode_sample(int(input[byteOffset]>>byteShift) & 15)};
             dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
         }
         if(wrote == samplesToLoad)
@@ -462,7 +461,7 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
     } while(true);
 }
 
-void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
+void LoadSamples(float *dstSamples, const std::byte *src, const size_t srcChan,
     const size_t srcOffset, const FmtType srcType, const size_t srcStep,
     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 {
@@ -475,6 +474,7 @@ void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
     {
     HANDLE_FMT(FmtUByte);
     HANDLE_FMT(FmtShort);
+    HANDLE_FMT(FmtInt);
     HANDLE_FMT(FmtFloat);
     HANDLE_FMT(FmtDouble);
     HANDLE_FMT(FmtMulaw);
@@ -798,7 +798,7 @@ void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds devi
         using ResBufType = decltype(DeviceBase::mResampleData);
         static constexpr uint srcSizeMax{static_cast<uint>(ResBufType{}.size()-MaxResamplerEdge)};
 
-        const auto prevSamples = al::as_span(mPrevSamples[chan]);
+        const al::span prevSamples{mPrevSamples[chan]};
         const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(),
             Device->mResampleData.begin()) - MaxResamplerEdge;
         int intPos{DataPosInt};
@@ -1101,7 +1101,7 @@ void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds devi
             {
                 const size_t byteOffset{blocksDone*mBytesPerBlock};
                 const size_t byteEnd{mNumCallbackBlocks*mBytesPerBlock};
-                al::byte *data{BufferListItem->mSamples};
+                std::byte *data{BufferListItem->mSamples};
                 std::copy(data+byteOffset, data+byteEnd, data);
                 mNumCallbackBlocks -= blocksDone;
                 mCallbackBlockBase += blocksDone;
@@ -1145,16 +1145,15 @@ void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds devi
 
     /* Send any events now, after the position/buffer info was updated. */
     const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
-    if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted))
+    if(buffers_done > 0 && enabledevt.test(al::to_underlying(AsyncEnableBits::BufferCompleted)))
     {
         RingBuffer *ring{Context->mAsyncEvents.get()};
         auto evt_vec = ring->getWriteVector();
         if(evt_vec.first.len > 0)
         {
-            AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
-                AsyncEvent::BufferCompleted)};
-            evt->u.bufcomp.id = SourceID;
-            evt->u.bufcomp.count = buffers_done;
+            auto &evt = InitAsyncEvent<AsyncBufferCompleteEvent>(evt_vec.first.buf);
+            evt.mId = SourceID;
+            evt.mCount = buffers_done;
             ring->writeAdvance(1);
         }
     }
@@ -1165,7 +1164,7 @@ void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds devi
          * ensures any residual noise fades to 0 amplitude.
          */
         mPlayState.store(Stopping, std::memory_order_release);
-        if(enabledevt.test(AsyncEvent::SourceStateChange))
+        if(enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
             SendSourceStoppedEvent(Context, SourceID);
     }
 }
@@ -1275,7 +1274,7 @@ void Voice::prepare(DeviceBase *device)
     else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
     {
         const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ?
-            AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
+            AmbiIndex::OrderFrom2DChannel.data() : AmbiIndex::OrderFromChannel.data()};
         const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
             device->m2DMixing);
 
diff --git a/core/voice.h b/core/voice.h
index 57ee7b01..a599eda8 100644
--- a/core/voice.h
+++ b/core/voice.h
@@ -5,13 +5,12 @@
 #include <atomic>
 #include <bitset>
 #include <chrono>
+#include <cstddef>
 #include <memory>
-#include <stddef.h>
+#include <optional>
 #include <string>
 
-#include "albyte.h"
 #include "almalloc.h"
-#include "aloptional.h"
 #include "alspan.h"
 #include "bufferline.h"
 #include "buffer_storage.h"
@@ -100,7 +99,7 @@ struct VoiceBufferItem {
     uint mLoopStart{0u};
     uint mLoopEnd{0u};
 
-    al::byte *mSamples{nullptr};
+    std::byte *mSamples{nullptr};
 };
 
 
@@ -270,7 +269,7 @@ struct Voice {
 
     void prepare(DeviceBase *device);
 
-    static void InitMixer(al::optional<std::string> resampler);
+    static void InitMixer(std::optional<std::string> resampler);
 
     DEF_NEWDEL(Voice)
 };