21 files changed, 562 insertions, 479 deletions
diff --git a/common/albit.h b/common/albit.h
index 82a4a00d..d54a189c 100644
--- a/common/albit.h
+++ b/common/albit.h
@@ -1,6 +1,7 @@
 #ifndef AL_BIT_H
 #define AL_BIT_H
 
+#include <array>
 #include <cstdint>
 #include <cstring>
 #include <limits>
@@ -17,9 +18,9 @@ std::enable_if_t<sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From
     && std::is_trivially_copyable_v<To>,
 To> bit_cast(const From &src) noexcept
 {
-    alignas(To) char dst[sizeof(To)];
-    std::memcpy(&dst[0], &src, sizeof(To));
-    return *std::launder(reinterpret_cast<To*>(&dst[0]));
+    alignas(To) std::array<char,sizeof(To)> dst;
+    std::memcpy(dst.data(), &src, sizeof(To));
+    return *std::launder(reinterpret_cast<To*>(dst.data()));
 }
 
 #ifdef __BYTE_ORDER__
diff --git a/common/alcomplex.cpp b/common/alcomplex.cpp
index 82a0c43c..f2de5f51 100644
--- a/common/alcomplex.cpp
+++ b/common/alcomplex.cpp
@@ -155,6 +155,8 @@ void complex_fft(const al::span<std::complex<double>> buffer, const double sign)
     }
     else
     {
+        assert(log2_size < 32);
+
         for(size_t idx{1u};idx < fftsize-1;++idx)
         {
             size_t revidx{idx};
diff --git a/common/almalloc.cpp b/common/almalloc.cpp
deleted file mode 100644
index ad1dc6be..00000000
--- a/common/almalloc.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-
-#include "config.h"
-
-#include "almalloc.h"
-
-#include <cassert>
-#include <cstddef>
-#include <cstdlib>
-#include <cstring>
-#include <memory>
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
-
-void *al_malloc(size_t alignment, size_t size)
-{
-    assert((alignment & (alignment-1)) == 0);
-    alignment = std::max(alignment, alignof(std::max_align_t));
-
-#if defined(HAVE_POSIX_MEMALIGN)
-    void *ret{};
-    if(posix_memalign(&ret, alignment, size) == 0)
-        return ret;
-    return nullptr;
-#elif defined(HAVE__ALIGNED_MALLOC)
-    return _aligned_malloc(size, alignment);
-#else
-    size_t total_size{size + alignment-1 + sizeof(void*)};
-    void *base{std::malloc(total_size)};
-    if(base != nullptr)
-    {
-        void *aligned_ptr{static_cast<char*>(base) + sizeof(void*)};
-        total_size -= sizeof(void*);
-
-        std::align(alignment, size, aligned_ptr, total_size);
-        *(static_cast<void**>(aligned_ptr)-1) = base;
-        base = aligned_ptr;
-    }
-    return base;
-#endif
-}
-
-void *al_calloc(size_t alignment, size_t size)
-{
-    void *ret{al_malloc(alignment, size)};
-    if(ret) std::memset(ret, 0, size);
-    return ret;
-}
-
-void al_free(void *ptr) noexcept
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-    std::free(ptr);
-#elif defined(HAVE__ALIGNED_MALLOC)
-    _aligned_free(ptr);
-#else
-    if(ptr != nullptr)
-        std::free(*(static_cast<void**>(ptr) - 1));
-#endif
-}
diff --git a/common/almalloc.h b/common/almalloc.h
index 873473ca..3b9965e6 100644
--- a/common/almalloc.h
+++ b/common/almalloc.h
@@ -13,39 +13,17 @@
 #include "pragmadefs.h"
 
 
-void al_free(void *ptr) noexcept;
-[[gnu::alloc_align(1), gnu::alloc_size(2), gnu::malloc]]
-void *al_malloc(size_t alignment, size_t size);
-[[gnu::alloc_align(1), gnu::alloc_size(2), gnu::malloc]]
-void *al_calloc(size_t alignment, size_t size);
+namespace gsl {
+template<typename T> using owner = T; /* Stand-in for the GSL owner<T> annotation; a plain alias of T. */
+} // namespace gsl
 
 
-#define DISABLE_ALLOC()                                                       \
+#define DISABLE_ALLOC                                                         \
     void *operator new(size_t) = delete;                                      \
     void *operator new[](size_t) = delete;                                    \
     void operator delete(void*) noexcept = delete;                            \
     void operator delete[](void*) noexcept = delete;
 
-#define DEF_NEWDEL(T)                                                         \
-    void *operator new(size_t size)                                           \
-    {                                                                         \
-        static_assert(&operator new == &T::operator new,                      \
-            "Incorrect container type specified");                            \
-        if(void *ret{al_malloc(alignof(T), size)})                            \
-            return ret;                                                       \
-        throw std::bad_alloc();                                               \
-    }                                                                         \
-    void *operator new[](size_t size) { return operator new(size); }          \
-    void operator delete(void *block) noexcept { al_free(block); }            \
-    void operator delete[](void *block) noexcept { operator delete(block); }
-
-#define DEF_PLACE_NEWDEL()                                                    \
-    void *operator new(size_t /*size*/, void *ptr) noexcept { return ptr; }   \
-    void *operator new[](size_t /*size*/, void *ptr) noexcept { return ptr; } \
-    void operator delete(void *block, void*) noexcept { al_free(block); }     \
-    void operator delete(void *block) noexcept { al_free(block); }            \
-    void operator delete[](void *block, void*) noexcept { al_free(block); }   \
-    void operator delete[](void *block) noexcept { al_free(block); }
 
 enum FamCount : size_t { };
 
@@ -58,54 +36,59 @@ enum FamCount : size_t { };
             sizeof(T));                                                       \
     }                                                                         \
                                                                               \
-    void *operator new(size_t /*size*/, FamCount count)                       \
+    gsl::owner<void*> operator new(size_t /*size*/, FamCount count)           \
     {                                                                         \
-        if(void *ret{al_malloc(alignof(T), T::Sizeof(count))})                \
-            return ret;                                                       \
-        throw std::bad_alloc();                                               \
+        const auto alignment = std::align_val_t{alignof(T)};                  \
+        return ::operator new[](T::Sizeof(count), alignment);                 \
     }                                                                         \
+    void operator delete(gsl::owner<void*> block, FamCount) noexcept          \
+    { ::operator delete[](block, std::align_val_t{alignof(T)}); }             \
+    void operator delete(gsl::owner<void*> block) noexcept                    \
+    { ::operator delete[](block, std::align_val_t{alignof(T)}); }             \
     void *operator new[](size_t /*size*/) = delete;                           \
-    void operator delete(void *block, FamCount) { al_free(block); }           \
-    void operator delete(void *block) noexcept { al_free(block); }            \
     void operator delete[](void* /*block*/) = delete;
 
 
 namespace al {
 
-template<typename T, std::size_t Align=alignof(T)>
+template<typename T, std::size_t AlignV=alignof(T)>
 struct allocator {
-    static constexpr std::size_t alignment{std::max(Align, alignof(T))};
-
-    using value_type = T;
-    using reference = T&;
-    using const_reference = const T&;
-    using pointer = T*;
-    using const_pointer = const T*;
+    static constexpr auto Alignment = std::max(AlignV, alignof(T));
+    static constexpr auto AlignVal = std::align_val_t{Alignment};
+
+    using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
     using size_type = std::size_t;
     using difference_type = std::ptrdiff_t;
     using is_always_equal = std::true_type;
 
-    template<typename U>
+    template<typename U, std::enable_if_t<alignof(U) <= Alignment,bool> = true>
     struct rebind {
-        using other = allocator<U, Align>;
+        using other = allocator<U,Alignment>;
     };
 
     constexpr explicit allocator() noexcept = default;
     template<typename U, std::size_t N>
-    constexpr explicit allocator(const allocator<U,N>&) noexcept { }
+    constexpr explicit allocator(const allocator<U,N>&) noexcept
+    { static_assert(Alignment == allocator<U,N>::Alignment); }
 
-    T *allocate(std::size_t n)
+    gsl::owner<T*> allocate(std::size_t n)
     {
         if(n > std::numeric_limits<std::size_t>::max()/sizeof(T)) throw std::bad_alloc();
-        if(auto p = al_malloc(alignment, n*sizeof(T))) return static_cast<T*>(p);
-        throw std::bad_alloc();
+        return static_cast<gsl::owner<T*>>(::operator new[](n*sizeof(T), AlignVal));
     }
-    void deallocate(T *p, std::size_t) noexcept { al_free(p); }
+    void deallocate(gsl::owner<T*> p, std::size_t) noexcept
+    { ::operator delete[](gsl::owner<void*>{p}, AlignVal); }
 };
 template<typename T, std::size_t N, typename U, std::size_t M>
-constexpr bool operator==(const allocator<T,N>&, const allocator<U,M>&) noexcept { return true; }
+constexpr bool operator==(const allocator<T,N>&, const allocator<U,M>&) noexcept
+{ return allocator<T,N>::Alignment == allocator<U,M>::Alignment; }
 template<typename T, std::size_t N, typename U, std::size_t M>
-constexpr bool operator!=(const allocator<T,N>&, const allocator<U,M>&) noexcept { return false; }
+constexpr bool operator!=(const allocator<T,N>&, const allocator<U,M>&) noexcept
+{ return allocator<T,N>::Alignment != allocator<U,M>::Alignment; }
 
 
 template<typename T>
@@ -124,124 +107,15 @@ constexpr auto to_address(const T &p) noexcept
 
 template<typename T, typename ...Args>
 constexpr T* construct_at(T *ptr, Args&& ...args)
-    noexcept(std::is_nothrow_constructible<T, Args...>::value)
-{ return ::new(static_cast<void*>(ptr)) T{std::forward<Args>(args)...}; }
-
-
-/* Storage for flexible array data. This is trivially destructible if type T is
- * trivially destructible.
- */
-template<typename T, size_t alignment, bool = std::is_trivially_destructible<T>::value>
-struct FlexArrayStorage {
-    const size_t mSize;
-    union {
-        char mDummy;
-        alignas(alignment) T mArray[1];
-    };
-
-    static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept
-    {
-        const size_t len{sizeof(T)*count};
-        return std::max(offsetof(FlexArrayStorage,mArray)+len, sizeof(FlexArrayStorage)) + base;
-    }
-
-    FlexArrayStorage(size_t size) : mSize{size}
-    { std::uninitialized_default_construct_n(mArray, mSize); }
-    ~FlexArrayStorage() = default;
-
-    FlexArrayStorage(const FlexArrayStorage&) = delete;
-    FlexArrayStorage& operator=(const FlexArrayStorage&) = delete;
-};
-
-template<typename T, size_t alignment>
-struct FlexArrayStorage<T,alignment,false> {
-    const size_t mSize;
-    union {
-        char mDummy;
-        alignas(alignment) T mArray[1];
-    };
-
-    static constexpr size_t Sizeof(size_t count, size_t base) noexcept
-    {
-        const size_t len{sizeof(T)*count};
-        return std::max(offsetof(FlexArrayStorage,mArray)+len, sizeof(FlexArrayStorage)) + base;
-    }
-
-    FlexArrayStorage(size_t size) : mSize{size}
-    { std::uninitialized_default_construct_n(mArray, mSize); }
-    ~FlexArrayStorage() { std::destroy_n(mArray, mSize); }
-
-    FlexArrayStorage(const FlexArrayStorage&) = delete;
-    FlexArrayStorage& operator=(const FlexArrayStorage&) = delete;
-};
-
-/* A flexible array type. Used either standalone or at the end of a parent
- * struct, with placement new, to have a run-time-sized array that's embedded
- * with its size.
- */
-template<typename T, size_t alignment=alignof(T)>
-struct FlexArray {
-    using element_type = T;
-    using value_type = std::remove_cv_t<T>;
-    using index_type = size_t;
-    using difference_type = ptrdiff_t;
-
-    using pointer = T*;
-    using const_pointer = const T*;
-    using reference = T&;
-    using const_reference = const T&;
-
-    using iterator = pointer;
-    using const_iterator = const_pointer;
-    using reverse_iterator = std::reverse_iterator<iterator>;
-    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
-
-    using Storage_t_ = FlexArrayStorage<element_type,alignment>;
-
-    Storage_t_ mStore;
-
-    static constexpr index_type Sizeof(index_type count, index_type base=0u) noexcept
-    { return Storage_t_::Sizeof(count, base); }
-    static std::unique_ptr<FlexArray> Create(index_type count)
-    {
-        void *ptr{al_calloc(alignof(FlexArray), Sizeof(count))};
-        return std::unique_ptr<FlexArray>{al::construct_at(static_cast<FlexArray*>(ptr), count)};
-    }
-
-    FlexArray(index_type size) : mStore{size} { }
-    ~FlexArray() = default;
-
-    index_type size() const noexcept { return mStore.mSize; }
-    bool empty() const noexcept { return mStore.mSize == 0; }
-
-    pointer data() noexcept { return mStore.mArray; }
-    const_pointer data() const noexcept { return mStore.mArray; }
-
-    reference operator[](index_type i) noexcept { return mStore.mArray[i]; }
-    const_reference operator[](index_type i) const noexcept { return mStore.mArray[i]; }
-
-    reference front() noexcept { return mStore.mArray[0]; }
-    const_reference front() const noexcept { return mStore.mArray[0]; }
-
-    reference back() noexcept { return mStore.mArray[mStore.mSize-1]; }
-    const_reference back() const noexcept { return mStore.mArray[mStore.mSize-1]; }
-
-    iterator begin() noexcept { return mStore.mArray; }
-    const_iterator begin() const noexcept { return mStore.mArray; }
-    const_iterator cbegin() const noexcept { return mStore.mArray; }
-    iterator end() noexcept { return mStore.mArray + mStore.mSize; }
-    const_iterator end() const noexcept { return mStore.mArray + mStore.mSize; }
-    const_iterator cend() const noexcept { return mStore.mArray + mStore.mSize; }
-
-    reverse_iterator rbegin() noexcept { return end(); }
-    const_reverse_iterator rbegin() const noexcept { return end(); }
-    const_reverse_iterator crbegin() const noexcept { return cend(); }
-    reverse_iterator rend() noexcept { return begin(); }
-    const_reverse_iterator rend() const noexcept { return begin(); }
-    const_reverse_iterator crend() const noexcept { return cbegin(); }
-
-    DEF_PLACE_NEWDEL()
-};
+    noexcept(std::is_nothrow_constructible_v<T, Args...>)
+{
+    /* NOLINTBEGIN(cppcoreguidelines-owning-memory) construct_at doesn't
+     * necessarily handle the address from an owner, while placement new
+     * expects to.
+     */
+    return ::new(static_cast<void*>(ptr)) T{std::forward<Args>(args)...};
+    /* NOLINTEND(cppcoreguidelines-owning-memory) */
+}
 
 } // namespace al
 
diff --git a/common/alnumbers.h b/common/alnumbers.h
index e92d7b87..7abe6b32 100644
--- a/common/alnumbers.h
+++ b/common/alnumbers.h
@@ -3,9 +3,7 @@
 
 #include <utility>
 
-namespace al {
-
-namespace numbers {
+namespace al::numbers {
 
 namespace detail_ {
     template<typename T>
@@ -29,8 +27,6 @@ inline constexpr auto inv_pi = inv_pi_v<double>;
 inline constexpr auto sqrt2 = sqrt2_v<double>;
 inline constexpr auto sqrt3 = sqrt3_v<double>;
 
-} // namespace numbers
-
-} // namespace al
+} // namespace al::numbers
 
 #endif /* COMMON_ALNUMBERS_H */
diff --git a/common/alnumeric.h b/common/alnumeric.h
index 6281b012..cb8704b2 100644
--- a/common/alnumeric.h
+++ b/common/alnumeric.h
@@ -245,7 +245,7 @@ inline float fast_roundf(float f) noexcept
     /* Integral limit, where sub-integral precision is not available for
      * floats.
      */
-    static constexpr float ilim[2]{
+    static constexpr std::array ilim{
          8388608.0f /*  0x1.0p+23 */,
         -8388608.0f /* -0x1.0p+23 */
     };
diff --git a/common/alsem.h b/common/alsem.h
index 9f72d1c6..90b39319 100644
--- a/common/alsem.h
+++ b/common/alsem.h
@@ -24,7 +24,7 @@ class semaphore {
 #else
     using native_type = sem_t;
 #endif
-    native_type mSem;
+    native_type mSem{};
 
 public:
     semaphore(unsigned int initial=0);
diff --git a/common/alspan.h b/common/alspan.h
index 341ce7c8..822915da 100644
--- a/common/alspan.h
+++ b/common/alspan.h
@@ -5,6 +5,7 @@
 #include <cstddef>
 #include <initializer_list>
 #include <iterator>
+#include <stdexcept>
 #include <type_traits>
 
 #include "almalloc.h"
@@ -12,7 +13,7 @@
 
 namespace al {
 
-constexpr size_t dynamic_extent{static_cast<size_t>(-1)};
+inline constexpr size_t dynamic_extent{static_cast<size_t>(-1)};
 
 template<typename T, size_t E=dynamic_extent>
 class span;
@@ -23,31 +24,31 @@ namespace detail_ {
     template<typename T, size_t E>
     struct is_span_<span<T,E>> : std::true_type { };
     template<typename T>
-    constexpr bool is_span_v = is_span_<std::remove_cv_t<T>>::value;
+    inline constexpr bool is_span_v = is_span_<std::remove_cv_t<T>>::value;
 
     template<typename T>
     struct is_std_array_ : std::false_type { };
     template<typename T, size_t N>
     struct is_std_array_<std::array<T,N>> : std::true_type { };
     template<typename T>
-    constexpr bool is_std_array_v = is_std_array_<std::remove_cv_t<T>>::value;
+    inline constexpr bool is_std_array_v = is_std_array_<std::remove_cv_t<T>>::value;
 
     template<typename T, typename = void>
-    constexpr bool has_size_and_data = false;
+    inline constexpr bool has_size_and_data = false;
     template<typename T>
-    constexpr bool has_size_and_data<T,
+    inline constexpr bool has_size_and_data<T,
         std::void_t<decltype(std::size(std::declval<T>())),decltype(std::data(std::declval<T>()))>>
         = true;
 
     template<typename C>
-    constexpr bool is_valid_container_type = !is_span_v<C> && !is_std_array_v<C>
+    inline constexpr bool is_valid_container_type = !is_span_v<C> && !is_std_array_v<C>
         && !std::is_array<C>::value && has_size_and_data<C>;
 
     template<typename T, typename U>
-    constexpr bool is_array_compatible = std::is_convertible<T(*)[],U(*)[]>::value;
+    inline constexpr bool is_array_compatible = std::is_convertible<T(*)[],U(*)[]>::value; /* NOLINT(*-avoid-c-arrays) */
 
     template<typename C, typename T>
-    constexpr bool is_valid_container = is_valid_container_type<C>
+    inline constexpr bool is_valid_container = is_valid_container_type<C>
         && is_array_compatible<std::remove_pointer_t<decltype(std::data(std::declval<C&>()))>,T>;
 } // namespace detail_
 
@@ -79,9 +80,9 @@ public:
     constexpr explicit span(U iter, index_type) : mData{::al::to_address(iter)} { }
     template<typename U, typename V, REQUIRES(!std::is_convertible<V,size_t>::value)>
     constexpr explicit span(U first, V) : mData{::al::to_address(first)}
-    {}
+    { }
 
-    constexpr span(type_identity_t<element_type> (&arr)[E]) noexcept
+    constexpr span(type_identity_t<element_type> (&arr)[E]) noexcept /* NOLINT(*-avoid-c-arrays) */
         : span{std::data(arr), std::size(arr)}
     { }
     constexpr span(std::array<value_type,E> &arr) noexcept
@@ -107,43 +108,43 @@ public:
 
     constexpr span& operator=(const span &rhs) noexcept = default;
 
-    constexpr reference front() const { return *mData; }
-    constexpr reference back() const { return *(mData+E-1); }
-    constexpr reference operator[](index_type idx) const { return mData[idx]; }
-    constexpr pointer data() const noexcept { return mData; }
+    [[nodiscard]] constexpr auto front() const -> reference { return mData[0]; }
+    [[nodiscard]] constexpr auto back() const -> reference { return mData[E-1]; }
+    [[nodiscard]] constexpr auto operator[](index_type idx) const -> reference { return mData[idx]; }
+    [[nodiscard]] constexpr auto data() const noexcept -> pointer { return mData; }
 
-    constexpr index_type size() const noexcept { return E; }
-    constexpr index_type size_bytes() const noexcept { return E * sizeof(value_type); }
-    constexpr bool empty() const noexcept { return E == 0; }
+    [[nodiscard]] constexpr auto size() const noexcept -> index_type { return E; }
+    [[nodiscard]] constexpr auto size_bytes() const noexcept -> index_type { return E * sizeof(value_type); }
+    [[nodiscard]] constexpr auto empty() const noexcept -> bool { return E == 0; }
 
-    constexpr iterator begin() const noexcept { return mData; }
-    constexpr iterator end() const noexcept { return mData+E; }
-    constexpr const_iterator cbegin() const noexcept { return mData; }
-    constexpr const_iterator cend() const noexcept { return mData+E; }
+    [[nodiscard]] constexpr auto begin() const noexcept -> iterator { return mData; }
+    [[nodiscard]] constexpr auto end() const noexcept -> iterator { return mData+E; }
+    [[nodiscard]] constexpr auto cbegin() const noexcept -> const_iterator { return mData; }
+    [[nodiscard]] constexpr auto cend() const noexcept -> const_iterator { return mData+E; }
 
-    constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator{end()}; }
-    constexpr reverse_iterator rend() const noexcept { return reverse_iterator{begin()}; }
-    constexpr const_reverse_iterator crbegin() const noexcept
+    [[nodiscard]] constexpr auto rbegin() const noexcept -> reverse_iterator { return reverse_iterator{end()}; }
+    [[nodiscard]] constexpr auto rend() const noexcept -> reverse_iterator { return reverse_iterator{begin()}; }
+    [[nodiscard]] constexpr auto crbegin() const noexcept -> const_reverse_iterator
     { return const_reverse_iterator{cend()}; }
-    constexpr const_reverse_iterator crend() const noexcept
+    [[nodiscard]] constexpr auto crend() const noexcept -> const_reverse_iterator
     { return const_reverse_iterator{cbegin()}; }
 
     template<size_t C>
-    constexpr span<element_type,C> first() const
+    [[nodiscard]] constexpr auto first() const -> span<element_type,C>
     {
         static_assert(E >= C, "New size exceeds original capacity");
         return span<element_type,C>{mData, C};
     }
 
     template<size_t C>
-    constexpr span<element_type,C> last() const
+    [[nodiscard]] constexpr auto last() const -> span<element_type,C>
     {
         static_assert(E >= C, "New size exceeds original capacity");
         return span<element_type,C>{mData+(E-C), C};
     }
 
     template<size_t O, size_t C>
-    constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>>
+    [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>>
     {
         static_assert(E >= O, "Offset exceeds extent");
         static_assert(E-O >= C, "New size exceeds original capacity");
@@ -151,7 +152,7 @@ public:
     }
 
     template<size_t O, size_t C=dynamic_extent>
-    constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,E-O>>
+    [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,E-O>>
     {
         static_assert(E >= O, "Offset exceeds extent");
         return span<element_type,E-O>{mData+O, E-O};
@@ -161,10 +162,10 @@ public:
      * defining the specialization. As a result, these methods need to be
      * defined later.
      */
-    constexpr span<element_type,dynamic_extent> first(size_t count) const;
-    constexpr span<element_type,dynamic_extent> last(size_t count) const;
-    constexpr span<element_type,dynamic_extent> subspan(size_t offset,
-        size_t count=dynamic_extent) const;
+    [[nodiscard]] constexpr auto first(size_t count) const -> span<element_type,dynamic_extent>;
+    [[nodiscard]] constexpr auto last(size_t count) const -> span<element_type,dynamic_extent>;
+    [[nodiscard]] constexpr auto subspan(size_t offset,
+        size_t count=dynamic_extent) const -> span<element_type,dynamic_extent>;
 
 private:
     pointer mData{nullptr};
@@ -192,14 +193,14 @@ public:
 
     constexpr span() noexcept = default;
     template<typename U>
-    constexpr span(U iter, index_type count) : mData{::al::to_address(iter)}, mDataEnd{::al::to_address(iter) + count}
+    constexpr span(U iter, index_type count) : mData{::al::to_address(iter)}, mDataLength{count}
     { }
     template<typename U, typename V, REQUIRES(!std::is_convertible<V,size_t>::value)>
     constexpr span(U first, V last) : span{::al::to_address(first), static_cast<size_t>(last - first)}
     { }
 
     template<size_t N>
-    constexpr span(type_identity_t<element_type> (&arr)[N]) noexcept
+    constexpr span(type_identity_t<element_type> (&arr)[N]) noexcept /* NOLINT(*-avoid-c-arrays) */
         : span{std::data(arr), std::size(arr)}
     { }
     template<size_t N>
@@ -221,83 +222,122 @@ public:
 
     constexpr span& operator=(const span &rhs) noexcept = default;
 
-    constexpr reference front() const { return *mData; }
-    constexpr reference back() const { return *(mDataEnd-1); }
-    constexpr reference operator[](index_type idx) const { return mData[idx]; }
-    constexpr pointer data() const noexcept { return mData; }
+    [[nodiscard]] constexpr auto front() const -> reference { return mData[0]; }
+    [[nodiscard]] constexpr auto back() const -> reference { return mData[mDataLength-1]; }
+    [[nodiscard]] constexpr auto operator[](index_type idx) const -> reference { return mData[idx]; }
+    [[nodiscard]] constexpr auto data() const noexcept -> pointer { return mData; }
 
-    constexpr index_type size() const noexcept { return static_cast<index_type>(mDataEnd-mData); }
-    constexpr index_type size_bytes() const noexcept
-    { return static_cast<index_type>(mDataEnd-mData) * sizeof(value_type); }
-    constexpr bool empty() const noexcept { return mData == mDataEnd; }
+    [[nodiscard]] constexpr auto size() const noexcept -> index_type { return mDataLength; }
+    [[nodiscard]] constexpr auto size_bytes() const noexcept -> index_type { return mDataLength * sizeof(value_type); }
+    [[nodiscard]] constexpr auto empty() const noexcept -> bool { return mDataLength == 0; }
 
-    constexpr iterator begin() const noexcept { return mData; }
-    constexpr iterator end() const noexcept { return mDataEnd; }
-    constexpr const_iterator cbegin() const noexcept { return mData; }
-    constexpr const_iterator cend() const noexcept { return mDataEnd; }
+    [[nodiscard]] constexpr auto begin() const noexcept -> iterator { return mData; }
+    [[nodiscard]] constexpr auto end() const noexcept -> iterator { return mData+mDataLength; }
+    [[nodiscard]] constexpr auto cbegin() const noexcept -> const_iterator { return mData; }
+    [[nodiscard]] constexpr auto cend() const noexcept -> const_iterator { return mData+mDataLength; }
 
-    constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator{end()}; }
-    constexpr reverse_iterator rend() const noexcept { return reverse_iterator{begin()}; }
-    constexpr const_reverse_iterator crbegin() const noexcept
+    [[nodiscard]] constexpr auto rbegin() const noexcept -> reverse_iterator { return reverse_iterator{end()}; }
+    [[nodiscard]] constexpr auto rend() const noexcept -> reverse_iterator { return reverse_iterator{begin()}; }
+    [[nodiscard]] constexpr auto crbegin() const noexcept -> const_reverse_iterator
     { return const_reverse_iterator{cend()}; }
-    constexpr const_reverse_iterator crend() const noexcept
+    [[nodiscard]] constexpr auto crend() const noexcept -> const_reverse_iterator
     { return const_reverse_iterator{cbegin()}; }
 
     template<size_t C>
-    constexpr span<element_type,C> first() const
-    { return span<element_type,C>{mData, C}; }
+    [[nodiscard]] constexpr auto first() const -> span<element_type,C>
+    {
+        if(C > mDataLength)
+            throw std::out_of_range{"Subspan count out of range"};
+        return span<element_type,C>{mData, C};
+    }
 
-    constexpr span first(size_t count) const
-    { return (count >= size()) ? *this : span{mData, mData+count}; }
+    [[nodiscard]] constexpr auto first(size_t count) const -> span
+    {
+        if(count > mDataLength)
+            throw std::out_of_range{"Subspan count out of range"};
+        return span{mData, count};
+    }
 
     template<size_t C>
-    constexpr span<element_type,C> last() const
-    { return span<element_type,C>{mDataEnd-C, C}; }
+    [[nodiscard]] constexpr auto last() const -> span<element_type,C>
+    {
+        if(C > mDataLength)
+            throw std::out_of_range{"Subspan count out of range"};
+        return span<element_type,C>{mData+mDataLength-C, C};
+    }
 
-    constexpr span last(size_t count) const
-    { return (count >= size()) ? *this : span{mDataEnd-count, mDataEnd}; }
+    [[nodiscard]] constexpr auto last(size_t count) const -> span
+    {
+        if(count > mDataLength)
+            throw std::out_of_range{"Subspan count out of range"};
+        return span{mData+mDataLength-count, count};
+    }
 
     template<size_t O, size_t C>
-    constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>>
-    { return span<element_type,C>{mData+O, C}; }
+    [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>>
+    {
+        if(O > mDataLength)
+            throw std::out_of_range{"Subspan offset out of range"};
+        if(C > mDataLength-O)
+            throw std::out_of_range{"Subspan length out of range"};
+        return span<element_type,C>{mData+O, C};
+    }
 
     template<size_t O, size_t C=dynamic_extent>
-    constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,C>>
-    { return span<element_type,C>{mData+O, mDataEnd}; }
+    [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,C>>
+    {
+        if(O > mDataLength)
+            throw std::out_of_range{"Subspan offset out of range"};
+        return span<element_type,C>{mData+O, mDataLength-O};
+    }
 
-    constexpr span subspan(size_t offset, size_t count=dynamic_extent) const
+    [[nodiscard]] constexpr auto subspan(size_t offset, size_t count=dynamic_extent) const -> span
     {
-        return (offset > size()) ? span{} :
-            (count >= size()-offset) ? span{mData+offset, mDataEnd} :
-            span{mData+offset, mData+offset+count};
+        if(offset > mDataLength)
+            throw std::out_of_range{"Subspan offset out of range"};
+        if(count != dynamic_extent)
+        {
+            if(count > mDataLength-offset)
+                throw std::out_of_range{"Subspan length out of range"};
+            return span{mData+offset, count};
+        }
+        return span{mData+offset, mDataLength-offset};
     }
 
 private:
     pointer mData{nullptr};
-    pointer mDataEnd{nullptr};
+    index_type mDataLength{0};
 };
 
 template<typename T, size_t E>
-constexpr inline auto span<T,E>::first(size_t count) const -> span<element_type,dynamic_extent>
+[[nodiscard]] constexpr inline auto span<T,E>::first(size_t count) const -> span<element_type,dynamic_extent>
 {
-    return (count >= size()) ? span<element_type>{mData, extent} :
-        span<element_type>{mData, count};
+    if(count > size())
+        throw std::out_of_range{"Subspan count out of range"};
+    return span<element_type>{mData, count};
 }
 
 template<typename T, size_t E>
-constexpr inline auto span<T,E>::last(size_t count) const -> span<element_type,dynamic_extent>
+[[nodiscard]] constexpr inline auto span<T,E>::last(size_t count) const -> span<element_type,dynamic_extent>
 {
-    return (count >= size()) ? span<element_type>{mData, extent} :
-        span<element_type>{mData+extent-count, count};
+    if(count > size())
+        throw std::out_of_range{"Subspan count out of range"};
+    return span<element_type>{mData+size()-count, count};
 }
 
 template<typename T, size_t E>
-constexpr inline auto span<T,E>::subspan(size_t offset, size_t count) const
+[[nodiscard]] constexpr inline auto span<T,E>::subspan(size_t offset, size_t count) const
     -> span<element_type,dynamic_extent>
 {
-    return (offset > size()) ? span<element_type>{} :
-        (count >= size()-offset) ? span<element_type>{mData+offset, mData+extent} :
-        span<element_type>{mData+offset, mData+offset+count};
+    if(offset > size())
+        throw std::out_of_range{"Subspan offset out of range"};
+    if(count != dynamic_extent)
+    {
+        if(count > size()-offset)
+            throw std::out_of_range{"Subspan length out of range"};
+        return span<element_type>{mData+offset, count}; /* bare `span` is span<T,E>, which ignores count */
+    }
+    return span<element_type>{mData+offset, size()-offset}; /* dynamic extent: the rest after offset */
 }
 
 
@@ -305,7 +345,7 @@ template<typename T, typename EndOrSize>
 span(T, EndOrSize) -> span<std::remove_reference_t<decltype(*std::declval<T&>())>>;
 
 template<typename T, std::size_t N>
-span(T (&)[N]) -> span<T, N>;
+span(T (&)[N]) -> span<T, N>; /* NOLINT(*-avoid-c-arrays) */
 
 template<typename T, std::size_t N>
 span(std::array<T, N>&) -> span<T, N>;
diff --git a/common/althrd_setname.cpp b/common/althrd_setname.cpp
index 22d33092..21197ba0 100644
--- a/common/althrd_setname.cpp
+++ b/common/althrd_setname.cpp
@@ -60,7 +60,7 @@ using setname_t4 = int(*)(pthread_t, const char*, void*);
 { func(pthread_self(), name); }
 
 [[maybe_unused]] void setname_caller(setname_t4 func, const char *name)
-{ func(pthread_self(), "%s", static_cast<void*>(const_cast<char*>(name))); }
+{ func(pthread_self(), "%s", const_cast<char*>(name)); /* NOLINT(*-const-cast) */ }
 
 } // namespace
 
diff --git a/common/atomic.h b/common/atomic.h
index 5e9b04c6..e85c4f76 100644
--- a/common/atomic.h
+++ b/common/atomic.h
@@ -2,17 +2,16 @@
 #define AL_ATOMIC_H
 
 #include <atomic>
+#include <memory>
 
+#include "almalloc.h"
 
-using RefCount = std::atomic<unsigned int>;
-
-inline void InitRef(RefCount &ref, unsigned int value)
-{ ref.store(value, std::memory_order_relaxed); }
-inline unsigned int ReadRef(RefCount &ref)
-{ return ref.load(std::memory_order_acquire); }
-inline unsigned int IncrementRef(RefCount &ref)
+template<typename T>
+auto IncrementRef(std::atomic<T> &ref) noexcept
 { return ref.fetch_add(1u, std::memory_order_acq_rel)+1u; }
-inline unsigned int DecrementRef(RefCount &ref)
+
+template<typename T>
+auto DecrementRef(std::atomic<T> &ref) noexcept
 { return ref.fetch_sub(1u, std::memory_order_acq_rel)-1u; }
 
 
@@ -30,4 +29,75 @@ inline void AtomicReplaceHead(std::atomic<T> &head, T newhead)
             std::memory_order_acq_rel, std::memory_order_acquire));
 }
 
+namespace al {
+
+template<typename T, typename D=std::default_delete<T>>
+class atomic_unique_ptr {
+    std::atomic<gsl::owner<T*>> mPointer{};
+
+    using unique_ptr_t = std::unique_ptr<T,D>;
+
+public:
+    atomic_unique_ptr() = default;
+    atomic_unique_ptr(const atomic_unique_ptr&) = delete;
+    explicit atomic_unique_ptr(std::nullptr_t) noexcept { }
+    explicit atomic_unique_ptr(gsl::owner<T*> ptr) noexcept : mPointer{ptr} { }
+    explicit atomic_unique_ptr(unique_ptr_t&& rhs) noexcept : mPointer{rhs.release()} { }
+    ~atomic_unique_ptr()
+    {
+        if(auto ptr = mPointer.exchange(nullptr, std::memory_order_relaxed))
+            D{}(ptr);
+    }
+
+    auto operator=(const atomic_unique_ptr&) -> atomic_unique_ptr& = delete;
+    auto operator=(std::nullptr_t) noexcept -> atomic_unique_ptr&
+    {
+        if(auto ptr = mPointer.exchange(nullptr))
+            D{}(ptr);
+        return *this;
+    }
+    auto operator=(unique_ptr_t&& rhs) noexcept -> atomic_unique_ptr&
+    {
+        if(auto ptr = mPointer.exchange(rhs.release()))
+            D{}(ptr);
+        return *this;
+    }
+
+    [[nodiscard]]
+    auto load(std::memory_order m=std::memory_order_seq_cst) const noexcept -> T*
+    { return mPointer.load(m); }
+    void store(std::nullptr_t, std::memory_order m=std::memory_order_seq_cst) noexcept
+    {
+        if(auto oldptr = mPointer.exchange(nullptr, m))
+            D{}(oldptr);
+    }
+    void store(gsl::owner<T*> ptr, std::memory_order m=std::memory_order_seq_cst) noexcept
+    {
+        if(auto oldptr = mPointer.exchange(ptr, m))
+            D{}(oldptr);
+    }
+    void store(unique_ptr_t&& ptr, std::memory_order m=std::memory_order_seq_cst) noexcept
+    {
+        if(auto oldptr = mPointer.exchange(ptr.release(), m))
+            D{}(oldptr);
+    }
+
+    [[nodiscard]]
+    auto exchange(std::nullptr_t, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t
+    { return unique_ptr_t{mPointer.exchange(nullptr, m)}; }
+    [[nodiscard]]
+    auto exchange(gsl::owner<T*> ptr, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t
+    { return unique_ptr_t{mPointer.exchange(ptr, m)}; }
+    /* Takes ownership from a unique_ptr using the same deleter D; returns the old pointer. */
+    [[nodiscard]] auto exchange(unique_ptr_t&& ptr, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t
+    { return unique_ptr_t{mPointer.exchange(ptr.release(), m)}; }
+
+    [[nodiscard]]
+    auto is_lock_free() const noexcept -> bool { return mPointer.is_lock_free(); }
+
+    static constexpr auto is_always_lock_free = std::atomic<gsl::owner<T*>>::is_always_lock_free;
+};
+
+} // namespace al
+
 #endif /* AL_ATOMIC_H */
diff --git a/common/dynload.cpp b/common/dynload.cpp
index 86c36e00..333a9435 100644
--- a/common/dynload.cpp
+++ b/common/dynload.cpp
@@ -3,13 +3,12 @@
 
 #include "dynload.h"
 
-#include "albit.h"
-#include "strutils.h"
-
 #ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
+#include "strutils.h"
+
 void *LoadLib(const char *name)
 {
     std::wstring wname{utf8_to_wstr(name)};
@@ -18,7 +17,7 @@ void *LoadLib(const char *name)
 void CloseLib(void *handle)
 { FreeLibrary(static_cast<HMODULE>(handle)); }
 void *GetSymbol(void *handle, const char *name)
-{ return al::bit_cast<void*>(GetProcAddress(static_cast<HMODULE>(handle), name)); }
+{ return reinterpret_cast<void*>(GetProcAddress(static_cast<HMODULE>(handle), name)); }
 
 #elif defined(HAVE_DLFCN_H)
 
diff --git a/common/flexarray.h b/common/flexarray.h
new file mode 100644
index 00000000..b8077988
--- /dev/null
+++ b/common/flexarray.h
@@ -0,0 +1,125 @@
+#ifndef AL_FLEXARRAY_H
+#define AL_FLEXARRAY_H
+
+#include <algorithm>
+#include <cstddef>
+#include <stdexcept>
+#include <type_traits>
+
+#include "almalloc.h"
+#include "alspan.h"
+
+namespace al {
+
+/* Storage for flexible array data. This is trivially destructible if type T is
+ * trivially destructible.
+ */
+template<typename T, size_t alignment, bool = std::is_trivially_destructible<T>::value>
+struct alignas(std::max(alignment, alignof(al::span<T>))) FlexArrayStorage : al::span<T> {
+    static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept
+    { return sizeof(FlexArrayStorage) + sizeof(T)*count + base; }
+
+    FlexArrayStorage(size_t size) noexcept(std::is_nothrow_constructible_v<T>)
+        : al::span<T>{::new(static_cast<void*>(this+1)) T[size], size}
+    { }
+    ~FlexArrayStorage() = default;
+
+    FlexArrayStorage(const FlexArrayStorage&) = delete;
+    FlexArrayStorage& operator=(const FlexArrayStorage&) = delete;
+};
+
+template<typename T, size_t alignment>
+struct alignas(std::max(alignment, alignof(al::span<T>))) FlexArrayStorage<T,alignment,false> : al::span<T> {
+    static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept
+    { return sizeof(FlexArrayStorage) + sizeof(T)*count + base; }
+
+    FlexArrayStorage(size_t size) noexcept(std::is_nothrow_constructible_v<T>)
+        : al::span<T>{::new(static_cast<void*>(this+1)) T[size], size}
+    { }
+    ~FlexArrayStorage() { std::destroy(this->begin(), this->end()); }
+
+    FlexArrayStorage(const FlexArrayStorage&) = delete;
+    FlexArrayStorage& operator=(const FlexArrayStorage&) = delete;
+};
+
+/* A flexible array type. Used either standalone or at the end of a parent
+ * struct, to have a run-time-sized array that's embedded with its size. Should
+ * be used delicately, ensuring there's no additional data after the FlexArray
+ * member.
+ */
+template<typename T, size_t Align=alignof(T)>
+struct FlexArray {
+    using element_type = T;
+    using value_type = std::remove_cv_t<T>;
+    using index_type = size_t;
+    using difference_type = ptrdiff_t;
+
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+
+    using iterator = pointer;
+    using const_iterator = const_pointer;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    static constexpr size_t alignment{std::max(alignof(T), Align)};
+    using Storage_t_ = FlexArrayStorage<element_type,alignment>;
+
+    const Storage_t_ mStore;
+
+    static constexpr index_type Sizeof(index_type count, index_type base=0u) noexcept
+    { return Storage_t_::Sizeof(count, base); }
+    static std::unique_ptr<FlexArray> Create(index_type count)
+    { return std::unique_ptr<FlexArray>{new(FamCount{count}) FlexArray{count}}; }
+
+    FlexArray(index_type size) noexcept(std::is_nothrow_constructible_v<Storage_t_,index_type>)
+        : mStore{size}
+    { }
+    ~FlexArray() = default;
+
+    [[nodiscard]] auto size() const noexcept -> index_type { return mStore.size(); }
+    [[nodiscard]] auto empty() const noexcept -> bool { return mStore.empty(); }
+
+    [[nodiscard]] auto data() noexcept -> pointer { return mStore.data(); }
+    [[nodiscard]] auto data() const noexcept -> const_pointer { return mStore.data(); }
+
+    [[nodiscard]] auto operator[](index_type i) noexcept -> reference { return mStore[i]; }
+    [[nodiscard]] auto operator[](index_type i) const noexcept -> const_reference { return mStore[i]; }
+
+    [[nodiscard]] auto front() noexcept -> reference { return mStore.front(); }
+    [[nodiscard]] auto front() const noexcept -> const_reference { return mStore.front(); }
+
+    [[nodiscard]] auto back() noexcept -> reference { return mStore.back(); }
+    [[nodiscard]] auto back() const noexcept -> const_reference { return mStore.back(); }
+
+    [[nodiscard]] auto begin() noexcept -> iterator { return mStore.begin(); }
+    [[nodiscard]] auto begin() const noexcept -> const_iterator { return mStore.cbegin(); }
+    [[nodiscard]] auto cbegin() const noexcept -> const_iterator { return mStore.cbegin(); }
+    [[nodiscard]] auto end() noexcept -> iterator { return mStore.end(); }
+    [[nodiscard]] auto end() const noexcept -> const_iterator { return mStore.cend(); }
+    [[nodiscard]] auto cend() const noexcept -> const_iterator { return mStore.cend(); }
+    /* std::reverse_iterator's constructor is explicit, so wrap the iterators by name. */
+    [[nodiscard]] auto rbegin() noexcept -> reverse_iterator { return reverse_iterator{end()}; }
+    [[nodiscard]] auto rbegin() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cend()}; }
+    [[nodiscard]] auto crbegin() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cend()}; }
+    [[nodiscard]] auto rend() noexcept -> reverse_iterator { return reverse_iterator{begin()}; }
+    [[nodiscard]] auto rend() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cbegin()}; }
+    [[nodiscard]] auto crend() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cbegin()}; }
+
+    gsl::owner<void*> operator new(size_t, FamCount count)
+    { return ::operator new[](Sizeof(count), std::align_val_t{alignof(FlexArray)}); }
+    void operator delete(gsl::owner<void*> block, FamCount) noexcept
+    { ::operator delete[](block, std::align_val_t{alignof(FlexArray)}); }
+    void operator delete(gsl::owner<void*> block) noexcept
+    { ::operator delete[](block, std::align_val_t{alignof(FlexArray)}); }
+
+    void *operator new(size_t size) = delete;
+    void *operator new[](size_t size) = delete;
+    void operator delete[](void *block) = delete;
+};
+
+} // namespace al
+
+#endif /* AL_FLEXARRAY_H */
diff --git a/common/intrusive_ptr.h b/common/intrusive_ptr.h
index 27075347..0152b92a 100644
--- a/common/intrusive_ptr.h
+++ b/common/intrusive_ptr.h
@@ -11,7 +11,7 @@ namespace al {
 
 template<typename T>
 class intrusive_ref {
-    RefCount mRef{1u};
+    std::atomic<unsigned int> mRef{1u};
 
 public:
     unsigned int add_ref() noexcept { return IncrementRef(mRef); }
@@ -81,9 +81,9 @@ public:
 
     explicit operator bool() const noexcept { return mPtr != nullptr; }
 
-    T& operator*() const noexcept { return *mPtr; }
-    T* operator->() const noexcept { return mPtr; }
-    T* get() const noexcept { return mPtr; }
+    [[nodiscard]] auto operator*() const noexcept -> T& { return *mPtr; }
+    [[nodiscard]] auto operator->() const noexcept -> T* { return mPtr; }
+    [[nodiscard]] auto get() const noexcept -> T* { return mPtr; }
 
     void reset(T *ptr=nullptr) noexcept
     {
diff --git a/common/opthelpers.h b/common/opthelpers.h
index dc43ccdb..ae2611da 100644
--- a/common/opthelpers.h
+++ b/common/opthelpers.h
@@ -42,7 +42,7 @@
 #elif HAS_BUILTIN(__builtin_unreachable)
 #define ASSUME(x) do { if(x) break; __builtin_unreachable(); } while(0)
 #else
-#define ASSUME(x) ((void)0)
+#define ASSUME(x) (static_cast<void>(0))
 #endif
 
 /* This shouldn't be needed since unknown attributes are ignored, but older
diff --git a/common/pffft.cpp b/common/pffft.cpp
index 71f71fa6..46d97918 100644
--- a/common/pffft.cpp
+++ b/common/pffft.cpp
@@ -58,16 +58,17 @@
 #include "pffft.h"
 
 #include <array>
-#include <assert.h>
+#include <cassert>
 #include <cmath>
+#include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <stdio.h>
-#include <stdlib.h>
 #include <vector>
 
 #include "albit.h"
 #include "almalloc.h"
 #include "alnumbers.h"
+#include "alnumeric.h"
 #include "alspan.h"
 #include "opthelpers.h"
 
@@ -90,8 +91,8 @@ using uint = unsigned int;
  * Altivec support macros
  */
 #if defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) || defined(__powerpc64__)
-typedef vector float v4sf;
-#define SIMD_SZ 4
+using v4sf = vector float;
+constexpr uint SimdSize{4};
 #define VZERO() ((vector float) vec_splat_u8(0))
 #define VMUL(a,b) vec_madd(a,b, VZERO())
 #define VADD vec_add
@@ -142,19 +143,27 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept
     (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
 
 #include <xmmintrin.h>
-typedef __m128 v4sf;
-#define SIMD_SZ 4 // 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors.
+using v4sf = __m128;
+/* 4 floats per SIMD vector -- this is pretty much hardcoded in the preprocess/
+ * finalize functions anyway, so enabling AVX with its 256-bit vectors would
+ * take additional work.
+ */
+constexpr uint SimdSize{4};
 #define VZERO _mm_setzero_ps
 #define VMUL _mm_mul_ps
 #define VADD _mm_add_ps
-#define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c)
+force_inline v4sf vmadd(const v4sf a, const v4sf b, const v4sf c) noexcept
+{ return _mm_add_ps(_mm_mul_ps(a,b), c); }
+#define VMADD vmadd
 #define VSUB _mm_sub_ps
 #define LD_PS1 _mm_set1_ps
 #define VSET4 _mm_setr_ps
-#define VINSERT0(v, a) _mm_move_ss((v), _mm_set_ss(a))
+force_inline v4sf vinsert0(const v4sf v, const float a) noexcept
+{ return _mm_move_ss(v, _mm_set_ss(a)); }
+#define VINSERT0 vinsert0
 #define VEXTRACT0 _mm_cvtss_f32
 
-force_inline void interleave2(v4sf in1, v4sf in2, v4sf &out1, v4sf &out2) noexcept
+force_inline void interleave2(const v4sf in1, const v4sf in2, v4sf &out1, v4sf &out2) noexcept
 {
     v4sf tmp{_mm_unpacklo_ps(in1, in2)};
     out2 = _mm_unpackhi_ps(in1, in2);
@@ -170,7 +179,7 @@ force_inline void uninterleave2(v4sf in1, v4sf in2, v4sf &out1, v4sf &out2) noex
 force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept
 { _MM_TRANSPOSE4_PS(x0, x1, x2, x3); }
 
-#define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
+#define VSWAPHL(a,b) _mm_shuffle_ps((b), (a), _MM_SHUFFLE(3,2,1,0))
 
 /*
  * ARM NEON support macros
@@ -178,8 +187,8 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept
 #elif defined(__ARM_NEON) || defined(__aarch64__) || defined(__arm64)
 
 #include <arm_neon.h>
-typedef float32x4_t v4sf;
-#define SIMD_SZ 4
+using v4sf = float32x4_t;
+constexpr uint SimdSize{4};
 #define VZERO() vdupq_n_f32(0)
 #define VMUL vmulq_f32
 #define VADD vaddq_f32
@@ -238,7 +247,7 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept
 #elif defined(__GNUC__)
 
 using v4sf [[gnu::vector_size(16), gnu::aligned(16)]] = float;
-#define SIMD_SZ 4
+constexpr uint SimdSize{4};
 #define VZERO() v4sf{0,0,0,0}
 #define VMUL(a,b) ((a) * (b))
 #define VADD(a,b) ((a) + (b))
@@ -297,8 +306,8 @@ force_inline v4sf vswaphl(v4sf a, v4sf b) noexcept
 
 // fallback mode for situations where SIMD is not available, use scalar mode instead
 #ifdef PFFFT_SIMD_DISABLE
-typedef float v4sf;
-#define SIMD_SZ 1
+using v4sf = float;
+constexpr uint SimdSize{1};
 #define VZERO() 0.f
 #define VMUL(a,b) ((a)*(b))
 #define VADD(a,b) ((a)+(b))
@@ -309,7 +318,7 @@ typedef float v4sf;
 
 inline bool valigned(const float *ptr) noexcept
 {
-    static constexpr uintptr_t alignmask{SIMD_SZ*4 - 1};
+    static constexpr uintptr_t alignmask{SimdSize*4 - 1};
     return (reinterpret_cast<uintptr_t>(ptr) & alignmask) == 0;
 }
 
@@ -335,14 +344,14 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept
 [[maybe_unused]] void validate_pffft_simd()
 {
     using float4 = std::array<float,4>;
-    static constexpr float f[16]{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+    static constexpr std::array<float,16> f{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}};
 
     float4 a0_f, a1_f, a2_f, a3_f, t_f, u_f;
     v4sf a0_v, a1_v, a2_v, a3_v, t_v, u_v;
-    std::memcpy(&a0_v, f, 4*sizeof(float));
-    std::memcpy(&a1_v, f+4, 4*sizeof(float));
-    std::memcpy(&a2_v, f+8, 4*sizeof(float));
-    std::memcpy(&a3_v, f+12, 4*sizeof(float));
+    std::memcpy(&a0_v, f.data(), 4*sizeof(float));
+    std::memcpy(&a1_v, f.data()+4, 4*sizeof(float));
+    std::memcpy(&a2_v, f.data()+8, 4*sizeof(float));
+    std::memcpy(&a3_v, f.data()+12, 4*sizeof(float));
 
     t_v = VZERO(); t_f = al::bit_cast<float4>(t_v);
     printf("VZERO=[%2g %2g %2g %2g]\n", t_f[0], t_f[1], t_f[2], t_f[3]); assertv4(t, 0, 0, 0, 0);
@@ -379,7 +388,9 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept
 #endif //!PFFFT_SIMD_DISABLE
 
 /* SSE and co like 16-bytes aligned pointers */
-#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines...
+/* with a 64-byte alignment, we are even aligned on L2 cache lines... */
+constexpr auto V4sfAlignment = size_t(64);
+constexpr auto V4sfAlignVal = std::align_val_t(V4sfAlignment);
 
 /*
   passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
@@ -538,8 +549,8 @@ NOINLINE void passf5_ps(const size_t ido, const size_t l1, const v4sf *cc, v4sf
     const v4sf ti11{LD_PS1(0.951056516295154f*fsign)};
     const v4sf ti12{LD_PS1(0.587785252292473f*fsign)};
 
-#define cc_ref(a_1,a_2) cc[(a_2-1)*ido + (a_1) + 1]
-#define ch_ref(a_1,a_3) ch[(a_3-1)*l1*ido + (a_1) + 1]
+#define cc_ref(a_1,a_2) cc[((a_2)-1)*ido + (a_1) + 1]
+#define ch_ref(a_1,a_3) ch[((a_3)-1)*l1*ido + (a_1) + 1]
 
     assert(ido > 2);
     for(size_t k{0};k < l1;++k, cc += 5*ido, ch += ido)
@@ -958,8 +969,8 @@ void radf5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf *
     const v4sf tr12{LD_PS1(-0.809016994374947f)};
     const v4sf ti12{LD_PS1(0.587785252292473f)};
 
-#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1]
-#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1]
+#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + (a_1)]
+#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + (a_1)]
 
     /* Parameter adjustments */
     ch -= 1 + ido * 6;
@@ -1040,8 +1051,8 @@ void radb5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf *
     const v4sf tr12{LD_PS1(-0.809016994374947f)};
     const v4sf ti12{LD_PS1(0.587785252292473f)};
 
-#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1]
-#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1]
+#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + (a_1)]
+#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + (a_1)]
 
     /* Parameter adjustments */
     ch -= 1 + ido*(1 + l1);
@@ -1331,7 +1342,7 @@ uint decompose(const uint n, const al::span<uint,15> ifac, const al::span<const
 
 void rffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac)
 {
-    static constexpr uint ntryh[]{4,2,3,5};
+    static constexpr std::array ntryh{4u,2u,3u,5u};
 
     const uint nf{decompose(n, ifac, ntryh)};
     const double argh{2.0*al::numbers::pi / n};
@@ -1365,7 +1376,7 @@ void rffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac)
 
 void cffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac)
 {
-    static constexpr uint ntryh[]{5,3,4,2};
+    static constexpr std::array ntryh{5u,3u,4u,2u};
 
     const uint nf{decompose(n, ifac, ntryh)};
     const double argh{2.0*al::numbers::pi / n};
@@ -1405,24 +1416,20 @@ void cffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac)
 
 } // namespace
 
-void *pffft_aligned_malloc(size_t nb_bytes)
-{ return al_malloc(MALLOC_V4SF_ALIGNMENT, nb_bytes); }
-
-void pffft_aligned_free(void *p) { al_free(p); }
-
-int pffft_simd_size() { return SIMD_SZ; }
-
+/* NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) */
 struct PFFFT_Setup {
-    uint N;
-    uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL)
-    std::array<uint,15> ifac;
-    pffft_transform_t transform;
+    uint N{};
+    uint Ncvec{}; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */
+    std::array<uint,15> ifac{};
+    pffft_transform_t transform{};
 
-    float *twiddle; // N/4 elements
-    alignas(MALLOC_V4SF_ALIGNMENT) v4sf e[1]; // N/4*3 elements
+    float *twiddle{}; /* N/4 elements */
+    al::span<v4sf> e; /* N/4*3 elements */
+
+    alignas(V4sfAlignment) std::byte end;
 };
 
-PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
+gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform)
 {
     assert(transform == PFFFT_REAL || transform == PFFFT_COMPLEX);
     assert(N > 0);
@@ -1431,50 +1438,53 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
      * handle other cases (or maybe just switch to a scalar fft, I don't know..)
      */
     if(transform == PFFFT_REAL)
-        assert((N%(2*SIMD_SZ*SIMD_SZ)) == 0);
+        assert((N%(2*SimdSize*SimdSize)) == 0);
     else
-        assert((N%(SIMD_SZ*SIMD_SZ)) == 0);
+        assert((N%(SimdSize*SimdSize)) == 0);
 
-    const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ;
-    const size_t storelen{offsetof(PFFFT_Setup, e[0]) + (2u*Ncvec * sizeof(v4sf))};
+    const uint Ncvec{(transform == PFFFT_REAL ? N/2 : N) / SimdSize};
 
-    void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)};
-    if(!store) return nullptr;
+    const size_t storelen{std::max(offsetof(PFFFT_Setup, end) + 2_zu*Ncvec*sizeof(v4sf),
+        sizeof(PFFFT_Setup))};
+    auto storage = static_cast<gsl::owner<std::byte*>>(::operator new[](storelen, V4sfAlignVal));
+    al::span extrastore{&storage[offsetof(PFFFT_Setup, end)], 2_zu*Ncvec*sizeof(v4sf)};
 
-    PFFFT_Setup *s{::new(store) PFFFT_Setup{}};
+    gsl::owner<PFFFT_Setup*> s{::new(storage) PFFFT_Setup{}};
     s->N = N;
     s->transform = transform;
-    /* nb of complex simd vectors */
     s->Ncvec = Ncvec;
-    s->twiddle = reinterpret_cast<float*>(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]);
 
-    if constexpr(SIMD_SZ > 1)
+    const size_t ecount{2_zu*Ncvec*(SimdSize-1)/SimdSize};
+    s->e = {std::launder(reinterpret_cast<v4sf*>(extrastore.data())), ecount};
+    s->twiddle = std::launder(reinterpret_cast<float*>(&extrastore[ecount*sizeof(v4sf)]));
+
+    if constexpr(SimdSize > 1)
     {
-        auto e = std::vector<float>(2u*Ncvec*(SIMD_SZ-1), 0.0f);
+        auto e = std::vector<float>(s->e.size()*SimdSize, 0.0f);
         for(size_t k{0};k < s->Ncvec;++k)
         {
-            const size_t i{k / SIMD_SZ};
-            const size_t j{k % SIMD_SZ};
-            for(size_t m{0};m < SIMD_SZ-1;++m)
+            const size_t i{k / SimdSize};
+            const size_t j{k % SimdSize};
+            for(size_t m{0};m < SimdSize-1;++m)
             {
                 const double A{-2.0*al::numbers::pi*static_cast<double>((m+1)*k) / N};
-                e[((i*3 + m)*2 + 0)*SIMD_SZ + j] = static_cast<float>(std::cos(A));
-                e[((i*3 + m)*2 + 1)*SIMD_SZ + j] = static_cast<float>(std::sin(A));
+                e[((i*3 + m)*2 + 0)*SimdSize + j] = static_cast<float>(std::cos(A));
+                e[((i*3 + m)*2 + 1)*SimdSize + j] = static_cast<float>(std::sin(A));
             }
         }
-        std::memcpy(s->e, e.data(), e.size()*sizeof(float));
+        std::memcpy(s->e.data(), e.data(), e.size()*sizeof(float));
     }
     if(transform == PFFFT_REAL)
-        rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+        rffti1_ps(N/SimdSize, s->twiddle, s->ifac);
     else
-        cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+        cffti1_ps(N/SimdSize, s->twiddle, s->ifac);
 
     /* check that N is decomposable with allowed prime factors */
     size_t m{1};
     for(size_t k{0};k < s->ifac[1];++k)
         m *= s->ifac[2+k];
 
-    if(m != N/SIMD_SZ)
+    if(m != N/SimdSize)
     {
         pffft_destroy_setup(s);
         s = nullptr;
@@ -1484,10 +1494,10 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
 }
 
 
-void pffft_destroy_setup(PFFFT_Setup *s)
+void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> s) noexcept
 {
     std::destroy_at(s);
-    al_free(s);
+    ::operator delete[](gsl::owner<void*>{s}, V4sfAlignVal);
 }
 
 #if !defined(PFFFT_SIMD_DISABLE)
@@ -1537,7 +1547,7 @@ void pffft_cplx_finalize(const size_t Ncvec, const v4sf *in, v4sf *RESTRICT out,
 {
     assert(in != out);
 
-    const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks
+    const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks
     for(size_t k{0};k < dk;++k)
     {
         v4sf r0{in[8*k+0]}, i0{in[8*k+1]};
@@ -1581,7 +1591,7 @@ void pffft_cplx_preprocess(const size_t Ncvec, const v4sf *in, v4sf *RESTRICT ou
 {
     assert(in != out);
 
-    const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks
+    const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks
     for(size_t k{0};k < dk;++k)
     {
         v4sf r0{in[8*k+0]}, i0{in[8*k+1]};
@@ -1674,12 +1684,12 @@ NOINLINE void pffft_real_finalize(const size_t Ncvec, const v4sf *in, v4sf *REST
     static constexpr float s{al::numbers::sqrt2_v<float>/2.0f};
 
     assert(in != out);
-    const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks
+    const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks
     /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
 
     const v4sf zero{VZERO()};
-    const auto cr = al::bit_cast<std::array<float,SIMD_SZ>>(in[0]);
-    const auto ci = al::bit_cast<std::array<float,SIMD_SZ>>(in[Ncvec*2-1]);
+    const auto cr = al::bit_cast<std::array<float,SimdSize>>(in[0]);
+    const auto ci = al::bit_cast<std::array<float,SimdSize>>(in[Ncvec*2-1]);
     pffft_real_finalize_4x4(&zero, &zero, in+1, e, out);
 
     /* [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
@@ -1765,11 +1775,11 @@ NOINLINE void pffft_real_preprocess(const size_t Ncvec, const v4sf *in, v4sf *RE
     static constexpr float sqrt2{al::numbers::sqrt2_v<float>};
 
     assert(in != out);
-    const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks
+    const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks
     /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
 
-    std::array<float,SIMD_SZ> Xr, Xi;
-    for(size_t k{0};k < SIMD_SZ;++k)
+    std::array<float,SimdSize> Xr, Xi;
+    for(size_t k{0};k < SimdSize;++k)
     {
         Xr[k] = VEXTRACT0(in[2*k]);
         Xi[k] = VEXTRACT0(in[2*k + 1]);
@@ -1813,7 +1823,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf
     const size_t Ncvec{setup->Ncvec};
     const bool nf_odd{(setup->ifac[1]&1) != 0};
 
-    v4sf *buff[2]{voutput, scratch};
+    std::array buff{voutput, scratch};
     bool ib{nf_odd != ordered};
     if(direction == PFFFT_FORWARD)
     {
@@ -1824,7 +1834,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf
         if(setup->transform == PFFFT_REAL)
         {
             ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], setup->twiddle, setup->ifac) == buff[1]);
-            pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e);
+            pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data());
         }
         else
         {
@@ -1833,7 +1843,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf
                 uninterleave2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]);
 
             ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle, setup->ifac, -1.0f) == buff[1]);
-            pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e);
+            pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data());
         }
         if(ordered)
             pffft_zreorder(setup, reinterpret_cast<float*>(buff[!ib]),
@@ -1855,12 +1865,12 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf
         }
         if(setup->transform == PFFFT_REAL)
         {
-            pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e);
+            pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e.data());
             ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac) == buff[1]);
         }
         else
         {
-            pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e);
+            pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e.data());
             ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1],  setup->twiddle, setup->ifac, +1.0f) == buff[1]);
             for(size_t k{0};k < Ncvec;++k)
                 interleave2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]);
@@ -1899,8 +1909,8 @@ void pffft_zreorder(const PFFFT_Setup *setup, const float *in, float *out,
                 interleave2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]);
                 interleave2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]);
             }
-            reversed_copy(dk, vin+2, 8, vout + N/SIMD_SZ/2);
-            reversed_copy(dk, vin+6, 8, vout + N/SIMD_SZ);
+            reversed_copy(dk, vin+2, 8, vout + N/SimdSize/2);
+            reversed_copy(dk, vin+6, 8, vout + N/SimdSize);
         }
         else
         {
@@ -1909,8 +1919,8 @@ void pffft_zreorder(const PFFFT_Setup *setup, const float *in, float *out,
                 uninterleave2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]);
                 uninterleave2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]);
             }
-            unreversed_copy(dk, vin + N/SIMD_SZ/4, vout + N/SIMD_SZ - 6, -8);
-            unreversed_copy(dk, vin + 3*N/SIMD_SZ/4, vout + N/SIMD_SZ - 2, -8);
+            unreversed_copy(dk, vin + N/SimdSize/4, vout + N/SimdSize - 6, -8);
+            unreversed_copy(dk, vin + 3_uz*N/SimdSize/4, vout + N/SimdSize - 2, -8);
         }
     }
     else
diff --git a/common/pffft.h b/common/pffft.h
index 9cff9e54..cf356524 100644
--- a/common/pffft.h
+++ b/common/pffft.h
@@ -79,36 +79,32 @@
 #ifndef PFFFT_H
 #define PFFFT_H
 
-#include <stddef.h> // for size_t
-#include <stdint.h>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+#include "almalloc.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
 
 /* opaque struct holding internal stuff (precomputed twiddle factors) this
  * struct can be shared by many threads as it contains only read-only data.
  */
-typedef struct PFFFT_Setup PFFFT_Setup;
-
-#ifndef PFFFT_COMMON_ENUMS
-#define PFFFT_COMMON_ENUMS
+struct PFFFT_Setup;
 
 /* direction of the transform */
-typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
+enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD };
 
 /* type of transform */
-typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
-
-#endif
+enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX };
 
 /**
  * Prepare for performing transforms of size N -- the returned PFFFT_Setup
  * structure is read-only so it can safely be shared by multiple concurrent
  * threads.
  */
-PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform);
-void pffft_destroy_setup(PFFFT_Setup *setup);
+[[gnu::malloc]]
+gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform);
+void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> setup) noexcept;
 
 /**
  * Perform a Fourier transform. The z-domain data is stored in the most
@@ -174,19 +170,47 @@ void pffft_zconvolve_scale_accumulate(const PFFFT_Setup *setup, const float *dft
  */
 void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab);
 
-/**
- * The float buffers must have the correct alignment (16-byte boundary on intel
- * and powerpc). This function may be used to obtain such correctly aligned
- * buffers.
- */
-void *pffft_aligned_malloc(size_t nb_bytes);
-void pffft_aligned_free(void *ptr);
 
-/* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */
-int pffft_simd_size();
+/* Move-only owner of a PFFFT_Setup: created by pffft_new_setup, released by
+ * pffft_destroy_setup. The methods forward to the pffft_* calls with mSetup.
+ */
+struct PFFFTSetup {
+    gsl::owner<PFFFT_Setup*> mSetup{};
+
+    PFFFTSetup() = default;
+    PFFFTSetup(const PFFFTSetup&) = delete;
+    PFFFTSetup(PFFFTSetup&& rhs) noexcept : mSetup{std::exchange(rhs.mSetup, nullptr)} { }
+    explicit PFFFTSetup(std::nullptr_t) noexcept { }
+    explicit PFFFTSetup(unsigned int n, pffft_transform_t transform)
+        : mSetup{pffft_new_setup(n, transform)} { }
+    ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); }
+
+    PFFFTSetup& operator=(const PFFFTSetup&) = delete;
+    PFFFTSetup& operator=(PFFFTSetup&& rhs) noexcept
+    {
+        /* Self-move must not destroy the setup this object still owns. */
+        if(this == &rhs) return *this;
+        if(mSetup) pffft_destroy_setup(mSetup);
+        mSetup = std::exchange(rhs.mSetup, nullptr);
+        return *this;
+    }
+
+    void transform(const float *input, float *output, float *work, pffft_direction_t direction) const
+    { pffft_transform(mSetup, input, output, work, direction); }
+    void transform_ordered(const float *input, float *output, float *work,
+        pffft_direction_t direction) const
+    { pffft_transform_ordered(mSetup, input, output, work, direction); }
+
+    void zreorder(const float *input, float *output, pffft_direction_t direction) const
+    { pffft_zreorder(mSetup, input, output, direction); }
+
+    void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab,
+        float scaling) const
+    { pffft_zconvolve_scale_accumulate(mSetup, dft_a, dft_b, dft_ab, scaling); }
+    void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const
+    { pffft_zconvolve_accumulate(mSetup, dft_a, dft_b, dft_ab); }
 
-#ifdef __cplusplus
-}
-#endif
+    [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; }
+};
 
 #endif // PFFFT_H
diff --git a/common/phase_shifter.h b/common/phase_shifter.h
index e1a83dab..1b3463de 100644
--- a/common/phase_shifter.h
+++ b/common/phase_shifter.h
@@ -10,6 +10,7 @@
 #include <array>
 #include <stddef.h>
 #include <type_traits>
+#include <vector>
 
 #include "alcomplex.h"
 #include "alspan.h"
@@ -52,20 +53,19 @@ struct PhaseShifterT {
         constexpr size_t fft_size{FilterSize};
         constexpr size_t half_size{fft_size / 2};
 
-        auto fftBuffer = std::make_unique<complex_d[]>(fft_size);
-        std::fill_n(fftBuffer.get(), fft_size, complex_d{});
+        auto fftBuffer = std::vector<complex_d>(fft_size, complex_d{});
         fftBuffer[half_size] = 1.0;
 
-        forward_fft(al::span{fftBuffer.get(), fft_size});
+        forward_fft(al::span{fftBuffer});
         fftBuffer[0] *= std::numeric_limits<double>::epsilon();
         for(size_t i{1};i < half_size;++i)
             fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
         fftBuffer[half_size] *= std::numeric_limits<double>::epsilon();
         for(size_t i{half_size+1};i < fft_size;++i)
             fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
-        inverse_fft(al::span{fftBuffer.get(), fft_size});
+        inverse_fft(al::span{fftBuffer});
 
-        auto fftiter = fftBuffer.get() + fft_size - 1;
+        auto fftiter = fftBuffer.data() + fft_size - 1;
         for(float &coeff : mCoeffs)
         {
             coeff = static_cast<float>(fftiter->real() / double{fft_size});
diff --git a/common/polyphase_resampler.h b/common/polyphase_resampler.h
index 557485bb..764111c9 100644
--- a/common/polyphase_resampler.h
+++ b/common/polyphase_resampler.h
@@ -40,7 +40,7 @@ struct PPhaseResampler {
     explicit operator bool() const noexcept { return !mF.empty(); }
 
 private:
-    uint mP, mQ, mM, mL;
+    uint mP{}, mQ{}, mM{}, mL{};
     std::vector<double> mF;
 };
 
diff --git a/common/ringbuffer.cpp b/common/ringbuffer.cpp
index af1f3669..2636bfb4 100644
--- a/common/ringbuffer.cpp
+++ b/common/ringbuffer.cpp
@@ -24,6 +24,8 @@
 
 #include <algorithm>
 #include <climits>
+#include <cstdint>
+#include <limits>
 #include <stdexcept>
 
 #include "almalloc.h"
@@ -40,7 +42,7 @@ RingBufferPtr RingBuffer::Create(std::size_t sz, std::size_t elem_sz, int limit_
         power_of_two |= power_of_two>>4;
         power_of_two |= power_of_two>>8;
         power_of_two |= power_of_two>>16;
-        if constexpr(SIZE_MAX > UINT_MAX)
+        if constexpr(sizeof(size_t) > sizeof(uint32_t))
             power_of_two |= power_of_two>>32;
     }
     ++power_of_two;
@@ -159,7 +161,7 @@ std::size_t RingBuffer::write(const void *src, std::size_t cnt) noexcept
 }
 
 
-auto RingBuffer::getReadVector() const noexcept -> DataPair
+auto RingBuffer::getReadVector() noexcept -> DataPair
 {
     DataPair ret;
 
@@ -174,15 +176,15 @@ auto RingBuffer::getReadVector() const noexcept -> DataPair
     {
         /* Two part vector: the rest of the buffer after the current read ptr,
          * plus some from the start of the buffer. */
-        ret.first.buf = const_cast<std::byte*>(mBuffer.data() + r*mElemSize);
+        ret.first.buf = mBuffer.data() + r*mElemSize;
         ret.first.len = mSizeMask+1 - r;
-        ret.second.buf = const_cast<std::byte*>(mBuffer.data());
+        ret.second.buf = mBuffer.data();
         ret.second.len = cnt2 & mSizeMask;
     }
     else
     {
         /* Single part vector: just the rest of the buffer */
-        ret.first.buf = const_cast<std::byte*>(mBuffer.data() + r*mElemSize);
+        ret.first.buf = mBuffer.data() + r*mElemSize;
         ret.first.len = free_cnt;
         ret.second.buf = nullptr;
         ret.second.len = 0;
@@ -191,7 +193,7 @@ auto RingBuffer::getReadVector() const noexcept -> DataPair
     return ret;
 }
 
-auto RingBuffer::getWriteVector() const noexcept -> DataPair
+auto RingBuffer::getWriteVector() noexcept -> DataPair
 {
     DataPair ret;
 
@@ -206,14 +208,14 @@ auto RingBuffer::getWriteVector() const noexcept -> DataPair
     {
         /* Two part vector: the rest of the buffer after the current write ptr,
          * plus some from the start of the buffer. */
-        ret.first.buf = const_cast<std::byte*>(mBuffer.data() + w*mElemSize);
+        ret.first.buf = mBuffer.data() + w*mElemSize;
         ret.first.len = mSizeMask+1 - w;
-        ret.second.buf = const_cast<std::byte*>(mBuffer.data());
+        ret.second.buf = mBuffer.data();
         ret.second.len = cnt2 & mSizeMask;
     }
     else
     {
-        ret.first.buf = const_cast<std::byte*>(mBuffer.data() + w*mElemSize);
+        ret.first.buf = mBuffer.data() + w*mElemSize;
         ret.first.len = free_cnt;
         ret.second.buf = nullptr;
         ret.second.len = 0;
diff --git a/common/ringbuffer.h b/common/ringbuffer.h
index 8c65c3af..ee59205a 100644
--- a/common/ringbuffer.h
+++ b/common/ringbuffer.h
@@ -7,6 +7,7 @@
 #include <utility>
 
 #include "almalloc.h"
+#include "flexarray.h"
 
 
 /* NOTE: This lockless ringbuffer implementation is copied from JACK, extended
@@ -32,30 +33,29 @@ public:
     };
     using DataPair = std::pair<Data,Data>;
 
-
     RingBuffer(const std::size_t count) : mBuffer{count} { }
 
     /** Reset the read and write pointers to zero. This is not thread safe. */
-    void reset() noexcept;
+    auto reset() noexcept -> void;
 
     /**
      * The non-copying data reader. Returns two ringbuffer data pointers that
      * hold the current readable data. If the readable data is in one segment
      * the second segment has zero length.
      */
-    DataPair getReadVector() const noexcept;
+    [[nodiscard]] auto getReadVector() noexcept -> DataPair;
     /**
      * The non-copying data writer. Returns two ringbuffer data pointers that
      * hold the current writeable data. If the writeable data is in one segment
      * the second segment has zero length.
      */
-    DataPair getWriteVector() const noexcept;
+    [[nodiscard]] auto getWriteVector() noexcept -> DataPair;
 
     /**
      * Return the number of elements available for reading. This is the number
      * of elements in front of the read pointer and behind the write pointer.
      */
-    std::size_t readSpace() const noexcept
+    [[nodiscard]] auto readSpace() const noexcept -> size_t
     {
         const size_t w{mWritePtr.load(std::memory_order_acquire)};
         const size_t r{mReadPtr.load(std::memory_order_acquire)};
@@ -66,14 +66,14 @@ public:
      * The copying data reader. Copy at most `cnt' elements into `dest'.
      * Returns the actual number of elements copied.
      */
-    std::size_t read(void *dest, std::size_t cnt) noexcept;
+    [[nodiscard]] auto read(void *dest, size_t cnt) noexcept -> size_t;
     /**
      * The copying data reader w/o read pointer advance. Copy at most `cnt'
      * elements into `dest'. Returns the actual number of elements copied.
      */
-    std::size_t peek(void *dest, std::size_t cnt) const noexcept;
+    [[nodiscard]] auto peek(void *dest, size_t cnt) const noexcept -> size_t;
     /** Advance the read pointer `cnt' places. */
-    void readAdvance(std::size_t cnt) noexcept
+    auto readAdvance(size_t cnt) noexcept -> void
     { mReadPtr.fetch_add(cnt, std::memory_order_acq_rel); }
 
 
@@ -81,7 +81,7 @@ public:
      * Return the number of elements available for writing. This is the number
      * of elements in front of the write pointer and behind the read pointer.
      */
-    std::size_t writeSpace() const noexcept
+    [[nodiscard]] auto writeSpace() const noexcept -> size_t
     {
         const size_t w{mWritePtr.load(std::memory_order_acquire)};
         const size_t r{mReadPtr.load(std::memory_order_acquire) + mWriteSize - mSizeMask};
@@ -92,12 +92,12 @@ public:
      * The copying data writer. Copy at most `cnt' elements from `src'. Returns
      * the actual number of elements copied.
      */
-    std::size_t write(const void *src, std::size_t cnt) noexcept;
+    [[nodiscard]] auto write(const void *src, size_t cnt) noexcept -> size_t;
     /** Advance the write pointer `cnt' places. */
-    void writeAdvance(std::size_t cnt) noexcept
+    auto writeAdvance(size_t cnt) noexcept -> void
     { mWritePtr.fetch_add(cnt, std::memory_order_acq_rel); }
 
-    std::size_t getElemSize() const noexcept { return mElemSize; }
+    [[nodiscard]] auto getElemSize() const noexcept -> size_t { return mElemSize; }
 
     /**
      * Create a new ringbuffer to hold at least `sz' elements of `elem_sz'
@@ -105,7 +105,8 @@ public:
      * (even if it is already a power of two, to ensure the requested amount
      * can be written).
      */
-    static std::unique_ptr<RingBuffer> Create(std::size_t sz, std::size_t elem_sz, int limit_writes);
+    [[nodiscard]]
+    static auto Create(size_t sz, size_t elem_sz, int limit_writes) -> std::unique_ptr<RingBuffer>;
 
     DEF_FAM_NEWDEL(RingBuffer, mBuffer)
 };
diff --git a/common/vecmat.h b/common/vecmat.h
index a45f262f..0cdb82eb 100644
--- a/common/vecmat.h
+++ b/common/vecmat.h
@@ -14,7 +14,7 @@ namespace alu {
 template<typename T>
 class VectorR {
     static_assert(std::is_floating_point<T>::value, "Must use floating-point types");
-    alignas(16) T mVals[4];
+    alignas(16) std::array<T,4> mVals;
 
 public:
     constexpr VectorR() noexcept = default;
@@ -58,7 +58,7 @@ public:
         return T{0};
     }
 
-    constexpr VectorR cross_product(const alu::VectorR<T> &rhs) const noexcept
+    [[nodiscard]] constexpr auto cross_product(const alu::VectorR<T> &rhs) const noexcept -> VectorR
     {
         return VectorR{
             mVals[1]*rhs.mVals[2] - mVals[2]*rhs.mVals[1],
@@ -67,7 +67,7 @@ public:
             T{0}};
     }
 
-    constexpr T dot_product(const alu::VectorR<T> &rhs) const noexcept
+    [[nodiscard]] constexpr auto dot_product(const alu::VectorR<T> &rhs) const noexcept -> T
     { return mVals[0]*rhs.mVals[0] + mVals[1]*rhs.mVals[1] + mVals[2]*rhs.mVals[2]; }
 };
 using Vector = VectorR<float>;
@@ -75,7 +75,7 @@ using Vector = VectorR<float>;
 template<typename T>
 class MatrixR {
     static_assert(std::is_floating_point<T>::value, "Must use floating-point types");
-    alignas(16) T mVals[16];
+    alignas(16) std::array<T,16> mVals;
 
 public:
     constexpr MatrixR() noexcept = default;