diff options
author | Sven Göthel <[email protected]> | 2024-01-05 13:52:12 +0100 |
---|---|---|
committer | Sven Göthel <[email protected]> | 2024-01-05 13:52:12 +0100 |
commit | ec98cdacc85ff0202852472c7756586437912f22 (patch) | |
tree | 42414746a27ab35cb8cdbc95af521d74821e57f4 /common | |
parent | fd5269bec9a5fe4815974b1786a037e6a247bfd2 (diff) | |
parent | b82cd2e60edb8fbe5fdd3567105ae76a016a554c (diff) |
Diffstat (limited to 'common')
-rw-r--r-- | common/albit.h | 7 | ||||
-rw-r--r-- | common/alcomplex.cpp | 2 | ||||
-rw-r--r-- | common/almalloc.cpp | 61 | ||||
-rw-r--r-- | common/almalloc.h | 208 | ||||
-rw-r--r-- | common/alnumbers.h | 8 | ||||
-rw-r--r-- | common/alnumeric.h | 2 | ||||
-rw-r--r-- | common/alsem.h | 2 | ||||
-rw-r--r-- | common/alspan.h | 198 | ||||
-rw-r--r-- | common/althrd_setname.cpp | 2 | ||||
-rw-r--r-- | common/atomic.h | 86 | ||||
-rw-r--r-- | common/dynload.cpp | 7 | ||||
-rw-r--r-- | common/flexarray.h | 125 | ||||
-rw-r--r-- | common/intrusive_ptr.h | 8 | ||||
-rw-r--r-- | common/opthelpers.h | 2 | ||||
-rw-r--r-- | common/pffft.cpp | 178 | ||||
-rw-r--r-- | common/pffft.h | 78 | ||||
-rw-r--r-- | common/phase_shifter.h | 10 | ||||
-rw-r--r-- | common/polyphase_resampler.h | 2 | ||||
-rw-r--r-- | common/ringbuffer.cpp | 20 | ||||
-rw-r--r-- | common/ringbuffer.h | 27 | ||||
-rw-r--r-- | common/vecmat.h | 8 |
21 files changed, 562 insertions, 479 deletions
diff --git a/common/albit.h b/common/albit.h index 82a4a00d..d54a189c 100644 --- a/common/albit.h +++ b/common/albit.h @@ -1,6 +1,7 @@ #ifndef AL_BIT_H #define AL_BIT_H +#include <array> #include <cstdint> #include <cstring> #include <limits> @@ -17,9 +18,9 @@ std::enable_if_t<sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From && std::is_trivially_copyable_v<To>, To> bit_cast(const From &src) noexcept { - alignas(To) char dst[sizeof(To)]; - std::memcpy(&dst[0], &src, sizeof(To)); - return *std::launder(reinterpret_cast<To*>(&dst[0])); + alignas(To) std::array<char,sizeof(To)> dst; + std::memcpy(dst.data(), &src, sizeof(To)); + return *std::launder(reinterpret_cast<To*>(dst.data())); } #ifdef __BYTE_ORDER__ diff --git a/common/alcomplex.cpp b/common/alcomplex.cpp index 82a0c43c..f2de5f51 100644 --- a/common/alcomplex.cpp +++ b/common/alcomplex.cpp @@ -155,6 +155,8 @@ void complex_fft(const al::span<std::complex<double>> buffer, const double sign) } else { + assert(log2_size < 32); + for(size_t idx{1u};idx < fftsize-1;++idx) { size_t revidx{idx}; diff --git a/common/almalloc.cpp b/common/almalloc.cpp deleted file mode 100644 index ad1dc6be..00000000 --- a/common/almalloc.cpp +++ /dev/null @@ -1,61 +0,0 @@ - -#include "config.h" - -#include "almalloc.h" - -#include <cassert> -#include <cstddef> -#include <cstdlib> -#include <cstring> -#include <memory> -#ifdef HAVE_MALLOC_H -#include <malloc.h> -#endif - - -void *al_malloc(size_t alignment, size_t size) -{ - assert((alignment & (alignment-1)) == 0); - alignment = std::max(alignment, alignof(std::max_align_t)); - -#if defined(HAVE_POSIX_MEMALIGN) - void *ret{}; - if(posix_memalign(&ret, alignment, size) == 0) - return ret; - return nullptr; -#elif defined(HAVE__ALIGNED_MALLOC) - return _aligned_malloc(size, alignment); -#else - size_t total_size{size + alignment-1 + sizeof(void*)}; - void *base{std::malloc(total_size)}; - if(base != nullptr) - { - void *aligned_ptr{static_cast<char*>(base) + sizeof(void*)}; - total_size -= sizeof(void*); - - std::align(alignment, size, aligned_ptr, total_size); - *(static_cast<void**>(aligned_ptr)-1) = base; - base = aligned_ptr; - } - return base; -#endif -} - -void *al_calloc(size_t alignment, size_t size) -{ - void *ret{al_malloc(alignment, size)}; - if(ret) std::memset(ret, 0, size); - return ret; -} - -void al_free(void *ptr) noexcept -{ -#if defined(HAVE_POSIX_MEMALIGN) - std::free(ptr); -#elif defined(HAVE__ALIGNED_MALLOC) - _aligned_free(ptr); -#else - if(ptr != nullptr) - std::free(*(static_cast<void**>(ptr) - 1)); -#endif -} diff --git a/common/almalloc.h b/common/almalloc.h index 873473ca..3b9965e6 100644 --- a/common/almalloc.h +++ b/common/almalloc.h @@ -13,39 +13,17 @@ #include "pragmadefs.h" -void al_free(void *ptr) noexcept; -[[gnu::alloc_align(1), gnu::alloc_size(2), gnu::malloc]] -void *al_malloc(size_t alignment, size_t size); -[[gnu::alloc_align(1), gnu::alloc_size(2), gnu::malloc]] -void *al_calloc(size_t alignment, size_t size); +namespace gsl { +template<typename T> using owner = T; +}; -#define DISABLE_ALLOC() \ +#define DISABLE_ALLOC \ void *operator new(size_t) = delete; \ void *operator new[](size_t) = delete; \ void operator delete(void*) noexcept = delete; \ void operator delete[](void*) noexcept = delete; -#define DEF_NEWDEL(T) \ - void *operator new(size_t size) \ - { \ - static_assert(&operator new == &T::operator new, \ - "Incorrect container type specified"); \ - if(void *ret{al_malloc(alignof(T), size)}) \ - return ret; \ - throw std::bad_alloc(); \ - } \ - void *operator new[](size_t size) { return operator new(size); } \ - void operator delete(void *block) noexcept { al_free(block); } \ - void operator delete[](void *block) noexcept { operator delete(block); } - -#define DEF_PLACE_NEWDEL() \ - void *operator new(size_t /*size*/, void *ptr) noexcept { return ptr; } \ - void *operator new[](size_t /*size*/, void *ptr) noexcept { return ptr; } \ - void operator delete(void *block, void*) noexcept { al_free(block); } \ - void operator delete(void *block) noexcept { al_free(block); } \ - void operator delete[](void *block, void*) noexcept { al_free(block); } \ - void operator delete[](void *block) noexcept { al_free(block); } enum FamCount : size_t { }; @@ -58,54 +36,59 @@ enum FamCount : size_t { }; sizeof(T)); \ } \ \ - void *operator new(size_t /*size*/, FamCount count) \ + gsl::owner<void*> operator new(size_t /*size*/, FamCount count) \ { \ - if(void *ret{al_malloc(alignof(T), T::Sizeof(count))}) \ - return ret; \ - throw std::bad_alloc(); \ + const auto alignment = std::align_val_t{alignof(T)}; \ + return ::operator new[](T::Sizeof(count), alignment); \ } \ + void operator delete(gsl::owner<void*> block, FamCount) noexcept \ + { ::operator delete[](block, std::align_val_t{alignof(T)}); } \ + void operator delete(gsl::owner<void*> block) noexcept \ + { ::operator delete[](block, std::align_val_t{alignof(T)}); } \ void *operator new[](size_t /*size*/) = delete; \ - void operator delete(void *block, FamCount) { al_free(block); } \ - void operator delete(void *block) noexcept { al_free(block); } \ void operator delete[](void* /*block*/) = delete; namespace al { -template<typename T, std::size_t Align=alignof(T)> +template<typename T, std::size_t AlignV=alignof(T)> struct allocator { - static constexpr std::size_t alignment{std::max(Align, alignof(T))}; - - using value_type = T; - using reference = T&; - using const_reference = const T&; - using pointer = T*; - using const_pointer = const T*; + static constexpr auto Alignment = std::max(AlignV, alignof(T)); + static constexpr auto AlignVal = std::align_val_t{Alignment}; + + using value_type = std::remove_cv_t<std::remove_reference_t<T>>; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using is_always_equal = std::true_type; - template<typename U> + template<typename U, std::enable_if_t<alignof(U) <= Alignment,bool> = true> struct rebind { - using other = allocator<U, Align>; + using other = allocator<U,Alignment>; }; constexpr explicit allocator() noexcept = default; template<typename U, std::size_t N> - constexpr explicit allocator(const allocator<U,N>&) noexcept { } + constexpr explicit allocator(const allocator<U,N>&) noexcept + { static_assert(Alignment == allocator<U,N>::Alignment); } - T *allocate(std::size_t n) + gsl::owner<T*> allocate(std::size_t n) { if(n > std::numeric_limits<std::size_t>::max()/sizeof(T)) throw std::bad_alloc(); - if(auto p = al_malloc(alignment, n*sizeof(T))) return static_cast<T*>(p); - throw std::bad_alloc(); + return static_cast<gsl::owner<T*>>(::operator new[](n*sizeof(T), AlignVal)); } - void deallocate(T *p, std::size_t) noexcept { al_free(p); } + void deallocate(gsl::owner<T*> p, std::size_t) noexcept + { ::operator delete[](gsl::owner<void*>{p}, AlignVal); } }; template<typename T, std::size_t N, typename U, std::size_t M> -constexpr bool operator==(const allocator<T,N>&, const allocator<U,M>&) noexcept { return true; } +constexpr bool operator==(const allocator<T,N>&, const allocator<U,M>&) noexcept +{ return allocator<T,N>::Alignment == allocator<U,M>::Alignment; } template<typename T, std::size_t N, typename U, std::size_t M> -constexpr bool operator!=(const allocator<T,N>&, const allocator<U,M>&) noexcept { return false; } +constexpr bool operator!=(const allocator<T,N>&, const allocator<U,M>&) noexcept +{ return allocator<T,N>::Alignment != allocator<U,M>::Alignment; } template<typename T> @@ -124,124 +107,15 @@ constexpr auto to_address(const T &p) noexcept template<typename T, typename ...Args> constexpr T* construct_at(T *ptr, Args&& ...args) - noexcept(std::is_nothrow_constructible<T, Args...>::value) -{ return ::new(static_cast<void*>(ptr)) T{std::forward<Args>(args)...}; } - - -/* Storage for flexible array data. This is trivially destructible if type T is - * trivially destructible. - */ -template<typename T, size_t alignment, bool = std::is_trivially_destructible<T>::value> -struct FlexArrayStorage { - const size_t mSize; - union { - char mDummy; - alignas(alignment) T mArray[1]; - }; - - static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept - { - const size_t len{sizeof(T)*count}; - return std::max(offsetof(FlexArrayStorage,mArray)+len, sizeof(FlexArrayStorage)) + base; - } - - FlexArrayStorage(size_t size) : mSize{size} - { std::uninitialized_default_construct_n(mArray, mSize); } - ~FlexArrayStorage() = default; - - FlexArrayStorage(const FlexArrayStorage&) = delete; - FlexArrayStorage& operator=(const FlexArrayStorage&) = delete; -}; - -template<typename T, size_t alignment> -struct FlexArrayStorage<T,alignment,false> { - const size_t mSize; - union { - char mDummy; - alignas(alignment) T mArray[1]; - }; - - static constexpr size_t Sizeof(size_t count, size_t base) noexcept - { - const size_t len{sizeof(T)*count}; - return std::max(offsetof(FlexArrayStorage,mArray)+len, sizeof(FlexArrayStorage)) + base; - } - - FlexArrayStorage(size_t size) : mSize{size} - { std::uninitialized_default_construct_n(mArray, mSize); } - ~FlexArrayStorage() { std::destroy_n(mArray, mSize); } - - FlexArrayStorage(const FlexArrayStorage&) = delete; - FlexArrayStorage& operator=(const FlexArrayStorage&) = delete; -}; - -/* A flexible array type. Used either standalone or at the end of a parent - * struct, with placement new, to have a run-time-sized array that's embedded - * with its size. - */ -template<typename T, size_t alignment=alignof(T)> -struct FlexArray { - using element_type = T; - using value_type = std::remove_cv_t<T>; - using index_type = size_t; - using difference_type = ptrdiff_t; - - using pointer = T*; - using const_pointer = const T*; - using reference = T&; - using const_reference = const T&; - - using iterator = pointer; - using const_iterator = const_pointer; - using reverse_iterator = std::reverse_iterator<iterator>; - using const_reverse_iterator = std::reverse_iterator<const_iterator>; - - using Storage_t_ = FlexArrayStorage<element_type,alignment>; - - Storage_t_ mStore; - - static constexpr index_type Sizeof(index_type count, index_type base=0u) noexcept - { return Storage_t_::Sizeof(count, base); } - static std::unique_ptr<FlexArray> Create(index_type count) - { - void *ptr{al_calloc(alignof(FlexArray), Sizeof(count))}; - return std::unique_ptr<FlexArray>{al::construct_at(static_cast<FlexArray*>(ptr), count)}; - } - - FlexArray(index_type size) : mStore{size} { } - ~FlexArray() = default; - - index_type size() const noexcept { return mStore.mSize; } - bool empty() const noexcept { return mStore.mSize == 0; } - - pointer data() noexcept { return mStore.mArray; } - const_pointer data() const noexcept { return mStore.mArray; } - - reference operator[](index_type i) noexcept { return mStore.mArray[i]; } - const_reference operator[](index_type i) const noexcept { return mStore.mArray[i]; } - - reference front() noexcept { return mStore.mArray[0]; } - const_reference front() const noexcept { return mStore.mArray[0]; } - - reference back() noexcept { return mStore.mArray[mStore.mSize-1]; } - const_reference back() const noexcept { return mStore.mArray[mStore.mSize-1]; } - - iterator begin() noexcept { return mStore.mArray; } - const_iterator begin() const noexcept { return mStore.mArray; } - const_iterator cbegin() const noexcept { return mStore.mArray; } - iterator end() noexcept { return mStore.mArray + mStore.mSize; } - const_iterator end() const noexcept { return mStore.mArray + mStore.mSize; } - const_iterator cend() const noexcept { return mStore.mArray + mStore.mSize; } - - reverse_iterator rbegin() noexcept { return end(); } - const_reverse_iterator rbegin() const noexcept { return end(); } - const_reverse_iterator crbegin() const noexcept { return cend(); } - reverse_iterator rend() noexcept { return begin(); } - const_reverse_iterator rend() const noexcept { return begin(); } - const_reverse_iterator crend() const noexcept { return cbegin(); } - - DEF_PLACE_NEWDEL() -}; + noexcept(std::is_nothrow_constructible_v<T, Args...>) +{ + /* NOLINTBEGIN(cppcoreguidelines-owning-memory) construct_at doesn't + * necessarily handle the address from an owner, while placement new + * expects to. + */ + return ::new(static_cast<void*>(ptr)) T{std::forward<Args>(args)...}; + /* NOLINTEND(cppcoreguidelines-owning-memory) */ +} } // namespace al diff --git a/common/alnumbers.h b/common/alnumbers.h index e92d7b87..7abe6b32 100644 --- a/common/alnumbers.h +++ b/common/alnumbers.h @@ -3,9 +3,7 @@ #include <utility> -namespace al { - -namespace numbers { +namespace al::numbers { namespace detail_ { template<typename T> @@ -29,8 +27,6 @@ inline constexpr auto inv_pi = inv_pi_v<double>; inline constexpr auto sqrt2 = sqrt2_v<double>; inline constexpr auto sqrt3 = sqrt3_v<double>; -} // namespace numbers - -} // namespace al +} // namespace al::numbers #endif /* COMMON_ALNUMBERS_H */ diff --git a/common/alnumeric.h b/common/alnumeric.h index 6281b012..cb8704b2 100644 --- a/common/alnumeric.h +++ b/common/alnumeric.h @@ -245,7 +245,7 @@ inline float fast_roundf(float f) noexcept /* Integral limit, where sub-integral precision is not available for * floats. */ - static constexpr float ilim[2]{ + static constexpr std::array ilim{ 8388608.0f /* 0x1.0p+23 */, -8388608.0f /* -0x1.0p+23 */ }; diff --git a/common/alsem.h b/common/alsem.h index 9f72d1c6..90b39319 100644 --- a/common/alsem.h +++ b/common/alsem.h @@ -24,7 +24,7 @@ class semaphore { #else using native_type = sem_t; #endif - native_type mSem; + native_type mSem{}; public: semaphore(unsigned int initial=0); diff --git a/common/alspan.h b/common/alspan.h index 341ce7c8..822915da 100644 --- a/common/alspan.h +++ b/common/alspan.h @@ -5,6 +5,7 @@ #include <cstddef> #include <initializer_list> #include <iterator> +#include <stdexcept> #include <type_traits> #include "almalloc.h" @@ -12,7 +13,7 @@ namespace al { -constexpr size_t dynamic_extent{static_cast<size_t>(-1)}; +inline constexpr size_t dynamic_extent{static_cast<size_t>(-1)}; template<typename T, size_t E=dynamic_extent> class span; @@ -23,31 +24,31 @@ namespace detail_ { template<typename T, size_t E> struct is_span_<span<T,E>> : std::true_type { }; template<typename T> - constexpr bool is_span_v = is_span_<std::remove_cv_t<T>>::value; + inline constexpr bool is_span_v = is_span_<std::remove_cv_t<T>>::value; template<typename T> struct is_std_array_ : std::false_type { }; template<typename T, size_t N> struct is_std_array_<std::array<T,N>> : std::true_type { }; template<typename T> - constexpr bool is_std_array_v = is_std_array_<std::remove_cv_t<T>>::value; + inline constexpr bool is_std_array_v = is_std_array_<std::remove_cv_t<T>>::value; template<typename T, typename = void> - constexpr bool has_size_and_data = false; + inline constexpr bool has_size_and_data = false; template<typename T> - constexpr bool has_size_and_data<T, + inline constexpr bool has_size_and_data<T, std::void_t<decltype(std::size(std::declval<T>())),decltype(std::data(std::declval<T>()))>> = true; template<typename C> - constexpr bool is_valid_container_type = !is_span_v<C> && !is_std_array_v<C> + inline constexpr bool is_valid_container_type = !is_span_v<C> && !is_std_array_v<C> && !std::is_array<C>::value && has_size_and_data<C>; template<typename T, typename U> - constexpr bool is_array_compatible = std::is_convertible<T(*)[],U(*)[]>::value; + inline constexpr bool is_array_compatible = std::is_convertible<T(*)[],U(*)[]>::value; /* NOLINT(*-avoid-c-arrays) */ template<typename C, typename T> - constexpr bool is_valid_container = is_valid_container_type<C> + inline constexpr bool is_valid_container = is_valid_container_type<C> && is_array_compatible<std::remove_pointer_t<decltype(std::data(std::declval<C&>()))>,T>; } // namespace detail_ @@ -79,9 +80,9 @@ public: constexpr explicit span(U iter, index_type) : mData{::al::to_address(iter)} { } template<typename U, typename V, REQUIRES(!std::is_convertible<V,size_t>::value)> constexpr explicit span(U first, V) : mData{::al::to_address(first)} - {} + { } - constexpr span(type_identity_t<element_type> (&arr)[E]) noexcept + constexpr span(type_identity_t<element_type> (&arr)[E]) noexcept /* NOLINT(*-avoid-c-arrays) */ : span{std::data(arr), std::size(arr)} { } constexpr span(std::array<value_type,E> &arr) noexcept @@ -107,43 +108,43 @@ public: constexpr span& operator=(const span &rhs) noexcept = default; - constexpr reference front() const { return *mData; } - constexpr reference back() const { return *(mData+E-1); } - constexpr reference operator[](index_type idx) const { return mData[idx]; } - constexpr pointer data() const noexcept { return mData; } + [[nodiscard]] constexpr auto front() const -> reference { return mData[0]; } + [[nodiscard]] constexpr auto back() const -> reference { return mData[E-1]; } + [[nodiscard]] constexpr auto operator[](index_type idx) const -> reference { return mData[idx]; } + [[nodiscard]] constexpr auto data() const noexcept -> pointer { return mData; } - constexpr index_type size() const noexcept { return E; } - constexpr index_type size_bytes() const noexcept { return E * sizeof(value_type); } - constexpr bool empty() const noexcept { return E == 0; } + [[nodiscard]] constexpr auto size() const noexcept -> index_type { return E; } + [[nodiscard]] constexpr auto size_bytes() const noexcept -> index_type { return E * sizeof(value_type); } + [[nodiscard]] constexpr auto empty() const noexcept -> bool { return E == 0; } - constexpr iterator begin() const noexcept { return mData; } - constexpr iterator end() const noexcept { return mData+E; } - constexpr const_iterator cbegin() const noexcept { return mData; } - constexpr const_iterator cend() const noexcept { return mData+E; } + [[nodiscard]] constexpr auto begin() const noexcept -> iterator { return mData; } + [[nodiscard]] constexpr auto end() const noexcept -> iterator { return mData+E; } + [[nodiscard]] constexpr auto cbegin() const noexcept -> const_iterator { return mData; } + [[nodiscard]] constexpr auto cend() const noexcept -> const_iterator { return mData+E; } - constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator{end()}; } - constexpr reverse_iterator rend() const noexcept { return reverse_iterator{begin()}; } - constexpr const_reverse_iterator crbegin() const noexcept + [[nodiscard]] constexpr auto rbegin() const noexcept -> reverse_iterator { return reverse_iterator{end()}; } + [[nodiscard]] constexpr auto rend() const noexcept -> reverse_iterator { return reverse_iterator{begin()}; } + [[nodiscard]] constexpr auto crbegin() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cend()}; } - constexpr const_reverse_iterator crend() const noexcept + [[nodiscard]] constexpr auto crend() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cbegin()}; } template<size_t C> - constexpr span<element_type,C> first() const + [[nodiscard]] constexpr auto first() const -> span<element_type,C> { static_assert(E >= C, "New size exceeds original capacity"); return span<element_type,C>{mData, C}; } template<size_t C> - constexpr span<element_type,C> last() const + [[nodiscard]] constexpr auto last() const -> span<element_type,C> { static_assert(E >= C, "New size exceeds original capacity"); return span<element_type,C>{mData+(E-C), C}; } template<size_t O, size_t C> - constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>> + [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>> { static_assert(E >= O, "Offset exceeds extent"); static_assert(E-O >= C, "New size exceeds original capacity"); @@ -151,7 +152,7 @@ public: } template<size_t O, size_t C=dynamic_extent> - constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,E-O>> + [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,E-O>> { static_assert(E >= O, "Offset exceeds extent"); return span<element_type,E-O>{mData+O, E-O}; @@ -161,10 +162,10 @@ public: * defining the specialization. As a result, these methods need to be * defined later. */ - constexpr span<element_type,dynamic_extent> first(size_t count) const; - constexpr span<element_type,dynamic_extent> last(size_t count) const; - constexpr span<element_type,dynamic_extent> subspan(size_t offset, - size_t count=dynamic_extent) const; + [[nodiscard]] constexpr auto first(size_t count) const -> span<element_type,dynamic_extent>; + [[nodiscard]] constexpr auto last(size_t count) const -> span<element_type,dynamic_extent>; + [[nodiscard]] constexpr auto subspan(size_t offset, + size_t count=dynamic_extent) const -> span<element_type,dynamic_extent>; private: pointer mData{nullptr}; @@ -192,14 +193,14 @@ public: constexpr span() noexcept = default; template<typename U> - constexpr span(U iter, index_type count) : mData{::al::to_address(iter)}, mDataEnd{::al::to_address(iter) + count} + constexpr span(U iter, index_type count) : mData{::al::to_address(iter)}, mDataLength{count} { } template<typename U, typename V, REQUIRES(!std::is_convertible<V,size_t>::value)> constexpr span(U first, V last) : span{::al::to_address(first), static_cast<size_t>(last - first)} { } template<size_t N> - constexpr span(type_identity_t<element_type> (&arr)[N]) noexcept + constexpr span(type_identity_t<element_type> (&arr)[N]) noexcept /* NOLINT(*-avoid-c-arrays) */ : span{std::data(arr), std::size(arr)} { } template<size_t N> @@ -221,83 +222,122 @@ public: constexpr span& operator=(const span &rhs) noexcept = default; - constexpr reference front() const { return *mData; } - constexpr reference back() const { return *(mDataEnd-1); } - constexpr reference operator[](index_type idx) const { return mData[idx]; } - constexpr pointer data() const noexcept { return mData; } + [[nodiscard]] constexpr auto front() const -> reference { return mData[0]; } + [[nodiscard]] constexpr auto back() const -> reference { return mData[mDataLength-1]; } + [[nodiscard]] constexpr auto operator[](index_type idx) const -> reference { return mData[idx]; } + [[nodiscard]] constexpr auto data() const noexcept -> pointer { return mData; } - constexpr index_type size() const noexcept { return static_cast<index_type>(mDataEnd-mData); } - constexpr index_type size_bytes() const noexcept - { return static_cast<index_type>(mDataEnd-mData) * sizeof(value_type); } - constexpr bool empty() const noexcept { return mData == mDataEnd; } + [[nodiscard]] constexpr auto size() const noexcept -> index_type { return mDataLength; } + [[nodiscard]] constexpr auto size_bytes() const noexcept -> index_type { return mDataLength * sizeof(value_type); } + [[nodiscard]] constexpr auto empty() const noexcept -> bool { return mDataLength == 0; } - constexpr iterator begin() const noexcept { return mData; } - constexpr iterator end() const noexcept { return mDataEnd; } - constexpr const_iterator cbegin() const noexcept { return mData; } - constexpr const_iterator cend() const noexcept { return mDataEnd; } + [[nodiscard]] constexpr auto begin() const noexcept -> iterator { return mData; } + [[nodiscard]] constexpr auto end() const noexcept -> iterator { return mData+mDataLength; } + [[nodiscard]] constexpr auto cbegin() const noexcept -> const_iterator { return mData; } + [[nodiscard]] constexpr auto cend() const noexcept -> const_iterator { return mData+mDataLength; } - constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator{end()}; } - constexpr reverse_iterator rend() const noexcept { return reverse_iterator{begin()}; } - constexpr const_reverse_iterator crbegin() const noexcept + [[nodiscard]] constexpr auto rbegin() const noexcept -> reverse_iterator { return reverse_iterator{end()}; } + [[nodiscard]] constexpr auto rend() const noexcept -> reverse_iterator { return reverse_iterator{begin()}; } + [[nodiscard]] constexpr auto crbegin() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cend()}; } - constexpr const_reverse_iterator crend() const noexcept + [[nodiscard]] constexpr auto crend() const noexcept -> const_reverse_iterator { return const_reverse_iterator{cbegin()}; } template<size_t C> - constexpr span<element_type,C> first() const - { return span<element_type,C>{mData, C}; } + [[nodiscard]] constexpr auto first() const -> span<element_type,C> + { + if(C > mDataLength) + throw std::out_of_range{"Subspan count out of range"}; + return span<element_type,C>{mData, C}; + } - constexpr span first(size_t count) const - { return (count >= size()) ? *this : span{mData, mData+count}; } + [[nodiscard]] constexpr auto first(size_t count) const -> span + { + if(count > mDataLength) + throw std::out_of_range{"Subspan count out of range"}; + return span{mData, count}; + } template<size_t C> - constexpr span<element_type,C> last() const - { return span<element_type,C>{mDataEnd-C, C}; } + [[nodiscard]] constexpr auto last() const -> span<element_type,C> + { + if(C > mDataLength) + throw std::out_of_range{"Subspan count out of range"}; + return span<element_type,C>{mData+mDataLength-C, C}; + } - constexpr span last(size_t count) const - { return (count >= size()) ? *this : span{mDataEnd-count, mDataEnd}; } + [[nodiscard]] constexpr auto last(size_t count) const -> span + { + if(count > mDataLength) + throw std::out_of_range{"Subspan count out of range"}; + return span{mData+mDataLength-count, count}; + } template<size_t O, size_t C> - constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>> - { return span<element_type,C>{mData+O, C}; } + [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C!=dynamic_extent,span<element_type,C>> + { + if(O > mDataLength) + throw std::out_of_range{"Subspan offset out of range"}; + if(C > mDataLength-O) + throw std::out_of_range{"Subspan length out of range"}; + return span<element_type,C>{mData+O, C}; + } template<size_t O, size_t C=dynamic_extent> - constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,C>> - { return span<element_type,C>{mData+O, mDataEnd}; } + [[nodiscard]] constexpr auto subspan() const -> std::enable_if_t<C==dynamic_extent,span<element_type,C>> + { + if(O > mDataLength) + throw std::out_of_range{"Subspan offset out of range"}; + return span<element_type,C>{mData+O, mDataLength-O}; + } - constexpr span subspan(size_t offset, size_t count=dynamic_extent) const + [[nodiscard]] constexpr auto subspan(size_t offset, size_t count=dynamic_extent) const -> span { - return (offset > size()) ? span{} : - (count >= size()-offset) ? span{mData+offset, mDataEnd} : - span{mData+offset, mData+offset+count}; + if(offset > mDataLength) + throw std::out_of_range{"Subspan offset out of range"}; + if(count != dynamic_extent) + { + if(count > mDataLength-offset) + throw std::out_of_range{"Subspan length out of range"}; + return span{mData+offset, count}; + } + return span{mData+offset, mDataLength-offset}; } private: pointer mData{nullptr}; - pointer mDataEnd{nullptr}; + index_type mDataLength{0}; }; template<typename T, size_t E> -constexpr inline auto span<T,E>::first(size_t count) const -> span<element_type,dynamic_extent> +[[nodiscard]] constexpr inline auto span<T,E>::first(size_t count) const -> span<element_type,dynamic_extent> { - return (count >= size()) ? span<element_type>{mData, extent} : - span<element_type>{mData, count}; + if(count > size()) + throw std::out_of_range{"Subspan count out of range"}; + return span<element_type>{mData, count}; } template<typename T, size_t E> -constexpr inline auto span<T,E>::last(size_t count) const -> span<element_type,dynamic_extent> +[[nodiscard]] constexpr inline auto span<T,E>::last(size_t count) const -> span<element_type,dynamic_extent> { - return (count >= size()) ? span<element_type>{mData, extent} : - span<element_type>{mData+extent-count, count}; + if(count > size()) + throw std::out_of_range{"Subspan count out of range"}; + return span<element_type>{mData+size()-count, count}; } template<typename T, size_t E> -constexpr inline auto span<T,E>::subspan(size_t offset, size_t count) const +[[nodiscard]] constexpr inline auto span<T,E>::subspan(size_t offset, size_t count) const -> span<element_type,dynamic_extent> { - return (offset > size()) ? span<element_type>{} : - (count >= size()-offset) ? span<element_type>{mData+offset, mData+extent} : - span<element_type>{mData+offset, mData+offset+count}; + if(offset > size()) + throw std::out_of_range{"Subspan offset out of range"}; + if(count != dynamic_extent) + { + if(count > size()-offset) + throw std::out_of_range{"Subspan length out of range"}; + return span{mData+offset, count}; + } + return span{mData+offset, size()-offset}; } @@ -305,7 +345,7 @@ template<typename T, typename EndOrSize> span(T, EndOrSize) -> span<std::remove_reference_t<decltype(*std::declval<T&>())>>; template<typename T, std::size_t N> -span(T (&)[N]) -> span<T, N>; +span(T (&)[N]) -> span<T, N>; /* NOLINT(*-avoid-c-arrays) */ template<typename T, std::size_t N> span(std::array<T, N>&) -> span<T, N>; diff --git a/common/althrd_setname.cpp b/common/althrd_setname.cpp index 22d33092..21197ba0 100644 --- a/common/althrd_setname.cpp +++ b/common/althrd_setname.cpp @@ -60,7 +60,7 @@ using setname_t4 = int(*)(pthread_t, const char*, void*); { func(pthread_self(), name); } [[maybe_unused]] void setname_caller(setname_t4 func, const char *name) -{ func(pthread_self(), "%s", static_cast<void*>(const_cast<char*>(name))); } +{ func(pthread_self(), "%s", const_cast<char*>(name)); /* NOLINT(*-const-cast) */ } } // namespace diff --git a/common/atomic.h b/common/atomic.h index 5e9b04c6..e85c4f76 100644 --- a/common/atomic.h +++ b/common/atomic.h @@ -2,17 +2,16 @@ #define AL_ATOMIC_H #include <atomic> +#include <memory> +#include "almalloc.h" -using RefCount = std::atomic<unsigned int>; - -inline void InitRef(RefCount &ref, unsigned int value) -{ ref.store(value, std::memory_order_relaxed); } -inline unsigned int ReadRef(RefCount &ref) -{ return ref.load(std::memory_order_acquire); } -inline unsigned int IncrementRef(RefCount &ref) +template<typename T> +auto IncrementRef(std::atomic<T> &ref) noexcept { return ref.fetch_add(1u, std::memory_order_acq_rel)+1u; } -inline unsigned int DecrementRef(RefCount &ref) + +template<typename T> +auto DecrementRef(std::atomic<T> &ref) noexcept { return ref.fetch_sub(1u, std::memory_order_acq_rel)-1u; } @@ -30,4 +29,75 @@ inline void AtomicReplaceHead(std::atomic<T> &head, T newhead) std::memory_order_acq_rel, std::memory_order_acquire)); } +namespace al { + +template<typename T, typename D=std::default_delete<T>> +class atomic_unique_ptr { + std::atomic<gsl::owner<T*>> mPointer{}; + + using unique_ptr_t = std::unique_ptr<T,D>; + +public: + atomic_unique_ptr() = default; + atomic_unique_ptr(const atomic_unique_ptr&) = delete; + explicit atomic_unique_ptr(std::nullptr_t) noexcept { } + explicit atomic_unique_ptr(gsl::owner<T*> ptr) noexcept : mPointer{ptr} { } + explicit atomic_unique_ptr(unique_ptr_t&& rhs) noexcept : mPointer{rhs.release()} { } + ~atomic_unique_ptr() + { + if(auto ptr = mPointer.exchange(nullptr, std::memory_order_relaxed)) + D{}(ptr); + } + + auto operator=(const atomic_unique_ptr&) -> atomic_unique_ptr& = delete; + auto operator=(std::nullptr_t) noexcept -> atomic_unique_ptr& + { + if(auto ptr = mPointer.exchange(nullptr)) + D{}(ptr); + return *this; + } + auto operator=(unique_ptr_t&& rhs) noexcept -> atomic_unique_ptr& + { + if(auto ptr = mPointer.exchange(rhs.release())) + D{}(ptr); + return *this; + } + + [[nodiscard]] + auto load(std::memory_order m=std::memory_order_seq_cst) const noexcept -> T* + { return mPointer.load(m); } + void store(std::nullptr_t, std::memory_order m=std::memory_order_seq_cst) noexcept + { + if(auto oldptr = mPointer.exchange(nullptr, m)) + D{}(oldptr); + } + void store(gsl::owner<T*> ptr, std::memory_order m=std::memory_order_seq_cst) noexcept + { + if(auto oldptr = mPointer.exchange(ptr, m)) + D{}(oldptr); + } + void store(unique_ptr_t&& ptr, std::memory_order m=std::memory_order_seq_cst) noexcept + { + if(auto oldptr = mPointer.exchange(ptr.release(), m)) + D{}(oldptr); + } + + [[nodiscard]] + auto exchange(std::nullptr_t, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t + { return unique_ptr_t{mPointer.exchange(nullptr, m)}; } + [[nodiscard]] + auto exchange(gsl::owner<T*> ptr, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t + { return unique_ptr_t{mPointer.exchange(ptr, m)}; } + [[nodiscard]] + auto exchange(std::unique_ptr<T>&& ptr, std::memory_order m=std::memory_order_seq_cst) noexcept -> unique_ptr_t + { return unique_ptr_t{mPointer.exchange(ptr.release(), m)}; } + + [[nodiscard]] + auto is_lock_free() const noexcept -> bool { return mPointer.is_lock_free(); } + + static constexpr auto is_always_lock_free = std::atomic<gsl::owner<T*>>::is_always_lock_free; +}; + +} // namespace al + #endif /* AL_ATOMIC_H */ diff --git a/common/dynload.cpp b/common/dynload.cpp index 86c36e00..333a9435 100644 --- a/common/dynload.cpp +++ b/common/dynload.cpp @@ -3,13 +3,12 @@ #include "dynload.h" -#include "albit.h" -#include "strutils.h" - #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include <windows.h> +#include "strutils.h" + void *LoadLib(const char *name) { std::wstring wname{utf8_to_wstr(name)}; @@ -18,7 +17,7 @@ void *LoadLib(const char *name) void CloseLib(void *handle) { FreeLibrary(static_cast<HMODULE>(handle)); } void *GetSymbol(void *handle, const char *name) -{ return al::bit_cast<void*>(GetProcAddress(static_cast<HMODULE>(handle), name)); } +{ return reinterpret_cast<void*>(GetProcAddress(static_cast<HMODULE>(handle), name)); } #elif defined(HAVE_DLFCN_H) diff --git a/common/flexarray.h b/common/flexarray.h new file mode 100644 index 00000000..b8077988 --- /dev/null +++ b/common/flexarray.h @@ -0,0 +1,125 @@ +#ifndef AL_FLEXARRAY_H +#define AL_FLEXARRAY_H + +#include <algorithm> +#include <cstddef> +#include <stdexcept> +#include <type_traits> + +#include "almalloc.h" +#include "alspan.h" + +namespace al { + +/* Storage for flexible array data. This is trivially destructible if type T is + * trivially destructible. + */ +template<typename T, size_t alignment, bool = std::is_trivially_destructible<T>::value> +struct alignas(std::max(alignment, alignof(al::span<T>))) FlexArrayStorage : al::span<T> { + static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept + { return sizeof(FlexArrayStorage) + sizeof(T)*count + base; } + + FlexArrayStorage(size_t size) noexcept(std::is_nothrow_constructible_v<T>) + : al::span<T>{::new(static_cast<void*>(this+1)) T[size], size} + { } + ~FlexArrayStorage() = default; + + FlexArrayStorage(const FlexArrayStorage&) = delete; + FlexArrayStorage& operator=(const FlexArrayStorage&) = delete; +}; + +template<typename T, size_t alignment> +struct alignas(std::max(alignment, alignof(al::span<T>))) FlexArrayStorage<T,alignment,false> : al::span<T> { + static constexpr size_t Sizeof(size_t count, size_t base=0u) noexcept + { return sizeof(FlexArrayStorage) + sizeof(T)*count + base; } + + FlexArrayStorage(size_t size) noexcept(std::is_nothrow_constructible_v<T>) + : al::span<T>{::new(static_cast<void*>(this+1)) T[size], size} + { } + ~FlexArrayStorage() { std::destroy(this->begin(), this->end()); } + + FlexArrayStorage(const FlexArrayStorage&) = delete; + FlexArrayStorage& operator=(const FlexArrayStorage&) = delete; +}; + +/* A flexible array type. Used either standalone or at the end of a parent + * struct, to have a run-time-sized array that's embedded with its size. Should + * be used delicately, ensuring there's no additional data after the FlexArray + * member. + */ +template<typename T, size_t Align=alignof(T)> +struct FlexArray { + using element_type = T; + using value_type = std::remove_cv_t<T>; + using index_type = size_t; + using difference_type = ptrdiff_t; + + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + static constexpr size_t alignment{std::max(alignof(T), Align)}; + using Storage_t_ = FlexArrayStorage<element_type,alignment>; + + const Storage_t_ mStore; + + static constexpr index_type Sizeof(index_type count, index_type base=0u) noexcept + { return Storage_t_::Sizeof(count, base); } + static std::unique_ptr<FlexArray> Create(index_type count) + { return std::unique_ptr<FlexArray>{new(FamCount{count}) FlexArray{count}}; } + + FlexArray(index_type size) noexcept(std::is_nothrow_constructible_v<Storage_t_,index_type>) + : mStore{size} + { } + ~FlexArray() = default; + + [[nodiscard]] auto size() const noexcept -> index_type { return mStore.size(); } + [[nodiscard]] auto empty() const noexcept -> bool { return mStore.empty(); } + + [[nodiscard]] auto data() noexcept -> pointer { return mStore.data(); } + [[nodiscard]] auto data() const noexcept -> const_pointer { return mStore.data(); } + + [[nodiscard]] auto operator[](index_type i) noexcept -> reference { return mStore[i]; } + [[nodiscard]] auto operator[](index_type i) const noexcept -> const_reference { return mStore[i]; } + + [[nodiscard]] auto front() noexcept -> reference { return mStore.front(); } + [[nodiscard]] auto front() const noexcept -> const_reference { return mStore.front(); } + + [[nodiscard]] auto back() noexcept -> reference { return mStore.back(); } + [[nodiscard]] auto back() const noexcept -> const_reference { return mStore.back(); } + + [[nodiscard]] auto begin() noexcept -> iterator { return mStore.begin(); } + [[nodiscard]] auto begin() const noexcept -> const_iterator { return mStore.cbegin(); } + [[nodiscard]] auto cbegin() const noexcept -> const_iterator { return mStore.cbegin(); } + [[nodiscard]] auto end() noexcept -> iterator { return mStore.end(); } + [[nodiscard]] auto end() const noexcept -> const_iterator { return mStore.cend(); } + [[nodiscard]] auto cend() const noexcept -> const_iterator { return mStore.cend(); } + + [[nodiscard]] auto rbegin() noexcept -> reverse_iterator { return end(); } + [[nodiscard]] auto rbegin() const noexcept -> const_reverse_iterator { return cend(); } + [[nodiscard]] auto crbegin() const noexcept -> const_reverse_iterator { return cend(); } + [[nodiscard]] auto rend() noexcept -> reverse_iterator { return begin(); } + [[nodiscard]] auto rend() const noexcept -> const_reverse_iterator { return cbegin(); } + [[nodiscard]] auto crend() const noexcept -> const_reverse_iterator { return cbegin(); } + + gsl::owner<void*> operator new(size_t, FamCount count) + { return ::operator new[](Sizeof(count), std::align_val_t{alignof(FlexArray)}); } + void operator delete(gsl::owner<void*> block, FamCount) noexcept + { ::operator delete[](block, std::align_val_t{alignof(FlexArray)}); } + void operator delete(gsl::owner<void*> block) noexcept + { ::operator delete[](block, std::align_val_t{alignof(FlexArray)}); } + + void *operator new(size_t size) = delete; + void *operator new[](size_t size) = delete; + void operator delete[](void *block) = delete; +}; + +} // namespace al + +#endif /* AL_FLEXARRAY_H */ diff --git a/common/intrusive_ptr.h b/common/intrusive_ptr.h index 27075347..0152b92a 100644 --- a/common/intrusive_ptr.h +++ b/common/intrusive_ptr.h @@ -11,7 +11,7 @@ namespace al { template<typename T> class intrusive_ref { - RefCount mRef{1u}; + std::atomic<unsigned int> mRef{1u}; public: unsigned int add_ref() noexcept { return IncrementRef(mRef); } @@ -81,9 +81,9 @@ public: explicit operator bool() const noexcept { return mPtr != nullptr; } - T& operator*() const noexcept { return *mPtr; } - T* operator->() const noexcept { return mPtr; } - T* get() const noexcept { return mPtr; } + [[nodiscard]] auto operator*() const noexcept -> T& { return *mPtr; } + [[nodiscard]] auto operator->() const noexcept -> T* { return mPtr; } + [[nodiscard]] auto get() const noexcept -> T* { return mPtr; } void reset(T *ptr=nullptr) noexcept { diff --git a/common/opthelpers.h b/common/opthelpers.h index dc43ccdb..ae2611da 100644 --- a/common/opthelpers.h +++ b/common/opthelpers.h @@ -42,7 +42,7 @@ #elif HAS_BUILTIN(__builtin_unreachable) #define ASSUME(x) do { if(x) break; __builtin_unreachable(); } while(0) #else -#define ASSUME(x) ((void)0) +#define ASSUME(x) (static_cast<void>(0)) #endif /* This shouldn't be needed since unknown attributes are ignored, but older diff --git a/common/pffft.cpp b/common/pffft.cpp index 71f71fa6..46d97918 100644 --- a/common/pffft.cpp +++ b/common/pffft.cpp @@ -58,16 +58,17 @@ #include "pffft.h" #include <array> -#include <assert.h> +#include <cassert> #include <cmath> +#include <cstdio> +#include <cstdlib> #include <cstring> -#include <stdio.h> -#include <stdlib.h> #include <vector> #include "albit.h" #include "almalloc.h" #include "alnumbers.h" +#include "alnumeric.h" #include "alspan.h" #include "opthelpers.h" @@ -90,8 +91,8 @@ using uint = unsigned int; * Altivec support macros */ #if defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) || defined(__powerpc64__) -typedef vector float v4sf; -#define SIMD_SZ 4 +using v4sf = vector float; +constexpr uint SimdSize{4}; #define VZERO() ((vector float) vec_splat_u8(0)) #define VMUL(a,b) vec_madd(a,b, VZERO()) #define VADD vec_add @@ -142,19 +143,27 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept (defined(_M_IX86_FP) && _M_IX86_FP >= 1) #include <xmmintrin.h> -typedef __m128 v4sf; -#define SIMD_SZ 4 // 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. +using v4sf = __m128; +/* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/ + * finalize functions anyway so you will have to work if you want to enable AVX + * with its 256-bit vectors. + */ +constexpr uint SimdSize{4}; #define VZERO _mm_setzero_ps #define VMUL _mm_mul_ps #define VADD _mm_add_ps -#define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c) +force_inline v4sf vmadd(const v4sf a, const v4sf b, const v4sf c) noexcept +{ return _mm_add_ps(_mm_mul_ps(a,b), c); } +#define VMADD vmadd #define VSUB _mm_sub_ps #define LD_PS1 _mm_set1_ps #define VSET4 _mm_setr_ps -#define VINSERT0(v, a) _mm_move_ss((v), _mm_set_ss(a)) +force_inline v4sf vinsert0(const v4sf v, const float a) noexcept +{ return _mm_move_ss(v, _mm_set_ss(a)); } +#define VINSERT0 vinsert0 #define VEXTRACT0 _mm_cvtss_f32 -force_inline void interleave2(v4sf in1, v4sf in2, v4sf &out1, v4sf &out2) noexcept +force_inline void interleave2(const v4sf in1, const v4sf in2, v4sf &out1, v4sf &out2) noexcept { v4sf tmp{_mm_unpacklo_ps(in1, in2)}; out2 = _mm_unpackhi_ps(in1, in2); @@ -170,7 +179,7 @@ force_inline void uninterleave2(v4sf in1, v4sf in2, v4sf &out1, v4sf &out2) noex force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept { _MM_TRANSPOSE4_PS(x0, x1, x2, x3); } -#define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) +#define VSWAPHL(a,b) _mm_shuffle_ps((b), (a), _MM_SHUFFLE(3,2,1,0)) /* * ARM NEON support macros @@ -178,8 +187,8 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept #elif defined(__ARM_NEON) || defined(__aarch64__) || defined(__arm64) #include <arm_neon.h> -typedef float32x4_t v4sf; -#define SIMD_SZ 4 +using v4sf = float32x4_t; +constexpr uint SimdSize{4}; #define VZERO() vdupq_n_f32(0) #define VMUL vmulq_f32 #define VADD vaddq_f32 @@ -238,7 +247,7 @@ force_inline void vtranspose4(v4sf &x0, v4sf &x1, v4sf &x2, v4sf &x3) noexcept #elif defined(__GNUC__) using v4sf [[gnu::vector_size(16), gnu::aligned(16)]] = float; -#define SIMD_SZ 4 +constexpr uint SimdSize{4}; #define VZERO() v4sf{0,0,0,0} #define VMUL(a,b) ((a) * (b)) #define VADD(a,b) ((a) + (b)) @@ -297,8 +306,8 @@ force_inline v4sf vswaphl(v4sf a, v4sf b) noexcept // fallback mode for situations where SIMD is not available, use scalar mode instead #ifdef PFFFT_SIMD_DISABLE -typedef float v4sf; -#define SIMD_SZ 1 +using v4sf = float; +constexpr uint SimdSize{1}; #define VZERO() 0.f #define VMUL(a,b) ((a)*(b)) #define VADD(a,b) ((a)+(b)) @@ -309,7 +318,7 @@ typedef float v4sf; inline bool valigned(const float *ptr) noexcept { - static constexpr uintptr_t alignmask{SIMD_SZ*4 - 1}; + static constexpr uintptr_t alignmask{SimdSize*4 - 1}; return (reinterpret_cast<uintptr_t>(ptr) & alignmask) == 0; } @@ -335,14 +344,14 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept [[maybe_unused]] void validate_pffft_simd() { using float4 = std::array<float,4>; - static constexpr float f[16]{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + static constexpr std::array<float,16> f{{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}}; float4 a0_f, a1_f, a2_f, a3_f, t_f, u_f; v4sf a0_v, a1_v, a2_v, a3_v, t_v, u_v; - std::memcpy(&a0_v, f, 4*sizeof(float)); - std::memcpy(&a1_v, f+4, 4*sizeof(float)); - std::memcpy(&a2_v, f+8, 4*sizeof(float)); - std::memcpy(&a3_v, f+12, 4*sizeof(float)); + std::memcpy(&a0_v, f.data(), 4*sizeof(float)); + std::memcpy(&a1_v, f.data()+4, 4*sizeof(float)); + std::memcpy(&a2_v, f.data()+8, 4*sizeof(float)); + std::memcpy(&a3_v, f.data()+12, 4*sizeof(float)); t_v = VZERO(); t_f = al::bit_cast<float4>(t_v); printf("VZERO=[%2g %2g %2g %2g]\n", t_f[0], t_f[1], t_f[2], t_f[3]); assertv4(t, 0, 0, 0, 0); @@ -379,7 +388,9 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept #endif //!PFFFT_SIMD_DISABLE /* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +/* with a 64-byte alignment, we are even aligned on L2 cache lines... */ +constexpr auto V4sfAlignment = size_t(64); +constexpr auto V4sfAlignVal = std::align_val_t(V4sfAlignment); /* passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 @@ -538,8 +549,8 @@ NOINLINE void passf5_ps(const size_t ido, const size_t l1, const v4sf *cc, v4sf const v4sf ti11{LD_PS1(0.951056516295154f*fsign)}; const v4sf ti12{LD_PS1(0.587785252292473f*fsign)}; -#define cc_ref(a_1,a_2) cc[(a_2-1)*ido + (a_1) + 1] -#define ch_ref(a_1,a_3) ch[(a_3-1)*l1*ido + (a_1) + 1] +#define cc_ref(a_1,a_2) cc[((a_2)-1)*ido + (a_1) + 1] +#define ch_ref(a_1,a_3) ch[((a_3)-1)*l1*ido + (a_1) + 1] assert(ido > 2); for(size_t k{0};k < l1;++k, cc += 5*ido, ch += ido) @@ -958,8 +969,8 @@ void radf5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf * const v4sf tr12{LD_PS1(-0.809016994374947f)}; const v4sf ti12{LD_PS1(0.587785252292473f)}; -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + (a_1)] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + (a_1)] /* Parameter adjustments */ ch -= 1 + ido * 6; @@ -1040,8 +1051,8 @@ void radb5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf * const v4sf tr12{LD_PS1(-0.809016994374947f)}; const v4sf ti12{LD_PS1(0.587785252292473f)}; -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + (a_1)] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + (a_1)] /* Parameter adjustments */ ch -= 1 + ido*(1 + l1); @@ -1331,7 +1342,7 @@ uint decompose(const uint n, const al::span<uint,15> ifac, const al::span<const void rffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac) { - static constexpr uint ntryh[]{4,2,3,5}; + static constexpr std::array ntryh{4u,2u,3u,5u}; const uint nf{decompose(n, ifac, ntryh)}; const double argh{2.0*al::numbers::pi / n}; @@ -1365,7 +1376,7 @@ void rffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac) void cffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac) { - static constexpr uint ntryh[]{5,3,4,2}; + static constexpr std::array ntryh{5u,3u,4u,2u}; const uint nf{decompose(n, ifac, ntryh)}; const double argh{2.0*al::numbers::pi / n}; @@ -1405,24 +1416,20 @@ void cffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac) } // namespace -void *pffft_aligned_malloc(size_t nb_bytes) -{ return al_malloc(MALLOC_V4SF_ALIGNMENT, nb_bytes); } - -void pffft_aligned_free(void *p) { al_free(p); } - -int pffft_simd_size() { return SIMD_SZ; } - +/* NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) */ struct PFFFT_Setup { - uint N; - uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) - std::array<uint,15> ifac; - pffft_transform_t transform; + uint N{}; + uint Ncvec{}; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */ + std::array<uint,15> ifac{}; + pffft_transform_t transform{}; - float *twiddle; // N/4 elements - alignas(MALLOC_V4SF_ALIGNMENT) v4sf e[1]; // N/4*3 elements + float *twiddle{}; /* N/4 elements */ + al::span<v4sf> e; /* N/4*3 elements */ + + alignas(V4sfAlignment) std::byte end; }; -PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) +gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform) { assert(transform == PFFFT_REAL || transform == PFFFT_COMPLEX); assert(N > 0); @@ -1431,50 +1438,53 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) * handle other cases (or maybe just switch to a scalar fft, I don't know..) */ if(transform == PFFFT_REAL) - assert((N%(2*SIMD_SZ*SIMD_SZ)) == 0); + assert((N%(2*SimdSize*SimdSize)) == 0); else - assert((N%(SIMD_SZ*SIMD_SZ)) == 0); + assert((N%(SimdSize*SimdSize)) == 0); - const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - const size_t storelen{offsetof(PFFFT_Setup, e[0]) + (2u*Ncvec * sizeof(v4sf))}; + const uint Ncvec{(transform == PFFFT_REAL ? N/2 : N) / SimdSize}; - void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)}; - if(!store) return nullptr; + const size_t storelen{std::max(offsetof(PFFFT_Setup, end) + 2_zu*Ncvec*sizeof(v4sf), + sizeof(PFFFT_Setup))}; + auto storage = static_cast<gsl::owner<std::byte*>>(::operator new[](storelen, V4sfAlignVal)); + al::span extrastore{&storage[offsetof(PFFFT_Setup, end)], 2_zu*Ncvec*sizeof(v4sf)}; - PFFFT_Setup *s{::new(store) PFFFT_Setup{}}; + gsl::owner<PFFFT_Setup*> s{::new(storage) PFFFT_Setup{}}; s->N = N; s->transform = transform; - /* nb of complex simd vectors */ s->Ncvec = Ncvec; - s->twiddle = reinterpret_cast<float*>(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]); - if constexpr(SIMD_SZ > 1) + const size_t ecount{2_zu*Ncvec*(SimdSize-1)/SimdSize}; + s->e = {std::launder(reinterpret_cast<v4sf*>(extrastore.data())), ecount}; + s->twiddle = std::launder(reinterpret_cast<float*>(&extrastore[ecount*sizeof(v4sf)])); + + if constexpr(SimdSize > 1) { - auto e = std::vector<float>(2u*Ncvec*(SIMD_SZ-1), 0.0f); + auto e = std::vector<float>(s->e.size()*SimdSize, 0.0f); for(size_t k{0};k < s->Ncvec;++k) { - const size_t i{k / SIMD_SZ}; - const size_t j{k % SIMD_SZ}; - for(size_t m{0};m < SIMD_SZ-1;++m) + const size_t i{k / SimdSize}; + const size_t j{k % SimdSize}; + for(size_t m{0};m < SimdSize-1;++m) { const double A{-2.0*al::numbers::pi*static_cast<double>((m+1)*k) / N}; - e[((i*3 + m)*2 + 0)*SIMD_SZ + j] = static_cast<float>(std::cos(A)); - e[((i*3 + m)*2 + 1)*SIMD_SZ + j] = static_cast<float>(std::sin(A)); + e[((i*3 + m)*2 + 0)*SimdSize + j] = static_cast<float>(std::cos(A)); + e[((i*3 + m)*2 + 1)*SimdSize + j] = static_cast<float>(std::sin(A)); } } - std::memcpy(s->e, e.data(), e.size()*sizeof(float)); + std::memcpy(s->e.data(), e.data(), e.size()*sizeof(float)); } if(transform == PFFFT_REAL) - rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); + rffti1_ps(N/SimdSize, s->twiddle, s->ifac); else - cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); + cffti1_ps(N/SimdSize, s->twiddle, s->ifac); /* check that N is decomposable with allowed prime factors */ size_t m{1}; for(size_t k{0};k < s->ifac[1];++k) m *= s->ifac[2+k]; - if(m != N/SIMD_SZ) + if(m != N/SimdSize) { pffft_destroy_setup(s); s = nullptr; @@ -1484,10 +1494,10 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) } -void pffft_destroy_setup(PFFFT_Setup *s) +void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> s) noexcept { std::destroy_at(s); - al_free(s); + ::operator delete[](gsl::owner<void*>{s}, V4sfAlignVal); } #if !defined(PFFFT_SIMD_DISABLE) @@ -1537,7 +1547,7 @@ void pffft_cplx_finalize(const size_t Ncvec, const v4sf *in, v4sf *RESTRICT out, { assert(in != out); - const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks + const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks for(size_t k{0};k < dk;++k) { v4sf r0{in[8*k+0]}, i0{in[8*k+1]}; @@ -1581,7 +1591,7 @@ void pffft_cplx_preprocess(const size_t Ncvec, const v4sf *in, v4sf *RESTRICT ou { assert(in != out); - const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks + const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks for(size_t k{0};k < dk;++k) { v4sf r0{in[8*k+0]}, i0{in[8*k+1]}; @@ -1674,12 +1684,12 @@ NOINLINE void pffft_real_finalize(const size_t Ncvec, const v4sf *in, v4sf *REST static constexpr float s{al::numbers::sqrt2_v<float>/2.0f}; assert(in != out); - const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks + const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ const v4sf zero{VZERO()}; - const auto cr = al::bit_cast<std::array<float,SIMD_SZ>>(in[0]); - const auto ci = al::bit_cast<std::array<float,SIMD_SZ>>(in[Ncvec*2-1]); + const auto cr = al::bit_cast<std::array<float,SimdSize>>(in[0]); + const auto ci = al::bit_cast<std::array<float,SimdSize>>(in[Ncvec*2-1]); pffft_real_finalize_4x4(&zero, &zero, in+1, e, out); /* [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3] @@ -1765,11 +1775,11 @@ NOINLINE void pffft_real_preprocess(const size_t Ncvec, const v4sf *in, v4sf *RE static constexpr float sqrt2{al::numbers::sqrt2_v<float>}; assert(in != out); - const size_t dk{Ncvec/SIMD_SZ}; // number of 4x4 matrix blocks + const size_t dk{Ncvec/SimdSize}; // number of 4x4 matrix blocks /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ - std::array<float,SIMD_SZ> Xr, Xi; - for(size_t k{0};k < SIMD_SZ;++k) + std::array<float,SimdSize> Xr, Xi; + for(size_t k{0};k < SimdSize;++k) { Xr[k] = VEXTRACT0(in[2*k]); Xi[k] = VEXTRACT0(in[2*k + 1]); @@ -1813,7 +1823,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf const size_t Ncvec{setup->Ncvec}; const bool nf_odd{(setup->ifac[1]&1) != 0}; - v4sf *buff[2]{voutput, scratch}; + std::array buff{voutput, scratch}; bool ib{nf_odd != ordered}; if(direction == PFFFT_FORWARD) { @@ -1824,7 +1834,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf if(setup->transform == PFFFT_REAL) { ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], setup->twiddle, setup->ifac) == buff[1]); - pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } else { @@ -1833,7 +1843,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf uninterleave2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle, setup->ifac, -1.0f) == buff[1]); - pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } if(ordered) pffft_zreorder(setup, reinterpret_cast<float*>(buff[!ib]), @@ -1855,12 +1865,12 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf } if(setup->transform == PFFFT_REAL) { - pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac) == buff[1]); } else { - pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac, +1.0f) == buff[1]); for(size_t k{0};k < Ncvec;++k) interleave2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); @@ -1899,8 +1909,8 @@ void pffft_zreorder(const PFFFT_Setup *setup, const float *in, float *out, interleave2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]); interleave2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]); } - reversed_copy(dk, vin+2, 8, vout + N/SIMD_SZ/2); - reversed_copy(dk, vin+6, 8, vout + N/SIMD_SZ); + reversed_copy(dk, vin+2, 8, vout + N/SimdSize/2); + reversed_copy(dk, vin+6, 8, vout + N/SimdSize); } else { @@ -1909,8 +1919,8 @@ void pffft_zreorder(const PFFFT_Setup *setup, const float *in, float *out, uninterleave2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]); uninterleave2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]); } - unreversed_copy(dk, vin + N/SIMD_SZ/4, vout + N/SIMD_SZ - 6, -8); - unreversed_copy(dk, vin + 3*N/SIMD_SZ/4, vout + N/SIMD_SZ - 2, -8); + unreversed_copy(dk, vin + N/SimdSize/4, vout + N/SimdSize - 6, -8); + unreversed_copy(dk, vin + 3_uz*N/SimdSize/4, vout + N/SimdSize - 2, -8); } } else diff --git a/common/pffft.h b/common/pffft.h index 9cff9e54..cf356524 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -79,36 +79,32 @@ #ifndef PFFFT_H #define PFFFT_H -#include <stddef.h> // for size_t -#include <stdint.h> +#include <cstddef> +#include <cstdint> +#include <utility> + +#include "almalloc.h" -#ifdef __cplusplus -extern "C" { -#endif /* opaque struct holding internal stuff (precomputed twiddle factors) this * struct can be shared by many threads as it contains only read-only data. */ -typedef struct PFFFT_Setup PFFFT_Setup; - -#ifndef PFFFT_COMMON_ENUMS -#define PFFFT_COMMON_ENUMS +struct PFFFT_Setup; /* direction of the transform */ -typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; +enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD }; /* type of transform */ -typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; - -#endif +enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX }; /** * Prepare for performing transforms of size N -- the returned PFFFT_Setup * structure is read-only so it can safely be shared by multiple concurrent * threads. */ -PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform); -void pffft_destroy_setup(PFFFT_Setup *setup); +[[gnu::malloc]] +gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform); +void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> setup) noexcept; /** * Perform a Fourier transform. The z-domain data is stored in the most @@ -174,19 +170,47 @@ void pffft_zconvolve_scale_accumulate(const PFFFT_Setup *setup, const float *dft */ void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab); -/** - * The float buffers must have the correct alignment (16-byte boundary on intel - * and powerpc). This function may be used to obtain such correctly aligned - * buffers. - */ -void *pffft_aligned_malloc(size_t nb_bytes); -void pffft_aligned_free(void *ptr); -/* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */ -int pffft_simd_size(); +struct PFFFTSetup { + gsl::owner<PFFFT_Setup*> mSetup{}; + + PFFFTSetup() = default; + PFFFTSetup(const PFFFTSetup&) = delete; + PFFFTSetup(PFFFTSetup&& rhs) noexcept : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } + explicit PFFFTSetup(std::nullptr_t) noexcept { } + explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) + : mSetup{pffft_new_setup(n, transform)} + { } + ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); } + + PFFFTSetup& operator=(const PFFFTSetup&) = delete; + PFFFTSetup& operator=(PFFFTSetup&& rhs) noexcept + { + if(mSetup) + pffft_destroy_setup(mSetup); + mSetup = rhs.mSetup; + rhs.mSetup = nullptr; + return *this; + } + + void transform(const float *input, float *output, float *work, pffft_direction_t direction) const + { pffft_transform(mSetup, input, output, work, direction); } + + void transform_ordered(const float *input, float *output, float *work, + pffft_direction_t direction) const + { pffft_transform_ordered(mSetup, input, output, work, direction); } + + void zreorder(const float *input, float *output, pffft_direction_t direction) const + { pffft_zreorder(mSetup, input, output, direction); } + + void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab, + float scaling) const + { pffft_zconvolve_scale_accumulate(mSetup, dft_a, dft_b, dft_ab, scaling); } + + void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const + { pffft_zconvolve_accumulate(mSetup, dft_a, dft_b, dft_ab); } -#ifdef __cplusplus -} -#endif + [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; } +}; #endif // PFFFT_H diff --git a/common/phase_shifter.h b/common/phase_shifter.h index e1a83dab..1b3463de 100644 --- a/common/phase_shifter.h +++ b/common/phase_shifter.h @@ -10,6 +10,7 @@ #include <array> #include <stddef.h> #include <type_traits> +#include <vector> #include "alcomplex.h" #include "alspan.h" @@ -52,20 +53,19 @@ struct PhaseShifterT { constexpr size_t fft_size{FilterSize}; constexpr size_t half_size{fft_size / 2}; - auto fftBuffer = std::make_unique<complex_d[]>(fft_size); - std::fill_n(fftBuffer.get(), fft_size, complex_d{}); + auto fftBuffer = std::vector<complex_d>(fft_size, complex_d{}); fftBuffer[half_size] = 1.0; - forward_fft(al::span{fftBuffer.get(), fft_size}); + forward_fft(al::span{fftBuffer}); fftBuffer[0] *= std::numeric_limits<double>::epsilon(); for(size_t i{1};i < half_size;++i) fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()}; fftBuffer[half_size] *= std::numeric_limits<double>::epsilon(); for(size_t i{half_size+1};i < fft_size;++i) fftBuffer[i] = std::conj(fftBuffer[fft_size - i]); - inverse_fft(al::span{fftBuffer.get(), fft_size}); + inverse_fft(al::span{fftBuffer}); - auto fftiter = fftBuffer.get() + fft_size - 1; + auto fftiter = fftBuffer.data() + fft_size - 1; for(float &coeff : mCoeffs) { coeff = static_cast<float>(fftiter->real() / double{fft_size}); diff --git a/common/polyphase_resampler.h b/common/polyphase_resampler.h index 557485bb..764111c9 100644 --- a/common/polyphase_resampler.h +++ b/common/polyphase_resampler.h @@ -40,7 +40,7 @@ struct PPhaseResampler { explicit operator bool() const noexcept { return !mF.empty(); } private: - uint mP, mQ, mM, mL; + uint mP{}, mQ{}, mM{}, mL{}; std::vector<double> mF; }; diff --git a/common/ringbuffer.cpp b/common/ringbuffer.cpp index af1f3669..2636bfb4 100644 --- a/common/ringbuffer.cpp +++ b/common/ringbuffer.cpp @@ -24,6 +24,8 @@ #include <algorithm> #include <climits> +#include <cstdint> +#include <limits> #include <stdexcept> #include "almalloc.h" @@ -40,7 +42,7 @@ RingBufferPtr RingBuffer::Create(std::size_t sz, std::size_t elem_sz, int limit_ power_of_two |= power_of_two>>4; power_of_two |= power_of_two>>8; power_of_two |= power_of_two>>16; - if constexpr(SIZE_MAX > UINT_MAX) + if constexpr(sizeof(size_t) > sizeof(uint32_t)) power_of_two |= power_of_two>>32; } ++power_of_two; @@ -159,7 +161,7 @@ std::size_t RingBuffer::write(const void *src, std::size_t cnt) noexcept } -auto RingBuffer::getReadVector() const noexcept -> DataPair +auto RingBuffer::getReadVector() noexcept -> DataPair { DataPair ret; @@ -174,15 +176,15 @@ auto RingBuffer::getReadVector() const noexcept -> DataPair { /* Two part vector: the rest of the buffer after the current read ptr, * plus some from the start of the buffer. */ - ret.first.buf = const_cast<std::byte*>(mBuffer.data() + r*mElemSize); + ret.first.buf = mBuffer.data() + r*mElemSize; ret.first.len = mSizeMask+1 - r; - ret.second.buf = const_cast<std::byte*>(mBuffer.data()); + ret.second.buf = mBuffer.data(); ret.second.len = cnt2 & mSizeMask; } else { /* Single part vector: just the rest of the buffer */ - ret.first.buf = const_cast<std::byte*>(mBuffer.data() + r*mElemSize); + ret.first.buf = mBuffer.data() + r*mElemSize; ret.first.len = free_cnt; ret.second.buf = nullptr; ret.second.len = 0; @@ -191,7 +193,7 @@ auto RingBuffer::getReadVector() const noexcept -> DataPair return ret; } -auto RingBuffer::getWriteVector() const noexcept -> DataPair +auto RingBuffer::getWriteVector() noexcept -> DataPair { DataPair ret; @@ -206,14 +208,14 @@ auto RingBuffer::getWriteVector() const noexcept -> DataPair { /* Two part vector: the rest of the buffer after the current write ptr, * plus some from the start of the buffer. */ - ret.first.buf = const_cast<std::byte*>(mBuffer.data() + w*mElemSize); + ret.first.buf = mBuffer.data() + w*mElemSize; ret.first.len = mSizeMask+1 - w; - ret.second.buf = const_cast<std::byte*>(mBuffer.data()); + ret.second.buf = mBuffer.data(); ret.second.len = cnt2 & mSizeMask; } else { - ret.first.buf = const_cast<std::byte*>(mBuffer.data() + w*mElemSize); + ret.first.buf = mBuffer.data() + w*mElemSize; ret.first.len = free_cnt; ret.second.buf = nullptr; ret.second.len = 0; diff --git a/common/ringbuffer.h b/common/ringbuffer.h index 8c65c3af..ee59205a 100644 --- a/common/ringbuffer.h +++ b/common/ringbuffer.h @@ -7,6 +7,7 @@ #include <utility> #include "almalloc.h" +#include "flexarray.h" /* NOTE: This lockless ringbuffer implementation is copied from JACK, extended @@ -32,30 +33,29 @@ public: }; using DataPair = std::pair<Data,Data>; - RingBuffer(const std::size_t count) : mBuffer{count} { } /** Reset the read and write pointers to zero. This is not thread safe. */ - void reset() noexcept; + auto reset() noexcept -> void; /** * The non-copying data reader. Returns two ringbuffer data pointers that * hold the current readable data. If the readable data is in one segment * the second segment has zero length. */ - DataPair getReadVector() const noexcept; + [[nodiscard]] auto getReadVector() noexcept -> DataPair; /** * The non-copying data writer. Returns two ringbuffer data pointers that * hold the current writeable data. If the writeable data is in one segment * the second segment has zero length. */ - DataPair getWriteVector() const noexcept; + [[nodiscard]] auto getWriteVector() noexcept -> DataPair; /** * Return the number of elements available for reading. This is the number * of elements in front of the read pointer and behind the write pointer. */ - std::size_t readSpace() const noexcept + [[nodiscard]] auto readSpace() const noexcept -> size_t { const size_t w{mWritePtr.load(std::memory_order_acquire)}; const size_t r{mReadPtr.load(std::memory_order_acquire)}; @@ -66,14 +66,14 @@ public: * The copying data reader. Copy at most `cnt' elements into `dest'. * Returns the actual number of elements copied. */ - std::size_t read(void *dest, std::size_t cnt) noexcept; + [[nodiscard]] auto read(void *dest, size_t cnt) noexcept -> size_t; /** * The copying data reader w/o read pointer advance. Copy at most `cnt' * elements into `dest'. Returns the actual number of elements copied. */ - std::size_t peek(void *dest, std::size_t cnt) const noexcept; + [[nodiscard]] auto peek(void *dest, size_t cnt) const noexcept -> size_t; /** Advance the read pointer `cnt' places. */ - void readAdvance(std::size_t cnt) noexcept + auto readAdvance(size_t cnt) noexcept -> void { mReadPtr.fetch_add(cnt, std::memory_order_acq_rel); } @@ -81,7 +81,7 @@ public: * Return the number of elements available for writing. This is the number * of elements in front of the write pointer and behind the read pointer. */ - std::size_t writeSpace() const noexcept + [[nodiscard]] auto writeSpace() const noexcept -> size_t { const size_t w{mWritePtr.load(std::memory_order_acquire)}; const size_t r{mReadPtr.load(std::memory_order_acquire) + mWriteSize - mSizeMask}; @@ -92,12 +92,12 @@ public: * The copying data writer. Copy at most `cnt' elements from `src'. Returns * the actual number of elements copied. */ - std::size_t write(const void *src, std::size_t cnt) noexcept; + [[nodiscard]] auto write(const void *src, size_t cnt) noexcept -> size_t; /** Advance the write pointer `cnt' places. */ - void writeAdvance(std::size_t cnt) noexcept + auto writeAdvance(size_t cnt) noexcept -> void { mWritePtr.fetch_add(cnt, std::memory_order_acq_rel); } - std::size_t getElemSize() const noexcept { return mElemSize; } + [[nodiscard]] auto getElemSize() const noexcept -> size_t { return mElemSize; } /** * Create a new ringbuffer to hold at least `sz' elements of `elem_sz' @@ -105,7 +105,8 @@ public: * (even if it is already a power of two, to ensure the requested amount * can be written). */ - static std::unique_ptr<RingBuffer> Create(std::size_t sz, std::size_t elem_sz, int limit_writes); + [[nodiscard]] + static auto Create(size_t sz, size_t elem_sz, int limit_writes) -> std::unique_ptr<RingBuffer>; DEF_FAM_NEWDEL(RingBuffer, mBuffer) }; diff --git a/common/vecmat.h b/common/vecmat.h index a45f262f..0cdb82eb 100644 --- a/common/vecmat.h +++ b/common/vecmat.h @@ -14,7 +14,7 @@ namespace alu { template<typename T> class VectorR { static_assert(std::is_floating_point<T>::value, "Must use floating-point types"); - alignas(16) T mVals[4]; + alignas(16) std::array<T,4> mVals; public: constexpr VectorR() noexcept = default; @@ -58,7 +58,7 @@ public: return T{0}; } - constexpr VectorR cross_product(const alu::VectorR<T> &rhs) const noexcept + [[nodiscard]] constexpr auto cross_product(const alu::VectorR<T> &rhs) const noexcept -> VectorR { return VectorR{ mVals[1]*rhs.mVals[2] - mVals[2]*rhs.mVals[1], @@ -67,7 +67,7 @@ public: T{0}}; } - constexpr T dot_product(const alu::VectorR<T> &rhs) const noexcept + [[nodiscard]] constexpr auto dot_product(const alu::VectorR<T> &rhs) const noexcept -> T { return mVals[0]*rhs.mVals[0] + mVals[1]*rhs.mVals[1] + mVals[2]*rhs.mVals[2]; } }; using Vector = VectorR<float>; @@ -75,7 +75,7 @@ using Vector = VectorR<float>; template<typename T> class MatrixR { static_assert(std::is_floating_point<T>::value, "Must use floating-point types"); - alignas(16) T mVals[16]; + alignas(16) std::array<T,16> mVals; public: constexpr MatrixR() noexcept = default; |