aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Robinson <[email protected]>2023-12-27 04:23:19 -0800
committerChris Robinson <[email protected]>2023-12-27 04:23:19 -0800
commit768781bab97732fbd0d66fa153d4ebc768be1240 (patch)
tree32bc3c16e708c2a24c2aa322bcbfe27e82c31bb9
parent205a73876234c0b1363189306530ada73ece56f2 (diff)
Improve ownership handling with PFFFT
-rw-r--r--common/pffft.cpp53
-rw-r--r--common/pffft.h41
2 files changed, 36 insertions, 58 deletions
diff --git a/common/pffft.cpp b/common/pffft.cpp
index 9d9dad23..bbfbaa49 100644
--- a/common/pffft.cpp
+++ b/common/pffft.cpp
@@ -380,7 +380,9 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept
#endif //!PFFFT_SIMD_DISABLE
/* SSE and co like 16-bytes aligned pointers */
-#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines...
+/* with a 64-byte alignment, we are even aligned on L2 cache lines... */
+constexpr auto V4sfAlignment = size_t(64);
+constexpr auto V4sfAlignVal = std::align_val_t(V4sfAlignment);
/*
passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
@@ -1406,24 +1408,20 @@ void cffti1_ps(const uint n, float *wa, const al::span<uint,15> ifac)
} // namespace
-void *pffft_aligned_malloc(size_t nb_bytes)
-{ return al_malloc(MALLOC_V4SF_ALIGNMENT, nb_bytes); }
-
-void pffft_aligned_free(void *p) noexcept { al_free(p); }
-
-int pffft_simd_size() noexcept { return SIMD_SZ; }
-
+/* NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) */
struct PFFFT_Setup {
- alignas(MALLOC_V4SF_ALIGNMENT) uint N;
- uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL)
- std::array<uint,15> ifac;
- pffft_transform_t transform;
+ uint N{};
+ uint Ncvec{}; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */
+ std::array<uint,15> ifac{};
+ pffft_transform_t transform{};
- float *twiddle; // N/4 elements
- al::span<v4sf> e; // N/4*3 elements
+ float *twiddle{}; /* N/4 elements */
+ al::span<v4sf> e; /* N/4*3 elements */
+
+ alignas(V4sfAlignment) std::byte end;
};
-PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
+gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform)
{
assert(transform == PFFFT_REAL || transform == PFFFT_COMPLEX);
assert(N > 0);
@@ -1436,23 +1434,25 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
else
assert((N%(SIMD_SZ*SIMD_SZ)) == 0);
- const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ;
- const size_t storelen{sizeof(PFFFT_Setup) + (2_zu*Ncvec * sizeof(v4sf))};
+ const uint Ncvec{(transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ};
- void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)};
- if(!store) return nullptr;
+ const size_t storelen{std::max(offsetof(PFFFT_Setup, end) + 2_zu*Ncvec*sizeof(v4sf),
+ sizeof(PFFFT_Setup))};
+ gsl::owner<std::byte*> storage{::new(V4sfAlignVal) std::byte[storelen]{}};
+ al::span extrastore{&storage[offsetof(PFFFT_Setup, end)], 2_zu*Ncvec*sizeof(v4sf)};
- PFFFT_Setup *s{::new(store) PFFFT_Setup{}};
+ gsl::owner<PFFFT_Setup*> s{::new(storage) PFFFT_Setup{}};
s->N = N;
s->transform = transform;
- /* nb of complex simd vectors */
s->Ncvec = Ncvec;
- s->e = {reinterpret_cast<v4sf*>(reinterpret_cast<char*>(s+1)), 2_zu*Ncvec};
- s->twiddle = reinterpret_cast<float*>(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]);
+
+ const size_t ecount{2_zu*Ncvec*(SIMD_SZ-1)/SIMD_SZ};
+ s->e = {std::launder(reinterpret_cast<v4sf*>(extrastore.data())), ecount};
+ s->twiddle = std::launder(reinterpret_cast<float*>(&extrastore[ecount*sizeof(v4sf)]));
if constexpr(SIMD_SZ > 1)
{
- auto e = std::vector<float>(2_zu*Ncvec*(SIMD_SZ-1), 0.0f);
+ auto e = std::vector<float>(s->e.size()*SIMD_SZ, 0.0f);
for(size_t k{0};k < s->Ncvec;++k)
{
const size_t i{k / SIMD_SZ};
@@ -1486,10 +1486,11 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform)
}
-void pffft_destroy_setup(PFFFT_Setup *s) noexcept
+void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> s) noexcept
{
std::destroy_at(s);
- al_free(s);
+ auto storage = reinterpret_cast<gsl::owner<std::byte*>>(s);
+ ::operator delete[](storage, V4sfAlignVal);
}
#if !defined(PFFFT_SIMD_DISABLE)
diff --git a/common/pffft.h b/common/pffft.h
index 5ef03820..cf356524 100644
--- a/common/pffft.h
+++ b/common/pffft.h
@@ -79,15 +79,12 @@
#ifndef PFFFT_H
#define PFFFT_H
-#include <stddef.h> // for size_t
-#include <stdint.h>
-
-#ifdef __cplusplus
#include <cstddef>
+#include <cstdint>
#include <utility>
-extern "C" {
-#endif
+#include "almalloc.h"
+
/* opaque struct holding internal stuff (precomputed twiddle factors) this
* struct can be shared by many threads as it contains only read-only data.
@@ -100,20 +97,14 @@ enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD };
/* type of transform */
enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX };
-#ifndef __cplusplus
-typedef struct PFFFT_Setup PFFFT_Setup;
-typedef enum pffft_direction_t pffft_direction_t;
-typedef enum pffft_transform_t pffft_transform_t;
-#endif
-
/**
* Prepare for performing transforms of size N -- the returned PFFFT_Setup
* structure is read-only so it can safely be shared by multiple concurrent
* threads.
*/
[[gnu::malloc]]
-PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform);
-void pffft_destroy_setup(PFFFT_Setup *setup) noexcept;
+gsl::owner<PFFFT_Setup*> pffft_new_setup(unsigned int N, pffft_transform_t transform);
+void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> setup) noexcept;
/**
* Perform a Fourier transform. The z-domain data is stored in the most
@@ -179,28 +170,14 @@ void pffft_zconvolve_scale_accumulate(const PFFFT_Setup *setup, const float *dft
*/
void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab);
-/**
- * The float buffers must have the correct alignment (16-byte boundary on intel
- * and powerpc). This function may be used to obtain such correctly aligned
- * buffers.
- */
-[[gnu::alloc_size(1), gnu::malloc]]
-void *pffft_aligned_malloc(size_t nb_bytes);
-void pffft_aligned_free(void *ptr) noexcept;
-
-/* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */
-int pffft_simd_size() noexcept;
-
-#ifdef __cplusplus
-}
struct PFFFTSetup {
- PFFFT_Setup *mSetup{};
+ gsl::owner<PFFFT_Setup*> mSetup{};
PFFFTSetup() = default;
PFFFTSetup(const PFFFTSetup&) = delete;
PFFFTSetup(PFFFTSetup&& rhs) noexcept : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; }
- explicit PFFFTSetup(std::nullptr_t) { }
+ explicit PFFFTSetup(std::nullptr_t) noexcept { }
explicit PFFFTSetup(unsigned int n, pffft_transform_t transform)
: mSetup{pffft_new_setup(n, transform)}
{ }
@@ -211,7 +188,8 @@ struct PFFFTSetup {
{
if(mSetup)
pffft_destroy_setup(mSetup);
- mSetup = std::exchange(rhs.mSetup, nullptr);
+ mSetup = rhs.mSetup;
+ rhs.mSetup = nullptr;
return *this;
}
@@ -234,6 +212,5 @@ struct PFFFTSetup {
[[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; }
};
-#endif
#endif // PFFFT_H