diff options
author | Chris Robinson <[email protected]> | 2023-12-09 12:35:07 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2023-12-09 12:35:07 -0800 |
commit | 44fbc93909a1a1d1dc26c01feb32bf13a5140234 (patch) | |
tree | a9e9c5d7d9fcf6a47e654177dbbdfb5e3b2bd155 /common | |
parent | decc10da2bdbb611cce63916f8c6f8b17ea45da1 (diff) |
Be less messy with PFFFT
Remove a 1-element array for an over-allocated struct array. Also add a wrapper
struct for C++.
Diffstat (limited to 'common')
-rw-r--r-- | common/pffft.cpp | 18 | ||||
-rw-r--r-- | common/pffft.h | 57 |
2 files changed, 60 insertions, 15 deletions
diff --git a/common/pffft.cpp b/common/pffft.cpp index 505c9791..bf564086 100644 --- a/common/pffft.cpp +++ b/common/pffft.cpp @@ -1413,13 +1413,13 @@ void pffft_aligned_free(void *p) { al_free(p); } int pffft_simd_size() { return SIMD_SZ; } struct PFFFT_Setup { - uint N; + alignas(MALLOC_V4SF_ALIGNMENT) uint N; uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) std::array<uint,15> ifac; pffft_transform_t transform; float *twiddle; // N/4 elements - alignas(MALLOC_V4SF_ALIGNMENT) v4sf e[1]; // N/4*3 elements + al::span<v4sf> e; // N/4*3 elements }; PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) @@ -1436,8 +1436,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) assert((N%(SIMD_SZ*SIMD_SZ)) == 0); const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - const size_t storelen{std::max(sizeof(PFFFT_Setup), - offsetof(PFFFT_Setup, e[0]) + (2u*Ncvec * sizeof(v4sf)))}; + const size_t storelen{sizeof(PFFFT_Setup) + (2u*Ncvec * sizeof(v4sf))}; void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)}; if(!store) return nullptr; @@ -1447,6 +1446,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) s->transform = transform; /* nb of complex simd vectors */ s->Ncvec = Ncvec; + s->e = {reinterpret_cast<v4sf*>(reinterpret_cast<char*>(s+1)), 2u*Ncvec}; s->twiddle = reinterpret_cast<float*>(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]); if constexpr(SIMD_SZ > 1) @@ -1463,7 +1463,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) e[((i*3 + m)*2 + 1)*SIMD_SZ + j] = static_cast<float>(std::sin(A)); } } - std::memcpy(s->e, e.data(), e.size()*sizeof(float)); + std::memcpy(s->e.data(), e.data(), e.size()*sizeof(float)); } if(transform == PFFFT_REAL) rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); @@ -1825,7 +1825,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf if(setup->transform == PFFFT_REAL) { ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], setup->twiddle, setup->ifac) == buff[1]); - pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } else { @@ -1834,7 +1834,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf uninterleave2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle, setup->ifac, -1.0f) == buff[1]); - pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } if(ordered) pffft_zreorder(setup, reinterpret_cast<float*>(buff[!ib]), @@ -1856,12 +1856,12 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf } if(setup->transform == PFFFT_REAL) { - pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac) == buff[1]); } else { - pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac, +1.0f) == buff[1]); for(size_t k{0};k < Ncvec;++k) interleave2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); diff --git a/common/pffft.h b/common/pffft.h index 9cff9e54..b31304f6 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -83,23 +83,27 @@ #include <stdint.h> #ifdef __cplusplus +#include <cstddef> +#include <utility> + extern "C" { #endif /* opaque struct holding internal stuff (precomputed twiddle factors) this * struct can be shared by many threads as it contains only read-only data. */ -typedef struct PFFFT_Setup PFFFT_Setup; - -#ifndef PFFFT_COMMON_ENUMS -#define PFFFT_COMMON_ENUMS +struct PFFFT_Setup; /* direction of the transform */ -typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; +enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD }; /* type of transform */ -typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; +enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX }; +#ifndef __cplusplus +typedef struct PFFFT_Setup PFFFT_Setup; +typedef enum pffft_direction_t pffft_direction_t; +typedef enum pffft_transform_t pffft_transform_t; #endif /** @@ -187,6 +191,47 @@ int pffft_simd_size(); #ifdef __cplusplus } + +struct PFFFTSetup { + PFFFT_Setup *mSetup{}; + + PFFFTSetup() = default; + PFFFTSetup(const PFFFTSetup&) = delete; + PFFFTSetup(PFFFTSetup&& rhs) : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } + explicit PFFFTSetup(std::nullptr_t) { } + explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) + : mSetup{pffft_new_setup(n, transform)} + { } + ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); } + + PFFFTSetup& operator=(const PFFFTSetup&) = delete; + PFFFTSetup& operator=(PFFFTSetup&& rhs) + { + if(mSetup) + pffft_destroy_setup(mSetup); + mSetup = std::exchange(rhs.mSetup, nullptr); + return *this; + } + + void transform(const float *input, float *output, float *work, pffft_direction_t direction) const + { pffft_transform(mSetup, input, output, work, direction); } + + void transform_ordered(const float *input, float *output, float *work, + pffft_direction_t direction) const + { pffft_transform_ordered(mSetup, input, output, work, direction); } + + void zreorder(const float *input, float *output, pffft_direction_t direction) const + { pffft_zreorder(mSetup, input, output, direction); } + + void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab, + float scaling) const + { pffft_zconvolve_scale_accumulate(mSetup, dft_a, dft_b, dft_ab, scaling); } + + void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const + { pffft_zconvolve_accumulate(mSetup, dft_a, dft_b, dft_ab); } + + [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; } +}; #endif #endif // PFFFT_H |