diff options
-rw-r--r-- | alc/effects/convolution.cpp | 22 | ||||
-rw-r--r-- | alc/effects/pshifter.cpp | 18 | ||||
-rw-r--r-- | common/pffft.cpp | 18 | ||||
-rw-r--r-- | common/pffft.h | 57 | ||||
-rw-r--r-- | core/uhjfilter.cpp | 23 |
5 files changed, 82 insertions, 56 deletions
diff --git a/alc/effects/convolution.cpp b/alc/effects/convolution.cpp index 8db7a045..5e81f6d1 100644 --- a/alc/effects/convolution.cpp +++ b/alc/effects/convolution.cpp @@ -190,12 +190,6 @@ void apply_fir(al::span<float> dst, const float *RESTRICT src, const float *REST } -struct PFFFTSetupDeleter { - void operator()(PFFFT_Setup *ptr) { pffft_destroy_setup(ptr); } -}; -using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>; - - struct ConvolutionState final : public EffectState { FmtChannels mChannels{}; AmbiLayout mAmbiLayout{}; @@ -207,7 +201,7 @@ struct ConvolutionState final : public EffectState { al::vector<std::array<float,ConvolveUpdateSamples>,16> mFilter; al::vector<std::array<float,ConvolveUpdateSamples*2>,16> mOutput; - PFFFTSetupPtr mFft{}; + PFFFTSetup mFft{}; alignas(16) std::array<float,ConvolveUpdateSize> mFftBuffer{}; alignas(16) std::array<float,ConvolveUpdateSize> mFftWorkBuffer{}; @@ -270,7 +264,7 @@ void ConvolutionState::deviceUpdate(const DeviceBase *device, const BufferStorag static constexpr uint MaxConvolveAmbiOrder{1u}; if(!mFft) - mFft = PFFFTSetupPtr{pffft_new_setup(ConvolveUpdateSize, PFFFT_REAL)}; + mFft = PFFFTSetup{ConvolveUpdateSize, PFFFT_REAL}; mFifoPos = 0; mInput.fill(0.0f); @@ -400,7 +394,7 @@ void ConvolutionState::deviceUpdate(const DeviceBase *device, const BufferStorag /* Reorder backward to make it suitable for pffft_zconvolve and the * subsequent pffft_transform(..., PFFFT_BACKWARD). */ - pffft_zreorder(mFft.get(), ffttmp.data(), al::to_address(filteriter), PFFFT_BACKWARD); + mFft.zreorder(ffttmp.data(), al::to_address(filteriter), PFFFT_BACKWARD); filteriter += ConvolveUpdateSize; } } @@ -642,7 +636,7 @@ void ConvolutionState::process(const size_t samplesToDo, /* Calculate the frequency-domain response and add the relevant * frequency bins to the FFT history. */ - pffft_transform(mFft.get(), mInput.data(), mComplexData.data() + curseg*ConvolveUpdateSize, + mFft.transform(mInput.data(), mComplexData.data() + curseg*ConvolveUpdateSize, mFftWorkBuffer.data(), PFFFT_FORWARD); const float *filter{mComplexData.data() + mNumConvolveSegs*ConvolveUpdateSize}; @@ -655,14 +649,14 @@ void ConvolutionState::process(const size_t samplesToDo, const float *input{&mComplexData[curseg*ConvolveUpdateSize]}; for(size_t s{curseg};s < mNumConvolveSegs;++s) { - pffft_zconvolve_accumulate(mFft.get(), input, filter, mFftBuffer.data()); + mFft.zconvolve_accumulate(input, filter, mFftBuffer.data()); input += ConvolveUpdateSize; filter += ConvolveUpdateSize; } input = mComplexData.data(); for(size_t s{0};s < curseg;++s) { - pffft_zconvolve_accumulate(mFft.get(), input, filter, mFftBuffer.data()); + mFft.zconvolve_accumulate(input, filter, mFftBuffer.data()); input += ConvolveUpdateSize; filter += ConvolveUpdateSize; } @@ -672,8 +666,8 @@ void ConvolutionState::process(const size_t samplesToDo, * second-half samples (and this output's second half is * subsequently saved for next time). */ - pffft_transform(mFft.get(), mFftBuffer.data(), mFftBuffer.data(), - mFftWorkBuffer.data(), PFFFT_BACKWARD); + mFft.transform(mFftBuffer.data(), mFftBuffer.data(), mFftWorkBuffer.data(), + PFFFT_BACKWARD); /* The filter was attenuated, so the response is already scaled. */ for(size_t i{0};i < ConvolveUpdateSamples;++i) diff --git a/alc/effects/pshifter.cpp b/alc/effects/pshifter.cpp index 0c27be30..871e866a 100644 --- a/alc/effects/pshifter.cpp +++ b/alc/effects/pshifter.cpp @@ -74,12 +74,6 @@ struct Windower { const Windower gWindow{}; -struct PFFFTSetupDeleter { - void operator()(PFFFT_Setup *ptr) { pffft_destroy_setup(ptr); } -}; -using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>; - - struct FrequencyBin { float Magnitude; float FreqBin; @@ -99,7 +93,7 @@ struct PshifterState final : public EffectState { std::array<float,StftHalfSize+1> mSumPhase; std::array<float,StftSize> mOutputAccum; - PFFFTSetupPtr mFft; + PFFFTSetup mFft; alignas(16) std::array<float,StftSize> mFftBuffer; alignas(16) std::array<float,StftSize> mFftWorkBuffer; @@ -142,7 +136,7 @@ void PshifterState::deviceUpdate(const DeviceBase*, const BufferStorage*) std::fill(std::begin(mTargetGains), std::end(mTargetGains), 0.0f); if(!mFft) - mFft = PFFFTSetupPtr{pffft_new_setup(StftSize, PFFFT_REAL)}; + mFft = PFFFTSetup{StftSize, PFFFT_REAL}; } void PshifterState::update(const ContextBase*, const EffectSlot *slot, @@ -197,8 +191,8 @@ void PshifterState::process(const size_t samplesToDo, mFftBuffer[k] = mFIFO[src] * gWindow.mData[k]; for(size_t src{0u}, k{StftSize-mPos};src < mPos;++src,++k) mFftBuffer[k] = mFIFO[src] * gWindow.mData[k]; - pffft_transform_ordered(mFft.get(), mFftBuffer.data(), mFftBuffer.data(), - mFftWorkBuffer.data(), PFFFT_FORWARD); + mFft.transform_ordered(mFftBuffer.data(), mFftBuffer.data(), mFftWorkBuffer.data(), + PFFFT_FORWARD); /* Analyze the obtained data. Since the real FFT is symmetric, only * StftHalfSize+1 samples are needed. @@ -296,8 +290,8 @@ void PshifterState::process(const size_t samplesToDo, /* Apply an inverse FFT to get the time-domain signal, and accumulate * for the output with windowing. */ - pffft_transform_ordered(mFft.get(), mFftBuffer.data(), mFftBuffer.data(), - mFftWorkBuffer.data(), PFFFT_BACKWARD); + mFft.transform_ordered(mFftBuffer.data(), mFftBuffer.data(), mFftWorkBuffer.data(), + PFFFT_BACKWARD); static constexpr float scale{3.0f / OversampleFactor / StftSize}; for(size_t dst{mPos}, k{0u};dst < StftSize;++dst,++k) diff --git a/common/pffft.cpp b/common/pffft.cpp index 505c9791..bf564086 100644 --- a/common/pffft.cpp +++ b/common/pffft.cpp @@ -1413,13 +1413,13 @@ void pffft_aligned_free(void *p) { al_free(p); } int pffft_simd_size() { return SIMD_SZ; } struct PFFFT_Setup { - uint N; + alignas(MALLOC_V4SF_ALIGNMENT) uint N; uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) std::array<uint,15> ifac; pffft_transform_t transform; float *twiddle; // N/4 elements - alignas(MALLOC_V4SF_ALIGNMENT) v4sf e[1]; // N/4*3 elements + al::span<v4sf> e; // N/4*3 elements }; PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) @@ -1436,8 +1436,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) assert((N%(SIMD_SZ*SIMD_SZ)) == 0); const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - const size_t storelen{std::max(sizeof(PFFFT_Setup), - offsetof(PFFFT_Setup, e[0]) + (2u*Ncvec * sizeof(v4sf)))}; + const size_t storelen{sizeof(PFFFT_Setup) + (2u*Ncvec * sizeof(v4sf))}; void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)}; if(!store) return nullptr; @@ -1447,6 +1446,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) s->transform = transform; /* nb of complex simd vectors */ s->Ncvec = Ncvec; + s->e = {reinterpret_cast<v4sf*>(reinterpret_cast<char*>(s+1)), 2u*Ncvec}; s->twiddle = reinterpret_cast<float*>(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]); if constexpr(SIMD_SZ > 1) @@ -1463,7 +1463,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) e[((i*3 + m)*2 + 1)*SIMD_SZ + j] = static_cast<float>(std::sin(A)); } } - std::memcpy(s->e, e.data(), e.size()*sizeof(float)); + std::memcpy(s->e.data(), e.data(), e.size()*sizeof(float)); } if(transform == PFFFT_REAL) rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); @@ -1825,7 +1825,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf if(setup->transform == PFFFT_REAL) { ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], setup->twiddle, setup->ifac) == buff[1]); - pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_real_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } else { @@ -1834,7 +1834,7 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf uninterleave2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle, setup->ifac, -1.0f) == buff[1]); - pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e); + pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], setup->e.data()); } if(ordered) pffft_zreorder(setup, reinterpret_cast<float*>(buff[!ib]), @@ -1856,12 +1856,12 @@ void pffft_transform_internal(const PFFFT_Setup *setup, const v4sf *vinput, v4sf } if(setup->transform == PFFFT_REAL) { - pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_real_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac) == buff[1]); } else { - pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e); + pffft_cplx_preprocess(Ncvec, vinput, buff[ib], setup->e.data()); ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], setup->twiddle, setup->ifac, +1.0f) == buff[1]); for(size_t k{0};k < Ncvec;++k) interleave2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); diff --git a/common/pffft.h b/common/pffft.h index 9cff9e54..b31304f6 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -83,23 +83,27 @@ #include <stdint.h> #ifdef __cplusplus +#include <cstddef> +#include <utility> + extern "C" { #endif /* opaque struct holding internal stuff (precomputed twiddle factors) this * struct can be shared by many threads as it contains only read-only data. */ -typedef struct PFFFT_Setup PFFFT_Setup; - -#ifndef PFFFT_COMMON_ENUMS -#define PFFFT_COMMON_ENUMS +struct PFFFT_Setup; /* direction of the transform */ -typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; +enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD }; /* type of transform */ -typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; +enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX }; +#ifndef __cplusplus +typedef struct PFFFT_Setup PFFFT_Setup; +typedef enum pffft_direction_t pffft_direction_t; +typedef enum pffft_transform_t pffft_transform_t; #endif /** @@ -187,6 +191,47 @@ int pffft_simd_size(); #ifdef __cplusplus } + +struct PFFFTSetup { + PFFFT_Setup *mSetup{}; + + PFFFTSetup() = default; + PFFFTSetup(const PFFFTSetup&) = delete; + PFFFTSetup(PFFFTSetup&& rhs) : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } + explicit PFFFTSetup(std::nullptr_t) { } + explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) + : mSetup{pffft_new_setup(n, transform)} + { } + ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); } + + PFFFTSetup& operator=(const PFFFTSetup&) = delete; + PFFFTSetup& operator=(PFFFTSetup&& rhs) + { + if(mSetup) + pffft_destroy_setup(mSetup); + mSetup = std::exchange(rhs.mSetup, nullptr); + return *this; + } + + void transform(const float *input, float *output, float *work, pffft_direction_t direction) const + { pffft_transform(mSetup, input, output, work, direction); } + + void transform_ordered(const float *input, float *output, float *work, + pffft_direction_t direction) const + { pffft_transform_ordered(mSetup, input, output, work, direction); } + + void zreorder(const float *input, float *output, pffft_direction_t direction) const + { pffft_zreorder(mSetup, input, output, direction); } + + void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab, + float scaling) const + { pffft_zconvolve_scale_accumulate(mSetup, dft_a, dft_b, dft_ab, scaling); } + + void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const + { pffft_zconvolve_accumulate(mSetup, dft_a, dft_b, dft_ab); } + + [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; } +}; #endif #endif // PFFFT_H diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp index 28999e09..e507d705 100644 --- a/core/uhjfilter.cpp +++ b/core/uhjfilter.cpp @@ -20,11 +20,6 @@ UhjQualityType UhjEncodeQuality{UhjQualityType::Default}; namespace { -struct PFFFTSetupDeleter { - void operator()(PFFFT_Setup *ptr) { pffft_destroy_setup(ptr); } -}; -using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>; - /* Convolution is implemented using a segmented overlap-add method. The filter * response is broken up into multiple segments of 128 samples, and each * segment has an FFT applied with a 256-sample buffer (the latter half left @@ -57,13 +52,11 @@ struct SegmentedFilter { static_assert(N >= sFftLength); static_assert((N % sSampleLength) == 0); - PFFFTSetupPtr mFft; + PFFFTSetup mFft; alignas(16) std::array<float,sFftLength*sNumSegments> mFilterData; - SegmentedFilter() + SegmentedFilter() : mFft{sFftLength, PFFFT_REAL} { - mFft = PFFFTSetupPtr{pffft_new_setup(sFftLength, PFFFT_REAL)}; - using complex_d = std::complex<double>; constexpr size_t fft_size{N}; constexpr size_t half_size{fft_size / 2}; @@ -113,7 +106,7 @@ struct SegmentedFilter { fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer2[sSampleLength].real() : fftBuffer2[i].imag()) / float{sFftLength}; } - pffft_zreorder(mFft.get(), fftTmp.data(), filter, PFFFT_BACKWARD); + mFft.zreorder(fftTmp.data(), filter, PFFFT_BACKWARD); filter += sFftLength; } } @@ -246,7 +239,7 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut, std::copy_n(mWXInOut.begin(), sSegmentSize, input); std::fill_n(input+sSegmentSize, sSegmentSize, 0.0f); - pffft_transform(Filter.mFft.get(), input, input, mWorkData.data(), PFFFT_FORWARD); + Filter.mFft.transform(input, input, mWorkData.data(), PFFFT_FORWARD); /* Convolve each input segment with its IR filter counterpart (aligned * in time, from newest to oldest). @@ -255,14 +248,14 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut, const float *filter{Filter.mFilterData.data()}; for(size_t s{curseg};s < sNumSegments;++s) { - pffft_zconvolve_accumulate(Filter.mFft.get(), input, filter, mFftBuffer.data()); + Filter.mFft.zconvolve_accumulate(input, filter, mFftBuffer.data()); input += sFftLength; filter += sFftLength; } input = mWXHistory.data(); for(size_t s{0};s < curseg;++s) { - pffft_zconvolve_accumulate(Filter.mFft.get(), input, filter, mFftBuffer.data()); + Filter.mFft.zconvolve_accumulate(input, filter, mFftBuffer.data()); input += sFftLength; filter += sFftLength; } @@ -270,8 +263,8 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut, /* Convert back to samples, writing to the output and storing the extra * for next time. */ - pffft_transform(Filter.mFft.get(), mFftBuffer.data(), mFftBuffer.data(), - mWorkData.data(), PFFFT_BACKWARD); + Filter.mFft.transform(mFftBuffer.data(), mFftBuffer.data(), mWorkData.data(), + PFFFT_BACKWARD); for(size_t i{0};i < sSegmentSize;++i) mWXInOut[i] = mFftBuffer[i] + mWXInOut[sSegmentSize+i]; |