aboutsummaryrefslogtreecommitdiffstats
path: root/common/alcomplex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'common/alcomplex.cpp')
-rw-r--r--common/alcomplex.cpp142
1 files changed, 95 insertions, 47 deletions
diff --git a/common/alcomplex.cpp b/common/alcomplex.cpp
index 4420a1bb..82a0c43c 100644
--- a/common/alcomplex.cpp
+++ b/common/alcomplex.cpp
@@ -4,6 +4,7 @@
#include "alcomplex.h"
#include <algorithm>
+#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
@@ -20,6 +21,7 @@ namespace {
using ushort = unsigned short;
using ushort2 = std::pair<ushort,ushort>;
+using complex_d = std::complex<double>;
constexpr size_t BitReverseCounter(size_t log2_size) noexcept
{
@@ -34,7 +36,7 @@ template<size_t N>
struct BitReverser {
static_assert(N <= sizeof(ushort)*8, "Too many bits for the bit-reversal table.");
- ushort2 mData[BitReverseCounter(N)]{};
+ std::array<ushort2,BitReverseCounter(N)> mData{};
constexpr BitReverser()
{
@@ -44,12 +46,13 @@ struct BitReverser {
/* Bit-reversal permutation applied to a sequence of fftsize items. */
for(size_t idx{1u};idx < fftsize-1;++idx)
{
- size_t revidx{0u}, imask{idx};
- for(size_t i{0};i < N;++i)
- {
- revidx = (revidx<<1) | (imask&1);
- imask >>= 1;
- }
+ size_t revidx{idx};
+ revidx = ((revidx&0xaaaaaaaa) >> 1) | ((revidx&0x55555555) << 1);
+ revidx = ((revidx&0xcccccccc) >> 2) | ((revidx&0x33333333) << 2);
+ revidx = ((revidx&0xf0f0f0f0) >> 4) | ((revidx&0x0f0f0f0f) << 4);
+ revidx = ((revidx&0xff00ff00) >> 8) | ((revidx&0x00ff00ff) << 8);
+ revidx = (revidx >> 16) | ((revidx&0x0000ffff) << 16);
+ revidx >>= 32-N;
if(idx < revidx)
{
@@ -58,14 +61,13 @@ struct BitReverser {
++ret_i;
}
}
- assert(ret_i == al::size(mData));
+ assert(ret_i == std::size(mData));
}
};
-/* These bit-reversal swap tables support up to 10-bit indices (1024 elements),
- * which is the largest used by OpenAL Soft's filters and effects. Larger FFT
- * requests, used by some utilities where performance is less important, will
- * use a slower table-less path.
+/* These bit-reversal swap tables support up to 11-bit indices (2048 elements),
+ * which is large enough for the filters and effects in OpenAL Soft. Larger FFT
+ * requests will use a slower table-less path.
*/
constexpr BitReverser<2> BitReverser2{};
constexpr BitReverser<3> BitReverser3{};
@@ -76,7 +78,8 @@ constexpr BitReverser<7> BitReverser7{};
constexpr BitReverser<8> BitReverser8{};
constexpr BitReverser<9> BitReverser9{};
constexpr BitReverser<10> BitReverser10{};
-constexpr std::array<al::span<const ushort2>,11> gBitReverses{{
+constexpr BitReverser<11> BitReverser11{};
+constexpr std::array<al::span<const ushort2>,12> gBitReverses{{
{}, {},
BitReverser2.mData,
BitReverser3.mData,
@@ -86,14 +89,29 @@ constexpr std::array<al::span<const ushort2>,11> gBitReverses{{
BitReverser7.mData,
BitReverser8.mData,
BitReverser9.mData,
- BitReverser10.mData
+ BitReverser10.mData,
+ BitReverser11.mData
+}};
+
+/* Lookup table for std::polar(1, pi / (1<<index)); */
+template<typename T>
+constexpr std::array<std::complex<T>,gBitReverses.size()-1> gArgAngle{{
+ {static_cast<T>(-1.00000000000000000e+00), static_cast<T>(0.00000000000000000e+00)},
+ {static_cast<T>( 0.00000000000000000e+00), static_cast<T>(1.00000000000000000e+00)},
+ {static_cast<T>( 7.07106781186547524e-01), static_cast<T>(7.07106781186547524e-01)},
+ {static_cast<T>( 9.23879532511286756e-01), static_cast<T>(3.82683432365089772e-01)},
+ {static_cast<T>( 9.80785280403230449e-01), static_cast<T>(1.95090322016128268e-01)},
+ {static_cast<T>( 9.95184726672196886e-01), static_cast<T>(9.80171403295606020e-02)},
+ {static_cast<T>( 9.98795456205172393e-01), static_cast<T>(4.90676743274180143e-02)},
+ {static_cast<T>( 9.99698818696204220e-01), static_cast<T>(2.45412285229122880e-02)},
+ {static_cast<T>( 9.99924701839144541e-01), static_cast<T>(1.22715382857199261e-02)},
+ {static_cast<T>( 9.99981175282601143e-01), static_cast<T>(6.13588464915447536e-03)},
+ {static_cast<T>( 9.99995293809576172e-01), static_cast<T>(3.06795676296597627e-03)}
}};
} // namespace
-template<typename Real>
-std::enable_if_t<std::is_floating_point<Real>::value>
-complex_fft(const al::span<std::complex<Real>> buffer, const al::type_identity_t<Real> sign)
+void complex_fft(const al::span<std::complex<double>> buffer, const double sign)
{
const size_t fftsize{buffer.size()};
/* Get the number of bits used for indexing. Simplifies bit-reversal and
@@ -101,48 +119,82 @@ complex_fft(const al::span<std::complex<Real>> buffer, const al::type_identity_t
*/
const size_t log2_size{static_cast<size_t>(al::countr_zero(fftsize))};
- if(log2_size >= gBitReverses.size()) UNLIKELY
+ if(log2_size < gBitReverses.size()) LIKELY
{
- for(size_t idx{1u};idx < fftsize-1;++idx)
+ for(auto &rev : gBitReverses[log2_size])
+ std::swap(buffer[rev.first], buffer[rev.second]);
+
+ /* Iterative form of Danielson-Lanczos lemma */
+ for(size_t i{0};i < log2_size;++i)
{
- size_t revidx{0u}, imask{idx};
- for(size_t i{0};i < log2_size;++i)
+ const size_t step2{1_uz << i};
+ const size_t step{2_uz << i};
+ /* The first iteration of the inner loop would have u=1, which we
+ * can simplify to remove a number of complex multiplies.
+ */
+ for(size_t k{0};k < fftsize;k+=step)
{
- revidx = (revidx<<1) | (imask&1);
- imask >>= 1;
+ const complex_d temp{buffer[k+step2]};
+ buffer[k+step2] = buffer[k] - temp;
+ buffer[k] += temp;
}
- if(idx < revidx)
- std::swap(buffer[idx], buffer[revidx]);
+ const complex_d w{gArgAngle<double>[i].real(), gArgAngle<double>[i].imag()*sign};
+ complex_d u{w};
+ for(size_t j{1};j < step2;j++)
+ {
+ for(size_t k{j};k < fftsize;k+=step)
+ {
+ const complex_d temp{buffer[k+step2] * u};
+ buffer[k+step2] = buffer[k] - temp;
+ buffer[k] += temp;
+ }
+ u *= w;
+ }
}
}
- else for(auto &rev : gBitReverses[log2_size])
- std::swap(buffer[rev.first], buffer[rev.second]);
-
- /* Iterative form of Danielson-Lanczos lemma */
- const Real pi{al::numbers::pi_v<Real> * sign};
- size_t step2{1u};
- for(size_t i{0};i < log2_size;++i)
+ else
{
- const Real arg{pi / static_cast<Real>(step2)};
+ for(size_t idx{1u};idx < fftsize-1;++idx)
+ {
+ size_t revidx{idx};
+ revidx = ((revidx&0xaaaaaaaa) >> 1) | ((revidx&0x55555555) << 1);
+ revidx = ((revidx&0xcccccccc) >> 2) | ((revidx&0x33333333) << 2);
+ revidx = ((revidx&0xf0f0f0f0) >> 4) | ((revidx&0x0f0f0f0f) << 4);
+ revidx = ((revidx&0xff00ff00) >> 8) | ((revidx&0x00ff00ff) << 8);
+ revidx = (revidx >> 16) | ((revidx&0x0000ffff) << 16);
+ revidx >>= 32-log2_size;
+
+ if(idx < revidx)
+ std::swap(buffer[idx], buffer[revidx]);
+ }
- /* TODO: Would std::polar(1.0, arg) be any better? */
- const std::complex<Real> w{std::cos(arg), std::sin(arg)};
- std::complex<Real> u{1.0, 0.0};
- const size_t step{step2 << 1};
- for(size_t j{0};j < step2;j++)
+ const double pi{al::numbers::pi * sign};
+ for(size_t i{0};i < log2_size;++i)
{
- for(size_t k{j};k < fftsize;k+=step)
+ const size_t step2{1_uz << i};
+ const size_t step{2_uz << i};
+ for(size_t k{0};k < fftsize;k+=step)
{
- std::complex<Real> temp{buffer[k+step2] * u};
+ const complex_d temp{buffer[k+step2]};
buffer[k+step2] = buffer[k] - temp;
buffer[k] += temp;
}
- u *= w;
+ const double arg{pi / static_cast<double>(step2)};
+ const complex_d w{std::polar(1.0, arg)};
+ complex_d u{w};
+ for(size_t j{1};j < step2;j++)
+ {
+ for(size_t k{j};k < fftsize;k+=step)
+ {
+ const complex_d temp{buffer[k+step2] * u};
+ buffer[k+step2] = buffer[k] - temp;
+ buffer[k] += temp;
+ }
+ u *= w;
+ }
}
-
- step2 <<= 1;
}
}
@@ -165,7 +217,3 @@ void complex_hilbert(const al::span<std::complex<double>> buffer)
forward_fft(buffer);
}
-
-
-template void complex_fft<>(const al::span<std::complex<float>> buffer, const float sign);
-template void complex_fft<>(const al::span<std::complex<double>> buffer, const double sign);