common/alnumeric.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

#ifndef AL_NUMERIC_H
#define AL_NUMERIC_H

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#ifdef HAVE_INTRIN_H
#include <intrin.h>
#endif
#ifdef HAVE_SSE_INTRINSICS
#include <xmmintrin.h>
#endif

#include "albit.h"
#include "altraits.h"
#include "opthelpers.h"


constexpr auto operator "" _i64(unsigned long long n) noexcept { return static_cast<int64_t>(n); }
constexpr auto operator "" _u64(unsigned long long n) noexcept { return static_cast<uint64_t>(n); }

constexpr auto operator "" _z(unsigned long long n) noexcept
{ return static_cast<std::make_signed_t<size_t>>(n); }
constexpr auto operator "" _uz(unsigned long long n) noexcept { return static_cast<size_t>(n); }
constexpr auto operator "" _zu(unsigned long long n) noexcept { return static_cast<size_t>(n); }


constexpr inline float minf(float a, float b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline float maxf(float a, float b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline float clampf(float val, float min, float max) noexcept
{ return minf(max, maxf(min, val)); }

constexpr inline double mind(double a, double b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline double maxd(double a, double b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline double clampd(double val, double min, double max) noexcept
{ return mind(max, maxd(min, val)); }

constexpr inline unsigned int minu(unsigned int a, unsigned int b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline unsigned int maxu(unsigned int a, unsigned int b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline unsigned int clampu(unsigned int val, unsigned int min, unsigned int max) noexcept
{ return minu(max, maxu(min, val)); }

constexpr inline int mini(int a, int b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline int maxi(int a, int b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline int clampi(int val, int min, int max) noexcept
{ return mini(max, maxi(min, val)); }

constexpr inline int64_t mini64(int64_t a, int64_t b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline int64_t maxi64(int64_t a, int64_t b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline int64_t clampi64(int64_t val, int64_t min, int64_t max) noexcept
{ return mini64(max, maxi64(min, val)); }

constexpr inline uint64_t minu64(uint64_t a, uint64_t b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline uint64_t maxu64(uint64_t a, uint64_t b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline uint64_t clampu64(uint64_t val, uint64_t min, uint64_t max) noexcept
{ return minu64(max, maxu64(min, val)); }

constexpr inline size_t minz(size_t a, size_t b) noexcept
{ return ((a > b) ? b : a); }
constexpr inline size_t maxz(size_t a, size_t b) noexcept
{ return ((a > b) ? a : b); }
constexpr inline size_t clampz(size_t val, size_t min, size_t max) noexcept
{ return minz(max, maxz(min, val)); }


constexpr inline float lerpf(float val1, float val2, float mu) noexcept
{ return val1 + (val2-val1)*mu; }
constexpr inline float cubic(float val1, float val2, float val3, float val4, float mu) noexcept
{
    const float mu2{mu*mu}, mu3{mu2*mu};
    const float a0{-0.5f*mu3 +       mu2 + -0.5f*mu};
    const float a1{ 1.5f*mu3 + -2.5f*mu2            + 1.0f};
    const float a2{-1.5f*mu3 +  2.0f*mu2 +  0.5f*mu};
    const float a3{ 0.5f*mu3 + -0.5f*mu2};
    return val1*a0 + val2*a1 + val3*a2 + val4*a3;
}


/** Find the next power-of-2 for non-power-of-2 numbers. */
inline uint32_t NextPowerOf2(uint32_t value) noexcept
{
    if(value > 0)
    {
        value--;
        value |= value>>1;
        value |= value>>2;
        value |= value>>4;
        value |= value>>8;
        value |= value>>16;
    }
    return value+1;
}

/**
 * If the value is not already a multiple of r, round down to the next
 * multiple.
 */
template<typename T>
constexpr T RoundDown(T value, al::type_identity_t<T> r) noexcept
{ return value - (value%r); }

/**
 * If the value is not already a multiple of r, round up to the next multiple.
 */
template<typename T>
constexpr T RoundUp(T value, al::type_identity_t<T> r) noexcept
{ return RoundDown(value + r-1, r); }


/**
 * Fast float-to-int conversion. No particular rounding mode is assumed; the
 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
 * change it on its own threads. On some systems, a truncating conversion may
 * always be the fastest method.
 */
inline int fastf2i(float f) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvt_ss2si(_mm_set_ss(f));

#elif defined(_MSC_VER) && defined(_M_IX86_FP)

    int i;
    __asm fld f
    __asm fistp i
    return i;

#elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))

    int i;
#ifdef __SSE_MATH__
    __asm__("cvtss2si %1, %0" : "=r"(i) : "x"(f));
#else
    __asm__ __volatile__("fistpl %0" : "=m"(i) : "t"(f) : "st");
#endif
    return i;

#else

    return static_cast<int>(f);
#endif
}
inline unsigned int fastf2u(float f) noexcept
{ return static_cast<unsigned int>(fastf2i(f)); }

/** Converts float-to-int using standard behavior (truncation). */
inline int float2int(float f) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvtt_ss2si(_mm_set_ss(f));

#elif (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0) \
    || ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
        && !defined(__SSE_MATH__))
    const int conv_i{al::bit_cast<int>(f)};

    const int sign{(conv_i>>31) | 1};
    const int shift{((conv_i>>23)&0xff) - (127+23)};

    /* Over/underflow */
    if(shift >= 31 || shift < -23) UNLIKELY
        return 0;

    const int mant{(conv_i&0x7fffff) | 0x800000};
    if(shift < 0) LIKELY
        return (mant >> -shift) * sign;
    return (mant << shift) * sign;

#else

    return static_cast<int>(f);
#endif
}
inline unsigned int float2uint(float f) noexcept
{ return static_cast<unsigned int>(float2int(f)); }

/** Converts double-to-int using standard behavior (truncation). */
inline int double2int(double d) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvttsd_si32(_mm_set_sd(d));

#elif (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP < 2) \
    || ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
        && !defined(__SSE2_MATH__))
    const int64_t conv_i64{al::bit_cast<int64_t>(d)};

    const int sign{static_cast<int>(conv_i64 >> 63) | 1};
    const int shift{(static_cast<int>(conv_i64 >> 52) & 0x7ff) - (1023 + 52)};

    /* Over/underflow */
    if(shift >= 63 || shift < -52) UNLIKELY
        return 0;

    const int64_t mant{(conv_i64 & 0xfffffffffffff_i64) | 0x10000000000000_i64};
    if(shift < 0) LIKELY
        return static_cast<int>(mant >> -shift) * sign;
    return static_cast<int>(mant << shift) * sign;

#else

    return static_cast<int>(d);
#endif
}

/**
 * Rounds a float to the nearest integral value, according to the current
 * rounding mode. This is essentially an inlined version of rintf, although
 * makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
 */
inline float fast_roundf(float f) noexcept
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
    && !defined(__SSE_MATH__)

    float out;
    __asm__ __volatile__("frndint" : "=t"(out) : "0"(f));
    return out;

#elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__)

    float out;
    __asm__ volatile("frintx %s0, %s1" : "=w"(out) : "w"(f));
    return out;

#else

    /* Integral limit, where sub-integral precision is not available for
     * floats.
     */
    static constexpr float ilim[2]{
         8388608.0f /*  0x1.0p+23 */,
        -8388608.0f /* -0x1.0p+23 */
    };
    const unsigned int conv_i{al::bit_cast<unsigned int>(f)};

    const unsigned int sign{(conv_i>>31)&0x01};
    const unsigned int expo{(conv_i>>23)&0xff};

    if(expo >= 150/*+23*/) UNLIKELY
    {
        /* An exponent (base-2) of 23 or higher is incapable of sub-integral
         * precision, so it's already an integral value. We don't need to worry
         * about infinity or NaN here.
         */
        return f;
    }
    /* Adding the integral limit to the value (with a matching sign) forces a
     * result that has no sub-integral precision, and is consequently forced to
     * round to an integral value. Removing the integral limit then restores
     * the initial value rounded to the integral. The compiler should not
     * optimize this out because of non-associative rules on floating-point
     * math (as long as you don't use -fassociative-math,
     * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
     * may break).
     */
    f += ilim[sign];
    return f - ilim[sign];
#endif
}


// Converts level (mB) to gain.
inline float level_mb_to_gain(float x)
{
    if(x <= -10'000.0f)
        return 0.0f;
    return std::pow(10.0f, x / 2'000.0f);
}

// Converts gain to level (mB).
inline float gain_to_level_mb(float x)
{
    if (x <= 0.0f)
        return -10'000.0f;
    return maxf(std::log10(x) * 2'000.0f, -10'000.0f);
}

#endif /* AL_NUMERIC_H */