diff options
-rw-r--r-- | Alc/ALu.c | 6 | ||||
-rw-r--r-- | Alc/fpu_modes.h | 17 | ||||
-rw-r--r-- | Alc/helpers.c | 53 | ||||
-rw-r--r-- | OpenAL32/Include/alMain.h | 50 |
4 files changed, 47 insertions, 79 deletions
@@ -1092,7 +1092,7 @@ static void CalcNonAttnSourceParams(ALvoice *voice, const struct ALvoiceProps *p if(Pitch > (ALfloat)MAX_PITCH) voice->Step = MAX_PITCH<<FRACTIONBITS; else - voice->Step = maxi(fastf2i(Pitch*FRACTIONONE + 0.5f), 1); + voice->Step = maxi(fastf2i(Pitch * FRACTIONONE), 1); if(props->Resampler == BSinc24Resampler) BsincPrepare(voice->Step, &voice->ResampleState.bsinc, &bsinc24); else if(props->Resampler == BSinc12Resampler) @@ -1453,7 +1453,7 @@ static void CalcAttnSourceParams(ALvoice *voice, const struct ALvoiceProps *prop if(Pitch > (ALfloat)MAX_PITCH) voice->Step = MAX_PITCH<<FRACTIONBITS; else - voice->Step = maxi(fastf2i(Pitch*FRACTIONONE + 0.5f), 1); + voice->Step = maxi(fastf2i(Pitch * FRACTIONONE), 1); if(props->Resampler == BSinc24Resampler) BsincPrepare(voice->Step, &voice->ResampleState.bsinc, &bsinc24); else if(props->Resampler == BSinc12Resampler) @@ -1663,7 +1663,7 @@ static void ApplyDither(ALfloat (*restrict Samples)[BUFFERSIZE], ALuint *dither_ ALuint rng0 = dither_rng(&seed); ALuint rng1 = dither_rng(&seed); val += (ALfloat)(rng0*(1.0/UINT_MAX) - rng1*(1.0/UINT_MAX)); - samples[i] = roundf(val) * invscale; + samples[i] = fastf2i(val) * invscale; } } *dither_seed = seed; diff --git a/Alc/fpu_modes.h b/Alc/fpu_modes.h index 750252fc..eb305967 100644 --- a/Alc/fpu_modes.h +++ b/Alc/fpu_modes.h @@ -7,16 +7,13 @@ typedef struct FPUCtl { -#ifdef HAVE_FENV_H - fenv_t flt_env; -#ifdef _WIN32 - int round_mode; -#endif -#else - int state; -#endif -#ifdef HAVE_SSE - int sse_state; +#if defined(__GNUC__) && defined(HAVE_SSE) + unsigned int sse_state; +#elif defined(HAVE___CONTROL87_2) + unsigned int state; + unsigned int sse_state; +#elif defined(HAVE__CONTROLFP) + unsigned int state; #endif } FPUCtl; void SetMixerFPUMode(FPUCtl *ctl); diff --git a/Alc/helpers.c b/Alc/helpers.c index c311ea2e..7bcb3f4a 100644 --- a/Alc/helpers.c +++ b/Alc/helpers.c @@ -269,81 +269,44 @@ void FillCPUCaps(int capfilter) void SetMixerFPUMode(FPUCtl *ctl) { -#ifdef HAVE_FENV_H - fegetenv(&ctl->flt_env); -#ifdef _WIN32 - /* HACK: A nasty bug in MinGW-W64 causes fegetenv and fesetenv to not save - * and restore the FPU rounding mode, so we have to do it manually. Don't - * know if this also applies to MSVC. - */ - ctl->round_mode = fegetround(); -#endif -#if defined(__GNUC__) && defined(HAVE_SSE) - /* FIXME: Some fegetenv implementations can get the SSE environment too? - * How to tell when it does? */ - if((CPUCapFlags&CPU_CAP_SSE)) - __asm__ __volatile__("stmxcsr %0" : "=m" (*&ctl->sse_state)); -#endif - -#ifdef FE_TOWARDZERO - fesetround(FE_TOWARDZERO); -#endif #if defined(__GNUC__) && defined(HAVE_SSE) if((CPUCapFlags&CPU_CAP_SSE)) { - int sseState = ctl->sse_state; - sseState |= 0x6000; /* set round-to-zero */ + __asm__ __volatile__("stmxcsr %0" : "=m" (*&ctl->sse_state)); + unsigned int sseState = ctl->sse_state; sseState |= 0x8000; /* set flush-to-zero */ if((CPUCapFlags&CPU_CAP_SSE2)) sseState |= 0x0040; /* set denormals-are-zero */ __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState)); } -#endif #elif defined(HAVE___CONTROL87_2) - int mode; - __control87_2(0, 0, &ctl->state, NULL); - __control87_2(_RC_CHOP, _MCW_RC, &mode, NULL); -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - { - __control87_2(0, 0, NULL, &ctl->sse_state); - __control87_2(_RC_CHOP|_DN_FLUSH, _MCW_RC|_MCW_DN, NULL, &mode); - } -#endif + __control87_2(0, 0, &ctl->state, &ctl->sse_state); + _control87(_DN_FLUSH, _MCW_DN); #elif defined(HAVE__CONTROLFP) ctl->state = _controlfp(0, 0); - (void)_controlfp(_RC_CHOP, _MCW_RC); + _controlfp(_DN_FLUSH, _MCW_DN); #endif } void RestoreFPUMode(const FPUCtl *ctl) { -#ifdef HAVE_FENV_H - fesetenv(&ctl->flt_env); -#ifdef _WIN32 - fesetround(ctl->round_mode); -#endif #if defined(__GNUC__) && defined(HAVE_SSE) if((CPUCapFlags&CPU_CAP_SSE)) __asm__ __volatile__("ldmxcsr %0" : : "m" (*&ctl->sse_state)); -#endif #elif defined(HAVE___CONTROL87_2) int mode; - __control87_2(ctl->state, _MCW_RC, &mode, NULL); -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - __control87_2(ctl->sse_state, _MCW_RC|_MCW_DN, NULL, &mode); -#endif + __control87_2(ctl->state, _MCW_DN, &mode, NULL); + __control87_2(ctl->sse_state, _MCW_DN, NULL, &mode); #elif defined(HAVE__CONTROLFP) - _controlfp(ctl->state, _MCW_RC); + _controlfp(ctl->state, _MCW_DN); #endif } diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h index 1cf1e5e2..0cab5a17 100644 --- a/OpenAL32/Include/alMain.h +++ b/OpenAL32/Include/alMain.h @@ -226,36 +226,44 @@ inline size_t RoundUp(size_t value, size_t r) return value - (value%r); } -/* Fast float-to-int conversion. Assumes the FPU is already in round-to-zero - * mode. */ +/* Fast float-to-int conversion. No particular rounding mode is assumed; the + * IEEE-754 default is round-to-nearest with ties-to-even, though an app could + * change it on its own threads. On some systems, a truncating conversion may + * always be the fastest method. + */ inline ALint fastf2i(ALfloat f) { -#if (defined(__i386__) && !defined(__SSE_MATH__)) || (defined(_M_IX86_FP) && (_M_IX86_FP == 0)) -/* If using the x87 instruction set, try to use more efficient float-to-int - * operations. The fistp instruction converts to integer efficiently enough, - * but it isn't IEEE-754-compliant because it uses the current rounding mode - * instead of always truncating -- the compiler will generate costly control - * word changes with it to get correct behavior. If supported, lrintf converts - * to integer using the current rounding mode, i.e. using fistp without control - * word changes (if nothing even better is available). As long as the rounding - * mode is set to round-to-zero ahead of time, and the call gets inlined, this - * works fine. - * - * Other instruction sets, like SSE and ARM, have opcodes that inherently do - * the right thing, and don't suffer from the same excessive performance - * degredation from float-to-int conversions. - */ -#ifdef HAVE_LRINTF - return lrintf(f); -#elif defined(_MSC_VER) && defined(_M_IX86) +#if defined(_MSC_VER) && defined(_M_IX86_FP) ALint i; +#if _M_IX86_FP > 0 + __asm cvtss2si i, f +#else __asm fld f __asm fistp i +#endif return i; + +#elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) + + ALint i; +#ifdef __SSE_MATH__ + __asm__("cvtss2si %1, %0" : "=r"(i) : "x"(f)); #else - return (ALint)f; + __asm__("flds %1\n fistps %0" : "=m"(i) : "m"(f)); #endif + return i; + + /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to + * some simple instructions. Clang does not inline it, always generating a + * libc call, while MSVC's implementation is horribly slow, so always fall + * back to a normal integer conversion for them. + */ +#elif defined(HAVE_LRINTF) && !defined(_MSC_VER) && !defined(__clang__) + + return lrintf(f); + #else + return (ALint)f; #endif } |