diff options
Diffstat (limited to 'core/fpu_ctrl.cpp')
-rw-r--r-- | core/fpu_ctrl.cpp | 71 |
1 files changed, 51 insertions, 20 deletions
diff --git a/core/fpu_ctrl.cpp b/core/fpu_ctrl.cpp index 0cf0d6e7..435855ad 100644 --- a/core/fpu_ctrl.cpp +++ b/core/fpu_ctrl.cpp @@ -8,38 +8,71 @@ #endif #ifdef HAVE_SSE_INTRINSICS #include <emmintrin.h> -#ifndef _MM_DENORMALS_ZERO_MASK +#elif defined(HAVE_SSE) +#include <xmmintrin.h> +#endif + +#if defined(HAVE_SSE) && !defined(_MM_DENORMALS_ZERO_MASK) /* Some headers seem to be missing these? */ #define _MM_DENORMALS_ZERO_MASK 0x0040u #define _MM_DENORMALS_ZERO_ON 0x0040u #endif -#endif #include "cpu_caps.h" +namespace { -void FPUCtl::enter() noexcept +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +[[gnu::target("sse")]] +#endif +[[maybe_unused]] +void disable_denormals(unsigned int *state [[maybe_unused]]) { - if(this->in_mode) return; - #if defined(HAVE_SSE_INTRINSICS) - this->sse_state = _mm_getcsr(); - unsigned int sseState{this->sse_state}; + *state = _mm_getcsr(); + unsigned int sseState{*state}; sseState &= ~(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK); sseState |= _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON; _mm_setcsr(sseState); -#elif defined(__GNUC__) && defined(HAVE_SSE) +#elif defined(HAVE_SSE) - if((CPUCapFlags&CPU_CAP_SSE)) + *state = _mm_getcsr(); + unsigned int sseState{*state}; + sseState &= ~_MM_FLUSH_ZERO_MASK; + sseState |= _MM_FLUSH_ZERO_ON; + if((CPUCapFlags&CPU_CAP_SSE2)) { - __asm__ __volatile__("stmxcsr %0" : "=m" (*&this->sse_state)); - unsigned int sseState{this->sse_state}; - sseState |= 0x8000; /* set flush-to-zero */ - if((CPUCapFlags&CPU_CAP_SSE2)) - sseState |= 0x0040; /* set denormals-are-zero */ - __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState)); + sseState &= ~_MM_DENORMALS_ZERO_MASK; + sseState |= _MM_DENORMALS_ZERO_ON; } + _mm_setcsr(sseState); +#endif +} + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +[[gnu::target("sse")]] +#endif +[[maybe_unused]] +void reset_fpu(unsigned int state [[maybe_unused]]) +{ +#if defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE) + _mm_setcsr(state); +#endif +} + +} // namespace + + +void FPUCtl::enter() noexcept +{ + if(this->in_mode) return; + +#if defined(HAVE_SSE_INTRINSICS) + disable_denormals(&this->sse_state); +#elif defined(HAVE_SSE) + if((CPUCapFlags&CPU_CAP_SSE)) + disable_denormals(&this->sse_state); #endif this->in_mode = true; @@ -50,12 +83,10 @@ void FPUCtl::leave() noexcept if(!this->in_mode) return; #if defined(HAVE_SSE_INTRINSICS) - _mm_setcsr(this->sse_state); - -#elif defined(__GNUC__) && defined(HAVE_SSE) - + reset_fpu(this->sse_state); +#elif defined(HAVE_SSE) if((CPUCapFlags&CPU_CAP_SSE)) - __asm__ __volatile__("ldmxcsr %0" : : "m" (*&this->sse_state)); + reset_fpu(this->sse_state); #endif this->in_mode = false; } |