Don't assume round-to-zero for fastf2i

author: Chris Robinson <[email protected]> 2018-05-04 02:05:26 -0700
committer: Chris Robinson <[email protected]> 2018-05-04 02:09:32 -0700
commit: 75e2cb97f74aeed2e50e4355607f041414a43976 (patch)
tree: 70d5957e9616fccc65b81a1dc4ff62babf1b6b36 /OpenAL32
parent: ac8dbd7a56e4ca0ccfbef61b89bdb55775abea6a (diff)
1 files changed, 29 insertions, 21 deletions
diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h
index 1cf1e5e2..0cab5a17 100644
--- a/OpenAL32/Include/alMain.h
+++ b/OpenAL32/Include/alMain.h
@@ -226,36 +226,44 @@ inline size_t RoundUp(size_t value, size_t r)
     return value - (value%r);
 }
 
-/* Fast float-to-int conversion. Assumes the FPU is already in round-to-zero
- * mode. */
+/* Fast float-to-int conversion. No particular rounding mode is assumed; the
+ * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
+ * change it on its own threads. On some systems, a truncating conversion may
+ * always be the fastest method.
+ */
 inline ALint fastf2i(ALfloat f)
 {
-#if (defined(__i386__) && !defined(__SSE_MATH__)) || (defined(_M_IX86_FP) && (_M_IX86_FP == 0))
-/* If using the x87 instruction set, try to use more efficient float-to-int
- * operations. The fistp instruction converts to integer efficiently enough,
- * but it isn't IEEE-754-compliant because it uses the current rounding mode
- * instead of always truncating -- the compiler will generate costly control
- * word changes with it to get correct behavior. If supported, lrintf converts
- * to integer using the current rounding mode, i.e. using fistp without control
- * word changes (if nothing even better is available). As long as the rounding
- * mode is set to round-to-zero ahead of time, and the call gets inlined, this
- * works fine.
- *
- * Other instruction sets, like SSE and ARM, have opcodes that inherently do
- * the right thing, and don't suffer from the same excessive performance
- * degredation from float-to-int conversions.
- */
-#ifdef HAVE_LRINTF
-    return lrintf(f);
-#elif defined(_MSC_VER) && defined(_M_IX86)
+#if defined(_MSC_VER) && defined(_M_IX86_FP)
     ALint i;
+#if _M_IX86_FP > 0
+    __asm cvtss2si i, f
+#else
     __asm fld f
     __asm fistp i
+#endif
     return i;
+
+#elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
+
+    ALint i;
+#ifdef __SSE_MATH__
+    __asm__("cvtss2si %1, %0" : "=r"(i) : "x"(f));
 #else
-    return (ALint)f;
+    __asm__("flds %1\n fistpl %0" : "=m"(i) : "m"(f)); /* 'l': 32-bit store; 's' is 16-bit */
 #endif
+    return i;
+
+    /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
+     * some simple instructions. Clang does not inline it, always generating a
+     * libc call, while MSVC's implementation is horribly slow, so always fall
+     * back to a normal integer conversion for them.
+     */
+#elif defined(HAVE_LRINTF) && !defined(_MSC_VER) && !defined(__clang__)
+
+    return lrintf(f);
+
 #else
+
     return (ALint)f;
 #endif
 }
author	Chris Robinson <[email protected]>	2018-05-04 02:05:26 -0700
committer	Chris Robinson <[email protected]>	2018-05-04 02:09:32 -0700
commit	75e2cb97f74aeed2e50e4355607f041414a43976 (patch)
tree	70d5957e9616fccc65b81a1dc4ff62babf1b6b36 /OpenAL32
parent	ac8dbd7a56e4ca0ccfbef61b89bdb55775abea6a (diff)