Diffstat (limited to 'LibOVR/Src/Kernel/OVR_Atomic.h')
 -rw-r--r--  LibOVR/Src/Kernel/OVR_Atomic.h | 149
 1 file changed, 87 insertions, 62 deletions
diff --git a/LibOVR/Src/Kernel/OVR_Atomic.h b/LibOVR/Src/Kernel/OVR_Atomic.h
index b826251..478077b 100644
--- a/LibOVR/Src/Kernel/OVR_Atomic.h
+++ b/LibOVR/Src/Kernel/OVR_Atomic.h
@@ -1,6 +1,6 @@
 /************************************************************************************

-PublicHeader: OVR.h
+PublicHeader: OVR_Kernel.h
 Filename : OVR_Atomic.h
 Content : Contains atomic operations and inline fastest locking
           functionality. Will contain #ifdefs for OS efficiency.
@@ -8,16 +8,16 @@ Content : Contains atomic operations and inline fastest locking
 Created : September 19, 2012
 Notes :

-Copyright : Copyright 2014 Oculus VR, Inc. All Rights reserved.
+Copyright : Copyright 2014 Oculus VR, LLC All Rights reserved.

-Licensed under the Oculus VR Rift SDK License Version 3.1 (the "License");
+Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License");
 you may not use the Oculus VR Rift SDK except in compliance with the License,
 which is provided at the time of installation or download, or which
 otherwise accompanies this software in either electronic or hard copy form.

 You may obtain a copy of the License at

-http://www.oculusvr.com/licenses/LICENSE-3.1
+http://www.oculusvr.com/licenses/LICENSE-3.2

 Unless required by applicable law or agreed to in writing, the Oculus VR SDK
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -26,18 +26,26 @@ See the License for the specific language governing permissions and
 limitations under the License.

 ************************************************************************************/
+
 #ifndef OVR_Atomic_h
 #define OVR_Atomic_h

 #include "OVR_Types.h"

 // Include System thread functionality.
-#if defined(OVR_OS_WIN32)
-#include <windows.h>
+#if defined(OVR_OS_MS) && !defined(OVR_OS_MS_MOBILE)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <Windows.h>
 #else
 #include <pthread.h>
 #endif

+#ifdef OVR_CC_MSVC
+#include <intrin.h>
+#pragma intrinsic(_ReadBarrier, _WriteBarrier, _ReadWriteBarrier)
+#endif

 namespace OVR {

@@ -50,7 +58,7 @@ template<class C> class AtomicOps;
 template<class T> class AtomicInt;
 template<class T> class AtomicPtr;

-class Lock;
+class Lock;


 //-----------------------------------------------------------------------------------

@@ -93,7 +101,7 @@ class Lock;
 struct AtomicOpsRawBase
 {
-#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_OS_WIN32) || defined(OVR_OS_IPHONE)
+#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_CPU_X86_64)
     // Need to have empty constructor to avoid class 'unused' variable warning.
     struct FullSync { inline FullSync() { } };
     struct AcquireSync { inline AcquireSync() { } };
@@ -109,12 +117,11 @@ struct AtomicOpsRawBase
     struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } };
     struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

-#elif defined(OVR_CPU_ARM)
+#elif defined(OVR_CPU_ARM) // Includes Android and iOS.
     struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } };
     struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } };
     struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } };
-
 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4)
     // __sync functions are already full sync
     struct FullSync { inline FullSync() { } };
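The FullSync/AcquireSync/ReleaseSync structs this hunk touches are scoped fence emitters: a release fence runs in the constructor (before the guarded access), an acquire fence runs in the destructor (after it), and the x86/x86-64 branch gets empty structs because those CPUs already give loads acquire and stores release ordering. A minimal sketch of the idiom, assuming a GCC-compatible compiler; the Demo* names are illustrative, not LibOVR API:

#include <cstdint>

// Acquire fence belongs after the load, so it is emitted by the destructor.
struct DemoAcquireSync
{
    DemoAcquireSync() { }
    ~DemoAcquireSync() { __atomic_thread_fence(__ATOMIC_ACQUIRE); } // GCC/Clang builtin
};

// Release fence belongs before the store, so it is emitted by the constructor.
struct DemoReleaseSync
{
    DemoReleaseSync() { __atomic_thread_fence(__ATOMIC_RELEASE); }
};

template<class T>
T DemoLoadAcquire(const volatile T* p)
{
    DemoAcquireSync sync; // destroyed after the load below, placing the fence after it
    return *p;
}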
@@ -130,11 +137,11 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
 #if !defined(OVR_ENABLE_THREADS)

     // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
-    typedef UInt32 T;
+    typedef uint32_t T;

     // *** Thread - Safe Atomic Versions.

-#elif defined(OVR_OS_WIN32)
+#elif defined(OVR_OS_MS)

     // Use special defined for VC6, where volatile is not used and
     // InterlockedCompareExchange is declared incorrectly.
@@ -153,10 +160,10 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
     inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; }

 #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
-    typedef UInt32 T;
-    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
+    typedef uint32_t T;
+    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 ret;
+        uint32_t ret;

         asm volatile("1:\n\t"
                      "lwarx %[r],0,%[i]\n\t"
@@ -167,9 +174,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 dummy, ret;
+        uint32_t dummy, ret;

         asm volatile("1:\n\t"
                      "lwarx %[r],0,%[i]\n\t"
@@ -181,9 +188,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
+    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
     {
-        UInt32 ret;
+        uint32_t ret;

         asm volatile("1:\n\t"
                      "lwarx %[r],0,%[i]\n\t"
@@ -199,11 +206,11 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
     }

 #elif defined(OVR_CPU_MIPS)
-    typedef UInt32 T;
+    typedef uint32_t T;

-    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 ret;
+        uint32_t ret;

         asm volatile("1:\n\t"
                      "ll %[r],0(%[i])\n\t"
@@ -215,9 +222,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 ret;
+        uint32_t ret;

         asm volatile("1:\n\t"
                      "ll %[r],0(%[i])\n\t"
@@ -230,9 +237,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
+    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
     {
-        UInt32 ret, dummy;
+        uint32_t ret, dummy;

         asm volatile("1:\n\t"
                      "move %[r],$0\n\t"
@@ -250,9 +257,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
     }

 #elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM)
-    typedef UInt32 T;
+    typedef uint32_t T;

-    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
     {
         for(;;)
         {
@@ -261,7 +268,7 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
             return r;
         }
     }
-    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
     {
         for(;;)
         {
@@ -271,7 +278,7 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         }
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
+    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
     {
         for(;;)
         {
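Each architecture branch supplies the same three primitives: Exchange_NoSync, ExchangeAdd_NoSync, and CompareAndSet_NoSync. The retry loops in the lwarx/stwcx., ll/sc, and __ldrex/__strex variants all follow one pattern: read, compute, attempt to publish, retry on interference. The same shape, sketched portably with the GCC/Clang __sync builtin (the builtin the (__GNUC__ >= 4) fallback branch uses); DemoCAS and DemoExchangeAdd are illustrative names:

#include <cstdint>

// Stand-in CAS primitive.
static inline bool DemoCAS(volatile uint32_t* i, uint32_t c, uint32_t v)
{
    return __sync_bool_compare_and_swap(i, c, v);
}

// ExchangeAdd derived from CAS: retry until the add lands atomically.
static inline uint32_t DemoExchangeAdd(volatile uint32_t* i, uint32_t j)
{
    for (;;)
    {
        uint32_t old = *i;             // snapshot the current value
        if (DemoCAS(i, old, old + j))  // install old+j only if *i is still 'old'
            return old;                // return the pre-add value, as the real primitive does
    }
}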
@@ -284,11 +291,11 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
     }

 #elif defined(OVR_CPU_ARM)
-    typedef UInt32 T;
+    typedef uint32_t T;

-    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 ret, dummy;
+        uint32_t ret, dummy;

         asm volatile("1:\n\t"
                      "ldrex %[r],[%[i]]\n\t"
@@ -300,9 +307,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
     {
-        UInt32 ret, dummy, test;
+        uint32_t ret, dummy, test;

         asm volatile("1:\n\t"
                      "ldrex %[r],[%[i]]\n\t"
@@ -315,9 +322,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
+    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
     {
-        UInt32 ret = 1, dummy, test;
+        uint32_t ret = 1, dummy, test;

         asm volatile("1:\n\t"
                      "ldrex %[o],[%[i]]\n\t"
@@ -334,9 +341,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
     }

 #elif defined(OVR_CPU_X86)
-    typedef UInt32 T;
+    typedef uint32_t T;

-    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
     {
         asm volatile("xchgl %1,%[i]\n"
                      : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory");
@@ -344,7 +351,7 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return j;
     }

-    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
+    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
     {
         asm volatile("lock; xaddl %1,%[i]\n"
                      : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory");
@@ -352,9 +359,9 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
         return j;
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
+    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
     {
-        UInt32 ret;
+        uint32_t ret;

         asm volatile("lock; cmpxchgl %[v],%[i]\n"
                      : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory");
@@ -364,7 +371,7 @@ struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase

 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)
-    typedef UInt32 T;
+    typedef uint32_t T;

     static inline T Exchange_NoSync(volatile T *i, T j)
     {
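The x86 inline assembly above leans on two ISA details: xchgl with a memory operand is implicitly locked, so Exchange_NoSync needs no lock prefix, while xaddl and cmpxchgl do; and cmpxchgl leaves the value it observed in EAX, which is what the "=a" constraint captures. For comparison only (not part of this patch), the same three primitives in portable C++11:

#include <atomic>
#include <cstdint>

uint32_t Exchange(std::atomic<uint32_t>& a, uint32_t j)    { return a.exchange(j); }
uint32_t ExchangeAdd(std::atomic<uint32_t>& a, uint32_t j) { return a.fetch_add(j); }

bool CompareAndSet(std::atomic<uint32_t>& a, uint32_t c, uint32_t v)
{
    // On failure the observed value is written back into 'c',
    // mirroring how cmpxchgl leaves it in EAX.
    return a.compare_exchange_strong(c, v);
}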
@@ -396,10 +403,10 @@ struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
 #if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS)

     // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
-    typedef UInt64 T;
+    typedef uint64_t T;

     // *** Thread - Safe OS specific versions.

-#elif defined(OVR_OS_WIN32)
+#elif defined(OVR_OS_MS)

     // This is only for 64-bit systems.
     typedef LONG64 T;
@@ -410,11 +417,11 @@ struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase

 #elif defined(OVR_CPU_PPC64)
-    typedef UInt64 T;
+    typedef uint64_t T;

-    static inline UInt64 Exchange_NoSync(volatile UInt64 *i, UInt64 j)
+    static inline uint64_t Exchange_NoSync(volatile uint64_t *i, uint64_t j)
     {
-        UInt64 dummy, ret;
+        uint64_t dummy, ret;

         asm volatile("1:\n\t"
                      "ldarx %[r],0,%[i]\n\t"
@@ -426,9 +433,9 @@ struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline UInt64 ExchangeAdd_NoSync(volatile UInt64 *i, UInt64 j)
+    static inline uint64_t ExchangeAdd_NoSync(volatile uint64_t *i, uint64_t j)
     {
-        UInt64 dummy, ret;
+        uint64_t dummy, ret;

         asm volatile("1:\n\t"
                      "ldarx %[r],0,%[i]\n\t"
@@ -440,9 +447,9 @@ struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
         return ret;
     }

-    static inline bool CompareAndSet_NoSync(volatile UInt64 *i, UInt64 c, UInt64 value)
+    static inline bool CompareAndSet_NoSync(volatile uint64_t *i, uint64_t c, uint64_t value)
     {
-        UInt64 ret, dummy;
+        uint64_t ret, dummy;

         asm volatile("1:\n\t"
                      "ldarx %[r],0,%[i]\n\t"
@@ -459,7 +466,7 @@ struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase

 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)
-    typedef UInt64 T;
+    typedef uint64_t T;

     static inline T Exchange_NoSync(volatile T *i, T j)
     {
@@ -533,7 +540,22 @@ struct AtomicOpsRaw_DefImpl : public O
 #else
     inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; }
 #endif
-    inline static O_T Load_Acquire(const volatile O_T* p) { O_AcquireSync sync; OVR_UNUSED(sync); return *p; }
+    inline static O_T Load_Acquire(const volatile O_T* p)
+    {
+        O_AcquireSync sync;
+        OVR_UNUSED(sync);
+
+#if defined(OVR_CC_MSVC)
+        _ReadBarrier(); // Compiler fence and load barrier
+#elif defined(OVR_CC_INTEL)
+        __memory_barrier(); // Compiler fence
+#else
+        // GCC-compatible:
+        asm volatile ("" : : : "memory"); // Compiler fence
+#endif
+
+        return *p;
+    }
 };
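The rewritten Load_Acquire pairs the existing O_AcquireSync object (a hardware fence on the non-x86 targets) with a compiler-only fence: _ReadBarrier on MSVC (hence the new <intrin.h> include and #pragma intrinsic earlier in the patch), __memory_barrier on the Intel compiler, and an empty asm with a "memory" clobber on GCC-compatible compilers. None of these emit instructions; they only stop the compiler from moving other memory accesses across the load. A standalone sketch of the idiom, with a hypothetical DemoLoadAcquire name:

#include <cstdint>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

static inline uint32_t DemoLoadAcquire(const volatile uint32_t* p)
{
#if defined(_MSC_VER)
    _ReadBarrier();                // compiler fence: blocks reordering of reads, emits no code
#else
    asm volatile("" ::: "memory"); // GCC/Clang compiler fence via a "memory" clobber
#endif
    return *p;
}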
@@ -583,11 +605,15 @@ public:
     inline static C    ExchangeAdd_NoSync(volatile C* p, C val)      { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; }
     inline static bool CompareAndSet_Sync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); }
     inline static bool CompareAndSet_Release(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); }
-    inline static bool CompareAndSet_Relse(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
+    inline static bool CompareAndSet_Acquire(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
     inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); }

+    // Loads and stores with memory fence. These have only the relevant versions.
     inline static void Store_Release(volatile C* p, C val)  { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); }
     inline static C    Load_Acquire(const volatile C* p)    { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; }
+
+    // Deprecated typo error:
+    inline static bool CompareAndSet_Relse(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
 };

@@ -616,7 +642,7 @@ public:
     inline T    Exchange_NoSync(T val)            { return Ops::Exchange_NoSync(&Value, val); }
     inline bool CompareAndSet_Sync(T c, T val)    { return Ops::CompareAndSet_Sync(&Value, c, val); }
     inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); }
-    inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Relse(&Value, c, val); }
+    inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Acquire(&Value, c, val); }
     inline bool CompareAndSet_NoSync(T c, T val)  { return Ops::CompareAndSet_NoSync(&Value, c, val); }

     // Load & Store.
     inline void Store_Release(T val)  { Ops::Store_Release(&Value, val); }
@@ -787,7 +813,6 @@ public:
 };

-
 //-----------------------------------------------------------------------------------
 // ***** Lock

@@ -816,11 +841,11 @@ public:
     inline void Unlock() { }

     // Windows.
-#elif defined(OVR_OS_WIN32)
+#elif defined(OVR_OS_MS)

     CRITICAL_SECTION cs;
 public:
-    Lock(unsigned spinCount = 0);
+    Lock(unsigned spinCount = 10000); // Mutexes with non-zero spin counts usually result in better performance.
     ~Lock();
     // Locking functions.
     inline void DoLock()  { ::EnterCriticalSection(&cs); }
@@ -833,9 +858,9 @@ public:
     static pthread_mutexattr_t RecursiveAttr;
     static bool                RecursiveAttrInit;

-    Lock (unsigned dummy = 0)
+    Lock (unsigned spinCount = 0) // To do: Support spin count, probably via a custom lock implementation.
     {
-        OVR_UNUSED(dummy);
+        OVR_UNUSED(spinCount);
         if (!RecursiveAttrInit)
         {
             pthread_mutexattr_init(&RecursiveAttr);
@@ -881,7 +906,7 @@ private:
     // UseCount and max alignment.
     volatile int UseCount;
-    UInt64       Buffer[(sizeof(Lock)+sizeof(UInt64)-1)/sizeof(UInt64)];
+    uint64_t     Buffer[(sizeof(Lock)+sizeof(uint64_t)-1)/sizeof(uint64_t)];
 };
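Two behavioral fixes ride along with the type renames. AtomicOps gains a correctly spelled CompareAndSet_Acquire while keeping the misspelled CompareAndSet_Relse as a deprecated alias so existing callers still compile, and AtomicValueBase::CompareAndSet_Acquire (the @@ -616 hunk) now forwards to the correct name. A hypothetical caller showing the wrapper-level path that hunk fixes; the include path and DemoSpinAcquire are assumptions, not code from this patch:

#include "OVR_Atomic.h"

// Acquire-flavored CAS spin: take ownership by flipping the flag 0 -> 1.
// Before this patch, this call compiled but routed through the misspelled
// Ops::CompareAndSet_Relse.
void DemoSpinAcquire(OVR::AtomicInt<int>& flag)
{
    while (!flag.CompareAndSet_Acquire(0, 1))
    { } // spin until we observe 0 and install 1
}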
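The Windows Lock constructor's default spin count changes from 0 to 10000, the rationale being that briefly spinning before a kernel wait is cheaper for short critical sections; the pthread branch only renames the parameter and leaves spinning as a to-do. The constructor is only declared in this header, so the following is a guess at the shape of a spin-count-aware definition (DemoLock is illustrative, not the LibOVR implementation):

#include <windows.h>

class DemoLock
{
    CRITICAL_SECTION cs;
public:
    DemoLock(unsigned spinCount = 10000)
    {
        // On contention, spins up to 'spinCount' times before sleeping in the kernel.
        ::InitializeCriticalSectionAndSpinCount(&cs, (DWORD)spinCount);
    }
    ~DemoLock()   { ::DeleteCriticalSection(&cs); }
    void DoLock() { ::EnterCriticalSection(&cs); }
    void Unlock() { ::LeaveCriticalSection(&cs); }
};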