Make the device clock members atomic

Even though they're protected by a SeqLock of sorts, it's still undefined behavior (UB) to read and write non-atomic variables from different threads. Given the lock, though, relaxed reads and writes are fine, which helps alleviate the cost of making them atomic.
author: Chris Robinson <[email protected]> 2023-12-03 14:18:32 -0800
committer: Chris Robinson <[email protected]> 2023-12-03 14:23:31 -0800
commit: 2c27d8bc756fd4b134aa16ef9901734e1509062b (patch)
tree: e3d982f434cf30ef3a802424c44180ee165cb2c1 /alc
parent: e6bb91212be93b0b7e4c99c1409f91dd8e211688 (diff)
3 files changed, 36 insertions, 23 deletions
diff --git a/alc/alc.cpp b/alc/alc.cpp
index 6017e743..ab4cc7ba 100644
--- a/alc/alc.cpp
+++ b/alc/alc.cpp
@@ -976,10 +976,18 @@ std::unique_ptr<Compressor> CreateDeviceLimiter(const ALCdevice *device, const f
  */
 inline void UpdateClockBase(ALCdevice *device)
 {
-    IncrementRef(device->MixCount);
-    device->ClockBase += nanoseconds{seconds{device->SamplesDone}} / device->Frequency;
-    device->SamplesDone = 0;
-    IncrementRef(device->MixCount);
+    const auto mixCount = device->MixCount.load(std::memory_order_relaxed);
+    device->MixCount.store(mixCount+1, std::memory_order_relaxed);
+    std::atomic_thread_fence(std::memory_order_release);
+
+    auto samplesDone = device->mSamplesDone.load(std::memory_order_relaxed);
+    auto clockBase = device->mClockBase.load(std::memory_order_relaxed);
+
+    clockBase += nanoseconds{seconds{samplesDone}} / device->Frequency;
+    device->mClockBase.store(clockBase, std::memory_order_relaxed);
+    device->mSamplesDone.store(0, std::memory_order_relaxed);
+
+    device->MixCount.store(mixCount+2, std::memory_order_release);
 }
 
 /**
@@ -2504,8 +2512,8 @@ ALC_API void ALC_APIENTRY alcGetInteger64vSOFT(ALCdevice *device, ALCenum pname,
             nanoseconds basecount;
             do {
                 refcount = dev->waitForMix();
-                basecount = dev->ClockBase;
-                samplecount = dev->SamplesDone;
+                basecount = dev->mClockBase.load(std::memory_order_relaxed);
+                samplecount = dev->mSamplesDone.load(std::memory_order_relaxed);
             } while(refcount != ReadRef(dev->MixCount));
             basecount += nanoseconds{seconds{samplecount}} / dev->Frequency;
             *values = basecount.count();
diff --git a/alc/alu.cpp b/alc/alu.cpp
index e0858b18..23518fa9 100644
--- a/alc/alu.cpp
+++ b/alc/alu.cpp
@@ -1910,8 +1910,9 @@ void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
 {
     ASSUME(SamplesToDo > 0);
 
-    const nanoseconds curtime{device->ClockBase +
-        nanoseconds{seconds{device->SamplesDone}}/device->Frequency};
+    const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
+        nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
+        device->Frequency};
 
     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
     {
@@ -2135,7 +2136,9 @@ uint DeviceBase::renderSamples(const uint numSamples)
         buffer.fill(0.0f);
 
     /* Increment the mix count at the start (lsb should now be 1). */
-    IncrementRef(MixCount);
+    const auto mixCount = MixCount.load(std::memory_order_relaxed);
+    MixCount.store(mixCount+1, std::memory_order_relaxed);
+    std::atomic_thread_fence(std::memory_order_release);
 
     /* Process and mix each context's sources and effects. */
     ProcessContexts(this, samplesToDo);
@@ -2144,12 +2147,16 @@ uint DeviceBase::renderSamples(const uint numSamples)
      * and added to clock base so that large sample counts don't overflow
      * during conversion. This also guarantees a stable conversion.
      */
-    SamplesDone += samplesToDo;
-    ClockBase += std::chrono::seconds{SamplesDone / Frequency};
-    SamplesDone %= Frequency;
+    {
+        auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
+        auto clockBase = mClockBase.load(std::memory_order_relaxed) +
+            std::chrono::seconds{samplesDone/Frequency};
+        mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
+        mClockBase.store(clockBase, std::memory_order_relaxed);
+    }
 
     /* Increment the mix count at the end (lsb should now be 0). */
-    IncrementRef(MixCount);
+    MixCount.store(mixCount+2, std::memory_order_release);
 
     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
      * (Ambisonic decode, UHJ encode, etc).
@@ -2225,7 +2232,10 @@ void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const siz
 
 void DeviceBase::handleDisconnect(const char *msg, ...)
 {
-    IncrementRef(MixCount);
+    const auto mixCount = MixCount.load(std::memory_order_relaxed);
+    MixCount.store(mixCount+1, std::memory_order_relaxed);
+    std::atomic_thread_fence(std::memory_order_release);
+
     if(Connected.exchange(false, std::memory_order_acq_rel))
     {
         AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
@@ -2267,5 +2277,6 @@ void DeviceBase::handleDisconnect(const char *msg, ...)
             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
         }
     }
-    IncrementRef(MixCount);
+
+    MixCount.store(mixCount+2, std::memory_order_release);
 }
diff --git a/alc/backends/base.h b/alc/backends/base.h
index e1f53405..f38c1d45 100644
--- a/alc/backends/base.h
+++ b/alc/backends/base.h
@@ -55,14 +55,8 @@ enum class BackendType {
 /* Helper to get the current clock time from the device's ClockBase, and
  * SamplesDone converted from the sample rate.
  */
-inline std::chrono::nanoseconds GetDeviceClockTime(DeviceBase *device)
-{
-    using std::chrono::seconds;
-    using std::chrono::nanoseconds;
-
-    auto ns = nanoseconds{seconds{device->SamplesDone}} / device->Frequency;
-    return device->ClockBase + ns;
-}
+inline std::chrono::nanoseconds GetDeviceClockTime(const DeviceBase *device) noexcept
+{ return device->getClockTime(); }
 
 /* Helper to get the device latency from the backend, including any fixed
  * latency from post-processing.
author	Chris Robinson <[email protected]>	2023-12-03 14:18:32 -0800
committer	Chris Robinson <[email protected]>	2023-12-03 14:23:31 -0800
commit	2c27d8bc756fd4b134aa16ef9901734e1509062b (patch)
tree	e3d982f434cf30ef3a802424c44180ee165cb2c1 /alc
parent	e6bb91212be93b0b7e4c99c1409f91dd8e211688 (diff)