diff options
author | Chris Robinson <[email protected]> | 2023-12-03 14:18:32 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2023-12-03 14:23:31 -0800 |
commit | 2c27d8bc756fd4b134aa16ef9901734e1509062b (patch) | |
tree | e3d982f434cf30ef3a802424c44180ee165cb2c1 /alc | |
parent | e6bb91212be93b0b7e4c99c1409f91dd8e211688 (diff) |
Make the device clock members atomic
Even though they're protected by a SeqLock of sorts, it's still undefined
behavior (UB) to read and write non-atomic variables from different threads.
Given the lock, though, relaxed reads and writes are sufficient, which helps
alleviate the cost of making the members atomic.
Diffstat (limited to 'alc')
-rw-r--r-- | alc/alc.cpp | 20 | ||||
-rw-r--r-- | alc/alu.cpp | 29 | ||||
-rw-r--r-- | alc/backends/base.h | 10 |
3 files changed, 36 insertions, 23 deletions
diff --git a/alc/alc.cpp b/alc/alc.cpp index 6017e743..ab4cc7ba 100644 --- a/alc/alc.cpp +++ b/alc/alc.cpp @@ -976,10 +976,18 @@ std::unique_ptr<Compressor> CreateDeviceLimiter(const ALCdevice *device, const f */ inline void UpdateClockBase(ALCdevice *device) { - IncrementRef(device->MixCount); - device->ClockBase += nanoseconds{seconds{device->SamplesDone}} / device->Frequency; - device->SamplesDone = 0; - IncrementRef(device->MixCount); + const auto mixCount = device->MixCount.load(std::memory_order_relaxed); + device->MixCount.store(mixCount+1, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_release); + + auto samplesDone = device->mSamplesDone.load(std::memory_order_relaxed); + auto clockBase = device->mClockBase.load(std::memory_order_relaxed); + + clockBase += nanoseconds{seconds{samplesDone}} / device->Frequency; + device->mClockBase.store(clockBase, std::memory_order_relaxed); + device->mSamplesDone.store(0, std::memory_order_relaxed); + + device->MixCount.store(mixCount+2, std::memory_order_release); } /** @@ -2504,8 +2512,8 @@ ALC_API void ALC_APIENTRY alcGetInteger64vSOFT(ALCdevice *device, ALCenum pname, nanoseconds basecount; do { refcount = dev->waitForMix(); - basecount = dev->ClockBase; - samplecount = dev->SamplesDone; + basecount = dev->mClockBase.load(std::memory_order_relaxed); + samplecount = dev->mSamplesDone.load(std::memory_order_relaxed); } while(refcount != ReadRef(dev->MixCount)); basecount += nanoseconds{seconds{samplecount}} / dev->Frequency; *values = basecount.count(); diff --git a/alc/alu.cpp b/alc/alu.cpp index e0858b18..23518fa9 100644 --- a/alc/alu.cpp +++ b/alc/alu.cpp @@ -1910,8 +1910,9 @@ void ProcessContexts(DeviceBase *device, const uint SamplesToDo) { ASSUME(SamplesToDo > 0); - const nanoseconds curtime{device->ClockBase + - nanoseconds{seconds{device->SamplesDone}}/device->Frequency}; + const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) + + 
nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/ + device->Frequency}; for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire)) { @@ -2135,7 +2136,9 @@ uint DeviceBase::renderSamples(const uint numSamples) buffer.fill(0.0f); /* Increment the mix count at the start (lsb should now be 1). */ - IncrementRef(MixCount); + const auto mixCount = MixCount.load(std::memory_order_relaxed); + MixCount.store(mixCount+1, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_release); /* Process and mix each context's sources and effects. */ ProcessContexts(this, samplesToDo); @@ -2144,12 +2147,16 @@ uint DeviceBase::renderSamples(const uint numSamples) * and added to clock base so that large sample counts don't overflow * during conversion. This also guarantees a stable conversion. */ - SamplesDone += samplesToDo; - ClockBase += std::chrono::seconds{SamplesDone / Frequency}; - SamplesDone %= Frequency; + { + auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo; + auto clockBase = mClockBase.load(std::memory_order_relaxed) + + std::chrono::seconds{samplesDone/Frequency}; + mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed); + mClockBase.store(clockBase, std::memory_order_relaxed); + } /* Increment the mix count at the end (lsb should now be 0). */ - IncrementRef(MixCount); + MixCount.store(mixCount+2, std::memory_order_release); /* Apply any needed post-process for finalizing the Dry mix to the RealOut * (Ambisonic decode, UHJ encode, etc). @@ -2225,7 +2232,10 @@ void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const siz void DeviceBase::handleDisconnect(const char *msg, ...) 
{ - IncrementRef(MixCount); + const auto mixCount = MixCount.load(std::memory_order_relaxed); + MixCount.store(mixCount+1, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_release); + if(Connected.exchange(false, std::memory_order_acq_rel)) { AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>}; @@ -2267,5 +2277,6 @@ void DeviceBase::handleDisconnect(const char *msg, ...) std::for_each(voicelist.begin(), voicelist.end(), stop_voice); } } - IncrementRef(MixCount); + + MixCount.store(mixCount+2, std::memory_order_release); } diff --git a/alc/backends/base.h b/alc/backends/base.h index e1f53405..f38c1d45 100644 --- a/alc/backends/base.h +++ b/alc/backends/base.h @@ -55,14 +55,8 @@ enum class BackendType { /* Helper to get the current clock time from the device's ClockBase, and * SamplesDone converted from the sample rate. */ -inline std::chrono::nanoseconds GetDeviceClockTime(DeviceBase *device) -{ - using std::chrono::seconds; - using std::chrono::nanoseconds; - - auto ns = nanoseconds{seconds{device->SamplesDone}} / device->Frequency; - return device->ClockBase + ns; -} +inline std::chrono::nanoseconds GetDeviceClockTime(const DeviceBase *device) noexcept +{ return device->getClockTime(); } /* Helper to get the device latency from the backend, including any fixed * latency from post-processing. |