diff options
author | Chris Robinson <[email protected]> | 2020-05-02 20:27:05 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2020-05-02 20:27:05 -0700 |
commit | b52fde7c0e57b4965e3b87d9cc2d98611064b7e5 (patch) | |
tree | 9824a0c08eec955b2dcc1d3475dd0a42399b345e /alc | |
parent | b051f2e33d0146df60332fdea04ed1c6138c9a70 (diff) | |
Simplify SIMD linear resampler loop count
Diffstat (limited to 'alc')
-rw-r--r-- | alc/mixer/mixer_neon.cpp | 7 |
-rw-r--r-- | alc/mixer/mixer_sse2.cpp | 7 |
-rw-r--r-- | alc/mixer/mixer_sse41.cpp | 7 |
3 files changed, 9 insertions, 12 deletions
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index 83388b68..2cf7e2e8 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -64,8 +64,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC pos4 = vld1q_s32(reinterpret_cast<int*>(pos_)); auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{vgetq_lane_s32(pos4, 0)}; const int pos1{vgetq_lane_s32(pos4, 1)}; @@ -87,7 +86,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC frac4 = vandq_s32(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0)); frac = static_cast<ALuint>(vgetq_lane_s32(frac4, 0)); @@ -98,7 +97,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } diff --git a/alc/mixer/mixer_sse2.cpp b/alc/mixer/mixer_sse2.cpp index 3558b55e..088284a7 100644 --- a/alc/mixer/mixer_sse2.cpp +++ b/alc/mixer/mixer_sse2.cpp @@ -46,8 +46,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))}; const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))}; @@ -69,7 +68,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC frac4 = _mm_and_si128(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { src += 
static_cast<ALuint>(_mm_cvtsi128_si32(pos4)); frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4)); @@ -80,7 +79,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } diff --git a/alc/mixer/mixer_sse41.cpp b/alc/mixer/mixer_sse41.cpp index b5aebe7d..f18cd6b4 100644 --- a/alc/mixer/mixer_sse41.cpp +++ b/alc/mixer/mixer_sse41.cpp @@ -47,8 +47,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{_mm_extract_epi32(pos4, 0)}; const int pos1{_mm_extract_epi32(pos4, 1)}; @@ -70,7 +69,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC frac4 = _mm_and_si128(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { /* NOTE: These four elements represent the position *after* the last * four samples, so the lowest element is the next position to @@ -85,7 +84,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } |