From b52fde7c0e57b4965e3b87d9cc2d98611064b7e5 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Sat, 2 May 2020 20:27:05 -0700 Subject: Simplify SIMD linear resampler loop count --- alc/mixer/mixer_neon.cpp | 7 +++---- alc/mixer/mixer_sse2.cpp | 7 +++---- alc/mixer/mixer_sse41.cpp | 7 +++---- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index 83388b68..2cf7e2e8 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -64,8 +64,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC pos4 = vld1q_s32(reinterpret_cast(pos_)); auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{vgetq_lane_s32(pos4, 0)}; const int pos1{vgetq_lane_s32(pos4, 1)}; @@ -87,7 +86,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac4 = vandq_s32(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { src += static_cast(vgetq_lane_s32(pos4, 0)); frac = static_cast(vgetq_lane_s32(frac4, 0)); @@ -98,7 +97,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } diff --git a/alc/mixer/mixer_sse2.cpp b/alc/mixer/mixer_sse2.cpp index 3558b55e..088284a7 100644 --- a/alc/mixer/mixer_sse2.cpp +++ b/alc/mixer/mixer_sse2.cpp @@ -46,8 +46,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC static_cast(pos_[2]), static_cast(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))}; const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))}; @@ -69,7 +68,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac4 = _mm_and_si128(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { src += static_cast(_mm_cvtsi128_si32(pos4)); frac = static_cast(_mm_cvtsi128_si32(frac4)); @@ -80,7 +79,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } diff --git a/alc/mixer/mixer_sse41.cpp b/alc/mixer/mixer_sse41.cpp index b5aebe7d..f18cd6b4 100644 --- a/alc/mixer/mixer_sse41.cpp +++ b/alc/mixer/mixer_sse41.cpp @@ -47,8 +47,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC static_cast(pos_[2]), static_cast(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3u) + dst_iter; - while(dst_iter != aligned_end) + for(size_t todo{dst.size()>>2};todo;--todo) { const int pos0{_mm_extract_epi32(pos4, 0)}; const int pos1{_mm_extract_epi32(pos4, 1)}; @@ -70,7 +69,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac4 = _mm_and_si128(frac4, fracMask4); } - if(dst_iter != dst.end()) + if(size_t todo{dst.size()&3}) { /* NOTE: These four elements represent the position *after* the last * four samples, so the lowest element is the next position to @@ -85,7 +84,7 @@ const float *Resample_(const InterpState*, const float *RESTRIC frac += increment; src += frac>>FRACTIONBITS; frac &= FRACTIONMASK; - } while(dst_iter != dst.end()); + } while(--todo); } return dst.data(); } -- cgit v1.2.3