about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2020-05-02 20:27:05 -0700
committer Chris Robinson <[email protected]> 2020-05-02 20:27:05 -0700
commit b52fde7c0e57b4965e3b87d9cc2d98611064b7e5 (patch)
tree 9824a0c08eec955b2dcc1d3475dd0a42399b345e
parent b051f2e33d0146df60332fdea04ed1c6138c9a70 (diff)
Simplify SIMD linear resampler loop count
-rw-r--r-- alc/mixer/mixer_neon.cpp 7
-rw-r--r-- alc/mixer/mixer_sse2.cpp 7
-rw-r--r-- alc/mixer/mixer_sse41.cpp 7
3 files changed, 9 insertions, 12 deletions
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp
index 83388b68..2cf7e2e8 100644
--- a/alc/mixer/mixer_neon.cpp
+++ b/alc/mixer/mixer_neon.cpp
@@ -64,8 +64,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC
pos4 = vld1q_s32(reinterpret_cast<int*>(pos_));
auto dst_iter = dst.begin();
- const auto aligned_end = (dst.size()&~3u) + dst_iter;
- while(dst_iter != aligned_end)
+ for(size_t todo{dst.size()>>2};todo;--todo)
{
const int pos0{vgetq_lane_s32(pos4, 0)};
const int pos1{vgetq_lane_s32(pos4, 1)};
@@ -87,7 +86,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC
frac4 = vandq_s32(frac4, fracMask4);
}
- if(dst_iter != dst.end())
+ if(size_t todo{dst.size()&3})
{
src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0));
frac = static_cast<ALuint>(vgetq_lane_s32(frac4, 0));
@@ -98,7 +97,7 @@ const float *Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRIC
frac += increment;
src += frac>>FRACTIONBITS;
frac &= FRACTIONMASK;
- } while(dst_iter != dst.end());
+ } while(--todo);
}
return dst.data();
}
diff --git a/alc/mixer/mixer_sse2.cpp b/alc/mixer/mixer_sse2.cpp
index 3558b55e..088284a7 100644
--- a/alc/mixer/mixer_sse2.cpp
+++ b/alc/mixer/mixer_sse2.cpp
@@ -46,8 +46,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC
static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
auto dst_iter = dst.begin();
- const auto aligned_end = (dst.size()&~3u) + dst_iter;
- while(dst_iter != aligned_end)
+ for(size_t todo{dst.size()>>2};todo;--todo)
{
const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))};
const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))};
@@ -69,7 +68,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC
frac4 = _mm_and_si128(frac4, fracMask4);
}
- if(dst_iter != dst.end())
+ if(size_t todo{dst.size()&3})
{
src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4));
frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4));
@@ -80,7 +79,7 @@ const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRIC
frac += increment;
src += frac>>FRACTIONBITS;
frac &= FRACTIONMASK;
- } while(dst_iter != dst.end());
+ } while(--todo);
}
return dst.data();
}
diff --git a/alc/mixer/mixer_sse41.cpp b/alc/mixer/mixer_sse41.cpp
index b5aebe7d..f18cd6b4 100644
--- a/alc/mixer/mixer_sse41.cpp
+++ b/alc/mixer/mixer_sse41.cpp
@@ -47,8 +47,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC
static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};
auto dst_iter = dst.begin();
- const auto aligned_end = (dst.size()&~3u) + dst_iter;
- while(dst_iter != aligned_end)
+ for(size_t todo{dst.size()>>2};todo;--todo)
{
const int pos0{_mm_extract_epi32(pos4, 0)};
const int pos1{_mm_extract_epi32(pos4, 1)};
@@ -70,7 +69,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC
frac4 = _mm_and_si128(frac4, fracMask4);
}
- if(dst_iter != dst.end())
+ if(size_t todo{dst.size()&3})
{
/* NOTE: These four elements represent the position *after* the last
* four samples, so the lowest element is the next position to
@@ -85,7 +84,7 @@ const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRIC
frac += increment;
src += frac>>FRACTIONBITS;
frac &= FRACTIONMASK;
- } while(dst_iter != dst.end());
+ } while(--todo);
}
return dst.data();
}