diff options
author | Chris Robinson <[email protected]> | 2018-09-03 17:07:43 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2018-09-03 17:07:43 -0700 |
commit | 0f243b927cde10dab3adcf3fde99bf05a34080cc (patch) | |
tree | 31f262073091d21cd162cbf4c0779a637157dbf7 | |
parent | fce86815f494b981f3cbbba290f5399c9907dee7 (diff) |
Slightly restructure some loops
-rw-r--r-- | Alc/mixer/mixer_neon.c | 35 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse2.c | 19 | ||||
-rw-r--r-- | Alc/mixer/mixer_sse41.c | 19 |
3 files changed, 34 insertions, 39 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c index 4feb431d..db61fd41 100644 --- a/Alc/mixer/mixer_neon.c +++ b/Alc/mixer/mixer_neon.c @@ -19,18 +19,17 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state), const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK); alignas(16) ALint pos_[4]; alignas(16) ALsizei frac_[4]; - int32x4_t pos4; - int32x4_t frac4; - ALsizei i; + int32x4_t pos4, frac4; + ALsizei todo, pos, i; ASSUME(numsamples > 0); InitiatePositionArrays(frac, increment, frac_, pos_, 4); - frac4 = vld1q_s32(frac_); pos4 = vld1q_s32(pos_); - for(i = 0;numsamples-i > 3;i += 4) + todo = numsamples & ~3; + for(i = 0;i < todo;i += 4) { const int pos0 = vgetq_lane_s32(pos4, 0); const int pos1 = vgetq_lane_s32(pos4, 1); @@ -51,21 +50,19 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state), frac4 = vandq_s32(frac4, fracMask4); } - if(i < numsamples) + /* NOTE: These four elements represent the position *after* the last four + * samples, so the lowest element is the next position to resample. + */ + pos = vgetq_lane_s32(pos4, 0); + frac = vgetq_lane_s32(frac4, 0); + + for(;i < numsamples;++i) { - /* NOTE: These four elements represent the position *after* the last - * four samples, so the lowest element is the next position to - * resample. - */ - int pos = vgetq_lane_s32(pos4, 0); - frac = vgetq_lane_s32(frac4, 0); - do { - dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); - - frac += increment; - pos += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; - } while(++i < numsamples); + dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; } return dst; } diff --git a/Alc/mixer/mixer_sse2.c b/Alc/mixer/mixer_sse2.c index 83aaf7f2..629e0ec7 100644 --- a/Alc/mixer/mixer_sse2.c +++ b/Alc/mixer/mixer_sse2.c @@ -34,20 +34,19 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state), const __m128i increment4 = _mm_set1_epi32(increment*4); const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); - union { alignas(16) ALint i[4]; float f[4]; } pos_; - union { alignas(16) ALsizei i[4]; float f[4]; } frac_; + ALint pos_[4]; + ALsizei frac_[4]; __m128i frac4, pos4; - ALint pos; - ALsizei i; + ALsizei todo, pos, i; ASSUME(numsamples > 0); - InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); + InitiatePositionArrays(frac, increment, frac_, pos_, 4); + frac4 = _mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3]); + pos4 = _mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3]); - frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); - pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); - - for(i = 0;numsamples-i > 3;i += 4) + todo = numsamples & ~3; + for(i = 0;i < todo;i += 4) { const int pos0 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0))); const int pos1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1))); @@ -74,7 +73,7 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state), pos = _mm_cvtsi128_si32(pos4); frac = _mm_cvtsi128_si32(frac4); - for(;i < numsamples;i++) + for(;i < numsamples;++i) { dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); diff --git a/Alc/mixer/mixer_sse41.c b/Alc/mixer/mixer_sse41.c index 396b7c7c..128acadf 100644 --- a/Alc/mixer/mixer_sse41.c +++ b/Alc/mixer/mixer_sse41.c @@ -35,20 +35,19 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state), const __m128i increment4 = _mm_set1_epi32(increment*4); const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); - union { alignas(16) ALint i[4]; float f[4]; } pos_; - union { alignas(16) ALsizei i[4]; float f[4]; } frac_; + ALint pos_[4]; + ALsizei frac_[4]; __m128i frac4, pos4; - ALint pos; - ALsizei i; + ALsizei todo, pos, i; ASSUME(numsamples > 0); - InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); + InitiatePositionArrays(frac, increment, frac_, pos_, 4); + frac4 = _mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3]); + pos4 = _mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3]); - frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); - pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); - - for(i = 0;numsamples-i > 3;i += 4) + todo = numsamples & ~3; + for(i = 0;i < todo;i += 4) { const int pos0 = _mm_extract_epi32(pos4, 0); const int pos1 = _mm_extract_epi32(pos4, 1); @@ -75,7 +74,7 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state), pos = _mm_cvtsi128_si32(pos4); frac = _mm_cvtsi128_si32(frac4); - for(;i < numsamples;i++) + for(;i < numsamples;++i) { dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); |