about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Alc/mixer/mixer_neon.c 35
-rw-r--r-- Alc/mixer/mixer_sse2.c 19
-rw-r--r-- Alc/mixer/mixer_sse41.c 19
3 files changed, 34 insertions, 39 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c
index 4feb431d..db61fd41 100644
--- a/Alc/mixer/mixer_neon.c
+++ b/Alc/mixer/mixer_neon.c
@@ -19,18 +19,17 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state),
const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK);
alignas(16) ALint pos_[4];
alignas(16) ALsizei frac_[4];
- int32x4_t pos4;
- int32x4_t frac4;
- ALsizei i;
+ int32x4_t pos4, frac4;
+ ALsizei todo, pos, i;
ASSUME(numsamples > 0);
InitiatePositionArrays(frac, increment, frac_, pos_, 4);
-
frac4 = vld1q_s32(frac_);
pos4 = vld1q_s32(pos_);
- for(i = 0;numsamples-i > 3;i += 4)
+ todo = numsamples & ~3;
+ for(i = 0;i < todo;i += 4)
{
const int pos0 = vgetq_lane_s32(pos4, 0);
const int pos1 = vgetq_lane_s32(pos4, 1);
@@ -51,21 +50,19 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state),
frac4 = vandq_s32(frac4, fracMask4);
}
- if(i < numsamples)
+ /* NOTE: These four elements represent the position *after* the last four
+ * samples, so the lowest element is the next position to resample.
+ */
+ pos = vgetq_lane_s32(pos4, 0);
+ frac = vgetq_lane_s32(frac4, 0);
+
+ for(;i < numsamples;++i)
{
- /* NOTE: These four elements represent the position *after* the last
- * four samples, so the lowest element is the next position to
- * resample.
- */
- int pos = vgetq_lane_s32(pos4, 0);
- frac = vgetq_lane_s32(frac4, 0);
- do {
- dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE));
-
- frac += increment;
- pos += frac>>FRACTIONBITS;
- frac &= FRACTIONMASK;
- } while(++i < numsamples);
+ dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE));
+
+ frac += increment;
+ pos += frac>>FRACTIONBITS;
+ frac &= FRACTIONMASK;
}
return dst;
}
diff --git a/Alc/mixer/mixer_sse2.c b/Alc/mixer/mixer_sse2.c
index 83aaf7f2..629e0ec7 100644
--- a/Alc/mixer/mixer_sse2.c
+++ b/Alc/mixer/mixer_sse2.c
@@ -34,20 +34,19 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state),
const __m128i increment4 = _mm_set1_epi32(increment*4);
const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE);
const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK);
- union { alignas(16) ALint i[4]; float f[4]; } pos_;
- union { alignas(16) ALsizei i[4]; float f[4]; } frac_;
+ ALint pos_[4];
+ ALsizei frac_[4];
__m128i frac4, pos4;
- ALint pos;
- ALsizei i;
+ ALsizei todo, pos, i;
ASSUME(numsamples > 0);
- InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4);
+ InitiatePositionArrays(frac, increment, frac_, pos_, 4);
+ frac4 = _mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3]);
+ pos4 = _mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3]);
- frac4 = _mm_castps_si128(_mm_load_ps(frac_.f));
- pos4 = _mm_castps_si128(_mm_load_ps(pos_.f));
-
- for(i = 0;numsamples-i > 3;i += 4)
+ todo = numsamples & ~3;
+ for(i = 0;i < todo;i += 4)
{
const int pos0 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)));
const int pos1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)));
@@ -74,7 +73,7 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state),
pos = _mm_cvtsi128_si32(pos4);
frac = _mm_cvtsi128_si32(frac4);
- for(;i < numsamples;i++)
+ for(;i < numsamples;++i)
{
dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE));
diff --git a/Alc/mixer/mixer_sse41.c b/Alc/mixer/mixer_sse41.c
index 396b7c7c..128acadf 100644
--- a/Alc/mixer/mixer_sse41.c
+++ b/Alc/mixer/mixer_sse41.c
@@ -35,20 +35,19 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state),
const __m128i increment4 = _mm_set1_epi32(increment*4);
const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE);
const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK);
- union { alignas(16) ALint i[4]; float f[4]; } pos_;
- union { alignas(16) ALsizei i[4]; float f[4]; } frac_;
+ ALint pos_[4];
+ ALsizei frac_[4];
__m128i frac4, pos4;
- ALint pos;
- ALsizei i;
+ ALsizei todo, pos, i;
ASSUME(numsamples > 0);
- InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4);
+ InitiatePositionArrays(frac, increment, frac_, pos_, 4);
+ frac4 = _mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3]);
+ pos4 = _mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3]);
- frac4 = _mm_castps_si128(_mm_load_ps(frac_.f));
- pos4 = _mm_castps_si128(_mm_load_ps(pos_.f));
-
- for(i = 0;numsamples-i > 3;i += 4)
+ todo = numsamples & ~3;
+ for(i = 0;i < todo;i += 4)
{
const int pos0 = _mm_extract_epi32(pos4, 0);
const int pos1 = _mm_extract_epi32(pos4, 1);
@@ -75,7 +74,7 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state),
pos = _mm_cvtsi128_si32(pos4);
frac = _mm_cvtsi128_si32(frac4);
- for(;i < numsamples;i++)
+ for(;i < numsamples;++i)
{
dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE));