about | summary | refs | log | tree | commit | diff | stats
path: root/Alc/mixer
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2018-09-03 12:18:06 -0700
committer Chris Robinson <[email protected]> 2018-09-03 12:42:31 -0700
commit fce86815f494b981f3cbbba290f5399c9907dee7 (patch)
tree 3a83696340c7abcfbec0c041a8ad899759b90b14 /Alc/mixer
parent 3b4f28d173d21bad20f28ec41f594f88459596dd (diff)
Extract SIMD values right before using them
Diffstat (limited to 'Alc/mixer')
-rw-r--r-- Alc/mixer/mixer_neon.c 12
-rw-r--r-- Alc/mixer/mixer_sse2.c 15
-rw-r--r-- Alc/mixer/mixer_sse41.c 15
3 files changed, 21 insertions, 21 deletions
diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c
index 9aa279a2..4feb431d 100644
--- a/Alc/mixer/mixer_neon.c
+++ b/Alc/mixer/mixer_neon.c
@@ -32,8 +32,12 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state),
for(i = 0;numsamples-i > 3;i += 4)
{
- const float32x4_t val1 = (float32x4_t){src[pos_[0]], src[pos_[1]], src[pos_[2]], src[pos_[3]]};
- const float32x4_t val2 = (float32x4_t){src[pos_[0]+1], src[pos_[1]+1], src[pos_[2]+1], src[pos_[3]+1]};
+ const int pos0 = vgetq_lane_s32(pos4, 0);
+ const int pos1 = vgetq_lane_s32(pos4, 1);
+ const int pos2 = vgetq_lane_s32(pos4, 2);
+ const int pos3 = vgetq_lane_s32(pos4, 3);
+ const float32x4_t val1 = (float32x4_t){src[pos0], src[pos1], src[pos2], src[pos3]};
+ const float32x4_t val2 = (float32x4_t){src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1]};
/* val1 + (val2-val1)*mu */
const float32x4_t r0 = vsubq_f32(val2, val1);
@@ -45,8 +49,6 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state),
frac4 = vaddq_s32(frac4, increment4);
pos4 = vaddq_s32(pos4, vshrq_n_s32(frac4, FRACTIONBITS));
frac4 = vandq_s32(frac4, fracMask4);
-
- vst1q_s32(pos_, pos4);
}
if(i < numsamples)
@@ -55,7 +57,7 @@ const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state),
* four samples, so the lowest element is the next position to
* resample.
*/
- ALint pos = pos_[0];
+ int pos = vgetq_lane_s32(pos4, 0);
frac = vgetq_lane_s32(frac4, 0);
do {
dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE));
diff --git a/Alc/mixer/mixer_sse2.c b/Alc/mixer/mixer_sse2.c
index e0198022..83aaf7f2 100644
--- a/Alc/mixer/mixer_sse2.c
+++ b/Alc/mixer/mixer_sse2.c
@@ -49,8 +49,12 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state),
for(i = 0;numsamples-i > 3;i += 4)
{
- const __m128 val1 = _mm_setr_ps(src[pos_.i[0] ], src[pos_.i[1] ], src[pos_.i[2] ], src[pos_.i[3] ]);
- const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]);
+ const int pos0 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)));
+ const int pos1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)));
+ const int pos2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(2, 2, 2, 2)));
+ const int pos3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(3, 3, 3, 3)));
+ const __m128 val1 = _mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ]);
+ const __m128 val2 = _mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1]);
/* val1 + (val2-val1)*mu */
const __m128 r0 = _mm_sub_ps(val2, val1);
@@ -62,17 +66,12 @@ const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state),
frac4 = _mm_add_epi32(frac4, increment4);
pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS));
frac4 = _mm_and_si128(frac4, fracMask4);
-
- pos_.i[0] = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)));
- pos_.i[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)));
- pos_.i[2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(2, 2, 2, 2)));
- pos_.i[3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(3, 3, 3, 3)));
}
/* NOTE: These four elements represent the position *after* the last four
* samples, so the lowest element is the next position to resample.
*/
- pos = pos_.i[0];
+ pos = _mm_cvtsi128_si32(pos4);
frac = _mm_cvtsi128_si32(frac4);
for(;i < numsamples;i++)
diff --git a/Alc/mixer/mixer_sse41.c b/Alc/mixer/mixer_sse41.c
index 98a3ef74..396b7c7c 100644
--- a/Alc/mixer/mixer_sse41.c
+++ b/Alc/mixer/mixer_sse41.c
@@ -50,8 +50,12 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state),
for(i = 0;numsamples-i > 3;i += 4)
{
- const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]);
- const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]);
+ const int pos0 = _mm_extract_epi32(pos4, 0);
+ const int pos1 = _mm_extract_epi32(pos4, 1);
+ const int pos2 = _mm_extract_epi32(pos4, 2);
+ const int pos3 = _mm_extract_epi32(pos4, 3);
+ const __m128 val1 = _mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ]);
+ const __m128 val2 = _mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1]);
/* val1 + (val2-val1)*mu */
const __m128 r0 = _mm_sub_ps(val2, val1);
@@ -63,17 +67,12 @@ const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state),
frac4 = _mm_add_epi32(frac4, increment4);
pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS));
frac4 = _mm_and_si128(frac4, fracMask4);
-
- pos_.i[0] = _mm_extract_epi32(pos4, 0);
- pos_.i[1] = _mm_extract_epi32(pos4, 1);
- pos_.i[2] = _mm_extract_epi32(pos4, 2);
- pos_.i[3] = _mm_extract_epi32(pos4, 3);
}
/* NOTE: These four elements represent the position *after* the last four
* samples, so the lowest element is the next position to resample.
*/
- pos = pos_.i[0];
+ pos = _mm_cvtsi128_si32(pos4);
frac = _mm_cvtsi128_si32(frac4);
for(;i < numsamples;i++)