diff options
author | Chris Robinson <[email protected]> | 2019-09-13 03:25:13 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2019-09-13 03:25:13 -0700 |
commit | c1690178ec0b020018857ed0b666ff9b16e01c21 (patch) | |
tree | c5c30f740ea57cd4e6f3c96bde68237937781cf3 /alc/mixer | |
parent | 5f862a5b49412ef2690a271ed240d5a6fc881b37 (diff) |
Make the resampler increment unsigned
Diffstat (limited to 'alc/mixer')
-rw-r--r-- | alc/mixer/defs.h | 9 | ||||
-rw-r--r-- | alc/mixer/mixer_c.cpp | 14 | ||||
-rw-r--r-- | alc/mixer/mixer_neon.cpp | 42 | ||||
-rw-r--r-- | alc/mixer/mixer_sse.cpp | 5 | ||||
-rw-r--r-- | alc/mixer/mixer_sse2.cpp | 39 | ||||
-rw-r--r-- | alc/mixer/mixer_sse41.cpp | 43 |
6 files changed, 74 insertions, 78 deletions
diff --git a/alc/mixer/defs.h b/alc/mixer/defs.h index 4aa3c6b6..a6e07bd4 100644 --- a/alc/mixer/defs.h +++ b/alc/mixer/defs.h @@ -28,7 +28,7 @@ enum ResampleType { template<ResampleType TypeTag, InstSetType InstTag> const ALfloat *Resample_(const InterpState *state, const ALfloat *RESTRICT src, ALuint frac, - ALint increment, const al::span<float> dst); + ALuint increment, const al::span<float> dst); template<InstSetType InstTag> void Mix_(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer, @@ -45,13 +45,14 @@ template<InstSetType InstTag> void MixDirectHrtf_(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, DirectHrtfState *State, const size_t BufferSize); /* Vectorized resampler helpers */ -inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *RESTRICT frac_arr, ALsizei *RESTRICT pos_arr, ALsizei size) +inline void InitPosArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, + size_t size) { pos_arr[0] = 0; frac_arr[0] = frac; - for(ALsizei i{1};i < size;i++) + for(size_t i{1};i < size;i++) { - ALint frac_tmp = frac_arr[i-1] + increment; + const ALuint frac_tmp{frac_arr[i-1] + increment}; pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS); frac_arr[i] = frac_tmp&FRACTIONMASK; } diff --git a/alc/mixer/mixer_c.cpp b/alc/mixer/mixer_c.cpp index a8fb9a19..c5e712f9 100644 --- a/alc/mixer/mixer_c.cpp +++ b/alc/mixer/mixer_c.cpp @@ -44,10 +44,8 @@ inline ALfloat do_bsinc(const InterpState &istate, const ALfloat *RESTRICT vals, using SamplerT = ALfloat(const InterpState&, const ALfloat*RESTRICT, const ALuint); template<SamplerT &Sampler> const ALfloat *DoResample(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { - ASSUME(increment > 0); - const InterpState istate{*state}; auto proc_sample = [&src,&frac,istate,increment]() -> ALfloat { @@ -68,7 +66,7 @@ const ALfloat *DoResample(const InterpState *state, const ALfloat *RESTRICT src, template<> const ALfloat *Resample_<CopyTag,CTag>(const InterpState*, const ALfloat *RESTRICT src, ALuint, - ALint, const al::span<float> dst) + ALuint, const al::span<float> dst) { #if defined(HAVE_SSE) || defined(HAVE_NEON) /* Avoid copying the source data if it's aligned like the destination. */ @@ -81,22 +79,22 @@ const ALfloat *Resample_<CopyTag,CTag>(const InterpState*, const ALfloat *RESTRI template<> const ALfloat *Resample_<PointTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { return DoResample<do_point>(state, src, frac, increment, dst); } template<> const ALfloat *Resample_<LerpTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { return DoResample<do_lerp>(state, src, frac, increment, dst); } template<> const ALfloat *Resample_<CubicTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { return DoResample<do_cubic>(state, src-1, frac, increment, dst); } template<> const ALfloat *Resample_<BSincTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { return DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); } diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index d5b1658f..c0a8e628 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -16,22 +16,20 @@ template<> const ALfloat *Resample_<LerpTag,NEONTag>(const InterpState*, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { - const int32x4_t increment4 = vdupq_n_s32(increment*4); + const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4)); const float32x4_t fracOne4 = vdupq_n_f32(1.0f/FRACTIONONE); const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK); - alignas(16) ALsizei pos_[4], frac_[4]; + alignas(16) ALuint pos_[4], frac_[4]; int32x4_t pos4, frac4; - ASSUME(increment > 0); - - InitiatePositionArrays(frac, increment, frac_, pos_, 4); - frac4 = vld1q_s32(frac_); - pos4 = vld1q_s32(pos_); + InitPosArrays(frac, increment, frac_, pos_, 4); + frac4 = vld1q_s32(reinterpret_cast<int*>(frac_)); + pos4 = vld1q_s32(reinterpret_cast<int*>(pos_)); auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3) + dst_iter; + const auto aligned_end = (dst.size()&~3u) + dst_iter; while(dst_iter != aligned_end) { const int pos0{vgetq_lane_s32(pos4, 0)}; @@ -54,33 +52,31 @@ const ALfloat *Resample_<LerpTag,NEONTag>(const InterpState*, const ALfloat *RES frac4 = vandq_s32(frac4, fracMask4); } - /* NOTE: These four elements represent the position *after* the last four - * samples, so the lowest element is the next position to resample. - */ - src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0)); - frac = vgetq_lane_s32(frac4, 0); - - while(dst_iter != dst.end()) + if(dst_iter != dst.end()) { - *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); + src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0)); + frac = vgetq_lane_s32(frac4, 0); - frac += increment; - src += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; + do { + *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); + + frac += increment; + src += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } while(dst_iter != dst.end()); } return dst.begin(); } template<> const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { const ALfloat *const filter{state->bsinc.filter}; const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)}; const ptrdiff_t m{state->bsinc.m}; ASSUME(m > 0); - ASSUME(increment > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -183,7 +179,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f}; const bool reached_target{InSamples.size() >= Counter}; const auto min_end = reached_target ? InSamples.begin() + Counter : InSamples.end(); - const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3) + + const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3u) + InSamples.begin(); for(FloatBufferLine &output : OutBuffer) { diff --git a/alc/mixer/mixer_sse.cpp b/alc/mixer/mixer_sse.cpp index 1965f3e6..82e2bfbe 100644 --- a/alc/mixer/mixer_sse.cpp +++ b/alc/mixer/mixer_sse.cpp @@ -15,14 +15,13 @@ template<> const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { const ALfloat *const filter{state->bsinc.filter}; const __m128 sf4{_mm_set1_ps(state->bsinc.sf)}; const ptrdiff_t m{state->bsinc.m}; ASSUME(m > 0); - ASSUME(increment > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -146,7 +145,7 @@ void Mix_<SSETag>(const al::span<const float> InSamples, const al::span<FloatBuf const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f}; const bool reached_target{InSamples.size() >= Counter}; const auto min_end = reached_target ? InSamples.begin() + Counter : InSamples.end(); - const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3) + + const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3u) + InSamples.begin(); for(FloatBufferLine &output : OutBuffer) { diff --git a/alc/mixer/mixer_sse2.cpp b/alc/mixer/mixer_sse2.cpp index b126cd25..38d77fd9 100644 --- a/alc/mixer/mixer_sse2.cpp +++ b/alc/mixer/mixer_sse2.cpp @@ -29,21 +29,21 @@ template<> const ALfloat *Resample_<LerpTag,SSE2Tag>(const InterpState*, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { - const __m128i increment4{_mm_set1_epi32(increment*4)}; + const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; const __m128 fracOne4{_mm_set1_ps(1.0f/FRACTIONONE)}; const __m128i fracMask4{_mm_set1_epi32(FRACTIONMASK)}; - ASSUME(increment > 0); - - alignas(16) ALsizei pos_[4], frac_[4]; - InitiatePositionArrays(frac, increment, frac_, pos_, 4); - __m128i frac4{_mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3])}; - __m128i pos4{_mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3])}; + alignas(16) ALuint pos_[4], frac_[4]; + InitPosArrays(frac, increment, frac_, pos_, 4); + __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]), + static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))}; + __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]), + static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3) + dst_iter; + const auto aligned_end = (dst.size()&~3u) + dst_iter; while(dst_iter != aligned_end) { const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))}; @@ -66,19 +66,18 @@ const ALfloat *Resample_<LerpTag,SSE2Tag>(const InterpState*, const ALfloat *RES frac4 = _mm_and_si128(frac4, fracMask4); } - /* NOTE: These four elements represent the position *after* the last four - * samples, so the lowest element is the next position to resample. - */ - src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4)); - frac = _mm_cvtsi128_si32(frac4); - - while(dst_iter != dst.end()) + if(dst_iter != dst.end()) { - *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); + src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4)); + frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4)); + + do { + *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); - frac += increment; - src += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; + frac += increment; + src += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } while(dst_iter != dst.end()); } return dst.begin(); } diff --git a/alc/mixer/mixer_sse41.cpp b/alc/mixer/mixer_sse41.cpp index 06c51e6a..0a87f76f 100644 --- a/alc/mixer/mixer_sse41.cpp +++ b/alc/mixer/mixer_sse41.cpp @@ -30,21 +30,21 @@ template<> const ALfloat *Resample_<LerpTag,SSE4Tag>(const InterpState*, const ALfloat *RESTRICT src, - ALuint frac, ALint increment, const al::span<float> dst) + ALuint frac, ALuint increment, const al::span<float> dst) { - const __m128i increment4{_mm_set1_epi32(increment*4)}; + const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))}; const __m128 fracOne4{_mm_set1_ps(1.0f/FRACTIONONE)}; const __m128i fracMask4{_mm_set1_epi32(FRACTIONMASK)}; - ASSUME(increment > 0); - - alignas(16) ALsizei pos_[4], frac_[4]; - InitiatePositionArrays(frac, increment, frac_, pos_, 4); - __m128i frac4{_mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3])}; - __m128i pos4{_mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3])}; + alignas(16) ALuint pos_[4], frac_[4]; + InitPosArrays(frac, increment, frac_, pos_, 4); + __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]), + static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))}; + __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]), + static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))}; auto dst_iter = dst.begin(); - const auto aligned_end = (dst.size()&~3) + dst_iter; + const auto aligned_end = (dst.size()&~3u) + dst_iter; while(dst_iter != aligned_end) { const int pos0{_mm_extract_epi32(pos4, 0)}; @@ -67,19 +67,22 @@ const ALfloat *Resample_<LerpTag,SSE4Tag>(const InterpState*, const ALfloat *RES frac4 = _mm_and_si128(frac4, fracMask4); } - /* NOTE: These four elements represent the position *after* the last four - * samples, so the lowest element is the next position to resample. - */ - src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4)); - frac = _mm_cvtsi128_si32(frac4); - - while(dst_iter != dst.end()) + if(dst_iter != dst.end()) { - *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); + /* NOTE: These four elements represent the position *after* the last + * four samples, so the lowest element is the next position to + * resample. + */ + src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4)); + frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4)); + + do { + *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE)); - frac += increment; - src += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; + frac += increment; + src += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } while(dst_iter != dst.end()); } return dst.begin(); } |