From 091e676db34ff51a709427d5b1203bfcd0788fb4 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Thu, 22 Mar 2018 05:06:15 -0700 Subject: Move mixer sources into a sub-directory --- Alc/ALu.c | 2 +- Alc/bformatdec.c | 1 - Alc/converter.c | 2 +- Alc/effects/reverb.c | 1 - Alc/mixer.c | 781 ------------------------------------------------ Alc/mixer/defs.h | 119 ++++++++ Alc/mixer/mixer_c.c | 209 +++++++++++++ Alc/mixer/mixer_inc.c | 114 +++++++ Alc/mixer/mixer_neon.c | 261 ++++++++++++++++ Alc/mixer/mixer_sse.c | 229 ++++++++++++++ Alc/mixer/mixer_sse2.c | 82 +++++ Alc/mixer/mixer_sse3.c | 0 Alc/mixer/mixer_sse41.c | 86 ++++++ Alc/mixer_c.c | 208 ------------- Alc/mixer_defs.h | 119 -------- Alc/mixer_inc.c | 114 ------- Alc/mixer_neon.c | 261 ---------------- Alc/mixer_sse.c | 229 -------------- Alc/mixer_sse2.c | 82 ----- Alc/mixer_sse3.c | 0 Alc/mixer_sse41.c | 86 ------ Alc/mixvoice.c | 781 ++++++++++++++++++++++++++++++++++++++++++++++++ 22 files changed, 1883 insertions(+), 1884 deletions(-) delete mode 100644 Alc/mixer.c create mode 100644 Alc/mixer/defs.h create mode 100644 Alc/mixer/mixer_c.c create mode 100644 Alc/mixer/mixer_inc.c create mode 100644 Alc/mixer/mixer_neon.c create mode 100644 Alc/mixer/mixer_sse.c create mode 100644 Alc/mixer/mixer_sse2.c create mode 100644 Alc/mixer/mixer_sse3.c create mode 100644 Alc/mixer/mixer_sse41.c delete mode 100644 Alc/mixer_c.c delete mode 100644 Alc/mixer_defs.h delete mode 100644 Alc/mixer_inc.c delete mode 100644 Alc/mixer_neon.c delete mode 100644 Alc/mixer_sse.c delete mode 100644 Alc/mixer_sse2.c delete mode 100644 Alc/mixer_sse3.c delete mode 100644 Alc/mixer_sse41.c create mode 100644 Alc/mixvoice.c (limited to 'Alc') diff --git a/Alc/ALu.c b/Alc/ALu.c index 1aa35cb7..63d13838 100644 --- a/Alc/ALu.c +++ b/Alc/ALu.c @@ -40,9 +40,9 @@ #include "static_assert.h" #include "ringbuffer.h" +#include "mixer/defs.h" #include "fpu_modes.h" #include "cpu_caps.h" -#include "mixer_defs.h" #include "bsinc_inc.h" 
#include "backends/base.h" diff --git a/Alc/bformatdec.c b/Alc/bformatdec.c index 28dbc742..ff0cd657 100644 --- a/Alc/bformatdec.c +++ b/Alc/bformatdec.c @@ -3,7 +3,6 @@ #include "bformatdec.h" #include "ambdec.h" -#include "mixer_defs.h" #include "alu.h" #include "bool.h" diff --git a/Alc/converter.c b/Alc/converter.c index 157073f2..6e28b4a6 100644 --- a/Alc/converter.c +++ b/Alc/converter.c @@ -4,7 +4,7 @@ #include "converter.h" #include "fpu_modes.h" -#include "mixer_defs.h" +#include "mixer/defs.h" SampleConverter *CreateSampleConverter(enum DevFmtType srcType, enum DevFmtType dstType, ALsizei numchans, ALsizei srcRate, ALsizei dstRate) diff --git a/Alc/effects/reverb.c b/Alc/effects/reverb.c index bd5553ad..ff1ee143 100644 --- a/Alc/effects/reverb.c +++ b/Alc/effects/reverb.c @@ -31,7 +31,6 @@ #include "alFilter.h" #include "alListener.h" #include "alError.h" -#include "mixer_defs.h" /* This is a user config option for modifying the overall output of the reverb * effect. diff --git a/Alc/mixer.c b/Alc/mixer.c deleted file mode 100644 index 7a7bbfe0..00000000 --- a/Alc/mixer.c +++ /dev/null @@ -1,781 +0,0 @@ -/** - * OpenAL cross platform audio library - * Copyright (C) 1999-2007 by authors. - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * Or go to http://www.gnu.org/copyleft/lgpl.html - */ - -#include "config.h" - -#include -#include -#include -#include -#include - -#include "alMain.h" -#include "AL/al.h" -#include "AL/alc.h" -#include "alSource.h" -#include "alBuffer.h" -#include "alListener.h" -#include "alAuxEffectSlot.h" -#include "sample_cvt.h" -#include "alu.h" -#include "alconfig.h" -#include "ringbuffer.h" - -#include "cpu_caps.h" -#include "mixer_defs.h" - - -static_assert((INT_MAX>>FRACTIONBITS)/MAX_PITCH > BUFFERSIZE, - "MAX_PITCH and/or BUFFERSIZE are too large for FRACTIONBITS!"); - -extern inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *restrict frac_arr, ALint *restrict pos_arr, ALsizei size); - - -/* BSinc24 requires up to 23 extra samples before the current position, and 24 after. */ -static_assert(MAX_RESAMPLE_PADDING >= 24, "MAX_RESAMPLE_PADDING must be at least 24!"); - - -enum Resampler ResamplerDefault = LinearResampler; - -MixerFunc MixSamples = Mix_C; -RowMixerFunc MixRowSamples = MixRow_C; -static HrtfMixerFunc MixHrtfSamples = MixHrtf_C; -static HrtfMixerBlendFunc MixHrtfBlendSamples = MixHrtfBlend_C; - -static MixerFunc SelectMixer(void) -{ -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return Mix_Neon; -#endif -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - return Mix_SSE; -#endif - return Mix_C; -} - -static RowMixerFunc SelectRowMixer(void) -{ -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return MixRow_Neon; -#endif -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - return MixRow_SSE; -#endif - return MixRow_C; -} - -static inline HrtfMixerFunc SelectHrtfMixer(void) -{ -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return MixHrtf_Neon; -#endif -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - return MixHrtf_SSE; -#endif - return MixHrtf_C; -} - -static inline HrtfMixerBlendFunc SelectHrtfBlendMixer(void) -{ -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return MixHrtfBlend_Neon; -#endif -#ifdef HAVE_SSE - 
if((CPUCapFlags&CPU_CAP_SSE)) - return MixHrtfBlend_SSE; -#endif - return MixHrtfBlend_C; -} - -ResamplerFunc SelectResampler(enum Resampler resampler) -{ - switch(resampler) - { - case PointResampler: - return Resample_point_C; - case LinearResampler: -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return Resample_lerp_Neon; -#endif -#ifdef HAVE_SSE4_1 - if((CPUCapFlags&CPU_CAP_SSE4_1)) - return Resample_lerp_SSE41; -#endif -#ifdef HAVE_SSE2 - if((CPUCapFlags&CPU_CAP_SSE2)) - return Resample_lerp_SSE2; -#endif - return Resample_lerp_C; - case FIR4Resampler: - return Resample_cubic_C; - case BSinc12Resampler: - case BSinc24Resampler: -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return Resample_bsinc_Neon; -#endif -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - return Resample_bsinc_SSE; -#endif - return Resample_bsinc_C; - } - - return Resample_point_C; -} - - -void aluInitMixer(void) -{ - const char *str; - - if(ConfigValueStr(NULL, NULL, "resampler", &str)) - { - if(strcasecmp(str, "point") == 0 || strcasecmp(str, "none") == 0) - ResamplerDefault = PointResampler; - else if(strcasecmp(str, "linear") == 0) - ResamplerDefault = LinearResampler; - else if(strcasecmp(str, "cubic") == 0) - ResamplerDefault = FIR4Resampler; - else if(strcasecmp(str, "bsinc12") == 0) - ResamplerDefault = BSinc12Resampler; - else if(strcasecmp(str, "bsinc24") == 0) - ResamplerDefault = BSinc24Resampler; - else if(strcasecmp(str, "bsinc") == 0) - { - WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str); - ResamplerDefault = BSinc12Resampler; - } - else if(strcasecmp(str, "sinc4") == 0 || strcasecmp(str, "sinc8") == 0) - { - WARN("Resampler option \"%s\" is deprecated, using cubic\n", str); - ResamplerDefault = FIR4Resampler; - } - else - { - char *end; - long n = strtol(str, &end, 0); - if(*end == '\0' && (n == PointResampler || n == LinearResampler || n == FIR4Resampler)) - ResamplerDefault = n; - else - WARN("Invalid resampler: %s\n", str); - } - } - - 
MixHrtfBlendSamples = SelectHrtfBlendMixer(); - MixHrtfSamples = SelectHrtfMixer(); - MixSamples = SelectMixer(); - MixRowSamples = SelectRowMixer(); -} - - -static void SendAsyncEvent(ALCcontext *context, ALuint enumtype, ALenum type, - ALuint objid, ALuint param, const char *msg) -{ - AsyncEvent evt; - evt.EnumType = enumtype; - evt.Type = type; - evt.ObjectId = objid; - evt.Param = param; - strcpy(evt.Message, msg); - if(ll_ringbuffer_write(context->AsyncEvents, (const char*)&evt, 1) == 1) - alsem_post(&context->EventSem); -} - - -static inline ALfloat Sample_ALubyte(ALubyte val) -{ return (val-128) * (1.0f/128.0f); } - -static inline ALfloat Sample_ALshort(ALshort val) -{ return val * (1.0f/32768.0f); } - -static inline ALfloat Sample_ALfloat(ALfloat val) -{ return val; } - -static inline ALfloat Sample_ALdouble(ALdouble val) -{ return (ALfloat)val; } - -typedef ALubyte ALmulaw; -static inline ALfloat Sample_ALmulaw(ALmulaw val) -{ return muLawDecompressionTable[val] * (1.0f/32768.0f); } - -typedef ALubyte ALalaw; -static inline ALfloat Sample_ALalaw(ALalaw val) -{ return aLawDecompressionTable[val] * (1.0f/32768.0f); } - -#define DECL_TEMPLATE(T) \ -static inline void Load_##T(ALfloat *restrict dst, const T *restrict src, \ - ALint srcstep, ALsizei samples) \ -{ \ - ALsizei i; \ - for(i = 0;i < samples;i++) \ - dst[i] += Sample_##T(src[i*srcstep]); \ -} - -DECL_TEMPLATE(ALubyte) -DECL_TEMPLATE(ALshort) -DECL_TEMPLATE(ALfloat) -DECL_TEMPLATE(ALdouble) -DECL_TEMPLATE(ALmulaw) -DECL_TEMPLATE(ALalaw) - -#undef DECL_TEMPLATE - -static void LoadSamples(ALfloat *restrict dst, const ALvoid *restrict src, ALint srcstep, - enum FmtType srctype, ALsizei samples) -{ -#define HANDLE_FMT(ET, ST) case ET: Load_##ST(dst, src, srcstep, samples); break - switch(srctype) - { - HANDLE_FMT(FmtUByte, ALubyte); - HANDLE_FMT(FmtShort, ALshort); - HANDLE_FMT(FmtFloat, ALfloat); - HANDLE_FMT(FmtDouble, ALdouble); - HANDLE_FMT(FmtMulaw, ALmulaw); - HANDLE_FMT(FmtAlaw, ALalaw); - } 
-#undef HANDLE_FMT -} - - -static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter, - ALfloat *restrict dst, const ALfloat *restrict src, - ALsizei numsamples, enum ActiveFilters type) -{ - ALsizei i; - switch(type) - { - case AF_None: - ALfilterState_processPassthru(lpfilter, src, numsamples); - ALfilterState_processPassthru(hpfilter, src, numsamples); - break; - - case AF_LowPass: - ALfilterState_process(lpfilter, dst, src, numsamples); - ALfilterState_processPassthru(hpfilter, dst, numsamples); - return dst; - case AF_HighPass: - ALfilterState_processPassthru(lpfilter, src, numsamples); - ALfilterState_process(hpfilter, dst, src, numsamples); - return dst; - - case AF_BandPass: - for(i = 0;i < numsamples;) - { - ALfloat temp[256]; - ALsizei todo = mini(256, numsamples-i); - - ALfilterState_process(lpfilter, temp, src+i, todo); - ALfilterState_process(hpfilter, dst+i, temp, todo); - i += todo; - } - return dst; - } - return src; -} - - -/* This function uses these device temp buffers. */ -#define SOURCE_DATA_BUF 0 -#define RESAMPLED_BUF 1 -#define FILTERED_BUF 2 -#define NFC_DATA_BUF 3 -ALboolean MixSource(ALvoice *voice, ALuint SourceID, ALCcontext *Context, ALsizei SamplesToDo) -{ - ALCdevice *Device = Context->Device; - ALbufferlistitem *BufferListItem; - ALbufferlistitem *BufferLoopItem; - ALsizei NumChannels, SampleSize; - ALbitfieldSOFT enabledevt; - ALsizei buffers_done = 0; - ResamplerFunc Resample; - ALsizei DataPosInt; - ALsizei DataPosFrac; - ALint64 DataSize64; - ALint increment; - ALsizei Counter; - ALsizei OutPos; - ALsizei IrSize; - bool isplaying; - bool firstpass; - bool isstatic; - ALsizei chan; - ALsizei send; - - /* Get source info */ - isplaying = true; /* Will only be called while playing. 
*/ - isstatic = !!(voice->Flags&VOICE_IS_STATIC); - DataPosInt = ATOMIC_LOAD(&voice->position, almemory_order_acquire); - DataPosFrac = ATOMIC_LOAD(&voice->position_fraction, almemory_order_relaxed); - BufferListItem = ATOMIC_LOAD(&voice->current_buffer, almemory_order_relaxed); - BufferLoopItem = ATOMIC_LOAD(&voice->loop_buffer, almemory_order_relaxed); - NumChannels = voice->NumChannels; - SampleSize = voice->SampleSize; - increment = voice->Step; - - IrSize = (Device->HrtfHandle ? Device->HrtfHandle->irSize : 0); - - Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ? - Resample_copy_C : voice->Resampler); - - Counter = (voice->Flags&VOICE_IS_FADING) ? SamplesToDo : 0; - firstpass = true; - OutPos = 0; - - do { - ALsizei SrcBufferSize, DstBufferSize; - - /* Figure out how many buffer samples will be needed */ - DataSize64 = SamplesToDo-OutPos; - DataSize64 *= increment; - DataSize64 += DataPosFrac+FRACTIONMASK; - DataSize64 >>= FRACTIONBITS; - DataSize64 += MAX_RESAMPLE_PADDING*2; - SrcBufferSize = (ALsizei)mini64(DataSize64, BUFFERSIZE); - - /* Figure out how many samples we can actually mix from this. */ - DataSize64 = SrcBufferSize; - DataSize64 -= MAX_RESAMPLE_PADDING*2; - DataSize64 <<= FRACTIONBITS; - DataSize64 -= DataPosFrac; - DstBufferSize = (ALsizei)mini64((DataSize64+(increment-1)) / increment, - SamplesToDo - OutPos); - - /* Some mixers like having a multiple of 4, so try to give that unless - * this is the last update. */ - if(DstBufferSize < SamplesToDo-OutPos) - DstBufferSize &= ~3; - - /* It's impossible to have a buffer list item with no entries. */ - assert(BufferListItem->num_buffers > 0); - - for(chan = 0;chan < NumChannels;chan++) - { - const ALfloat *ResampledData; - ALfloat *SrcData = Device->TempBuffer[SOURCE_DATA_BUF]; - ALsizei FilledAmt; - - /* Load the previous samples into the source data first, and clear the rest. 
*/ - memcpy(SrcData, voice->PrevSamples[chan], MAX_RESAMPLE_PADDING*sizeof(ALfloat)); - memset(SrcData+MAX_RESAMPLE_PADDING, 0, (BUFFERSIZE-MAX_RESAMPLE_PADDING)* - sizeof(ALfloat)); - FilledAmt = MAX_RESAMPLE_PADDING; - - if(isstatic) - { - /* TODO: For static sources, loop points are taken from the - * first buffer (should be adjusted by any buffer offset, to - * possibly be added later). - */ - const ALbuffer *Buffer0 = BufferListItem->buffers[0]; - const ALsizei LoopStart = Buffer0->LoopStart; - const ALsizei LoopEnd = Buffer0->LoopEnd; - const ALsizei LoopSize = LoopEnd - LoopStart; - - /* If current pos is beyond the loop range, do not loop */ - if(!BufferLoopItem || DataPosInt >= LoopEnd) - { - ALsizei SizeToDo = SrcBufferSize - FilledAmt; - ALsizei CompLen = 0; - ALsizei i; - - BufferLoopItem = NULL; - - for(i = 0;i < BufferListItem->num_buffers;i++) - { - const ALbuffer *buffer = BufferListItem->buffers[i]; - const ALubyte *Data = buffer->data; - ALsizei DataSize; - - if(DataPosInt >= buffer->SampleLen) - continue; - - /* Load what's left to play from the buffer */ - DataSize = mini(SizeToDo, buffer->SampleLen - DataPosInt); - CompLen = maxi(CompLen, DataSize); - - LoadSamples(&SrcData[FilledAmt], - &Data[(DataPosInt*NumChannels + chan)*SampleSize], - NumChannels, buffer->FmtType, DataSize - ); - } - FilledAmt += CompLen; - } - else - { - ALsizei SizeToDo = mini(SrcBufferSize - FilledAmt, LoopEnd - DataPosInt); - ALsizei CompLen = 0; - ALsizei i; - - for(i = 0;i < BufferListItem->num_buffers;i++) - { - const ALbuffer *buffer = BufferListItem->buffers[i]; - const ALubyte *Data = buffer->data; - ALsizei DataSize; - - if(DataPosInt >= buffer->SampleLen) - continue; - - /* Load what's left of this loop iteration */ - DataSize = mini(SizeToDo, buffer->SampleLen - DataPosInt); - CompLen = maxi(CompLen, DataSize); - - LoadSamples(&SrcData[FilledAmt], - &Data[(DataPosInt*NumChannels + chan)*SampleSize], - NumChannels, buffer->FmtType, DataSize - ); - } - FilledAmt 
+= CompLen; - - while(SrcBufferSize > FilledAmt) - { - const ALsizei SizeToDo = mini(SrcBufferSize - FilledAmt, LoopSize); - - CompLen = 0; - for(i = 0;i < BufferListItem->num_buffers;i++) - { - const ALbuffer *buffer = BufferListItem->buffers[i]; - const ALubyte *Data = buffer->data; - ALsizei DataSize; - - if(LoopStart >= buffer->SampleLen) - continue; - - DataSize = mini(SizeToDo, buffer->SampleLen - LoopStart); - CompLen = maxi(CompLen, DataSize); - - LoadSamples(&SrcData[FilledAmt], - &Data[(LoopStart*NumChannels + chan)*SampleSize], - NumChannels, buffer->FmtType, DataSize - ); - } - FilledAmt += CompLen; - } - } - } - else - { - /* Crawl the buffer queue to fill in the temp buffer */ - ALbufferlistitem *tmpiter = BufferListItem; - ALsizei pos = DataPosInt; - - while(tmpiter && SrcBufferSize > FilledAmt) - { - ALsizei SizeToDo = SrcBufferSize - FilledAmt; - ALsizei CompLen = 0; - ALsizei i; - - for(i = 0;i < tmpiter->num_buffers;i++) - { - const ALbuffer *ALBuffer = tmpiter->buffers[i]; - ALsizei DataSize = ALBuffer ? ALBuffer->SampleLen : 0; - CompLen = maxi(CompLen, DataSize); - - if(DataSize > pos) - { - const ALubyte *Data = ALBuffer->data; - Data += (pos*NumChannels + chan)*SampleSize; - - DataSize = minu(SizeToDo, DataSize - pos); - LoadSamples(&SrcData[FilledAmt], Data, NumChannels, - ALBuffer->FmtType, DataSize); - } - } - if(pos > CompLen) - pos -= CompLen; - else - { - FilledAmt += CompLen - pos; - pos = 0; - } - if(SrcBufferSize > FilledAmt) - { - tmpiter = ATOMIC_LOAD(&tmpiter->next, almemory_order_acquire); - if(!tmpiter) tmpiter = BufferLoopItem; - } - } - } - - /* Store the last source samples used for next time. */ - memcpy(voice->PrevSamples[chan], - &SrcData[(increment*DstBufferSize + DataPosFrac)>>FRACTIONBITS], - MAX_RESAMPLE_PADDING*sizeof(ALfloat) - ); - - /* Now resample, then filter and mix to the appropriate outputs. 
*/ - ResampledData = Resample(&voice->ResampleState, - &SrcData[MAX_RESAMPLE_PADDING], DataPosFrac, increment, - Device->TempBuffer[RESAMPLED_BUF], DstBufferSize - ); - { - DirectParams *parms = &voice->Direct.Params[chan]; - const ALfloat *samples; - - samples = DoFilters( - &parms->LowPass, &parms->HighPass, Device->TempBuffer[FILTERED_BUF], - ResampledData, DstBufferSize, voice->Direct.FilterType - ); - if(!(voice->Flags&VOICE_HAS_HRTF)) - { - if(!Counter) - memcpy(parms->Gains.Current, parms->Gains.Target, - sizeof(parms->Gains.Current)); - if(!(voice->Flags&VOICE_HAS_NFC)) - MixSamples(samples, voice->Direct.Channels, voice->Direct.Buffer, - parms->Gains.Current, parms->Gains.Target, Counter, OutPos, - DstBufferSize - ); - else - { - ALfloat *nfcsamples = Device->TempBuffer[NFC_DATA_BUF]; - ALsizei chanoffset = 0; - - MixSamples(samples, - voice->Direct.ChannelsPerOrder[0], voice->Direct.Buffer, - parms->Gains.Current, parms->Gains.Target, Counter, OutPos, - DstBufferSize - ); - chanoffset += voice->Direct.ChannelsPerOrder[0]; -#define APPLY_NFC_MIX(order) \ - if(voice->Direct.ChannelsPerOrder[order] > 0) \ - { \ - NfcFilterUpdate##order(&parms->NFCtrlFilter, nfcsamples, samples, \ - DstBufferSize); \ - MixSamples(nfcsamples, voice->Direct.ChannelsPerOrder[order], \ - voice->Direct.Buffer+chanoffset, parms->Gains.Current+chanoffset, \ - parms->Gains.Target+chanoffset, Counter, OutPos, DstBufferSize \ - ); \ - chanoffset += voice->Direct.ChannelsPerOrder[order]; \ - } - APPLY_NFC_MIX(1) - APPLY_NFC_MIX(2) - APPLY_NFC_MIX(3) -#undef APPLY_NFC_MIX - } - } - else - { - MixHrtfParams hrtfparams; - ALsizei fademix = 0; - int lidx, ridx; - - lidx = GetChannelIdxByName(&Device->RealOut, FrontLeft); - ridx = GetChannelIdxByName(&Device->RealOut, FrontRight); - assert(lidx != -1 && ridx != -1); - - if(!Counter) - { - /* No fading, just overwrite the old HRTF params. 
*/ - parms->Hrtf.Old = parms->Hrtf.Target; - } - else if(!(parms->Hrtf.Old.Gain > GAIN_SILENCE_THRESHOLD)) - { - /* The old HRTF params are silent, so overwrite the old - * coefficients with the new, and reset the old gain to - * 0. The future mix will then fade from silence. - */ - parms->Hrtf.Old = parms->Hrtf.Target; - parms->Hrtf.Old.Gain = 0.0f; - } - else if(firstpass) - { - ALfloat gain; - - /* Fade between the coefficients over 128 samples. */ - fademix = mini(DstBufferSize, 128); - - /* The new coefficients need to fade in completely - * since they're replacing the old ones. To keep the - * gain fading consistent, interpolate between the old - * and new target gains given how much of the fade time - * this mix handles. - */ - gain = lerp(parms->Hrtf.Old.Gain, parms->Hrtf.Target.Gain, - minf(1.0f, (ALfloat)fademix/Counter)); - hrtfparams.Coeffs = parms->Hrtf.Target.Coeffs; - hrtfparams.Delay[0] = parms->Hrtf.Target.Delay[0]; - hrtfparams.Delay[1] = parms->Hrtf.Target.Delay[1]; - hrtfparams.Gain = 0.0f; - hrtfparams.GainStep = gain / (ALfloat)fademix; - - MixHrtfBlendSamples( - voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], - samples, voice->Offset, OutPos, IrSize, &parms->Hrtf.Old, - &hrtfparams, &parms->Hrtf.State, fademix - ); - /* Update the old parameters with the result. */ - parms->Hrtf.Old = parms->Hrtf.Target; - if(fademix < Counter) - parms->Hrtf.Old.Gain = hrtfparams.Gain; - } - - if(fademix < DstBufferSize) - { - ALsizei todo = DstBufferSize - fademix; - ALfloat gain = parms->Hrtf.Target.Gain; - - /* Interpolate the target gain if the gain fading lasts - * longer than this mix. 
- */ - if(Counter > DstBufferSize) - gain = lerp(parms->Hrtf.Old.Gain, gain, - (ALfloat)todo/(Counter-fademix)); - - hrtfparams.Coeffs = parms->Hrtf.Target.Coeffs; - hrtfparams.Delay[0] = parms->Hrtf.Target.Delay[0]; - hrtfparams.Delay[1] = parms->Hrtf.Target.Delay[1]; - hrtfparams.Gain = parms->Hrtf.Old.Gain; - hrtfparams.GainStep = (gain - parms->Hrtf.Old.Gain) / (ALfloat)todo; - MixHrtfSamples( - voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], - samples+fademix, voice->Offset+fademix, OutPos+fademix, IrSize, - &hrtfparams, &parms->Hrtf.State, todo - ); - /* Store the interpolated gain or the final target gain - * depending if the fade is done. - */ - if(DstBufferSize < Counter) - parms->Hrtf.Old.Gain = gain; - else - parms->Hrtf.Old.Gain = parms->Hrtf.Target.Gain; - } - } - } - - for(send = 0;send < Device->NumAuxSends;send++) - { - SendParams *parms = &voice->Send[send].Params[chan]; - const ALfloat *samples; - - if(!voice->Send[send].Buffer) - continue; - - samples = DoFilters( - &parms->LowPass, &parms->HighPass, Device->TempBuffer[FILTERED_BUF], - ResampledData, DstBufferSize, voice->Send[send].FilterType - ); - - if(!Counter) - memcpy(parms->Gains.Current, parms->Gains.Target, - sizeof(parms->Gains.Current)); - MixSamples(samples, voice->Send[send].Channels, voice->Send[send].Buffer, - parms->Gains.Current, parms->Gains.Target, Counter, OutPos, DstBufferSize - ); - } - } - /* Update positions */ - DataPosFrac += increment*DstBufferSize; - DataPosInt += DataPosFrac>>FRACTIONBITS; - DataPosFrac &= FRACTIONMASK; - - OutPos += DstBufferSize; - voice->Offset += DstBufferSize; - Counter = maxi(DstBufferSize, Counter) - DstBufferSize; - firstpass = false; - - if(isstatic) - { - if(BufferLoopItem) - { - /* Handle looping static source */ - const ALbuffer *Buffer = BufferListItem->buffers[0]; - ALsizei LoopStart = Buffer->LoopStart; - ALsizei LoopEnd = Buffer->LoopEnd; - if(DataPosInt >= LoopEnd) - { - assert(LoopEnd > LoopStart); - DataPosInt = 
((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart; - } - } - else - { - /* Handle non-looping static source */ - ALsizei CompLen = 0; - ALsizei i; - - for(i = 0;i < BufferListItem->num_buffers;i++) - { - const ALbuffer *buffer = BufferListItem->buffers[i]; - if(buffer) CompLen = maxi(CompLen, buffer->SampleLen); - } - - if(DataPosInt >= CompLen) - { - isplaying = false; - BufferListItem = NULL; - DataPosInt = 0; - DataPosFrac = 0; - break; - } - } - } - else while(1) - { - /* Handle streaming source */ - ALsizei CompLen = 0; - ALsizei i; - - for(i = 0;i < BufferListItem->num_buffers;i++) - { - const ALbuffer *buffer = BufferListItem->buffers[i]; - if(buffer) CompLen = maxi(CompLen, buffer->SampleLen); - } - - if(CompLen > DataPosInt) - break; - - buffers_done += BufferListItem->num_buffers; - BufferListItem = ATOMIC_LOAD(&BufferListItem->next, almemory_order_acquire); - if(!BufferListItem && !(BufferListItem=BufferLoopItem)) - { - isplaying = false; - DataPosInt = 0; - DataPosFrac = 0; - break; - } - - DataPosInt -= CompLen; - } - } while(isplaying && OutPos < SamplesToDo); - - voice->Flags |= VOICE_IS_FADING; - - /* Update source info */ - ATOMIC_STORE(&voice->position, DataPosInt, almemory_order_relaxed); - ATOMIC_STORE(&voice->position_fraction, DataPosFrac, almemory_order_relaxed); - ATOMIC_STORE(&voice->current_buffer, BufferListItem, almemory_order_release); - - /* Send any events now, after the position/buffer info was updated. 
*/ - enabledevt = ATOMIC_LOAD(&Context->EnabledEvts, almemory_order_acquire); - if(buffers_done > 0 && (enabledevt&EventType_BufferCompleted)) - SendAsyncEvent(Context, EventType_BufferCompleted, - AL_EVENT_TYPE_BUFFER_COMPLETED_SOFT, SourceID, buffers_done, "Buffer completed" - ); - - return isplaying; -} diff --git a/Alc/mixer/defs.h b/Alc/mixer/defs.h new file mode 100644 index 00000000..fe19cef4 --- /dev/null +++ b/Alc/mixer/defs.h @@ -0,0 +1,119 @@ +#ifndef MIXER_DEFS_H +#define MIXER_DEFS_H + +#include "AL/alc.h" +#include "AL/al.h" +#include "alMain.h" +#include "alu.h" + +struct MixGains; + +struct MixHrtfParams; +struct HrtfState; + +/* C resamplers */ +const ALfloat *Resample_copy_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); +const ALfloat *Resample_point_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); +const ALfloat *Resample_lerp_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); +const ALfloat *Resample_cubic_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); +const ALfloat *Resample_bsinc_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); + + +/* C mixers */ +void MixHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, + struct HrtfState *hrtfstate, ALsizei BufferSize); +void MixHrtfBlend_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, const HrtfParams *oldparams, + MixHrtfParams *newparams, HrtfState *hrtfstate, + ALsizei BufferSize); +void 
MixDirectHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + ALsizei BufferSize); +void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize); +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, + ALsizei InPos, ALsizei BufferSize); + +/* SSE mixers */ +void MixHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, + struct HrtfState *hrtfstate, ALsizei BufferSize); +void MixHrtfBlend_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, const HrtfParams *oldparams, + MixHrtfParams *newparams, HrtfState *hrtfstate, + ALsizei BufferSize); +void MixDirectHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + ALsizei BufferSize); +void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize); +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, + ALsizei InPos, ALsizei BufferSize); + +/* SSE resamplers */ +inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *restrict frac_arr, ALint *restrict pos_arr, ALsizei size) +{ + ALsizei i; + + pos_arr[0] = 0; + frac_arr[0] = frac; + for(i = 1;i < size;i++) + { + ALint frac_tmp = frac_arr[i-1] + increment; + 
pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS); + frac_arr[i] = frac_tmp&FRACTIONMASK; + } +} + +const ALfloat *Resample_lerp_SSE2(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei numsamples); +const ALfloat *Resample_lerp_SSE41(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei numsamples); + +const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei dstlen); + +/* Neon mixers */ +void MixHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, struct MixHrtfParams *hrtfparams, + struct HrtfState *hrtfstate, ALsizei BufferSize); +void MixHrtfBlend_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, const HrtfParams *oldparams, + MixHrtfParams *newparams, HrtfState *hrtfstate, + ALsizei BufferSize); +void MixDirectHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + ALsizei BufferSize); +void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize); +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, + ALsizei InPos, ALsizei BufferSize); + +/* Neon resamplers */ +const ALfloat *Resample_lerp_Neon(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei numsamples); +const ALfloat *Resample_bsinc_Neon(const InterpState *state, const ALfloat 
*restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei dstlen); + +#endif /* MIXER_DEFS_H */ diff --git a/Alc/mixer/mixer_c.c b/Alc/mixer/mixer_c.c new file mode 100644 index 00000000..0c33e9b0 --- /dev/null +++ b/Alc/mixer/mixer_c.c @@ -0,0 +1,209 @@ +#include "config.h" + +#include + +#include "alMain.h" +#include "alu.h" +#include "alSource.h" +#include "alAuxEffectSlot.h" +#include "defs.h" + + +static inline ALfloat do_point(const ALfloat *restrict vals, ALsizei UNUSED(frac)) +{ return vals[0]; } +static inline ALfloat do_lerp(const ALfloat *restrict vals, ALsizei frac) +{ return lerp(vals[0], vals[1], frac * (1.0f/FRACTIONONE)); } +static inline ALfloat do_cubic(const ALfloat *restrict vals, ALsizei frac) +{ return cubic(vals[0], vals[1], vals[2], vals[3], frac * (1.0f/FRACTIONONE)); } + +const ALfloat *Resample_copy_C(const InterpState* UNUSED(state), + const ALfloat *restrict src, ALsizei UNUSED(frac), ALint UNUSED(increment), + ALfloat *restrict dst, ALsizei numsamples) +{ +#if defined(HAVE_SSE) || defined(HAVE_NEON) + /* Avoid copying the source data if it's aligned like the destination. 
*/ + if((((intptr_t)src)&15) == (((intptr_t)dst)&15)) + return src; +#endif + memcpy(dst, src, numsamples*sizeof(ALfloat)); + return dst; +} + +#define DECL_TEMPLATE(Tag, Sampler, O) \ +const ALfloat *Resample_##Tag##_C(const InterpState* UNUSED(state), \ + const ALfloat *restrict src, ALsizei frac, ALint increment, \ + ALfloat *restrict dst, ALsizei numsamples) \ +{ \ + ALsizei i; \ + \ + src -= O; \ + for(i = 0;i < numsamples;i++) \ + { \ + dst[i] = Sampler(src, frac); \ + \ + frac += increment; \ + src += frac>>FRACTIONBITS; \ + frac &= FRACTIONMASK; \ + } \ + return dst; \ +} + +DECL_TEMPLATE(point, do_point, 0) +DECL_TEMPLATE(lerp, do_lerp, 0) +DECL_TEMPLATE(cubic, do_cubic, 1) + +#undef DECL_TEMPLATE + +const ALfloat *Resample_bsinc_C(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei dstlen) +{ + const ALfloat *fil, *scd, *phd, *spd; + const ALfloat *const filter = state->bsinc.filter; + const ALfloat sf = state->bsinc.sf; + const ALsizei m = state->bsinc.m; + ALsizei j_f, pi, i; + ALfloat pf, r; + + src += state->bsinc.l; + for(i = 0;i < dstlen;i++) + { + // Calculate the phase index and factor. 
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} + + +void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const ALfloat *restrict src, ALsizei numsamples) +{ + ALsizei i; + if(LIKELY(numsamples > 1)) + { + ALfloat x0 = filter->x[0]; + ALfloat x1 = filter->x[1]; + ALfloat y0 = filter->y[0]; + ALfloat y1 = filter->y[1]; + + for(i = 0;i < numsamples;i++) + { + dst[i] = filter->b0* src[i] + + filter->b1*x0 + filter->b2*x1 - + filter->a1*y0 - filter->a2*y1; + y1 = y0; y0 = dst[i]; + x1 = x0; x0 = src[i]; + } + + filter->x[0] = x0; + filter->x[1] = x1; + filter->y[0] = y0; + filter->y[1] = y1; + } + else if(numsamples == 1) + { + dst[0] = filter->b0 * src[0] + + filter->b1 * filter->x[0] + + filter->b2 * filter->x[1] - + filter->a1 * filter->y[0] - + filter->a2 * filter->y[1]; + filter->x[1] = filter->x[0]; + filter->x[0] = src[0]; + filter->y[1] = filter->y[0]; + filter->y[0] = dst[0]; + } +} + + +static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], + const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], + ALfloat left, ALfloat right) +{ + ALsizei c; + for(c = 0;c < IrSize;c++) + { + const ALsizei off = (Offset+c)&HRIR_MASK; + Values[off][0] += Coeffs[c][0] * left; + Values[off][1] += Coeffs[c][1] * right; + } +} + +#define MixHrtf MixHrtf_C +#define MixHrtfBlend MixHrtfBlend_C +#define MixDirectHrtf MixDirectHrtf_C +#include "mixer_inc.c" +#undef MixHrtf + + +void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize) +{ + ALfloat gain, delta, step; + ALsizei c; + + delta = (Counter > 0) ? 
1.0f/(ALfloat)Counter : 0.0f; + + for(c = 0;c < OutChans;c++) + { + ALsizei pos = 0; + gain = CurrentGains[c]; + step = (TargetGains[c] - gain) * delta; + if(fabsf(step) > FLT_EPSILON) + { + ALsizei minsize = mini(BufferSize, Counter); + for(;pos < minsize;pos++) + { + OutBuffer[c][OutPos+pos] += data[pos]*gain; + gain += step; + } + if(pos == Counter) + gain = TargetGains[c]; + CurrentGains[c] = gain; + } + + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + for(;pos < BufferSize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*gain; + } +} + +/* Basically the inverse of the above. Rather than one input going to multiple + * outputs (each with its own gain), it's multiple inputs (each with its own + * gain) going to one output. This applies one row (vs one column) of a matrix + * transform. And as the matrices are more or less static once set up, no + * stepping is necessary. + */ +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +{ + ALsizei c, i; + + for(c = 0;c < InChans;c++) + { + ALfloat gain = Gains[c]; + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + + for(i = 0;i < BufferSize;i++) + OutBuffer[i] += data[c][InPos+i] * gain; + } +} diff --git a/Alc/mixer/mixer_inc.c b/Alc/mixer/mixer_inc.c new file mode 100644 index 00000000..ad0daa63 --- /dev/null +++ b/Alc/mixer/mixer_inc.c @@ -0,0 +1,114 @@ +#include "config.h" + +#include "alMain.h" +#include "alSource.h" + +#include "hrtf.h" +#include "align.h" +#include "alu.h" +#include "defs.h" + + +static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], + const ALsizei irSize, + const ALfloat (*restrict Coeffs)[2], + ALfloat left, ALfloat right); + + +void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, + ALsizei BufferSize) +{ + const 
ALfloat (*Coeffs)[2] = ASSUME_ALIGNED(hrtfparams->Coeffs, 16); + const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] }; + ALfloat gainstep = hrtfparams->GainStep; + ALfloat gain = hrtfparams->Gain; + ALfloat left, right; + ALsizei i; + + LeftOut += OutPos; + RightOut += OutPos; + for(i = 0;i < BufferSize;i++) + { + hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); + left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*gain; + right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*gain; + + hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; + hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; + + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); + *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; + *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; + + gain += gainstep; + Offset++; + } + hrtfparams->Gain = gain; +} + +void MixHrtfBlend(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, ALsizei OutPos, + const ALsizei IrSize, const HrtfParams *oldparams, + MixHrtfParams *newparams, HrtfState *hrtfstate, + ALsizei BufferSize) +{ + const ALfloat (*OldCoeffs)[2] = ASSUME_ALIGNED(oldparams->Coeffs, 16); + const ALsizei OldDelay[2] = { oldparams->Delay[0], oldparams->Delay[1] }; + ALfloat oldGain = oldparams->Gain; + ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize; + const ALfloat (*NewCoeffs)[2] = ASSUME_ALIGNED(newparams->Coeffs, 16); + const ALsizei NewDelay[2] = { newparams->Delay[0], newparams->Delay[1] }; + ALfloat newGain = newparams->Gain; + ALfloat newGainStep = newparams->GainStep; + ALfloat left, right; + ALsizei i; + + LeftOut += OutPos; + RightOut += OutPos; + for(i = 0;i < BufferSize;i++) + { + hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; + hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; + + hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); + + left = 
hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*oldGain; + right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*oldGain; + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); + + left = hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*newGain; + right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*newGain; + ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); + + *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; + *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; + + oldGain += oldGainStep; + newGain += newGainStep; + Offset++; + } + newparams->Gain = newGain; +} + +void MixDirectHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, + const ALfloat *data, ALsizei Offset, const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], + ALsizei BufferSize) +{ + ALfloat insample; + ALsizei i; + + for(i = 0;i < BufferSize;i++) + { + Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; + Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; + Offset++; + + insample = *(data++); + ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); + *(LeftOut++) += Values[Offset&HRIR_MASK][0]; + *(RightOut++) += Values[Offset&HRIR_MASK][1]; + } +} diff --git a/Alc/mixer/mixer_neon.c b/Alc/mixer/mixer_neon.c new file mode 100644 index 00000000..b93d11fd --- /dev/null +++ b/Alc/mixer/mixer_neon.c @@ -0,0 +1,261 @@ +#include "config.h" + +#include + +#include "AL/al.h" +#include "AL/alc.h" +#include "alMain.h" +#include "alu.h" +#include "hrtf.h" +#include "defs.h" + + +const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state), + const ALfloat *restrict src, ALsizei frac, ALint increment, + ALfloat *restrict dst, ALsizei numsamples) +{ + const int32x4_t increment4 = vdupq_n_s32(increment*4); + const float32x4_t fracOne4 = vdupq_n_f32(1.0f/FRACTIONONE); + const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK); + alignas(16) ALint pos_[4]; + alignas(16) 
ALsizei frac_[4]; + int32x4_t pos4; + int32x4_t frac4; + ALsizei i; + + InitiatePositionArrays(frac, increment, frac_, pos_, 4); + + frac4 = vld1q_s32(frac_); + pos4 = vld1q_s32(pos_); + + for(i = 0;numsamples-i > 3;i += 4) + { + const float32x4_t val1 = (float32x4_t){src[pos_[0]], src[pos_[1]], src[pos_[2]], src[pos_[3]]}; + const float32x4_t val2 = (float32x4_t){src[pos_[0]+1], src[pos_[1]+1], src[pos_[2]+1], src[pos_[3]+1]}; + + /* val1 + (val2-val1)*mu */ + const float32x4_t r0 = vsubq_f32(val2, val1); + const float32x4_t mu = vmulq_f32(vcvtq_f32_s32(frac4), fracOne4); + const float32x4_t out = vmlaq_f32(val1, mu, r0); + + vst1q_f32(&dst[i], out); + + frac4 = vaddq_s32(frac4, increment4); + pos4 = vaddq_s32(pos4, vshrq_n_s32(frac4, FRACTIONBITS)); + frac4 = vandq_s32(frac4, fracMask4); + + vst1q_s32(pos_, pos4); + } + + if(i < numsamples) + { + /* NOTE: These four elements represent the position *after* the last + * four samples, so the lowest element is the next position to + * resample. + */ + ALint pos = pos_[0]; + frac = vgetq_lane_s32(frac4, 0); + do { + dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } while(++i < numsamples); + } + return dst; +} + +const ALfloat *Resample_bsinc_Neon(const InterpState *state, + const ALfloat *restrict src, ALsizei frac, ALint increment, + ALfloat *restrict dst, ALsizei dstlen) +{ + const ALfloat *const filter = state->bsinc.filter; + const float32x4_t sf4 = vdupq_n_f32(state->bsinc.sf); + const ALsizei m = state->bsinc.m; + const float32x4_t *fil, *scd, *phd, *spd; + ALsizei pi, i, j, offset; + float32x4_t r4; + ALfloat pf; + + src += state->bsinc.l; + for(i = 0;i < dstlen;i++) + { + // Calculate the phase index and factor. 
+#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} + + +static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], + const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], + ALfloat left, ALfloat right) +{ + ALsizei c; + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + Values = ASSUME_ALIGNED(Values, 16); + Coeffs = ASSUME_ALIGNED(Coeffs, 16); + for(c = 0;c < IrSize;c += 2) + { + const ALsizei o0 = (Offset+c)&HRIR_MASK; + const ALsizei o1 = (o0+1)&HRIR_MASK; + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), + vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + + vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + } +} + +#define MixHrtf MixHrtf_Neon +#define MixHrtfBlend MixHrtfBlend_Neon +#define MixDirectHrtf MixDirectHrtf_Neon +#include "mixer_inc.c" +#undef MixHrtf + + +void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize) +{ + ALfloat gain, delta, step; + float32x4_t gain4; + ALsizei c; + + data = ASSUME_ALIGNED(data, 16); + OutBuffer = ASSUME_ALIGNED(OutBuffer, 16); + + delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; + + for(c = 0;c < OutChans;c++) + { + ALsizei pos = 0; + gain = CurrentGains[c]; + step = (TargetGains[c] - gain) * delta; + if(fabsf(step) > FLT_EPSILON) + { + ALsizei minsize = mini(BufferSize, Counter); + /* Mix with applying gain steps in aligned multiples of 4. 
*/ + if(minsize-pos > 3) + { + float32x4_t step4; + gain4 = vsetq_lane_f32(gain, gain4, 0); + gain4 = vsetq_lane_f32(gain + step, gain4, 1); + gain4 = vsetq_lane_f32(gain + step + step, gain4, 2); + gain4 = vsetq_lane_f32(gain + step + step + step, gain4, 3); + step4 = vdupq_n_f32(step + step + step + step); + do { + const float32x4_t val4 = vld1q_f32(&data[pos]); + float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + dry4 = vmlaq_f32(dry4, val4, gain4); + gain4 = vaddq_f32(gain4, step4); + vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + pos += 4; + } while(minsize-pos > 3); + /* NOTE: gain4 now represents the next four gains after the + * last four mixed samples, so the lowest element represents + * the next gain to apply. + */ + gain = vgetq_lane_f32(gain4, 0); + } + /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ + for(;pos < minsize;pos++) + { + OutBuffer[c][OutPos+pos] += data[pos]*gain; + gain += step; + } + if(pos == Counter) + gain = TargetGains[c]; + CurrentGains[c] = gain; + + /* Mix until pos is aligned with 4 or the mix is done. 
*/ + minsize = mini(BufferSize, (pos+3)&~3); + for(;pos < minsize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*gain; + } + + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + gain4 = vdupq_n_f32(gain); + for(;BufferSize-pos > 3;pos += 4) + { + const float32x4_t val4 = vld1q_f32(&data[pos]); + float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); + dry4 = vmlaq_f32(dry4, val4, gain4); + vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); + } + for(;pos < BufferSize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*gain; + } +} + +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +{ + float32x4_t gain4; + ALsizei c; + + data = ASSUME_ALIGNED(data, 16); + OutBuffer = ASSUME_ALIGNED(OutBuffer, 16); + + for(c = 0;c < InChans;c++) + { + ALsizei pos = 0; + ALfloat gain = Gains[c]; + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + + gain4 = vdupq_n_f32(gain); + for(;BufferSize-pos > 3;pos += 4) + { + const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]); + float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]); + dry4 = vmlaq_f32(dry4, val4, gain4); + vst1q_f32(&OutBuffer[pos], dry4); + } + for(;pos < BufferSize;pos++) + OutBuffer[pos] += data[c][InPos+pos]*gain; + } +} diff --git a/Alc/mixer/mixer_sse.c b/Alc/mixer/mixer_sse.c new file mode 100644 index 00000000..288661b2 --- /dev/null +++ b/Alc/mixer/mixer_sse.c @@ -0,0 +1,229 @@ +#include "config.h" + +#include + +#include "AL/al.h" +#include "AL/alc.h" +#include "alMain.h" +#include "alu.h" + +#include "alSource.h" +#include "alAuxEffectSlot.h" +#include "defs.h" + + +const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restrict src, + ALsizei frac, ALint increment, ALfloat *restrict dst, + ALsizei dstlen) +{ + const ALfloat *const filter = state->bsinc.filter; + const __m128 sf4 = _mm_set1_ps(state->bsinc.sf); + const ALsizei m = state->bsinc.m; + const __m128 *fil, *scd, *phd, *spd; + 
ALsizei pi, i, j, offset; + ALfloat pf; + __m128 r4; + + src += state->bsinc.l; + for(i = 0;i < dstlen;i++) + { + // Calculate the phase index and factor. +#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) + pi = frac >> FRAC_PHASE_BITDIFF; + pf = (frac & ((1<>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} + + +static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], + const ALsizei IrSize, + const ALfloat (*restrict Coeffs)[2], + ALfloat left, ALfloat right) +{ + const __m128 lrlr = _mm_setr_ps(left, right, left, right); + __m128 vals = _mm_setzero_ps(); + __m128 coeffs; + ALsizei i; + + Values = ASSUME_ALIGNED(Values, 16); + Coeffs = ASSUME_ALIGNED(Coeffs, 16); + if((Offset&1)) + { + const ALsizei o0 = Offset&HRIR_MASK; + const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK; + __m128 imp0, imp1; + + coeffs = _mm_load_ps(&Coeffs[0][0]); + vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); + imp0 = _mm_mul_ps(lrlr, coeffs); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi((__m64*)&Values[o0][0], vals); + for(i = 1;i < IrSize-1;i += 2) + { + const ALsizei o2 = (Offset+i)&HRIR_MASK; + + coeffs = _mm_load_ps(&Coeffs[i+1][0]); + vals = _mm_load_ps(&Values[o2][0]); + imp1 = _mm_mul_ps(lrlr, coeffs); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Values[o2][0], vals); + imp0 = imp1; + } + vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); + imp0 = _mm_movehl_ps(imp0, imp0); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi((__m64*)&Values[o1][0], vals); + } + else + { + for(i = 0;i < IrSize;i += 2) + { + const ALsizei o = (Offset + i)&HRIR_MASK; + + coeffs = _mm_load_ps(&Coeffs[i][0]); + vals = _mm_load_ps(&Values[o][0]); + vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); + _mm_store_ps(&Values[o][0], vals); + } + } +} + +#define MixHrtf MixHrtf_SSE +#define MixHrtfBlend MixHrtfBlend_SSE +#define MixDirectHrtf MixDirectHrtf_SSE +#include "mixer_inc.c" +#undef MixHrtf + + 
+void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], + ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, + ALsizei BufferSize) +{ + ALfloat gain, delta, step; + __m128 gain4; + ALsizei c; + + delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; + + for(c = 0;c < OutChans;c++) + { + ALsizei pos = 0; + gain = CurrentGains[c]; + step = (TargetGains[c] - gain) * delta; + if(fabsf(step) > FLT_EPSILON) + { + ALsizei minsize = mini(BufferSize, Counter); + /* Mix with applying gain steps in aligned multiples of 4. */ + if(minsize-pos > 3) + { + __m128 step4; + gain4 = _mm_setr_ps( + gain, + gain + step, + gain + step + step, + gain + step + step + step + ); + step4 = _mm_set1_ps(step + step + step + step); + do { + const __m128 val4 = _mm_load_ps(&data[pos]); + __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); + dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); + gain4 = _mm_add_ps(gain4, step4); + _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + pos += 4; + } while(minsize-pos > 3); + /* NOTE: gain4 now represents the next four gains after the + * last four mixed samples, so the lowest element represents + * the next gain to apply. + */ + gain = _mm_cvtss_f32(gain4); + } + /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ + for(;pos < minsize;pos++) + { + OutBuffer[c][OutPos+pos] += data[pos]*gain; + gain += step; + } + if(pos == Counter) + gain = TargetGains[c]; + CurrentGains[c] = gain; + + /* Mix until pos is aligned with 4 or the mix is done. 
*/ + minsize = mini(BufferSize, (pos+3)&~3); + for(;pos < minsize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*gain; + } + + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + gain4 = _mm_set1_ps(gain); + for(;BufferSize-pos > 3;pos += 4) + { + const __m128 val4 = _mm_load_ps(&data[pos]); + __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); + dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); + _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); + } + for(;pos < BufferSize;pos++) + OutBuffer[c][OutPos+pos] += data[pos]*gain; + } +} + +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) +{ + __m128 gain4; + ALsizei c; + + for(c = 0;c < InChans;c++) + { + ALsizei pos = 0; + ALfloat gain = Gains[c]; + if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) + continue; + + gain4 = _mm_set1_ps(gain); + for(;BufferSize-pos > 3;pos += 4) + { + const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]); + __m128 dry4 = _mm_load_ps(&OutBuffer[pos]); + dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); + _mm_store_ps(&OutBuffer[pos], dry4); + } + for(;pos < BufferSize;pos++) + OutBuffer[pos] += data[c][InPos+pos]*gain; + } +} diff --git a/Alc/mixer/mixer_sse2.c b/Alc/mixer/mixer_sse2.c new file mode 100644 index 00000000..19d07719 --- /dev/null +++ b/Alc/mixer/mixer_sse2.c @@ -0,0 +1,82 @@ +/** + * OpenAL cross platform audio library + * Copyright (C) 2014 by Timothy Arceri . + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * Or go to http://www.gnu.org/copyleft/lgpl.html + */ + +#include "config.h" + +#include +#include + +#include "alu.h" +#include "defs.h" + + +const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state), + const ALfloat *restrict src, ALsizei frac, ALint increment, + ALfloat *restrict dst, ALsizei numsamples) +{ + const __m128i increment4 = _mm_set1_epi32(increment*4); + const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); + const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); + union { alignas(16) ALint i[4]; float f[4]; } pos_; + union { alignas(16) ALsizei i[4]; float f[4]; } frac_; + __m128i frac4, pos4; + ALint pos; + ALsizei i; + + InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); + + frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); + pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); + + for(i = 0;numsamples-i > 3;i += 4) + { + const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]); + const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]); + + /* val1 + (val2-val1)*mu */ + const __m128 r0 = _mm_sub_ps(val2, val1); + const __m128 mu = _mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4); + const __m128 out = _mm_add_ps(val1, _mm_mul_ps(mu, r0)); + + _mm_store_ps(&dst[i], out); + + frac4 = _mm_add_epi32(frac4, increment4); + pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); + frac4 = _mm_and_si128(frac4, fracMask4); + + _mm_store_ps(pos_.f, _mm_castsi128_ps(pos4)); + } + + /* NOTE: These four elements represent the position *after* the last four + * samples, so the lowest element is the next position to resample. 
+ */ + pos = pos_.i[0]; + frac = _mm_cvtsi128_si32(frac4); + + for(;i < numsamples;i++) + { + dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} diff --git a/Alc/mixer/mixer_sse3.c b/Alc/mixer/mixer_sse3.c new file mode 100644 index 00000000..e69de29b diff --git a/Alc/mixer/mixer_sse41.c b/Alc/mixer/mixer_sse41.c new file mode 100644 index 00000000..85fd0f5e --- /dev/null +++ b/Alc/mixer/mixer_sse41.c @@ -0,0 +1,86 @@ +/** + * OpenAL cross platform audio library + * Copyright (C) 2014 by Timothy Arceri . + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * Or go to http://www.gnu.org/copyleft/lgpl.html + */ + +#include "config.h" + +#include +#include +#include + +#include "alu.h" +#include "defs.h" + + +const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state), + const ALfloat *restrict src, ALsizei frac, ALint increment, + ALfloat *restrict dst, ALsizei numsamples) +{ + const __m128i increment4 = _mm_set1_epi32(increment*4); + const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); + const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); + union { alignas(16) ALint i[4]; float f[4]; } pos_; + union { alignas(16) ALsizei i[4]; float f[4]; } frac_; + __m128i frac4, pos4; + ALint pos; + ALsizei i; + + InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); + + frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); + pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); + + for(i = 0;numsamples-i > 3;i += 4) + { + const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]); + const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]); + + /* val1 + (val2-val1)*mu */ + const __m128 r0 = _mm_sub_ps(val2, val1); + const __m128 mu = _mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4); + const __m128 out = _mm_add_ps(val1, _mm_mul_ps(mu, r0)); + + _mm_store_ps(&dst[i], out); + + frac4 = _mm_add_epi32(frac4, increment4); + pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); + frac4 = _mm_and_si128(frac4, fracMask4); + + pos_.i[0] = _mm_extract_epi32(pos4, 0); + pos_.i[1] = _mm_extract_epi32(pos4, 1); + pos_.i[2] = _mm_extract_epi32(pos4, 2); + pos_.i[3] = _mm_extract_epi32(pos4, 3); + } + + /* NOTE: These four elements represent the position *after* the last four + * samples, so the lowest element is the next position to resample. 
+ */ + pos = pos_.i[0]; + frac = _mm_cvtsi128_si32(frac4); + + for(;i < numsamples;i++) + { + dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); + + frac += increment; + pos += frac>>FRACTIONBITS; + frac &= FRACTIONMASK; + } + return dst; +} diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c deleted file mode 100644 index 2346080a..00000000 --- a/Alc/mixer_c.c +++ /dev/null @@ -1,208 +0,0 @@ -#include "config.h" - -#include - -#include "alMain.h" -#include "alu.h" -#include "alSource.h" -#include "alAuxEffectSlot.h" - - -static inline ALfloat do_point(const ALfloat *restrict vals, ALsizei UNUSED(frac)) -{ return vals[0]; } -static inline ALfloat do_lerp(const ALfloat *restrict vals, ALsizei frac) -{ return lerp(vals[0], vals[1], frac * (1.0f/FRACTIONONE)); } -static inline ALfloat do_cubic(const ALfloat *restrict vals, ALsizei frac) -{ return cubic(vals[0], vals[1], vals[2], vals[3], frac * (1.0f/FRACTIONONE)); } - -const ALfloat *Resample_copy_C(const InterpState* UNUSED(state), - const ALfloat *restrict src, ALsizei UNUSED(frac), ALint UNUSED(increment), - ALfloat *restrict dst, ALsizei numsamples) -{ -#if defined(HAVE_SSE) || defined(HAVE_NEON) - /* Avoid copying the source data if it's aligned like the destination. 
*/ - if((((intptr_t)src)&15) == (((intptr_t)dst)&15)) - return src; -#endif - memcpy(dst, src, numsamples*sizeof(ALfloat)); - return dst; -} - -#define DECL_TEMPLATE(Tag, Sampler, O) \ -const ALfloat *Resample_##Tag##_C(const InterpState* UNUSED(state), \ - const ALfloat *restrict src, ALsizei frac, ALint increment, \ - ALfloat *restrict dst, ALsizei numsamples) \ -{ \ - ALsizei i; \ - \ - src -= O; \ - for(i = 0;i < numsamples;i++) \ - { \ - dst[i] = Sampler(src, frac); \ - \ - frac += increment; \ - src += frac>>FRACTIONBITS; \ - frac &= FRACTIONMASK; \ - } \ - return dst; \ -} - -DECL_TEMPLATE(point, do_point, 0) -DECL_TEMPLATE(lerp, do_lerp, 0) -DECL_TEMPLATE(cubic, do_cubic, 1) - -#undef DECL_TEMPLATE - -const ALfloat *Resample_bsinc_C(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei dstlen) -{ - const ALfloat *fil, *scd, *phd, *spd; - const ALfloat *const filter = state->bsinc.filter; - const ALfloat sf = state->bsinc.sf; - const ALsizei m = state->bsinc.m; - ALsizei j_f, pi, i; - ALfloat pf, r; - - src += state->bsinc.l; - for(i = 0;i < dstlen;i++) - { - // Calculate the phase index and factor. 
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) - pi = frac >> FRAC_PHASE_BITDIFF; - pf = (frac & ((1<>FRACTIONBITS; - frac &= FRACTIONMASK; - } - return dst; -} - - -void ALfilterState_processC(ALfilterState *filter, ALfloat *restrict dst, const ALfloat *restrict src, ALsizei numsamples) -{ - ALsizei i; - if(LIKELY(numsamples > 1)) - { - ALfloat x0 = filter->x[0]; - ALfloat x1 = filter->x[1]; - ALfloat y0 = filter->y[0]; - ALfloat y1 = filter->y[1]; - - for(i = 0;i < numsamples;i++) - { - dst[i] = filter->b0* src[i] + - filter->b1*x0 + filter->b2*x1 - - filter->a1*y0 - filter->a2*y1; - y1 = y0; y0 = dst[i]; - x1 = x0; x0 = src[i]; - } - - filter->x[0] = x0; - filter->x[1] = x1; - filter->y[0] = y0; - filter->y[1] = y1; - } - else if(numsamples == 1) - { - dst[0] = filter->b0 * src[0] + - filter->b1 * filter->x[0] + - filter->b2 * filter->x[1] - - filter->a1 * filter->y[0] - - filter->a2 * filter->y[1]; - filter->x[1] = filter->x[0]; - filter->x[0] = src[0]; - filter->y[1] = filter->y[0]; - filter->y[0] = dst[0]; - } -} - - -static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - for(c = 0;c < IrSize;c++) - { - const ALsizei off = (Offset+c)&HRIR_MASK; - Values[off][0] += Coeffs[c][0] * left; - Values[off][1] += Coeffs[c][1] * right; - } -} - -#define MixHrtf MixHrtf_C -#define MixHrtfBlend MixHrtfBlend_C -#define MixDirectHrtf MixDirectHrtf_C -#include "mixer_inc.c" -#undef MixHrtf - - -void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) -{ - ALfloat gain, delta, step; - ALsizei c; - - delta = (Counter > 0) ? 
1.0f/(ALfloat)Counter : 0.0f; - - for(c = 0;c < OutChans;c++) - { - ALsizei pos = 0; - gain = CurrentGains[c]; - step = (TargetGains[c] - gain) * delta; - if(fabsf(step) > FLT_EPSILON) - { - ALsizei minsize = mini(BufferSize, Counter); - for(;pos < minsize;pos++) - { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; - } - if(pos == Counter) - gain = TargetGains[c]; - CurrentGains[c] = gain; - } - - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; - } -} - -/* Basically the inverse of the above. Rather than one input going to multiple - * outputs (each with its own gain), it's multiple inputs (each with its own - * gain) going to one output. This applies one row (vs one column) of a matrix - * transform. And as the matrices are more or less static once set up, no - * stepping is necessary. - */ -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) -{ - ALsizei c, i; - - for(c = 0;c < InChans;c++) - { - ALfloat gain = Gains[c]; - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - - for(i = 0;i < BufferSize;i++) - OutBuffer[i] += data[c][InPos+i] * gain; - } -} diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h deleted file mode 100644 index fe19cef4..00000000 --- a/Alc/mixer_defs.h +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef MIXER_DEFS_H -#define MIXER_DEFS_H - -#include "AL/alc.h" -#include "AL/al.h" -#include "alMain.h" -#include "alu.h" - -struct MixGains; - -struct MixHrtfParams; -struct HrtfState; - -/* C resamplers */ -const ALfloat *Resample_copy_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); -const ALfloat *Resample_point_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); -const ALfloat *Resample_lerp_C(const 
InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); -const ALfloat *Resample_cubic_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); -const ALfloat *Resample_bsinc_C(const InterpState *state, const ALfloat *restrict src, ALsizei frac, ALint increment, ALfloat *restrict dst, ALsizei dstlen); - - -/* C mixers */ -void MixHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, struct MixHrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALsizei BufferSize); -void MixHrtfBlend_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const HrtfParams *oldparams, - MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); -void MixDirectHrtf_C(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], - ALsizei BufferSize); -void Mix_C(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); - -/* SSE mixers */ -void MixHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, struct MixHrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALsizei BufferSize); -void MixHrtfBlend_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const HrtfParams *oldparams, - 
MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); -void MixDirectHrtf_SSE(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], - ALsizei BufferSize); -void Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); - -/* SSE resamplers */ -inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *restrict frac_arr, ALint *restrict pos_arr, ALsizei size) -{ - ALsizei i; - - pos_arr[0] = 0; - frac_arr[0] = frac; - for(i = 1;i < size;i++) - { - ALint frac_tmp = frac_arr[i-1] + increment; - pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS); - frac_arr[i] = frac_tmp&FRACTIONMASK; - } -} - -const ALfloat *Resample_lerp_SSE2(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei numsamples); -const ALfloat *Resample_lerp_SSE41(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei numsamples); - -const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei dstlen); - -/* Neon mixers */ -void MixHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, struct MixHrtfParams *hrtfparams, - struct HrtfState *hrtfstate, ALsizei BufferSize); -void MixHrtfBlend_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const 
HrtfParams *oldparams, - MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize); -void MixDirectHrtf_Neon(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], - ALsizei BufferSize); -void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize); -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, - const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, - ALsizei InPos, ALsizei BufferSize); - -/* Neon resamplers */ -const ALfloat *Resample_lerp_Neon(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei numsamples); -const ALfloat *Resample_bsinc_Neon(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei dstlen); - -#endif /* MIXER_DEFS_H */ diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c deleted file mode 100644 index 3c9d4dc5..00000000 --- a/Alc/mixer_inc.c +++ /dev/null @@ -1,114 +0,0 @@ -#include "config.h" - -#include "alMain.h" -#include "alSource.h" - -#include "hrtf.h" -#include "mixer_defs.h" -#include "align.h" -#include "alu.h" - - -static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei irSize, - const ALfloat (*restrict Coeffs)[2], - ALfloat left, ALfloat right); - - -void MixHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, - ALsizei BufferSize) -{ - const ALfloat (*Coeffs)[2] = ASSUME_ALIGNED(hrtfparams->Coeffs, 16); - const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] }; - ALfloat gainstep = hrtfparams->GainStep; - ALfloat gain = 
hrtfparams->Gain; - ALfloat left, right; - ALsizei i; - - LeftOut += OutPos; - RightOut += OutPos; - for(i = 0;i < BufferSize;i++) - { - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - left = hrtfstate->History[(Offset-Delay[0])&HRTF_HISTORY_MASK]*gain; - right = hrtfstate->History[(Offset-Delay[1])&HRTF_HISTORY_MASK]*gain; - - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; - - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, Coeffs, left, right); - *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; - *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; - - gain += gainstep; - Offset++; - } - hrtfparams->Gain = gain; -} - -void MixHrtfBlend(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, ALsizei OutPos, - const ALsizei IrSize, const HrtfParams *oldparams, - MixHrtfParams *newparams, HrtfState *hrtfstate, - ALsizei BufferSize) -{ - const ALfloat (*OldCoeffs)[2] = ASSUME_ALIGNED(oldparams->Coeffs, 16); - const ALsizei OldDelay[2] = { oldparams->Delay[0], oldparams->Delay[1] }; - ALfloat oldGain = oldparams->Gain; - ALfloat oldGainStep = -oldGain / (ALfloat)BufferSize; - const ALfloat (*NewCoeffs)[2] = ASSUME_ALIGNED(newparams->Coeffs, 16); - const ALsizei NewDelay[2] = { newparams->Delay[0], newparams->Delay[1] }; - ALfloat newGain = newparams->Gain; - ALfloat newGainStep = newparams->GainStep; - ALfloat left, right; - ALsizei i; - - LeftOut += OutPos; - RightOut += OutPos; - for(i = 0;i < BufferSize;i++) - { - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][0] = 0.0f; - hrtfstate->Values[(Offset+IrSize-1)&HRIR_MASK][1] = 0.0f; - - hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++); - - left = hrtfstate->History[(Offset-OldDelay[0])&HRTF_HISTORY_MASK]*oldGain; - right = hrtfstate->History[(Offset-OldDelay[1])&HRTF_HISTORY_MASK]*oldGain; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, OldCoeffs, left, right); - - left = 
hrtfstate->History[(Offset-NewDelay[0])&HRTF_HISTORY_MASK]*newGain; - right = hrtfstate->History[(Offset-NewDelay[1])&HRTF_HISTORY_MASK]*newGain; - ApplyCoeffs(Offset, hrtfstate->Values, IrSize, NewCoeffs, left, right); - - *(LeftOut++) += hrtfstate->Values[Offset&HRIR_MASK][0]; - *(RightOut++) += hrtfstate->Values[Offset&HRIR_MASK][1]; - - oldGain += oldGainStep; - newGain += newGainStep; - Offset++; - } - newparams->Gain = newGain; -} - -void MixDirectHrtf(ALfloat *restrict LeftOut, ALfloat *restrict RightOut, - const ALfloat *data, ALsizei Offset, const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], ALfloat (*restrict Values)[2], - ALsizei BufferSize) -{ - ALfloat insample; - ALsizei i; - - for(i = 0;i < BufferSize;i++) - { - Values[(Offset+IrSize)&HRIR_MASK][0] = 0.0f; - Values[(Offset+IrSize)&HRIR_MASK][1] = 0.0f; - Offset++; - - insample = *(data++); - ApplyCoeffs(Offset, Values, IrSize, Coeffs, insample, insample); - *(LeftOut++) += Values[Offset&HRIR_MASK][0]; - *(RightOut++) += Values[Offset&HRIR_MASK][1]; - } -} diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c deleted file mode 100644 index 631e4f7c..00000000 --- a/Alc/mixer_neon.c +++ /dev/null @@ -1,261 +0,0 @@ -#include "config.h" - -#include - -#include "AL/al.h" -#include "AL/alc.h" -#include "alMain.h" -#include "alu.h" -#include "hrtf.h" -#include "mixer_defs.h" - - -const ALfloat *Resample_lerp_Neon(const InterpState* UNUSED(state), - const ALfloat *restrict src, ALsizei frac, ALint increment, - ALfloat *restrict dst, ALsizei numsamples) -{ - const int32x4_t increment4 = vdupq_n_s32(increment*4); - const float32x4_t fracOne4 = vdupq_n_f32(1.0f/FRACTIONONE); - const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK); - alignas(16) ALint pos_[4]; - alignas(16) ALsizei frac_[4]; - int32x4_t pos4; - int32x4_t frac4; - ALsizei i; - - InitiatePositionArrays(frac, increment, frac_, pos_, 4); - - frac4 = vld1q_s32(frac_); - pos4 = vld1q_s32(pos_); - - for(i = 0;numsamples-i > 3;i += 4) - { - const 
float32x4_t val1 = (float32x4_t){src[pos_[0]], src[pos_[1]], src[pos_[2]], src[pos_[3]]}; - const float32x4_t val2 = (float32x4_t){src[pos_[0]+1], src[pos_[1]+1], src[pos_[2]+1], src[pos_[3]+1]}; - - /* val1 + (val2-val1)*mu */ - const float32x4_t r0 = vsubq_f32(val2, val1); - const float32x4_t mu = vmulq_f32(vcvtq_f32_s32(frac4), fracOne4); - const float32x4_t out = vmlaq_f32(val1, mu, r0); - - vst1q_f32(&dst[i], out); - - frac4 = vaddq_s32(frac4, increment4); - pos4 = vaddq_s32(pos4, vshrq_n_s32(frac4, FRACTIONBITS)); - frac4 = vandq_s32(frac4, fracMask4); - - vst1q_s32(pos_, pos4); - } - - if(i < numsamples) - { - /* NOTE: These four elements represent the position *after* the last - * four samples, so the lowest element is the next position to - * resample. - */ - ALint pos = pos_[0]; - frac = vgetq_lane_s32(frac4, 0); - do { - dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); - - frac += increment; - pos += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; - } while(++i < numsamples); - } - return dst; -} - -const ALfloat *Resample_bsinc_Neon(const InterpState *state, - const ALfloat *restrict src, ALsizei frac, ALint increment, - ALfloat *restrict dst, ALsizei dstlen) -{ - const ALfloat *const filter = state->bsinc.filter; - const float32x4_t sf4 = vdupq_n_f32(state->bsinc.sf); - const ALsizei m = state->bsinc.m; - const float32x4_t *fil, *scd, *phd, *spd; - ALsizei pi, i, j, offset; - float32x4_t r4; - ALfloat pf; - - src += state->bsinc.l; - for(i = 0;i < dstlen;i++) - { - // Calculate the phase index and factor. 
-#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) - pi = frac >> FRAC_PHASE_BITDIFF; - pf = (frac & ((1<>FRACTIONBITS; - frac &= FRACTIONMASK; - } - return dst; -} - - -static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], - ALfloat left, ALfloat right) -{ - ALsizei c; - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - for(c = 0;c < IrSize;c += 2) - { - const ALsizei o0 = (Offset+c)&HRIR_MASK; - const ALsizei o1 = (o0+1)&HRIR_MASK; - float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), - vld1_f32((float32_t*)&Values[o1][0])); - float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - - vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); - vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); - } -} - -#define MixHrtf MixHrtf_Neon -#define MixHrtfBlend MixHrtfBlend_Neon -#define MixDirectHrtf MixDirectHrtf_Neon -#include "mixer_inc.c" -#undef MixHrtf - - -void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) -{ - ALfloat gain, delta, step; - float32x4_t gain4; - ALsizei c; - - data = ASSUME_ALIGNED(data, 16); - OutBuffer = ASSUME_ALIGNED(OutBuffer, 16); - - delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; - - for(c = 0;c < OutChans;c++) - { - ALsizei pos = 0; - gain = CurrentGains[c]; - step = (TargetGains[c] - gain) * delta; - if(fabsf(step) > FLT_EPSILON) - { - ALsizei minsize = mini(BufferSize, Counter); - /* Mix with applying gain steps in aligned multiples of 4. 
*/ - if(minsize-pos > 3) - { - float32x4_t step4; - gain4 = vsetq_lane_f32(gain, gain4, 0); - gain4 = vsetq_lane_f32(gain + step, gain4, 1); - gain4 = vsetq_lane_f32(gain + step + step, gain4, 2); - gain4 = vsetq_lane_f32(gain + step + step + step, gain4, 3); - step4 = vdupq_n_f32(step + step + step + step); - do { - const float32x4_t val4 = vld1q_f32(&data[pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); - dry4 = vmlaq_f32(dry4, val4, gain4); - gain4 = vaddq_f32(gain4, step4); - vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); - pos += 4; - } while(minsize-pos > 3); - /* NOTE: gain4 now represents the next four gains after the - * last four mixed samples, so the lowest element represents - * the next gain to apply. - */ - gain = vgetq_lane_f32(gain4, 0); - } - /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ - for(;pos < minsize;pos++) - { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; - } - if(pos == Counter) - gain = TargetGains[c]; - CurrentGains[c] = gain; - - /* Mix until pos is aligned with 4 or the mix is done. 
*/ - minsize = mini(BufferSize, (pos+3)&~3); - for(;pos < minsize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; - } - - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - gain4 = vdupq_n_f32(gain); - for(;BufferSize-pos > 3;pos += 4) - { - const float32x4_t val4 = vld1q_f32(&data[pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]); - dry4 = vmlaq_f32(dry4, val4, gain4); - vst1q_f32(&OutBuffer[c][OutPos+pos], dry4); - } - for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; - } -} - -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) -{ - float32x4_t gain4; - ALsizei c; - - data = ASSUME_ALIGNED(data, 16); - OutBuffer = ASSUME_ALIGNED(OutBuffer, 16); - - for(c = 0;c < InChans;c++) - { - ALsizei pos = 0; - ALfloat gain = Gains[c]; - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - - gain4 = vdupq_n_f32(gain); - for(;BufferSize-pos > 3;pos += 4) - { - const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]); - float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]); - dry4 = vmlaq_f32(dry4, val4, gain4); - vst1q_f32(&OutBuffer[pos], dry4); - } - for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][InPos+pos]*gain; - } -} diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c deleted file mode 100644 index 281b6f85..00000000 --- a/Alc/mixer_sse.c +++ /dev/null @@ -1,229 +0,0 @@ -#include "config.h" - -#include - -#include "AL/al.h" -#include "AL/alc.h" -#include "alMain.h" -#include "alu.h" - -#include "alSource.h" -#include "alAuxEffectSlot.h" -#include "mixer_defs.h" - - -const ALfloat *Resample_bsinc_SSE(const InterpState *state, const ALfloat *restrict src, - ALsizei frac, ALint increment, ALfloat *restrict dst, - ALsizei dstlen) -{ - const ALfloat *const filter = state->bsinc.filter; - const __m128 sf4 = _mm_set1_ps(state->bsinc.sf); - const ALsizei m = state->bsinc.m; - const __m128 *fil, *scd, *phd, *spd; - ALsizei 
pi, i, j, offset; - ALfloat pf; - __m128 r4; - - src += state->bsinc.l; - for(i = 0;i < dstlen;i++) - { - // Calculate the phase index and factor. -#define FRAC_PHASE_BITDIFF (FRACTIONBITS-BSINC_PHASE_BITS) - pi = frac >> FRAC_PHASE_BITDIFF; - pf = (frac & ((1<>FRACTIONBITS; - frac &= FRACTIONMASK; - } - return dst; -} - - -static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2], - const ALsizei IrSize, - const ALfloat (*restrict Coeffs)[2], - ALfloat left, ALfloat right) -{ - const __m128 lrlr = _mm_setr_ps(left, right, left, right); - __m128 vals = _mm_setzero_ps(); - __m128 coeffs; - ALsizei i; - - Values = ASSUME_ALIGNED(Values, 16); - Coeffs = ASSUME_ALIGNED(Coeffs, 16); - if((Offset&1)) - { - const ALsizei o0 = Offset&HRIR_MASK; - const ALsizei o1 = (Offset+IrSize-1)&HRIR_MASK; - __m128 imp0, imp1; - - coeffs = _mm_load_ps(&Coeffs[0][0]); - vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]); - imp0 = _mm_mul_ps(lrlr, coeffs); - vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[o0][0], vals); - for(i = 1;i < IrSize-1;i += 2) - { - const ALsizei o2 = (Offset+i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - vals = _mm_load_ps(&Values[o2][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Values[o2][0], vals); - imp0 = imp1; - } - vals = _mm_loadl_pi(vals, (__m64*)&Values[o1][0]); - imp0 = _mm_movehl_ps(imp0, imp0); - vals = _mm_add_ps(imp0, vals); - _mm_storel_pi((__m64*)&Values[o1][0], vals); - } - else - { - for(i = 0;i < IrSize;i += 2) - { - const ALsizei o = (Offset + i)&HRIR_MASK; - - coeffs = _mm_load_ps(&Coeffs[i][0]); - vals = _mm_load_ps(&Values[o][0]); - vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); - _mm_store_ps(&Values[o][0], vals); - } - } -} - -#define MixHrtf MixHrtf_SSE -#define MixHrtfBlend MixHrtfBlend_SSE -#define MixDirectHrtf MixDirectHrtf_SSE -#include "mixer_inc.c" -#undef MixHrtf - - -void 
Mix_SSE(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], - ALfloat *CurrentGains, const ALfloat *TargetGains, ALsizei Counter, ALsizei OutPos, - ALsizei BufferSize) -{ - ALfloat gain, delta, step; - __m128 gain4; - ALsizei c; - - delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f; - - for(c = 0;c < OutChans;c++) - { - ALsizei pos = 0; - gain = CurrentGains[c]; - step = (TargetGains[c] - gain) * delta; - if(fabsf(step) > FLT_EPSILON) - { - ALsizei minsize = mini(BufferSize, Counter); - /* Mix with applying gain steps in aligned multiples of 4. */ - if(minsize-pos > 3) - { - __m128 step4; - gain4 = _mm_setr_ps( - gain, - gain + step, - gain + step + step, - gain + step + step + step - ); - step4 = _mm_set1_ps(step + step + step + step); - do { - const __m128 val4 = _mm_load_ps(&data[pos]); - __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); - dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - gain4 = _mm_add_ps(gain4, step4); - _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); - pos += 4; - } while(minsize-pos > 3); - /* NOTE: gain4 now represents the next four gains after the - * last four mixed samples, so the lowest element represents - * the next gain to apply. - */ - gain = _mm_cvtss_f32(gain4); - } - /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ - for(;pos < minsize;pos++) - { - OutBuffer[c][OutPos+pos] += data[pos]*gain; - gain += step; - } - if(pos == Counter) - gain = TargetGains[c]; - CurrentGains[c] = gain; - - /* Mix until pos is aligned with 4 or the mix is done. 
*/ - minsize = mini(BufferSize, (pos+3)&~3); - for(;pos < minsize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; - } - - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - gain4 = _mm_set1_ps(gain); - for(;BufferSize-pos > 3;pos += 4) - { - const __m128 val4 = _mm_load_ps(&data[pos]); - __m128 dry4 = _mm_load_ps(&OutBuffer[c][OutPos+pos]); - dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4); - } - for(;pos < BufferSize;pos++) - OutBuffer[c][OutPos+pos] += data[pos]*gain; - } -} - -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALsizei InChans, ALsizei InPos, ALsizei BufferSize) -{ - __m128 gain4; - ALsizei c; - - for(c = 0;c < InChans;c++) - { - ALsizei pos = 0; - ALfloat gain = Gains[c]; - if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD)) - continue; - - gain4 = _mm_set1_ps(gain); - for(;BufferSize-pos > 3;pos += 4) - { - const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]); - __m128 dry4 = _mm_load_ps(&OutBuffer[pos]); - dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); - _mm_store_ps(&OutBuffer[pos], dry4); - } - for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][InPos+pos]*gain; - } -} diff --git a/Alc/mixer_sse2.c b/Alc/mixer_sse2.c deleted file mode 100644 index 3f8224e7..00000000 --- a/Alc/mixer_sse2.c +++ /dev/null @@ -1,82 +0,0 @@ -/** - * OpenAL cross platform audio library - * Copyright (C) 2014 by Timothy Arceri . - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * Or go to http://www.gnu.org/copyleft/lgpl.html - */ - -#include "config.h" - -#include -#include - -#include "alu.h" -#include "mixer_defs.h" - - -const ALfloat *Resample_lerp_SSE2(const InterpState* UNUSED(state), - const ALfloat *restrict src, ALsizei frac, ALint increment, - ALfloat *restrict dst, ALsizei numsamples) -{ - const __m128i increment4 = _mm_set1_epi32(increment*4); - const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); - const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); - union { alignas(16) ALint i[4]; float f[4]; } pos_; - union { alignas(16) ALsizei i[4]; float f[4]; } frac_; - __m128i frac4, pos4; - ALint pos; - ALsizei i; - - InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); - - frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); - pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); - - for(i = 0;numsamples-i > 3;i += 4) - { - const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]); - const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]); - - /* val1 + (val2-val1)*mu */ - const __m128 r0 = _mm_sub_ps(val2, val1); - const __m128 mu = _mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4); - const __m128 out = _mm_add_ps(val1, _mm_mul_ps(mu, r0)); - - _mm_store_ps(&dst[i], out); - - frac4 = _mm_add_epi32(frac4, increment4); - pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); - frac4 = _mm_and_si128(frac4, fracMask4); - - _mm_store_ps(pos_.f, _mm_castsi128_ps(pos4)); - } - - /* NOTE: These four elements represent the position *after* the last four - * samples, so the lowest element is the next position to resample. 
- */ - pos = pos_.i[0]; - frac = _mm_cvtsi128_si32(frac4); - - for(;i < numsamples;i++) - { - dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); - - frac += increment; - pos += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; - } - return dst; -} diff --git a/Alc/mixer_sse3.c b/Alc/mixer_sse3.c deleted file mode 100644 index e69de29b..00000000 diff --git a/Alc/mixer_sse41.c b/Alc/mixer_sse41.c deleted file mode 100644 index 4f88d540..00000000 --- a/Alc/mixer_sse41.c +++ /dev/null @@ -1,86 +0,0 @@ -/** - * OpenAL cross platform audio library - * Copyright (C) 2014 by Timothy Arceri . - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * Or go to http://www.gnu.org/copyleft/lgpl.html - */ - -#include "config.h" - -#include -#include -#include - -#include "alu.h" -#include "mixer_defs.h" - - -const ALfloat *Resample_lerp_SSE41(const InterpState* UNUSED(state), - const ALfloat *restrict src, ALsizei frac, ALint increment, - ALfloat *restrict dst, ALsizei numsamples) -{ - const __m128i increment4 = _mm_set1_epi32(increment*4); - const __m128 fracOne4 = _mm_set1_ps(1.0f/FRACTIONONE); - const __m128i fracMask4 = _mm_set1_epi32(FRACTIONMASK); - union { alignas(16) ALint i[4]; float f[4]; } pos_; - union { alignas(16) ALsizei i[4]; float f[4]; } frac_; - __m128i frac4, pos4; - ALint pos; - ALsizei i; - - InitiatePositionArrays(frac, increment, frac_.i, pos_.i, 4); - - frac4 = _mm_castps_si128(_mm_load_ps(frac_.f)); - pos4 = _mm_castps_si128(_mm_load_ps(pos_.f)); - - for(i = 0;numsamples-i > 3;i += 4) - { - const __m128 val1 = _mm_setr_ps(src[pos_.i[0]], src[pos_.i[1]], src[pos_.i[2]], src[pos_.i[3]]); - const __m128 val2 = _mm_setr_ps(src[pos_.i[0]+1], src[pos_.i[1]+1], src[pos_.i[2]+1], src[pos_.i[3]+1]); - - /* val1 + (val2-val1)*mu */ - const __m128 r0 = _mm_sub_ps(val2, val1); - const __m128 mu = _mm_mul_ps(_mm_cvtepi32_ps(frac4), fracOne4); - const __m128 out = _mm_add_ps(val1, _mm_mul_ps(mu, r0)); - - _mm_store_ps(&dst[i], out); - - frac4 = _mm_add_epi32(frac4, increment4); - pos4 = _mm_add_epi32(pos4, _mm_srli_epi32(frac4, FRACTIONBITS)); - frac4 = _mm_and_si128(frac4, fracMask4); - - pos_.i[0] = _mm_extract_epi32(pos4, 0); - pos_.i[1] = _mm_extract_epi32(pos4, 1); - pos_.i[2] = _mm_extract_epi32(pos4, 2); - pos_.i[3] = _mm_extract_epi32(pos4, 3); - } - - /* NOTE: These four elements represent the position *after* the last four - * samples, so the lowest element is the next position to resample. 
- */ - pos = pos_.i[0]; - frac = _mm_cvtsi128_si32(frac4); - - for(;i < numsamples;i++) - { - dst[i] = lerp(src[pos], src[pos+1], frac * (1.0f/FRACTIONONE)); - - frac += increment; - pos += frac>>FRACTIONBITS; - frac &= FRACTIONMASK; - } - return dst; -} diff --git a/Alc/mixvoice.c b/Alc/mixvoice.c new file mode 100644 index 00000000..0e039115 --- /dev/null +++ b/Alc/mixvoice.c @@ -0,0 +1,781 @@ +/** + * OpenAL cross platform audio library + * Copyright (C) 1999-2007 by authors. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * Or go to http://www.gnu.org/copyleft/lgpl.html + */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include "alMain.h" +#include "AL/al.h" +#include "AL/alc.h" +#include "alSource.h" +#include "alBuffer.h" +#include "alListener.h" +#include "alAuxEffectSlot.h" +#include "sample_cvt.h" +#include "alu.h" +#include "alconfig.h" +#include "ringbuffer.h" + +#include "cpu_caps.h" +#include "mixer/defs.h" + + +static_assert((INT_MAX>>FRACTIONBITS)/MAX_PITCH > BUFFERSIZE, + "MAX_PITCH and/or BUFFERSIZE are too large for FRACTIONBITS!"); + +extern inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *restrict frac_arr, ALint *restrict pos_arr, ALsizei size); + + +/* BSinc24 requires up to 23 extra samples before the current position, and 24 after. */ +static_assert(MAX_RESAMPLE_PADDING >= 24, "MAX_RESAMPLE_PADDING must be at least 24!"); + + +enum Resampler ResamplerDefault = LinearResampler; + +MixerFunc MixSamples = Mix_C; +RowMixerFunc MixRowSamples = MixRow_C; +static HrtfMixerFunc MixHrtfSamples = MixHrtf_C; +static HrtfMixerBlendFunc MixHrtfBlendSamples = MixHrtfBlend_C; + +static MixerFunc SelectMixer(void) +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Mix_Neon; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return Mix_SSE; +#endif + return Mix_C; +} + +static RowMixerFunc SelectRowMixer(void) +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixRow_Neon; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixRow_SSE; +#endif + return MixRow_C; +} + +static inline HrtfMixerFunc SelectHrtfMixer(void) +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtf_Neon; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtf_SSE; +#endif + return MixHrtf_C; +} + +static inline HrtfMixerBlendFunc SelectHrtfBlendMixer(void) +{ +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixHrtfBlend_Neon; +#endif +#ifdef HAVE_SSE + 
if((CPUCapFlags&CPU_CAP_SSE)) + return MixHrtfBlend_SSE; +#endif + return MixHrtfBlend_C; +} + +ResamplerFunc SelectResampler(enum Resampler resampler) +{ + switch(resampler) + { + case PointResampler: + return Resample_point_C; + case LinearResampler: +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Resample_lerp_Neon; +#endif +#ifdef HAVE_SSE4_1 + if((CPUCapFlags&CPU_CAP_SSE4_1)) + return Resample_lerp_SSE41; +#endif +#ifdef HAVE_SSE2 + if((CPUCapFlags&CPU_CAP_SSE2)) + return Resample_lerp_SSE2; +#endif + return Resample_lerp_C; + case FIR4Resampler: + return Resample_cubic_C; + case BSinc12Resampler: + case BSinc24Resampler: +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return Resample_bsinc_Neon; +#endif +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return Resample_bsinc_SSE; +#endif + return Resample_bsinc_C; + } + + return Resample_point_C; +} + + +void aluInitMixer(void) +{ + const char *str; + + if(ConfigValueStr(NULL, NULL, "resampler", &str)) + { + if(strcasecmp(str, "point") == 0 || strcasecmp(str, "none") == 0) + ResamplerDefault = PointResampler; + else if(strcasecmp(str, "linear") == 0) + ResamplerDefault = LinearResampler; + else if(strcasecmp(str, "cubic") == 0) + ResamplerDefault = FIR4Resampler; + else if(strcasecmp(str, "bsinc12") == 0) + ResamplerDefault = BSinc12Resampler; + else if(strcasecmp(str, "bsinc24") == 0) + ResamplerDefault = BSinc24Resampler; + else if(strcasecmp(str, "bsinc") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str); + ResamplerDefault = BSinc12Resampler; + } + else if(strcasecmp(str, "sinc4") == 0 || strcasecmp(str, "sinc8") == 0) + { + WARN("Resampler option \"%s\" is deprecated, using cubic\n", str); + ResamplerDefault = FIR4Resampler; + } + else + { + char *end; + long n = strtol(str, &end, 0); + if(*end == '\0' && (n == PointResampler || n == LinearResampler || n == FIR4Resampler)) + ResamplerDefault = n; + else + WARN("Invalid resampler: %s\n", str); + } + } + + 
MixHrtfBlendSamples = SelectHrtfBlendMixer(); + MixHrtfSamples = SelectHrtfMixer(); + MixSamples = SelectMixer(); + MixRowSamples = SelectRowMixer(); +} + + +static void SendAsyncEvent(ALCcontext *context, ALuint enumtype, ALenum type, + ALuint objid, ALuint param, const char *msg) +{ + AsyncEvent evt; + evt.EnumType = enumtype; + evt.Type = type; + evt.ObjectId = objid; + evt.Param = param; + strcpy(evt.Message, msg); + if(ll_ringbuffer_write(context->AsyncEvents, (const char*)&evt, 1) == 1) + alsem_post(&context->EventSem); +} + + +static inline ALfloat Sample_ALubyte(ALubyte val) +{ return (val-128) * (1.0f/128.0f); } + +static inline ALfloat Sample_ALshort(ALshort val) +{ return val * (1.0f/32768.0f); } + +static inline ALfloat Sample_ALfloat(ALfloat val) +{ return val; } + +static inline ALfloat Sample_ALdouble(ALdouble val) +{ return (ALfloat)val; } + +typedef ALubyte ALmulaw; +static inline ALfloat Sample_ALmulaw(ALmulaw val) +{ return muLawDecompressionTable[val] * (1.0f/32768.0f); } + +typedef ALubyte ALalaw; +static inline ALfloat Sample_ALalaw(ALalaw val) +{ return aLawDecompressionTable[val] * (1.0f/32768.0f); } + +#define DECL_TEMPLATE(T) \ +static inline void Load_##T(ALfloat *restrict dst, const T *restrict src, \ + ALint srcstep, ALsizei samples) \ +{ \ + ALsizei i; \ + for(i = 0;i < samples;i++) \ + dst[i] += Sample_##T(src[i*srcstep]); \ +} + +DECL_TEMPLATE(ALubyte) +DECL_TEMPLATE(ALshort) +DECL_TEMPLATE(ALfloat) +DECL_TEMPLATE(ALdouble) +DECL_TEMPLATE(ALmulaw) +DECL_TEMPLATE(ALalaw) + +#undef DECL_TEMPLATE + +static void LoadSamples(ALfloat *restrict dst, const ALvoid *restrict src, ALint srcstep, + enum FmtType srctype, ALsizei samples) +{ +#define HANDLE_FMT(ET, ST) case ET: Load_##ST(dst, src, srcstep, samples); break + switch(srctype) + { + HANDLE_FMT(FmtUByte, ALubyte); + HANDLE_FMT(FmtShort, ALshort); + HANDLE_FMT(FmtFloat, ALfloat); + HANDLE_FMT(FmtDouble, ALdouble); + HANDLE_FMT(FmtMulaw, ALmulaw); + HANDLE_FMT(FmtAlaw, ALalaw); + } 
+#undef HANDLE_FMT +} + +/* DoFilters: runs the low-pass and/or high-pass filter selected by type over numsamples samples of src and returns the buffer that holds the result: src itself for AF_None, dst otherwise. A filter that is not applied is still handed the signal through ALfilterState_processPassthru (presumably to keep its history current - confirm in the filter code). Band-pass runs low-pass then high-pass in chunks of at most 256 samples through a stack temporary. */ +static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter, + ALfloat *restrict dst, const ALfloat *restrict src, + ALsizei numsamples, enum ActiveFilters type) +{ + ALsizei i; + switch(type) + { + case AF_None: + ALfilterState_processPassthru(lpfilter, src, numsamples); + ALfilterState_processPassthru(hpfilter, src, numsamples); + break; + + case AF_LowPass: + ALfilterState_process(lpfilter, dst, src, numsamples); + ALfilterState_processPassthru(hpfilter, dst, numsamples); + return dst; + case AF_HighPass: + ALfilterState_processPassthru(lpfilter, src, numsamples); + ALfilterState_process(hpfilter, dst, src, numsamples); + return dst; + + case AF_BandPass: + for(i = 0;i < numsamples;) + { + ALfloat temp[256]; + ALsizei todo = mini(256, numsamples-i); + + ALfilterState_process(lpfilter, temp, src+i, todo); + ALfilterState_process(hpfilter, dst+i, temp, todo); + i += todo; + } + return dst; + } + return src; +} + + +/* MixSource renders up to SamplesToDo output samples of one voice. For each channel it gathers source samples from the current buffer list item(s), resamples and filters them, then mixes the result to the direct output and to every aux send that has a buffer. Afterwards it stores the updated position and current buffer back into the voice, posts a buffer-completed event if that event type is enabled and any buffers were finished, and returns whether the voice is still playing. This function uses these device temp buffers. */ +#define SOURCE_DATA_BUF 0 +#define RESAMPLED_BUF 1 +#define FILTERED_BUF 2 +#define NFC_DATA_BUF 3 +ALboolean MixSource(ALvoice *voice, ALuint SourceID, ALCcontext *Context, ALsizei SamplesToDo) +{ + ALCdevice *Device = Context->Device; + ALbufferlistitem *BufferListItem; + ALbufferlistitem *BufferLoopItem; + ALsizei NumChannels, SampleSize; + ALbitfieldSOFT enabledevt; + ALsizei buffers_done = 0; + ResamplerFunc Resample; + ALsizei DataPosInt; + ALsizei DataPosFrac; + ALint64 DataSize64; + ALint increment; + ALsizei Counter; + ALsizei OutPos; + ALsizei IrSize; + bool isplaying; + bool firstpass; + bool isstatic; + ALsizei chan; + ALsizei send; + + /* Get source info */ + isplaying = true; /* Will only be called while playing. 
*/ + isstatic = !!(voice->Flags&VOICE_IS_STATIC); + DataPosInt = ATOMIC_LOAD(&voice->position, almemory_order_acquire); + DataPosFrac = ATOMIC_LOAD(&voice->position_fraction, almemory_order_relaxed); + BufferListItem = ATOMIC_LOAD(&voice->current_buffer, almemory_order_relaxed); + BufferLoopItem = ATOMIC_LOAD(&voice->loop_buffer, almemory_order_relaxed); + NumChannels = voice->NumChannels; + SampleSize = voice->SampleSize; + increment = voice->Step; + + IrSize = (Device->HrtfHandle ? Device->HrtfHandle->irSize : 0); +/* A step of exactly FRACTIONONE with a zero fractional offset is handled by Resample_copy_C instead of the resampler stored in the voice. */ + Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ? + Resample_copy_C : voice->Resampler); +/* Counter is the gain fade length handed to the mixers: SamplesToDo when the voice is flagged as fading, otherwise 0, in which case the current gains are set straight to the targets before mixing. */ + Counter = (voice->Flags&VOICE_IS_FADING) ? SamplesToDo : 0; + firstpass = true; + OutPos = 0; + + do { + ALsizei SrcBufferSize, DstBufferSize; + + /* Figure out how many buffer samples will be needed */ + DataSize64 = SamplesToDo-OutPos; + DataSize64 *= increment; + DataSize64 += DataPosFrac+FRACTIONMASK; + DataSize64 >>= FRACTIONBITS; + DataSize64 += MAX_RESAMPLE_PADDING*2; + SrcBufferSize = (ALsizei)mini64(DataSize64, BUFFERSIZE); + + /* Figure out how many samples we can actually mix from this. */ + DataSize64 = SrcBufferSize; + DataSize64 -= MAX_RESAMPLE_PADDING*2; + DataSize64 <<= FRACTIONBITS; + DataSize64 -= DataPosFrac; + DstBufferSize = (ALsizei)mini64((DataSize64+(increment-1)) / increment, + SamplesToDo - OutPos); + + /* Some mixers like having a multiple of 4, so try to give that unless + * this is the last update. */ + if(DstBufferSize < SamplesToDo-OutPos) + DstBufferSize &= ~3; + + /* It's impossible to have a buffer list item with no entries. */ + assert(BufferListItem->num_buffers > 0); + + for(chan = 0;chan < NumChannels;chan++) + { + const ALfloat *ResampledData; + ALfloat *SrcData = Device->TempBuffer[SOURCE_DATA_BUF]; + ALsizei FilledAmt; + + /* Load the previous samples into the source data first, and clear the rest. 
*/ + memcpy(SrcData, voice->PrevSamples[chan], MAX_RESAMPLE_PADDING*sizeof(ALfloat)); + memset(SrcData+MAX_RESAMPLE_PADDING, 0, (BUFFERSIZE-MAX_RESAMPLE_PADDING)* + sizeof(ALfloat)); + FilledAmt = MAX_RESAMPLE_PADDING; + + if(isstatic) + { + /* TODO: For static sources, loop points are taken from the + * first buffer (should be adjusted by any buffer offset, to + * possibly be added later). + */ + const ALbuffer *Buffer0 = BufferListItem->buffers[0]; + const ALsizei LoopStart = Buffer0->LoopStart; + const ALsizei LoopEnd = Buffer0->LoopEnd; + const ALsizei LoopSize = LoopEnd - LoopStart; + + /* If current pos is beyond the loop range, do not loop. BufferLoopItem is cleared here, so looping stays off for the rest of this call. */ + if(!BufferLoopItem || DataPosInt >= LoopEnd) + { + ALsizei SizeToDo = SrcBufferSize - FilledAmt; + ALsizei CompLen = 0; + ALsizei i; + + BufferLoopItem = NULL; + + for(i = 0;i < BufferListItem->num_buffers;i++) + { + const ALbuffer *buffer = BufferListItem->buffers[i]; + const ALubyte *Data = buffer->data; + ALsizei DataSize; + + if(DataPosInt >= buffer->SampleLen) + continue; + + /* Load what's left to play from the buffer. Every buffer of the item is loaded to the same destination span; LoadSamples accumulates, so their samples are summed. */ + DataSize = mini(SizeToDo, buffer->SampleLen - DataPosInt); + CompLen = maxi(CompLen, DataSize); + + LoadSamples(&SrcData[FilledAmt], + &Data[(DataPosInt*NumChannels + chan)*SampleSize], + NumChannels, buffer->FmtType, DataSize + ); + } + FilledAmt += CompLen; + } + else + { + ALsizei SizeToDo = mini(SrcBufferSize - FilledAmt, LoopEnd - DataPosInt); + ALsizei CompLen = 0; + ALsizei i; + + for(i = 0;i < BufferListItem->num_buffers;i++) + { + const ALbuffer *buffer = BufferListItem->buffers[i]; + const ALubyte *Data = buffer->data; + ALsizei DataSize; + + if(DataPosInt >= buffer->SampleLen) + continue; + + /* Load what's left of this loop iteration */ + DataSize = mini(SizeToDo, buffer->SampleLen - DataPosInt); + CompLen = maxi(CompLen, DataSize); + + LoadSamples(&SrcData[FilledAmt], + &Data[(DataPosInt*NumChannels + chan)*SampleSize], + NumChannels, buffer->FmtType, DataSize + ); + } + FilledAmt 
+= CompLen; +/* Fill the remainder by repeating the loop range from LoopStart until the source buffer is full. NOTE(review): this relies on every pass adding at least one sample (LoopSize above 0 and LoopStart inside at least one buffer); if CompLen stayed 0 the loop would make no progress - presumably guaranteed where loop points are set, confirm. */ + while(SrcBufferSize > FilledAmt) + { + const ALsizei SizeToDo = mini(SrcBufferSize - FilledAmt, LoopSize); + + CompLen = 0; + for(i = 0;i < BufferListItem->num_buffers;i++) + { + const ALbuffer *buffer = BufferListItem->buffers[i]; + const ALubyte *Data = buffer->data; + ALsizei DataSize; + + if(LoopStart >= buffer->SampleLen) + continue; + + DataSize = mini(SizeToDo, buffer->SampleLen - LoopStart); + CompLen = maxi(CompLen, DataSize); + + LoadSamples(&SrcData[FilledAmt], + &Data[(LoopStart*NumChannels + chan)*SampleSize], + NumChannels, buffer->FmtType, DataSize + ); + } + FilledAmt += CompLen; + } + } + } + else + { + /* Crawl the buffer queue to fill in the temp buffer. pos is the read offset into the current item; when the queue ends the crawl continues at BufferLoopItem, or stops if that is NULL. */ + ALbufferlistitem *tmpiter = BufferListItem; + ALsizei pos = DataPosInt; + + while(tmpiter && SrcBufferSize > FilledAmt) + { + ALsizei SizeToDo = SrcBufferSize - FilledAmt; + ALsizei CompLen = 0; + ALsizei i; + + for(i = 0;i < tmpiter->num_buffers;i++) + { + const ALbuffer *ALBuffer = tmpiter->buffers[i]; + ALsizei DataSize = ALBuffer ? ALBuffer->SampleLen : 0; + CompLen = maxi(CompLen, DataSize); + + if(DataSize > pos) + { + const ALubyte *Data = ALBuffer->data; + Data += (pos*NumChannels + chan)*SampleSize; + + DataSize = minu(SizeToDo, DataSize - pos); /* NOTE(review): minu is applied to ALsizei values here while the static paths use mini; both operands are positive at this point, so the result looks the same - confirm intent */+ LoadSamples(&SrcData[FilledAmt], Data, NumChannels, + ALBuffer->FmtType, DataSize); + } + } + if(pos > CompLen) + pos -= CompLen; + else + { + FilledAmt += CompLen - pos; + pos = 0; + } + if(SrcBufferSize > FilledAmt) + { + tmpiter = ATOMIC_LOAD(&tmpiter->next, almemory_order_acquire); + if(!tmpiter) tmpiter = BufferLoopItem; + } + } + } + + /* Store the last source samples used for next time. */ + memcpy(voice->PrevSamples[chan], + &SrcData[(increment*DstBufferSize + DataPosFrac)>>FRACTIONBITS], + MAX_RESAMPLE_PADDING*sizeof(ALfloat) + ); + + /* Now resample, then filter and mix to the appropriate outputs. 
*/ + ResampledData = Resample(&voice->ResampleState, + &SrcData[MAX_RESAMPLE_PADDING], DataPosFrac, increment, + Device->TempBuffer[RESAMPLED_BUF], DstBufferSize + ); + { /* direct output path for this channel */+ DirectParams *parms = &voice->Direct.Params[chan]; + const ALfloat *samples; + + samples = DoFilters( + &parms->LowPass, &parms->HighPass, Device->TempBuffer[FILTERED_BUF], + ResampledData, DstBufferSize, voice->Direct.FilterType + ); + if(!(voice->Flags&VOICE_HAS_HRTF)) + { + if(!Counter) + memcpy(parms->Gains.Current, parms->Gains.Target, + sizeof(parms->Gains.Current)); + if(!(voice->Flags&VOICE_HAS_NFC)) + MixSamples(samples, voice->Direct.Channels, voice->Direct.Buffer, + parms->Gains.Current, parms->Gains.Target, Counter, OutPos, + DstBufferSize + ); + else + { /* NFC voices: the first ChannelsPerOrder[0] channels are mixed from the unfiltered samples, then every further order that has channels is mixed from its own NfcFilterUpdate##order output at the running channel offset */+ ALfloat *nfcsamples = Device->TempBuffer[NFC_DATA_BUF]; + ALsizei chanoffset = 0; + + MixSamples(samples, + voice->Direct.ChannelsPerOrder[0], voice->Direct.Buffer, + parms->Gains.Current, parms->Gains.Target, Counter, OutPos, + DstBufferSize + ); + chanoffset += voice->Direct.ChannelsPerOrder[0]; +#define APPLY_NFC_MIX(order) \ + if(voice->Direct.ChannelsPerOrder[order] > 0) \ + { \ + NfcFilterUpdate##order(&parms->NFCtrlFilter, nfcsamples, samples, \ + DstBufferSize); \ + MixSamples(nfcsamples, voice->Direct.ChannelsPerOrder[order], \ + voice->Direct.Buffer+chanoffset, parms->Gains.Current+chanoffset, \ + parms->Gains.Target+chanoffset, Counter, OutPos, DstBufferSize \ + ); \ + chanoffset += voice->Direct.ChannelsPerOrder[order]; \ + } + APPLY_NFC_MIX(1) + APPLY_NFC_MIX(2) + APPLY_NFC_MIX(3) +#undef APPLY_NFC_MIX + } + } + else + { + MixHrtfParams hrtfparams; + ALsizei fademix = 0; + int lidx, ridx; + + lidx = GetChannelIdxByName(&Device->RealOut, FrontLeft); + ridx = GetChannelIdxByName(&Device->RealOut, FrontRight); + assert(lidx != -1 && ridx != -1); + + if(!Counter) + { + /* No fading, just overwrite the old HRTF params. 
*/ + parms->Hrtf.Old = parms->Hrtf.Target; + } + else if(!(parms->Hrtf.Old.Gain > GAIN_SILENCE_THRESHOLD)) + { + /* The old HRTF params are silent, so overwrite the old + * coefficients with the new, and reset the old gain to + * 0. The future mix will then fade from silence. + */ + parms->Hrtf.Old = parms->Hrtf.Target; + parms->Hrtf.Old.Gain = 0.0f; + } + else if(firstpass) + { + ALfloat gain; + + /* Fade between the coefficients over 128 samples, or over the whole mix when it is shorter than that. */ + fademix = mini(DstBufferSize, 128); + + /* The new coefficients need to fade in completely + * since they're replacing the old ones. To keep the + * gain fading consistent, interpolate between the old + * and new target gains given how much of the fade time + * this mix handles. + */ + gain = lerp(parms->Hrtf.Old.Gain, parms->Hrtf.Target.Gain, + minf(1.0f, (ALfloat)fademix/Counter)); + hrtfparams.Coeffs = parms->Hrtf.Target.Coeffs; + hrtfparams.Delay[0] = parms->Hrtf.Target.Delay[0]; + hrtfparams.Delay[1] = parms->Hrtf.Target.Delay[1]; + hrtfparams.Gain = 0.0f; + hrtfparams.GainStep = gain / (ALfloat)fademix; + + MixHrtfBlendSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples, voice->Offset, OutPos, IrSize, &parms->Hrtf.Old, + &hrtfparams, &parms->Hrtf.State, fademix + ); + /* Update the old parameters with the result. */ + parms->Hrtf.Old = parms->Hrtf.Target; + if(fademix < Counter) + parms->Hrtf.Old.Gain = hrtfparams.Gain; + } +/* Whatever part of this mix the coefficient cross-fade above did not cover is mixed with the regular HRTF mixer, starting fademix samples in. */ + if(fademix < DstBufferSize) + { + ALsizei todo = DstBufferSize - fademix; + ALfloat gain = parms->Hrtf.Target.Gain; + + /* Interpolate the target gain if the gain fading lasts + * longer than this mix. 
+ */ + if(Counter > DstBufferSize) + gain = lerp(parms->Hrtf.Old.Gain, gain, + (ALfloat)todo/(Counter-fademix)); + + hrtfparams.Coeffs = parms->Hrtf.Target.Coeffs; + hrtfparams.Delay[0] = parms->Hrtf.Target.Delay[0]; + hrtfparams.Delay[1] = parms->Hrtf.Target.Delay[1]; + hrtfparams.Gain = parms->Hrtf.Old.Gain; + hrtfparams.GainStep = (gain - parms->Hrtf.Old.Gain) / (ALfloat)todo; + MixHrtfSamples( + voice->Direct.Buffer[lidx], voice->Direct.Buffer[ridx], + samples+fademix, voice->Offset+fademix, OutPos+fademix, IrSize, + &hrtfparams, &parms->Hrtf.State, todo + ); + /* Store the interpolated gain or the final target gain + * depending if the fade is done. + */ + if(DstBufferSize < Counter) + parms->Hrtf.Old.Gain = gain; + else + parms->Hrtf.Old.Gain = parms->Hrtf.Target.Gain; + } + } + } +/* Aux sends: every send that has an output buffer gets its own filtered copy of the resampled data, mixed with the gains of that send. */ + for(send = 0;send < Device->NumAuxSends;send++) + { + SendParams *parms = &voice->Send[send].Params[chan]; + const ALfloat *samples; + + if(!voice->Send[send].Buffer) + continue; + + samples = DoFilters( + &parms->LowPass, &parms->HighPass, Device->TempBuffer[FILTERED_BUF], + ResampledData, DstBufferSize, voice->Send[send].FilterType + ); + + if(!Counter) + memcpy(parms->Gains.Current, parms->Gains.Target, + sizeof(parms->Gains.Current)); + MixSamples(samples, voice->Send[send].Channels, voice->Send[send].Buffer, + parms->Gains.Current, parms->Gains.Target, Counter, OutPos, DstBufferSize + ); + } + } + /* Update positions */ + DataPosFrac += increment*DstBufferSize; + DataPosInt += DataPosFrac>>FRACTIONBITS; + DataPosFrac &= FRACTIONMASK; + + OutPos += DstBufferSize; + voice->Offset += DstBufferSize; + Counter = maxi(DstBufferSize, Counter) - DstBufferSize; /* remaining fade length, never below 0 */+ firstpass = false; + + if(isstatic) + { + if(BufferLoopItem) + { + /* Handle looping static source: wrap a position at or past LoopEnd back into the loop range */ + const ALbuffer *Buffer = BufferListItem->buffers[0]; + ALsizei LoopStart = Buffer->LoopStart; + ALsizei LoopEnd = Buffer->LoopEnd; + if(DataPosInt >= LoopEnd) + { + assert(LoopEnd > LoopStart); + DataPosInt = 
((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart; + } + } + else + { + /* Handle non-looping static source: playback ends once the position reaches the length of the longest buffer in the item */ + ALsizei CompLen = 0; + ALsizei i; + + for(i = 0;i < BufferListItem->num_buffers;i++) + { + const ALbuffer *buffer = BufferListItem->buffers[i]; + if(buffer) CompLen = maxi(CompLen, buffer->SampleLen); + } + + if(DataPosInt >= CompLen) + { + isplaying = false; + BufferListItem = NULL; + DataPosInt = 0; + DataPosFrac = 0; + break; + } + } + } + else while(1) + { + /* Handle streaming source: step past every buffer list item the position has moved beyond, counting its buffers for the completion event. When the queue ends, playback continues at BufferLoopItem if it is set and stops otherwise. */ + ALsizei CompLen = 0; + ALsizei i; + + for(i = 0;i < BufferListItem->num_buffers;i++) + { + const ALbuffer *buffer = BufferListItem->buffers[i]; + if(buffer) CompLen = maxi(CompLen, buffer->SampleLen); + } + + if(CompLen > DataPosInt) + break; + + buffers_done += BufferListItem->num_buffers; + BufferListItem = ATOMIC_LOAD(&BufferListItem->next, almemory_order_acquire); + if(!BufferListItem && !(BufferListItem=BufferLoopItem)) + { + isplaying = false; + DataPosInt = 0; + DataPosFrac = 0; + break; + } + + DataPosInt -= CompLen; + } + } while(isplaying && OutPos < SamplesToDo); +/* Flag the voice as fading so that later calls start with a non-zero Counter. */ + voice->Flags |= VOICE_IS_FADING; + + /* Update source info */ + ATOMIC_STORE(&voice->position, DataPosInt, almemory_order_relaxed); + ATOMIC_STORE(&voice->position_fraction, DataPosFrac, almemory_order_relaxed); + ATOMIC_STORE(&voice->current_buffer, BufferListItem, almemory_order_release); + + /* Send any events now, after the position/buffer info was updated. */ + enabledevt = ATOMIC_LOAD(&Context->EnabledEvts, almemory_order_acquire); + if(buffers_done > 0 && (enabledevt&EventType_BufferCompleted)) + SendAsyncEvent(Context, EventType_BufferCompleted, + AL_EVENT_TYPE_BUFFER_COMPLETED_SOFT, SourceID, buffers_done, "Buffer completed" + ); + + return isplaying; +} -- cgit v1.2.3