diff options
-rw-r--r-- | Alc/ALu.c | 13 | ||||
-rw-r--r-- | Alc/bformatdec.c | 35 | ||||
-rw-r--r-- | Alc/bformatdec.h | 6 | ||||
-rw-r--r-- | Alc/mixer.c | 13 | ||||
-rw-r--r-- | Alc/mixer_c.c | 4 | ||||
-rw-r--r-- | Alc/mixer_defs.h | 15 | ||||
-rw-r--r-- | Alc/mixer_neon.c | 6 | ||||
-rw-r--r-- | Alc/mixer_sse.c | 6 | ||||
-rw-r--r-- | OpenAL32/Include/alMain.h | 42 | ||||
-rw-r--r-- | OpenAL32/Include/alu.h | 5 |
10 files changed, 98 insertions, 47 deletions
@@ -1489,10 +1489,9 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) slot = slotroot; while(slot) { - const ALeffectslot *cslot = slot; - ALeffectState *state = cslot->Params.EffectState; - V(state,process)(SamplesToDo, cslot->WetBuffer, state->OutBuffer, - state->OutChannels); + ALeffectState *state = slot->Params.EffectState; + V(state,process)(SamplesToDo, SAFE_CONST(ALfloatBUFFERSIZE*,slot->WetBuffer), + state->OutBuffer, state->OutChannels); slot = ATOMIC_LOAD(&slot->next, almemory_order_relaxed); } @@ -1540,19 +1539,19 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) { if(device->Dry.Buffer != device->FOAOut.Buffer) bformatdec_upSample(device->AmbiDecoder, - device->Dry.Buffer, device->FOAOut.Buffer, + device->Dry.Buffer, SAFE_CONST(ALfloatBUFFERSIZE*,device->FOAOut.Buffer), device->FOAOut.NumChannels, SamplesToDo ); bformatdec_process(device->AmbiDecoder, device->RealOut.Buffer, device->RealOut.NumChannels, - device->Dry.Buffer, SamplesToDo + SAFE_CONST(ALfloatBUFFERSIZE*,device->Dry.Buffer), SamplesToDo ); } else if(device->AmbiUp) { ambiup_process(device->AmbiUp, device->RealOut.Buffer, device->RealOut.NumChannels, - device->FOAOut.Buffer, SamplesToDo + SAFE_CONST(ALfloatBUFFERSIZE*,device->FOAOut.Buffer), SamplesToDo ); } else if(device->Uhj_Encoder) diff --git a/Alc/bformatdec.c b/Alc/bformatdec.c index 0722c061..2aab4ed8 100644 --- a/Alc/bformatdec.c +++ b/Alc/bformatdec.c @@ -161,19 +161,6 @@ static const ALfloat Ambi3DDecoder[8][FB_Max][MAX_AMBI_COEFFS] = { static ALfloat Ambi3DEncoder[8][MAX_AMBI_COEFFS]; -static inline RowMixerFunc SelectRowMixer(void) -{ -#ifdef HAVE_SSE - if((CPUCapFlags&CPU_CAP_SSE)) - return MixRow_SSE; -#endif -#ifdef HAVE_NEON - if((CPUCapFlags&CPU_CAP_NEON)) - return MixRow_Neon; -#endif - return MixRow_C; -} - static RowMixerFunc MixMatrixRow = MixRow_C; @@ -495,7 +482,7 @@ void bformatdec_reset(BFormatDec *dec, const AmbDecConf *conf, ALuint chancount, } -void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo) +void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo) { ALuint chan, i; @@ -512,10 +499,12 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat)); MixMatrixRow(dec->ChannelMix, dec->Matrix.Dual[chan][FB_HighFreq], - dec->SamplesHF, dec->NumChannels, SamplesToDo + SAFE_CONST(ALfloatBUFFERSIZE*,dec->SamplesHF), dec->NumChannels, 0, + SamplesToDo ); MixMatrixRow(dec->ChannelMix, dec->Matrix.Dual[chan][FB_LowFreq], - dec->SamplesLF, dec->NumChannels, SamplesToDo + SAFE_CONST(ALfloatBUFFERSIZE*,dec->SamplesLF), dec->NumChannels, 0, + SamplesToDo ); if(dec->Delay[chan].Length > 0) @@ -553,7 +542,7 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat)); MixMatrixRow(dec->ChannelMix, dec->Matrix.Single[chan], InSamples, - dec->NumChannels, SamplesToDo); + dec->NumChannels, 0, SamplesToDo); if(dec->Delay[chan].Length > 0) { @@ -584,7 +573,7 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU } -void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint InChannels, ALuint SamplesToDo) +void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint InChannels, ALuint SamplesToDo) { ALuint i, j; @@ -608,7 +597,9 @@ void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[B /* Now write each band to the output. */ for(j = 0;j < dec->NumChannels;j++) MixMatrixRow(OutBuffer[j], dec->UpSampler.Gains[i][j], - dec->Samples, FB_Max, SamplesToDo); + SAFE_CONST(ALfloatBUFFERSIZE*,dec->Samples), FB_Max, 0, + SamplesToDo + ); } } @@ -659,7 +650,7 @@ void ambiup_reset(struct AmbiUpsampler *ambiup, const ALCdevice *device) } } -void ambiup_process(struct AmbiUpsampler *ambiup, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo) +void ambiup_process(struct AmbiUpsampler *ambiup, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo) { ALuint i, j; @@ -672,6 +663,8 @@ void ambiup_process(struct AmbiUpsampler *ambiup, ALfloat (*restrict OutBuffer)[ for(j = 0;j < OutChannels;j++) MixMatrixRow(OutBuffer[j], ambiup->Gains[i][j], - ambiup->Samples, FB_Max, SamplesToDo); + SAFE_CONST(ALfloatBUFFERSIZE*,ambiup->Samples), FB_Max, 0, + SamplesToDo + ); } } diff --git a/Alc/bformatdec.h b/Alc/bformatdec.h index 433603dd..e78d89a7 100644 --- a/Alc/bformatdec.h +++ b/Alc/bformatdec.h @@ -17,10 +17,10 @@ int bformatdec_getOrder(const struct BFormatDec *dec); void bformatdec_reset(struct BFormatDec *dec, const struct AmbDecConf *conf, ALuint chancount, ALuint srate, const ALuint chanmap[MAX_OUTPUT_CHANNELS], int flags); /* Decodes the ambisonic input to the given output channels. */ -void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo); +void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo); /* Up-samples a first-order input to the decoder's configuration. */ -void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint InChannels, ALuint SamplesToDo); +void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint InChannels, ALuint SamplesToDo); /* Stand-alone first-order upsampler. Kept here because it shares some stuff @@ -30,7 +30,7 @@ struct AmbiUpsampler *ambiup_alloc(); void ambiup_free(struct AmbiUpsampler *ambiup); void ambiup_reset(struct AmbiUpsampler *ambiup, const ALCdevice *device); -void ambiup_process(struct AmbiUpsampler *ambiup, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo); +void ambiup_process(struct AmbiUpsampler *ambiup, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, const ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo); /* Band splitter. Splits a signal into two phase-matching frequency bands. */ diff --git a/Alc/mixer.c b/Alc/mixer.c index 2736920e..864e2395 100644 --- a/Alc/mixer.c +++ b/Alc/mixer.c @@ -79,6 +79,19 @@ MixerFunc SelectMixer(void) return Mix_C; } +RowMixerFunc SelectRowMixer(void) +{ +#ifdef HAVE_SSE + if((CPUCapFlags&CPU_CAP_SSE)) + return MixRow_SSE; +#endif +#ifdef HAVE_NEON + if((CPUCapFlags&CPU_CAP_NEON)) + return MixRow_Neon; +#endif + return MixRow_C; +} + static inline HrtfMixerFunc SelectHrtfMixer(void) { #ifdef HAVE_SSE diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c index c74729a8..3e726df5 100644 --- a/Alc/mixer_c.c +++ b/Alc/mixer_c.c @@ -208,7 +208,7 @@ void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[B * transform. And as the matrices are more or less static once set up, no * stepping is necessary. */ -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize) +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint InPos, ALuint BufferSize) { ALuint c, i; @@ -219,6 +219,6 @@ void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data) continue; for(i = 0;i < BufferSize;i++) - OutBuffer[i] += data[c][i] * gain; + OutBuffer[i] += data[c][InPos+i] * gain; } } diff --git a/Alc/mixer_defs.h b/Alc/mixer_defs.h index 1572ac36..05e6f964 100644 --- a/Alc/mixer_defs.h +++ b/Alc/mixer_defs.h @@ -31,8 +31,9 @@ void MixDirectHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALu ALuint BufferSize); void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); -void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], - ALuint InChans, ALuint BufferSize); +void MixRow_C(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, + ALuint InPos, ALuint BufferSize); /* SSE mixers */ void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ridx, @@ -45,8 +46,9 @@ void MixDirectHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, A ALuint BufferSize); void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], - ALuint InChans, ALuint BufferSize); +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, + ALuint InPos, ALuint BufferSize); /* SSE resamplers */ inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size) @@ -92,7 +94,8 @@ void MixDirectHrtf_Neon(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint BufferSize); void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], - ALuint InChans, ALuint BufferSize); +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, + const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, + ALuint InPos, ALuint BufferSize); #endif /* MIXER_DEFS_H */ diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 73395aeb..8c96aef1 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -144,7 +144,7 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer } } -void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize) +void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint InPos, ALuint BufferSize) { float32x4_t gain4; ALuint c; @@ -159,12 +159,12 @@ void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict da gain4 = vdupq_n_f32(gain); for(;BufferSize-pos > 3;pos += 4) { - const float32x4_t val4 = vld1q_f32(&data[c][pos]); + const float32x4_t val4 = vld1q_f32(&data[c][InPos+pos]); float32x4_t dry4 = vld1q_f32(&OutBuffer[pos]); dry4 = vmlaq_f32(dry4, val4, gain4); vst1q_f32(&OutBuffer[pos], dry4); } for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][pos]*gain; + OutBuffer[pos] += data[c][InPos+pos]*gain; } } diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index 1ac78f9c..24e0e545 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -262,7 +262,7 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer) } } -void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize) +void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint InPos, ALuint BufferSize) { __m128 gain4; ALuint c; @@ -277,12 +277,12 @@ void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Gains, ALfloat (*restrict dat gain4 = _mm_set1_ps(gain); for(;BufferSize-pos > 3;pos += 4) { - const __m128 val4 = _mm_load_ps(&data[c][pos]); + const __m128 val4 = _mm_load_ps(&data[c][InPos+pos]); __m128 dry4 = _mm_load_ps(&OutBuffer[pos]); dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain4)); _mm_store_ps(&OutBuffer[pos], dry4); } for(;pos < BufferSize;pos++) - OutBuffer[pos] += data[c][pos]*gain; + OutBuffer[pos] += data[c][InPos+pos]*gain; } } diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h index af599227..837e1082 100644 --- a/OpenAL32/Include/alMain.h +++ b/OpenAL32/Include/alMain.h @@ -128,6 +128,48 @@ AL_API ALboolean AL_APIENTRY alIsBufferFormatSupportedSOFT(ALenum format); #endif +#ifdef __GNUC__ +/* Because of a long-standing deficiency in C, you're not allowed to implicitly + * cast a pointer-to-type-array to a pointer-to-const-type-array. For example, + * + * int (*ptr)[10]; + * const int (*cptr)[10] = ptr; + * + * is not allowed and most compilers will generate noisy warnings about + * incompatible types, even though it just makes the array elements const. + * Clang will allow it if you make the array type a typedef, like this: + * + * typedef int int10[10]; + * int10 *ptr; + * const int10 *cptr = ptr; + * + * however GCC does not and still issues the incompatible type warning. The + * "proper" way to fix it is to add an explicit cast for the constified type, + * but that removes the vast majority of otherwise useful type-checking you'd + * get, and runs the risk of improper casts if types are later changed. Leaving + * it non-const can also be an issue if you use it as a function parameter, and + * happen to have a const type as input (and also reduce the capabilities of + * the compiler to better optimize the function). + * + * So to work around the problem, we use a macro. The macro first assigns the + * incoming variable to the specified non-const type to ensure it's the correct + * type, then casts the variable as the desired constified type. Very ugly, but + * I'd rather not have hundreds of lines of warnings because I want to tell the + * compiler that some array(s) can't be changed by the code, or have lots of + * error-prone casts. + */ +#define SAFE_CONST(T, var) __extension__({ \ + T _tmp = (var); \ + (const T)_tmp; \ +}) +#else +/* Non-GNU-compatible compilers have to use a straight cast with no extra + * checks, due to the lack of multi-statement expressions. + */ +#define SAFE_CONST(T, var) ((const T)(var)) +#endif + + typedef ALint64SOFT ALint64; typedef ALuint64SOFT ALuint64; diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h index 29cb00fb..3ced9628 100644 --- a/OpenAL32/Include/alu.h +++ b/OpenAL32/Include/alu.h @@ -162,8 +162,8 @@ typedef void (*MixerFunc)(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize); typedef void (*RowMixerFunc)(ALfloat *OutBuffer, const ALfloat *gains, - ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, - ALuint BufferSize); + const ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, + ALuint InPos, ALuint BufferSize); typedef void (*HrtfMixerFunc)(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ridx, const ALfloat *data, ALuint Counter, ALuint Offset, ALuint OutPos, const ALuint IrSize, const MixHrtfParams *hrtfparams, @@ -265,6 +265,7 @@ enum HrtfRequestMode { void aluInitMixer(void); MixerFunc SelectMixer(void); +RowMixerFunc SelectRowMixer(void); /* aluInitRenderer * |