diff options
| field | value | date |
|---|---|---|
| author | Chris Robinson <[email protected]> | 2012-09-11 06:32:42 -0700 |
| committer | Chris Robinson <[email protected]> | 2012-09-11 06:32:42 -0700 |
| commit | 4c5cb2189b56808767e755d140b2246ee19ff39c (patch) | |
| tree | 7ca5c2eac755641edfcc4d5943e038d9a6b2b113 /Alc | |
| parent | 98ff6f990af1ad3159c5d9a2209b7b6de36d2130 (diff) | |
Use a non-interleaved DryBuffer
Diffstat (limited to 'Alc')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Alc/ALu.c | 46 |
| -rw-r--r-- | Alc/alcDedicated.c | 13 |
| -rw-r--r-- | Alc/alcEcho.c | 6 |
| -rw-r--r-- | Alc/alcModulator.c | 6 |
| -rw-r--r-- | Alc/alcReverb.c | 8 |
| -rw-r--r-- | Alc/mixer_inc.c | 21 |
| -rw-r--r-- | Alc/mixer_sse.c | 41 |

7 files changed, 65 insertions, 76 deletions
@@ -797,7 +797,7 @@ static __inline ALubyte aluF2UB(ALfloat val) static void Write_##T##_##N(ALCdevice *device, T *RESTRICT buffer, \ ALuint SamplesToDo) \ { \ - ALfloat (*RESTRICT DryBuffer)[MaxChannels] = device->DryBuffer; \ + ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE] = device->DryBuffer; \ const enum Channel *ChanMap = device->DevChannels; \ ALuint i, j; \ \ @@ -807,7 +807,7 @@ static void Write_##T##_##N(ALCdevice *device, T *RESTRICT buffer, \ enum Channel chan = ChanMap[j]; \ \ for(i = 0;i < SamplesToDo;i++) \ - out[i*N] = func(DryBuffer[i][chan]); \ + out[i*N] = func(DryBuffer[chan][i]); \ } \ } @@ -913,7 +913,8 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) while(size > 0) { SamplesToDo = minu(size, BUFFERSIZE); - memset(device->DryBuffer, 0, SamplesToDo*MaxChannels*sizeof(ALfloat)); + for(c = 0;c < MaxChannels;c++) + memset(device->DryBuffer[c], 0, SamplesToDo*sizeof(ALfloat)); ALCdevice_Lock(device); ctx = device->ContextList; @@ -1002,7 +1003,7 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) { for(i = 0;i < SamplesToDo;i++) { - device->DryBuffer[i][FrontCenter] += device->ClickRemoval[FrontCenter]; + device->DryBuffer[FrontCenter][i] += device->ClickRemoval[FrontCenter]; device->ClickRemoval[FrontCenter] -= device->ClickRemoval[FrontCenter] * (1.0f/256.0f); } device->ClickRemoval[FrontCenter] += device->PendingClicks[FrontCenter]; @@ -1011,38 +1012,41 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size) else if(device->FmtChans == DevFmtStereo) { /* Assumes the first two channels are FrontLeft and FrontRight */ - for(i = 0;i < SamplesToDo;i++) + for(c = 0;c < 2;c++) { - for(c = 0;c < 2;c++) + ALfloat offset = device->ClickRemoval[c]; + for(i = 0;i < SamplesToDo;i++) { - device->DryBuffer[i][c] += device->ClickRemoval[c]; - device->ClickRemoval[c] -= device->ClickRemoval[c] * (1.0f/256.0f); + device->DryBuffer[c][i] += offset; + offset -= offset * (1.0f/256.0f); } - } - for(c = 0;c < 2;c++) - 
{ - device->ClickRemoval[c] += device->PendingClicks[c]; + device->ClickRemoval[c] = offset + device->PendingClicks[c]; device->PendingClicks[c] = 0.0f; } if(device->Bs2b) { + float samples[2]; for(i = 0;i < SamplesToDo;i++) - bs2b_cross_feed(device->Bs2b, &device->DryBuffer[i][0]); + { + samples[0] = device->DryBuffer[FrontLeft][i]; + samples[1] = device->DryBuffer[FrontRight][i]; + bs2b_cross_feed(device->Bs2b, samples); + device->DryBuffer[FrontLeft][i] = samples[0]; + device->DryBuffer[FrontRight][i] = samples[1]; + } } } else { - for(i = 0;i < SamplesToDo;i++) + for(c = 0;c < MaxChannels;c++) { - for(c = 0;c < MaxChannels;c++) + ALfloat offset = device->ClickRemoval[c]; + for(i = 0;i < SamplesToDo;i++) { - device->DryBuffer[i][c] += device->ClickRemoval[c]; - device->ClickRemoval[c] -= device->ClickRemoval[c] * (1.0f/256.0f); + device->DryBuffer[c][i] += offset; + offset -= offset * (1.0f/256.0f); } - } - for(c = 0;c < MaxChannels;c++) - { - device->ClickRemoval[c] += device->PendingClicks[c]; + device->ClickRemoval[c] = offset + device->PendingClicks[c]; device->PendingClicks[c] = 0.0f; } } diff --git a/Alc/alcDedicated.c b/Alc/alcDedicated.c index 64c2910b..2fb3dbbe 100644 --- a/Alc/alcDedicated.c +++ b/Alc/alcDedicated.c @@ -66,19 +66,16 @@ static ALvoid DedicatedUpdate(ALeffectState *effect, ALCdevice *device, const AL state->gains[LFE] = Gain; } -static ALvoid DedicatedProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) +static ALvoid DedicatedProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) { ALdedicatedState *state = (ALdedicatedState*)effect; const ALfloat *gains = state->gains; - ALuint i, s; + ALuint i, c; - for(i = 0;i < SamplesToDo;i++) + for(c = 0;c < MaxChannels;c++) { - ALfloat sample; - - sample = SamplesIn[i]; - for(s = 0;s < MaxChannels;s++) - SamplesOut[i][s] = sample * gains[s]; + for(i = 0;i < SamplesToDo;i++) + 
SamplesOut[c][i] = SamplesIn[i] * gains[c]; } } diff --git a/Alc/alcEcho.c b/Alc/alcEcho.c index e3c3df96..1a586488 100644 --- a/Alc/alcEcho.c +++ b/Alc/alcEcho.c @@ -126,7 +126,7 @@ static ALvoid EchoUpdate(ALeffectState *effect, ALCdevice *Device, const ALeffec ComputeAngleGains(Device, atan2f(+lrpan, 0.0f), (1.0f-dirGain)*F_PI, gain, state->Gain[1]); } -static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) +static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) { ALechoState *state = (ALechoState*)effect; const ALuint mask = state->BufferLength-1; @@ -141,12 +141,12 @@ static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALflo /* First tap */ smp = state->SampleBuffer[(offset-tap1) & mask]; for(k = 0;k < MaxChannels;k++) - SamplesOut[i][k] += smp * state->Gain[0][k]; + SamplesOut[k][i] += smp * state->Gain[0][k]; /* Second tap */ smp = state->SampleBuffer[(offset-tap2) & mask]; for(k = 0;k < MaxChannels;k++) - SamplesOut[i][k] += smp * state->Gain[1][k]; + SamplesOut[k][i] += smp * state->Gain[1][k]; // Apply damping and feedback gain to the second tap, and mix in the // new sample diff --git a/Alc/alcModulator.c b/Alc/alcModulator.c index ce91e95a..46bf1717 100644 --- a/Alc/alcModulator.c +++ b/Alc/alcModulator.c @@ -84,7 +84,7 @@ static __inline ALfloat hpFilter1P(FILTER *iir, ALuint offset, ALfloat input) #define DECL_TEMPLATE(func) \ static void Process##func(ALmodulatorState *state, ALuint SamplesToDo, \ - const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) \ + const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) \ { \ const ALuint step = state->step; \ ALuint index = state->index; \ @@ -102,7 +102,7 @@ static void Process##func(ALmodulatorState *state, ALuint SamplesToDo, \ samp = hpFilter1P(&state->iirFilter, 0, samp); \ \ for(k = 0;k < MaxChannels;k++) \ - 
SamplesOut[i][k] += state->Gain[k] * samp; \ + SamplesOut[k][i] += state->Gain[k] * samp; \ } \ state->index = index; \ } @@ -160,7 +160,7 @@ static ALvoid ModulatorUpdate(ALeffectState *effect, ALCdevice *Device, const AL } } -static ALvoid ModulatorProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) +static ALvoid ModulatorProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) { ALmodulatorState *state = (ALmodulatorState*)effect; diff --git a/Alc/alcReverb.c b/Alc/alcReverb.c index 13f90511..7fe28fa9 100644 --- a/Alc/alcReverb.c +++ b/Alc/alcReverb.c @@ -546,7 +546,7 @@ static __inline ALvoid EAXVerbPass(ALverbState *State, ALfloat in, ALfloat *earl // This processes the reverb state, given the input samples and an output // buffer. -static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) +static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) { ALverbState *State = (ALverbState*)effect; ALuint index, c; @@ -566,13 +566,13 @@ static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALflo // Output the results. for(c = 0;c < MaxChannels;c++) - SamplesOut[index][c] += panGain[c] * out[c&3]; + SamplesOut[c][index] += panGain[c] * out[c&3]; } } // This processes the EAX reverb state, given the input samples and an output // buffer. 
-static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) +static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) { ALverbState *State = (ALverbState*)effect; ALuint index, c; @@ -584,7 +584,7 @@ static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const AL EAXVerbPass(State, SamplesIn[index], early, late); for(c = 0;c < MaxChannels;c++) - SamplesOut[index][c] += State->Early.PanGain[c]*early[c&3] + + SamplesOut[c][index] += State->Early.PanGain[c]*early[c&3] + State->Late.PanGain[c]*late[c&3]; } } diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c index f60ade65..db0941f0 100644 --- a/Alc/mixer_inc.c +++ b/Alc/mixer_inc.c @@ -41,7 +41,7 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params, { const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep; const ALuint IrSize = GetHrtfIrSize(Device->Hrtf); - ALfloat (*RESTRICT DryBuffer)[MaxChannels]; + ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE]; ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep; ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[srcchan]; @@ -103,8 +103,8 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params, Offset++; ApplyCoeffsStep(Offset, Values, IrSize, Coeffs, CoeffStep, left, right); - DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; - DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; + DryBuffer[FrontLeft][OutPos] += Values[Offset&HRIR_MASK][0]; + DryBuffer[FrontRight][OutPos] += Values[Offset&HRIR_MASK][1]; OutPos++; Counter--; @@ -123,8 +123,8 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params, Offset++; ApplyCoeffs(Offset, Values, IrSize, Coeffs, left, right); - DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; - 
DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; + DryBuffer[FrontLeft][OutPos] += Values[Offset&HRIR_MASK][0]; + DryBuffer[FrontRight][OutPos] += Values[Offset&HRIR_MASK][1]; OutPos++; } @@ -147,7 +147,7 @@ void MixDirect(ALsource *Source, ALCdevice *Device, DirectParams *params, const ALfloat *RESTRICT data, ALuint srcchan, ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) { - ALfloat (*RESTRICT DryBuffer)[MaxChannels]; + ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE]; ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; ALIGN(16) ALfloat DrySend[MaxChannels]; ALuint pos; @@ -167,13 +167,12 @@ void MixDirect(ALsource *Source, ALCdevice *Device, DirectParams *params, for(c = 0;c < MaxChannels;c++) ClickRemoval[c] -= data[pos]*DrySend[c]; } - for(pos = 0;pos < BufferSize;pos++) + for(c = 0;c < MaxChannels;c++) { - for(c = 0;c < MaxChannels;c++) - DryBuffer[OutPos][c] += data[pos]*DrySend[c]; - OutPos++; + for(pos = 0;pos < BufferSize;pos++) + DryBuffer[c][OutPos+pos] += data[pos]*DrySend[c]; } - if(OutPos == SamplesToDo) + if(OutPos+pos == SamplesToDo) { for(c = 0;c < MaxChannels;c++) PendingClicks[c] += data[pos]*DrySend[c]; diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c index 434857c1..88d2c1ee 100644 --- a/Alc/mixer_sse.c +++ b/Alc/mixer_sse.c @@ -132,10 +132,9 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params, const ALfloat *RESTRICT data, ALuint srcchan, ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) { - ALfloat (*RESTRICT DryBuffer)[MaxChannels]; + ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE]; ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; ALfloat DrySend[MaxChannels]; - ALIGN(16) ALfloat value[4]; ALuint pos; ALuint c; (void)Source; @@ -153,38 +152,28 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params, for(c = 0;c < MaxChannels;c++) ClickRemoval[c] -= data[pos]*DrySend[c]; } - for(pos = 0;pos < BufferSize-3;pos += 4) + for(c = 0;c < MaxChannels;c++) { - const __m128 val4 
= _mm_load_ps(&data[pos]); - for(c = 0;c < MaxChannels;c++) + const __m128 gain = _mm_set1_ps(DrySend[c]); + for(pos = 0;pos < BufferSize-3;pos += 4) { - const __m128 gain = _mm_set1_ps(DrySend[c]); - __m128 dry4; - - value[0] = DryBuffer[OutPos ][c]; - value[1] = DryBuffer[OutPos+1][c]; - value[2] = DryBuffer[OutPos+2][c]; - value[3] = DryBuffer[OutPos+3][c]; - dry4 = _mm_load_ps(value); - + const __m128 val4 = _mm_load_ps(&data[pos]); + __m128 dry4 = _mm_load_ps(&DryBuffer[c][OutPos+pos]); dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain)); - - _mm_store_ps(value, dry4); - DryBuffer[OutPos ][c] = value[0]; - DryBuffer[OutPos+1][c] = value[1]; - DryBuffer[OutPos+2][c] = value[2]; - DryBuffer[OutPos+3][c] = value[3]; + _mm_store_ps(&DryBuffer[c][OutPos+pos], dry4); } - - OutPos += 4; } - for(;pos < BufferSize;pos++) + if(pos < BufferSize) { + ALuint oldpos = pos; for(c = 0;c < MaxChannels;c++) - DryBuffer[OutPos][c] += data[pos]*DrySend[c]; - OutPos++; + { + pos = oldpos; + for(;pos < BufferSize;pos++) + DryBuffer[c][OutPos+pos] += data[pos]*DrySend[c]; + } } - if(OutPos == SamplesToDo) + if(OutPos+pos == SamplesToDo) { for(c = 0;c < MaxChannels;c++) PendingClicks[c] += data[pos]*DrySend[c]; |