summary | refs | log | tree | commit | diff | stats
path: root/Alc
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2012-09-11 06:32:42 -0700
committer Chris Robinson <[email protected]> 2012-09-11 06:32:42 -0700
commit 4c5cb2189b56808767e755d140b2246ee19ff39c (patch)
tree 7ca5c2eac755641edfcc4d5943e038d9a6b2b113 /Alc
parent 98ff6f990af1ad3159c5d9a2209b7b6de36d2130 (diff)
Use a non-interleaved DryBuffer
Diffstat (limited to 'Alc')
-rw-r--r-- Alc/ALu.c 46
-rw-r--r-- Alc/alcDedicated.c 13
-rw-r--r-- Alc/alcEcho.c 6
-rw-r--r-- Alc/alcModulator.c 6
-rw-r--r-- Alc/alcReverb.c 8
-rw-r--r-- Alc/mixer_inc.c 21
-rw-r--r-- Alc/mixer_sse.c 41
7 files changed, 65 insertions, 76 deletions
diff --git a/Alc/ALu.c b/Alc/ALu.c
index 22f347f5..b83e7df1 100644
--- a/Alc/ALu.c
+++ b/Alc/ALu.c
@@ -797,7 +797,7 @@ static __inline ALubyte aluF2UB(ALfloat val)
static void Write_##T##_##N(ALCdevice *device, T *RESTRICT buffer, \
ALuint SamplesToDo) \
{ \
- ALfloat (*RESTRICT DryBuffer)[MaxChannels] = device->DryBuffer; \
+ ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE] = device->DryBuffer; \
const enum Channel *ChanMap = device->DevChannels; \
ALuint i, j; \
\
@@ -807,7 +807,7 @@ static void Write_##T##_##N(ALCdevice *device, T *RESTRICT buffer, \
enum Channel chan = ChanMap[j]; \
\
for(i = 0;i < SamplesToDo;i++) \
- out[i*N] = func(DryBuffer[i][chan]); \
+ out[i*N] = func(DryBuffer[chan][i]); \
} \
}
@@ -913,7 +913,8 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size)
while(size > 0)
{
SamplesToDo = minu(size, BUFFERSIZE);
- memset(device->DryBuffer, 0, SamplesToDo*MaxChannels*sizeof(ALfloat));
+ for(c = 0;c < MaxChannels;c++)
+ memset(device->DryBuffer[c], 0, SamplesToDo*sizeof(ALfloat));
ALCdevice_Lock(device);
ctx = device->ContextList;
@@ -1002,7 +1003,7 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size)
{
for(i = 0;i < SamplesToDo;i++)
{
- device->DryBuffer[i][FrontCenter] += device->ClickRemoval[FrontCenter];
+ device->DryBuffer[FrontCenter][i] += device->ClickRemoval[FrontCenter];
device->ClickRemoval[FrontCenter] -= device->ClickRemoval[FrontCenter] * (1.0f/256.0f);
}
device->ClickRemoval[FrontCenter] += device->PendingClicks[FrontCenter];
@@ -1011,38 +1012,41 @@ ALvoid aluMixData(ALCdevice *device, ALvoid *buffer, ALsizei size)
else if(device->FmtChans == DevFmtStereo)
{
/* Assumes the first two channels are FrontLeft and FrontRight */
- for(i = 0;i < SamplesToDo;i++)
+ for(c = 0;c < 2;c++)
{
- for(c = 0;c < 2;c++)
+ ALfloat offset = device->ClickRemoval[c];
+ for(i = 0;i < SamplesToDo;i++)
{
- device->DryBuffer[i][c] += device->ClickRemoval[c];
- device->ClickRemoval[c] -= device->ClickRemoval[c] * (1.0f/256.0f);
+ device->DryBuffer[c][i] += offset;
+ offset -= offset * (1.0f/256.0f);
}
- }
- for(c = 0;c < 2;c++)
- {
- device->ClickRemoval[c] += device->PendingClicks[c];
+ device->ClickRemoval[c] = offset + device->PendingClicks[c];
device->PendingClicks[c] = 0.0f;
}
if(device->Bs2b)
{
+ float samples[2];
for(i = 0;i < SamplesToDo;i++)
- bs2b_cross_feed(device->Bs2b, &device->DryBuffer[i][0]);
+ {
+ samples[0] = device->DryBuffer[FrontLeft][i];
+ samples[1] = device->DryBuffer[FrontRight][i];
+ bs2b_cross_feed(device->Bs2b, samples);
+ device->DryBuffer[FrontLeft][i] = samples[0];
+ device->DryBuffer[FrontRight][i] = samples[1];
+ }
}
}
else
{
- for(i = 0;i < SamplesToDo;i++)
+ for(c = 0;c < MaxChannels;c++)
{
- for(c = 0;c < MaxChannels;c++)
+ ALfloat offset = device->ClickRemoval[c];
+ for(i = 0;i < SamplesToDo;i++)
{
- device->DryBuffer[i][c] += device->ClickRemoval[c];
- device->ClickRemoval[c] -= device->ClickRemoval[c] * (1.0f/256.0f);
+ device->DryBuffer[c][i] += offset;
+ offset -= offset * (1.0f/256.0f);
}
- }
- for(c = 0;c < MaxChannels;c++)
- {
- device->ClickRemoval[c] += device->PendingClicks[c];
+ device->ClickRemoval[c] = offset + device->PendingClicks[c];
device->PendingClicks[c] = 0.0f;
}
}
diff --git a/Alc/alcDedicated.c b/Alc/alcDedicated.c
index 64c2910b..2fb3dbbe 100644
--- a/Alc/alcDedicated.c
+++ b/Alc/alcDedicated.c
@@ -66,19 +66,16 @@ static ALvoid DedicatedUpdate(ALeffectState *effect, ALCdevice *device, const AL
state->gains[LFE] = Gain;
}
-static ALvoid DedicatedProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels])
+static ALvoid DedicatedProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE])
{
ALdedicatedState *state = (ALdedicatedState*)effect;
const ALfloat *gains = state->gains;
- ALuint i, s;
+ ALuint i, c;
- for(i = 0;i < SamplesToDo;i++)
+ for(c = 0;c < MaxChannels;c++)
{
- ALfloat sample;
-
- sample = SamplesIn[i];
- for(s = 0;s < MaxChannels;s++)
- SamplesOut[i][s] = sample * gains[s];
+ for(i = 0;i < SamplesToDo;i++)
+ SamplesOut[c][i] = SamplesIn[i] * gains[c];
}
}
diff --git a/Alc/alcEcho.c b/Alc/alcEcho.c
index e3c3df96..1a586488 100644
--- a/Alc/alcEcho.c
+++ b/Alc/alcEcho.c
@@ -126,7 +126,7 @@ static ALvoid EchoUpdate(ALeffectState *effect, ALCdevice *Device, const ALeffec
ComputeAngleGains(Device, atan2f(+lrpan, 0.0f), (1.0f-dirGain)*F_PI, gain, state->Gain[1]);
}
-static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels])
+static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE])
{
ALechoState *state = (ALechoState*)effect;
const ALuint mask = state->BufferLength-1;
@@ -141,12 +141,12 @@ static ALvoid EchoProcess(ALeffectState *effect, ALuint SamplesToDo, const ALflo
/* First tap */
smp = state->SampleBuffer[(offset-tap1) & mask];
for(k = 0;k < MaxChannels;k++)
- SamplesOut[i][k] += smp * state->Gain[0][k];
+ SamplesOut[k][i] += smp * state->Gain[0][k];
/* Second tap */
smp = state->SampleBuffer[(offset-tap2) & mask];
for(k = 0;k < MaxChannels;k++)
- SamplesOut[i][k] += smp * state->Gain[1][k];
+ SamplesOut[k][i] += smp * state->Gain[1][k];
// Apply damping and feedback gain to the second tap, and mix in the
// new sample
diff --git a/Alc/alcModulator.c b/Alc/alcModulator.c
index ce91e95a..46bf1717 100644
--- a/Alc/alcModulator.c
+++ b/Alc/alcModulator.c
@@ -84,7 +84,7 @@ static __inline ALfloat hpFilter1P(FILTER *iir, ALuint offset, ALfloat input)
#define DECL_TEMPLATE(func) \
static void Process##func(ALmodulatorState *state, ALuint SamplesToDo, \
- const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels]) \
+ const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE]) \
{ \
const ALuint step = state->step; \
ALuint index = state->index; \
@@ -102,7 +102,7 @@ static void Process##func(ALmodulatorState *state, ALuint SamplesToDo, \
samp = hpFilter1P(&state->iirFilter, 0, samp); \
\
for(k = 0;k < MaxChannels;k++) \
- SamplesOut[i][k] += state->Gain[k] * samp; \
+ SamplesOut[k][i] += state->Gain[k] * samp; \
} \
state->index = index; \
}
@@ -160,7 +160,7 @@ static ALvoid ModulatorUpdate(ALeffectState *effect, ALCdevice *Device, const AL
}
}
-static ALvoid ModulatorProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels])
+static ALvoid ModulatorProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE])
{
ALmodulatorState *state = (ALmodulatorState*)effect;
diff --git a/Alc/alcReverb.c b/Alc/alcReverb.c
index 13f90511..7fe28fa9 100644
--- a/Alc/alcReverb.c
+++ b/Alc/alcReverb.c
@@ -546,7 +546,7 @@ static __inline ALvoid EAXVerbPass(ALverbState *State, ALfloat in, ALfloat *earl
// This processes the reverb state, given the input samples and an output
// buffer.
-static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels])
+static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE])
{
ALverbState *State = (ALverbState*)effect;
ALuint index, c;
@@ -566,13 +566,13 @@ static ALvoid VerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALflo
// Output the results.
for(c = 0;c < MaxChannels;c++)
- SamplesOut[index][c] += panGain[c] * out[c&3];
+ SamplesOut[c][index] += panGain[c] * out[c&3];
}
}
// This processes the EAX reverb state, given the input samples and an output
// buffer.
-static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[MaxChannels])
+static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const ALfloat *SamplesIn, ALfloat (*SamplesOut)[BUFFERSIZE])
{
ALverbState *State = (ALverbState*)effect;
ALuint index, c;
@@ -584,7 +584,7 @@ static ALvoid EAXVerbProcess(ALeffectState *effect, ALuint SamplesToDo, const AL
EAXVerbPass(State, SamplesIn[index], early, late);
for(c = 0;c < MaxChannels;c++)
- SamplesOut[index][c] += State->Early.PanGain[c]*early[c&3] +
+ SamplesOut[c][index] += State->Early.PanGain[c]*early[c&3] +
State->Late.PanGain[c]*late[c&3];
}
}
diff --git a/Alc/mixer_inc.c b/Alc/mixer_inc.c
index f60ade65..db0941f0 100644
--- a/Alc/mixer_inc.c
+++ b/Alc/mixer_inc.c
@@ -41,7 +41,7 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params,
{
const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep;
const ALuint IrSize = GetHrtfIrSize(Device->Hrtf);
- ALfloat (*RESTRICT DryBuffer)[MaxChannels];
+ ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE];
ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep;
ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[srcchan];
@@ -103,8 +103,8 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params,
Offset++;
ApplyCoeffsStep(Offset, Values, IrSize, Coeffs, CoeffStep, left, right);
- DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0];
- DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1];
+ DryBuffer[FrontLeft][OutPos] += Values[Offset&HRIR_MASK][0];
+ DryBuffer[FrontRight][OutPos] += Values[Offset&HRIR_MASK][1];
OutPos++;
Counter--;
@@ -123,8 +123,8 @@ void MixDirect_Hrtf(ALsource *Source, ALCdevice *Device, DirectParams *params,
Offset++;
ApplyCoeffs(Offset, Values, IrSize, Coeffs, left, right);
- DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0];
- DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1];
+ DryBuffer[FrontLeft][OutPos] += Values[Offset&HRIR_MASK][0];
+ DryBuffer[FrontRight][OutPos] += Values[Offset&HRIR_MASK][1];
OutPos++;
}
@@ -147,7 +147,7 @@ void MixDirect(ALsource *Source, ALCdevice *Device, DirectParams *params,
const ALfloat *RESTRICT data, ALuint srcchan,
ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
{
- ALfloat (*RESTRICT DryBuffer)[MaxChannels];
+ ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE];
ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
ALIGN(16) ALfloat DrySend[MaxChannels];
ALuint pos;
@@ -167,13 +167,12 @@ void MixDirect(ALsource *Source, ALCdevice *Device, DirectParams *params,
for(c = 0;c < MaxChannels;c++)
ClickRemoval[c] -= data[pos]*DrySend[c];
}
- for(pos = 0;pos < BufferSize;pos++)
+ for(c = 0;c < MaxChannels;c++)
{
- for(c = 0;c < MaxChannels;c++)
- DryBuffer[OutPos][c] += data[pos]*DrySend[c];
- OutPos++;
+ for(pos = 0;pos < BufferSize;pos++)
+ DryBuffer[c][OutPos+pos] += data[pos]*DrySend[c];
}
- if(OutPos == SamplesToDo)
+ if(OutPos+pos == SamplesToDo)
{
for(c = 0;c < MaxChannels;c++)
PendingClicks[c] += data[pos]*DrySend[c];
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 434857c1..88d2c1ee 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -132,10 +132,9 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params,
const ALfloat *RESTRICT data, ALuint srcchan,
ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
{
- ALfloat (*RESTRICT DryBuffer)[MaxChannels];
+ ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE];
ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
ALfloat DrySend[MaxChannels];
- ALIGN(16) ALfloat value[4];
ALuint pos;
ALuint c;
(void)Source;
@@ -153,38 +152,28 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params,
for(c = 0;c < MaxChannels;c++)
ClickRemoval[c] -= data[pos]*DrySend[c];
}
- for(pos = 0;pos < BufferSize-3;pos += 4)
+ for(c = 0;c < MaxChannels;c++)
{
- const __m128 val4 = _mm_load_ps(&data[pos]);
- for(c = 0;c < MaxChannels;c++)
+ const __m128 gain = _mm_set1_ps(DrySend[c]);
+ for(pos = 0;pos < BufferSize-3;pos += 4)
{
- const __m128 gain = _mm_set1_ps(DrySend[c]);
- __m128 dry4;
-
- value[0] = DryBuffer[OutPos ][c];
- value[1] = DryBuffer[OutPos+1][c];
- value[2] = DryBuffer[OutPos+2][c];
- value[3] = DryBuffer[OutPos+3][c];
- dry4 = _mm_load_ps(value);
-
+ const __m128 val4 = _mm_load_ps(&data[pos]);
+ __m128 dry4 = _mm_load_ps(&DryBuffer[c][OutPos+pos]);
dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
-
- _mm_store_ps(value, dry4);
- DryBuffer[OutPos ][c] = value[0];
- DryBuffer[OutPos+1][c] = value[1];
- DryBuffer[OutPos+2][c] = value[2];
- DryBuffer[OutPos+3][c] = value[3];
+ _mm_store_ps(&DryBuffer[c][OutPos+pos], dry4);
}
-
- OutPos += 4;
}
- for(;pos < BufferSize;pos++)
+ if(pos < BufferSize)
{
+ ALuint oldpos = pos;
for(c = 0;c < MaxChannels;c++)
- DryBuffer[OutPos][c] += data[pos]*DrySend[c];
- OutPos++;
+ {
+ pos = oldpos;
+ for(;pos < BufferSize;pos++)
+ DryBuffer[c][OutPos+pos] += data[pos]*DrySend[c];
+ }
}
- if(OutPos == SamplesToDo)
+ if(OutPos+pos == SamplesToDo)
{
for(c = 0;c < MaxChannels;c++)
PendingClicks[c] += data[pos]*DrySend[c];