summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2011-09-29 03:51:46 -0700
committer: Chris Robinson <[email protected]> 2011-09-29 03:51:46 -0700
commit: b6b3ca6e6ffab6aa943c760be1290954190ae66e (patch)
tree: c5281372ec06e7995aa412266205b62d596d1b49
parent: 53572da7de8d2cf22ed996dec4b992ca378f371c (diff)
Use inline assembly for fast float-to-int conversions
-rw-r--r-- Alc/ALu.c 6
-rw-r--r-- Alc/hrtf.c 34
-rw-r--r-- OpenAL32/Include/alMain.h 24
3 files changed, 44 insertions, 20 deletions
diff --git a/Alc/ALu.c b/Alc/ALu.c
index c02b0fdc..19683653 100644
--- a/Alc/ALu.c
+++ b/Alc/ALu.c
@@ -168,7 +168,7 @@ ALvoid CalcNonAttnSourceParams(ALsource *ALSource, const ALCcontext *ALContext)
ALSource->Params.Step = maxstep<<FRACTIONBITS;
else
{
- ALSource->Params.Step = (ALint)(Pitch*FRACTIONONE);
+ ALSource->Params.Step = fastf2i(Pitch*FRACTIONONE);
if(ALSource->Params.Step == 0)
ALSource->Params.Step = 1;
}
@@ -683,7 +683,7 @@ ALvoid CalcSourceParams(ALsource *ALSource, const ALCcontext *ALContext)
ALSource->Params.Step = maxstep<<FRACTIONBITS;
else
{
- ALSource->Params.Step = (ALint)(Pitch*FRACTIONONE);
+ ALSource->Params.Step = fastf2i(Pitch*FRACTIONONE);
if(ALSource->Params.Step == 0)
ALSource->Params.Step = 1;
}
@@ -814,7 +814,7 @@ static __inline ALshort aluF2S(ALfloat val)
{
if(val > 1.0f) return 32767;
if(val < -1.0f) return -32768;
- return (ALint)(val*32767.0f);
+ return fastf2i(val*32767.0f);
}
static __inline ALushort aluF2US(ALfloat val)
{ return aluF2S(val)+32768; }
diff --git a/Alc/hrtf.c b/Alc/hrtf.c
index d8d65097..f5355762 100644
--- a/Alc/hrtf.c
+++ b/Alc/hrtf.c
@@ -72,7 +72,7 @@ static ALuint NumLoadedHrtfs = 0;
static void CalcEvIndices(ALfloat ev, ALuint *evidx, ALfloat *evmu)
{
ev = (F_PI_2 + ev) * (ELEV_COUNT-1) / F_PI;
- evidx[0] = (ALuint)ev;
+ evidx[0] = fastf2u(ev);
evidx[1] = minu(evidx[0] + 1, ELEV_COUNT-1);
*evmu = ev - evidx[0];
}
@@ -83,7 +83,7 @@ static void CalcEvIndices(ALfloat ev, ALuint *evidx, ALfloat *evmu)
static void CalcAzIndices(ALuint evidx, ALfloat az, ALuint *azidx, ALfloat *azmu)
{
az = (F_PI*2.0f + az) * azCount[evidx] / (F_PI*2.0f);
- azidx[0] = (ALuint)az % azCount[evidx];
+ azidx[0] = fastf2u(az) % azCount[evidx];
azidx[1] = (azidx[0] + 1) % azCount[evidx];
*azmu = az - aluFloor(az);
}
@@ -182,12 +182,12 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi
}
// Calculate the HRIR delays using linear interpolation.
- delays[0] = (ALuint)(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]),
- lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]),
- mu[2]) * 65536.0f);
- delays[1] = (ALuint)(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]),
- lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]),
- mu[2]) * 65536.0f);
+ delays[0] = fastf2u(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]),
+ lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]),
+ mu[2]) * 65536.0f);
+ delays[1] = fastf2u(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]),
+ lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]),
+ mu[2]) * 65536.0f);
}
// Calculates the moving HRIR target coefficients, target delays, and
@@ -279,20 +279,20 @@ ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat a
left = (ALfloat)(delays[0] - (delayStep[0] * counter));
right = (ALfloat)(delays[1] - (delayStep[1] * counter));
- delays[0] = (ALuint)(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]),
- lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]),
- mu[2]) * 65536.0f);
- delays[1] = (ALuint)(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]),
- lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]),
- mu[2]) * 65536.0f);
+ delays[0] = fastf2u(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]),
+ lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]),
+ mu[2]) * 65536.0f);
+ delays[1] = fastf2u(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]),
+ lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]),
+ mu[2]) * 65536.0f);
- delayStep[0] = (ALint)(step * (delays[0] - left));
- delayStep[1] = (ALint)(step * (delays[1] - right));
+ delayStep[0] = fastf2i(step * (delays[0] - left));
+ delayStep[1] = fastf2i(step * (delays[1] - right));
// The stepping count is the number of samples necessary for the HRIR to
// complete its transition. The mixer will only apply stepping for this
// many samples.
- return (ALuint)delta;
+ return fastf2u(delta);
}
const struct Hrtf *GetHrtf(ALCdevice *device)
diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h
index 9615ec8b..88478a88 100644
--- a/OpenAL32/Include/alMain.h
+++ b/OpenAL32/Include/alMain.h
@@ -465,6 +465,30 @@ static __inline ALuint NextPowerOf2(ALuint value)
return powerOf2;
}
+/* Fast float-to-int conversion using x87 fld/fistp where available. Assumes the
+ * FPU is already in round-to-zero mode; other targets fall back to a C cast. */
+static __inline ALint fastf2i(ALfloat f)
+{
+    ALint i;
+#if defined(_MSC_VER) && defined(_M_IX86) /* MSVC inline asm: 32-bit x86 only */
+    __asm fld f
+    __asm fistp i
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) /* x87 only */
+    __asm__ __volatile__("flds %1\n\t"
+                         "fistpl %0\n\t"
+                         : "=m" (i)
+                         : "m" (f));
+#else
+    i = (ALint)f; /* portable fallback for other compilers and architectures */
+#endif
+    return i;
+}
+
+/* Float-to-uint counterpart of fastf2i: converts through fastf2i (so the same
+ * round-to-zero FPU assumption applies) and returns the result as an ALuint. */
+static __inline ALuint fastf2u(ALfloat f)
+{ return (ALuint)fastf2i(f); }
+
enum DevProbe {
DEVICE_PROBE,