diff options
author | Chris Robinson <[email protected]> | 2011-09-29 03:51:46 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2011-09-29 03:51:46 -0700 |
commit | b6b3ca6e6ffab6aa943c760be1290954190ae66e (patch) | |
tree | c5281372ec06e7995aa412266205b62d596d1b49 | |
parent | 53572da7de8d2cf22ed996dec4b992ca378f371c (diff) |
Use inline assembly for fast float-to-int conversions
-rw-r--r-- | Alc/ALu.c | 6 |
-rw-r--r-- | Alc/hrtf.c | 34 |
-rw-r--r-- | OpenAL32/Include/alMain.h | 24 |
3 files changed, 44 insertions, 20 deletions
@@ -168,7 +168,7 @@ ALvoid CalcNonAttnSourceParams(ALsource *ALSource, const ALCcontext *ALContext) ALSource->Params.Step = maxstep<<FRACTIONBITS; else { - ALSource->Params.Step = (ALint)(Pitch*FRACTIONONE); + ALSource->Params.Step = fastf2i(Pitch*FRACTIONONE); if(ALSource->Params.Step == 0) ALSource->Params.Step = 1; } @@ -683,7 +683,7 @@ ALvoid CalcSourceParams(ALsource *ALSource, const ALCcontext *ALContext) ALSource->Params.Step = maxstep<<FRACTIONBITS; else { - ALSource->Params.Step = (ALint)(Pitch*FRACTIONONE); + ALSource->Params.Step = fastf2i(Pitch*FRACTIONONE); if(ALSource->Params.Step == 0) ALSource->Params.Step = 1; } @@ -814,7 +814,7 @@ static __inline ALshort aluF2S(ALfloat val) { if(val > 1.0f) return 32767; if(val < -1.0f) return -32768; - return (ALint)(val*32767.0f); + return fastf2i(val*32767.0f); } static __inline ALushort aluF2US(ALfloat val) { return aluF2S(val)+32768; } @@ -72,7 +72,7 @@ static ALuint NumLoadedHrtfs = 0; static void CalcEvIndices(ALfloat ev, ALuint *evidx, ALfloat *evmu) { ev = (F_PI_2 + ev) * (ELEV_COUNT-1) / F_PI; - evidx[0] = (ALuint)ev; + evidx[0] = fastf2u(ev); evidx[1] = minu(evidx[0] + 1, ELEV_COUNT-1); *evmu = ev - evidx[0]; } @@ -83,7 +83,7 @@ static void CalcEvIndices(ALfloat ev, ALuint *evidx, ALfloat *evmu) static void CalcAzIndices(ALuint evidx, ALfloat az, ALuint *azidx, ALfloat *azmu) { az = (F_PI*2.0f + az) * azCount[evidx] / (F_PI*2.0f); - azidx[0] = (ALuint)az % azCount[evidx]; + azidx[0] = fastf2u(az) % azCount[evidx]; azidx[1] = (azidx[0] + 1) % azCount[evidx]; *azmu = az - aluFloor(az); } @@ -182,12 +182,12 @@ void GetLerpedHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat azi } // Calculate the HRIR delays using linear interpolation. 
- delays[0] = (ALuint)(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]), - lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]), - mu[2]) * 65536.0f); - delays[1] = (ALuint)(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]), - lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]), - mu[2]) * 65536.0f); + delays[0] = fastf2u(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]), + lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]), + mu[2]) * 65536.0f); + delays[1] = fastf2u(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]), + lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]), + mu[2]) * 65536.0f); } // Calculates the moving HRIR target coefficients, target delays, and @@ -279,20 +279,20 @@ ALuint GetMovingHrtfCoeffs(const struct Hrtf *Hrtf, ALfloat elevation, ALfloat a left = (ALfloat)(delays[0] - (delayStep[0] * counter)); right = (ALfloat)(delays[1] - (delayStep[1] * counter)); - delays[0] = (ALuint)(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]), - lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]), - mu[2]) * 65536.0f); - delays[1] = (ALuint)(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]), - lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]), - mu[2]) * 65536.0f); + delays[0] = fastf2u(lerp(lerp(Hrtf->delays[lidx[0]], Hrtf->delays[lidx[1]], mu[0]), + lerp(Hrtf->delays[lidx[2]], Hrtf->delays[lidx[3]], mu[1]), + mu[2]) * 65536.0f); + delays[1] = fastf2u(lerp(lerp(Hrtf->delays[ridx[0]], Hrtf->delays[ridx[1]], mu[0]), + lerp(Hrtf->delays[ridx[2]], Hrtf->delays[ridx[3]], mu[1]), + mu[2]) * 65536.0f); - delayStep[0] = (ALint)(step * (delays[0] - left)); - delayStep[1] = (ALint)(step * (delays[1] - right)); + delayStep[0] = fastf2i(step * (delays[0] - left)); + delayStep[1] = fastf2i(step * (delays[1] - right)); // The stepping count is the number of samples necessary for the HRIR to // complete its transition. 
The mixer will only apply stepping for this // many samples. - return (ALuint)delta; + return fastf2u(delta); } const struct Hrtf *GetHrtf(ALCdevice *device) diff --git a/OpenAL32/Include/alMain.h b/OpenAL32/Include/alMain.h index 9615ec8b..88478a88 100644 --- a/OpenAL32/Include/alMain.h +++ b/OpenAL32/Include/alMain.h @@ -465,6 +465,30 @@ static __inline ALuint NextPowerOf2(ALuint value) return powerOf2; } +/* Fast float-to-int conversion. Assumes the FPU is already in round-to-zero + * mode. */ +static __inline ALint fastf2i(ALfloat f) +{ + ALint i; +#if defined(_MSC_VER) + __asm fld f + __asm fistp i +#elif defined(__GNUC__) + __asm__ __volatile__("flds %1\n\t" + "fistpl %0\n\t" + : "=m" (i) + : "m" (f)); +#else + i = (ALint)f; +#endif + return i; +} + +/* Fast float-to-uint conversion. Assumes the FPU is already in round-to-zero + * mode. */ +static __inline ALuint fastf2u(ALfloat f) +{ return fastf2i(f); } + enum DevProbe { DEVICE_PROBE, |