Update the MHR format

This update removes the 16/24-bit sample type enum; samples are now always 24-bit (other than a very small size saving, there's no practical benefit to storing 16-bit samples). It also reverses the field storage order, so no on-load fixup is needed, and stores the IR delays with 2 bits of sub-sample precision, allowing for slightly better timing (after resampling, blending, etc.).
author: Chris Robinson <[email protected]> 2020-02-11 00:37:21 -0800
committer: Chris Robinson <[email protected]> 2020-02-11 00:37:21 -0800
commit: 1d91e282c80949d30edfdbca96f1a833719ad5c6 (patch)
tree: a42ec33ac00aa28d6177b65cdd46d8b4a7a6c255
parent: 3acc667c287fb9d9b2edb83192264d939b8867a5 (diff)
2 files changed, 202 insertions, 28 deletions
diff --git a/alc/hrtf.cpp b/alc/hrtf.cpp
index ff133f1a..6e5d079d 100644
--- a/alc/hrtf.cpp
+++ b/alc/hrtf.cpp
@@ -96,6 +96,7 @@ static_assert(MAX_HRIR_DELAY*HRIR_DELAY_FRACONE < 256, "MAX_HRIR_DELAY or DELAY_
 constexpr ALchar magicMarker00[8]{'M','i','n','P','H','R','0','0'};
 constexpr ALchar magicMarker01[8]{'M','i','n','P','H','R','0','1'};
 constexpr ALchar magicMarker02[8]{'M','i','n','P','H','R','0','2'};
+constexpr ALchar magicMarker03[8]{'M','i','n','P','H','R','0','3'}; /* 8-byte signature selecting LoadHrtf03 */
 
 /* First value for pass-through coefficients (remaining are 0), used for omni-
  * directional sounds. */
@@ -1019,6 +1020,175 @@ std::unique_ptr<HrtfStore> LoadHrtf02(std::istream &data, const char *filename)
         {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
 }
 
+std::unique_ptr<HrtfStore> LoadHrtf03(std::istream &data, const char *filename)
+{   /* Parses a "MinPHR03" data set (marker already consumed); nullptr if reading or validation fails. */
+    constexpr ALubyte ChanType_LeftOnly{0};  /* one channel stored, mirrored for the right ear */
+    constexpr ALubyte ChanType_LeftRight{1}; /* both ears stored, interleaved */
+    /* Fixed header, in stream order: sample rate, channel type, IR length, field count. */
+    ALuint rate{GetLE_ALuint(data)};
+    ALubyte channelType{GetLE_ALubyte(data)};
+    ALushort irSize{GetLE_ALubyte(data)};
+    ALubyte fdCount{GetLE_ALubyte(data)};
+    if(!data || data.eof())
+    {
+        ERR("Failed reading %s\n", filename);
+        return nullptr;
+    }
+
+    if(channelType > ChanType_LeftRight)
+    {
+        ERR("Unsupported channel type: %d\n", channelType);
+        return nullptr;
+    }
+
+    if(irSize < MIN_IR_LENGTH || irSize > HRIR_LENGTH)
+    {
+        ERR("Unsupported HRIR size, irSize=%d (%d to %d)\n", irSize, MIN_IR_LENGTH, HRIR_LENGTH);
+        return nullptr;
+    }
+    if(fdCount < MIN_FD_COUNT || fdCount > MAX_FD_COUNT) /* same lower bound the message reports */
+    {
+        ERR("Unsupported number of field-depths: fdCount=%d (%d to %d)\n", fdCount, MIN_FD_COUNT,
+            MAX_FD_COUNT);
+        return nullptr;
+    }
+    /* Per-field header: distance, elevation count, then one azimuth count per elevation. */
+    auto fields = al::vector<HrtfStore::Field>(fdCount);
+    auto elevs = al::vector<HrtfStore::Elevation>{};
+    for(size_t f{0};f < fdCount;f++)
+    {
+        const ALushort distance{GetLE_ALushort(data)};
+        const ALubyte evCount{GetLE_ALubyte(data)};
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        if(distance < MIN_FD_DISTANCE || distance > MAX_FD_DISTANCE)
+        {
+            ERR("Unsupported field distance[%zu]=%d (%d to %d millimeters)\n", f, distance,
+                MIN_FD_DISTANCE, MAX_FD_DISTANCE);
+            return nullptr;
+        }
+        if(evCount < MIN_EV_COUNT || evCount > MAX_EV_COUNT)
+        {
+            ERR("Unsupported elevation count: evCount[%zu]=%d (%d to %d)\n", f, evCount,
+                MIN_EV_COUNT, MAX_EV_COUNT);
+            return nullptr;
+        }
+
+        fields[f].distance = distance / 1000.0f; /* millimeters -> meters */
+        fields[f].evCount = evCount;
+        if(f > 0 && fields[f].distance > fields[f-1].distance) /* reject a field farther than the previous one */
+        {
+            ERR("Field distance[%zu] is not before previous (%f <= %f)\n", f, fields[f].distance,
+                fields[f-1].distance);
+            return nullptr;
+        }
+        /* Append this field's elevations after those already collected. */
+        const size_t ebase{elevs.size()};
+        elevs.resize(ebase + evCount);
+        for(auto &elev : al::span<HrtfStore::Elevation>(elevs.data()+ebase, evCount))
+            elev.azCount = GetLE_ALubyte(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+
+        for(size_t e{0};e < evCount;e++)
+        {
+            if(elevs[ebase+e].azCount < MIN_AZ_COUNT || elevs[ebase+e].azCount > MAX_AZ_COUNT)
+            {
+                ERR("Unsupported azimuth count: azCount[%zu][%zu]=%d (%d to %d)\n", f, e,
+                    elevs[ebase+e].azCount, MIN_AZ_COUNT, MAX_AZ_COUNT);
+                return nullptr;
+            }
+        }
+    }
+    /* Turn the azimuth counts into running IR offsets (exclusive prefix sum of azCount). */
+    elevs[0].irOffset = 0;
+    std::partial_sum(elevs.cbegin(), elevs.cend(), elevs.begin(),
+        [](const HrtfStore::Elevation &last, const HrtfStore::Elevation &cur)
+            -> HrtfStore::Elevation
+        {
+            return HrtfStore::Elevation{cur.azCount,
+                static_cast<ALushort>(last.azCount + last.irOffset)};
+        });
+    const auto irTotal = static_cast<ALushort>(elevs.back().azCount + elevs.back().irOffset); /* sum of all azCounts */
+    /* NOTE(review): offsets and irTotal are ALushort; confirm the count limits keep the sum in range. */
+    auto coeffs = al::vector<HrirArray>(irTotal, HrirArray{});
+    auto delays = al::vector<ubyte2>(irTotal);
+    if(channelType == ChanType_LeftOnly)
+    {
+        for(auto &hrir : coeffs)
+        {
+            for(auto &val : al::span<float2>{hrir.data(), irSize})
+                val[0] = static_cast<float>(GetLE_ALint24(data)) / 8388608.0f; /* scale by 1/2^23 */
+        }
+        for(auto &val : delays)
+            val[0] = GetLE_ALubyte(data);
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MAX_HRIR_DELAY<<HRIR_DELAY_FRACBITS) /* limit expressed in fixed-point units */
+            {
+                ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+                    delays[i][0] / float{HRIR_DELAY_FRACONE}, MAX_HRIR_DELAY);
+                return nullptr;
+            }
+        }
+
+        /* Mirror the left ear responses to the right ear. */
+        MirrorLeftHrirs({elevs.data(), elevs.size()}, coeffs.data(), delays.data());
+    }
+    else if(channelType == ChanType_LeftRight)
+    {
+        for(auto &hrir : coeffs)
+        {
+            for(auto &val : al::span<float2>{hrir.data(), irSize})
+            {
+                val[0] = static_cast<float>(GetLE_ALint24(data)) / 8388608.0f; /* first of each stored pair */
+                val[1] = static_cast<float>(GetLE_ALint24(data)) / 8388608.0f; /* second of each stored pair */
+            }
+        }
+        for(auto &val : delays)
+        {
+            val[0] = GetLE_ALubyte(data);
+            val[1] = GetLE_ALubyte(data);
+        }
+        if(!data || data.eof())
+        {
+            ERR("Failed reading %s\n", filename);
+            return nullptr;
+        }
+        /* Both channels' delays are validated against the same fixed-point limit. */
+        for(size_t i{0};i < irTotal;++i)
+        {
+            if(delays[i][0] > MAX_HRIR_DELAY<<HRIR_DELAY_FRACBITS)
+            {
+                ERR("Invalid delays[%zu][0]: %f (%d)\n", i,
+                    delays[i][0] / float{HRIR_DELAY_FRACONE}, MAX_HRIR_DELAY);
+                return nullptr;
+            }
+            if(delays[i][1] > MAX_HRIR_DELAY<<HRIR_DELAY_FRACBITS)
+            {
+                ERR("Invalid delays[%zu][1]: %f (%d)\n", i,
+                    delays[i][1] / float{HRIR_DELAY_FRACONE}, MAX_HRIR_DELAY);
+                return nullptr;
+            }
+        }
+    }
+    /* Hand the validated tables to CreateHrtfStore to build the result. */
+    return CreateHrtfStore(rate, irSize, {fields.data(), fields.size()},
+        {elevs.data(), elevs.size()}, coeffs.data(), delays.data(), filename);
+}
+
 
 bool checkName(const std::string &name)
 {
@@ -1237,10 +1407,15 @@ HrtfStore *GetLoadedHrtf(const std::string &name, const char *devname, const ALu
     }
 
     std::unique_ptr<HrtfStore> hrtf;
-    char magic[sizeof(magicMarker02)];
+    char magic[sizeof(magicMarker03)];
     stream->read(magic, sizeof(magic));
-    if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker02)))
+    if(stream->gcount() < static_cast<std::streamsize>(sizeof(magicMarker03)))
         ERR("%s data is too short (%zu bytes)\n", name.c_str(), stream->gcount());
+    else if(memcmp(magic, magicMarker03, sizeof(magicMarker03)) == 0)
+    {
+        TRACE("Detected data set format v3\n");
+        hrtf = LoadHrtf03(*stream, name.c_str());
+    }
     else if(memcmp(magic, magicMarker02, sizeof(magicMarker02)) == 0)
     {
         TRACE("Detected data set format v2\n");
diff --git a/docs/hrtf.txt b/docs/hrtf.txt
index ba8cd8fa..7a1a500f 100644
--- a/docs/hrtf.txt
+++ b/docs/hrtf.txt
@@ -13,25 +13,22 @@ including above and below the listener, instead of just to the front, back, and
 sides.
 
 The default data set is based on the KEMAR HRTF data provided by MIT, which can
-be found at <http://sound.media.mit.edu/resources/KEMAR.html>. It's only
-available when using 44100hz or 48000hz playback.
+be found at <http://sound.media.mit.edu/resources/KEMAR.html>.
 
 
 Custom HRTF Data Sets
 =====================
 
 OpenAL Soft also provides an option to use user-specified data sets, in
-addition to or in place of the default set. This allows users to provide their
-own data sets, which could be better suited for their heads, or to work with
-stereo speakers instead of headphones, or to support more playback sample
-rates, for example.
+addition to or in place of the default set. This allows users to provide data
+sets that could be better suited for their heads, or to work with stereo
+speakers instead of headphones, for example.
 
 The file format is specified below. It uses little-endian byte order.
 
 ==
-ALchar   magic[8] = "MinPHR02";
+ALchar   magic[8] = "MinPHR03";
 ALuint   sampleRate;
-ALubyte  sampleType;  /* Can be 0 (16-bit) or 1 (24-bit). */
 ALubyte  channelType; /* Can be 0 (mono) or 1 (stereo). */
 ALubyte  hrirSize;    /* Can be 8 to 128 in steps of 8. */
 ALubyte  fdCount;     /* Can be 1 to 16. */
@@ -42,28 +39,30 @@ struct {
     ALubyte azCount[evCount]; /* Each can be 1 to 128. */
 } fields[fdCount];
 
-/* NOTE: ALtype can be ALshort (16-bit) or ALbyte[3] (24-bit) depending on
- * sampleType,
+/* NOTE: ALbyte3 is a packed 24-bit sample type,
  * hrirCount is the sum of all azCounts.
  * channels can be 1 (mono) or 2 (stereo) depending on channelType.
  */
-ALtype coefficients[hrirCount][hrirSize][channels];
+ALbyte3 coefficients[hrirCount][hrirSize][channels];
 ALubyte delays[hrirCount][channels]; /* Each can be 0 to 63. */
 ==
 
-The data is described as thus:
+The data layout is as follows:
 
-The file first starts with the 8-byte marker, "MinPHR02", to identify it as an
+The file starts with the 8-byte marker, "MinPHR03", to identify it as an
 HRTF data set. This is followed by an unsigned 32-bit integer, specifying the
-sample rate the data set is designed for (OpenAL Soft will not use it if the
-output device's playback rate doesn't match).
+sample rate the data set is designed for (OpenAL Soft will resample the HRIRs
+if the output device's playback rate doesn't match).
 
-Afterward, an unsigned 8-bit integer specifies how many sample points (or
-finite impulse response filter coefficients) make up each HRIR.
+Afterward, an unsigned 8-bit integer specifies the channel type, which can be 0
+(mono, single-channel) or 1 (stereo, dual-channel). After this is another 8-bit
+integer which specifies how many sample points (or finite impulse response
+filter coefficients) make up each HRIR.
 
 The following unsigned 8-bit integer specifies the number of fields used by the
-data set.  Then for each field an unsigned 16-bit short specifies the distance
-for that field (in millimeters), followed by an 8-bit integer for the number of
+data set.  Fields must be stored in descending order of distance (farthest
+first, closest last).  For each field, an unsigned 16-bit short specifies the
+distance in millimeters, followed by an 8-bit integer for the number of
 elevations.  These elevations start at the bottom (-90 degrees), and increment
 upwards.  Following this is an array of unsigned 8-bit integers, one for each
 elevation which specifies the number of azimuths (and thus HRIRs) that make up
@@ -71,14 +70,14 @@ each elevation.  Azimuths start clockwise from the front, constructing a full
 circle.  Mono HRTFs use the same HRIRs for both ears by reversing the azimuth
 calculation (ie. left = angle, right = 360-angle).
 
-The actual coefficients follow. Each coefficient is a signed 16-bit or 24-bit
-sample.  Stereo HRTFs interleave left/right ear coefficients.  The HRIRs must
-be minimum-phase.  This allows the use of a smaller filter length, reducing
+The actual coefficients follow. Each coefficient is a signed 24-bit sample.
+Stereo HRTFs interleave left/right ear coefficients.  The HRIRs must be
+minimum-phase.  This allows the use of a smaller filter length, reducing
 computation.  For reference, the default data set uses a 32-point filter while
 even the smallest data set provided by MIT used a 128-sample filter (a 4x
 reduction by applying minimum-phase reconstruction).
 
-After the coefficients is an array of unsigned 8-bit delay values, one for
-each HRIR (with stereo HRTFs interleaving left/right ear delays). This is the
-propagation delay (in samples) a signal must wait before being convolved with
-the corresponding minimum-phase HRIR filter.
+After the coefficients is an array of unsigned 8-bit delays in 6.2 fixed-point
+format (6 integer, 2 fractional bits), one per HRIR (stereo HRTFs interleave
+left/right ear delays). This is the propagation delay in samples a signal must
+wait before being convolved with the corresponding minimum-phase HRIR filter.
author	Chris Robinson <[email protected]>	2020-02-11 00:37:21 -0800
committer	Chris Robinson <[email protected]>	2020-02-11 00:37:21 -0800
commit	1d91e282c80949d30edfdbca96f1a833719ad5c6 (patch)
tree	a42ec33ac00aa28d6177b65cdd46d8b4a7a6c255
parent	3acc667c287fb9d9b2edb83192264d939b8867a5 (diff)