aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBernhard Haumacher <[email protected]>2020-05-10 14:09:51 +0200
committerSven Göthel <[email protected]>2024-02-03 02:00:45 +0100
commit24a4f764c1f7b07c81a31991bf65808c2406e5bd (patch)
treed5cfe67cd09aaa16aa8feed04edf08a8b4f040e8
parent468a75d6b84b0cdcbd860f8a52492725be8771d9 (diff)
Improved documentation of Cmap table.
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java20
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java18
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java15
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java78
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java71
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java3
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java51
-rw-r--r--src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java81
8 files changed, 331 insertions, 6 deletions
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java
index 1d4bb48bd..e4e2e9ff7 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java
@@ -23,6 +23,8 @@ import java.io.DataInput;
import java.io.IOException;
/**
+ * Entry in the {@link CmapTable}.
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public abstract class CmapFormat {
@@ -64,8 +66,20 @@ public abstract class CmapFormat {
}
}
+ /**
+ * The format version.
+ *
+ * @see CmapFormat0
+ * @see CmapFormat2
+ * @see CmapFormat4
+ * @see CmapFormat6
+ * @see CmapFormat12
+ */
protected abstract int getFormat();
+ /**
+ * The length in bytes of the subtable.
+ */
public abstract int getLength();
protected abstract int getLanguage();
@@ -75,6 +89,12 @@ public abstract class CmapFormat {
public abstract Range getRange(int index)
throws ArrayIndexOutOfBoundsException;
+ /**
+ * Maps the given character to the index of the glyph to use for this
+ * character.
+ *
+ * @see GlyfTable#getDescription(int)
+ */
public abstract int mapCharCode(int charCode);
@Override
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java
index ed3d6cd6c..451a5204f 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java
@@ -23,14 +23,32 @@ import java.io.DataInput;
import java.io.IOException;
/**
+ * Format 0: Byte encoding table
+ *
+ * <p>
* Simple Macintosh cmap table, mapping only the ASCII character set to glyphs.
+ * </p>
+ *
+ * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-0-byte-encoding-table"
*
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapFormat0 extends CmapFormat {
+ /**
+ * uint16
+ *
+ * @see #getLength()
+ */
private final int _length;
+
+ /**
+ * uint16
+ *
+ * @see #getLanguage()
+ */
private final int _language;
+
private final int[] _glyphIdArray = new int[256];
CmapFormat0(DataInput di) throws IOException {
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java
index 530470cae..2554d30bb 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java
@@ -21,6 +21,21 @@ import java.io.DataInput;
import java.io.IOException;
/**
+ * Format 12: Segmented coverage
+ *
+ * <p>
+ * This is the standard character-to-glyph-index mapping table for the Windows
+ * platform for fonts supporting Unicode supplementary-plane characters (U+10000
+ * to U+10FFFF).
+ * </p>
+ *
+ * <p>
+ * Format 12 is similar to {@link CmapFormat4 format 4} in that it defines
+ * segments for sparse representation. It differs, however, in that it uses
+ * 32-bit character codes.
+ * </p>
+ *
+ * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-12-segmented-coverage"
*
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java
index 41fbbb88a..7b3265810 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java
@@ -23,23 +23,97 @@ import java.io.DataInput;
import java.io.IOException;
/**
- * High-byte mapping through table cmap format.
+ * Format 2: High-byte mapping through table.
+ *
+ * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table"
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapFormat2 extends CmapFormat {
- private static class SubHeader {
+ static class SubHeader {
+ /**
+ * uint16
+ *
+ * First valid low byte for this SubHeader.
+ *
+ * @see #_entryCount
+ */
int _firstCode;
+
+ /**
+ * uint16
+ *
+ * Number of valid low bytes for this SubHeader.
+ *
+ * <p>
+ * The {@link #_firstCode} and {@link #_entryCount} values specify a
+ * subrange that begins at {@link #_firstCode} and has a length equal to
+ * the value of {@link #_entryCount}. This subrange stays within the
+ * 0-255 range of the byte being mapped. Bytes outside of this subrange
+ * are mapped to glyph index 0 (missing glyph). The offset of the byte
+ * within this subrange is then used as index into a corresponding
+ * subarray of {@link #_glyphIndexArray}. This subarray is also of
+ * length {@link #_entryCount}. The value of the {@link #_idRangeOffset}
+ * is the number of bytes past the actual location of the
+ * {@link #_idRangeOffset} word where the {@link #_glyphIndexArray}
+ * element corresponding to {@link #_firstCode} appears.
+ * </p>
+ * <p>
+ * Finally, if the value obtained from the subarray is not 0 (which
+ * indicates the missing glyph), you should add {@link #_idDelta} to it
+ * in order to get the glyphIndex. The value {@link #_idDelta} permits
+ * the same subarray to be used for several different subheaders. The
+ * {@link #_idDelta} arithmetic is modulo 65536.
+ * </p>
+ */
int _entryCount;
+
+ /**
+ * @see #_entryCount
+ */
short _idDelta;
+
+ /**
+ * @see #_entryCount
+ */
int _idRangeOffset;
+
int _arrayIndex;
}
+ /**
+ * uint16
+ *
+ * @see #getLength()
+ */
private final int _length;
+
+ /**
+ * uint16
+ *
+ * @see #getLanguage()
+ */
private final int _language;
+
+ /**
+ * uint16[256]
+ *
+ * Array that maps high bytes to subHeaders: value is subHeader index × 8.
+ */
private final int[] _subHeaderKeys = new int[256];
+
+ /**
+ * Variable-length array of SubHeader records.
+ */
private final SubHeader[] _subHeaders;
+
+ /**
+ * uint16
+ *
+ * Variable-length array containing subarrays used for mapping the low byte
+ * of 2-byte characters.
+ */
private final int[] _glyphIndexArray;
CmapFormat2(DataInput di) throws IOException {
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java
index a371b5d6a..a9d0e1378 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java
@@ -24,21 +24,92 @@ import java.io.IOException;
import java.util.Arrays;
/**
+ * Format 4: Segment mapping to delta values
+ *
+ * <p>
+ * This is the standard character-to-glyph-index mapping table for the Windows
+ * platform for fonts that support Unicode BMP characters.
+ * </p>
+ *
+ * <p>
+ * This format is used when the character codes for the characters represented
+ * by a font fall into several contiguous ranges, possibly with holes in some or
+ * all of the ranges (that is, some of the codes in a range may not have a
+ * representation in the font). The format-dependent data is divided into three
+ * parts, which must occur in the following order:
+ * </p>
+ * <ol>
+ * <li>A four-word header gives parameters for an optimized search of the
+ * segment list;
+ * <li>Four parallel arrays describe the segments (one segment for each
+ * contiguous range of codes);
+ * <li>A variable-length array of glyph IDs (unsigned words).
+ * </ol>
+ *
+ * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values"
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapFormat4 extends CmapFormat {
+ /**
+ * uint16 length This is the length in bytes of the subtable.
+ */
private final int _length;
+
+ /**
+ * uint16 language
+ */
private final int _language;
+
+ /**
+ * uint16 segCountX2 2 × segCount.
+ */
private final int _segCountX2;
+
+ /**
+ * uint16 searchRange 2 × (2**floor(log2(segCount)))
+ */
private final int _searchRange;
+
+ /**
+ * uint16 entrySelector log2(searchRange/2)
+ */
private final int _entrySelector;
+
+ /**
+ * uint16 rangeShift 2 × segCount - searchRange
+ */
private final int _rangeShift;
+
+ /**
+ * uint16 endCode[segCount] End characterCode for each segment, last=0xFFFF.
+ */
private final int[] _endCode;
+
+ /**
+ * uint16 startCode[segCount] Start character code for each segment.
+ */
private final int[] _startCode;
+
+ /**
+ * int16 idDelta[segCount] Delta for all character codes in segment.
+ */
private final int[] _idDelta;
+
+ /**
+ * uint16 idRangeOffset[segCount] Offsets into glyphIdArray or 0
+ */
private final int[] _idRangeOffset;
+
+ /**
+ * uint16 glyphIdArray[ ] Glyph index array (arbitrary length)
+ */
private final int[] _glyphIdArray;
+
+ /**
+ * @see #_segCountX2
+ */
private final int _segCount;
CmapFormat4(DataInput di) throws IOException {
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java
index 80039aac0..daf12675c 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java
@@ -23,6 +23,9 @@ import java.io.IOException;
/**
* Format 6: Trimmed table mapping
+ *
+ * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-6-trimmed-table-mapping"
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapFormat6 extends CmapFormat {
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java
index 102ea2f71..99de551f5 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java
@@ -54,13 +54,35 @@ import java.io.DataInput;
import java.io.IOException;
/**
+ * Encoding record.
+ *
+ * <p>
+ * The array of encoding records specifies particular encodings and the offset
+ * to the subtable for each encoding.
+ * </p>
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapIndexEntry implements Comparable<CmapIndexEntry> {
+ /**
+ * @see #getPlatformId()
+ */
private int _platformId;
+
+ /**
+ * @see #getEncodingId()
+ */
private int _encodingId;
+
+ /**
+ * @see #getOffset()
+ */
private int _offset;
+
+ /**
+ * @see #getFormat()
+ */
private CmapFormat _format;
CmapIndexEntry(DataInput di) throws IOException {
@@ -69,14 +91,43 @@ public class CmapIndexEntry implements Comparable<CmapIndexEntry> {
_offset = di.readInt();
}
+ /**
+ * uint16
+ *
+ * Platform ID.
+ *
+ * <p>
+ * Complete details on platform IDs and platform-specific encoding and
+ * language IDs are provided in the {@link NameTable}.
+ * </p>
+ *
+ * @see NameTable
+ */
public int getPlatformId() {
return _platformId;
}
+ /**
+ * uint16
+ *
+ * Platform-specific encoding ID.
+ *
+ * <p>
+ * The platform ID and platform-specific encoding ID in the encoding record
+ * are used to specify a particular character encoding. In the case of the
+ * Macintosh platform, a language field within the mapping subtable is also
+ * used for this purpose.
+ * </p>
+ */
public int getEncodingId() {
return _encodingId;
}
+ /**
+ * Offset32
+ *
+ * Byte offset from beginning of table to the subtable for this encoding.
+ */
public int getOffset() {
return _offset;
}
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java
index 35fdb1c22..3a2dffca2 100644
--- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java
+++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java
@@ -55,21 +55,81 @@ import java.io.IOException;
import java.util.Arrays;
/**
+ * Character to Glyph Index Mapping Table
+ *
+ * <p>
+ * This table defines the mapping of character codes to the glyph index values
+ * used in the font. It may contain more than one subtable, in order to support
+ * more than one character encoding scheme.
+ * </p>
+ *
+ * <h2>Overview</h2>
+ *
+ * <p>
+ * This table defines mapping of character codes to a default glyph index.
+ * Different subtables may be defined that each contain mappings for different
+ * character encoding schemes. The table header indicates the character
+ * encodings for which subtables are present.
+ * </p>
+ *
+ * <p>
+ * Regardless of the encoding scheme, character codes that do not correspond to
+ * any glyph in the font should be mapped to glyph index 0. The glyph at this
+ * location must be a special glyph representing a missing character, commonly
+ * known as .notdef.
+ * </p>
+ *
+ * <p>
+ * Each subtable is in one of nine possible formats and begins with a format
+ * field indicating the format used. The first four formats — formats 0, 2, 4
+ * and 6 — were originally defined prior to Unicode 2.0. These formats allow for
+ * 8-bit single-byte, 8-bit multi-byte, and 16-bit encodings. With the
+ * introduction of supplementary planes in Unicode 2.0, the Unicode addressable
+ * code space extends beyond 16 bits. To accommodate this, three additional
+ * formats were added — formats 8, 10 and 12 — that allow for 32-bit encoding
+ * schemes.
+ * </p>
+ *
+ * <p>
+ * Other enhancements in Unicode led to the addition of other subtable formats.
+ * Subtable format 13 allows for an efficient mapping of many characters to a
+ * single glyph; this is useful for “last-resort” fonts that provide fallback
+ * rendering for all possible Unicode characters with a distinct fallback glyph
+ * for different Unicode ranges. Subtable format 14 provides a unified mechanism
+ * for supporting Unicode variation sequences.
+ * </p>
+ *
* @author <a href="mailto:[email protected]">David Schweinsberg</a>
*/
public class CmapTable implements Table {
- private int _version;
+ /**
+ * @see #getVersion()
+ */
+ public static final int VERSION = 0x0000;
+
+ private int _version = VERSION;
private int _numTables;
private CmapIndexEntry[] _entries;
+ /**
+ * Creates a {@link CmapTable}.
+ *
+ * @param di The reader to read from.
+ */
public CmapTable(DataInput di) throws IOException {
_version = di.readUnsignedShort();
_numTables = di.readUnsignedShort();
long bytesRead = 4;
- _entries = new CmapIndexEntry[_numTables];
-
+
// Get each of the index entries
+
+ // Note: The encoding record entries in the 'cmap' header must be sorted
+ // first by platform ID, then by platform-specific encoding ID, and then
+ // by the language field in the corresponding subtable. Each platform
+ // ID, platform-specific encoding ID, and subtable language combination
+ // may appear only once in the 'cmap' table.
+ _entries = new CmapIndexEntry[_numTables];
for (int i = 0; i < _numTables; i++) {
_entries[i] = new CmapIndexEntry(di);
bytesRead += 8;
@@ -79,7 +139,7 @@ public class CmapTable implements Table {
Arrays.sort(_entries);
// Get each of the tables
- int lastOffset = 0;
+ int lastOffset = -1;
CmapFormat lastFormat = null;
for (int i = 0; i < _numTables; i++) {
if (_entries[i].getOffset() == lastOffset) {
@@ -107,10 +167,23 @@ public class CmapTable implements Table {
return cmap;
}
+ /**
+ * uint16 Table version number ({@link #VERSION}).
+ *
+ * <p>
+ * Note: The 'cmap' table version number remains at {@link #VERSION} for
+ * fonts that make use of the newer subtable formats.
+ * </p>
+ */
public int getVersion() {
return _version;
}
+ /**
+ * uint16
+ *
+ * Number of encoding tables that follow.
+ */
public int getNumTables() {
return _numTables;
}