Commit bb49e1e3, authored by Ed Catmur and committed by Albert Astals Cid

Implement Adobe Glyph Naming convention

parent 5634d63a
......@@ -96,6 +96,10 @@ static StdFontMapEntry stdFontMap[] = {
{ "TimesNewRomanPSMT,Italic", "Times-Italic" }
};
// Forward declaration of the glyph-name parser; the definition appears
// further down in this file.
static int parseCharName(char *charName, Unicode *uBuf, int uLen,
GBool names, GBool ligatures,
GBool numeric, GBool hex, GBool variants);
//------------------------------------------------------------------------
// GfxFont
//------------------------------------------------------------------------
......@@ -787,35 +791,24 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GooString *nameA,
}
}
// pass 2: try to fill in the missing chars, looking for names of
// the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
// are any letters, 'xx' is two hex digits, and 'nn' is 2-4
// decimal digits
if (missing && globalParams->getMapNumericCharNames()) {
// construct the char code -> Unicode mapping object
ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
// pass 2: try to fill in the missing chars, looking for ligatures, numeric
// references and variants
if (missing) {
for (code = 0; code < 256; ++code) {
if ((charName = enc[code]) && !toUnicode[code] &&
strcmp(charName, ".notdef")) {
n = strlen(charName);
code2 = -1;
if (hex && n == 3 && isalpha(charName[0]) &&
isxdigit(charName[1]) && isxdigit(charName[2])) {
sscanf(charName+1, "%x", &code2);
} else if (hex && n == 2 &&
isxdigit(charName[0]) && isxdigit(charName[1])) {
sscanf(charName, "%x", &code2);
} else if (!hex && n >= 2 && n <= 4 &&
isdigit(charName[0]) && isdigit(charName[1])) {
code2 = atoi(charName);
} else if (n >= 3 && n <= 5 &&
isdigit(charName[1]) && isdigit(charName[2])) {
code2 = atoi(charName+1);
} else if (n >= 4 && n <= 6 &&
isdigit(charName[2]) && isdigit(charName[3])) {
code2 = atoi(charName+2);
}
if (code2 >= 0 && code2 <= 0xff) {
toUnicode[code] = (Unicode)code2;
}
if ((n = parseCharName(charName, uBuf, sizeof(uBuf)/sizeof(*uBuf),
gFalse, // don't check simple names (pass 1)
gTrue, // do check ligatures
globalParams->getMapNumericCharNames(),
hex,
gTrue))) // do check variants
ctu->setMapping((CharCode)code, uBuf, n);
else
error(-1, "Could not parse charref for nameToUnicode: %s", charName);
}
}
......@@ -829,9 +822,6 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GooString *nameA,
}
}
// construct the char code -> Unicode mapping object
ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
// merge in a ToUnicode CMap, if there is one -- this overwrites
// existing entries in ctu, i.e., the ToUnicode CMap takes
// precedence, but the other encoding info is allowed to fill in any
......@@ -961,6 +951,141 @@ Gfx8BitFont::~Gfx8BitFont() {
}
}
// This function is in part a derived work of the Adobe Glyph Mapping
// Convention: http://www.adobe.com/devnet/opentype/archives/glyph.html
// Algorithmic comments are excerpted from that document to aid
// maintainability.
//
// Maps the glyph name <charName> to a sequence of Unicode values, writing
// at most <uLen> entries into <uBuf>.  Returns the number of entries
// written, or 0 if the name could not be mapped.
//
//   names      look the name up with globalParams->mapNameToUnicode()
//   ligatures  split the name at '_' and map each component in turn
//   numeric    accept the "uniXXXX" / "uXXXX" forms and the numeric
//              heuristics ('Axx', 'xx', 'Ann', 'ABnn', 'nn')
//   hex        treat the 'Axx' and 'xx' heuristic forms as hexadecimal
//   variants   drop everything from the first '.' before mapping
static int parseCharName(char *charName, Unicode *uBuf, int uLen,
                         GBool names, GBool ligatures,
                         GBool numeric, GBool hex, GBool variants)
{
  if (uLen <= 0) {
    error(-1, "Zero-length output buffer (recursion overflow?) in "
          "nameToUnicode: %s", charName);
    return 0;
  }
  // Step 1: drop all the characters from the glyph name starting with the
  // first occurrence of a period (U+002E FULL STOP), if any.
  if (variants) {
    char *var_part = strchr(charName, '.');
    if (var_part == charName) {
      return 0;	// .notdef or similar
    } else if (var_part != NULL) {
      // parse names of the form 7.oldstyle, P.swash, s.sc, etc.
      // copyString() + truncation is used instead of strndup() so that the
      // buffer is allocated and released by the same allocator family as
      // in the ligature branch below, and to avoid a non-portable call.
      char *main_part = copyString(charName);
      main_part[var_part - charName] = '\0';
      GBool namesRecurse = gTrue, variantsRecurse = gFalse;
      int n = parseCharName(main_part, uBuf, uLen, namesRecurse, ligatures,
                            numeric, hex, variantsRecurse);
      gfree(main_part);
      return n;
    }
  }
  // Step 2: split the remaining string into a sequence of components, using
  // underscore (U+005F LOW LINE) as the delimiter.
  if (ligatures && strchr(charName, '_')) {
    // parse names of the form A_a (e.g. f_i, T_h, l_quotesingle)
    char *lig_part, *lig_end, *lig_copy;
    int n = 0, m;
    lig_part = lig_copy = copyString(charName);
    do {
      if ((lig_end = strchr(lig_part, '_')))
        *lig_end = '\0';
      if (lig_part[0] != '\0') {
        GBool namesRecurse = gTrue, ligaturesRecurse = gFalse;
        if ((m = parseCharName(lig_part, uBuf + n, uLen - n, namesRecurse,
                               ligaturesRecurse, numeric, hex, variants)))
          n += m;
        else
          error(-1, "Could not parse ligature component in charref for "
                "nameToUnicode: %s", charName);
      }
      // only advance past a delimiter that was actually found; doing
      // arithmetic on a null pointer is undefined
      if (lig_end)
        lig_part = lig_end + 1;
    } while (lig_end && n < uLen);
    gfree(lig_copy);
    return n;
  }
  // Step 3: map each component to a character string according to the
  // procedure below, and concatenate those strings; the result is the
  // character string to which the glyph name is mapped.
  // 3.1. if the font is Zapf Dingbats (PostScript FontName ZapfDingbats), and
  // the component is in the ZapfDingbats list, then map it to the
  // corresponding character in that list.
  // 3.2. otherwise, if the component is in the Adobe Glyph List, then map it
  // to the corresponding character in that list.
  if (names && (uBuf[0] = globalParams->mapNameToUnicode(charName))) {
    return 1;
  }
  if (numeric) {
    unsigned int n = strlen(charName);
    // The <ctype.h> classifiers below are always given an unsigned char
    // value: passing a negative plain char is undefined behaviour.

    // 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E
    // U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9,
    // A .. F, i.e. U+0030 .. U+0039, U+0041 .. U+0046), the length of that
    // sequence is a multiple of four, and each group of four digits represents
    // a number in the set {0x0000 .. 0xD7FF, 0xE000 .. 0xFFFF}, then interpret
    // each such number as a Unicode scalar value and map the component to the
    // string made of those scalar values. Note that the range and digit length
    // restrictions mean that the "uni" prefix can be used only with Unicode
    // values from the Basic Multilingual Plane (BMP).
    if (n >= 7 && (n % 4) == 3 && !strncmp(charName, "uni", 3)) {
      unsigned int i, m;
      for (i = 0, m = 3; i < (unsigned int)uLen && m < n; m += 4) {
        if (isxdigit((unsigned char)charName[m]) &&
            isxdigit((unsigned char)charName[m + 1]) &&
            isxdigit((unsigned char)charName[m + 2]) &&
            isxdigit((unsigned char)charName[m + 3])) {
          unsigned int u = 0;
          sscanf(charName + m, "%4x", &u);
          if (u <= 0xD7FF || (0xE000 <= u && u <= 0xFFFF)) {
            uBuf[i++] = u;
          }
        }
      }
      return (int)i;
    }
    // 3.4. otherwise, if the component is of the form "u" (U+0075) followed by
    // a sequence of four to six uppercase hexadecimal digits {0 .. 9, A .. F}
    // (U+0030 .. U+0039, U+0041 .. U+0046), and those digits represent a
    // number in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then interpret this
    // number as a Unicode scalar value and map the component to the string
    // made of this scalar value.
    if (n >= 5 && n <= 7 && charName[0] == 'u' &&
        isxdigit((unsigned char)charName[1]) &&
        isxdigit((unsigned char)charName[2]) &&
        isxdigit((unsigned char)charName[3]) &&
        isxdigit((unsigned char)charName[4]) &&
        (n <= 5 || isxdigit((unsigned char)charName[5])) &&
        (n <= 6 || isxdigit((unsigned char)charName[6]))) {
      unsigned int u = 0;
      sscanf(charName + 1, "%x", &u);
      if (u <= 0xD7FF || (0xE000 <= u && u <= 0x10FFFF)) {
        uBuf[0] = u;
        return 1;
      }
    }
    // Not in Adobe Glyph Mapping convention: look for names of the form 'Axx',
    // 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' are any letters, 'xx' is
    // two hex digits, and 'nn' is 2-4 decimal digits.
    // These heuristics are tried only after the "uni"/"u" forms above;
    // otherwise a name such as "u1234" would match the 'Ann' pattern and be
    // decoded as decimal 1234 instead of U+1234.
    if (hex && n == 3 && isalpha((unsigned char)charName[0]) &&
        isxdigit((unsigned char)charName[1]) &&
        isxdigit((unsigned char)charName[2])) {
      unsigned int u = 0;
      sscanf(charName + 1, "%x", &u);
      uBuf[0] = (Unicode)u;
      return 1;
    } else if (hex && n == 2 &&
               isxdigit((unsigned char)charName[0]) &&
               isxdigit((unsigned char)charName[1])) {
      unsigned int u = 0;
      sscanf(charName, "%x", &u);
      uBuf[0] = (Unicode)u;
      return 1;
    } else if (!hex && n >= 2 && n <= 4 &&
               isdigit((unsigned char)charName[0]) &&
               isdigit((unsigned char)charName[1])) {
      uBuf[0] = (Unicode)atoi(charName);
      return 1;
    } else if (n >= 3 && n <= 5 &&
               isdigit((unsigned char)charName[1]) &&
               isdigit((unsigned char)charName[2])) {
      uBuf[0] = (Unicode)atoi(charName + 1);
      return 1;
    } else if (n >= 4 && n <= 6 &&
               isdigit((unsigned char)charName[2]) &&
               isdigit((unsigned char)charName[3])) {
      uBuf[0] = (Unicode)atoi(charName + 2);
      return 1;
    }
  }
  // 3.5. otherwise, map the component to the empty string
  return 0;
}
int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
Unicode *u, int uSize, int *uLen,
double *dx, double *dy, double *ox, double *oy) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment