Commit 98d75dcd, authored by Jason Crain and committed by Albert Astals Cid

Limit use of ZapfDingbats character names

Some PDFs use names from ZapfDingbats (a1-a206) without intending for
them to be used for text extraction.  Only use these character names
to locate glyphs or for text extraction with ZapfDingbats fonts.

Bug #60243
parent 817cc333
......@@ -23,6 +23,7 @@
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Pino Toscano <pino@kde.org>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -893,6 +894,15 @@ int GooString::cmpN(const char *sA, int n) const {
return 0;
}
// Report whether the trailing characters of this string equal suffix.
// A suffix longer than the string can never match.
GBool GooString::endsWith(const char *suffix) const {
  int tailLen = strlen(suffix);
  if (tailLen > length) {
    return gFalse;
  }
  // Compare from the position where a matching suffix would have to begin.
  const char *tail = s + length - tailLen;
  return strcmp(tail, suffix) == 0;
}
GBool GooString::hasUnicodeMarker(void)
{
return length > 1 && (s[0] & 0xff) == 0xfe && (s[1] & 0xff) == 0xff;
......
......@@ -19,6 +19,7 @@
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2008-2010, 2012 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -140,6 +141,9 @@ public:
int cmp(const char *sA) const;
int cmpN(const char *sA, int n) const;
// Return true if string ends with suffix
GBool endsWith(const char *suffix) const;
GBool hasUnicodeMarker(void);
// Sanitizes the string so that it does
......
......@@ -30,6 +30,7 @@
// Copyright (C) 2012 Yi Yang <ahyangyi@gmail.com>
// Copyright (C) 2012 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -1221,10 +1222,16 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
// pass 1: use the name-to-Unicode mapping table
missing = hex = gFalse;
GBool isZapfDingbats = name && name->endsWith("ZapfDingbats");
for (code = 0; code < 256; ++code) {
if ((charName = enc[code])) {
if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
strcmp(charName, ".notdef")) {
if (isZapfDingbats) {
// include ZapfDingbats names
toUnicode[code] = globalParams->mapNameToUnicodeAll(charName);
} else {
toUnicode[code] = globalParams->mapNameToUnicodeText(charName);
}
if (!toUnicode[code] && strcmp(charName, ".notdef")) {
// if it wasn't in the name-to-Unicode table, check for a
// name that looks like 'Axx' or 'xx', where 'A' is any letter
// and 'xx' is two hex digits
......@@ -1485,7 +1492,7 @@ static int parseCharName(char *charName, Unicode *uBuf, int uLen,
// corresponding character in that list.
// 3.2. otherwise, if the component is in the Adobe Glyph List, then map it
// to the corresponding character in that list.
if (names && (uBuf[0] = globalParams->mapNameToUnicode(charName))) {
if (names && (uBuf[0] = globalParams->mapNameToUnicodeText(charName))) {
return 1;
}
if (numeric) {
......@@ -1674,7 +1681,7 @@ int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
} else if (useUnicode) {
Unicode *uAux;
for (i = 0; i < 256; ++i) {
if (((charName = enc[i]) && (u = globalParams->mapNameToUnicode(charName))))
if (((charName = enc[i]) && (u = globalParams->mapNameToUnicodeAll(charName))))
map[i] = ff->mapCodeToGID(cmap, u);
else
{
......
......@@ -34,6 +34,7 @@
// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2012 Peter Breitenlohner <peb@mppmu.mpg.de>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -576,7 +577,8 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
#ifdef _WIN32
substFiles = new GooHash(gTrue);
#endif
nameToUnicode = new NameToCharCode();
nameToUnicodeZapfDingbats = new NameToCharCode();
nameToUnicodeText = new NameToCharCode();
cidToUnicodes = new GooHash(gTrue);
unicodeToUnicodes = new GooHash(gTrue);
residentUnicodeMaps = new GooHash();
......@@ -648,9 +650,13 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
securityHandlers = new GooList();
#endif
// set up the initial nameToUnicode table
for (i = 0; nameToUnicodeTab[i].name; ++i) {
nameToUnicode->add(nameToUnicodeTab[i].name, nameToUnicodeTab[i].u);
// set up the initial nameToUnicode tables
for (i = 0; nameToUnicodeZapfDingbatsTab[i].name; ++i) {
nameToUnicodeZapfDingbats->add(nameToUnicodeZapfDingbatsTab[i].name, nameToUnicodeZapfDingbatsTab[i].u);
}
for (i = 0; nameToUnicodeTextTab[i].name; ++i) {
nameToUnicodeText->add(nameToUnicodeTextTab[i].name, nameToUnicodeTextTab[i].u);
}
// set up the residentUnicodeMaps table
......@@ -740,7 +746,7 @@ void GlobalParams::parseNameToUnicode(GooString *name) {
tok2 = strtok_r(NULL, " \t\r\n", &tokptr);
if (tok1 && tok2) {
sscanf(tok1, "%x", &u);
nameToUnicode->add(tok2, u);
nameToUnicodeText->add(tok2, u);
} else {
error(errConfig, -1, "Bad line in 'nameToUnicode' file ({0:t}:{1:d})",
name, line);
......@@ -796,7 +802,8 @@ GlobalParams::~GlobalParams() {
delete macRomanReverseMap;
delete nameToUnicode;
delete nameToUnicodeZapfDingbats;
delete nameToUnicodeText;
deleteGooHash(cidToUnicodes, GooString);
deleteGooHash(unicodeToUnicodes, GooString);
deleteGooHash(residentUnicodeMaps, UnicodeMap);
......@@ -853,9 +860,17 @@ CharCode GlobalParams::getMacRomanCharCode(char *charName) {
return macRomanReverseMap->lookup(charName);
}
Unicode GlobalParams::mapNameToUnicode(const char *charName) {
// no need to lock - nameToUnicode is constant
return nameToUnicode->lookup(charName);
// Look up charName in the ZapfDingbats table first; if that lookup yields 0,
// return whatever the general text table gives for the same name.
Unicode GlobalParams::mapNameToUnicodeAll(const char *charName) {
  // no need to lock - nameToUnicodeZapfDingbats and nameToUnicodeText are constant
  Unicode dingbat = nameToUnicodeZapfDingbats->lookup(charName);
  if (dingbat) {
    return dingbat;
  }
  Unicode fallback = nameToUnicodeText->lookup(charName);
  return fallback;
}
// Look up charName in the text-extraction table only; the ZapfDingbats
// table is not consulted here.
Unicode GlobalParams::mapNameToUnicodeText(const char *charName) {
  // no need to lock - nameToUnicodeText is constant
  Unicode textCode = nameToUnicodeText->lookup(charName);
  return textCode;
}
UnicodeMap *GlobalParams::getResidentUnicodeMap(GooString *encodingName) {
......
......@@ -25,6 +25,7 @@
// Copyright (C) 2011 Pino Toscano <pino@kde.org>
// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -139,7 +140,14 @@ public:
CharCode getMacRomanCharCode(char *charName);
Unicode mapNameToUnicode(const char *charName);
// Return Unicode values for character names. Used for general text
// extraction.
Unicode mapNameToUnicodeText(const char *charName);
// Return Unicode values for character names. Used for glyph
// lookups or text extraction with ZapfDingbats fonts.
Unicode mapNameToUnicodeAll(const char *charName);
UnicodeMap *getResidentUnicodeMap(GooString *encodingName);
FILE *getUnicodeMapFile(GooString *encodingName);
FILE *findCMapFile(GooString *collection, GooString *cMapName);
......@@ -271,8 +279,10 @@ private:
//----- user-modifiable settings
NameToCharCode * // mapping from char name to Unicode
nameToUnicode;
NameToCharCode * // mapping from char name to Unicode for ZapfDingbats
nameToUnicodeZapfDingbats;
NameToCharCode * // mapping from char name to Unicode for text
nameToUnicodeText; // extraction
GooHash *cidToUnicodes; // files for mappings from char collections
// to Unicode, indexed by collection name
// [GooString]
......
......@@ -14,6 +14,7 @@
// under GPL version 2 or later
//
// Copyright (C) 2011, 2012 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -24,10 +25,13 @@
#include <stddef.h>
static struct {
struct NameToUnicodeTab {
Unicode u;
const char *name;
} nameToUnicodeTab[] = {
};
// map character names to Unicode
static struct NameToUnicodeTab nameToUnicodeTextTab[] = {
{0x0021, "!"},
{0x0023, "#"},
{0x0024, "$"},
......@@ -800,207 +804,6 @@ static struct {
{0x005f, "_"},
{0x0060, "`"},
{0x0061, "a"},
{0x275e, "a100"},
{0x2761, "a101"},
{0x2762, "a102"},
{0x2763, "a103"},
{0x2764, "a104"},
{0x2710, "a105"},
{0x2765, "a106"},
{0x2766, "a107"},
{0x2767, "a108"},
{0x2660, "a109"},
{0x2721, "a10"},
{0x2665, "a110"},
{0x2666, "a111"},
{0x2663, "a112"},
{0x2709, "a117"},
{0x2708, "a118"},
{0x2707, "a119"},
{0x261b, "a11"},
{0x2460, "a120"},
{0x2461, "a121"},
{0x2462, "a122"},
{0x2463, "a123"},
{0x2464, "a124"},
{0x2465, "a125"},
{0x2466, "a126"},
{0x2467, "a127"},
{0x2468, "a128"},
{0x2469, "a129"},
{0x261e, "a12"},
{0x2776, "a130"},
{0x2777, "a131"},
{0x2778, "a132"},
{0x2779, "a133"},
{0x277a, "a134"},
{0x277b, "a135"},
{0x277c, "a136"},
{0x277d, "a137"},
{0x277e, "a138"},
{0x277f, "a139"},
{0x270c, "a13"},
{0x2780, "a140"},
{0x2781, "a141"},
{0x2782, "a142"},
{0x2783, "a143"},
{0x2784, "a144"},
{0x2785, "a145"},
{0x2786, "a146"},
{0x2787, "a147"},
{0x2788, "a148"},
{0x2789, "a149"},
{0x270d, "a14"},
{0x278a, "a150"},
{0x278b, "a151"},
{0x278c, "a152"},
{0x278d, "a153"},
{0x278e, "a154"},
{0x278f, "a155"},
{0x2790, "a156"},
{0x2791, "a157"},
{0x2792, "a158"},
{0x2793, "a159"},
{0x270e, "a15"},
{0x2794, "a160"},
{0x2192, "a161"},
{0x27a3, "a162"},
{0x2194, "a163"},
{0x2195, "a164"},
{0x2799, "a165"},
{0x279b, "a166"},
{0x279c, "a167"},
{0x279d, "a168"},
{0x279e, "a169"},
{0x270f, "a16"},
{0x279f, "a170"},
{0x27a0, "a171"},
{0x27a1, "a172"},
{0x27a2, "a173"},
{0x27a4, "a174"},
{0x27a5, "a175"},
{0x27a6, "a176"},
{0x27a7, "a177"},
{0x27a8, "a178"},
{0x27a9, "a179"},
{0x2711, "a17"},
{0x27ab, "a180"},
{0x27ad, "a181"},
{0x27af, "a182"},
{0x27b2, "a183"},
{0x27b3, "a184"},
{0x27b5, "a185"},
{0x27b8, "a186"},
{0x27ba, "a187"},
{0x27bb, "a188"},
{0x27bc, "a189"},
{0x2712, "a18"},
{0x27bd, "a190"},
{0x27be, "a191"},
{0x279a, "a192"},
{0x27aa, "a193"},
{0x27b6, "a194"},
{0x27b9, "a195"},
{0x2798, "a196"},
{0x27b4, "a197"},
{0x27b7, "a198"},
{0x27ac, "a199"},
{0x2713, "a19"},
{0x2701, "a1"},
{0x27ae, "a200"},
{0x27b1, "a201"},
{0x2703, "a202"},
{0x2750, "a203"},
{0x2752, "a204"},
{0x276e, "a205"},
{0x2770, "a206"},
{0x2714, "a20"},
{0x2715, "a21"},
{0x2716, "a22"},
{0x2717, "a23"},
{0x2718, "a24"},
{0x2719, "a25"},
{0x271a, "a26"},
{0x271b, "a27"},
{0x271c, "a28"},
{0x2722, "a29"},
{0x2702, "a2"},
{0x2723, "a30"},
{0x2724, "a31"},
{0x2725, "a32"},
{0x2726, "a33"},
{0x2727, "a34"},
{0x2605, "a35"},
{0x2729, "a36"},
{0x272a, "a37"},
{0x272b, "a38"},
{0x272c, "a39"},
{0x2704, "a3"},
{0x272d, "a40"},
{0x272e, "a41"},
{0x272f, "a42"},
{0x2730, "a43"},
{0x2731, "a44"},
{0x2732, "a45"},
{0x2733, "a46"},
{0x2734, "a47"},
{0x2735, "a48"},
{0x2736, "a49"},
{0x260e, "a4"},
{0x2737, "a50"},
{0x2738, "a51"},
{0x2739, "a52"},
{0x273a, "a53"},
{0x273b, "a54"},
{0x273c, "a55"},
{0x273d, "a56"},
{0x273e, "a57"},
{0x273f, "a58"},
{0x2740, "a59"},
{0x2706, "a5"},
{0x2741, "a60"},
{0x2742, "a61"},
{0x2743, "a62"},
{0x2744, "a63"},
{0x2745, "a64"},
{0x2746, "a65"},
{0x2747, "a66"},
{0x2748, "a67"},
{0x2749, "a68"},
{0x274a, "a69"},
{0x271d, "a6"},
{0x274b, "a70"},
{0x25cf, "a71"},
{0x274d, "a72"},
{0x25a0, "a73"},
{0x274f, "a74"},
{0x2751, "a75"},
{0x25b2, "a76"},
{0x25bc, "a77"},
{0x25c6, "a78"},
{0x2756, "a79"},
{0x271e, "a7"},
{0x25d7, "a81"},
{0x2758, "a82"},
{0x2759, "a83"},
{0x275a, "a84"},
{0x276f, "a85"},
{0x2771, "a86"},
{0x2772, "a87"},
{0x2773, "a88"},
{0x2768, "a89"},
{0x271f, "a8"},
{0x2769, "a90"},
{0x276c, "a91"},
{0x276d, "a92"},
{0x276a, "a93"},
{0x276b, "a94"},
{0x2774, "a95"},
{0x2775, "a96"},
{0x275b, "a97"},
{0x275c, "a98"},
{0x275d, "a99"},
{0x2720, "a9"},
{0x0986, "aabengali"},
{0x00e1, "aacute"},
{0x0906, "aadeva"},
......@@ -4473,3 +4276,209 @@ static struct {
{0x007e, "~"},
{ 0, NULL }
};
// Map ZapfDingbats glyph names ("a1" .. "a206") to Unicode values.
//
// Each entry is {Unicode value, glyph name}.  These names are kept in a
// table of their own, apart from nameToUnicodeTextTab, so that they can be
// loaded into a separate lookup structure and consulted only for glyph
// lookups or for text extraction with ZapfDingbats fonts.
//
// The table has no entries for a80 or a113 .. a116.  The final
// { 0, NULL } entry is a sentinel: code that walks the table stops at the
// first entry whose name is NULL.
static struct NameToUnicodeTab nameToUnicodeZapfDingbatsTab[] = {
{0x275e, "a100"},
{0x2761, "a101"},
{0x2762, "a102"},
{0x2763, "a103"},
{0x2764, "a104"},
{0x2710, "a105"},
{0x2765, "a106"},
{0x2766, "a107"},
{0x2767, "a108"},
{0x2660, "a109"},
{0x2721, "a10"},
{0x2665, "a110"},
{0x2666, "a111"},
{0x2663, "a112"},
{0x2709, "a117"},
{0x2708, "a118"},
{0x2707, "a119"},
{0x261b, "a11"},
{0x2460, "a120"},
{0x2461, "a121"},
{0x2462, "a122"},
{0x2463, "a123"},
{0x2464, "a124"},
{0x2465, "a125"},
{0x2466, "a126"},
{0x2467, "a127"},
{0x2468, "a128"},
{0x2469, "a129"},
{0x261e, "a12"},
{0x2776, "a130"},
{0x2777, "a131"},
{0x2778, "a132"},
{0x2779, "a133"},
{0x277a, "a134"},
{0x277b, "a135"},
{0x277c, "a136"},
{0x277d, "a137"},
{0x277e, "a138"},
{0x277f, "a139"},
{0x270c, "a13"},
{0x2780, "a140"},
{0x2781, "a141"},
{0x2782, "a142"},
{0x2783, "a143"},
{0x2784, "a144"},
{0x2785, "a145"},
{0x2786, "a146"},
{0x2787, "a147"},
{0x2788, "a148"},
{0x2789, "a149"},
{0x270d, "a14"},
{0x278a, "a150"},
{0x278b, "a151"},
{0x278c, "a152"},
{0x278d, "a153"},
{0x278e, "a154"},
{0x278f, "a155"},
{0x2790, "a156"},
{0x2791, "a157"},
{0x2792, "a158"},
{0x2793, "a159"},
{0x270e, "a15"},
{0x2794, "a160"},
{0x2192, "a161"},
{0x27a3, "a162"},
{0x2194, "a163"},
{0x2195, "a164"},
{0x2799, "a165"},
{0x279b, "a166"},
{0x279c, "a167"},
{0x279d, "a168"},
{0x279e, "a169"},
{0x270f, "a16"},
{0x279f, "a170"},
{0x27a0, "a171"},
{0x27a1, "a172"},
{0x27a2, "a173"},
{0x27a4, "a174"},
{0x27a5, "a175"},
{0x27a6, "a176"},
{0x27a7, "a177"},
{0x27a8, "a178"},
{0x27a9, "a179"},
{0x2711, "a17"},
{0x27ab, "a180"},
{0x27ad, "a181"},
{0x27af, "a182"},
{0x27b2, "a183"},
{0x27b3, "a184"},
{0x27b5, "a185"},
{0x27b8, "a186"},
{0x27ba, "a187"},
{0x27bb, "a188"},
{0x27bc, "a189"},
{0x2712, "a18"},
{0x27bd, "a190"},
{0x27be, "a191"},
{0x279a, "a192"},
{0x27aa, "a193"},
{0x27b6, "a194"},
{0x27b9, "a195"},
{0x2798, "a196"},
{0x27b4, "a197"},
{0x27b7, "a198"},
{0x27ac, "a199"},
{0x2713, "a19"},
{0x2701, "a1"},
{0x27ae, "a200"},
{0x27b1, "a201"},
{0x2703, "a202"},
{0x2750, "a203"},
{0x2752, "a204"},
{0x276e, "a205"},
{0x2770, "a206"},
{0x2714, "a20"},
{0x2715, "a21"},
{0x2716, "a22"},
{0x2717, "a23"},
{0x2718, "a24"},
{0x2719, "a25"},
{0x271a, "a26"},
{0x271b, "a27"},
{0x271c, "a28"},
{0x2722, "a29"},
{0x2702, "a2"},
{0x2723, "a30"},
{0x2724, "a31"},
{0x2725, "a32"},
{0x2726, "a33"},
{0x2727, "a34"},
{0x2605, "a35"},
{0x2729, "a36"},
{0x272a, "a37"},
{0x272b, "a38"},
{0x272c, "a39"},
{0x2704, "a3"},
{0x272d, "a40"},
{0x272e, "a41"},
{0x272f, "a42"},
{0x2730, "a43"},
{0x2731, "a44"},
{0x2732, "a45"},
{0x2733, "a46"},
{0x2734, "a47"},
{0x2735, "a48"},
{0x2736, "a49"},
{0x260e, "a4"},
{0x2737, "a50"},
{0x2738, "a51"},
{0x2739, "a52"},
{0x273a, "a53"},
{0x273b, "a54"},
{0x273c, "a55"},
{0x273d, "a56"},
{0x273e, "a57"},
{0x273f, "a58"},
{0x2740, "a59"},
{0x2706, "a5"},
{0x2741, "a60"},
{0x2742, "a61"},
{0x2743, "a62"},
{0x2744, "a63"},
{0x2745, "a64"},
{0x2746, "a65"},
{0x2747, "a66"},
{0x2748, "a67"},
{0x2749, "a68"},
{0x274a, "a69"},
{0x271d, "a6"},
{0x274b, "a70"},
{0x25cf, "a71"},
{0x274d, "a72"},
{0x25a0, "a73"},
{0x274f, "a74"},
{0x2751, "a75"},
{0x25b2, "a76"},
{0x25bc, "a77"},
{0x25c6, "a78"},
{0x2756, "a79"},
{0x271e, "a7"},
{0x25d7, "a81"},
{0x2758, "a82"},
{0x2759, "a83"},
{0x275a, "a84"},
{0x276f, "a85"},
{0x2771, "a86"},
{0x2772, "a87"},
{0x2773, "a88"},
{0x2768, "a89"},
{0x271f, "a8"},
{0x2769, "a90"},
{0x276c, "a91"},
{0x276d, "a92"},
{0x276a, "a93"},
{0x276b, "a94"},
{0x2774, "a95"},
{0x2775, "a96"},
{0x275b, "a97"},
{0x275c, "a98"},
{0x275d, "a99"},
{0x2720, "a9"},
// end-of-table sentinel (NULL name)
{ 0, NULL }
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment