Commit e6fb20d7 authored by Axel Strübing, committed by Albert Astals Cid

Extract text of a PDF correctly

See the "[poppler] text extraction does not work" thread on the mailing list for more info.
parent 4829d36a
......@@ -22,6 +22,7 @@
// Copyright (C) 2008, 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2009 Peter Kerzum <kerzum@yandex-team.ru>
// Copyright (C) 2009, 2010 David Benjamin <davidben@mit.edu>
// Copyright (C) 2011 Axel Strübing <axel.struebing@freenet.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -176,6 +177,7 @@ GfxFont::GfxFont(char *tagA, Ref idA, GooString *nameA) {
weight = WeightNotDefined;
refCnt = 1;
dfp = NULL;
hasToUnicode = gFalse;
}
GfxFont::~GfxFont() {
......@@ -436,6 +438,7 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
} else {
ctu = CharCodeToUnicode::parseCMap(buf, nBits);
}
hasToUnicode = gTrue;
delete buf;
return ctu;
}
......@@ -1697,7 +1700,16 @@ int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
*code = (CharCode)(cid = cMap->getCID(s, len, &n));
if (ctu) {
*uLen = ctu->mapToUnicode(cid, u);
if (hasToUnicode) {
int i = 0, c = 0;
while (i < n) {
c = (c << 8 ) + (s[i] & 0xff);
++i;
}
*uLen = ctu->mapToUnicode(c, u);
} else {
*uLen = ctu->mapToUnicode(cid, u);
}
} else {
*uLen = 0;
}
......
......@@ -19,6 +19,7 @@
// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
// Copyright (C) 2011 Axel Strübing <axel.struebing@freenet.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -253,6 +254,7 @@ protected:
double descent; // max depth below baseline
int refCnt;
GBool ok;
GBool hasToUnicode;
};
//------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment