Commit 7da5885e authored by Albert Astals Cid

* goo/GooString.cc

* goo/GooString.h
* goo/gmem.c
* goo/gmem.h
* poppler/Lexer.cc
* poppler/Lexer.h
* poppler/PageLabelInfo.cc
* poppler/Parser.cc
* poppler/UGooString.cc
* poppler/UGooString.h: Patch by Krzysztof Kowalczyk <kkowalczyk@gmail.com> to improve performance. See bug 7808 for details.
parent fbc05a67
2006-12-28 Albert Astals Cid <aacid@kde.org>
* goo/GooString.cc
* goo/GooString.h
* goo/gmem.c
* goo/gmem.h
* poppler/Lexer.cc
* poppler/Lexer.h
* poppler/PageLabelInfo.cc
* poppler/Parser.cc
* poppler/UGooString.cc
* poppler/UGooString.h: Patch by Krzysztof Kowalczyk
<kkowalczyk@gmail.com> to improve performance.
See bug 7808 for details.
2006-12-28 Albert Astals Cid <aacid@kde.org>
* poppler/Annot.cc:
......
......@@ -18,76 +18,119 @@
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "gtypes.h"
#include "GooString.h"
static inline int size(int len) {
// Returns the buffer capacity, in chars and including the terminating 0,
// that is used for a string of <len> characters. A string that fits
// together with its terminator into STR_STATIC_SIZE chars gets exactly
// STR_STATIC_SIZE; anything longer gets len + 1 rounded up to a multiple
// of 8 (for len < 256) or of 256.
int inline GooString::roundedSize(int len) {
  if (len < STR_STATIC_SIZE)
    return STR_STATIC_SIZE;

  const int mask = (len < 256) ? 7 : 255;
  return (len + 1 + mask) & ~mask;
}
inline void GooString::resize(int length1) {
char *s1;
if (!s) {
s = new char[size(length1)];
} else if (size(length1) != size(length)) {
s1 = new char[size(length1)];
if (length1 < length) {
memcpy(s1, s, length1);
s1[length1] = '\0';
} else {
memcpy(s1, s, length + 1);
// Make sure that the buffer is big enough to contain <newLength> characters
// plus terminating 0.
// We assume that if this is being called from the constructor, <s> was set
// to NULL and <length> was set to 0 to indicate unused string before calling us.
void inline GooString::resize(int newLength) {
char *s1 = s;
if (!s || (roundedSize(length) != roundedSize(newLength))) {
// requires re-allocating data for string
if (newLength < STR_STATIC_SIZE)
s1 = sStatic;
else
s1 = new char[roundedSize(newLength)];
// we had to re-allocate the memory, so copy the content of previous
// buffer into a new buffer
if (s) {
if (newLength < length) {
memcpy(s1, s, newLength);
} else {
memcpy(s1, s, length);
}
}
delete[] s;
s = s1;
if (s != sStatic)
delete[] s;
}
s = s1;
length = newLength;
s[length] = '\0';
}
// Replace the content of this string with <s1> followed by <s2>.
// Either pointer may be NULL, in which case that part contributes nothing
// and its length argument is ignored. A length equal to CALC_STRING_LEN is
// computed with strlen(); any other length is asserted to be non-negative
// and used as given. Returns this.
GooString* GooString::Set(const char *s1, int s1Len, const char *s2, int s2Len)
{
  int total = 0;

  // first pass: settle the length of each part
  if (s1) {
    if (s1Len == CALC_STRING_LEN)
      s1Len = strlen(s1);
    else
      assert(s1Len >= 0);
    total += s1Len;
  }
  if (s2) {
    if (s2Len == CALC_STRING_LEN)
      s2Len = strlen(s2);
    else
      assert(s2Len >= 0);
    total += s2Len;
  }

  // make room for both parts before anything is copied
  resize(total);

  // second pass: copy the parts back to back
  char *dst = s;
  if (s1) {
    memcpy(dst, s1, s1Len);
    dst += s1Len;
  }
  if (s2)
    memcpy(dst, s2, s2Len);

  return this;
}
GooString::GooString() {
s = NULL;
resize(length = 0);
s[0] = '\0';
length = 0;
Set(NULL);
}
GooString::GooString(const char *sA) {
int n = strlen(sA);
s = NULL;
resize(length = n);
memcpy(s, sA, n + 1);
length = 0;
Set(sA, CALC_STRING_LEN);
}
GooString::GooString(const char *sA, int lengthA) {
s = NULL;
resize(length = lengthA);
memcpy(s, sA, length * sizeof(char));
s[length] = '\0';
length = 0;
Set(sA, lengthA);
}
GooString::GooString(GooString *str, int idx, int lengthA) {
s = NULL;
resize(length = lengthA);
memcpy(s, str->getCString() + idx, length);
s[length] = '\0';
length = 0;
assert(idx + lengthA < str->length);
Set(str->getCString() + idx, lengthA);
}
GooString::GooString(GooString *str) {
s = NULL;
resize(length = str->getLength());
memcpy(s, str->getCString(), length + 1);
length = 0;
Set(str->getCString(), str->length);
}
GooString::GooString(GooString *str1, GooString *str2) {
int n1 = str1->getLength();
int n2 = str2->getLength();
s = NULL;
resize(length = n1 + n2);
memcpy(s, str1->getCString(), n1);
memcpy(s + n1, str2->getCString(), n2 + 1);
length = 0;
Set(str1->getCString(), str1->length, str2->getCString(), str2->length);
}
GooString *GooString::fromInt(int x) {
......@@ -117,91 +160,50 @@ GooString *GooString::fromInt(int x) {
}
GooString::~GooString() {
delete[] s;
if (s != sStatic)
delete[] s;
}
GooString *GooString::clear() {
s[length = 0] = '\0';
resize(0);
return this;
}
GooString *GooString::append(char c) {
resize(length + 1);
s[length++] = c;
s[length] = '\0';
return this;
return append((const char*)&c, 1);
}
GooString *GooString::append(GooString *str) {
int n = str->getLength();
resize(length + n);
memcpy(s + length, str->getCString(), n + 1);
length += n;
return this;
}
GooString *GooString::append(const char *str) {
int n = strlen(str);
resize(length + n);
memcpy(s + length, str, n + 1);
length += n;
return this;
return append(str->getCString(), str->getLength());
}
GooString *GooString::append(const char *str, int lengthA) {
int prevLen = length;
if (CALC_STRING_LEN == lengthA)
lengthA = strlen(str);
resize(length + lengthA);
memcpy(s + length, str, lengthA);
length += lengthA;
s[length] = '\0';
memcpy(s + prevLen, str, lengthA);
return this;
}
GooString *GooString::insert(int i, char c) {
int j;
resize(length + 1);
for (j = length + 1; j > i; --j)
s[j] = s[j-1];
s[i] = c;
++length;
return this;
return insert(i, (const char*)&c, 1);
}
GooString *GooString::insert(int i, GooString *str) {
int n = str->getLength();
int j;
resize(length + n);
for (j = length; j >= i; --j)
s[j+n] = s[j];
memcpy(s+i, str->getCString(), n);
length += n;
return this;
}
GooString *GooString::insert(int i, const char *str) {
int n = strlen(str);
int j;
resize(length + n);
for (j = length; j >= i; --j)
s[j+n] = s[j];
memcpy(s+i, str, n);
length += n;
return this;
return insert(i, str->getCString(), str->getLength());
}
GooString *GooString::insert(int i, const char *str, int lengthA) {
int j;
int prevLen = length;
if (CALC_STRING_LEN == lengthA)
lengthA = strlen(str);
resize(length + lengthA);
for (j = length; j >= i; --j)
for (j = prevLen; j >= i; --j)
s[j+lengthA] = s[j];
memcpy(s+i, str, lengthA);
length += lengthA;
return this;
}
......@@ -215,7 +217,7 @@ GooString *GooString::del(int i, int n) {
for (j = i; j <= length - n; ++j) {
s[j] = s[j + n];
}
resize(length -= n);
resize(length - n);
}
return this;
}
......
......@@ -15,6 +15,7 @@
#pragma interface
#endif
#include <stdlib.h> // for NULL
#include "gtypes.h"
class GooString {
......@@ -33,6 +34,12 @@ public:
// Create a string from <lengthA> chars at <idx> in <str>.
GooString(GooString *str, int idx, int lengthA);
// Set content of a string to concatenation of <s1> and <s2>. They can both
// be NULL. If <s1Len> or <s2Len> is CALC_STRING_LEN, then length of the string
// will be calculated with strlen(). Otherwise we assume they are a valid
// length of string (or its substring)
GooString* Set(const char *s1, int s1Len=CALC_STRING_LEN, const char *s2=NULL, int s2Len=CALC_STRING_LEN);
// Copy a string.
GooString(GooString *str);
GooString *copy() { return new GooString(this); }
......@@ -64,14 +71,12 @@ public:
// Append a character or string.
GooString *append(char c);
GooString *append(GooString *str);
GooString *append(const char *str);
GooString *append(const char *str, int lengthA);
GooString *append(const char *str, int lengthA=CALC_STRING_LEN);
// Insert a character or string.
GooString *insert(int i, char c);
GooString *insert(int i, GooString *str);
GooString *insert(int i, const char *str);
GooString *insert(int i, const char *str, int lengthA);
GooString *insert(int i, const char *str, int lengthA=CALC_STRING_LEN);
// Delete a character or range of characters.
GooString *del(int i, int n = 1);
......@@ -89,11 +94,22 @@ public:
GBool hasUnicodeMarker(void);
private:
// you can tweak this number for a different speed/memory usage tradeoffs.
// In libc malloc() rounding is 16 so it's best to choose a value that
// results in sizeof(GooString) be a multiple of 16.
// 24 makes sizeof(GooString) to be 32.
static const int STR_STATIC_SIZE = 24;
// a special value telling that the length of the string is not given
// so it must be calculated from the strings
static const int CALC_STRING_LEN = -1;
int roundedSize(int len);
char sStatic[STR_STATIC_SIZE];
int length;
char *s;
void resize(int length1);
void resize(int newLength);
};
#endif
......@@ -196,8 +196,7 @@ void gfree(void *p) {
}
}
#else
if (p)
free(p);
free(p);
#endif
}
......
......@@ -37,7 +37,7 @@ extern void *gmallocn(int nObjs, int objSize);
extern void *greallocn(void *p, int nObjs, int objSize);
/*
* Same as free, but checks for and ignores NULL pointers.
* #ifdef DEBUG_MEM, adds debugging info. If not, same as free.
*/
extern void gfree(void *p);
......
......@@ -50,6 +50,7 @@ static char specialChars[256] = {
Lexer::Lexer(XRef *xrefA, Stream *str) {
Object obj;
lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
xref = xrefA;
curStr.initStream(str);
......@@ -63,6 +64,7 @@ Lexer::Lexer(XRef *xrefA, Stream *str) {
Lexer::Lexer(XRef *xrefA, Object *obj) {
Object obj2;
lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
xref = xrefA;
if (obj->isStream()) {
......@@ -90,9 +92,15 @@ Lexer::~Lexer() {
}
}
int Lexer::getChar() {
int inline Lexer::getChar() {
int c;
if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
c = lookCharLastValueCached;
lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
return c;
}
c = EOF;
while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
curStr.streamClose();
......@@ -106,11 +114,12 @@ int Lexer::getChar() {
return c;
}
int Lexer::lookChar() {
if (curStr.isNone()) {
return EOF;
int inline Lexer::lookChar() {
if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
return lookCharLastValueCached;
}
return curStr.streamLookChar();
lookCharLastValueCached = getChar();
return lookCharLastValueCached;
}
Object *Lexer::getObj(Object *obj, int objNum) {
......
......@@ -63,6 +63,16 @@ public:
// Returns true if <c> is a whitespace character.
static GBool isSpace(int c);
// often (e.g. ~30% on PDF Reference 1.6 pdf file from Adobe site) getChar
// is called right after lookChar. In order to avoid expensive re-doing
// getChar() of underlying stream, we cache the last value found by
// lookChar() in lookCharLastValueCached. A special value
// LOOK_VALUE_NOT_CACHED that should never be part of stream indicates
// that no value was cached
static const int LOOK_VALUE_NOT_CACHED = -3;
int lookCharLastValueCached;
private:
int getChar();
......
#include <config.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
......
......@@ -39,6 +39,7 @@ Parser::~Parser() {
Object *Parser::getObj(Object *obj,
Guchar *fileKey, int keyLength,
int objNum, int objGen) {
UGooString key;
Stream *str;
Object obj2;
int num;
......@@ -75,14 +76,13 @@ Object *Parser::getObj(Object *obj,
error(getPos(), "Dictionary key must be a name object");
shift();
} else {
// buf1 might go away in shift(), so construct the key
UGooString *key = new UGooString(buf1.getName());
// buf1 might go away in shift(), so construct the key
key.Set(buf1.getName());
shift();
if (buf1.isEOF() || buf1.isError()) {
gfree(key);
break;
}
obj->dictAddOwnKeyVal(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
}
}
if (buf1.isEOF())
......@@ -120,8 +120,8 @@ Object *Parser::getObj(Object *obj,
s = obj->getString();
decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
for (i = 0, p = obj->getString()->getCString();
i < s->getLength();
++i, ++p) {
i < s->getLength();
++i, ++p) {
*p = decrypt->decryptByte(*p);
}
delete decrypt;
......@@ -174,6 +174,11 @@ Stream *Parser::makeStream(Object *dict) {
baseStr = lexer->getStream()->getBaseStream();
// skip over stream data
if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
// take into account the fact that we've cached one value
pos = pos - 1;
lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
}
lexer->setPos(pos + length);
// refill token buffers and check for 'endstream'
......
......@@ -15,61 +15,139 @@
#include "PDFDocEncoding.h"
#include "UGooString.h"
UGooString::UGooString(Unicode *u, int l)
// Returns the buffer capacity, counted in Unicode elements and including
// the terminating 0, that is used for a string of <len> characters. A
// string that fits together with its terminator into STR_STATIC_SIZE
// elements gets exactly STR_STATIC_SIZE; anything longer gets len + 1
// rounded up to a multiple of 8 (for len < 256) or of 256.
int inline UGooString::roundedSize(int len) {
  if (len < STR_STATIC_SIZE)
    return STR_STATIC_SIZE;

  const int mask = (len < 256) ? 7 : 255;
  return (len + 1 + mask) & ~mask;
}
// Make sure that the buffer is big enough to contain <newLength> characters
// plus terminating 0.
// We assume that if this is being called from the constructor, <s> was set
// to NULL and <length> was set to 0 to indicate unused string before calling us.
//
// On return <s> points at the (possibly new) buffer, <length> equals
// <newLength> and s[length] is 0. When the buffer has to be replaced, the
// first min(length, newLength) characters of the old content are carried
// over; otherwise the existing buffer is reused as is.
void inline UGooString::resize(int newLength) {
  Unicode *s1 = s;

  if (!s || (roundedSize(length) != roundedSize(newLength))) {
    // requires re-allocating data for string
    if (newLength < STR_STATIC_SIZE)
      s1 = sStatic;
    else
      s1 = new Unicode[roundedSize(newLength)];

    // we had to re-allocate the memory, so copy the content of previous
    // buffer into a new buffer. memcpy() counts bytes, not characters, so
    // the number of characters kept has to be scaled by sizeof(Unicode).
    if (s) {
      const int keep = (newLength < length) ? newLength : length;
      memcpy(s1, s, keep * sizeof(Unicode));
    }

    // the in-object buffer is never heap-allocated; delete[] on NULL is a no-op
    if (s != sStatic)
      delete[] s;
  }

  s = s1;
  length = newLength;
  s[length] = '\0';
}
UGooString::UGooString()
{
s = u;
length = l;
s = NULL;
length = 0;
resize(0);
}
UGooString::UGooString(GooString &str)
{
if ((str.getChar(0) & 0xff) == 0xfe && (str.getChar(1) & 0xff) == 0xff)
s = NULL;
length = 0;
if (str.hasUnicodeMarker())
{
length = (str.getLength() - 2) / 2;
s = (Unicode *)gmallocn(length, sizeof(Unicode));
resize((str.getLength() - 2) / 2);
for (int j = 0; j < length; ++j) {
s[j] = ((str.getChar(2 + 2*j) & 0xff) << 8) | (str.getChar(3 + 2*j) & 0xff);
}
} else
initChar(str);
Set(str.getCString(), str.getLength());
}
// Create a unicode string that uses <u> as its buffer and <strLen> as its
// length; the characters are not copied.
// NOTE(review): presumably ownership of <u> passes to this object and it is
// later released with delete[] -- confirm that callers allocate <u>
// accordingly and leave room for the terminating 0 that resize() writes.
UGooString::UGooString(Unicode *u, int strLen)
{
  // resize() must not be called here: it reads <s> and <length>, which are
  // not initialized yet, and any buffer it set up would be dropped (and
  // leaked, if it came from the heap) as soon as <s> is pointed at <u>.
  s = u;
  length = strLen;
}
UGooString::UGooString(const UGooString &str)
{
length = str.length;
s = (Unicode *)gmallocn(length, sizeof(Unicode));
memcpy(s, str.s, length * sizeof(Unicode));
s = NULL;
length = 0;
Set(str);
}
UGooString::UGooString(const char *str)
UGooString::UGooString(const char *str, int strLen)
{
GooString aux(str);
initChar(aux);
s = NULL;
length = 0;
if (CALC_STRING_LEN == strLen)
strLen = strlen(str);
Set(str, strLen);
}
void UGooString::initChar(GooString &str)
UGooString *UGooString::Set(const UGooString &str)
{
length = str.getLength();
s = (Unicode *)gmallocn(length, sizeof(Unicode));
bool anyNonEncoded = false;
for (int j = 0; j < length && !anyNonEncoded; ++j) {
s[j] = pdfDocEncoding[str.getChar(j) & 0xff];
if (!s[j]) anyNonEncoded = true;
resize(str.length);
memcpy(s, str.s, length * sizeof(Unicode));
return this;
}
UGooString* UGooString::Set(const char *str, int strLen)
{
int j;
bool foundUnencoded = false;
if (CALC_STRING_LEN == strLen)
strLen = strlen(str);
resize(strLen);
for (j = 0; !foundUnencoded && j < length; ++j) {
s[j] = pdfDocEncoding[str[j] & 0xff];
if (!s[j]) {
foundUnencoded = true;
break;
}
}
if ( anyNonEncoded )
if ( foundUnencoded )
{
for (int j = 0; j < length; ++j) {
s[j] = str.getChar(j);
for (j = 0; j < length; ++j) {
s[j] = str[j];
}
}
return this;
}
// Truncate this string to the empty string and return this.
// The work is done by resize(0), which switches back to the in-object
// buffer and releases the heap buffer if one was in use.
UGooString *UGooString::clear()
{
  this->resize(0);
  return this;
}
UGooString::~UGooString()
{
gfree(s);
if (s != sStatic)
delete[] s;
}
int UGooString::cmp(UGooString *str) const
int UGooString::cmp(const UGooString &str) const
{
return cmp(&str);
}
int UGooString::cmp(const UGooString *str) const
{
int n1, n2, i, x;
Unicode *p1, *p2;
......@@ -85,6 +163,14 @@ int UGooString::cmp(UGooString *str) const
return n1 - n2;
}
// FIXME:
// a) this is confusing because GooString::getCString() returns a pointer
// but UGooString returns a newly allocated copy. Should give this
// a different name, like copyAsAscii() or copyAsGooString()
// b) this interface requires copying. It should be changed to take a
// GooString& as a param and put the data inside it so that it uses
// caching optimization of GooString. Callers should be changed to use
// this new interface
char *UGooString::getCString() const
{
char *res = new char[length + 1];
......@@ -92,3 +178,4 @@ char *UGooString::getCString() const
res[length] = '\0';
return res;
}
......@@ -18,36 +18,60 @@ class GooString;
class UGooString
{
public:
// Create an unicode string
UGooString(Unicode *u, int l);
// Create empty unicode string
UGooString();
// Create a unicode string from <str>.
UGooString(GooString &str);
// Create a unicode string from u
UGooString(Unicode *u, int strLen);
// Copy the unicode string
UGooString(const UGooString &str);
// Create a unicode string from <str>.
UGooString(const char *str);
UGooString(const char *str, int strLen = CALC_STRING_LEN);
UGooString *Set(const char *str, int strLen = CALC_STRING_LEN);
UGooString *Set(const UGooString &str);
// Set the string to empty string, freeing all dynamically allocated memory
// as a side effect
UGooString *clear();
// Destructor.
~UGooString();
// Get length.
void resize(int newLength);
int getLength() const { return length; }
// Compare two strings: -1:< 0:= +1:>
int cmp(UGooString *str) const;
int cmp(const UGooString *str) const;
int cmp(const UGooString &str) const;
// get the unicode
Unicode *unicode() const { return s; }
// get the const char*
// Return a newly allocated copy of the string converted to
// ascii (non-Unicode) format. Caller has to delete [] the result
char *getCString() const;
private:
void initChar(GooString &str);
// you can tweak this number for a different speed/memory usage tradeoffs.
// In libc malloc() rounding is 16 so it's best to choose a value that
// results in sizeof(UGooString) be a multiple of 16.
// 20 makes sizeof(UGooString) to be 48.
static const int STR_STATIC_SIZE = 20;
// a special value telling that the length of the string is not given
// so it must be calculated from the strings
static const int CALC_STRING_LEN = -1;
int roundedSize(int len);