diff --git a/ChangeLog b/ChangeLog index 145bf57acd5974ccf01ff1019045a2e7b3613cbb..235abdd06c65454ad15f6e2795055c9a9dae675b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2006-12-28 Albert Astals Cid + + * goo/GooString.cc + * goo/GooString.h + * goo/gmem.c + * goo/gmem.h + * poppler/Lexer.cc + * poppler/Lexer.h + * poppler/PageLabelInfo.cc + * poppler/Parser.cc + * poppler/UGooString.cc + * poppler/UGooString.h: Patch by Krzysztof Kowalczyk + to improve performance. + See bug 7808 for details. + 2006-12-28 Albert Astals Cid * poppler/Annot.cc: diff --git a/goo/GooString.cc b/goo/GooString.cc index 4d38f50c9cb0b4bfc3c46b3b861ffd3e74cebc22..d81691c2de66ecdfec2d65139f8daab118e587d8 100644 --- a/goo/GooString.cc +++ b/goo/GooString.cc @@ -18,76 +18,119 @@ #include #include #include +#include #include "gtypes.h" #include "GooString.h" -static inline int size(int len) { +int inline GooString::roundedSize(int len) { int delta; - + if (len <= STR_STATIC_SIZE-1) + return STR_STATIC_SIZE; delta = len < 256 ? 7 : 255; return ((len + 1) + delta) & ~delta; } -inline void GooString::resize(int length1) { - char *s1; - - if (!s) { - s = new char[size(length1)]; - } else if (size(length1) != size(length)) { - s1 = new char[size(length1)]; - if (length1 < length) { - memcpy(s1, s, length1); - s1[length1] = '\0'; - } else { - memcpy(s1, s, length + 1); +// Make sure that the buffer is big enough to contain characters +// plus terminating 0. +// We assume that if this is being called from the constructor, was set +// to NULL and was set to 0 to indicate unused string before calling us. 
+void inline GooString::resize(int newLength) { + char *s1 = s; + + if (!s || (roundedSize(length) != roundedSize(newLength))) { + // requires re-allocating data for string + if (newLength < STR_STATIC_SIZE) + s1 = sStatic; + else + s1 = new char[roundedSize(newLength)]; + + // we had to re-allocate the memory, so copy the content of previous + // buffer into a new buffer + if (s) { + if (newLength < length) { + memcpy(s1, s, newLength); + } else { + memcpy(s1, s, length); + } } - delete[] s; - s = s1; + if (s != sStatic) + delete[] s; } + + s = s1; + length = newLength; + s[length] = '\0'; +} + +GooString* GooString::Set(const char *s1, int s1Len, const char *s2, int s2Len) +{ + int newLen = 0; + char *p; + + if (s1) { + if (CALC_STRING_LEN == s1Len) { + s1Len = strlen(s1); + } else + assert(s1Len >= 0); + newLen += s1Len; + } + + if (s2) { + if (CALC_STRING_LEN == s2Len) { + s2Len = strlen(s2); + } else + assert(s2Len >= 0); + newLen += s2Len; + } + + resize(newLen); + p = s; + if (s1) { + memcpy(p, s1, s1Len); + p += s1Len; + } + if (s2) { + memcpy(p, s2, s2Len); + p += s2Len; + } + return this; } GooString::GooString() { s = NULL; - resize(length = 0); - s[0] = '\0'; + length = 0; + Set(NULL); } GooString::GooString(const char *sA) { - int n = strlen(sA); - s = NULL; - resize(length = n); - memcpy(s, sA, n + 1); + length = 0; + Set(sA, CALC_STRING_LEN); } GooString::GooString(const char *sA, int lengthA) { s = NULL; - resize(length = lengthA); - memcpy(s, sA, length * sizeof(char)); - s[length] = '\0'; + length = 0; + Set(sA, lengthA); } GooString::GooString(GooString *str, int idx, int lengthA) { s = NULL; - resize(length = lengthA); - memcpy(s, str->getCString() + idx, length); - s[length] = '\0'; + length = 0; + assert(idx + lengthA < str->length); + Set(str->getCString() + idx, lengthA); } GooString::GooString(GooString *str) { s = NULL; - resize(length = str->getLength()); - memcpy(s, str->getCString(), length + 1); + length = 0; + Set(str->getCString(), 
str->length); } GooString::GooString(GooString *str1, GooString *str2) { - int n1 = str1->getLength(); - int n2 = str2->getLength(); - s = NULL; - resize(length = n1 + n2); - memcpy(s, str1->getCString(), n1); - memcpy(s + n1, str2->getCString(), n2 + 1); + length = 0; + Set(str1->getCString(), str1->length, str2->getCString(), str2->length); } GooString *GooString::fromInt(int x) { @@ -117,91 +160,50 @@ GooString *GooString::fromInt(int x) { } GooString::~GooString() { - delete[] s; + if (s != sStatic) + delete[] s; } GooString *GooString::clear() { - s[length = 0] = '\0'; resize(0); return this; } GooString *GooString::append(char c) { - resize(length + 1); - s[length++] = c; - s[length] = '\0'; - return this; + return append((const char*)&c, 1); } GooString *GooString::append(GooString *str) { - int n = str->getLength(); - - resize(length + n); - memcpy(s + length, str->getCString(), n + 1); - length += n; - return this; -} - -GooString *GooString::append(const char *str) { - int n = strlen(str); - - resize(length + n); - memcpy(s + length, str, n + 1); - length += n; - return this; + return append(str->getCString(), str->getLength()); } GooString *GooString::append(const char *str, int lengthA) { + int prevLen = length; + if (CALC_STRING_LEN == lengthA) + lengthA = strlen(str); resize(length + lengthA); - memcpy(s + length, str, lengthA); - length += lengthA; - s[length] = '\0'; + memcpy(s + prevLen, str, lengthA); return this; } GooString *GooString::insert(int i, char c) { - int j; - - resize(length + 1); - for (j = length + 1; j > i; --j) - s[j] = s[j-1]; - s[i] = c; - ++length; - return this; + return insert(i, (const char*)&c, 1); } GooString *GooString::insert(int i, GooString *str) { - int n = str->getLength(); - int j; - - resize(length + n); - for (j = length; j >= i; --j) - s[j+n] = s[j]; - memcpy(s+i, str->getCString(), n); - length += n; - return this; -} - -GooString *GooString::insert(int i, const char *str) { - int n = strlen(str); - int j; - - 
resize(length + n); - for (j = length; j >= i; --j) - s[j+n] = s[j]; - memcpy(s+i, str, n); - length += n; - return this; + return insert(i, str->getCString(), str->getLength()); } GooString *GooString::insert(int i, const char *str, int lengthA) { int j; + int prevLen = length; + if (CALC_STRING_LEN == lengthA) + lengthA = strlen(str); resize(length + lengthA); - for (j = length; j >= i; --j) + for (j = prevLen; j >= i; --j) s[j+lengthA] = s[j]; memcpy(s+i, str, lengthA); - length += lengthA; return this; } @@ -215,7 +217,7 @@ GooString *GooString::del(int i, int n) { for (j = i; j <= length - n; ++j) { s[j] = s[j + n]; } - resize(length -= n); + resize(length - n); } return this; } diff --git a/goo/GooString.h b/goo/GooString.h index 3a8f9804a7a5047e726a33f2b3a729756d571026..f0f894539a59239df4a51d7ddc59f32390989a12 100644 --- a/goo/GooString.h +++ b/goo/GooString.h @@ -15,6 +15,7 @@ #pragma interface #endif +#include // for NULL #include "gtypes.h" class GooString { @@ -33,6 +34,12 @@ public: // Create a string from chars at in . GooString(GooString *str, int idx, int lengthA); + // Set content of a string to the concatenation of s1 and s2. They can both + // be NULL. If s1Len or s2Len is CALC_STRING_LEN, then the length of that string + // will be calculated with strlen(). Otherwise we assume each is a valid + // length of its string (or of a substring of it). + GooString* Set(const char *s1, int s1Len=CALC_STRING_LEN, const char *s2=NULL, int s2Len=CALC_STRING_LEN); + // Copy a string. GooString(GooString *str); GooString *copy() { return new GooString(this); } @@ -64,14 +71,12 @@ public: // Append a character or string. GooString *append(char c); GooString *append(GooString *str); - GooString *append(const char *str); - GooString *append(const char *str, int lengthA); + GooString *append(const char *str, int lengthA=CALC_STRING_LEN); // Insert a character or string.
GooString *insert(int i, char c); GooString *insert(int i, GooString *str); - GooString *insert(int i, const char *str); - GooString *insert(int i, const char *str, int lengthA); + GooString *insert(int i, const char *str, int lengthA=CALC_STRING_LEN); // Delete a character or range of characters. GooString *del(int i, int n = 1); @@ -89,11 +94,22 @@ public: GBool hasUnicodeMarker(void); private: - + // You can tweak this number for different speed/memory usage tradeoffs. + // In libc malloc() rounding is 16 so it's best to choose a value that + // results in sizeof(GooString) being a multiple of 16. + // 24 makes sizeof(GooString) to be 32. + static const int STR_STATIC_SIZE = 24; + // a special value telling that the length of the string is not given + // so it must be calculated from the strings + static const int CALC_STRING_LEN = -1; + + int roundedSize(int len); + + char sStatic[STR_STATIC_SIZE]; int length; char *s; - void resize(int length1); + void resize(int newLength); }; #endif diff --git a/goo/gmem.c b/goo/gmem.c index 6a7de57b70c0152f0ad9a0bb9a83b52365a9648a..d0493252f232acddaca394d581efec8cb745cd66 100644 --- a/goo/gmem.c +++ b/goo/gmem.c @@ -196,8 +196,7 @@ void gfree(void *p) { } } #else - if (p) - free(p); + free(p); #endif } diff --git a/goo/gmem.h b/goo/gmem.h index 0435ffea9d360657c93168c5a0558074438e655b..267d1b995cca410e709da5cc23353b2d33155cd8 100644 --- a/goo/gmem.h +++ b/goo/gmem.h @@ -37,7 +37,7 @@ extern void *gmallocn(int nObjs, int objSize); extern void *greallocn(void *p, int nObjs, int objSize); /* - * Same as free, but checks for and ignores NULL pointers. + * #ifdef DEBUG_MEM, adds debugging info. If not, same as free.
*/ extern void gfree(void *p); diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc index 28262b3cb9918ebca9d3693cc124e1cdd0ee6d30..22d4981ab9a6ed22789046787255eb4132e19ca9 100644 --- a/poppler/Lexer.cc +++ b/poppler/Lexer.cc @@ -50,6 +50,7 @@ static char specialChars[256] = { Lexer::Lexer(XRef *xrefA, Stream *str) { Object obj; + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; xref = xrefA; curStr.initStream(str); @@ -63,6 +64,7 @@ Lexer::Lexer(XRef *xrefA, Stream *str) { Lexer::Lexer(XRef *xrefA, Object *obj) { Object obj2; + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; xref = xrefA; if (obj->isStream()) { @@ -90,9 +92,15 @@ Lexer::~Lexer() { } } -int Lexer::getChar() { +int inline Lexer::getChar() { int c; + if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { + c = lookCharLastValueCached; + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; + return c; + } + c = EOF; while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) { curStr.streamClose(); @@ -106,11 +114,12 @@ int Lexer::getChar() { return c; } -int Lexer::lookChar() { - if (curStr.isNone()) { - return EOF; +int inline Lexer::lookChar() { + if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { + return lookCharLastValueCached; } - return curStr.streamLookChar(); + lookCharLastValueCached = getChar(); + return lookCharLastValueCached; } Object *Lexer::getObj(Object *obj, int objNum) { diff --git a/poppler/Lexer.h b/poppler/Lexer.h index 3b8d24c2ae844614d26f9937b5c67331fbef63eb..e57e2c0cbb051ed5222ba43432e3f41f15346d07 100644 --- a/poppler/Lexer.h +++ b/poppler/Lexer.h @@ -63,6 +63,16 @@ public: // Returns true if is a whitespace character. static GBool isSpace(int c); + + // Often (e.g. ~30% on the PDF Reference 1.6 pdf file from the Adobe site) getChar + // is called right after lookChar. In order to avoid expensively re-doing + // getChar() of the underlying stream, we cache the last value found by + // lookChar() in lookCharLastValueCached.
A special value + // LOOK_VALUE_NOT_CACHED that should never be part of stream indicates + // that no value was cached + static const int LOOK_VALUE_NOT_CACHED = -3; + int lookCharLastValueCached; + private: int getChar(); diff --git a/poppler/PageLabelInfo.cc b/poppler/PageLabelInfo.cc index a8bcd723d0fa74dc0b1627098190bbadfb4d27a1..525c04ce31da1b4b719ad7f5a0253aa58ad13a0c 100644 --- a/poppler/PageLabelInfo.cc +++ b/poppler/PageLabelInfo.cc @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/poppler/Parser.cc b/poppler/Parser.cc index 90fab9a57106b0149bf1753ec8b99faffc82fb72..087b88f25380d03d6be8ff00fbab2b49e47b0aab 100644 --- a/poppler/Parser.cc +++ b/poppler/Parser.cc @@ -39,6 +39,7 @@ Parser::~Parser() { Object *Parser::getObj(Object *obj, Guchar *fileKey, int keyLength, int objNum, int objGen) { + UGooString key; Stream *str; Object obj2; int num; @@ -75,14 +76,13 @@ Object *Parser::getObj(Object *obj, error(getPos(), "Dictionary key must be a name object"); shift(); } else { - // buf1 might go away in shift(), so construct the key - UGooString *key = new UGooString(buf1.getName()); + // buf1 might go away in shift(), so construct the key + key.Set(buf1.getName()); shift(); if (buf1.isEOF() || buf1.isError()) { - gfree(key); break; } - obj->dictAddOwnKeyVal(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); + obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); } } if (buf1.isEOF()) @@ -120,8 +120,8 @@ Object *Parser::getObj(Object *obj, s = obj->getString(); decrypt = new Decrypt(fileKey, keyLength, objNum, objGen); for (i = 0, p = obj->getString()->getCString(); - i < s->getLength(); - ++i, ++p) { + i < s->getLength(); + ++i, ++p) { *p = decrypt->decryptByte(*p); } delete decrypt; @@ -174,6 +174,11 @@ Stream *Parser::makeStream(Object *dict) { baseStr = lexer->getStream()->getBaseStream(); // skip over stream data + if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) { + // take into account the fact that 
we've cached one value + pos = pos - 1; + lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED; + } lexer->setPos(pos + length); // refill token buffers and check for 'endstream' diff --git a/poppler/UGooString.cc b/poppler/UGooString.cc index 2c6380e9d99bbaa533728cc9c869fe9bbfecd5cc..8fb6ccba1a1e47bb53439077f25734dabe9fe50b 100644 --- a/poppler/UGooString.cc +++ b/poppler/UGooString.cc @@ -15,61 +15,139 @@ #include "PDFDocEncoding.h" #include "UGooString.h" -UGooString::UGooString(Unicode *u, int l) +int inline UGooString::roundedSize(int len) { + int delta; + if (len <= STR_STATIC_SIZE-1) + return STR_STATIC_SIZE; + delta = len < 256 ? 7 : 255; + return ((len + 1) + delta) & ~delta; +} + +// Make sure that the buffer is big enough to contain characters +// plus terminating 0. +// We assume that if this is being called from the constructor, was set +// to NULL and was set to 0 to indicate unused string before calling us. +void inline UGooString::resize(int newLength) { + Unicode *s1 = s; + + if (!s || (roundedSize(length) != roundedSize(newLength))) { + // requires re-allocating data for string + if (newLength < STR_STATIC_SIZE) + s1 = sStatic; + else + s1 = new Unicode[roundedSize(newLength)]; + + // we had to re-allocate the memory, so copy the content of previous + // buffer into a new buffer + if (s) { + if (newLength < length) { + memcpy(s1, s, newLength); + } else { + memcpy(s1, s, length); + } + } + if (s != sStatic) + delete[] s; + } + + s = s1; + length = newLength; + s[length] = '\0'; +} + +UGooString::UGooString() { - s = u; - length = l; + s = NULL; + length = 0; + resize(0); } UGooString::UGooString(GooString &str) { - if ((str.getChar(0) & 0xff) == 0xfe && (str.getChar(1) & 0xff) == 0xff) + s = NULL; + length = 0; + if (str.hasUnicodeMarker()) { - length = (str.getLength() - 2) / 2; - s = (Unicode *)gmallocn(length, sizeof(Unicode)); + resize((str.getLength() - 2) / 2); for (int j = 0; j < length; ++j) { s[j] = ((str.getChar(2 + 2*j) & 0xff) 
<< 8) | (str.getChar(3 + 2*j) & 0xff); } } else - initChar(str); + Set(str.getCString(), str.getLength()); +} + +UGooString::UGooString(Unicode *u, int strLen) +{ + resize(strLen); + s = u; } UGooString::UGooString(const UGooString &str) { - length = str.length; - s = (Unicode *)gmallocn(length, sizeof(Unicode)); - memcpy(s, str.s, length * sizeof(Unicode)); + s = NULL; + length = 0; + Set(str); } -UGooString::UGooString(const char *str) +UGooString::UGooString(const char *str, int strLen) { - GooString aux(str); - initChar(aux); + s = NULL; + length = 0; + if (CALC_STRING_LEN == strLen) + strLen = strlen(str); + Set(str, strLen); } -void UGooString::initChar(GooString &str) +UGooString *UGooString::Set(const UGooString &str) { - length = str.getLength(); - s = (Unicode *)gmallocn(length, sizeof(Unicode)); - bool anyNonEncoded = false; - for (int j = 0; j < length && !anyNonEncoded; ++j) { - s[j] = pdfDocEncoding[str.getChar(j) & 0xff]; - if (!s[j]) anyNonEncoded = true; + resize(str.length); + memcpy(s, str.s, length * sizeof(Unicode)); + return this; +} + +UGooString* UGooString::Set(const char *str, int strLen) +{ + int j; + bool foundUnencoded = false; + + if (CALC_STRING_LEN == strLen) + strLen = strlen(str); + + resize(strLen); + for (j = 0; !foundUnencoded && j < length; ++j) { + s[j] = pdfDocEncoding[str[j] & 0xff]; + if (!s[j]) { + foundUnencoded = true; + break; + } } - if ( anyNonEncoded ) + if ( foundUnencoded ) { - for (int j = 0; j < length; ++j) { - s[j] = str.getChar(j); + for (j = 0; j < length; ++j) { + s[j] = str[j]; } } + return this; +} + +UGooString *UGooString::clear() +{ + resize(0); + return this; } UGooString::~UGooString() { - gfree(s); + if (s != sStatic) + delete[] s; } -int UGooString::cmp(UGooString *str) const +int UGooString::cmp(const UGooString &str) const +{ + return cmp(&str); +} + +int UGooString::cmp(const UGooString *str) const { int n1, n2, i, x; Unicode *p1, *p2; @@ -85,6 +163,14 @@ int UGooString::cmp(UGooString *str) 
const return n1 - n2; } +// FIXME: +// a) this is confusing because GooString::getCString() returns a pointer +// but UGooString::getCString() returns a newly allocated copy. Should give this +// a different name, like copyAsAscii() or copyAsGooString() +// b) this interface requires copying. It should be changed to take a +// GooString& as a param and put the data inside it so that it uses +// the caching optimization of GooString. Callers should be changed to use +// this new interface char *UGooString::getCString() const { char *res = new char[length + 1]; @@ -92,3 +178,4 @@ char *UGooString::getCString() const res[length] = '\0'; return res; } + diff --git a/poppler/UGooString.h b/poppler/UGooString.h index 9161760d85ad408e8a1da6337e8d04fde7223617..eec656d613cb73b5da5e8b53c3d87fa114648f2c 100644 --- a/poppler/UGooString.h +++ b/poppler/UGooString.h @@ -18,36 +18,60 @@ class GooString; class UGooString { public: - // Create an unicode string - UGooString(Unicode *u, int l); + + // Create an empty unicode string + UGooString(); // Create a unicode string from . UGooString(GooString &str); + // Create a unicode string from u + UGooString(Unicode *u, int strLen); + // Copy the unicode string UGooString(const UGooString &str); // Create a unicode string from . - UGooString(const char *str); + UGooString(const char *str, int strLen = CALC_STRING_LEN); + + UGooString *Set(const char *str, int strLen = CALC_STRING_LEN); + UGooString *Set(const UGooString &str); + + // Set the string to the empty string, freeing all dynamically allocated memory + // as a side effect + UGooString *clear(); - // Destructor. ~UGooString(); - // Get length.
+ void resize(int newLength); + int getLength() const { return length; } // Compare two strings: -1:< 0:= +1:> - int cmp(UGooString *str) const; + int cmp(const UGooString *str) const; + int cmp(const UGooString &str) const; // get the unicode Unicode *unicode() const { return s; } - // get the const char* + // Return a newly allocated copy of the string converted to + // ascii (non-Unicode) format. Caller has to delete [] the result char *getCString() const; private: - void initChar(GooString &str); + // you can tweak this number for a different speed/memory usage tradeoffs. + // In libc malloc() rounding is 16 so it's best to choose a value that + // results in sizeof(UGooString) be a multiple of 16. + // 20 makes sizeof(UGooString) to be 48. + static const int STR_STATIC_SIZE = 20; + // a special value telling that the length of the string is not given + // so it must be calculated from the strings + static const int CALC_STRING_LEN = -1; + + int roundedSize(int len); + void initChar(const char *str, int strLen); + Unicode sStatic[STR_STATIC_SIZE]; int length; Unicode *s; };