Commit 5934c320, authored by Hib Eris and committed by Albert Astals Cid

Linearization improvements

Read
http://lists.freedesktop.org/archives/poppler/2010-November/006642.html
for the detailed patch description
parent abb1313a
......@@ -247,10 +247,12 @@ set(poppler_SRCS
poppler/GfxFont.cc
poppler/GfxState.cc
poppler/GlobalParams.cc
poppler/Hints.cc
poppler/JArithmeticDecoder.cc
poppler/JBIG2Stream.cc
poppler/Lexer.cc
poppler/Link.cc
poppler/Linearization.cc
poppler/LocalPDFDocBuilder.cc
poppler/NameToCharCode.cc
poppler/Object.cc
......@@ -393,10 +395,12 @@ if(ENABLE_XPDF_HEADERS)
poppler/GfxState.h
poppler/GfxState_helpers.h
poppler/GlobalParams.h
poppler/Hints.h
poppler/JArithmeticDecoder.h
poppler/JBIG2Stream.h
poppler/Lexer.h
poppler/Link.h
poppler/Linearization.h
poppler/LocalPDFDocBuilder.h
poppler/Movie.h
poppler/NameToCharCode.h
......
......@@ -425,13 +425,13 @@ find_annot_movie_for_action (PopplerDocument *document,
xref->fetch (ref->num, ref->gen, &annotObj);
} else if (link->hasAnnotTitle ()) {
Catalog *catalog = document->doc->getCatalog ();
Object annots;
GooString *title = link->getAnnotTitle ();
int i;
for (i = 1; i <= document->doc->getNumPages (); ++i) {
Page *p = catalog->getPage (i);
Page *p = document->doc->getPage (i);
if (!p) continue;
if (p->getAnnots (&annots)->isArray ()) {
int j;
......
......@@ -435,15 +435,14 @@ PopplerPage *
poppler_document_get_page (PopplerDocument *document,
int index)
{
Catalog *catalog;
Page *page;
g_return_val_if_fail (0 <= index &&
index < poppler_document_get_n_pages (document),
NULL);
catalog = document->doc->getCatalog();
page = catalog->getPage (index + 1);
page = document->doc->getPage (index + 1);
if (!page) return NULL;
return _poppler_page_new (document, page, index);
}
......@@ -2482,18 +2481,22 @@ PopplerFormField *
poppler_document_get_form_field (PopplerDocument *document,
gint id)
{
Catalog *catalog = document->doc->getCatalog();
Page *page;
unsigned pageNum;
unsigned fieldNum;
FormPageWidgets *widgets;
FormWidget *field;
FormWidget::decodeID (id, &pageNum, &fieldNum);
widgets = catalog->getPage (pageNum)->getPageWidgets ();
page = document->doc->getPage (pageNum);
if (!page)
return NULL;
widgets = page->getPageWidgets ();
if (!widgets)
return NULL;
field = widgets->getWidget (fieldNum);
if (field)
return _poppler_form_field_new (document, field);
......
......@@ -1360,6 +1360,7 @@ poppler_page_render_to_ps (PopplerPage *page,
if (!ps_file->out)
ps_file->out = new PSOutputDev (ps_file->filename,
ps_file->document->doc,
ps_file->document->doc->getXRef(),
ps_file->document->doc->getCatalog(),
NULL,
......
......@@ -24,7 +24,7 @@
//------------------------------------------------------------------------
#define CachedFileChunkSize 8192
#define CachedFileChunkSize 8192 // This should be a multiple of cachedStreamBufSize
class GooString;
class CachedFileLoader;
......
This diff is collapsed.
......@@ -148,13 +148,13 @@ public:
GBool isOk() { return ok; }
// Get number of pages.
int getNumPages() { return numPages; }
int getNumPages();
// Get a page.
Page *getPage(int i) { return pages[i-1]; }
Page *getPage(int i);
// Get the reference for a page object.
Ref *getPageRef(int i) { return &pageRefs[i-1]; }
Ref *getPageRef(int i);
// Return base URI, or NULL if none.
GooString *getBaseURI() { return baseURI; }
......@@ -232,6 +232,11 @@ private:
XRef *xref; // the xref table for this PDF file
Page **pages; // array of pages
Ref *pageRefs; // object ID for each page
int lastCachedPage;
GooVector<Dict *> *pagesList;
GooVector<Ref> *pagesRefList;
GooVector<PageAttrs *> *attrsList;
GooVector<int> *kidsIdxList;
Form *form;
int numPages; // number of pages
int pagesSize; // size of pages array
......@@ -251,8 +256,7 @@ private:
PageMode pageMode; // page mode
PageLayout pageLayout; // page layout
int readPageTree(Dict *pages, PageAttrs *attrs, int start,
char *alreadyRead);
GBool cachePageTree(int page); // Cache first <page> pages.
Object *findDestInTree(Object *tree, GooString *name, Object *obj);
Object *getNames();
......
......@@ -7,6 +7,7 @@
// Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2006 Kouhei Sutou <kou@cozmixng.org>
// Copyright (C) 2009 Pino Toscano <pino@kde.org>
// Copyright 2010 Hib Eris <hib@hiberis.nl>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -70,7 +71,9 @@ GooList *FontInfoScanner::scan(int nPages) {
}
for (int pg = currentPage; pg < lastPage; ++pg) {
page = doc->getCatalog()->getPage(pg);
page = doc->getPage(pg);
if (!page) continue;
if ((resDict = page->getResourceDict())) {
scanFonts(resDict, result);
}
......
This diff is collapsed.
//========================================================================
//
// Hints.h
//
// This file is licensed under the GPLv2 or later
//
// Copyright 2010 Hib Eris <hib@hiberis.nl>
//
//========================================================================
#ifndef HINTS_H
#define HINTS_H
#include <string.h>
#include "goo/gtypes.h"
#include "goo/GooVector.h"
//#include <vector>
#include "PDFDoc.h"
class Stream;
class BaseStream;
class Linearization;
class XRef;
//------------------------------------------------------------------------
// Hints
//------------------------------------------------------------------------
// Hint-table data for a document, built from a stream, its Linearization
// object, the XRef table and a security handler.  The implementation
// (Hints.cc) is not visible from this header, so the notes on the private
// members below are inferred from their names -- NOTE(review): confirm
// against Hints.cc.
class Hints {
public:
// Construct from the document stream and the objects describing it.
Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
~Hints();
// Per-page queries; <page> numbering convention (0- or 1-based) is not
// visible here -- TODO confirm in Hints.cc.
int getPageObjectNum(int page);
Guint getPageOffset(int page);
// Returns a GooVector of ByteRange for <page>; ownership of the returned
// vector is not stated in this header -- TODO confirm.
GooVector<ByteRange>* getPageRanges(int page);
private:
// Table readers used while building the object.
void readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
void readPageOffsetTable(Stream *str);
void readSharedObjectsTable(Stream *str);
// Bit-level readers; presumably backed by inputBits/bitsBuffer below.
Guint readBit(Stream *str);
Guint readBits(int n, Stream *str);
// Presumably copies of the values exposed by Linearization
// (hint stream offsets/lengths, main xref offset, page count, ...).
Guint hintsOffset;
Guint hintsLength;
Guint hintsOffset2;
Guint hintsLength2;
Guint mainXRefEntriesOffset;
int nPages;
int pageFirst;
int pageObjectFirst;
Guint pageOffsetFirst;
Guint pageEndFirst;
int objectNumberFirst;
// Presumably header fields of the page offset hint table.
Guint nObjectLeast;
Guint objectOffsetFirst;
Guint nBitsDiffObjects;
Guint pageLengthLeast;
Guint nBitsDiffPageLength;
Guint OffsetStreamLeast;
Guint nBitsOffsetStream;
Guint lengthStreamLeast;
Guint nBitsLengthStream;
Guint nBitsNumShared;
Guint nBitsShared;
Guint nBitsNumerator;
Guint denominator;
// Dynamically sized arrays; presumably one entry per page and owned by
// this object -- TODO confirm allocation/free in Hints.cc.
Guint *nObjects;
int *pageObjectNum;
Guint *xRefOffset;
Guint *pageLength;
Guint *pageOffset;
Guint *numSharedObject;
Guint **sharedObjectId;
// Presumably shared-object hint table data, one entry per group.
Guint nSharedGroups;
Guint *groupLength;
Guint *groupOffset;
Guint *groupHasSignature;
Guint *groupNumObjects;
Guint *groupXRefOffset;
// Presumably the state of the bit reader (pending bit count and byte).
int inputBits;
char bitsBuffer;
};
#endif
//========================================================================
//
// Linearization.cc
//
// This file is licensed under the GPLv2 or later
//
// Copyright 2010 Hib Eris <hib@hiberis.nl>
//
//========================================================================
#include "Linearization.h"
#include "Parser.h"
#include "Lexer.h"
//------------------------------------------------------------------------
// Linearization
//------------------------------------------------------------------------
// Parse the object at the start of <str> and keep its dictionary as the
// linearization dictionary.
//
// Five objects are read in sequence; the first four are expected to be
// "<int> <int> obj <dict>".  The dictionary is kept in linDict only when
// that header matches AND the dictionary has a "Linearized" entry that is
// a number greater than zero.  In every other case linDict is reset to
// null, so the getters never operate on a dictionary that was not
// validated as a linearization dictionary.
Linearization::Linearization (BaseStream *str)
{
  Parser *parser;
  Object obj1, obj2, obj3, obj4, obj5;
  GBool linearized = gFalse;

  linDict.initNull();

  str->reset();
  obj1.initNull();
  parser = new Parser(NULL,
      new Lexer(NULL, str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
      gFalse);
  parser->getObj(&obj1);
  parser->getObj(&obj2);
  parser->getObj(&obj3);
  parser->getObj(&linDict);
  // The object following the dictionary is read but not inspected.
  parser->getObj(&obj4);

  if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
    linDict.dictLookup("Linearized", &obj5);
    linearized = obj5.isNum() && obj5.getNum() > 0;
    obj5.free();
  }

  // Drop whatever was parsed unless it was validated above; previously a
  // dictionary behind a malformed object header was kept unvalidated.
  if (!linearized) {
    linDict.free();
    linDict.initNull();
  }

  obj4.free();
  obj3.free();
  obj2.free();
  obj1.free();
  delete parser;
}
// Free the stored linearization dictionary object.
Linearization::~Linearization()
{
  linDict.free();
}
// Return the integer stored under "L" in the linearization dictionary.
// Returns 0 without a diagnostic when there is no linearization
// dictionary, and 0 after reporting an error when "L" cannot be read as
// an integer or is not positive.
Guint Linearization::getLength()
{
  int len;

  if (!linDict.isDict()) {
    return 0;
  }

  if (!linDict.getDict()->lookupInt("L", NULL, &len) ||
      len <= 0) {
    error(-1, "Length in linearization table is invalid");
    return 0;
  }

  return len;
}
// Return element 0 of the "H" array of the linearization dictionary.
// The array must have at least two elements and the element must be a
// positive integer; otherwise an error is reported and 0 is returned.
Guint Linearization::getHintsOffset()
{
  Object hArray, entry;
  int offset = 0; // stays 0 unless a valid (positive) value is found

  if (linDict.isDict()) {
    linDict.dictLookup("H", &hArray);
    if (hArray.isArray() && hArray.arrayGetLength() >= 2) {
      hArray.arrayGet(0, &entry);
      if (entry.isInt() && entry.getInt() > 0) {
        offset = entry.getInt();
      }
    }
  }

  if (offset == 0) {
    error(-1, "Hints table offset in linearization table is invalid");
  }

  entry.free();
  hArray.free();

  return offset;
}
// Return element 1 of the "H" array of the linearization dictionary.
// The array must have at least two elements and the element must be a
// positive integer; otherwise an error is reported and 0 is returned.
Guint Linearization::getHintsLength()
{
  Object hArray, entry;
  int len = 0; // stays 0 unless a valid (positive) value is found

  if (linDict.isDict()) {
    linDict.dictLookup("H", &hArray);
    if (hArray.isArray() && hArray.arrayGetLength() >= 2) {
      hArray.arrayGet(1, &entry);
      if (entry.isInt() && entry.getInt() > 0) {
        len = entry.getInt();
      }
    }
  }

  if (len == 0) {
    error(-1, "Hints table length in linearization table is invalid");
  }

  entry.free();
  hArray.free();

  return len;
}
// Return element 2 of the "H" array of the linearization dictionary.
// When the array has fewer than four elements (or there is no such
// array) the result is 0 and nothing is reported.  When the array is
// long enough but the element is not a positive integer, an error is
// reported and 0 is returned.
Guint Linearization::getHintsOffset2()
{
  Object hArray, entry;
  int offset2 = 0; // default to 0

  if (linDict.isDict()) {
    linDict.dictLookup("H", &hArray);
    if (hArray.isArray() && hArray.arrayGetLength() >= 4) {
      hArray.arrayGet(2, &entry);
      if (!entry.isInt() || entry.getInt() <= 0) {
        error(-1, "Second hints table offset in linearization table is invalid");
      } else {
        offset2 = entry.getInt();
      }
    }
  }

  entry.free();
  hArray.free();

  return offset2;
}
// Return element 3 of the "H" array of the linearization dictionary.
// When the array has fewer than four elements (or there is no such
// array) the result is 0 and nothing is reported.  When the array is
// long enough but the element is not a positive integer, an error is
// reported and 0 is returned.
Guint Linearization::getHintsLength2()
{
  Object hArray, entry;
  int len2 = 0; // default to 0

  if (linDict.isDict()) {
    linDict.dictLookup("H", &hArray);
    if (hArray.isArray() && hArray.arrayGetLength() >= 4) {
      hArray.arrayGet(3, &entry);
      if (!entry.isInt() || entry.getInt() <= 0) {
        error(-1, "Second hints table length in linearization table is invalid");
      } else {
        len2 = entry.getInt();
      }
    }
  }

  entry.free();
  hArray.free();

  return len2;
}
// Return the integer stored under "O" in the linearization dictionary.
// Reports an error and returns 0 when there is no linearization
// dictionary, when "O" cannot be read as an integer, or when it is not
// positive.
int Linearization::getObjectNumberFirst()
{
  int objNum = 0;

  if (!linDict.isDict() ||
      !linDict.getDict()->lookupInt("O", NULL, &objNum) ||
      objNum <= 0) {
    error(-1, "Object number of first page in linearization table is invalid");
    return 0;
  }

  return objNum;
}
// Return the integer stored under "E" in the linearization dictionary.
// Reports an error and returns 0 when there is no linearization
// dictionary, when "E" cannot be read as an integer, or when it is not
// positive.
Guint Linearization::getEndFirst()
{
  int endOffset = 0;

  if (!linDict.isDict() ||
      !linDict.getDict()->lookupInt("E", NULL, &endOffset) ||
      endOffset <= 0) {
    error(-1, "First page end offset in linearization table is invalid");
    return 0;
  }

  return endOffset;
}
// Return the integer stored under "N" in the linearization dictionary.
// Reports an error and returns 0 when there is no linearization
// dictionary, when "N" cannot be read as an integer, or when it is not
// positive.
int Linearization::getNumPages()
{
  int pageCount = 0;

  if (!linDict.isDict() ||
      !linDict.getDict()->lookupInt("N", NULL, &pageCount) ||
      pageCount <= 0) {
    error(-1, "Page count in linearization table is invalid");
    return 0;
  }

  return pageCount;
}
// Return the integer stored under "T" in the linearization dictionary.
// Reports an error and returns 0 when there is no linearization
// dictionary, when "T" cannot be read as an integer, or when it is not
// positive.
Guint Linearization::getMainXRefEntriesOffset()
{
  int xrefOffset = 0;

  if (!linDict.isDict() ||
      !linDict.getDict()->lookupInt("T", NULL, &xrefOffset) ||
      xrefOffset <= 0) {
    error(-1, "Main Xref offset in linearization table is invalid");
    return 0;
  }

  return xrefOffset;
}
int Linearization::getPageFirst()
{
int pageFirst = 0; // Optional, defaults to 0.
if (linDict.isDict()) {
linDict.getDict()->lookupInt("P", NULL, &pageFirst);
}
if (pageFirst < 0) {
error(-1, "First page in linearization table is invalid");
return 0;
}
return pageFirst;
}
//========================================================================
//
// Linearization.h
//
// This file is licensed under the GPLv2 or later
//
// Copyright 2010 Hib Eris <hib@hiberis.nl>
//
//========================================================================
#ifndef LINEARIZATION_H
#define LINEARIZATION_H
#include "goo/gtypes.h"
#include "Object.h"
class BaseStream;
//------------------------------------------------------------------------
// Linearization
//------------------------------------------------------------------------
// Accessor for the linearization dictionary found at the start of a
// document stream.  Every getter re-reads its value from the stored
// dictionary on each call and returns 0 when the value is unavailable.
class Linearization {
public:
// Reads the leading "<int> <int> obj <dict>" object from <str> and keeps
// the dictionary when its "Linearized" entry is a number greater than 0.
Linearization(BaseStream *str);
// Frees the stored dictionary.
~Linearization();
// Value of "L".  0 (silently) when no dictionary is stored; 0 after
// reporting an error when "L" is missing or not a positive integer.
Guint getLength();
// Elements 0 and 1 of the "H" array (array needs at least 2 elements,
// element must be a positive integer); otherwise error and 0.
Guint getHintsOffset();
Guint getHintsLength();
// Elements 2 and 3 of the "H" array.  0 without a diagnostic when the
// array has fewer than 4 elements; error and 0 when the element is not
// a positive integer.
Guint getHintsOffset2();
Guint getHintsLength2();
// Value of "O"; error and 0 unless it is a positive integer.
int getObjectNumberFirst();
// Value of "E"; error and 0 unless it is a positive integer.
Guint getEndFirst();
// Value of "N"; error and 0 unless it is a positive integer.
int getNumPages();
// Value of "T"; error and 0 unless it is a positive integer.
Guint getMainXRefEntriesOffset();
// Value of "P", defaulting to 0 when absent; error and 0 when the
// stored value is rejected as invalid.
int getPageFirst();
private:
// The linearization dictionary, or a null object when none was accepted.
Object linDict;
};
#endif
......@@ -206,9 +206,11 @@ poppler_include_HEADERS = \
GfxState.h \
GfxState_helpers.h \
GlobalParams.h \
Hints.h \
JArithmeticDecoder.h \
JBIG2Stream.h \
Lexer.h \
Linearization.h \
Link.h \
LocalPDFDocBuilder.h \
Movie.h \
......@@ -284,9 +286,11 @@ libpoppler_la_SOURCES = \
GfxFont.cc \
GfxState.cc \
GlobalParams.cc \
Hints.cc \
JArithmeticDecoder.cc \
JBIG2Stream.cc \
Lexer.cc \
Linearization.cc \
Link.cc \
LocalPDFDocBuilder.cc \
Movie.cc \
......
This diff is collapsed.
......@@ -49,6 +49,9 @@ class Links;
class LinkAction;
class LinkDest;
class Outline;
class Linearization;
class SecurityHandler;
class Hints;
enum PDFWriteMode {
writeStandard,
......@@ -90,6 +93,9 @@ public:
// Get file name.
GooString *getFileName() { return fileName; }
// Get the linearization table.
Linearization *getLinearization();
// Get the xref table.
XRef *getXRef() { return xref; }
......@@ -104,18 +110,18 @@ public:
// Get page parameters.
double getPageMediaWidth(int page)
{ return catalog->getPage(page)->getMediaWidth(); }
{ return getPage(page) ? getPage(page)->getMediaWidth() : 0.0 ; }
double getPageMediaHeight(int page)
{ return catalog->getPage(page)->getMediaHeight(); }
{ return getPage(page) ? getPage(page)->getMediaHeight() : 0.0 ; }
double getPageCropWidth(int page)
{ return catalog->getPage(page)->getCropWidth(); }
{ return getPage(page) ? getPage(page)->getCropWidth() : 0.0 ; }
double getPageCropHeight(int page)
{ return catalog->getPage(page)->getCropHeight(); }
{ return getPage(page) ? getPage(page)->getCropHeight() : 0.0 ; }
int getPageRotate(int page)
{ return catalog->getPage(page)->getRotate(); }
{ return getPage(page) ? getPage(page)->getRotate() : 0 ; }
// Get number of pages.
int getNumPages() { return catalog->getNumPages(); }
int getNumPages();
// Return the contents of the metadata stream, or NULL if there is
// no metadata.
......@@ -124,6 +130,9 @@ public:
// Return the structure tree root object.
Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
// Get page.
Page *getPage(int page);
// Display a page.
void displayPage(OutputDev *out, int page,
double hDPI, double vDPI, int rotate,
......@@ -233,12 +242,23 @@ private:
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);
Page *parsePage(int page);
// Get hints.
Hints *getHints();
PDFDoc();
void init();
GBool setup(GooString *ownerPassword, GooString *userPassword);
GBool checkFooter();
void checkHeader();
GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
// Get the offset of the start xref table.
Guint getStartXRef();
// Get the offset of the entries in the main XRef table of a
// linearized document (0 for non linearized documents).
Guint getMainXRefEntriesOffset();
Guint strToUnsigned(char *s);
GooString *fileName;
FILE *file;
......@@ -246,17 +266,23 @@ private:
void *guiData;
int pdfMajorVersion;
int pdfMinorVersion;
Linearization *linearization;
XRef *xref;
SecurityHandler *secHdlr;
Catalog *catalog;
Hints *hints;
#ifndef DISABLE_OUTLINE
Outline *outline;
#endif
Page **pageCache;
GBool ok;
int errCode;
//If there is an error opening the PDF file with fopen() in the constructor,
//then the POSIX errno will be here.
int fopenErrno;
Guint startXRefPos; // offset of last xref table
};
#endif
......@@ -19,7 +19,7 @@
// Copyright (C) 2006 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2007, 2008 Brad Hards <bradh@kde.org>
// Copyright (C) 2008, 2009 Koji Otani <sho@bbr.jp>
// Copyright (C) 2008 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2008, 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2009, 2010 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2009 Till Kamppeter <till.kamppeter@gmail.com>
// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
......@@ -70,6 +70,7 @@
# include "SplashOutputDev.h"
#endif
#include "PSOutputDev.h"
#include "PDFDoc.h"
#ifdef MACOS
// needed for setting type/creator of MacOS files
......@@ -974,7 +975,7 @@ static void outputToFile(void *stream, char *data, int len) {
fwrite(data, 1, len, (FILE *)stream);
}
PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
PSOutputDev::PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
char *psTitle,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA, int paperHeightA, GBool duplexA,
......@@ -1035,13 +1036,14 @@ PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
}
init(outputToFile, f, fileTypeA, psTitle,
xrefA, catalog, firstPage, lastPage, modeA,
doc, xrefA, catalog, firstPage, lastPage, modeA,
imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
paperWidthA, paperHeightA, duplexA);
}
PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
char *psTitle,
PDFDoc *doc,
XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA, int paperHeightA, GBool duplexA,
......@@ -1070,18 +1072,17 @@ PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
forceRasterize = forceRasterizeA;
init(outputFuncA, outputStreamA, psGeneric, psTitle,
xrefA, catalog, firstPage, lastPage, modeA,
doc, xrefA, catalog, firstPage, lastPage, modeA,
imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
paperWidthA, paperHeightA, duplexA);
}
void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
GBool manualCtrlA, int paperWidthA, int paperHeightA,
GBool duplexA) {
Page *page;
PDFRectangle *box;
// initialize
......@@ -1101,12 +1102,12 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
imgURX = imgURXA;
imgURY = imgURYA;
if (paperWidth < 0 || paperHeight < 0) {
// this check is needed in case the document has zero pages
if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
page = catalog->getPage(firstPage);
Page *page;
if ((page = doc->getPage(firstPage))) {
paperWidth = (int)ceil(page->getMediaWidth());
paperHeight = (int)ceil(page->getMediaHeight());