Commit ad6e7d86 authored by Albert Astals Cid

Brad patch for embedded document extraction, only has Qt4 bindings for now, needs Qt3 and glib work

parent 550fb0b6
2006-01-18 Albert Astals Cid <aacid@kde.org>
* glib/poppler-action.cc:
* glib/poppler-document.cc:
* poppler/Annot.cc:
* poppler/Catalog.cc:
* poppler/Catalog.h:
* poppler/Dict.cc:
* poppler/Dict.h:
* poppler/FontInfo.cc:
* poppler/Function.cc:
* poppler/Gfx.cc:
* poppler/GfxFont.cc:
* poppler/GfxState.cc:
* poppler/Link.cc:
* poppler/Link.h:
* poppler/Makefile.am:
* poppler/Object.h:
* poppler/Outline.cc:
* poppler/PDFDoc.cc:
* poppler/PDFDoc.h:
* poppler/PSOutputDev.cc:
* poppler/Page.cc:
* poppler/PageLabelInfo.cc:
* poppler/Parser.cc:
* poppler/SecurityHandler.cc:
* poppler/Stream.cc:
* poppler/XRef.cc:
* qt/poppler-document.cc:
* qt/poppler-page-transition.cc:
* qt4/src/Makefile.am:
* qt4/src/poppler-document.cc:
* qt4/src/poppler-private.h:
* qt4/src/poppler-qt4.h:
* qt4/tests/Makefile.am:
* utils/HtmlOutputDev.cc:
* utils/pdffonts.cc:
* utils/pdfinfo.cc:
* utils/pdftohtml.cc:
* utils/pdftotext.cc: Brad patch for embedded document extraction,
only has Qt4 bindings for now, needs Qt3 and glib work
2006-01-18 Albert Astals Cid <aacid@kde.org>
* qt/poppler-page-transition.h:
......
......@@ -182,7 +182,7 @@ build_goto_dest (PopplerDocument *document,
LinkGoTo *link)
{
LinkDest *link_dest;
GooString *named_dest;
UGooString *named_dest;
/* Return if it isn't OK */
if (! link->isOk ()) {
......
......@@ -28,6 +28,7 @@
#include <Stream.h>
#include <FontInfo.h>
#include <PDFDocEncoding.h>
#include <UGooString.h>
#include "poppler.h"
#include "poppler-private.h"
......
......@@ -18,6 +18,7 @@
#include "Catalog.h"
#include "Gfx.h"
#include "Lexer.h"
#include "UGooString.h"
#include "Annot.h"
//------------------------------------------------------------------------
......
......@@ -23,6 +23,7 @@
#include "Error.h"
#include "Link.h"
#include "PageLabelInfo.h"
#include "UGooString.h"
#include "Catalog.h"
//------------------------------------------------------------------------
......@@ -83,11 +84,14 @@ Catalog::Catalog(XRef *xrefA) {
// read named destination dictionary
catDict.dictLookup("Dests", &dests);
// read root of named destination tree
// read root of named destination tree - PDF1.6 table 3.28
if (catDict.dictLookup("Names", &obj)->isDict()) {
obj.dictLookup("Dests", &obj2);
destNameTree.init(xref, &obj2);
obj2.free();
obj.dictLookup("EmbeddedFiles", &obj2);
embeddedFileNameTree.init(xref, &obj2);
obj2.free();
}
obj.free();
......@@ -178,6 +182,7 @@ Catalog::~Catalog() {
}
dests.free();
destNameTree.free();
embeddedFileNameTree.free();
if (baseURI) {
delete baseURI;
}
......@@ -291,7 +296,7 @@ int Catalog::findPage(int num, int gen) {
return 0;
}
LinkDest *Catalog::findDest(GooString *name) {
LinkDest *Catalog::findDest(UGooString *name) {
LinkDest *dest;
Object obj1, obj2;
GBool found;
......@@ -299,7 +304,7 @@ LinkDest *Catalog::findDest(GooString *name) {
// try named destination dictionary then name tree
found = gFalse;
if (dests.isDict()) {
if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
if (!dests.dictLookup(*name, &obj1)->isNull())
found = gTrue;
else
obj1.free();
......@@ -335,6 +340,97 @@ LinkDest *Catalog::findDest(GooString *name) {
return dest;
}
EmbFile *Catalog::embeddedFile(int i)
{
Object efDict;
Object fileSpec;
Object fileDesc;
Object paramDict;
Object paramObj;
Object strObj;
Object obj, obj2;
obj = embeddedFileNameTree.getValue(i);
GooString *fileName = new GooString();
char *descString = embeddedFileNameTree.getName(i)->getCString();
GooString *desc = new GooString(descString);
delete[] descString;
GooString *createDate = new GooString();
GooString *modDate = new GooString();
Stream *efStream;
if (obj.isRef()) {
if (obj.fetch(xref, &efDict)->isDict()) {
// efDict matches Table 3.40 in the PDF1.6 spec
efDict.dictLookup("F", &fileSpec);
if (fileSpec.isString()) {
delete fileName;
fileName = new GooString(fileSpec.getString());
}
fileSpec.free();
// the logic here is that the description from the name
// dictionary is used if we don't have a more specific
// description - see the Note: on page 157 of the PDF1.6 spec
efDict.dictLookup("Desc", &fileDesc);
if (fileDesc.isString()) {
delete desc;
desc = new GooString(fileDesc.getString());
} else {
efDict.dictLookup("Description", &fileDesc);
if (fileDesc.isString()) {
delete desc;
desc = new GooString(fileDesc.getString());
}
}
fileDesc.free();
efDict.dictLookup("EF", &obj2);
if (obj2.isDict()) {
// This gives us the raw data stream bytes
obj2.dictLookup("F", &strObj);
if (strObj.isStream()) {
efStream = strObj.getStream();
}
// dataDict corresponds to Table 3.41 in the PDF1.6 spec.
Dict *dataDict = efStream->getDict();
// subtype is normally mimetype. You can extract it with code like this:
// Object subtypeName;
// dataDict->lookup( "Subtype", &subtypeName );
// It is optional, so this will sometimes return a null object
// if (subtypeName.isName()) {
// std::cout << "got subtype name: " << subtypeName.getName() << std::endl;
// }
// paramDict corresponds to Table 3.42 in the PDF1.6 spec
Object paramDict;
dataDict->lookup( "Params", &paramDict );
if (paramDict.isDict()) {
paramDict.dictLookup("ModDate", &paramObj);
if (paramObj.isString()) {
delete modDate;
modDate = new GooString(paramObj.getString());
}
paramObj.free();
paramDict.dictLookup("CreationDate", &paramObj);
if (paramObj.isString()) {
delete createDate;
createDate = new GooString(paramObj.getString());
}
paramObj.free();
}
paramDict.free();
}
efDict.free();
obj2.free();
}
}
EmbFile *embeddedFile = new EmbFile(fileName, desc, createDate, modDate, strObj);
strObj.free();
return embeddedFile;
}
NameTree::NameTree(void)
{
size = 0;
......@@ -343,12 +439,15 @@ NameTree::NameTree(void)
}
NameTree::Entry::Entry(Array *array, int index) {
if (!array->getString(index, &name) || !array->getNF(index + 1, &value))
error(-1, "Invalid page tree");
GooString n;
if (!array->getString(index, &n) || !array->getNF(index + 1, &value))
error(-1, "Invalid page tree");
name = new UGooString(n);
}
// Release what the entry owns: the heap-allocated key created in the
// constructor and the value object read from the name tree array.
NameTree::Entry::~Entry()
{
  delete name;
  value.free();
}
void NameTree::addEntry(Entry *entry)
......@@ -402,13 +501,13 @@ void NameTree::parse(Object *tree) {
int NameTree::Entry::cmp(const void *voidKey, const void *voidEntry)
{
GooString *key = (GooString *) voidKey;
UGooString *key = (UGooString *) voidKey;
Entry *entry = *(NameTree::Entry **) voidEntry;
return key->cmp(&entry->name);
return key->cmp(entry->name);
}
GBool NameTree::lookup(GooString *name, Object *obj)
GBool NameTree::lookup(UGooString *name, Object *obj)
{
Entry **entry;
......@@ -424,6 +523,24 @@ GBool NameTree::lookup(GooString *name, Object *obj)
}
}
// Iterator accessor: return the value stored in entry <index>.
//
// The result is a plain copy of the Object held by the entry, which
// the entry frees in its destructor.  A default-constructed Object is
// returned when <index> is negative or not below the entry count.
Object NameTree::getValue(int index)
{
  if (index >= 0 && index < length) {
    return entries[index]->value;
  } else {
    return Object();
  }
}
// Iterator accessor: return the key of entry <index>.
//
// The returned pointer is the entry's own name, which the entry
// deletes in its destructor.  NULL is returned when <index> is
// negative or not below the entry count.
UGooString *NameTree::getName(int index)
{
  if (index >= 0 && index < length) {
    return entries[index]->name;
  } else {
    return NULL;
  }
}
void NameTree::free()
{
int i;
......
......@@ -19,6 +19,7 @@ class Page;
class PageAttrs;
struct Ref;
class LinkDest;
class UGooString;
class PageLabelInfo;
//------------------------------------------------------------------------
......@@ -30,14 +31,18 @@ public:
NameTree();
void init(XRef *xref, Object *tree);
void parse(Object *tree);
GBool lookup(GooString *name, Object *obj);
GBool lookup(UGooString *name, Object *obj);
void free();
int numEntries() { return length; };
// iterator accessor
Object getValue(int i);
UGooString *getName(int i);
private:
struct Entry {
Entry(Array *array, int index);
~Entry();
GooString name;
UGooString *name;
Object value;
void free();
static int cmp(const void *key, const void *entry);
......@@ -48,7 +53,45 @@ private:
XRef *xref;
Object *root;
Entry **entries;
int size, length;
int size, length; // size is the number of entries in
// the array of Entry*
// length is the number of real Entry
};
// Description of one file embedded at the document level.
//
// An EmbFile takes ownership of the four GooStrings handed to its
// constructor and deletes them in its destructor.  It also keeps its
// own copy of the stream object and frees that copy on destruction.
class EmbFile {
public:
  // <name>, <description>, <createDate> and <modDate> are adopted.
  // <objStr> is copied with Object::copy(), so the caller remains
  // responsible for freeing its own object.
  EmbFile(GooString *name, GooString *description,
          GooString *createDate,
          GooString *modDate, Object objStr) :
    m_name(name),
    m_description(description),
    m_createDate(createDate),
    m_modDate(modDate)
  {
    objStr.copy(&m_objStr);
  }

  ~EmbFile()
  {
    delete m_name;
    delete m_description;
    delete m_modDate;
    delete m_createDate;
    m_objStr.free();
  }

  // Accessors.  The returned pointers and reference stay owned by
  // this EmbFile.
  GooString *name() { return m_name; }
  GooString *description() { return m_description; }
  GooString *modDate() { return m_modDate; }
  GooString *createDate() { return m_createDate; }
  Object &streamObject() { return m_objStr; }

private:
  // Not copyable: a member-wise copy would make two instances delete
  // the same strings and free the same stream object.  Declared but
  // intentionally left undefined.
  EmbFile(const EmbFile &);
  EmbFile &operator=(const EmbFile &);

  GooString *m_name;
  GooString *m_description;
  GooString *m_createDate;
  GooString *m_modDate;
  Object m_objStr;
};
//------------------------------------------------------------------------
......@@ -92,7 +135,13 @@ public:
// Find a named destination. Returns the link destination, or
// NULL if <name> is not a destination.
LinkDest *findDest(GooString *name);
LinkDest *findDest(UGooString *name);
// Get the number of embedded files
int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
// Get the i'th file embedded (at the Document level) in the document
EmbFile *embeddedFile(int i);
// Convert between page indices and page labels.
GBool labelToIndex(GooString *label, int *index);
......@@ -132,7 +181,8 @@ private:
int numPages; // number of pages
int pagesSize; // size of pages array
Object dests; // named destination dictionary
NameTree destNameTree; // name tree
NameTree destNameTree; // named destination name-tree
NameTree embeddedFileNameTree; // embedded file name-tree
GooString *baseURI; // base URI for URI-type links
Object metadata; // metadata stream
Object structTreeRoot; // structure tree root dictionary
......
......@@ -16,6 +16,7 @@
#include <string.h>
#include "goo/gmem.h"
#include "Object.h"
#include "UGooString.h"
#include "XRef.h"
#include "Dict.h"
......@@ -34,13 +35,13 @@ Dict::~Dict() {
int i;
for (i = 0; i < length; ++i) {
gfree(entries[i].key);
delete entries[i].key;
entries[i].val.free();
}
gfree(entries);
}
void Dict::add(char *key, Object *val) {
void Dict::add(const UGooString &key, Object *val) {
if (length == size) {
if (length == 0) {
size = 8;
......@@ -49,16 +50,16 @@ void Dict::add(char *key, Object *val) {
}
entries = (DictEntry *)greallocn(entries, size, sizeof(DictEntry));
}
entries[length].key = key;
entries[length].key = new UGooString(key);
entries[length].val = *val;
++length;
}
inline DictEntry *Dict::find(char *key) {
inline DictEntry *Dict::find(const UGooString &key) {
int i;
for (i = 0; i < length; ++i) {
if (!strcmp(key, entries[i].key))
if (!key.cmp(entries[i].key))
return &entries[i];
}
return NULL;
......@@ -70,13 +71,13 @@ GBool Dict::is(char *type) {
return (e = find("Type")) && e->val.isName(type);
}
Object *Dict::lookup(char *key, Object *obj) {
Object *Dict::lookup(const UGooString &key, Object *obj) {
DictEntry *e;
return (e = find(key)) ? e->val.fetch(xref, obj) : obj->initNull();
}
Object *Dict::lookupNF(char *key, Object *obj) {
Object *Dict::lookupNF(const UGooString &key, Object *obj) {
DictEntry *e;
return (e = find(key)) ? e->val.copy(obj) : obj->initNull();
......@@ -102,7 +103,7 @@ GBool Dict::lookupInt(const char *key, const char *alt_key, int *value)
return success;
}
char *Dict::getKey(int i) {
UGooString *Dict::getKey(int i) {
return entries[i].key;
}
......
......@@ -15,12 +15,13 @@
#include "Object.h"
class UGooString;
//------------------------------------------------------------------------
// Dict
//------------------------------------------------------------------------
struct DictEntry {
char *key;
UGooString *key;
Object val;
};
......@@ -40,20 +41,20 @@ public:
// Get number of entries.
int getLength() { return length; }
// Add an entry. NB: does not copy key.
void add(char *key, Object *val);
// Add an entry
void add(const UGooString &key, Object *val);
// Check if dictionary is of specified type.
GBool is(char *type);
// Look up an entry and return the value. Returns a null object
// if <key> is not in the dictionary.
Object *lookup(char *key, Object *obj);
Object *lookupNF(char *key, Object *obj);
Object *lookup(const UGooString &key, Object *obj);
Object *lookupNF(const UGooString &key, Object *obj);
GBool lookupInt(const char *key, const char *alt_key, int *value);
// Iterative accessors.
char *getKey(int i);
UGooString *getKey(int i);
Object *getVal(int i, Object *obj);
Object *getValNF(int i, Object *obj);
......@@ -70,7 +71,7 @@ private:
int length; // number of entries in dictionary
int ref; // reference count
DictEntry *find(char *key);
DictEntry *find(const UGooString &key);
};
#endif
......@@ -12,6 +12,7 @@
#include "PDFDoc.h"
#include "config.h"
#include "FontInfo.h"
#include "UGooString.h"
FontInfoScanner::FontInfoScanner(PDFDoc *docA) {
doc = docA;
......
......@@ -22,6 +22,7 @@
#include "Stream.h"
#include "Error.h"
#include "Function.h"
#include "UGooString.h"
//------------------------------------------------------------------------
// Function
......
......@@ -35,6 +35,7 @@
#include "Error.h"
#include "Gfx.h"
#include "ProfileData.h"
#include "UGooString.h"
// the MSVC math.h doesn't define this
#ifndef M_PI
......
......@@ -28,6 +28,7 @@
#include <fofi/FoFiType1.h>
#include <fofi/FoFiType1C.h>
#include <fofi/FoFiTrueType.h>
#include "UGooString.h"
#include "GfxFont.h"
//------------------------------------------------------------------------
......@@ -1549,8 +1550,10 @@ GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
r.gen = 999999;
}
}
fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i),
char *aux = fontDict->getKey(i)->getCString();
fonts[i] = GfxFont::makeFont(xref, aux,
r, obj2.getDict());
delete[] aux;
if (fonts[i] && !fonts[i]->isOk()) {
delete fonts[i];
fonts[i] = NULL;
......
......@@ -21,6 +21,7 @@
#include "Array.h"
#include "Page.h"
#include "GfxState.h"
#include "UGooString.h"
//------------------------------------------------------------------------
......
......@@ -21,6 +21,7 @@
#include "Array.h"
#include "Dict.h"
#include "Link.h"
#include "UGooString.h"
//------------------------------------------------------------------------
// LinkAction
......@@ -421,9 +422,9 @@ LinkGoTo::LinkGoTo(Object *destObj) {
// named destination
if (destObj->isName()) {
namedDest = new GooString(destObj->getName());
namedDest = new UGooString(destObj->getName());
} else if (destObj->isString()) {
namedDest = destObj->getString()->copy();
namedDest = new UGooString(*destObj->getString());
// destination dictionary
} else if (destObj->isArray()) {
......@@ -459,9 +460,9 @@ LinkGoToR::LinkGoToR(Object *fileSpecObj, Object *destObj) {
// named destination
if (destObj->isName()) {
namedDest = new GooString(destObj->getName());
namedDest = new UGooString(destObj->getName());
} else if (destObj->isString()) {
namedDest = destObj->getString()->copy();
namedDest = new UGooString(*destObj->getString());
// destination dictionary
} else if (destObj->isArray()) {
......
......@@ -16,6 +16,7 @@
#include "Object.h"
class GooString;
class UGooString;
class Array;
class Dict;
......@@ -134,13 +135,13 @@ public:
// Accessors.
virtual LinkActionKind getKind() { return actionGoTo; }
LinkDest *getDest() { return dest; }
GooString *getNamedDest() { return namedDest; }
UGooString *getNamedDest() { return namedDest; }
private:
LinkDest *dest; // regular destination (NULL for remote
// link with bad destination)
GooString *namedDest; // named destination (only one of dest and
UGooString *namedDest; // named destination (only one of dest and
// and namedDest may be non-NULL)
};
......@@ -165,14 +166,14 @@ public:
virtual LinkActionKind getKind() { return actionGoToR; }
GooString *getFileName() { return fileName; }
LinkDest *getDest() { return dest; }
GooString *getNamedDest() { return namedDest; }
UGooString *getNamedDest() { return namedDest; }
private:
GooString *fileName; // file name
LinkDest *dest; // regular destination (NULL for remote
// link with bad destination)
GooString *namedDest; // named destination (only one of dest and
UGooString *namedDest; // named destination (only one of dest and
// and namedDest may be non-NULL)
};
......
......@@ -146,6 +146,7 @@ poppler_include_HEADERS = \
PSOutputDev.h \
TextOutputDev.h \
SecurityHandler.h \
UGooString.h \
UTF8.h \
XpdfPluginAPI.h \
poppler-config.h
......@@ -199,4 +200,5 @@ libpoppler_la_SOURCES = \
PageLabelInfo.h \
PageLabelInfo.cc \
SecurityHandler.cc \
UGooString.cc \
XpdfPluginAPI.cc
......@@ -23,6 +23,7 @@ class XRef;
class Array;
class Dict;
class Stream;
class UGooString;
//------------------------------------------------------------------------
// Ref
......@@ -163,11 +164,11 @@ public:
// Dict accessors.
int dictGetLength();
void dictAdd(char *key, Object *val);
void dictAdd(const UGooString &key, Object *val);
GBool dictIs(char *dictType);
Object *dictLookup(char *key, Object *obj);
Object *dictLookupNF(char *key, Object *obj);
char *dictGetKey(int i);
Object *dictLookup(const UGooString &key, Object *obj);
Object *dictLookupNF(const UGooString &key, Object *obj);
UGooString *dictGetKey(int i);
Object *dictGetVal(int i, Object *obj);
Object *dictGetValNF(int i, Object *obj);
......@@ -238,7 +239,7 @@ inline Object *Object::arrayGetNF(int i, Object *obj)
inline int Object::dictGetLength()
{ return dict->getLength(); }
inline void Object::dictAdd(char *key, Object *val)
inline void Object::dictAdd(const UGooString &key, Object *val)
{ dict->add(key, val); }
inline GBool Object::dictIs(char *dictType)
......@@ -247,13 +248,13 @@ inline GBool Object::dictIs(char *dictType)
inline GBool Object::isDict(char *dictType)
{ return type == objDict && dictIs(dictType); }
inline Object *Object::dictLookup(char *key, Object *obj)
inline Object *Object::dictLookup(const UGooString &key, Object *obj)
{ return dict->lookup(key, obj); }
inline Object *Object::dictLookupNF(char *key, Object *obj)
inline Object *Object::dictLookupNF(const UGooString &key, Object *obj)
{ return dict->lookupNF(key, obj); }
inline char *Object::dictGetKey(int i)
inline UGooString *Object::dictGetKey(int i)
{ return dict->getKey(i); }
inline Object *Object::dictGetVal(int i, Object *obj)
......
......@@ -18,6 +18,7 @@
#include "Link.h"
#include "PDFDocEncoding.h"
#include "Outline.h"
#include "UGooString.h"
//------------------------------------------------------------------------
......
......@@ -38,6 +38,7 @@
#include "Outline.h"
#endif
#include "PDFDoc.h"
#include "UGooString.h"
//------------------------------------------------------------------------
......
......@@ -123,7 +123,7 @@ public:
// Find a named destination. Returns the link destination, or
// NULL if <name> is not a destination.
LinkDest *findDest(GooString *name)
LinkDest *findDest(UGooString *name)
{ return catalog->findDest(name); }
#ifndef DISABLE_OUTLINE
......
......@@ -35,6 +35,7 @@
#include "Stream.h"
#include "Annot.h"
#include "PSOutputDev.h"
#include "UGooString.h"
#ifdef MACOS
// needed for setting type/creator of MacOS files
......@@ -2188,7 +2189,9 @@ void PSOutputDev::setupType3Font(GfxFont *font, GooString *psName,
t3Cacheable = gFalse;
for (i = 0; i < charProcs->getLength(); ++i) {
writePS("/");
writePSName(charProcs->getKey(i));
char *aux = charProcs->getKey(i)->getCString();
writePSName(aux);
delete[] aux;
writePS(" {\n");
gfx->display(charProcs->getVal(i, &charProc));
charProc.free();
......
......@@ -29,6 +29,7 @@
#endif
#include "Error.h"
#include "Page.h"
#include "UGooString.h"
//------------------------------------------------------------------------
// PageAttrs
......