Commit bba57e58 authored by Carlos Garcia Campos's avatar Carlos Garcia Campos

xpdf303: Added the pdfdetach tool

I haven't merged the xpdf code for embedded files; I think our
implementation is better and more complete. I've adapted the pdfdetach
code to use our code and to also return embedded files of file attachment
annotations, to match what xpdf does.
parent f62c2f00
......@@ -79,6 +79,28 @@ EmbFile::~EmbFile()
m_objStr.free();
}
// Writes the contents of the embedded file's stream to the file at <path>.
//
// The output file is opened in binary mode ("wb"), so an existing file at
// <path> is truncated.
//
// Returns gTrue only if the file could be opened, save2() succeeded and
// the file was closed without error; returns gFalse otherwise.
GBool EmbFile::save(const char *path) {
  FILE *f;
  GBool ret;

  // Without a stream there is nothing to write; bail out before the
  // output file gets created/truncated.
  if (!m_objStr.isStream()) {
    return gFalse;
  }
  if (!(f = fopen(path, "wb"))) {
    return gFalse;
  }
  ret = save2(f);
  // fclose() flushes any buffered data, so a failure here (e.g. disk
  // full) means the file on disk is incomplete and must be reported.
  if (fclose(f) != 0) {
    ret = gFalse;
  }
  return ret;
}
// Copies the embedded file's stream, byte by byte, into the already-open
// file <f>.  The stream is reset first so the copy starts at its
// beginning.  <f> is not closed here.
//
// Returns gFalse if the wrapped object is not a stream or if a write to
// <f> fails; returns gTrue once the whole stream has been written.
GBool EmbFile::save2(FILE *f) {
  int c;

  if (!m_objStr.isStream()) {
    return gFalse;
  }
  m_objStr.streamReset();
  while ((c = m_objStr.streamGetChar()) != EOF) {
    // fputc() returns EOF on a write error; stop instead of silently
    // producing a truncated file.
    if (fputc(c, f) == EOF) {
      return gFalse;
    }
  }
  return gTrue;
}
FileSpec::FileSpec(Object *fileSpecA)
{
ok = gTrue;
......
......@@ -33,8 +33,11 @@ public:
// Returns the m_mimetype member (presumably the embedded file's MIME
// type -- confirm against the constructor).
GooString *mimeType() { return m_mimetype; }
// Returns the stream held by m_objStr, or NULL when isOk() is false.
Stream *stream() { return isOk() ? m_objStr.getStream() : NULL; }
// True when m_objStr is a stream object.
GBool isOk() { return m_objStr.isStream(); }
// Writes the stream contents to the file at <path>; returns gFalse if
// the file cannot be opened for writing.
GBool save(const char *path);
private:
// Copies the stream contents into the already-open file <f>; used by
// save().
GBool save2(FILE *f);
int m_size;
GooString *m_createDate;
GooString *m_modDate;
......
......@@ -43,6 +43,15 @@ if (HAVE_CAIRO)
install(FILES pdftocairo.1 DESTINATION share/man/man1)
endif (HAVE_CAIRO)
# pdfdetach
# Built from pdfdetach.cc plus the sources listed in ${common_srcs}.
set(pdfdetach_SOURCES ${common_srcs}
pdfdetach.cc
)
add_executable(pdfdetach ${pdfdetach_SOURCES})
target_link_libraries(pdfdetach ${common_libs})
# Install the executable and its man page.
install(TARGETS pdfdetach DESTINATION bin)
install(FILES pdfdetach.1 DESTINATION share/man/man1)
# pdffonts
set(pdffonts_SOURCES ${common_srcs}
pdffonts.cc
......
......@@ -44,6 +44,7 @@ endif
AM_LDFLAGS = @auto_import_flags@
bin_PROGRAMS = \
pdfdetach \
pdffonts \
pdfimages \
pdfinfo \
......@@ -56,6 +57,7 @@ bin_PROGRAMS = \
$(pdftocairo_binary)
dist_man1_MANS = \
pdfdetach.1 \
pdffonts.1 \
pdfimages.1 \
pdfinfo.1 \
......@@ -69,6 +71,10 @@ dist_man1_MANS = \
common = parseargs.cc parseargs.h
# pdfdetach is built from pdfdetach.cc plus the sources listed in $(common).
pdfdetach_SOURCES = \
pdfdetach.cc \
$(common)
pdffonts_SOURCES = \
pdffonts.cc \
$(common)
......
.\" Copyright 2011 Glyph & Cog, LLC
.TH pdfdetach 1 "15 August 2011"
.SH NAME
pdfdetach \- Portable Document Format (PDF) document embedded file
extractor (version 3.03)
.SH SYNOPSIS
.B pdfdetach
[options]
.RI [ PDF-file ]
.SH DESCRIPTION
.B Pdfdetach
lists or extracts embedded files (attachments) from a Portable
Document Format (PDF) file.
.SH CONFIGURATION FILE
Pdfdetach reads a configuration file at startup. It first tries to
find the user's private config file, ~/.xpdfrc. If that doesn't
exist, it looks for a system-wide config file, typically
/usr/local/etc/xpdfrc (but this location can be changed when pdfdetach
is built). See the
.BR xpdfrc (5)
man page for details.
.SH OPTIONS
Some of the following options can be set with configuration file
commands. These are listed in square brackets with the description of
the corresponding command line option.
.TP
.B \-list
List all of the embedded files in the PDF file. File names are
converted to the text encoding specified by the "\-enc" switch.
.TP
.BI \-save " number"
Save the specified embedded file. By default, this uses the file name
associated with the embedded file (as printed by the "\-list" switch);
the file name can be changed with the "\-o" switch.
.TP
.B \-saveall
Save all of the embedded files. This uses the file names associated
with the embedded files (as printed by the "\-list" switch). By
default, the files are saved in the current directory; this can be
changed with the "\-o" switch.
.TP
.BI \-o " path"
Set the file name used when saving an embedded file with the "\-save"
switch, or the directory used by "\-saveall".
.TP
.BI \-enc " encoding-name"
Sets the encoding to use for text output (embedded file names). The
.I encoding\-name
must be defined with the unicodeMap command (see
.BR xpdfrc (5)).
This defaults to "Latin1" (which is a built-in encoding).
.RB "[config file: " textEncoding ]
.TP
.BI \-opw " password"
Specify the owner password for the PDF file. Providing this will
bypass all security restrictions.
.TP
.BI \-upw " password"
Specify the user password for the PDF file.
.TP
.BI \-cfg " config-file"
Read
.I config-file
in place of ~/.xpdfrc or the system-wide config file.
.TP
.B \-v
Print copyright and version information.
.TP
.B \-h
Print usage information.
.RB ( \-help
and
.B \-\-help
are equivalent.)
.SH EXIT CODES
The Xpdf tools use the following exit codes:
.TP
0
No error.
.TP
1
Error opening a PDF file.
.TP
2
Error opening an output file.
.TP
3
Error related to PDF permissions.
.TP
99
Other error.
.SH AUTHOR
The pdfdetach software and documentation are copyright 1996-2011 Glyph &
Cog, LLC.
.SH "SEE ALSO"
.BR xpdf (1),
.BR pdftops (1),
.BR pdftotext (1),
.BR pdfinfo (1),
.BR pdffonts (1),
.BR pdftoppm (1),
.BR pdfimages (1),
.BR xpdfrc (5)
.br
.B http://www.foolabs.com/xpdf/
//========================================================================
//
// pdfdetach.cc
//
// Copyright 2010 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#include "config.h"
#include <poppler-config.h>
#include <stdio.h>
#include "goo/gtypes.h"
#include "goo/gmem.h"
#include "goo/GooList.h"
#include "parseargs.h"
#include "Annot.h"
#include "GlobalParams.h"
#include "Page.h"
#include "PDFDoc.h"
#include "PDFDocFactory.h"
#include "FileSpec.h"
#include "CharTypes.h"
#include "Catalog.h"
#include "UnicodeMap.h"
#include "PDFDocEncoding.h"
#include "Error.h"
// Command-line option storage.  Each variable below is referenced by one
// entry of argDesc and is filled in by parseArgs() in main().
static GBool doList = gFalse;
// 0 means "-save was not given"; main() otherwise treats the value as a
// 1-based index into the list of embedded files.
static int saveNum = 0;
static GBool saveAll = gFalse;
static char savePath[1024] = "";
static char textEncName[128] = "";
// The passwords default to the sentinel "\001"; main() checks the first
// byte against '\001' to tell whether a password was supplied.
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
static char cfgFileName[256] = "";
static GBool printVersion = gFalse;
static GBool printHelp = gFalse;
// Option table handed to parseArgs() and printUsage().  Each entry holds
// the switch name, its argument kind, the destination variable, the
// destination buffer size (0 for flags and ints here) and the usage text.
// The table ends with a {NULL} entry.
static ArgDesc argDesc[] = {
{"-list", argFlag, &doList, 0,
"list all embedded files"},
{"-save", argInt, &saveNum, 0,
"save the specified embedded file"},
{"-saveall", argFlag, &saveAll, 0,
"save all embedded files"},
{"-o", argString, savePath, sizeof(savePath),
"file name for the saved embedded file"},
{"-enc", argString, textEncName, sizeof(textEncName),
"output text encoding name"},
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
"owner password (for encrypted files)"},
{"-upw", argString, userPassword, sizeof(userPassword),
"user password (for encrypted files)"},
{"-cfg", argString, cfgFileName, sizeof(cfgFileName),
"configuration file to use in place of .xpdfrc"},
{"-v", argFlag, &printVersion, 0,
"print copyright and version info"},
{"-h", argFlag, &printHelp, 0,
"print usage information"},
{"-help", argFlag, &printHelp, 0,
"print usage information"},
{"--help", argFlag, &printHelp, 0,
"print usage information"},
{"-?", argFlag, &printHelp, 0,
"print usage information"},
{NULL}
};
int main(int argc, char *argv[]) {
GooString *fileName;
UnicodeMap *uMap;
GooString *ownerPW, *userPW;
PDFDoc *doc;
char uBuf[8];
char path[1024];
char *p;
GBool ok;
int exitCode;
GooList *embeddedFiles = NULL;
int nFiles, nPages, n, i, j;
FileSpec *fileSpec;
Page *page;
Annots *annots;
Annot *annot;
GooString *s1;
Unicode u;
GBool isUnicode;
exitCode = 99;
// parse args
ok = parseArgs(argDesc, &argc, argv);
if ((doList ? 1 : 0) +
((saveNum != 0) ? 1 : 0) +
(saveAll ? 1 : 0) != 1) {
ok = gFalse;
}
if (!ok || argc != 2 || printVersion || printHelp) {
fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
fprintf(stderr, "%s\n", popplerCopyright);
fprintf(stderr, "%s\n", xpdfCopyright);
if (!printVersion) {
printUsage("pdfdetach", "<PDF-file>", argDesc);
}
goto err0;
}
fileName = new GooString(argv[1]);
// read config file
globalParams = new GlobalParams(cfgFileName);
if (textEncName[0]) {
globalParams->setTextEncoding(textEncName);
}
// get mapping to output encoding
if (!(uMap = globalParams->getTextEncoding())) {
error(errConfig, -1, "Couldn't get text encoding");
delete fileName;
goto err1;
}
// open PDF file
if (ownerPassword[0] != '\001') {
ownerPW = new GooString(ownerPassword);
} else {
ownerPW = NULL;
}
if (userPassword[0] != '\001') {
userPW = new GooString(userPassword);
} else {
userPW = NULL;
}
doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
if (userPW) {
delete userPW;
}
if (ownerPW) {
delete ownerPW;
}
if (!doc->isOk()) {
exitCode = 1;
goto err2;
}
embeddedFiles = new GooList();
for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
embeddedFiles->append(doc->getCatalog()->embeddedFile(i));
nPages = doc->getCatalog()->getNumPages();
for (i = 0; i < nPages; ++i) {
page = doc->getCatalog()->getPage(i + 1);
annots = page->getAnnots();
if (!annots)
break;
for (j = 0; j < annots->getNumAnnots(); ++j) {
annot = annots->getAnnot(j);
if (annot->getType() != Annot::typeFileAttachment)
continue;
embeddedFiles->append(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
}
}
nFiles = embeddedFiles->getLength();
// list embedded files
if (doList) {
printf("%d embedded files\n", nFiles);
for (i = 0; i < nFiles; ++i) {
fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
printf("%d: ", i+1);
s1 = fileSpec->getFileName();
if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
isUnicode = gTrue;
j = 2;
} else {
isUnicode = gFalse;
j = 0;
}
while (j < fileSpec->getFileName()->getLength()) {
if (isUnicode) {
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
j += 2;
} else {
u = pdfDocEncoding[s1->getChar(j) & 0xff];
++j;
}
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
fwrite(uBuf, 1, n, stdout);
}
fputc('\n', stdout);
}
// save all embedded files
} else if (saveAll) {
for (i = 0; i < nFiles; ++i) {
fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
if (savePath[0]) {
n = strlen(savePath);
if (n > (int)sizeof(path) - 2) {
n = sizeof(path) - 2;
}
memcpy(path, savePath, n);
path[n] = '/';
p = path + n + 1;
} else {
p = path;
}
s1 = fileSpec->getFileName();
if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
isUnicode = gTrue;
j = 2;
} else {
isUnicode = gFalse;
j = 0;
}
while (j < fileSpec->getFileName()->getLength()) {
if (isUnicode) {
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
j += 2;
} else {
u = pdfDocEncoding[s1->getChar(j) & 0xff];
++j;
}
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
if (p + n >= path + sizeof(path))
break;
memcpy(p, uBuf, n);
p += n;
}
*p = '\0';
if (!fileSpec->getEmbeddedFile()->save(path)) {
error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
exitCode = 2;
goto err2;
}
}
// save an embedded file
} else {
if (saveNum < 1 || saveNum > nFiles) {
error(errCommandLine, -1, "Invalid file number");
goto err2;
}
fileSpec = static_cast<FileSpec *>(embeddedFiles->get(saveNum - 1));
if (savePath[0]) {
p = savePath;
} else {
p = path;
s1 = fileSpec->getFileName();
if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
isUnicode = gTrue;
j = 2;
} else {
isUnicode = gFalse;
j = 0;
}
while (j < fileSpec->getFileName()->getLength()) {
if (isUnicode) {
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
j += 2;
} else {
u = pdfDocEncoding[s1->getChar(j) & 0xff];
++j;
}
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
if (p + n >= path + sizeof(path))
break;
memcpy(p, uBuf, n);
p += n;
}
*p = '\0';
p = path;
}
if (!fileSpec->getEmbeddedFile()->save(p)) {
error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
exitCode = 2;
goto err2;
}
}
exitCode = 0;
// clean up
err2:
if (embeddedFiles)
deleteGooList(embeddedFiles, FileSpec);
uMap->decRefCnt();
delete doc;
err1:
delete globalParams;
err0:
// check for memory leaks
Object::memCheck(stderr);
gMemReport(stderr);
return exitCode;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment