Commit 2021c8ff authored by Adrian Johnson's avatar Adrian Johnson

pdfimages: add support for png and tiff output

parent 8f466775
......@@ -42,19 +42,23 @@
#include <math.h>
#include "goo/gmem.h"
#include "goo/NetPBMWriter.h"
#include "goo/PNGWriter.h"
#include "goo/TiffWriter.h"
#include "Error.h"
#include "GfxState.h"
#include "Object.h"
#include "Stream.h"
#include "ImageOutputDev.h"
ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) {
ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA) {
listImages = listImagesA;
if (!listImages) {
fileRoot = copyString(fileRootA);
fileName = (char *)gmalloc(strlen(fileRoot) + 45);
}
dumpJPEG = dumpJPEGA;
outputPNG = gFalse;
outputTiff = gFalse;
dumpJPEG = gFalse;
pageNames = pageNamesA;
imgNum = 0;
pageNum = 0;
......@@ -374,6 +378,21 @@ void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const
writer->writeRow(&row);
break;
case imgGray:
p = imgStr->getLine();
rowp = row;
for (int x = 0; x < width; ++x) {
if (p) {
colorMap->getGray(p, &gray);
*rowp++ = colToByte(gray);
p += colorMap->getNumPixelComps();
} else {
*rowp++ = 0;
}
}
writer->writeRow(&row);
break;
case imgMonochrome:
int size = (width + 7)/8;
for (int x = 0; x < size; x++)
......@@ -406,10 +425,52 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
// dump JPEG file
writeRawImage(str, "jpg");
} else if (outputPNG) {
// output in PNG format
#if ENABLE_LIBPNG
ImgWriter *writer;
if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
writer = new PNGWriter(PNGWriter::MONOCHROME);
format = imgMonochrome;
} else if (colorMap->getColorSpace()->getMode() == csDeviceGray ||
colorMap->getColorSpace()->getMode() == csCalGray) {
writer = new PNGWriter(PNGWriter::GRAY);
format = imgGray;
} else {
writer = new PNGWriter(PNGWriter::RGB);
format = imgRGB;
}
writeImageFile(writer, format, "png", str, width, height, colorMap);
#endif
} else if (outputTiff) {
// output in TIFF format
#if ENABLE_LIBTIFF
ImgWriter *writer;
if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
writer = new TiffWriter(TiffWriter::MONOCHROME);
format = imgMonochrome;
} else if (colorMap->getColorSpace()->getMode() == csDeviceGray ||
colorMap->getColorSpace()->getMode() == csCalGray) {
writer = new TiffWriter(TiffWriter::GRAY);
format = imgGray;
} else {
writer = new TiffWriter(TiffWriter::RGB);
format = imgRGB;
}
writeImageFile(writer, format, "tif", str, width, height, colorMap);
#endif
} else {
// output in PPM/PBM format
ImgWriter *writer;
// dump PBM file
if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
writer = new NetPBMWriter(NetPBMWriter::MONOCHROME);
format = imgMonochrome;
......
......@@ -55,19 +55,30 @@ public:
};
// Pixel format of an image file produced by writeImageFile().
// writeImage() picks the value from the image's color map.
enum ImageFormat {
imgRGB,          // any color map that is neither 1-bit nor DeviceGray/CalGray
imgGray,         // color space mode is csDeviceGray or csCalGray
imgMonochrome    // no color map, or one component at one bit per component
};
// Create an OutputDev which will write images to files named
// <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if
// <pageNames> is set. Normally, all images are written as PBM
// (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images
// (.pbm) or PPM (.ppm) files unless PNG or Tiff output is enabled
// (PNG is used if both are enabled). If Jpeg is enabled, JPEG images
// are written as JPEG (.jpg) files.
ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA);
ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA);
// Destructor.
virtual ~ImageOutputDev();
// Use PNG format for output.  Stores the flag in outputPNG, which
// writeImage() tests before outputTiff, so PNG is used when both PNG
// and TIFF output are enabled.
void enablePNG(GBool png) { outputPNG = png; }
// Use TIFF format for output.  Stores the flag in outputTiff; TIFF
// is only selected by writeImage() when PNG output is not enabled.
void enableTiff(GBool tiff) { outputTiff = tiff; }
// Write JPEG images as JPEG (.jpg) files.  Stores the flag in
// dumpJPEG, which the constructor initializes to gFalse.
void enableJpeg(GBool jpeg) { dumpJPEG = jpeg; }
// Check if file was successfully created.  Returns the current value
// of the ok member; pdfimages' main() tests it before rendering pages.
virtual GBool isOk() { return ok; }
......@@ -143,6 +154,8 @@ private:
char *fileName; // buffer for output file names
GBool listImages; // list images instead of dumping
GBool dumpJPEG; // set to dump native JPEG files
GBool outputPNG; // set to output in PNG format
GBool outputTiff; // set to output in TIFF format
GBool pageNames; // set to include page number in file names
int pageNum; // current page number
int imgNum; // current image number
......
......@@ -10,17 +10,22 @@ pdfimages \- Portable Document Format (PDF) image extractor
.SH DESCRIPTION
.B Pdfimages
saves images from a Portable Document Format (PDF) file as Portable
Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG),
Tagged Image File Format (TIFF), or JPEG files.
.PP
Pdfimages reads the PDF file
.IR PDF-file ,
scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
scans one or more pages, and writes one file for each image,
.IR image-root - nnn . xxx ,
where
.I nnn
is the image number and
.I xxx
is the image type (.ppm, .pbm, .jpg).
is the image type (.ppm, .pbm, .png, .tif, or .jpg).
.PP
The default output format is PBM (for monochrome images) or PPM (for non-monochrome images). The
\-png or \-tiff options change the default output format to PNG or TIFF respectively. In addition, the \-j option
will cause JPEG images in the PDF file to be written in JPEG format.
.SH OPTIONS
.TP
.BI \-f " number"
......@@ -29,11 +34,14 @@ Specifies the first page to scan.
.BI \-l " number"
Specifies the last page to scan.
.TP
.B \-png
Change the default output format to PNG.
.TP
.B \-tiff
Change the default output format to TIFF.
.TP
.B \-j
Normally, all images are written as PBM (for monochrome images) or PPM
(for non-monochrome images) files. With this option, images in DCT
format are saved as JPEG files. All non-DCT images are saved in
PBM/PPM format as usual.
Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF.
.TP
.B \-list
Instead of writing the images, list the images along with various information for each image. Do not specify an
......
......@@ -50,6 +50,8 @@
static int firstPage = 1;
static int lastPage = 0;
static GBool listImages = gFalse;
static GBool enablePNG = gFalse;
static GBool enableTiff = gFalse;
static GBool dumpJPEG = gFalse;
static GBool pageNames = gFalse;
static char ownerPassword[33] = "\001";
......@@ -63,6 +65,14 @@ static const ArgDesc argDesc[] = {
"first page to convert"},
{"-l", argInt, &lastPage, 0,
"last page to convert"},
#if ENABLE_LIBPNG
{"-png", argFlag, &enablePNG, 0,
"change the default output format to PNG"},
#endif
#if ENABLE_LIBTIFF
{"-tiff", argFlag, &enableTiff, 0,
"change the default output format to TIFF"},
#endif
{"-j", argFlag, &dumpJPEG, 0,
"write JPEG images as JPEG files"},
{"-list", argFlag, &listImages, 0,
......@@ -168,10 +178,13 @@ int main(int argc, char *argv[]) {
lastPage = doc->getNumPages();
// write image files
imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
imgOut = new ImageOutputDev(imgRoot, pageNames, listImages);
if (imgOut->isOk()) {
doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
gTrue, gFalse, gFalse);
imgOut->enablePNG(enablePNG);
imgOut->enableTiff(enableTiff);
imgOut->enableJpeg(dumpJPEG);
doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
gTrue, gFalse, gFalse);
}
delete imgOut;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment