Commit 521d3740 authored by Adrian Johnson's avatar Adrian Johnson

pdfimages: add -list option to list all images

Bug 46066
parent 119b6b75
......@@ -44,19 +44,29 @@
#include "Stream.h"
#include "ImageOutputDev.h"
ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA) {
fileRoot = copyString(fileRootA);
fileName = (char *)gmalloc(strlen(fileRoot) + 45);
// Create the output device.
//
// fileRootA   - root of the output file names; only read when images are
//               written, so it may be NULL when listImagesA is set
// pageNamesA  - include the page number in generated file names
// dumpJPEGA   - dump JPEG images as native JPEG files
// listImagesA - print a table describing each image instead of writing files
ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) {
  listImages = listImagesA;
  if (listImages) {
    // No files are produced in list mode; keep the pointers well defined
    // instead of leaving them indeterminate.
    fileRoot = NULL;
    fileName = NULL;
  } else {
    fileRoot = copyString(fileRootA);
    // 45 bytes of headroom beyond the root -- presumably for the numeric
    // suffixes and extension appended by setFilename (TODO confirm).
    fileName = (char *)gmalloc(strlen(fileRoot) + 45);
  }
  dumpJPEG = dumpJPEGA;
  pageNames = pageNamesA;
  imgNum = 0;
  pageNum = 0;
  ok = gTrue;
  if (listImages) {
    // Table header for the rows printed by listImage().
    printf("page num type width height color comp bpc enc interp object ID\n");
    printf("---------------------------------------------------------------------\n");
  }
}
// Release the file name buffers. They are only allocated when images are
// written to files, so nothing is freed in list mode; each buffer is
// released exactly once.
ImageOutputDev::~ImageOutputDev() {
  if (!listImages) {
    gfree(fileName);
    gfree(fileRoot);
  }
}
void ImageOutputDev::setFilename(const char *fileExt) {
......@@ -67,18 +77,124 @@ void ImageOutputDev::setFilename(const char *fileExt) {
}
}
GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep) {
return gTrue;
// do nothing -- this avoids the potentially slow loop in Gfx.cc
// Print one table row describing an image: page number, image number, type,
// pixel dimensions, color space, component count, bits per component,
// stream encoding, interpolate flag and object ID. Increments imgNum.
//
// colorMap may be NULL (masks and stencils); the row then shows "-" for the
// color space with 1 component and 1 bit per component.
void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
			       int width, int height,
			       GfxImageColorMap *colorMap,
			       GBool interpolate, GBool inlineImg,
			       ImageType imageType) {
  // Image type column.
  const char *typeName = "";
  switch (imageType) {
  case imgImage:   typeName = "image";   break;
  case imgStencil: typeName = "stencil"; break;
  case imgMask:    typeName = "mask";    break;
  case imgSmask:   typeName = "smask";   break;
  }

  // Color columns; the defaults describe masks and stencils.
  const char *csName = "-";
  int nComps = 1;
  int bitsPerComp = 1;
  if (colorMap && colorMap->isOk()) {
    switch (colorMap->getColorSpace()->getMode()) {
    case csDeviceGray:
    case csCalGray:
      csName = "gray";
      break;
    case csDeviceRGB:
    case csCalRGB:
      csName = "rgb";
      break;
    case csDeviceCMYK:
      csName = "cmyk";
      break;
    case csLab:
      csName = "lab";
      break;
    case csICCBased:
      csName = "icc";
      break;
    case csIndexed:
      csName = "index";
      break;
    case csSeparation:
      csName = "sep";
      break;
    case csDeviceN:
      csName = "devn";
      break;
    default:
      // csPattern and anything unrecognised keep the "-" placeholder
      break;
    }
    nComps = colorMap->getNumPixelComps();
    bitsPerComp = colorMap->getBits();
  }

  // Encoding column: every stream kind without a dedicated label is
  // reported as a plain "image".
  const char *encName = "image";
  switch (str->getKind()) {
  case strCCITTFax: encName = "ccitt"; break;
  case strDCT:      encName = "jpeg";  break;
  case strJPX:      encName = "jpx";   break;
  case strJBIG2:    encName = "jbig2"; break;
  default:                             break;
  }

  printf("%4d %5d ", pageNum, imgNum);
  printf("%-7s %5d %5d ", typeName, width, height);
  printf("%-5s %2d %2d ", csName, nComps, bitsPerComp);
  printf("%-5s ", encName);
  printf("%-3s ", interpolate ? "yes" : "no");

  // Object ID column: only refs with a generation below 100000 are printed
  // as an ID; inline images and everything else get a placeholder.
  if (inlineImg) {
    printf("[inline]\n");
  } else if (ref->isRef() && ref->getRef().gen < 100000) {
    const Ref objId = ref->getRef();
    printf(" %6d %2d\n", objId.num, objId.gen);
  } else {
    printf("[none]\n");
  }

  ++imgNum;
}
void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg) {
void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg) {
FILE *f;
int c;
int size, i;
......@@ -132,10 +248,10 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
}
}
void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg) {
void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg) {
FILE *f;
ImageStream *imgStr;
Guchar *p;
......@@ -250,13 +366,46 @@ void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
}
}
// Report every tiling pattern fill as handled without drawing anything.
GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
					double *pmat, int paintType, int tilingType, Dict *resDict,
					double *mat, double *bbox,
					int x0, int y0, int x1, int y1,
					double xStep, double yStep) {
  // do nothing -- this avoids the potentially slow loop in Gfx.cc
  return gTrue;
}
// Handle a standalone image mask: add a row to the listing in list mode,
// otherwise write the mask to a file via writeMask().
//
// A mask that reaches this entry point in list mode is not paired with a
// base image (masked images are listed by drawMaskedImage), so it is
// reported as a "stencil" rather than a "mask".
void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
				   int width, int height, GBool invert,
				   GBool interpolate, GBool inlineImg) {
  if (listImages)
    listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgStencil);
  else
    writeMask(state, ref, str, width, height, invert, interpolate, inlineImg);
}
// Handle an ordinary image: in list mode print its table row, otherwise
// write it to a file.
void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
			       int width, int height,
			       GfxImageColorMap *colorMap,
			       GBool interpolate, int *maskColors, GBool inlineImg) {
  if (!listImages) {
    writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
    return;
  }
  listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage);
}
// Handle an image with an explicit (1-bit) mask. The base image is processed
// first and its mask immediately after it, exactly once each: as two rows
// in list mode, or as two output files otherwise.
void ImageOutputDev::drawMaskedImage(
  GfxState *state, Object *ref, Stream *str,
  int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
  Stream *maskStr, int maskWidth, int maskHeight, GBool maskInvert, GBool maskInterpolate) {
  if (listImages) {
    listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
    // Describe the mask from its own stream so the encoding column reflects
    // the mask data rather than the base image.
    listImage(state, ref, maskStr, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask);
  } else {
    drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
    drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert,
		  maskInterpolate, gFalse);
  }
}
void ImageOutputDev::drawSoftMaskedImage(
......@@ -264,7 +413,12 @@ void ImageOutputDev::drawSoftMaskedImage(
int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
Stream *maskStr, int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap, GBool maskInterpolate) {
drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
drawImage(state, ref, maskStr, maskWidth, maskHeight,
maskColorMap, maskInterpolate, NULL, gFalse);
if (listImages) {
listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
listImage(state, ref, maskStr, maskWidth, height, maskColorMap, maskInterpolate, gFalse, imgSmask);
} else {
drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
drawImage(state, ref, maskStr, maskWidth, maskHeight,
maskColorMap, maskInterpolate, NULL, gFalse);
}
}
......@@ -44,13 +44,19 @@ class GfxState;
class ImageOutputDev: public OutputDev {
public:
enum ImageType {
imgImage,
imgStencil,
imgMask,
imgSmask
};
// Create an OutputDev which will write images to files named
// <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if
// <pageNames> is set. Normally, all images are written as PBM
// (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images
// are written as JPEG (.jpg) files.
ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA);
ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA);
// Destructor.
virtual ~ImageOutputDev();
......@@ -115,10 +121,22 @@ public:
private:
// Sets the output filename with a given file extension
void setFilename(const char *fileExt);
void listImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate, GBool inlineImg,
ImageType imageType);
void writeMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg);
void writeImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg);
char *fileRoot; // root of output file names
char *fileName; // buffer for output file names
GBool listImages; // list images instead of dumping
GBool dumpJPEG; // set to dump native JPEG files
GBool pageNames; // set to include page number in file names
int pageNum; // current page number
......
......@@ -35,6 +35,107 @@ Normally, all images are written as PBM (for monochrome images) or PPM
format are saved as JPEG files. All non-DCT images are saved in
PBM/PPM format as usual.
.TP
.B \-list
Instead of writing the images, list the images along with various information for each image. Do not specify an
.IR image-root
with this option.
.IP
The following information is listed for each image:
.RS
.TP
.B page
the page number containing the image
.TP
.B num
the image number
.TP
.B type
the image type:
.PP
.RS
image - an opaque image
.RE
.RS
mask - a monochrome mask image
.RE
.RS
smask - a soft-mask image
.RE
.RS
stencil - a monochrome mask image used for painting a color or pattern
.RE
.PP
Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask.
The mask/smask used as part of a transparent image always immediately follows the image in the image list.
.TP
.B width
image width (in pixels)
.TP
.B height
image height (in pixels)
.PP
Note: the image width/height is the size of the embedded image, not the size the image will be rendered at.
.TP
.B color
image color space:
.PP
.RS
gray - Gray
.RE
.RS
rgb - RGB
.RE
.RS
cmyk - CMYK
.RE
.RS
lab - L*a*b*
.RE
.RS
icc - ICC Based
.RE
.RS
index - Indexed Color
.RE
.RS
sep - Separation
.RE
.RS
devn - DeviceN
.RE
.TP
.B comp
number of color components
.TP
.B bpc
bits per component
.TP
.B enc
encoding:
.PP
.RS
image - raster image (may be Flate or LZW compressed but does not use an image encoding)
.RE
.RS
jpeg - Joint Photographic Experts Group
.RE
.RS
jpx - JPEG2000
.RE
.RS
jbig2 - Joint Bi-Level Image Experts Group
.RE
.RS
ccitt - CCITT Group 3 or Group 4 Fax
.RE
.TP
.B interp
"yes" if interpolation is to be performed when scaling up the image, "no" otherwise
.TP
.B object ID
the image dictionary object ID (number and generation)
.RE
.TP
.BI \-opw " password"
Specify the owner password for the PDF file. Providing this will
bypass all security restrictions.
......
......@@ -48,6 +48,7 @@
static int firstPage = 1;
static int lastPage = 0;
static GBool listImages = gFalse;
static GBool dumpJPEG = gFalse;
static GBool pageNames = gFalse;
static char ownerPassword[33] = "\001";
......@@ -63,6 +64,8 @@ static const ArgDesc argDesc[] = {
"last page to convert"},
{"-j", argFlag, &dumpJPEG, 0,
"write JPEG images as JPEG files"},
{"-list", argFlag, &listImages, 0,
"print list of images instead of saving"},
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
"owner password (for encrypted files)"},
{"-upw", argString, userPassword, sizeof(userPassword),
......@@ -87,7 +90,7 @@ static const ArgDesc argDesc[] = {
int main(int argc, char *argv[]) {
PDFDoc *doc;
GooString *fileName;
char *imgRoot;
char *imgRoot = NULL;
GooString *ownerPW, *userPW;
ImageOutputDev *imgOut;
GBool ok;
......@@ -97,7 +100,7 @@ int main(int argc, char *argv[]) {
// parse args
ok = parseArgs(argDesc, &argc, argv);
if (!ok || argc != 3 || printVersion || printHelp) {
if (!ok || (listImages && argc != 2) || (!listImages && argc != 3) || printVersion || printHelp) {
fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION);
fprintf(stderr, "%s\n", popplerCopyright);
fprintf(stderr, "%s\n", xpdfCopyright);
......@@ -109,7 +112,8 @@ int main(int argc, char *argv[]) {
goto err0;
}
fileName = new GooString(argv[1]);
imgRoot = argv[2];
if (!listImages)
imgRoot = argv[2];
// read config file
globalParams = new GlobalParams();
......@@ -163,7 +167,7 @@ int main(int argc, char *argv[]) {
lastPage = doc->getNumPages();
// write image files
imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG);
imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
if (imgOut->isOk()) {
doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
gTrue, gFalse, gFalse);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment