//========================================================================
//
// pdfinfo.cc
//
// Copyright 1998-2003 Glyph & Cog, LLC
// Copyright 2013 Igalia S.L.
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com>
// Copyright (C) 2007-2010, 2012, 2016-2018 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
// Copyright (C) 2012, 2013, 2016-2018 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
// Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

34
#include "config.h"
35 36 37 38 39 40 41
#include <poppler-config.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <math.h>
42
#include <map>
43
#include "parseargs.h"
44
#include "printencodings.h"
45
#include "goo/GooString.h"
46
#include "goo/gfile.h"
Adrian Johnson's avatar
Adrian Johnson committed
47
#include "goo/glibc.h"
48 49 50 51 52 53 54 55 56 57
#include "goo/gmem.h"
#include "GlobalParams.h"
#include "Object.h"
#include "Stream.h"
#include "Array.h"
#include "Dict.h"
#include "XRef.h"
#include "Catalog.h"
#include "Page.h"
#include "PDFDoc.h"
Hib Eris's avatar
Hib Eris committed
58
#include "PDFDocFactory.h"
59 60
#include "CharTypes.h"
#include "UnicodeMap.h"
61
#include "UTF.h"
62
#include "Error.h"
63
#include "DateInfo.h"
64
#include "JSInfo.h"
65 66
#include "StructTreeRoot.h"
#include "StructElement.h"
67
#include "Win32Console.h"
68 69 70 71 72 73


// Command-line option storage.  argDesc records the address of each of
// these so that parseArgs() can fill them in.

// Page range; main() treats lastPage == 0 as "no -l given".
static int firstPage = 1;
static int lastPage = 0;

// Output mode switches.
static GBool printBoxes = gFalse;
static GBool printMetadata = gFalse;
static GBool printJS = gFalse;
static GBool printStructure = gFalse;
static GBool printStructureText = gFalse;
static GBool printDests = gFalse;
static GBool printEnc = gFalse;
static GBool printVersion = gFalse;
static GBool printHelp = gFalse;

// Date formatting switches.
static GBool isoDates = gFalse;
static GBool rawDates = gFalse;

// Output text encoding name; empty means "leave the default alone".
static char textEncName[128] = "";

// Passwords; main() treats a leading '\001' as "not supplied".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
86

87
// Option table handed to parseArgs() and printUsage().  Each entry is
// { option, kind, storage, storage size (string options only), help text };
// the empty entry terminates the table.
static const ArgDesc argDesc[] = {
  { "-f", argInt, &firstPage, 0,
    "first page to convert" },
  { "-l", argInt, &lastPage, 0,
    "last page to convert" },
  { "-box", argFlag, &printBoxes, 0,
    "print the page bounding boxes" },
  { "-meta", argFlag, &printMetadata, 0,
    "print the document metadata (XML)" },
  { "-js", argFlag, &printJS, 0,
    "print all JavaScript in the PDF" },
  { "-struct", argFlag, &printStructure, 0,
    "print the logical document structure (for tagged files)" },
  { "-struct-text", argFlag, &printStructureText, 0,
    "print text contents along with document structure (for tagged files)" },
  { "-isodates", argFlag, &isoDates, 0,
    "print the dates in ISO-8601 format" },
  { "-rawdates", argFlag, &rawDates, 0,
    "print the undecoded date strings directly from the PDF file" },
  { "-dests", argFlag, &printDests, 0,
    "print all named destinations in the PDF" },
  { "-enc", argString, textEncName, sizeof(textEncName),
    "output text encoding name" },
  { "-listenc", argFlag, &printEnc, 0,
    "list available encodings" },
  { "-opw", argString, ownerPassword, sizeof(ownerPassword),
    "owner password (for encrypted files)" },
  { "-upw", argString, userPassword, sizeof(userPassword),
    "user password (for encrypted files)" },
  { "-v", argFlag, &printVersion, 0,
    "print copyright and version info" },
  { "-h", argFlag, &printHelp, 0,
    "print usage information" },
  { "-help", argFlag, &printHelp, 0,
    "print usage information" },
  { "--help", argFlag, &printHelp, 0,
    "print usage information" },
  { "-?", argFlag, &printHelp, 0,
    "print usage information" },
  {}
};

129 130
// Print one string entry of the info dictionary as "<text><value>\n".
//
// infoDict: dictionary to look the entry up in
// key:      entry name
// text:     label written before the value
// uMap:     converts each Unicode code point to output bytes
//
// Nothing at all is written when the entry is missing or not a string.
static void printInfoString(Dict *infoDict, const char *key, const char *text,
                            UnicodeMap *uMap) {
  Object entry = infoDict->lookup(key);
  if (!entry.isString()) {
    return;
  }

  fputs(text, stdout);

  // Decode the PDF text string, then re-encode it one code point at a time.
  Unicode *codePoints;
  const int count = TextStringToUCS4(entry.getString(), &codePoints);
  char encoded[8];
  for (int idx = 0; idx < count; ++idx) {
    const int nBytes = uMap->mapUnicode(codePoints[idx], encoded, sizeof(encoded));
    fwrite(encoded, 1, nBytes, stdout);
  }
  gfree(codePoints);

  fputc('\n', stdout);
}
149

150
// Print one date entry of the info dictionary as "<text><date>\n", with the
// date converted to local time and formatted with strftime("%c %Z").
//
// The raw string from the file is printed instead when it cannot be parsed
// or when timegm() rejects the parsed fields.  Nothing is written when the
// entry is missing or not a string.
static void printInfoDate(Dict *infoDict, const char *key, const char *text) {
  Object entry = infoDict->lookup(key);
  if (!entry.isString()) {
    return;
  }

  fputs(text, stdout);
  const char *raw = entry.getString()->getCString();

  int year, mon, day, hour, min, sec, tzHours, tzMinutes;
  char tzSign;
  bool formatted = false;

  if (parseDateString(raw, &year, &mon, &day, &hour, &min, &sec,
                      &tzSign, &tzHours, &tzMinutes)) {
    struct tm fields;
    fields.tm_year = year - 1900;
    fields.tm_mon = mon - 1;
    fields.tm_mday = day;
    fields.tm_hour = hour;
    fields.tm_min = min;
    fields.tm_sec = sec;
    fields.tm_wday = -1;
    fields.tm_yday = -1;
    fields.tm_isdst = -1;

    // timegm() also fills in tm_wday and tm_yday
    time_t stamp = timegm(&fields);
    if (stamp != (time_t)-1) {
      // The parsed fields are in the file's own zone; remove its offset
      // before converting to local time.
      const int magnitude = (tzHours * 60 + tzMinutes) * 60;
      stamp -= (tzSign == '-') ? -magnitude : magnitude;

      localtime_r(&stamp, &fields);
      char out[256];
      strftime(out, sizeof(out), "%c %Z", &fields);
      fputs(out, stdout);
      formatted = true;
    }
  }

  if (!formatted) {
    fputs(raw, stdout);
  }
  fputc('\n', stdout);
}
192

193
// Print one date entry of the info dictionary as "<text><date>\n" in
// ISO-8601 form: YYYY-MM-DDTHH:MM:SS followed by "Z" for a zero offset, or
// by the sign and hour offset (plus ":MM" when the minute offset is
// non-zero).
//
// The raw string is printed when it cannot be parsed.  Nothing is written
// when the entry is missing or not a string.
static void printISODate(Dict *infoDict, const char *key, const char *text)
{
  Object entry = infoDict->lookup(key);
  if (!entry.isString()) {
    return;
  }

  fputs(text, stdout);
  const char *raw = entry.getString()->getCString();

  int year, mon, day, hour, min, sec, tzHours, tzMinutes;
  char tzSign;
  if (!parseDateString(raw, &year, &mon, &day, &hour, &min, &sec,
                       &tzSign, &tzHours, &tzMinutes)) {
    fputs(raw, stdout);
  } else {
    printf("%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
    const bool isUtc = (tzHours == 0 && tzMinutes == 0);
    if (isUtc) {
      printf("Z");
    } else {
      printf("%c%02d", tzSign, tzHours);
      if (tzMinutes != 0) {
        printf(":%02d", tzMinutes);
      }
    }
  }

  fputc('\n', stdout);
}
218

219 220 221 222
static void printBox(const char *text, PDFRectangle *box) {
  printf("%s%8.2f %8.2f %8.2f %8.2f\n",
	 text, box->x1, box->y1, box->x2, box->y2);
}
223

224 225 226 227
// Write two spaces to stdout for every indentation level.
static void printIndent(unsigned indent) {
  for (unsigned level = 0; level < indent; ++level) {
    fputs("  ", stdout);
  }
}
230

231 232 233 234 235 236 237 238
static void printAttribute(const Attribute *attribute, unsigned indent)
{
  printIndent(indent);
  printf(" /%s ", attribute->getTypeName());
  if (attribute->getType() == Attribute::UserProperty) {
    GooString *name = attribute->getName();
    printf("(%s) ", name->getCString());
    delete name;
239
  }
240 241 242
  attribute->getValue()->print(stdout);
  if (attribute->getFormattedValue()) {
    printf(" \"%s\"", attribute->getFormattedValue());
243
  }
244 245
  if (attribute->isHidden()) {
    printf(" [hidden]");
246
  }
247
}
248

249 250 251 252 253
static void printStruct(const StructElement *element, unsigned indent) {
  if (element->isObjectRef()) {
    printIndent(indent);
    printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
    return;
254
  }
255 256 257 258 259 260 261 262 263 264

  if (printStructureText && element->isContent()) {
    GooString *text = element->getText(gFalse);
    printIndent(indent);
    if (text) {
      printf("\"%s\"\n", text->getCString());
    } else {
      printf("(No content?)\n");
    }
    delete text;
265
  }
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293

  if (!element->isContent()) {
      printIndent(indent);
      printf("%s", element->getTypeName());
      if (element->getID()) {
          printf(" <%s>", element->getID()->getCString());
      }
      if (element->getTitle()) {
          printf(" \"%s\"", element->getTitle()->getCString());
      }
      if (element->getRevision() > 0) {
          printf(" r%u", element->getRevision());
      }
      if (element->isInline() || element->isBlock()) {
          printf(" (%s)", element->isInline() ? "inline" : "block");
      }
      if (element->getNumAttributes()) {
          putchar(':');
          for (unsigned i = 0; i < element->getNumAttributes(); i++) {
              putchar('\n');
              printAttribute(element->getAttribute(i), indent + 1);
          }
      }

      putchar('\n');
      for (unsigned i = 0; i < element->getNumChildren(); i++) {
          printStruct(element->getChild(i), indent + 1);
      }
294
  }
295 296
}

297 298 299 300 301 302 303
// Strict-weak ordering on GooString pointers that compares the pointed-to
// strings with GooString::cmp; used as the comparator for maps keyed by
// GooString*.
struct GooStringCompare {
  bool operator() (GooString* lhs, GooString* rhs) const {
    const int order = lhs->cmp(rhs);
    return order < 0;
  }
};

// Print a link destination as a fixed-width "[ Kind args... ]" column.
//
// The textual form is built first; coordinates that the destination leaves
// unchanged are written as "null".  The text is then padded with blanks,
// a ']' is forced into column 26 and the string is cut right after it, so
// anything that would extend past that column is overwritten.
static void printLinkDest(LinkDest *dest) {
  GooString s;

  switch (dest->getKind()) {
    case destXYZ:
      s.append("[ XYZ ");
      if (!dest->getChangeLeft()) {
        s.append("null ");
      } else {
        s.appendf("{0:4.0g} ", dest->getLeft());
      }
      if (!dest->getChangeTop()) {
        s.append("null ");
      } else {
        s.appendf("{0:4.0g} ", dest->getTop());
      }
      if (!dest->getChangeZoom()) {
        s.append("null ");
      } else {
        s.appendf("{0:4.2f} ", dest->getZoom());
      }
      break;

    case destFit:
      s.append("[ Fit ");
      break;

    case destFitH:
      if (!dest->getChangeTop()) {
        s.append("[ FitH null ");
      } else {
        s.appendf("[ FitH {0:4.0g} ", dest->getTop());
      }
      break;

    case destFitV:
      if (!dest->getChangeLeft()) {
        s.append("[ FitV null ");
      } else {
        s.appendf("[ FitV {0:4.0g} ", dest->getLeft());
      }
      break;

    case destFitR:
      s.appendf("[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} ",
                dest->getLeft(),
                dest->getBottom(),
                dest->getRight(),
                dest->getTop());
      break;

    case destFitB:
      s.append("[ FitB ");
      break;

    case destFitBH:
      if (!dest->getChangeTop()) {
        s.append("[ FitBH null ");
      } else {
        s.appendf("[ FitBH {0:4.0g} ", dest->getTop());
      }
      break;

    case destFitBV:
      if (!dest->getChangeLeft()) {
        s.append("[ FitBV null ");
      } else {
        s.appendf("[ FitBV {0:4.0g} ", dest->getLeft());
      }
      break;
  }

  // Pad so that the closing-bracket column always exists, then terminate
  // the column there.
  const int closeColumn = 26;
  s.append("                                ");
  s.setChar(closeColumn, ']');
  s.setChar(closeColumn + 1, '\0');
  printf("%s", s.getCString());
}

// Print a table of all named destinations that point at a page in
// [firstPage, lastPage], grouped by page and sorted by name within a page.
//
// doc:  document whose catalog supplies the destinations (both the name
//       tree and the Dests dictionary are read)
// uMap: converts each Unicode code point of a name to output bytes
//
// Ownership: every name copy and every LinkDest obtained from the catalog
// is owned by this function.  Pairs that are not filed in the lookup table
// (no destination, not a page reference, or a name already present for
// that page) are freed immediately; everything that was filed is freed in
// one pass at the end, whether or not its page was in the printed range.
static void printDestinations(PDFDoc *doc, UnicodeMap *uMap) {
  typedef std::map<GooString*,LinkDest*,GooStringCompare> NamedDests;
  std::map<Ref,NamedDests> destsByPage;

  // File one (name, dest) pair under its page, or free it if it is unusable.
  auto fileDest = [&destsByPage](GooString *name, LinkDest *dest) {
    if (dest && dest->isPageRef()) {
      // insert() refuses a name that is already present for this page; in
      // that case the table did not take ownership.
      if (destsByPage[dest->getPageRef()].insert(std::make_pair(name, dest)).second) {
        return;
      }
    }
    delete name;
    delete dest;
  };

  int numDests = doc->getCatalog()->numDestNameTree();
  for (int i = 0; i < numDests; i++) {
    GooString *name = new GooString(doc->getCatalog()->getDestNameTreeName(i));
    fileDest(name, doc->getCatalog()->getDestNameTreeDest(i));
  }

  numDests = doc->getCatalog()->numDests();
  for (int i = 0; i < numDests; i++) {
    GooString *name = new GooString(doc->getCatalog()->getDestsName(i));
    fileDest(name, doc->getCatalog()->getDestsDest(i));
  }

  printf("Page  Destination                 Name\n");
  for (int i = firstPage; i <= lastPage; i++) {
    Ref *ref = doc->getCatalog()->getPageRef(i);
    if (!ref) {
      continue;
    }
    auto pageDests = destsByPage.find(*ref);
    if (pageDests == destsByPage.end()) {
      continue;
    }
    for (auto& it: pageDests->second) {
      printf("%4d ", i);
      printLinkDest(it.second);
      printf(" \"");
      Unicode *u;
      char buf[8];
      const int len = TextStringToUCS4(it.first, &u);
      for (int j = 0; j < len; j++) {
        const int n = uMap->mapUnicode(u[j], buf, sizeof(buf));
        fwrite(buf, 1, n, stdout);
      }
      gfree(u);
      printf("\"\n");
    }
  }

  // Release everything that was filed, including destinations on pages
  // outside the printed range.
  for (auto& page: destsByPage) {
    for (auto& it: page.second) {
      delete it.first;
      delete it.second;
    }
  }
}

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
// Print the PDF subtype (PDF/A, PDF/E, PDF/UA, PDF/VT or PDF/X) of a
// document together with a short explanation of the standard it names.
//
// doc:  document to inspect
// uMap: converts the subtype version string to output bytes
//
// Nothing is printed when the document has no info dictionary or declares
// no recognised subtype.  Otherwise the output is the "PDF subtype:" entry
// from the info dictionary (if present), followed by Title, Abbreviation,
// Subtitle, Standard and, when a conformance level is known, Conformance.
//
// The "Standard:" line is the ISO number followed by "-<part>"; the suffix
// is omitted when the document does not declare a part.
static void printPdfSubtype(PDFDoc *doc, UnicodeMap *uMap) {
  const Object info = doc->getDocInfo();
  if (!info.isDict()) {
    return;
  }

  const PDFSubtype pdftype = doc->getPDFSubtype();
  if (pdftype == subtypeNull || pdftype == subtypeNone) {
    return;
  }

  const char *typeExp = nullptr;  // full title of the standard
  const char *part = nullptr;     // description of the part, if known
  const char *confExp = nullptr;  // description of the conformance level
  GooString abbr;                 // e.g. "PDF/A-1"
  GooString standard;             // e.g. "ISO 19005-1"

  // Form title from PDFSubtype
  switch (pdftype) {
    case subtypePDFA:
      printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype:    ", uMap);
      typeExp = "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)";
      standard.append("ISO 19005");
      abbr.append("PDF/A");
      break;
    case subtypePDFE:
      printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype:    ", uMap);
      typeExp = "ISO 24517 - Engineering document format using PDF (PDF/E)";
      standard.append("ISO 24517");
      abbr.append("PDF/E");
      break;
    case subtypePDFUA:
      printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype:    ", uMap);
      typeExp = "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)";
      standard.append("ISO 14289");
      abbr.append("PDF/UA");
      break;
    case subtypePDFVT:
      printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype:    ", uMap);
      typeExp = "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)";
      standard.append("ISO 16612");
      abbr.append("PDF/VT");
      break;
    case subtypePDFX:
      printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype:    ", uMap);
      typeExp = "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)";
      standard.append("ISO 15930");
      abbr.append("PDF/X");
      break;
    default:
      return;
  }

  const PDFSubtypePart subpart = doc->getPDFSubtypePart();
  const int partNumber = static_cast<int>(subpart);

  // Form the abbreviation from PDFSubtypePart and PDFSubtype.  PDF/X
  // abbreviations do not follow the part number, so they are spelled out.
  if (pdftype == subtypePDFX) {
    switch (subpart) {
      case subtypePart1:
        abbr.append("-1:2001");
        break;
      case subtypePart2:
        abbr.append("-2");
        break;
      case subtypePart3:
        abbr.append("-3:2002");
        break;
      case subtypePart4:
        abbr.append("-1:2003");
        break;
      case subtypePart5:
        abbr.append("-2");
        break;
      case subtypePart6:
        abbr.append("-3:2003");
        break;
      case subtypePart7:
        abbr.append("-4");
        break;
      case subtypePart8:
        abbr.append("-5");
        break;
      default:
        break;
    }
  } else {
    abbr.appendf("-{0:d}", partNumber);
  }

  // Form standard from PDFSubtypePart
  if (subpart != subtypePartNone && subpart != subtypePartNull) {
    standard.appendf("-{0:d}", partNumber);
  }

  // Form the subtitle from PDFSubtypePart and PDFSubtype
  switch (pdftype) {
    case subtypePDFA:
      switch (subpart) {
        case subtypePart1:
          part = "Use of PDF 1.4";
          break;
        case subtypePart2:
          part = "Use of ISO 32000-1";
          break;
        case subtypePart3:
          part = "Use of ISO 32000-1 with support for embedded files";
          break;
        default:
          break;
      }
      break;
    case subtypePDFE:
      switch (subpart) {
        case subtypePart1:
          part = "Use of PDF 1.6";
          break;
        default:
          break;
      }
      break;
    case subtypePDFUA:
      switch (subpart) {
        case subtypePart1:
          part = "Use of ISO 32000-1";
          break;
        case subtypePart2:
          part = "Use of ISO 32000-2";
          break;
        case subtypePart3:
          part = "Use of ISO 32000-1 with support for embedded files";
          break;
        default:
          break;
      }
      break;
    case subtypePDFVT:
      switch (subpart) {
        case subtypePart1:
          part = "Using PPML 2.1 and PDF 1.4";
          break;
        case subtypePart2:
          part = "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)";
          break;
        case subtypePart3:
          part = "Using PDF/X-6 (PDF/VT-3)";
          break;
        default:
          break;
      }
      break;
    case subtypePDFX:
      switch (subpart) {
        case subtypePart1:
          part = "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)";
          break;
        case subtypePart3:
          part = "Complete exchange suitable for colour-managed workflows (PDF/X-3)";
          break;
        case subtypePart4:
          part = "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)";
          break;
        case subtypePart5:
          part = "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]";
          break;
        case subtypePart6:
          part = "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)";
          break;
        case subtypePart7:
          part = "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6";
          break;
        case subtypePart8:
          part = "Partial exchange of printing data using PDF 1.6 (PDF/X-5)";
          break;
        default:
          break;
      }
      break;
    default:
      break;
  }

  // Form Conformance explanation from PDFSubtypeConformance
  switch (doc->getPDFSubtypeConformance()) {
    case subtypeConfA:
      confExp = "Level A, Accessible";
      break;
    case subtypeConfB:
      confExp = "Level B, Basic";
      break;
    case subtypeConfG:
      confExp = "Level G, External graphical content";
      break;
    case subtypeConfN:
      confExp = "Level N, External ICC profile";
      break;
    case subtypeConfP:
      confExp = "Level P, Embedded ICC profile";
      break;
    case subtypeConfPG:
      confExp = "Level PG, Embedded ICC profile and external graphical content";
      break;
    case subtypeConfU:
      confExp = "Level U, Unicode support";
      break;
    default:
      confExp = nullptr;
      break;
  }

  printf("    Title:         %s\n", typeExp);
  printf("    Abbreviation:  %s\n", abbr.getCString());
  if (part) {
    printf("    Subtitle:      Part %d: %s\n", partNumber, part);
  } else {
    printf("    Subtitle:      Part %d\n", partNumber);
  }
  printf("    Standard:      %s\n", standard.getCString());
  if (confExp) {
    printf("    Conformance:   %s\n", confExp);
  }
}

663
// Print the default pdfinfo report for a document.
//
// doc:       document to describe
// uMap:      converts Unicode text to output bytes
// filesize:  value reported on the "File size:" line
// multiPage: when set, per-page lines carry a page number and the boxes of
//            every page in [firstPage, lastPage] are listed; otherwise the
//            unnumbered single-page labels are used
//
// Sections are written in this order: info dictionary, tagging flags, form
// type, JavaScript presence, page count, encryption, page sizes and
// rotations, page boxes (only with -box), file size, linearization, PDF
// version and PDF subtype.
static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) {
  auto yesNo = [](bool flag) { return flag ? "yes" : "no"; };

  // print doc info
  Object info = doc->getDocInfo();
  if (info.isDict()) {
    Dict *infoDict = info.getDict();
    printInfoString(infoDict, "Title",        "Title:          ", uMap);
    printInfoString(infoDict, "Subject",      "Subject:        ", uMap);
    printInfoString(infoDict, "Keywords",     "Keywords:       ", uMap);
    printInfoString(infoDict, "Author",       "Author:         ", uMap);
    printInfoString(infoDict, "Creator",      "Creator:        ", uMap);
    printInfoString(infoDict, "Producer",     "Producer:       ", uMap);
    if (isoDates) {
      printISODate(infoDict, "CreationDate", "CreationDate:   ");
      printISODate(infoDict, "ModDate",      "ModDate:        ");
    } else if (rawDates) {
      printInfoString(infoDict, "CreationDate", "CreationDate:   ", uMap);
      printInfoString(infoDict, "ModDate",      "ModDate:        ", uMap);
    } else {
      printInfoDate(infoDict, "CreationDate", "CreationDate:   ");
      printInfoDate(infoDict, "ModDate",      "ModDate:        ");
    }
  }

  // print tagging info
  const auto markInfo = doc->getCatalog()->getMarkInfo();
  printf("Tagged:         %s\n", yesNo(markInfo & Catalog::markInfoMarked));
  printf("UserProperties: %s\n", yesNo(markInfo & Catalog::markInfoUserProperties));
  printf("Suspects:       %s\n", yesNo(markInfo & Catalog::markInfoSuspects));

  // print form info
  switch (doc->getCatalog()->getFormType()) {
    case Catalog::NoForm:
      printf("Form:           none\n");
      break;
    case Catalog::AcroForm:
      printf("Form:           AcroForm\n");
      break;
    case Catalog::XfaForm:
      printf("Form:           XFA\n");
      break;
  }

  // print javascript info
  {
    JSInfo jsInfo(doc, firstPage - 1);
    jsInfo.scanJS(lastPage - firstPage + 1);
    printf("JavaScript:     %s\n", yesNo(jsInfo.containsJS()));
  }

  // print page count
  printf("Pages:          %d\n", doc->getNumPages());

  // print encryption info
  printf("Encrypted:      ");
  if (!doc->isEncrypted()) {
    printf("no\n");
  } else {
    Guchar *fileKey;
    CryptAlgorithm encAlgorithm;
    int keyLength;
    doc->getXRef()->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);

    const char *encAlgorithmName = "unknown";
    switch (encAlgorithm) {
      case cryptRC4:
        encAlgorithmName = "RC4";
        break;
      case cryptAES:
        encAlgorithmName = "AES";
        break;
      case cryptAES256:
        encAlgorithmName = "AES-256";
        break;
      case cryptNone:
        break;
    }

    printf("yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n",
           yesNo(doc->okToPrint(gTrue)),
           yesNo(doc->okToCopy(gTrue)),
           yesNo(doc->okToChange(gTrue)),
           yesNo(doc->okToAddNotes(gTrue)),
           encAlgorithmName);
  }

  // print page size
  for (int pg = firstPage; pg <= lastPage; ++pg) {
    const double w = doc->getPageCropWidth(pg);
    const double h = doc->getPageCropHeight(pg);
    if (multiPage) {
      printf("Page %4d size: %g x %g pts", pg, w, h);
    } else {
      printf("Page size:      %g x %g pts", w, h);
    }

    const bool letterPortrait = fabs(w - 612) < 0.1 && fabs(h - 792) < 0.1;
    const bool letterLandscape = fabs(w - 792) < 0.1 && fabs(h - 612) < 0.1;
    if (letterPortrait || letterLandscape) {
      printf(" (letter)");
    } else {
      // Start from A0 in points and halve the sheet for each further size,
      // accepting either orientation within one point.
      double hISO = sqrt(sqrt(2.0)) * 7200 / 2.54;
      double wISO = hISO / sqrt(2.0);
      for (int series = 0; series <= 6; ++series) {
        if ((fabs(w - wISO) < 1 && fabs(h - hISO) < 1) ||
            (fabs(w - hISO) < 1 && fabs(h - wISO) < 1)) {
          printf(" (A%d)", series);
          break;
        }
        hISO = wISO;
        wISO /= sqrt(2.0);
      }
    }
    printf("\n");

    const int r = doc->getPageRotate(pg);
    if (multiPage) {
      printf("Page %4d rot:  %d\n", pg, r);
    } else {
      printf("Page rot:       %d\n", r);
    }
  }

  // print the boxes
  if (printBoxes) {
    if (!multiPage) {
      Page *page = doc->getPage(firstPage);
      if (page) {
        printBox("MediaBox:       ", page->getMediaBox());
        printBox("CropBox:        ", page->getCropBox());
        printBox("BleedBox:       ", page->getBleedBox());
        printBox("TrimBox:        ", page->getTrimBox());
        printBox("ArtBox:         ", page->getArtBox());
      } else {
        error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage);
      }
    } else {
      char label[256];
      for (int pg = firstPage; pg <= lastPage; ++pg) {
        Page *page = doc->getPage(pg);
        if (!page) {
          error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg);
          continue;
        }
        snprintf(label, sizeof(label), "Page %4d MediaBox: ", pg);
        printBox(label, page->getMediaBox());
        snprintf(label, sizeof(label), "Page %4d CropBox:  ", pg);
        printBox(label, page->getCropBox());
        snprintf(label, sizeof(label), "Page %4d BleedBox: ", pg);
        printBox(label, page->getBleedBox());
        snprintf(label, sizeof(label), "Page %4d TrimBox:  ", pg);
        printBox(label, page->getTrimBox());
        snprintf(label, sizeof(label), "Page %4d ArtBox:   ", pg);
        printBox(label, page->getArtBox());
      }
    }
  }

  // print file size
  printf("File size:      %lld bytes\n", filesize);

  // print linearization info
  printf("Optimized:      %s\n", yesNo(doc->isLinearized()));

  // print PDF version
  printf("PDF version:    %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());

  printPdfSubtype(doc, uMap);
}
838

839 840 841 842 843 844 845 846 847 848 849
int main(int argc, char *argv[]) {
  PDFDoc *doc;
  GooString *fileName;
  GooString *ownerPW, *userPW;
  UnicodeMap *uMap;
  FILE *f;
  GBool ok;
  int exitCode;
  GBool multiPage;

  exitCode = 99;
850

851
  // parse args
852
  Win32Console win32console(&argc, &argv);
853 854 855 856 857 858 859
  ok = parseArgs(argDesc, &argc, argv);
  if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
    fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
    fprintf(stderr, "%s\n", popplerCopyright);
    fprintf(stderr, "%s\n", xpdfCopyright);
    if (!printVersion) {
      printUsage("pdfinfo", "<PDF-file>", argDesc);
860
    }
861 862 863
    if (printVersion || printHelp)
      exitCode = 0;
    goto err0;
864 865
  }

866 867
  if (printStructureText)
    printStructure = gTrue;
868

869 870
  // read config file
  globalParams = new GlobalParams();
871

872 873 874 875 876 877
  if (printEnc) {
    printEncodings();
    delete globalParams;
    exitCode = 0;
    goto err0;
  }
878

879
  fileName = new GooString(argv[1]);
880

881 882
  if (textEncName[0]) {
    globalParams->setTextEncoding(textEncName);
883 884
  }

885 886 887 888 889
  // get mapping to output encoding
  if (!(uMap = globalParams->getTextEncoding())) {
    error(errCommandLine, -1, "Couldn't get text encoding");
    delete fileName;
    goto err1;
890 891
  }

892 893 894 895
  // open PDF file
  if (ownerPassword[0] != '\001') {
    ownerPW = new GooString(ownerPassword);
  } else {
896
    ownerPW = nullptr;
897 898 899 900
  }
  if (userPassword[0] != '\001') {
    userPW = new GooString(userPassword);
  } else {
901
    userPW = nullptr;
902
  }
903

904 905 906
  if (fileName->cmp("-") == 0) {
      delete fileName;
      fileName = new GooString("fd://0");
907 908
  }

909
  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
910

911 912 913 914 915 916 917 918 919
  if (userPW) {
    delete userPW;
  }
  if (ownerPW) {
    delete ownerPW;
  }
  if (!doc->isOk()) {
    exitCode = 1;
    goto err2;
920 921
  }

922 923 924
  // get page range
  if (firstPage < 1) {
    firstPage = 1;
925
  }
926 927 928 929
  if (lastPage == 0) {
    multiPage = gFalse;
  } else {
    multiPage = gTrue;
930
  }
931 932
  if (lastPage < 1 || lastPage > doc->getNumPages()) {
    lastPage = doc->getNumPages();
933
  }
934 935 936 937 938
  if (lastPage < firstPage) {
    error(errCommandLine, -1,
          "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).",
          firstPage, lastPage);
    goto err2;
939 940
  }

941 942 943 944 945 946 947
  if (printMetadata) {
    // print the metadata
    GooString *metadata = doc->readMetadata();
    if (metadata) {
      fputs(metadata->getCString(), stdout);
      fputc('\n', stdout);
      delete metadata;
948
    }
949 950 951 952 953 954 955 956 957 958
  } else if (printJS) {
    // print javascript
    JSInfo jsInfo(doc, firstPage - 1);
    jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
  } else if (printStructure || printStructureText) {
    // print structure
    const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot();
    if (structTree) {
      for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
	printStruct(structTree->getChild(i), 0);
959
      }
960
    }
961 962
  } else if (printDests) {
    printDestinations(doc, uMap);
963 964 965
  } else {
    // print info
    long long filesize = 0;
966

967 968 969 970 971 972 973 974 975 976
#ifdef VMS
    f = fopen(fileName->getCString(), "rb", "ctx=stm");
#else
    f = fopen(fileName->getCString(), "rb");
#endif
    if (f) {
      Gfseek(f, 0, SEEK_END);
      filesize = Gftell(f);
      fclose(f);
    }
977 978 979 980

    if (multiPage == gFalse)
      lastPage = 1;

981
    printInfo(doc, uMap, filesize, multiPage);
982
  }
983 984 985 986 987 988 989 990 991 992 993 994
  exitCode = 0;

  // clean up
 err2:
  uMap->decRefCnt();
  delete doc;
  delete fileName;
 err1:
  delete globalParams;
 err0:

  return exitCode;
995
}