//========================================================================
//
// XRef.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005 Dan Sheridan <dan.sheridan@postman.org.uk>
// Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2006, 2008, 2010, 2012-2014, 2016-2018 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2007-2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2007 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009, 2010 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2012, 2013, 2016 Thomas Freitag <Thomas.Freitag@kabelmail.de>
// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013, 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 Pino Toscano <pino@kde.org>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018, 2019 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Tobias Deiminger <haxtibal@posteo.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

Kristian Høgsberg's avatar
Kristian Høgsberg committed
36
#include <config.h>
Adrian Johnson's avatar
Adrian Johnson committed
37
#include "poppler-config.h"
Kristian Høgsberg's avatar
Kristian Høgsberg committed
38 39 40 41

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
Adrian Johnson's avatar
Adrian Johnson committed
42
#include <math.h>
Kristian Høgsberg's avatar
Kristian Høgsberg committed
43
#include <ctype.h>
44
#include <limits.h>
Adrian Johnson's avatar
Adrian Johnson committed
45
#include <float.h>
46
#include "goo/gfile.h"
Kristian Høgsberg's avatar
Kristian Høgsberg committed
47 48 49 50 51 52 53 54 55 56 57 58
#include "goo/gmem.h"
#include "Object.h"
#include "Stream.h"
#include "Lexer.h"
#include "Parser.h"
#include "Dict.h"
#include "Error.h"
#include "ErrorCodes.h"
#include "XRef.h"

//------------------------------------------------------------------------
// Permission bits
59
// Note that the PDF spec uses 1 base (eg bit 3 is 1<<2)
Kristian Høgsberg's avatar
Kristian Høgsberg committed
60 61
//------------------------------------------------------------------------

62 63 64 65 66 67 68 69
// Bit masks for the individual permission flags.  The trailing comment
// on each line is the 1-based bit number, so "bit N" is the mask
// (1 << (N-1)).
// NOTE(review): presumably these are tested against permFlags - confirm
// against the users of these macros.
#define permPrint         (1<<2)  // bit 3
#define permChange        (1<<3)  // bit 4
#define permCopy          (1<<4)  // bit 5
#define permNotes         (1<<5)  // bit 6
#define permFillForm      (1<<8)  // bit 9
#define permAccessibility (1<<9)  // bit 10
#define permAssemble      (1<<10) // bit 11
#define permHighResPrint  (1<<11) // bit 12
Kristian Høgsberg's avatar
Kristian Høgsberg committed
70 71 72 73 74 75 76 77 78 79 80
#define defPermFlags 0xfffc

//------------------------------------------------------------------------
// ObjectStream
//------------------------------------------------------------------------

class ObjectStream {
public:

  // Create an object stream, using object number <objStrNum>,
  // generation 0.
Thomas Freitag's avatar
Thomas Freitag committed
81
  ObjectStream(XRef *xref, int objStrNumA, int recursion = 0);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
82

83
  bool isOk() { return ok; }
84

Kristian Høgsberg's avatar
Kristian Høgsberg committed
85 86
  ~ObjectStream();

87 88 89
  ObjectStream(const ObjectStream &) = delete;
  ObjectStream& operator=(const ObjectStream &) = delete;

Kristian Høgsberg's avatar
Kristian Høgsberg committed
90 91 92 93 94
  // Return the object number of this object stream.
  int getObjStrNum() { return objStrNum; }

  // Get the <objIdx>th object from this stream, which should be
  // object number <objNum>, generation 0.
Albert Astals Cid's avatar
Albert Astals Cid committed
95
  Object getObject(int objIdx, int objNum);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
96 97 98 99 100 101 102

private:

  int objStrNum;		// object number of the object stream
  int nObjects;			// number of objects in the stream
  Object *objs;			// the objects (length = nObjects)
  int *objNums;			// the object numbers (length = nObjects)
103
  bool ok;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
104 105
};

Thomas Freitag's avatar
Thomas Freitag committed
106
ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
107 108
  Stream *str;
  Parser *parser;
Adrian Johnson's avatar
Adrian Johnson committed
109
  Goffset *offsets;
Albert Astals Cid's avatar
Albert Astals Cid committed
110
  Object objStr, obj1;
Adrian Johnson's avatar
Adrian Johnson committed
111 112
  Goffset first;
  int i;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
113 114 115

  objStrNum = objStrNumA;
  nObjects = 0;
116 117
  objs = nullptr;
  objNums = nullptr;
118
  ok = false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
119

Albert Astals Cid's avatar
Albert Astals Cid committed
120 121 122
  objStr = xref->fetch(objStrNum, 0, recursion);
  if (!objStr.isStream()) {
    return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
123 124
  }

Albert Astals Cid's avatar
Albert Astals Cid committed
125 126 127
  obj1 = objStr.streamGetDict()->lookup("N", recursion);
  if (!obj1.isInt()) {
    return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
128 129 130
  }
  nObjects = obj1.getInt();
  if (nObjects <= 0) {
Albert Astals Cid's avatar
Albert Astals Cid committed
131
    return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
132 133
  }

Albert Astals Cid's avatar
Albert Astals Cid committed
134
  obj1 = objStr.streamGetDict()->lookup("First", recursion);
Adrian Johnson's avatar
Adrian Johnson committed
135
  if (!obj1.isInt() && !obj1.isInt64()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
136
    return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
137
  }
Adrian Johnson's avatar
Adrian Johnson committed
138 139 140 141
  if (obj1.isInt())
    first = obj1.getInt();
  else
    first = obj1.getInt64();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
142
  if (first < 0) {
Albert Astals Cid's avatar
Albert Astals Cid committed
143
    return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
144 145
  }

146 147 148 149
  // this is an arbitrary limit to avoid integer overflow problems
  // in the 'new Object[nObjects]' call (Acrobat apparently limits
  // object streams to 100-200 objects)
  if (nObjects > 1000000) {
150
    error(errSyntaxError, -1, "Too many objects in an object stream");
Albert Astals Cid's avatar
Albert Astals Cid committed
151
    return;
152
  }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
153
  objs = new Object[nObjects];
154
  objNums = (int *)gmallocn(nObjects, sizeof(int));
Adrian Johnson's avatar
Adrian Johnson committed
155
  offsets = (Goffset *)gmallocn(nObjects, sizeof(Goffset));
Kristian Høgsberg's avatar
Kristian Høgsberg committed
156 157 158

  // parse the header: object numbers and offsets
  objStr.streamReset();
159 160
  str = new EmbedStream(objStr.getStream(), Object(objNull), true, first);
  parser = new Parser(xref, new Lexer(xref, str), false);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
161
  for (i = 0; i < nObjects; ++i) {
Albert Astals Cid's avatar
Albert Astals Cid committed
162 163
    obj1 = parser->getObj();
    Object obj2 = parser->getObj();
Adrian Johnson's avatar
Adrian Johnson committed
164
    if (!obj1.isInt() || !(obj2.isInt() || obj2.isInt64())) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
165 166
      delete parser;
      gfree(offsets);
Albert Astals Cid's avatar
Albert Astals Cid committed
167
      return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
168 169
    }
    objNums[i] = obj1.getInt();
Adrian Johnson's avatar
Adrian Johnson committed
170 171 172 173
    if (obj2.isInt())
      offsets[i] = obj2.getInt();
    else
      offsets[i] = obj2.getInt64();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
174 175 176 177
    if (objNums[i] < 0 || offsets[i] < 0 ||
	(i > 0 && offsets[i] < offsets[i-1])) {
      delete parser;
      gfree(offsets);
Albert Astals Cid's avatar
Albert Astals Cid committed
178
      return;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
179 180 181 182 183 184 185 186
    }
  }
  while (str->getChar() != EOF) ;
  delete parser;

  // skip to the first object - this shouldn't be necessary because
  // the First key is supposed to be equal to offsets[0], but just in
  // case...
Adrian Johnson's avatar
Adrian Johnson committed
187
  for (Goffset pos = first; pos < offsets[0]; ++pos) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
188 189 190 191 192 193
    objStr.getStream()->getChar();
  }

  // parse the objects
  for (i = 0; i < nObjects; ++i) {
    if (i == nObjects - 1) {
194
      str = new EmbedStream(objStr.getStream(), Object(objNull), false, 0);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
195
    } else {
196
      str = new EmbedStream(objStr.getStream(), Object(objNull), true,
Kristian Høgsberg's avatar
Kristian Høgsberg committed
197 198
			    offsets[i+1] - offsets[i]);
    }
199
    parser = new Parser(xref, new Lexer(xref, str), false);
Albert Astals Cid's avatar
Albert Astals Cid committed
200
    objs[i] = parser->getObj();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
201 202 203 204 205
    while (str->getChar() != EOF) ;
    delete parser;
  }

  gfree(offsets);
206
  ok = true;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
207 208 209
}

// Release the object-number table and the parsed objects.
ObjectStream::~ObjectStream() {
  gfree(objNums);
  delete[] objs;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
214
// Return a copy of the <objIdx>th stored object, provided the index is
// valid and the object number recorded for that slot equals <objNum>;
// otherwise return a null object.
Object ObjectStream::getObject(int objIdx, int objNum) {
  const bool indexValid = objIdx >= 0 && objIdx < nObjects;
  if (indexValid && objNums[objIdx] == objNum) {
    return objs[objIdx].copy();
  }
  return Object(objNull);
}

//------------------------------------------------------------------------
// XRef
//------------------------------------------------------------------------

225 226
// Declares a local std::unique_lock named `locker` on the recursive
// mutex called `mutex` at the point of use; the lock is held until the
// enclosing scope ends.
#define xrefLocker()   std::unique_lock<std::recursive_mutex> locker(mutex)

227
// Default constructor: give every member its initial value.  The other
// constructors delegate to this one.
XRef::XRef() : objStrs{5} {
  // overall status
  ok = true;
  errCode = errNone;
  modified = false;
  xrefReconstructed = false;
  scannedSpecialFlags = false;

  // entry table (nothing allocated yet)
  entries = nullptr;
  capacity = 0;
  size = 0;

  // stream-end table
  streamEnds = nullptr;
  streamEndsLen = 0;

  // xref location / kind
  mainXRefEntriesOffset = 0;
  xRefStream = false;
  rootNum = -1;
  strOwner = false;

  // encryption and permissions
  encrypted = false;
  encAlgorithm = cryptNone;
  permFlags = defPermFlags;
  ownerPasswordOk = false;
}

248
// Build an XRef that only carries a trailer dictionary: a copy of
// <trailerDictA> is stored when it is a dictionary, otherwise the
// trailer is left untouched.
XRef::XRef(const Object *trailerDictA) : XRef{} {
  if (!trailerDictA->isDict()) {
    return;
  }
  trailerDict = trailerDictA->copy();
}

253
// Build the cross-reference table for the document in <strA>.
//
//   pos                     offset of the main xref section (0 means
//                           "unknown", which triggers reconstruction)
//   mainXRefEntriesOffsetA  stored in mainXRefEntriesOffset
//   wasReconstructed        forwarded to constructXRef()
//   reconstruct             when true, reconstruct the table up front
//
// Whenever reading fails the table is reconstructed; if that fails too,
// ok is false and errCode is errDamaged on return.
XRef::XRef(BaseStream *strA, Goffset pos, Goffset mainXRefEntriesOffsetA, bool *wasReconstructed, bool reconstruct) : XRef{} {
  // Reconstruct the table, recording the outcome in ok/errCode.
  // Returns the new value of ok.
  auto rebuild = [this, wasReconstructed]() -> bool {
    ok = constructXRef(wasReconstructed);
    if (!ok) {
      errCode = errDamaged;
    }
    return ok;
  };

  Object obj;

  mainXRefEntriesOffset = mainXRefEntriesOffsetA;

  // read the trailer
  str = strA;
  start = str->getStart();
  prevXRefOffset = mainXRefOffset = pos;

  if (reconstruct && !rebuild()) {
    return;
  }

  if (prevXRefOffset == 0) {
    // there was a problem with the 'startxref' position: try to
    // reconstruct the xref table
    if (!rebuild()) {
      return;
    }
  } else {
    // read the xref table
    std::vector<Goffset> followedXRefStm;
    readXRef(&prevXRefOffset, &followedXRefStm, nullptr);

    // if there was a problem with the xref table,
    // try to reconstruct it
    if (!ok && !rebuild()) {
      return;
    }
  }

  // set size to (at least) the size specified in trailer dict
  obj = trailerDict.dictLookupNF("Size");
  if (!obj.isInt()) {
    error(errSyntaxWarning, -1, "No valid XRef size in trailer");
  } else if (obj.getInt() > size) {
    const int declaredSize = obj.getInt();
    if (resize(declaredSize) != declaredSize && !rebuild()) {
      return;
    }
  }

  // get the root dictionary (catalog) object
  obj = trailerDict.dictLookupNF("Root");
  if (obj.isRef()) {
    rootNum = obj.getRefNum();
    rootGen = obj.getRefGen();
  } else if (!rebuild()) {
    return;
  }

  // now set the trailer dictionary's xref pointer so we can fetch
  // indirect objects from it
  trailerDict.getDict()->setXRef(this);
}

// Tear down the table.  The Object inside each entry is created with
// placement new elsewhere in this file, so it is destroyed explicitly
// here before the raw entry storage is freed.
XRef::~XRef() {
  int idx = 0;
  while (idx < size) {
    entries[idx].obj.~Object();
    ++idx;
  }
  gfree(entries);

  if (streamEnds) {
    gfree(streamEnds);
  }
  // the stream is only deleted when this XRef owns it (see copy())
  if (strOwner) {
    delete str;
  }
}

339
// Create an independent copy of this XRef.
//
// The copy owns a copy of the underlying stream (strOwner is set), a
// copy of the trailer dictionary, the encryption parameters and the
// entry table.  Entry objects are not copied: every entry in the result
// starts with a null object.
//
// Returns nullptr (after reporting an error) if the entry table cannot
// be reserved; otherwise the caller owns the returned object.
//
// NOTE(review): mainXRefOffset, modified and xrefReconstructed are not
// transferred to the copy - confirm that this is intended.
// NOTE(review): reserve() returns the capacity, so with size == 0 and
// nothing allocated yet it returns 0 and this function reports failure -
// confirm whether copying an empty table is expected to work.
XRef *XRef::copy() const {
  XRef *xref = new XRef();
  xref->str = str->copy();
  xref->strOwner = true;

  // document-level state
  xref->encrypted = encrypted;
  xref->permFlags = permFlags;
  xref->ownerPasswordOk = ownerPasswordOk;
  xref->rootGen = rootGen;
  xref->rootNum = rootNum;

  // xref location / kind
  xref->start = start;
  xref->prevXRefOffset = prevXRefOffset;
  xref->mainXRefEntriesOffset = mainXRefEntriesOffset;
  xref->xRefStream = xRefStream;
  xref->trailerDict = trailerDict.copy();

  // encryption parameters
  xref->encAlgorithm = encAlgorithm;
  xref->encRevision = encRevision;
  xref->encVersion = encVersion;
  xref->keyLength = keyLength;
  for (int i = 0; i < 32; i++) {
    xref->fileKey[i] = fileKey[i];
  }

  if (xref->reserve(size) == 0) {
    error(errSyntaxError, -1, "unable to allocate {0:d} entries", size);
    delete xref;
    return nullptr;
  }
  xref->size = size;
  for (int i = 0; i < size; ++i) {
    xref->entries[i].offset = entries[i].offset;
    xref->entries[i].type = entries[i].type;
    // storage from reserve() is raw, so the Object must be constructed
    // in place
    new (&xref->entries[i].obj) Object(objNull);
    xref->entries[i].flags = entries[i].flags;
    xref->entries[i].gen = entries[i].gen;
  }

  xref->streamEndsLen = streamEndsLen;
  if (streamEndsLen != 0) {
    // gmallocn checks the count * size multiplication
    xref->streamEnds = (Goffset *)gmallocn(streamEndsLen, sizeof(Goffset));
    for (int i = 0; i < streamEndsLen; i++) {
      xref->streamEnds[i] = streamEnds[i];
    }
  }
  return xref;
}

Hib Eris's avatar
Hib Eris committed
387 388 389 390 391 392 393 394 395 396 397 398
int XRef::reserve(int newSize)
{
  if (newSize > capacity) {

    int realNewSize;
    for (realNewSize = capacity ? 2 * capacity : 1024;
          newSize > realNewSize && realNewSize > 0;
          realNewSize <<= 1) ;
    if ((realNewSize < 0) ||
        (realNewSize >= INT_MAX / (int)sizeof(XRefEntry))) {
      return 0;
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
399

Hib Eris's avatar
Hib Eris committed
400
    void *p = greallocn_checkoverflow(entries, realNewSize, sizeof(XRefEntry));
401
    if (p == nullptr) {
Hib Eris's avatar
Hib Eris committed
402
      return 0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
403
    }
Hib Eris's avatar
Hib Eris committed
404 405 406 407

    entries = (XRefEntry *) p;
    capacity = realNewSize;

Kristian Høgsberg's avatar
Kristian Høgsberg committed
408 409
  }

Hib Eris's avatar
Hib Eris committed
410 411 412 413 414 415 416 417 418 419
  return capacity;
}

int XRef::resize(int newSize)
{
  if (newSize > size) {

    if (reserve(newSize) < newSize) return size;

    for (int i = size; i < newSize; ++i) {
Adrian Johnson's avatar
Adrian Johnson committed
420
      entries[i].offset = -1;
Hib Eris's avatar
Hib Eris committed
421
      entries[i].type = xrefEntryNone;
422
      new (&entries[i].obj) Object(objNull);
423
      entries[i].flags = 0;
Hib Eris's avatar
Hib Eris committed
424 425 426 427
      entries[i].gen = 0;
    }
  } else {
    for (int i = newSize; i < size; i++) {
428
      entries[i].obj.~Object();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
429 430 431
    }
  }

Hib Eris's avatar
Hib Eris committed
432 433 434
  size = newSize;

  return size;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
435 436
}

437 438 439
/* Read one xref table section.  Also reads the associated trailer
 * dictionary, and returns the prev pointer (if any).
 * Arguments:
Adrian Johnson's avatar
Adrian Johnson committed
440
 *   pos                Points to a Goffset containing the offset of the XRef
441 442 443 444 445 446 447
 *                      section to be read. If a prev pointer is found, *pos is
 *                      updated with its value
 *   followedXRefStm    Used in case of nested readXRef calls to spot circular
 *                      references in XRefStm pointers
 *   xrefStreamObjsNum  If not NULL, every time a XRef stream is encountered,
 *                      its object number is appended
 * Return value:
448
 *   true if a prev pointer is found, otherwise false
449
 */
450
bool XRef::readXRef(Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
451 452
  Parser *parser;
  Object obj;
453
  bool more;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
454

Albert Astals Cid's avatar
Albert Astals Cid committed
455
  if (unlikely(start > (LLONG_MAX - *pos))) {
456 457
    ok = false;
    return false;
458 459
  }

Kristian Høgsberg's avatar
Kristian Høgsberg committed
460
  // start up a parser, parse one token
461 462
  parser = new Parser(nullptr,
	     new Lexer(nullptr,
463 464 465
	       str->makeSubStream(start + *pos, false, 0, Object(objNull))),
	     true);
  obj = parser->getObj(true);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
466 467 468

  // parse an old-style xref table
  if (obj.isCmd("xref")) {
469
    more = readXRefTable(parser, pos, followedXRefStm, xrefStreamObjsNum);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
470 471 472

  // parse an xref stream
  } else if (obj.isInt()) {
473
    const int objNum = obj.getInt();
474
    if (obj = parser->getObj(true), !obj.isInt()) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
475 476
      goto err1;
    }
477
    if (obj = parser->getObj(true), !obj.isCmd("obj")) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
478 479
      goto err1;
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
480
    if (obj = parser->getObj(), !obj.isStream()) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
481 482
      goto err1;
    }
Hib Eris's avatar
Hib Eris committed
483
    if (trailerDict.isNone()) {
484
      xRefStream = true;
Hib Eris's avatar
Hib Eris committed
485
    }
486 487 488
    if (xrefStreamObjsNum) {
      xrefStreamObjsNum->push_back(objNum);
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
489 490 491 492 493 494 495 496 497 498 499
    more = readXRefStream(obj.getStream(), pos);

  } else {
    goto err1;
  }

  delete parser;
  return more;

 err1:
  delete parser;
500 501
  ok = false;
  return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
502 503
}

504
bool XRef::readXRefTable(Parser *parser, Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
505
  XRefEntry entry;
506
  bool more;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
507
  Object obj, obj2;
Adrian Johnson's avatar
Adrian Johnson committed
508
  Goffset pos2;
Albert Astals Cid's avatar
Albert Astals Cid committed
509
  int first, n;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
510 511

  while (1) {
512
    obj = parser->getObj(true);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
513 514 515 516
    if (obj.isCmd("trailer")) {
      break;
    }
    if (!obj.isInt()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
517
      goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
518 519
    }
    first = obj.getInt();
520
    obj = parser->getObj(true);
Albert Astals Cid's avatar
Albert Astals Cid committed
521 522
    if (!obj.isInt()) {
      goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
523 524
    }
    n = obj.getInt();
525
    if (first < 0 || n < 0 || first > INT_MAX - n) {
Hib Eris's avatar
Hib Eris committed
526
      goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
527 528
    }
    if (first + n > size) {
Hib Eris's avatar
Hib Eris committed
529
      if (resize(first + n) != first + n) {
530
        error(errSyntaxError, -1, "Invalid 'obj' parameters'");
Hib Eris's avatar
Hib Eris committed
531
        goto err0;
532
      }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
533
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
534
    for (int i = first; i < first + n; ++i) {
535
      obj = parser->getObj(true);
Adrian Johnson's avatar
Adrian Johnson committed
536 537 538 539 540
      if (obj.isInt()) {
	entry.offset = obj.getInt();
      } else if (obj.isInt64()) {
	entry.offset = obj.getInt64();
      } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
541
	goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
542
      }
543
      obj = parser->getObj(true);
Albert Astals Cid's avatar
Albert Astals Cid committed
544 545
      if (!obj.isInt()) {
	goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
546 547
      }
      entry.gen = obj.getInt();
548
      entry.flags = 0;
549
      obj = parser->getObj(true);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
550 551 552 553 554
      if (obj.isCmd("n")) {
	entry.type = xrefEntryUncompressed;
      } else if (obj.isCmd("f")) {
	entry.type = xrefEntryFree;
      } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
555
	goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
556
      }
Adrian Johnson's avatar
Adrian Johnson committed
557
      if (entries[i].offset == -1) {
558 559 560 561
	entries[i].offset = entry.offset;
	entries[i].gen = entry.gen;
	entries[i].type = entry.type;
	entries[i].flags = entry.flags;
Albert Astals Cid's avatar
Albert Astals Cid committed
562
	entries[i].obj.setToNull();
563

Kristian Høgsberg's avatar
Kristian Høgsberg committed
564 565 566 567 568 569 570
	// PDF files of patents from the IBM Intellectual Property
	// Network have a bug: the xref table claims to start at 1
	// instead of 0.
	if (i == 1 && first == 1 &&
	    entries[1].offset == 0 && entries[1].gen == 65535 &&
	    entries[1].type == xrefEntryFree) {
	  i = first = 0;
571 572 573 574
	  entries[0].offset = 0;
	  entries[0].gen = 65535;
	  entries[0].type = xrefEntryFree;
	  entries[0].flags = entries[1].flags;
Albert Astals Cid's avatar
Albert Astals Cid committed
575
	  entries[0].obj = std::move(entries[1].obj);
576

Adrian Johnson's avatar
Adrian Johnson committed
577
	  entries[1].offset = -1;
578
	  entries[1].obj.setToNull();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
579 580 581 582 583 584
	}
      }
    }
  }

  // read the trailer dictionary
Albert Astals Cid's avatar
Albert Astals Cid committed
585 586 587
  obj = parser->getObj();
  if (!obj.isDict()) {
    goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
588 589 590
  }

  // get the 'Prev' pointer
Albert Astals Cid's avatar
Albert Astals Cid committed
591
  obj2 = obj.getDict()->lookupNF("Prev");
Adrian Johnson's avatar
Adrian Johnson committed
592 593 594 595 596
  if (obj2.isInt() || obj2.isInt64()) {
    if (obj2.isInt())
      pos2 = obj2.getInt();
    else
      pos2 = obj2.getInt64();
597 598
    if (pos2 != *pos) {
      *pos = pos2;
599
      more = true;
600 601
    } else {
      error(errSyntaxWarning, -1, "Infinite loop in xref table");
602
      more = false;
603
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
604 605 606
  } else if (obj2.isRef()) {
    // certain buggy PDF generators generate "/Prev NNN 0 R" instead
    // of "/Prev NNN"
Oliver Sander's avatar
Oliver Sander committed
607
    pos2 = (unsigned int)obj2.getRefNum();
608 609
    if (pos2 != *pos) {
      *pos = pos2;
610
      more = true;
611 612
    } else {
      error(errSyntaxWarning, -1, "Infinite loop in xref table");
613
      more = false;
614
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
615
  } else {
616
    more = false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
617 618 619 620
  }

  // save the first trailer dictionary
  if (trailerDict.isNone()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
621
    trailerDict = obj.copy();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
622 623 624
  }

  // check for an 'XRefStm' key
Albert Astals Cid's avatar
Albert Astals Cid committed
625
  obj2 = obj.getDict()->lookup("XRefStm");
Adrian Johnson's avatar
Adrian Johnson committed
626 627 628 629
  if (obj2.isInt() || obj2.isInt64()) {
    if (obj2.isInt())
      pos2 = obj2.getInt();
    else
Thomas Freitag's avatar
Thomas Freitag committed
630
      pos2 = obj2.getInt64();
631
    for (size_t i = 0; ok == true && i < followedXRefStm->size(); ++i) {
632
      if (followedXRefStm->at(i) == pos2) {
633
        ok = false;
634 635 636 637
      }
    }
    if (ok) {
      followedXRefStm->push_back(pos2);
638
      readXRef(&pos2, followedXRefStm, xrefStreamObjsNum);
639
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
640
    if (!ok) {
Albert Astals Cid's avatar
Albert Astals Cid committed
641
      goto err0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
642 643 644 645 646
    }
  }

  return more;

Hib Eris's avatar
Hib Eris committed
647
 err0:
648 649
  ok = false;
  return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
650 651
}

652
bool XRef::readXRefStream(Stream *xrefStr, Goffset *pos) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
653
  int w[3];
654
  bool more;
Albert Astals Cid's avatar
Albert Astals Cid committed
655
  Object obj;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
656

657
  ok = false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
658

Albert Astals Cid's avatar
Albert Astals Cid committed
659 660 661
  Dict *dict = xrefStr->getDict();
  obj = dict->lookupNF("Size");
  if (!obj.isInt()) {
662
    return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
663
  }
Albert Astals Cid's avatar
Albert Astals Cid committed
664
  int newSize = obj.getInt();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
665
  if (newSize < 0) {
666
    return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
667 668
  }
  if (newSize > size) {
Hib Eris's avatar
Hib Eris committed
669
    if (resize(newSize) != newSize) {
670
      error(errSyntaxError, -1, "Invalid 'size' parameter");
671
      return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
672 673 674
    }
  }

Albert Astals Cid's avatar
Albert Astals Cid committed
675 676
  obj = dict->lookupNF("W");
  if (!obj.isArray() || obj.arrayGetLength() < 3) {
677
    return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
678
  }
Albert Astals Cid's avatar
Albert Astals Cid committed
679 680 681
  for (int i = 0; i < 3; ++i) {
    Object obj2 = obj.arrayGet(i);
    if (!obj2.isInt()) {
682
      return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
683 684
    }
    w[i] = obj2.getInt();
Adrian Johnson's avatar
Adrian Johnson committed
685
    if (w[i] < 0) {
686
      return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
687 688
    }
  }
689
  if (w[0] > (int)sizeof(int) || w[1] > (int)sizeof(long long) || w[2] > (int)sizeof(long long)) {
690
    return false;
Adrian Johnson's avatar
Adrian Johnson committed
691
  }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
692 693

  xrefStr->reset();
Albert Astals Cid's avatar
Albert Astals Cid committed
694
  Object idx = dict->lookupNF("Index");
Kristian Høgsberg's avatar
Kristian Høgsberg committed
695
  if (idx.isArray()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
696 697 698
    for (int i = 0; i+1 < idx.arrayGetLength(); i += 2) {
      obj = idx.arrayGet(i);
      if (!obj.isInt()) {
699
	return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
700
      }
Albert Astals Cid's avatar
Albert Astals Cid committed
701 702 703
      int first = obj.getInt();
      obj = idx.arrayGet(i+1);
      if (!obj.isInt()) {
704
	return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
705
      }
Albert Astals Cid's avatar
Albert Astals Cid committed
706
      int n = obj.getInt();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
707 708
      if (first < 0 || n < 0 ||
	  !readXRefStreamSection(xrefStr, w, first, n)) {
709
	return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
710 711 712 713
      }
    }
  } else {
    if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
714
      return false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
715 716 717
    }
  }

Albert Astals Cid's avatar
Albert Astals Cid committed
718
  obj = dict->lookupNF("Prev");
719
  if (obj.isInt() && obj.getInt() >= 0) {
Adrian Johnson's avatar
Adrian Johnson committed
720
    *pos = obj.getInt();
721
    more = true;
722
  } else if (obj.isInt64() && obj.getInt64() >= 0) {
Adrian Johnson's avatar
Adrian Johnson committed
723
    *pos = obj.getInt64();
724
    more = true;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
725
  } else {
726
    more = false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
727 728
  }
  if (trailerDict.isNone()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
729
    trailerDict = xrefStr->getDictObject()->copy();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
730 731
  }

732
  ok = true;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
733 734 735
  return more;
}

736
// Read one subsection of a cross-reference stream into entries[].
//
//   xrefStr - stream the entry bytes are read from, one getChar() at a time
//   w       - three field widths, in bytes: w[0] = type field, w[1] = second
//             field (stored as the entry offset), w[2] = third field (stored
//             as the entry gen)
//   first   - index in entries[] of the first entry of this subsection
//   n       - number of entries in this subsection
//
// Returns true when all n entries were read.  Returns false when first/n are
// negative or their sum overflows, when the table cannot be grown to hold
// first + n entries, when the stream hits EOF mid-entry, when an offset
// exceeds GoffsetMax() or a gen exceeds INT_MAX, or when an entry that is
// about to be stored has a type other than 0, 1 or 2.
bool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
  // Guard against negative input here rather than trusting every caller:
  // "INT_MAX - n" below would overflow for a negative n, and a negative
  // first would index entries[] out of bounds.
  if (first < 0 || n < 0) {
    return false;
  }
  // With both values non-negative this is an overflow-free way of testing
  // first + n > INT_MAX.
  if (first > INT_MAX - n) {
    return false;
  }
  const int end = first + n;

  if (end > size) {
    if (resize(end) != size) {
      error(errSyntaxError, -1, "Invalid 'size' inside xref table");
      return false;
    }
    // Re-check against the member `size` after the resize attempt.
    if (end > size) {
      error(errSyntaxError, -1, "Invalid 'first' or 'n' inside xref table");
      return false;
    }
  }

  for (int i = first; i < end; ++i) {
    int c;

    // Field 1: entry type.  A zero-width type field means type 1.
    // Accumulated as unsigned so that an over-wide or high-bit field wraps
    // instead of triggering signed-overflow undefined behaviour; such values
    // end up in the default case of the switch below.
    unsigned int type;
    if (w[0] == 0) {
      type = 1;
    } else {
      type = 0;
      for (int j = 0; j < w[0]; ++j) {
        if ((c = xrefStr->getChar()) == EOF) {
          return false;
        }
        type = (type << 8) + static_cast<unsigned int>(c);
      }
    }

    // Field 2: big-endian value stored as the entry offset.
    unsigned long long offset = 0;
    for (int j = 0; j < w[1]; ++j) {
      if ((c = xrefStr->getChar()) == EOF) {
        return false;
      }
      offset = (offset << 8) + c;
    }
    if (offset > (unsigned long long)GoffsetMax()) {
      error(errSyntaxError, -1, "Offset inside xref table too large for fseek");
      return false;
    }

    // Field 3: big-endian value stored as the entry gen.
    unsigned long long gen = 0;
    for (int j = 0; j < w[2]; ++j) {
      if ((c = xrefStr->getChar()) == EOF) {
        return false;
      }
      gen = (gen << 8) + c;
    }
    if (gen > INT_MAX) {
      error(errSyntaxError, -1, "Gen inside xref table too large (bigger than INT_MAX)");
      return false;
    }

    // Only slots whose offset is still -1 are written; others are left as
    // they are (presumably already filled by a newer xref section -- confirm
    // against resize()/readXRef).  Note the type is validated only for
    // entries that are actually stored.
    if (entries[i].offset == -1) {
      switch (type) {
      case 0:
        entries[i].offset = offset;
        entries[i].gen = gen;
        entries[i].type = xrefEntryFree;
        break;
      case 1:
        entries[i].offset = offset;
        entries[i].gen = gen;
        entries[i].type = xrefEntryUncompressed;
        break;
      case 2:
        entries[i].offset = offset;
        entries[i].gen = gen;
        entries[i].type = xrefEntryCompressed;
        break;
      default:
        return false;
      }
    }
  }

  return true;
}

// Attempt to construct an xref table for a damaged file.
// Warning: Reconstruction of files where the last XRef section is a stream
//          or where some objects are defined inside an object stream is not yet supported.
//          Existing data in XRef::entries may get corrupted if applied anyway.
817
bool XRef::constructXRef(bool *wasReconstructed, bool needCatalogDict) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
818
  Parser *parser;
Albert Astals Cid's avatar
Albert Astals Cid committed
819
  Object obj;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
820
  char buf[256];
Adrian Johnson's avatar
Adrian Johnson committed
821
  Goffset pos;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
822 823 824
  int num, gen;
  int streamEndsSize;
  char *p;
825
  bool gotRoot;
826
  char* token = nullptr;
827 828
  bool oneCycle = true;
  int offset = 0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
829

830
  resize(0); // free entries properly
Kristian Høgsberg's avatar
Kristian Høgsberg committed
831
  gfree(entries);
Hib Eris's avatar
Hib Eris committed
832
  capacity = 0;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
833
  size = 0;
834
  entries = nullptr;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
835

836
  gotRoot = false;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
837 838
  streamEndsLen = streamEndsSize = 0;

839 840 841 842 843
  if (wasReconstructed)
  {
    *wasReconstructed = true;
  }

Kristian Høgsberg's avatar
Kristian Høgsberg committed
844 845 846 847 848 849 850 851
  str->reset();
  while (1) {
    pos = str->getPos();
    if (!str->getLine(buf, 256)) {
      break;
    }
    p = buf;

852 853 854
    // skip whitespace
    while (*p && Lexer::isSpace(*p & 0xff)) ++p;

855 856 857 858 859 860 861 862 863 864 865 866 867 868
    oneCycle = true;
    offset = 0;

    while( ( token = strstr( p, "endobj" ) ) || oneCycle ) {
      oneCycle = false;

      if( token ) {
        oneCycle = true;
        token[0] = '\0'; 
        offset = token - p;
      }

      // got trailer dictionary
      if (!strncmp(p, "trailer", 7)) {
869 870
        parser = new Parser(nullptr,
		 new Lexer(nullptr,
871 872
		   str->makeSubStream(pos + 7, false, 0, Object(objNull))),
		 false);
Albert Astals Cid's avatar
Albert Astals Cid committed
873
        Object newTrailerDict = parser->getObj();
874
        if (newTrailerDict.isDict()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
875
	  obj = newTrailerDict.dictLookupNF("Root");
876
	  if (obj.isRef() && (!gotRoot || !needCatalogDict) && rootNum != obj.getRefNum()) {