//========================================================================
//
// Parser.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006, 2009, 201, 2010, 2013, 2014, 2017, 2018 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

Kristian Høgsberg's avatar
Kristian Høgsberg committed
29 30 31 32 33 34 35 36 37 38
#include <config.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <limits.h>
#include <stddef.h>
#include "Object.h"
#include "Array.h"
#include "Dict.h"
39
#include "Decrypt.h"
Kristian Høgsberg's avatar
Kristian Høgsberg committed
40 41 42 43
#include "Parser.h"
#include "XRef.h"
#include "Error.h"

44 45 46 47 48
// Maximum nesting depth of objects that Parser::getObj() will descend
// into before giving up and returning an error object.  The limit
// catches infinite loops in the object structure, and also technically
// valid files whose deeply nested arrays would otherwise make the
// recursive parser consume all of the stack.
#define recursionLimit 500

49
// Creates a parser that reads tokens from lexerA.
//
//   xrefA         - cross-reference table handed to the arrays and
//                   dictionaries this parser creates (may be null)
//   lexerA        - token source; deleted by ~Parser()
//   allowStreamsA - whether a dictionary followed by 'stream' may be
//                   turned into a stream object
Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
  // Plain configuration state.
  allowStreams = allowStreamsA;
  inlineImg = 0;
  xref = xrefA;
  lexer = lexerA;

  // Prime the two-token look-ahead; the lexer must already be set and
  // buf1 has to be read before buf2.
  buf1 = lexer->getObj();
  buf2 = lexer->getObj();
}

// Destroys the parser.  The lexer pointer stored by the constructor is
// deleted here.
Parser::~Parser() {
  delete lexer;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
62
// Convenience overload: parses the next object without any decryption
// (null file key, zero key length, object number and generation 0) and
// with composite objects allowed.  'recursion' is the current nesting
// depth and is forwarded unchanged.
Object Parser::getObj(int recursion)
{
  Guchar *const noFileKey = nullptr;
  const int noKeyLength = 0;
  const int noObjNum = 0;
  const int noObjGen = 0;

  return getObj(gFalse, noFileKey, cryptRC4, noKeyLength,
                noObjNum, noObjGen, recursion);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
67
Object Parser::getObj(GBool simpleOnly,
68
           Guchar *fileKey,
69
		       CryptAlgorithm encAlgorithm, int keyLength,
70 71
		       int objNum, int objGen, int recursion,
		       GBool strict) {
Albert Astals Cid's avatar
Albert Astals Cid committed
72
  Object obj;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
73
  Stream *str;
74
  DecryptStream *decrypt;
75 76
  const GooString *s;
  GooString *s2;
77
  int c;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
78 79 80

  // refill buffer after inline image data
  if (inlineImg == 2) {
Albert Astals Cid's avatar
Albert Astals Cid committed
81 82
    buf1 = lexer->getObj();
    buf2 = lexer->getObj();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
83 84 85
    inlineImg = 0;
  }

86
  if (unlikely(recursion >= recursionLimit)) {
87
    return Object(objError);
88 89
  }

Kristian Høgsberg's avatar
Kristian Høgsberg committed
90
  // array
91
  if (!simpleOnly && buf1.isCmd("[")) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
92
    shift();
Albert Astals Cid's avatar
Albert Astals Cid committed
93
    obj = Object(new Array(xref));
94
    while (!buf1.isCmd("]") && !buf1.isEOF() && recursion + 1 < recursionLimit) {
Albert Astals Cid's avatar
Albert Astals Cid committed
95 96 97
      Object obj2 = getObj(gFalse, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1);
      obj.arrayAdd(std::move(obj2));
    }
98
    if (recursion + 1 >= recursionLimit && strict) goto err;
99
    if (buf1.isEOF()) {
100
      error(errSyntaxError, getPos(), "End of file inside array");
101 102
      if (strict) goto err;
    }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
103 104 105
    shift();

  // dictionary or stream
106
  } else if (!simpleOnly && buf1.isCmd("<<")) {
107
    shift(objNum);
Albert Astals Cid's avatar
Albert Astals Cid committed
108
    obj = Object(new Dict(xref));
Kristian Høgsberg's avatar
Kristian Høgsberg committed
109 110
    while (!buf1.isCmd(">>") && !buf1.isEOF()) {
      if (!buf1.isName()) {
111
	error(errSyntaxError, getPos(), "Dictionary key must be a name object");
112
	if (strict) goto err;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
113 114
	shift();
      } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
115
	// buf1 might go away in shift(), so construct the key
116
	char *key = copyString(buf1.getName());
Kristian Høgsberg's avatar
Kristian Høgsberg committed
117 118
	shift();
	if (buf1.isEOF() || buf1.isError()) {
119
	  gfree(key);
120
	  if (strict && buf1.isError()) goto err;
Kristian Høgsberg's avatar
Kristian Høgsberg committed
121 122
	  break;
	}
Albert Astals Cid's avatar
Albert Astals Cid committed
123
	Object obj2 = getObj(gFalse, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1);
124
	if (unlikely(obj2.isError() && recursion + 1 >= recursionLimit)) {
125
	  gfree(key);
126 127
	  break;
	}
Albert Astals Cid's avatar
Albert Astals Cid committed
128
	obj.dictAdd(key, std::move(obj2));
Kristian Høgsberg's avatar
Kristian Høgsberg committed
129 130
      }
    }
131
    if (buf1.isEOF()) {
132
      error(errSyntaxError, getPos(), "End of file inside dictionary");
133 134
      if (strict) goto err;
    }
135 136
    // stream objects are not allowed inside content streams or
    // object streams
137
    if (buf2.isCmd("stream")) {
Albert Astals Cid's avatar
Albert Astals Cid committed
138
      if (allowStreams && (str = makeStream(std::move(obj), fileKey, encAlgorithm, keyLength,
139 140
                                            objNum, objGen, recursion + 1,
                                            strict))) {
Albert Astals Cid's avatar
Albert Astals Cid committed
141
        return Object(str);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
142
      } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
143
        return Object(objError);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
144 145 146 147 148 149 150
      }
    } else {
      shift();
    }

  // indirect reference or integer
  } else if (buf1.isInt()) {
Albert Astals Cid's avatar
Albert Astals Cid committed
151
    const int num = buf1.getInt();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
152 153
    shift();
    if (buf1.isInt() && buf2.isCmd("R")) {
Albert Astals Cid's avatar
Albert Astals Cid committed
154
      const int gen = buf1.getInt();
Kristian Høgsberg's avatar
Kristian Høgsberg committed
155 156
      shift();
      shift();
Albert Astals Cid's avatar
Albert Astals Cid committed
157
      return Object(num, gen);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
158
    } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
159
      return Object(num);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
160 161 162 163
    }

  // string
  } else if (buf1.isString() && fileKey) {
164 165
    s = buf1.getString();
    s2 = new GooString();
Albert Astals Cid's avatar
Albert Astals Cid committed
166
    decrypt = new DecryptStream(new MemStream(s->getCString(), 0, s->getLength(), Object(objNull)),
167 168 169 170 171
				fileKey, encAlgorithm, keyLength,
				objNum, objGen);
    decrypt->reset();
    while ((c = decrypt->getChar()) != EOF) {
      s2->append((char)c);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
172 173
    }
    delete decrypt;
Albert Astals Cid's avatar
Albert Astals Cid committed
174
    obj = Object(s2);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
175 176 177 178
    shift();

  // simple object
  } else {
Albert Astals Cid's avatar
Albert Astals Cid committed
179 180 181
    // avoid re-allocating memory for complex objects like strings by
    // shallow copy of <buf1> to <obj> and nulling <buf1> so that
    // subsequent buf1.free() won't free this memory
Albert Astals Cid's avatar
Albert Astals Cid committed
182
    obj = std::move(buf1);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
183 184 185 186
    shift();
  }

  return obj;
187 188

err:
Albert Astals Cid's avatar
Albert Astals Cid committed
189
  return Object(objError);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
190 191
}

Albert Astals Cid's avatar
Albert Astals Cid committed
192
// Builds the stream object for a dictionary that is followed by the
// 'stream' keyword.
//
//   dict         - the stream dictionary; moved into the new stream
//   fileKey      - decryption key, or null when the data is not encrypted
//   encAlgorithm, keyLength, objNum, objGen
//                - passed to DecryptStream when fileKey is non-null
//   recursion    - nesting depth, forwarded to the 'Length' lookup and to
//                  addFilters()
//   strict       - when true, a bad 'Length' or a missing 'endstream'
//                  makes the call fail instead of being worked around
//
// Returns the new stream (with decryption and filters applied), or
// nullptr on failure.
Stream *Parser::makeStream(Object &&dict, Guchar *fileKey,
                           CryptAlgorithm encAlgorithm, int keyLength,
                           int objNum, int objGen, int recursion,
                           GBool strict) {
  BaseStream *baseStr;
  Stream *str;
  Goffset length;
  Goffset pos, endPos;

  // get stream start position
  lexer->skipToNextLine();
  if (!(str = lexer->getStream())) {
    return nullptr;
  }
  pos = str->getPos();

  // get length
  Object obj = dict.dictLookup("Length", recursion);
  if (obj.isInt()) {
    length = obj.getInt();
  } else if (obj.isInt64()) {
    length = obj.getInt64();
  } else {
    error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
    if (strict) return nullptr;
    length = 0;
  }

  // check for length in damaged file
  if (xref && xref->getStreamEnd(pos, &endPos)) {
    length = endPos - pos;
  }

  // in badly damaged PDF files, we can run off the end of the input
  // stream immediately after the "stream" token
  if (!lexer->getStream()) {
    return nullptr;
  }
  baseStr = lexer->getStream()->getBaseStream();

  // skip over stream data
  if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
    // take into account the fact that we've cached one value
    pos = pos - 1;
    lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
  }
  // reject lengths that are negative or that would overflow pos + length
  if (unlikely(length < 0)) {
    return nullptr;
  }
  if (unlikely(pos > LLONG_MAX - length)) {
    return nullptr;
  }
  lexer->setPos(pos + length);

  // refill token buffers and check for 'endstream'
  shift();  // kill '>>'
  shift("endstream", objNum);  // kill 'stream'
  if (buf1.isCmd("endstream")) {
    shift();
  } else {
    error(errSyntaxError, getPos(), "Missing 'endstream' or incorrect stream length");
    if (strict) return nullptr;
    if (xref && lexer->getStream()) {
      // The shift("endstream", ...) call above has already advanced the
      // lexer, so its current position is used to recompute the length.
      length = lexer->getPos() - pos;
      // NOTE(review): this test appears unreachable -- we are in the
      // else branch of the same buf1 check and nothing visible here
      // reassigns buf1 in between.  Confirm what was intended.
      if (buf1.isCmd("endstream")) {
        dict.dictSet("Length", Object(length));
      }
    } else {
      // When building the xref we can't use it so use this
      // kludge for broken PDF files: just add 5k to the length, and
      // hope it's enough
      if (unlikely(length > LLONG_MAX - 5000)) {
        // adding the slack would overflow the signed offset type
        return nullptr;
      }
      length += 5000;
    }
  }

  // make base stream
  str = baseStr->makeSubStream(pos, gTrue, length, std::move(dict));

  // handle decryption
  if (fileKey) {
    str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
                            objNum, objGen);
  }

  // get filters
  str = str->addFilters(str->getDict(), recursion);

  return str;
}

283
void Parser::shift(int objNum) {
Kristian Høgsberg's avatar
Kristian Høgsberg committed
284 285 286 287 288 289 290 291 292 293 294 295
  if (inlineImg > 0) {
    if (inlineImg < 2) {
      ++inlineImg;
    } else {
      // in a damaged content stream, if 'ID' shows up in the middle
      // of a dictionary, we need to reset
      inlineImg = 0;
    }
  } else if (buf2.isCmd("ID")) {
    lexer->skipChar();		// skip char after 'ID' command
    inlineImg = 1;
  }
Albert Astals Cid's avatar
Albert Astals Cid committed
296
  buf1 = std::move(buf2);
Kristian Høgsberg's avatar
Kristian Høgsberg committed
297
  if (inlineImg > 0)		// don't buffer inline image data
Albert Astals Cid's avatar
Albert Astals Cid committed
298 299 300 301
    buf2.setToNull();
  else {
    buf2 = lexer->getObj(objNum);
  }
Kristian Høgsberg's avatar
Kristian Høgsberg committed
302
}
Thomas Freitag's avatar
Thomas Freitag committed
303

304
// Same as shift(int), except for how the new buf2 is read: if the token
// that just moved into buf1 is the command cmdA, the next token is read
// normally; otherwise cmdA is handed to the lexer's getObj(cmdA, objNum)
// overload.
void Parser::shift(const char *cmdA, int objNum) {
  if (inlineImg <= 0) {
    if (buf2.isCmd("ID")) {
      lexer->skipChar();		// skip char after 'ID' command
      inlineImg = 1;
    }
  } else if (inlineImg >= 2) {
    // in a damaged content stream, if 'ID' shows up in the middle
    // of a dictionary, we need to reset
    inlineImg = 0;
  } else {
    ++inlineImg;
  }

  buf1 = std::move(buf2);

  if (inlineImg > 0) {
    // don't buffer inline image data
    buf2.setToNull();
    return;
  }
  if (buf1.isCmd(cmdA)) {
    buf2 = lexer->getObj(objNum);
    return;
  }
  buf2 = lexer->getObj(cmdA, objNum);
}