HtmlOutputDev.cc 59.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
//========================================================================
//
// HtmlOutputDev.cc
//
// Copyright 1997-2002 Glyph & Cog, LLC
//
// Changed 1999-2000 by G.Ovtcharov
//
// Changed 2002 by Mikhail Kruk
//
//========================================================================

13
14
15
16
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
17
18
19
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
20
// Copyright (C) 2005-2013, 2016-2021 Albert Astals Cid <aacid@kde.org>
21
22
// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
23
// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
Tomas Are Haavet's avatar
Tomas Are Haavet committed
24
// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
25
// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
Albert Astals Cid's avatar
Albert Astals Cid committed
26
// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
27
// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
28
// Copyright (C) 2010, 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
Hib Eris's avatar
Hib Eris committed
29
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
Albert Astals Cid's avatar
Albert Astals Cid committed
30
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
31
32
// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
33
// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
34
// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
35
// Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com>
Albert Astals Cid's avatar
Albert Astals Cid committed
36
// Copyright (C) 2012 Pino Toscano <pino@kde.org>
Thomas Freitag's avatar
Thomas Freitag committed
37
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
Julien Nabet's avatar
Julien Nabet committed
38
// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
Albert Astals Cid's avatar
Albert Astals Cid committed
39
// Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com>
Fabio D'Urso's avatar
Fabio D'Urso committed
40
// Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
Albert Astals Cid's avatar
Albert Astals Cid committed
41
// Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com>
42
// Copyright (C) 2017 Caolán McNamara <caolanm@redhat.com>
43
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
44
// Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
Albert Astals Cid's avatar
Albert Astals Cid committed
45
// Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de>
Albert Astals Cid's avatar
Albert Astals Cid committed
46
// Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de>
Albert Astals Cid's avatar
Albert Astals Cid committed
47
// Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
48
49
50
51
52
53
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

54
#include "config.h"
55
56
57
58
59
60
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cstddef>
#include <cctype>
#include <cmath>
61
#include <iostream>
62
#include "goo/GooString.h"
Greg Knight's avatar
Greg Knight committed
63
#include "goo/gbasename.h"
64
65
#include "goo/gbase64.h"
#include "goo/gbasename.h"
66
67
68
69
#include "UnicodeMap.h"
#include "goo/gmem.h"
#include "Error.h"
#include "GfxState.h"
70
#include "Page.h"
71
#include "Annot.h"
72
#include "PNGWriter.h"
73
74
75
#include "GlobalParams.h"
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
76
#include "HtmlUtils.h"
77
#include "InMemoryFile.h"
78
79
#include "Outline.h"
#include "PDFDoc.h"
80

81
#ifdef ENABLE_LIBPNG
Albert Astals Cid's avatar
Albert Astals Cid committed
82
#    include <png.h>
83
84
#endif

85
#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
86

87
88
89
class HtmlImage
{
public:
Albert Astals Cid's avatar
Albert Astals Cid committed
90
91
92
93
94
95
96
97
    HtmlImage(GooString *_fName, GfxState *state) : fName(_fName)
    {
        state->transform(0, 0, &xMin, &yMax);
        state->transform(1, 1, &xMax, &yMin);
    }
    ~HtmlImage() { delete fName; }
    HtmlImage(const HtmlImage &) = delete;
    HtmlImage &operator=(const HtmlImage &) = delete;
98

Albert Astals Cid's avatar
Albert Astals Cid committed
99
100
101
    double xMin, xMax; // image x coordinates
    double yMin, yMax; // image y coordinates
    GooString *fName; // image file name
102
103
};

104
// Returns true if x is strictly closer to y than it is to z
// (equal distances yield false).
static inline bool IS_CLOSER(float x, float y, float z)
{
    const float distanceToY = std::fabs(x - y);
    const float distanceToZ = std::fabs(x - z);
    return distanceToY < distanceToZ;
}
109

110
111
extern bool complexMode;
extern bool singleHtml;
112
extern bool dataUrls;
113
114
115
116
117
118
119
120
121
extern bool ignore;
extern bool printCommands;
extern bool printHtml;
extern bool noframes;
extern bool stout;
extern bool xml;
extern bool noRoundedCoordinates;
extern bool showHidden;
extern bool noMerge;
122

123
124
extern double wordBreakThreshold;

125
static bool debug = false;
126
static GooString *gstr_buff0 = nullptr; // a workspace in which I format strings
127

128
#if 0
129
130
static GooString* Dirname(GooString* str){
  
131
  char *p=str->c_str();
132
133
134
135
136
  int len=str->getLength();
  for (int i=len-1;i>=0;i--)
    if (*(p+i)==SLASH) 
      return new GooString(p,i+1);
  return new GooString();
Albert Astals Cid's avatar
Albert Astals Cid committed
137
}
138
#endif
139

Albert Astals Cid's avatar
Albert Astals Cid committed
140
141
142
static const char *print_matrix(const double *mat)
{
    delete gstr_buff0;
143

Albert Astals Cid's avatar
Albert Astals Cid committed
144
145
    gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
    return gstr_buff0->c_str();
146
147
}

Albert Astals Cid's avatar
Albert Astals Cid committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
static const char *print_uni_str(const Unicode *u, const unsigned uLen)
{
    GooString *gstr_buff1 = nullptr;

    delete gstr_buff0;

    if (!uLen)
        return "";
    gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
    for (unsigned i = 1; i < uLen; i++) {
        if (u[i] < 0x7F) {
            gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?');
            delete gstr_buff0;
            gstr_buff0 = gstr_buff1;
        }
163
164
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
165
    return gstr_buff0->c_str();
166
167
}

168
169
170
171
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------

Albert Astals Cid's avatar
Albert Astals Cid committed
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
// Creates an empty string anchored at the current text position of `state`.
//
// state    - graphics state supplying position, font, fill colour and text matrix
// fontSize - font size used for the vertical extent and for the HtmlFont entry
// _fonts   - font accumulator in which the string's font is registered
//
// xMin/xMax are not set here; they are filled in by addChar().
HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) : fonts(_fonts)
{
    GfxFont *font;
    double x, y;

    state->transform(state->getCurX(), state->getCurY(), &x, &y);
    if ((font = state->getFont())) {
        // Clamp ascent/descent to [.., 1.05] and [-0.4, ..] before deriving
        // the vertical extent of the string.
        double ascent = font->getAscent();
        double descent = font->getDescent();
        if (ascent > 1.05) {
            // printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
            ascent = 1.05;
        }
        if (descent < -0.4) {
            // printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
            descent = -0.4;
        }
        yMin = y - ascent * fontSize;
        yMax = y - descent * fontSize;
        GfxRGB rgb;
        state->getFillRGB(&rgb);
        HtmlFont hfont = HtmlFont(font, static_cast<int>(fontSize), rgb, state->getFillOpacity());
        if (isMatRotOrSkew(state->getTextMat())) {
            // Keep all six entries of the text matrix: print_matrix() reads
            // six values, so a four-entry copy would be read out of bounds
            // when debug output is enabled.  Only the first four entries are
            // modified below.
            double normalizedMatrix[6];
            memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
            // browser rotates the opposite way
            // so flip the sign of the angle -> sin() components change sign
            if (debug)
                std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
            normalizedMatrix[1] *= -1;
            normalizedMatrix[2] *= -1;
            if (debug)
                std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
            normalizeRotMat(normalizedMatrix);
            if (debug)
                std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
            hfont.setRotMat(normalizedMatrix);
        }
        fontpos = fonts->AddFont(hfont);
    } else {
        // this means that the PDF file draws text without a current font,
        // which should never happen
        yMin = y - 0.95 * fontSize;
        yMax = y + 0.35 * fontSize;
        fontpos = 0;
    }
    if (yMin == yMax) {
        // this is a sanity check for a case that shouldn't happen -- but
        // if it does happen, we want to avoid dividing by zero later
        yMin = y;
        yMax = y + 1;
    }
    col = 0;
    text = nullptr;
    xRight = nullptr;
    link = nullptr;
    len = size = 0;
    yxNext = nullptr;
    xyNext = nullptr;
    htext = new GooString();
    dir = textDirUnknown;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
235
236
237
238
239
// Releases the character and right-edge arrays (released with gfree) and the
// HTML text buffer.
HtmlString::~HtmlString()
{
    gfree(xRight);
    gfree(text);
    delete htext;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// Appends one character to the string.
//
// x, dx - left edge and advance of the character; x + dx becomes both the
//         character's right edge and the string's new xMax
// u     - the code point to store
// state, y and dy are not used by this function.
void HtmlString::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u)
{
    // Direction detection is switched off (the UnicodeMap::getDirection(u)
    // lookup is commented out upstream), so an unknown direction always
    // becomes left-to-right.
    if (dir == textDirUnknown) {
        dir = textDirLeftRight;
    }

    // Grow the two parallel arrays by 16 entries whenever they are full.
    if (len == size) {
        size += 16;
        text = static_cast<Unicode *>(grealloc(text, size * sizeof(Unicode)));
        xRight = static_cast<double *>(grealloc(xRight, size * sizeof(double)));
    }

    // The first character fixes the left edge of the whole string.
    if (len == 0) {
        xMin = x;
    }

    const double rightEdge = x + dx;
    text[len] = u;
    xRight[len] = rightEdge;
    xMax = rightEdge;
    // printf("added char: %f %f xright = %f\n", x, dx, x+dx);
    ++len;
}

void HtmlString::endString()
{
Albert Astals Cid's avatar
Albert Astals Cid committed
265
266
267
268
269
270
271
    if (dir == textDirRightLeft && len > 1) {
        // printf("will reverse!\n");
        for (int i = 0; i < len / 2; i++) {
            Unicode ch = text[i];
            text[i] = text[len - i - 1];
            text[len - i - 1] = ch;
        }
272
273
274
275
276
277
278
    }
}

//------------------------------------------------------------------------
// HtmlPage
//------------------------------------------------------------------------

Albert Astals Cid's avatar
Albert Astals Cid committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
// Creates an empty page.
//
// rawOrderA - stored in rawOrder; consulted when strings are inserted into
//             the y-major list and when they are coalesced.
HtmlPage::HtmlPage(bool rawOrderA)
{
    // ordering mode and document-level bookkeeping
    rawOrder = rawOrderA;
    DocName = nullptr;
    firstPage = -1;
    pageWidth = 0;
    pageHeight = 0;

    // no string is in progress and both string lists start out empty
    curStr = nullptr;
    yxStrings = nullptr;
    xyStrings = nullptr;
    yxCur1 = nullptr;
    yxCur2 = nullptr;

    // containers created here and deleted in the destructor
    fonts = new HtmlFontAccu();
    fontsPageMarker = 0;
    links = new HtmlLinks();
    imgList = new std::vector<HtmlImage *>();
}

Albert Astals Cid's avatar
Albert Astals Cid committed
296
297
298
299
300
301
302
303
304
305
// Clears the page, then deletes the document name, the font and link
// containers, every image still in the image list, and the list itself.
HtmlPage::~HtmlPage()
{
    clear();

    delete DocName;
    delete fonts;
    delete links;

    // The list holds raw pointers, so each remaining image is deleted by hand.
    for (HtmlImage *image : *imgList) {
        delete image;
    }
    delete imgList;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
// Recomputes fontSize from the graphics state, applying an extra correction
// for Type 3 fonts.
void HtmlPage::updateFont(GfxState *state)
{
    // adjust the font size
    fontSize = state->getTransformedFontSize();

    GfxFont *const font = state->getFont();
    if (!font || font->getType() != fontType3) {
        return;
    }

    // This is a hack which makes it possible to deal with some Type 3
    // fonts.  The problem is that it's impossible to know what the
    // base coordinate system used in the font is without actually
    // rendering the font.  This code tries to guess by looking at the
    // width of the character 'm' (which breaks if the font is a
    // subset that doesn't contain 'm').
    Gfx8BitFont *const type3Font = static_cast<Gfx8BitFont *>(font);
    int mCode = 0;
    while (mCode < 256) {
        const char *glyphName = type3Font->getCharName(mCode);
        if (glyphName && glyphName[0] == 'm' && glyphName[1] == '\0') {
            break;
        }
        ++mCode;
    }
    if (mCode < 256) {
        const double mWidth = type3Font->getWidth(mCode);
        if (mWidth != 0) {
            // 0.6 stands in for a generic average 'm' width -- yes, this is a hack
            fontSize *= mWidth / 0.6;
        }
    }

    // Scale by the ratio of the font matrix's vertical and horizontal factors.
    const double *fontMatrix = font->getFontMatrix();
    if (fontMatrix[0] != 0) {
        fontSize *= fabs(fontMatrix[3] / fontMatrix[0]);
    }
}

Albert Astals Cid's avatar
Albert Astals Cid committed
343
344
345
// Starts a new current string at the text position of `state`, using the
// page's current font size and font accumulator.  The parameter s is unused.
void HtmlPage::beginString(GfxState *state, const GooString *s)
{
    HtmlString *const started = new HtmlString(state, fontSize, fonts);
    curStr = started;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
348
349
350
351
352
void HtmlPage::conv()
{
    for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
        delete tmp->htext;
        tmp->htext = HtmlFont::HtmlFilter(tmp->text, tmp->len);
353

Albert Astals Cid's avatar
Albert Astals Cid committed
354
355
356
357
358
        int linkIndex = 0;
        if (links->inLink(tmp->xMin, tmp->yMin, tmp->xMax, tmp->yMax, linkIndex)) {
            tmp->link = links->getLink(linkIndex);
        }
    }
359
360
}

Albert Astals Cid's avatar
Albert Astals Cid committed
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
// Adds the uLen code points in u to the current string, first starting a new
// string when the character is far from the previous one and the rotation
// matrices differ.  The advance (dx, dy), minus the character spacing, is
// split evenly between the code points.  ox and oy are not used.
void HtmlPage::addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen)
{
    double devX, devY;
    state->transform(x, y, &devX, &devY);
    const int curLen = curStr->len;

    // check that new character is in the same direction as current string
    // and is not too far away from it before adding
    // if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
    // XXX
    if (debug) {
        const double *text_mat = state->getTextMat();
        // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
        // sin q is zero iff there is no rotation, or 180 deg. rotation;
        // for 180 rotation, cos q will be negative
        if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
            std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
            std::cerr << "text " << print_matrix(state->getTextMat());
        }
    }

    // don't start a new string, unless there is already a string
    if (curLen > 0) {
        // TODO: the following test assumes that text is flowing left to
        // right, which will not necessarily be the case, e.g. if rotated;
        // It assesses whether or not two characters are close enough to
        // be part of the same string
        const double gap = fabs(devX - curStr->xRight[curLen - 1]);
        const double maxGap = wordBreakThreshold * (curStr->yMax - curStr->yMin);
        // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
        // sin q is zero iff there is no rotation, or 180 deg. rotation;
        // for 180 rotation, cos q will be negative
        if (gap > maxGap && !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat())) {
            endString();
            beginString(state, nullptr);
        }
    }

    // Remove the character spacing from the advance.
    double spacingDx, spacingDy;
    state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), 0, &spacingDx, &spacingDy);
    dx -= spacingDx;
    dy -= spacingDy;

    // Per-code-point step in device space.
    double stepX, stepY;
    state->transformDelta(dx, dy, &stepX, &stepY);
    if (uLen != 0) {
        stepX /= uLen;
        stepY /= uLen;
    }

    for (int k = 0; k < uLen; ++k) {
        curStr->addChar(state, devX + k * stepX, devY + k * stepY, stepX, stepY, u[k]);
    }
}

Albert Astals Cid's avatar
Albert Astals Cid committed
408
409
410
411
412
413
414
415
416
417
418
419
void HtmlPage::endString()
{
    HtmlString *p1, *p2;
    double h, y1, y2;

    // throw away zero-length strings -- they don't have valid xMin/xMax
    // values, and they're useless anyway
    if (curStr->len == 0) {
        delete curStr;
        curStr = nullptr;
        return;
    }
420

Albert Astals Cid's avatar
Albert Astals Cid committed
421
    curStr->endString();
422
423
424
425
426
427
428
429
430

#if 0 //~tmp
  if (curStr->yMax - curStr->yMin > 20) {
    delete curStr;
    curStr = NULL;
    return;
  }
#endif

Albert Astals Cid's avatar
Albert Astals Cid committed
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
    // insert string in y-major list
    h = curStr->yMax - curStr->yMin;
    y1 = curStr->yMin + 0.5 * h;
    y2 = curStr->yMin + 0.8 * h;
    if (rawOrder) {
        p1 = yxCur1;
        p2 = nullptr;
    } else if ((!yxCur1 || (y1 >= yxCur1->yMin && (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && (!yxCur2 || (y1 < yxCur2->yMin || (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
        p1 = yxCur1;
        p2 = yxCur2;
    } else {
        for (p1 = nullptr, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
            if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
                break;
        }
        yxCur2 = p2;
    }
    yxCur1 = curStr;
    if (p1)
        p1->yxNext = curStr;
    else
        yxStrings = curStr;
    curStr->yxNext = p2;
    curStr = nullptr;
455
456
}

Albert Astals Cid's avatar
Albert Astals Cid committed
457
// Returns a pointer to the last occurrence of ss in s, or nullptr when ss
// does not occur.  An empty ss matches at the end of s (the terminating
// NUL); without that special case the search would step past the terminator.
static const char *strrstr(const char *s, const char *ss)
{
    if (*ss == '\0') {
        return s + strlen(s);
    }

    const char *last = nullptr;
    // ss is non-empty here, so every hit is followed by at least one more
    // byte of s and "hit + 1" stays inside the string.
    for (const char *hit = strstr(s, ss); hit != nullptr; hit = strstr(hit + 1, ss)) {
        last = hit;
    }
    return last;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
466
// Appends the closing tags for the link/italic/bold markup that is still
// open at the end of htext, innermost tag first.
//
// htext         - HTML text to append to
// finish_a      - in: "</a>" has to be emitted
// finish_italic - in: "</i>" has to be emitted
// finish_bold   - in: "</b>" has to be emitted
//
// finish_a / finish_italic are reset to false when the corresponding tag is
// closed ahead of the default order (bold, italic, link).
static void CloseTags(GooString *htext, bool &finish_a, bool &finish_italic, bool &finish_bold)
{
    // Position of the last opening tag as an offset into the text, or -1 when
    // it is not looked up or not present.  Offsets are used (rather than raw
    // pointers) so that "missing" orders before every real position and so
    // that nothing refers to the text buffer once append() has been called.
    const char *const base = htext->c_str();
    const auto lastOffset = [base](bool wanted, const char *tag) -> std::ptrdiff_t {
        if (!wanted) {
            return -1;
        }
        const char *hit = strrstr(base, tag);
        return hit ? hit - base : -1;
    };
    const std::ptrdiff_t last_italic = lastOffset(finish_italic && (finish_bold || finish_a), "<i>");
    const std::ptrdiff_t last_bold = lastOffset(finish_bold && (finish_italic || finish_a), "<b>");
    const std::ptrdiff_t last_a = lastOffset(finish_a && (finish_italic || finish_bold), "<a ");

    // Decide the closing order before the text is modified.
    const bool close_a_first = finish_a && (finish_italic || finish_bold) && last_a > (last_italic > last_bold ? last_italic : last_bold);
    const bool close_italic_first = finish_italic && finish_bold && last_italic > last_bold;

    if (close_a_first) {
        htext->append("</a>", 4);
        finish_a = false;
    }
    if (close_italic_first) {
        htext->append("</i>", 4);
        finish_italic = false;
    }
    if (finish_bold)
        htext->append("</b>", 4);
    if (finish_italic)
        htext->append("</i>", 4);
    if (finish_a)
        htext->append("</a>");
}

487
488
489
// Strings are lines of text;
// This function aims to combine strings into lines and paragraphs if !noMerge
// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect
Albert Astals Cid's avatar
Albert Astals Cid committed
490
491
492
493
494
495
496
void HtmlPage::coalesce()
{
    HtmlString *str1, *str2;
    double space, horSpace, vertSpace, vertOverlap;
    bool addSpace, addLineBreak;
    int n, i;
    double curX, curY;
497
498
499
500
501
502
503
504
505
506
507
508
509

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%f..%f  y=%f..%f  size=%2d '",
	   str1->xMin, str1->xMax, str1->yMin, str1->yMax,
	   (int)(str1->yMax - str1->yMin));
    for (i = 0; i < str1->len; ++i) {
      fputc(str1->text[i] & 0xff, stdout);
    }
    printf("'\n");
  }
  printf("\n------------------------------------------------------------\n\n");
#endif
Albert Astals Cid's avatar
Albert Astals Cid committed
510
    str1 = yxStrings;
511

Albert Astals Cid's avatar
Albert Astals Cid committed
512
513
    if (!str1)
        return;
514

Albert Astals Cid's avatar
Albert Astals Cid committed
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
    //----- discard duplicated text (fake boldface, drop shadows)
    if (!complexMode) { /* if not in complex mode get rid of duplicate strings */
        HtmlString *str3;
        bool found;
        while (str1) {
            double size = str1->yMax - str1->yMin;
            double xLimit = str1->xMin + size * 0.2;
            found = false;
            for (str2 = str1, str3 = str1->yxNext; str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) {
                if (str3->len == str1->len && !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && fabs(str3->yMin - str1->yMin) < size * 0.2 && fabs(str3->yMax - str1->yMax) < size * 0.2
                    && fabs(str3->xMax - str1->xMax) < size * 0.2) {
                    found = true;
                    // printf("found duplicate!\n");
                    break;
                }
            }
            if (found) {
                str2->xyNext = str3->xyNext;
                str2->yxNext = str3->yxNext;
                delete str3;
            } else {
                str1 = str1->yxNext;
            }
        }
    } /*- !complexMode */

    str1 = yxStrings;

    const HtmlFont *hfont1 = getFont(str1);
    if (hfont1->isBold())
        str1->htext->insert(0, "<b>", 3);
    if (hfont1->isItalic())
        str1->htext->insert(0, "<i>", 3);
    if (str1->getLink() != nullptr) {
        GooString *ls = str1->getLink()->getLinkStart();
        str1->htext->insert(0, ls);
Boris Toloknov's avatar
Boris Toloknov committed
551
        delete ls;
552
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
    curX = str1->xMin;
    curY = str1->yMin;

    while (str1 && (str2 = str1->yxNext)) {
        const HtmlFont *hfont2 = getFont(str2);
        space = str1->yMax - str1->yMin; // the height of the font's bounding box
        horSpace = str2->xMin - str1->xMax;
        // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
        addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
        vertSpace = str2->yMin - str1->yMax;

        // printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);

        if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) {
            vertOverlap = str1->yMax - str2->yMin;
        } else if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) {
            vertOverlap = str2->yMax - str1->yMin;
        } else {
            vertOverlap = 0;
        }
Boris Toloknov's avatar
Boris Toloknov committed
573

Albert Astals Cid's avatar
Albert Astals Cid committed
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
        // Combine strings if:
        //  They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
        //  1.  They appear to be part of the same line of text
        //  2.  They appear to be subsequent lines of a paragraph
        //  We assume (1) or (2) above, respectively, based on:
        //  (1)  strings overlap vertically AND
        //       horizontal space between end of str1 and start of str2 is consistent with a single space or less;
        //       when rawOrder, the strings have to overlap vertically by at least 50%
        //  (2)  Strings flow down the page, but the space between them is not too great, and they are lined up on the left
        if (((((rawOrder && vertOverlap > 0.5 * space) || (!rawOrder && str2->yMin < str1->yMax)) && (horSpace > -0.5 * space && horSpace < space)) || (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak))
            && (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter
            str1->dir == str2->dir // text direction the same
        ) {
            //      printf("yes\n");
            n = str1->len + str2->len;
            if ((addSpace = horSpace > wordBreakThreshold * space)) {
                ++n;
            }
            if (addLineBreak) {
                ++n;
            }

            str1->size = (n + 15) & ~15;
            str1->text = (Unicode *)grealloc(str1->text, str1->size * sizeof(Unicode));
            str1->xRight = (double *)grealloc(str1->xRight, str1->size * sizeof(double));
            if (addSpace) {
                str1->text[str1->len] = 0x20;
                str1->htext->append(xml ? " " : "&#160;");
                str1->xRight[str1->len] = str2->xMin;
                ++str1->len;
            }
            if (addLineBreak) {
                str1->text[str1->len] = '\n';
                str1->htext->append("<br/>");
                str1->xRight[str1->len] = str2->xMin;
                ++str1->len;
                str1->yMin = str2->yMin;
                str1->yMax = str2->yMax;
                str1->xMax = str2->xMax;
                int fontLineSize = hfont1->getLineSize();
                int curLineSize = (int)(vertSpace + space);
                if (curLineSize != fontLineSize) {
                    HtmlFont *newfnt = new HtmlFont(*hfont1);
                    newfnt->setLineSize(curLineSize);
                    str1->fontpos = fonts->AddFont(*newfnt);
                    delete newfnt;
                    hfont1 = getFont(str1);
                    // we have to reget hfont2 because it's location could have
                    // changed on resize
                    hfont2 = getFont(str2);
                }
            }
            for (i = 0; i < str2->len; ++i) {
                str1->text[str1->len] = str2->text[i];
                str1->xRight[str1->len] = str2->xRight[i];
                ++str1->len;
            }

            /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
            HtmlLink *hlink1 = str1->getLink();
            HtmlLink *hlink2 = str2->getLink();
            bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
            bool finish_a = switch_links && hlink1 != nullptr;
            bool finish_italic = hfont1->isItalic() && (!hfont2->isItalic() || finish_a);
            bool finish_bold = hfont1->isBold() && (!hfont2->isBold() || finish_a || finish_italic);
            CloseTags(str1->htext, finish_a, finish_italic, finish_bold);
            if (switch_links && hlink2 != nullptr) {
                GooString *ls = hlink2->getLinkStart();
                str1->htext->append(ls);
                delete ls;
            }
            if ((!hfont1->isItalic() || finish_italic) && hfont2->isItalic())
                str1->htext->append("<i>", 3);
            if ((!hfont1->isBold() || finish_bold) && hfont2->isBold())
                str1->htext->append("<b>", 3);

            str1->htext->append(str2->htext);
            // str1 now contains href for link of str2 (if it is defined)
            str1->link = str2->link;
            hfont1 = hfont2;
            if (str2->xMax > str1->xMax) {
                str1->xMax = str2->xMax;
            }
            if (str2->yMax > str1->yMax) {
                str1->yMax = str2->yMax;
            }
            str1->yxNext = str2->yxNext;
            delete str2;
        } else { // keep strings separate
            //      printf("no\n");
            bool finish_a = str1->getLink() != nullptr;
            bool finish_bold = hfont1->isBold();
            bool finish_italic = hfont1->isItalic();
            CloseTags(str1->htext, finish_a, finish_italic, finish_bold);

            str1->xMin = curX;
            str1->yMin = curY;
            str1 = str2;
            curX = str1->xMin;
            curY = str1->yMin;
            hfont1 = hfont2;
            if (hfont1->isBold())
                str1->htext->insert(0, "<b>", 3);
            if (hfont1->isItalic())
                str1->htext->insert(0, "<i>", 3);
            if (str1->getLink() != nullptr) {
                GooString *ls = str1->getLink()->getLinkStart();
                str1->htext->insert(0, ls);
                delete ls;
            }
        }
    }
    str1->xMin = curX;
    str1->yMin = curY;

    bool finish_bold = hfont1->isBold();
    bool finish_italic = hfont1->isItalic();
    bool finish_a = str1->getLink() != nullptr;
    CloseTags(str1->htext, finish_a, finish_italic, finish_bold);
693
694
695
696
697
698

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%3d..%3d  y=%3d..%3d  size=%2d ",
	   (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
	   (int)(str1->yMax - str1->yMin));
699
    printf("'%s'\n", str1->htext->c_str());  
700
701
702
703
704
  }
  printf("\n------------------------------------------------------------\n\n");
#endif
}

Albert Astals Cid's avatar
Albert Astals Cid committed
705
706
707
708
void HtmlPage::dumpAsXML(FILE *f, int page)
{
    fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
    fprintf(f, " top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight, pageWidth);
709

Albert Astals Cid's avatar
Albert Astals Cid committed
710
711
712
713
    for (int i = fontsPageMarker; i < fonts->size(); i++) {
        GooString *fontCSStyle = fonts->CSStyle(i);
        fprintf(f, "\t%s\n", fontCSStyle->c_str());
        delete fontCSStyle;
714
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
715
716
717
718
719
720
721
722
723
724
725
726

    for (auto ptr : *imgList) {
        auto img = static_cast<HtmlImage *>(ptr);
        if (!noRoundedCoordinates) {
            fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
            fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
        } else {
            fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
            fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
        }
        fprintf(f, "src=\"%s\"/>\n", img->fName->c_str());
        delete img;
727
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
728
    imgList->clear();
729

Albert Astals Cid's avatar
Albert Astals Cid committed
730
731
732
733
734
735
736
737
738
739
740
741
742
    for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
        if (tmp->htext) {
            if (!noRoundedCoordinates) {
                fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
                fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
            } else {
                fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
                fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
            }
            fprintf(f, "font=\"%d\">", tmp->fontpos);
            fputs(tmp->htext->c_str(), f);
            fputs("</text>\n", f);
        }
743
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
744
    fputs("</page>\n", f);
745
746
}

Ihar Filipau's avatar
Ihar Filipau committed
747
748
// Writes a <style> element to f that defines the .xflip, .yflip and .xyflip
// classes used to mirror images horizontally, vertically or both.
static void printCSS(FILE *f)
{
    // Image flip/flop CSS
    // Source:
    // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
    // tested in Chrome, Fx (Linux) and IE9 (W7)
    static const char styleSheet[] = "<style type=\"text/css\">\n"
                                     "<!--\n"
                                     ".xflip {\n"
                                     "    -moz-transform: scaleX(-1);\n"
                                     "    -webkit-transform: scaleX(-1);\n"
                                     "    -o-transform: scaleX(-1);\n"
                                     "    transform: scaleX(-1);\n"
                                     "    filter: fliph;\n"
                                     "}\n"
                                     ".yflip {\n"
                                     "    -moz-transform: scaleY(-1);\n"
                                     "    -webkit-transform: scaleY(-1);\n"
                                     "    -o-transform: scaleY(-1);\n"
                                     "    transform: scaleY(-1);\n"
                                     "    filter: flipv;\n"
                                     "}\n"
                                     ".xyflip {\n"
                                     "    -moz-transform: scaleX(-1) scaleY(-1);\n"
                                     "    -webkit-transform: scaleX(-1) scaleY(-1);\n"
                                     "    -o-transform: scaleX(-1) scaleY(-1);\n"
                                     "    transform: scaleX(-1) scaleY(-1);\n"
                                     "    filter: fliph + flipv;\n"
                                     "}\n"
                                     "-->\n"
                                     "</style>\n";

    // The text holds no embedded NUL, so fputs emits every byte of it.
    fputs(styleSheet, f);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
807
808
int HtmlPage::dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page)
{
809

Albert Astals Cid's avatar
Albert Astals Cid committed
810
811
812
813
    if (!noframes) {
        const std::string pgNum = std::to_string(page);
        std::string pageFileName(DocName->toStr());
        if (!singleHtml) {
814
815
            pageFileName += '-' + pgNum + ".html";
            pageFile = fopen(pageFileName.c_str(), "w");
Albert Astals Cid's avatar
Albert Astals Cid committed
816
        } else {
817
818
            pageFileName += "-html.html";
            pageFile = fopen(pageFileName.c_str(), "a");
Albert Astals Cid's avatar
Albert Astals Cid committed
819
        }
820

Albert Astals Cid's avatar
Albert Astals Cid committed
821
        if (!pageFile) {
822
            error(errIO, -1, "Couldn't open html file '{0:s}'", pageFileName.c_str());
Albert Astals Cid's avatar
Albert Astals Cid committed
823
824
            return 1;
        }
825

Albert Astals Cid's avatar
Albert Astals Cid committed
826
827
828
829
        if (!singleHtml)
            fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
        else
            fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, pageFileName.c_str());
830

Albert Astals Cid's avatar
Albert Astals Cid committed
831
832
833
834
835
836
837
838
839
        const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
        if (!singleHtml)
            fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
        else
            fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding.c_str());
    } else {
        pageFile = file;
        fprintf(pageFile, "<!-- Page %d -->\n", page);
        fprintf(pageFile, "<a name=\"%d\"></a>\n", page);
840
841
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
842
    return 0;
843
844
}

Albert Astals Cid's avatar
Albert Astals Cid committed
845
void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string> &backgroundImages)
846
{
Albert Astals Cid's avatar
Albert Astals Cid committed
847
    FILE *pageFile;
Ihar Filipau's avatar
Ihar Filipau committed
848

Albert Astals Cid's avatar
Albert Astals Cid committed
849
850
    if (firstPage == -1)
        firstPage = page;
Ihar Filipau's avatar
Ihar Filipau committed
851

Albert Astals Cid's avatar
Albert Astals Cid committed
852
853
854
    if (dumpComplexHeaders(file, pageFile, page)) {
        error(errIO, -1, "Couldn't write headers.");
        return;
855
    }
856

Albert Astals Cid's avatar
Albert Astals Cid committed
857
858
859
860
861
862
863
864
865
866
    fputs("<style type=\"text/css\">\n<!--\n", pageFile);
    fputs("\tp {margin: 0; padding: 0;}", pageFile);
    for (int i = fontsPageMarker; i != fonts->size(); i++) {
        GooString *fontCSStyle;
        if (!singleHtml)
            fontCSStyle = fonts->CSStyle(i);
        else
            fontCSStyle = fonts->CSStyle(i, page);
        fprintf(pageFile, "\t%s\n", fontCSStyle->c_str());
        delete fontCSStyle;
867
868
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
869
    fputs("-->\n</style>\n", pageFile);
870

Albert Astals Cid's avatar
Albert Astals Cid committed
871
872
873
    if (!noframes) {
        fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n", pageFile);
    }
874

Albert Astals Cid's avatar
Albert Astals Cid committed
875
    fprintf(pageFile, "<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", page, pageWidth, pageHeight);
876

Albert Astals Cid's avatar
Albert Astals Cid committed
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
    if (!ignore && (size_t)(page - firstPage) < backgroundImages.size()) {
        fprintf(pageFile, "<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n", pageWidth, pageHeight, backgroundImages[page - firstPage].c_str());
    }

    for (HtmlString *tmp1 = yxStrings; tmp1; tmp1 = tmp1->yxNext) {
        if (tmp1->htext) {
            fprintf(pageFile, "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", xoutRound(tmp1->yMin), xoutRound(tmp1->xMin));
            if (!singleHtml) {
                fputc('0', pageFile);
            } else {
                fprintf(pageFile, "%d", page);
            }
            fprintf(pageFile, "%d\">", tmp1->fontpos);
            fputs(tmp1->htext->c_str(), pageFile);
            fputs("</p>\n", pageFile);
        }
    }

    fputs("</div>\n", pageFile);

    if (!noframes) {
        fputs("</body>\n</html>\n", pageFile);
        fclose(pageFile);
    }
}

// Writes the current page to f in the format selected by the global options.
//
// In complex or single-HTML mode the work is delegated to dumpAsXML() (when
// xml is set) or dumpComplex(). Otherwise the simple layout is produced: a
// page anchor, one <img> per collected image (with a flip class when the
// image's min/max coordinates are swapped), each string followed by <br/>,
// and a closing <hr/>. The images in imgList are deleted and the list is
// emptied in the simple layout.
void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages)
{
    if (complexMode || singleHtml) {
        if (xml) {
            dumpAsXML(f, pageNum);
        } else {
            dumpComplex(f, pageNum, backgroundImages);
        }
        return;
    }

    fprintf(f, "<a name=%d></a>", pageNum);

    // Loop over the list of image names on this page
    for (auto ptr : *imgList) {
        auto img = static_cast<HtmlImage *>(ptr);

        // see printCSS() for class names
        const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
        int style_index = 0;
        if (img->xMin > img->xMax) {
            style_index += 1; // xFlip
        }
        if (img->yMin > img->yMax) {
            style_index += 2; // yFlip
        }

        fprintf(f, "<img%s src=\"%s\"/><br/>\n", styles[style_index], img->fName->c_str());
        delete img;
    }
    imgList->clear();

    // Write the text directly from each string; no temporary copy is needed.
    for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
        if (tmp->htext) {
            fputs(tmp->htext->c_str(), f);
            fputs("<br/>\n", f);
        }
    }
    fputs("<hr/>\n", f);
}
941

Albert Astals Cid's avatar
Albert Astals Cid committed
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
void HtmlPage::clear()
{
    HtmlString *p1, *p2;

    if (curStr) {
        delete curStr;
        curStr = nullptr;
    }
    for (p1 = yxStrings; p1; p1 = p2) {
        p2 = p1->yxNext;
        delete p1;
    }
    yxStrings = nullptr;
    xyStrings = nullptr;
    yxCur1 = yxCur2 = nullptr;

    if (!noframes) {
        delete fonts;
        fonts = new HtmlFontAccu();
        fontsPageMarker = 0;
    } else {
        fontsPageMarker = fonts->size();
    }
965

Albert Astals Cid's avatar
Albert Astals Cid committed
966
967
    delete links;
    links = new HtmlLinks();
968
969
}

Albert Astals Cid's avatar
Albert Astals Cid committed
970
971
972
void HtmlPage::setDocName(const char *fname)
{
    DocName = new GooString(fname);
973
974
}

Albert Astals Cid's avatar
Albert Astals Cid committed
975
976
977
978
// Appends a new HtmlImage built from fname and state to this page's image list.
void HtmlPage::addImage(GooString *fname, GfxState *state)
{
    imgList->push_back(new HtmlImage(fname, state));
}

981
982
983
984
//------------------------------------------------------------------------
// HtmlMetaVar
//------------------------------------------------------------------------

985
// Builds a meta variable holding private copies of the given name and content.
HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content) : name(new GooString(_name)), content(new GooString(_content)) { }

// Releases the name and content strings allocated by the constructor.
HtmlMetaVar::~HtmlMetaVar()
{
    delete name;
    delete content;
}

// Returns a newly allocated string of the form
// <meta name="NAME" content="CONTENT"/>; the caller owns the result.
// name and content are inserted as stored, without escaping.
GooString *HtmlMetaVar::toString() const
{
    GooString *tag = new GooString("<meta name=\"");
    tag->append(name);
    tag->append("\" content=\"");
    tag->append(content);
    tag->append("\"/>");
    return tag;
}

//------------------------------------------------------------------------
// HtmlOutputDev
//------------------------------------------------------------------------

Albert Astals Cid's avatar
Albert Astals Cid committed
1011
// Encoding name translation table: { name to match, name to emit in HTML }.
// The list is terminated by a { nullptr, nullptr } entry.
static const char *HtmlEncodings[][2] = {
    { "Latin1", "ISO-8859-1" },
    { nullptr, nullptr },
};
1012

1013
std::string HtmlOutputDev::mapEncodingToHtml(const std::string &encoding)
1014
{
Albert Astals Cid's avatar
Albert Astals Cid committed
1015
1016
1017
1018
    for (int i = 0; HtmlEncodings[i][0] != nullptr; i++) {
        if (encoding == HtmlEncodings[i][0]) {
            return HtmlEncodings[i][1];
        }
1019
    }
Albert Astals Cid's avatar
Albert Astals Cid committed
1020
    return encoding;
1021
1022
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
void HtmlOutputDev::doFrame(int firstPage)
{
    GooString *fName = new GooString(Docname);
    fName->append(".html");

    if (!(fContentsFrame = fopen(fName->c_str(), "w"))) {
        error(errIO, -1, "Couldn't open html file '{0:t}'", fName);
        delete fName;
        return;
    }
1033

1034
    delete fName;
Albert Astals Cid's avatar
Albert Astals Cid committed
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055

    const std::string baseName = gbasename(Docname->c_str());
    fputs(DOCTYPE, fContentsFrame);
    fputs("\n<html>", fContentsFrame);
    fputs("\n<head>", fContentsFrame);
    fprintf(fContentsFrame, "\n<title>%s</title>", docTitle->c_str());
    const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
    fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
    dumpMetaVars(fContentsFrame);
    fprintf(fContentsFrame, "</head>\n");
    fputs("<frameset cols=\"100,*\">\n", fContentsFrame);
    fprintf(fContentsFrame, "<frame name=\"links\" src=\"%s_ind.html\"/>\n", baseName.c_str());
    fputs("<frame name=\"contents\" src=", fContentsFrame);
    if (complexMode)
        fprintf(fContentsFrame, "\"%s-%d.html\"", baseName.c_str(), firstPage);
    else
        fprintf(fContentsFrame, "\"%ss.html\"", baseName.c_str());

    fputs("/>\n</frameset>\n</html>\n", fContentsFrame);

    fclose(fContentsFrame);
1056
1057
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1058
// Sets up the output device and opens the output files required by the
// global options.
//
// - Collects the document meta variables (generator plus any of author,
//   keywords, date, subject that are non-null).
// - With frames and non-xml output: unless singleHtml is set, writes the
//   frameset via doFrame() and opens "<fileName>_ind.html" as the links
//   frame; when not in complex mode, also opens "<fileName>s.html" as the
//   page stream.
// - With noframes: the page stream is stdout (stout) or "<fileName>.html" /
//   "<fileName>.xml", and the document preamble is written to it.
//
// `ok` is left false if any file could not be opened.
HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrderA, int firstPage, bool outline)
{
    catalog = catalogA;
    fContentsFrame = nullptr;
    page = nullptr;
    docTitle = new GooString(title);
    dumpJPEG = true;
    rawOrder = rawOrderA;
    doOutline = outline;
    ok = false;
    needClose = false;
    pages = new HtmlPage(rawOrder);

    glMetaVars = new std::vector<HtmlMetaVar *>();
    glMetaVars->push_back(new HtmlMetaVar("generator", "pdftohtml 0.36"));
    // Optional meta variables, in the order they are emitted.
    const struct
    {
        const char *key;
        const char *value;
    } optionalMeta[] = { { "author", author }, { "keywords", keywords }, { "date", date }, { "subject", subject } };
    for (const auto &meta : optionalMeta) {
        if (meta.value) {
            glMetaVars->push_back(new HtmlMetaVar(meta.key, meta.value));
        }
    }

    maxPageWidth = 0;
    maxPageHeight = 0;

    pages->setDocName(fileName);
    Docname = new GooString(fileName);

    // for non-xml output (complex or simple) with frames generate the left frame
    if (!xml && !noframes) {
        if (!singleHtml) {
            GooString leftName(fileName);
            leftName.append("_ind.html");

            doFrame(firstPage);

            fContentsFrame = fopen(leftName.c_str(), "w");
            if (fContentsFrame == nullptr) {
                error(errIO, -1, "Couldn't open html file '{0:t}'", &leftName);
                return;
            }
            fputs(DOCTYPE, fContentsFrame);
            fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);

            if (doOutline) {
                fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", gbasename(Docname->c_str()).c_str(), complexMode ? "-outline.html" : "s.html#outline");
            }
        }
        if (!complexMode) { /* not in complex mode */
            GooString rightName(fileName);
            rightName.append("s.html");

            page = fopen(rightName.c_str(), "w");
            if (page == nullptr) {
                error(errIO, -1, "Couldn't open html file '{0:t}'", &rightName);
                return;
            }
            fputs(DOCTYPE, page);
            fputs("<html>\n<head>\n<title></title>\n", page);
            printCSS(page);
            fputs("</head>\n<body>\n", page);
        }
    }

    if (noframes) {
        if (stout) {
            page = stdout;
        } else {
            GooString outName(fileName);
            outName.append(xml ? ".xml" : ".html");
            page = fopen(outName.c_str(), "w");
            if (page == nullptr) {
                error(errIO, -1, "Couldn't open html file '{0:t}'", &outName);
                return;
            }
        }

        const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
        if (xml) {
            fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding.c_str());
            fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
            fprintf(page, "<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
        } else {
            fprintf(page, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->c_str());

            fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());

            dumpMetaVars(page);
            printCSS(page);
            fprintf(page, "</head>\n");
            fprintf(page, "<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
        }
    }
    ok = true;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1168
1169
// Releases owned objects and finishes the open output streams: the links
// frame gets its closing </body></html>, and the page stream gets either the
// closing </pdf2xml> (xml) or </body></html> (simple mode or noframes)
// before being closed.
HtmlOutputDev::~HtmlOutputDev()
{
    delete Docname;
    delete docTitle;

    for (HtmlMetaVar *metaVar : *glMetaVars) {
        delete metaVar;
    }
    delete glMetaVars;

    if (fContentsFrame) {
        fputs("</body>\n</html>\n", fContentsFrame);
        fclose(fContentsFrame);
    }

    if (page != nullptr) {
        // Pick the trailer for the page stream; in complex mode with frames
        // and without xml the stream is left untouched.
        const char *trailer = nullptr;
        if (xml) {
            trailer = "</pdf2xml>\n";
        } else if (!complexMode || noframes) {
            trailer = "</body>\n</html>\n";
        }
        if (trailer != nullptr) {
            fputs(trailer, page);
            fclose(page);
        }
    }

    delete pages;
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1195
1196
void HtmlOutputDev::startPage(int pageNumA, GfxState *state, XRef *xref)
{
1197
1198
#if 0
  if (mode&&!xml){
1199
    if (write){
1200
      write=false;
1201
1202
      GooString* fname=Dirname(Docname);
      fname->append("image.log");
1203
      if((tin=fopen(getFileNameFromPath(fname->c_str(),fname->getLength()),"w"))==NULL){
1204
1205
1206
1207
1208
1209
1210
1211
1212
	printf("Error : can not open %s",fname);
	exit(1);
      }
      delete fname;
    // if(state->getRotation()!=0) 
    //  fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
    // else 
      fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());  
    }
1213
1214
  }
#endif
1215

Albert Astals Cid's avatar
Albert Astals Cid committed
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
    pageNum = pageNumA;
    const std::string str = gbasename(Docname->c_str());
    pages->clear();
    if (!noframes) {
        if (fContentsFrame) {
            if (complexMode)
                fprintf(fContentsFrame, "<a href=\"%s-%d.html\"", str.c_str(), pageNum);
            else
                fprintf(fContentsFrame, "<a href=\"%ss.html#%d\"", str.c_str(), pageNum);
            fprintf(fContentsFrame, " target=\"contents\" >Page %d</a><br/>\n", pageNum);
        }
1227
1228
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
    pages->pageWidth = static_cast<int>(state->getPageWidth());
    pages->pageHeight = static_cast<int>(state->getPageHeight());
}

void HtmlOutputDev::endPage()
{
    Links *linksList = docPage->getLinks();
    for (int i = 0; i < linksList->getNumLinks(); ++i) {
        doProcessLink(linksList->getLink(i));
    }
    delete linksList;
1240

Albert Astals Cid's avatar
Albert Astals Cid committed
1241
1242
1243
    pages->conv();
    pages->coalesce();
    pages->dump(page, pageNum, backgroundImages);
1244

Albert Astals Cid's avatar
Albert Astals Cid committed
1245
1246
1247
1248
1249
    // I don't yet know what to do in the case when there are pages of different
    // sizes and we want complex output: running ghostscript many times
    // seems very inefficient. So for now I'll just use last page's size
    maxPageWidth = pages->pageWidth;
    maxPageHeight = pages->pageHeight;
1250

Albert Astals Cid's avatar
Albert Astals Cid committed
1251
1252
1253
    // if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
    if (!stout && !globalParams->getErrQuiet())
        printf("Page-%d\n", (pageNum));
1254
1255
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1256
1257
1258
// Appends img to the list of background images passed to the page dump.
void HtmlOutputDev::addBackgroundImage(const std::string &img)
{
    backgroundImages.emplace_back(img);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1261
1262
1263
// Forwards the font change to the current page.
void HtmlOutputDev::updateFont(GfxState *state)
{
    pages->updateFont(state);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1266
1267
1268
// Forwards the start of a text string to the current page.
void HtmlOutputDev::beginString(GfxState *state, const GooString *s)
{
    pages->beginString(state, s);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1271
1272
1273
// Forwards the end of a text string to the current page; `state` is unused.
void HtmlOutputDev::endString(GfxState *state)
{
    pages->endString();
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1276
// Adds one character to the current page. Characters whose render mode has
// both low bits set ((render & 3) == 3) are skipped unless showHidden is set.
// `code` and the byte count are not used.
void HtmlOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int /*nBytes*/, const Unicode *u, int uLen)
{
    if (showHidden || (state->getRender() & 3) != 3) {
        pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
    }
}

1284
1285
// Copies the bytes of str's next stream, unchanged, into a ".jpg" image and
// registers it with the current page. With dataUrls set the bytes go to an
// in-memory file and the image is registered under a base64 "data:" URL
// instead of a file name. The name string is handed over to pages->addImage().
void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
{
    InMemoryFile memFile;

    // open the image file
    GooString *imageName = createImageFileName("jpg");
    FILE *out = dataUrls ? memFile.open("wb") : fopen(imageName->c_str(), "wb");
    if (out == nullptr) {
        error(errIO, -1, "Couldn't open image file '{0:t}'", imageName);
        delete imageName;
        return;
    }

    // initialize stream
    str = str->getNextStream();
    str->reset();

    // copy the stream
    for (int byte = str->getChar(); byte != EOF; byte = str->getChar()) {
        fputc(byte, out);
    }

    fclose(out);

    if (dataUrls) {
        delete imageName;
        imageName = new GooString(std::string("data:image/jpeg;base64,") + gbase64Encode(memFile.getBuffer()));
    }
    pages->addImage(imageName, state);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1316
// Encodes an image (or a 1-bit mask when isMask is true) as PNG and registers
// it with the current page. Does nothing when built without ENABLE_LIBPNG.
//
// state    - graphics state passed on to pages->addImage()
// str      - source stream; read through an ImageStream for colour images and
//            byte-wise for masks
// width,
// height   - image size in pixels
// colorMap - required for colour images; optional for masks, where it is only
//            used to decide whether the mask bits need inverting
// isMask   - true to write a monochrome PNG from the raw mask bits
//
// With dataUrls set the PNG is written to an in-memory file and registered as
// a base64 "data:" URL instead of a file name. On success the name string is
// handed over to pages->addImage(); on every failure path all local
// allocations (name, row buffers, writer, image stream) are released here.
void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask)
{
#ifdef ENABLE_LIBPNG
    FILE *f1;
    InMemoryFile ims;

    if (!colorMap && !isMask) {
        error(errInternal, -1, "Can't have color image without a color map");
        return;
    }

    // open the image file
    GooString *fName = createImageFileName("png");
    f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
    if (!f1) {
        error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
        delete fName;
        return;
    }

    PNGWriter *writer = new PNGWriter(isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB);
    // TODO can we calculate the resolution of the image?
    if (!writer->init(f1, width, height, 72, 72)) {
        error(errInternal, -1, "Can't init PNG for image '{0:t}'", fName);
        delete fName;
        delete writer;
        fclose(f1);
        return;
    }

    if (!isMask) {
        unsigned char *p;
        GfxRGB rgb;
        png_byte *row = (png_byte *)gmalloc(3 * width); // 3 bytes/pixel: RGB
        png_bytep *row_pointer = &row;

        // Initialize the image stream
        ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits());
        imgStr->reset();

        // For each line...
        for (int y = 0; y < height; y++) {

            // Convert into a PNG row
            p = imgStr->getLine();
            if (!p) {
                error(errIO, -1, "Failed to read PNG. '{0:t}' will be incorrect", fName);
                delete fName;
                gfree(row);
                delete writer;
                delete imgStr;
                fclose(f1);
                return;
            }
            for (int x = 0; x < width; x++) {
                colorMap->getRGB(p, &rgb);
                // Write the RGB pixels into the row
                row[3 * x] = colToByte(rgb.r);
                row[3 * x + 1] = colToByte(rgb.g);
                row[3 * x + 2] = colToByte(rgb.b);
                p += colorMap->getNumPixelComps();
            }

            if (!writer->writeRow(row_pointer)) {
                error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
                delete fName;
                gfree(row);
                delete writer;
                delete imgStr;
                fclose(f1);
                return;
            }
        }
        gfree(row);
        imgStr->close();
        delete imgStr;
    } else { // isMask == true
        int size = (width + 7) / 8;

        // PDF masks use 0 = draw current color, 1 = leave unchanged.
        // We invert this to provide the standard interpretation of alpha
        // (0 = transparent, 1 = opaque). If the colorMap already inverts
        // the mask we leave the data unchanged.
        int invert_bits = 0xff;
        if (colorMap) {
            GfxGray gray;
            unsigned char zero[gfxColorMaxComps];
            memset(zero, 0, sizeof(zero));
            colorMap->getGray(zero, &gray);
            if (colToByte(gray) == 0) {
                invert_bits = 0x00;
            }
        }

        str->reset();
        unsigned char *png_row = (unsigned char *)gmalloc(size);

        for (int ri = 0; ri < height; ++ri) {
            for (int i = 0; i < size; i++) {
                png_row[i] = str->getChar() ^ invert_bits;
            }

            if (!writer->writeRow(&png_row)) {
                error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
                delete fName;
                delete writer;
                fclose(f1);
                gfree(png_row);
                return;
            }
        }
        str->close();
        gfree(png_row);
    }

    str->close();

    writer->close();
    delete writer;
    fclose(f1);

    if (dataUrls) {
        delete fName;
        fName = new GooString(std::string("data:image/png;base64,") + gbase64Encode(ims.getBuffer()));
    }
    pages->addImage(fName, state);
#else
    return;
#endif
}

// Returns a newly allocated name "<Docname>-<pageNum>_<N>.<ext>", where N is
// one more than the number of images already on the current page.
// The caller owns the returned string.
GooString *HtmlOutputDev::createImageFileName(const char *ext)
{
    const auto imageNumber = pages->getNumImages() + 1;
    return GooString::format("{0:s}-{1:d}_{2:d}.{3:s}", Docname->c_str(), pageNum, imageNumber, ext);
}

Albert Astals Cid's avatar
Albert Astals Cid committed
1446
1447
void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
{