//========================================================================
//
// HtmlOutputDev.cc
//
// Copyright 1997-2002 Glyph & Cog, LLC
//
// Changed 1999-2000 by G.Ovtcharov
//
// Changed 2002 by Mikhail Kruk
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005-2013 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
// Copyright (C) 2010, 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
// Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com>
// Copyright (C) 2012 Pino Toscano <pino@kde.org>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
// Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

46
47
48
49
#ifdef __GNUC__
#pragma implementation
#endif

50
#include "config.h"
51
52
53
54
55
56
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <ctype.h>
#include <math.h>
57
#include <iostream>
58
59
60
61
62
63
#include "goo/GooString.h"
#include "goo/GooList.h"
#include "UnicodeMap.h"
#include "goo/gmem.h"
#include "Error.h"
#include "GfxState.h"
64
#include "Page.h"
65
#include "Annot.h"
66
#include "PNGWriter.h"
67
68
69
#include "GlobalParams.h"
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
70
#include "HtmlUtils.h"
71
72
#include "Outline.h"
#include "PDFDoc.h"
73

74
75
76
77
#ifdef ENABLE_LIBPNG
#include <png.h>
#endif

78
// Stream prefix for debug traces, expanding to "<file>: <line>: DEBUG: ".
// It is a chain of `<<` operands, so it must be used inside a stream
// expression, e.g. `std::cerr << DEBUG << "message" << std::endl;`.
#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Placement record for one image: the file it was written to plus the
// bounding box obtained by pushing the unit square through the graphics
// state's transform.
class HtmlImage
{
public:
  // Stores _fName (deleted by the destructor, so the object takes ownership)
  // and computes the bounding box from `state`.
  HtmlImage(GooString *_fName, GfxState *state)
    : fName(_fName)
  {
    // Corner (0,0) supplies xMin/yMax and corner (1,1) supplies xMax/yMin.
    state->transform(0, 0, &xMin, &yMax);
    state->transform(1, 1, &xMax, &yMin);
  }

  ~HtmlImage()
  {
    delete fName;
  }

  // image x coordinates
  double xMin;
  double xMax;
  // image y coordinates
  double yMin;
  double yMax;
  // image file name
  GooString *fName;
};

95
96
97
// Returns true when the distance from x to y is strictly smaller than the
// distance from x to z; a tie yields false.
static inline bool IS_CLOSER(float x, float y, float z)
{
  const float offsetToY = x - y;
  const float offsetToZ = x - z;
  return fabs(offsetToY) < fabs(offsetToZ);
}

98
// Conversion options defined outside this file.  The notes below describe
// only how this file reads them.
extern GBool complexMode;   // when false, coalesce() drops duplicated strings;
                            // when true, strings merge only if their fonts match
extern GBool singleHtml;    // all pages go to one "<DocName>-html.html" file
extern GBool ignore;        // when set, no background <img> is emitted per page
extern GBool printCommands;
extern GBool printHtml;
extern GBool noframes;      // pages are written to the caller's FILE, not per-page files
extern GBool stout;
extern GBool xml;           // merged strings get " " instead of "&#160;" as the space
extern GBool showHidden;
extern GBool noMerge;       // suppresses the line-break (paragraph) merge in coalesce()

// Gap, as a fraction of the string height, above which two neighbouring
// glyphs/strings are considered separate words.
extern double wordBreakThreshold;

111
112
113
// Gate for the std::cerr debug traces in this file; initialised to gFalse.
static GBool debug = gFalse;
// Scratch string shared by print_matrix() and print_uni_str(): each call
// deletes the previous buffer and installs a new one, so a returned C string
// is only valid until the next call to either helper.
static GooString *gstr_buff0 = NULL; // a workspace in which I format strings

114
115
116
117
118
119
120
121
122
123
// Returns a newly allocated string holding the part of `str` after its last
// SLASH, or a copy of the whole string when it contains no SLASH.
// The caller owns the result.
static GooString* basename(GooString* str){
  char *chars = str->getCString();
  const int total = str->getLength();

  // Scan backwards for the last path separator.
  int cut = total - 1;
  while (cut >= 0 && chars[cut] != SLASH)
    --cut;

  if (cut < 0)
    return new GooString(str);
  return new GooString(chars + cut + 1, total - cut - 1);
}

124
#if 0
125
126
127
128
129
130
131
132
133
// Returns a newly allocated string holding `str` up to and including its
// last SLASH, or an empty string when it contains no SLASH.
// The caller owns the result.
static GooString* Dirname(GooString* str){
  char *chars = str->getCString();
  const int total = str->getLength();

  // Scan backwards for the last path separator.
  int cut = total - 1;
  while (cut >= 0 && chars[cut] != SLASH)
    --cut;

  if (cut < 0)
    return new GooString();
  return new GooString(chars, cut + 1);
}
134
#endif
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
static const char *print_matrix(const double *mat) {
  delete gstr_buff0;

  gstr_buff0 =  GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]",
                                  *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
  return gstr_buff0->getCString();
}

static const char *print_uni_str(const Unicode *u, const unsigned uLen) {
  GooString *gstr_buff1 = NULL;

  delete gstr_buff0;

  if (!uLen) return "";
  gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
  for (unsigned i = 1; i < uLen; i++) {
    if (u[i] < 0x7F) {
      gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?');
      delete gstr_buff0;
      gstr_buff0 = gstr_buff1;
    }
  }

  return gstr_buff0->getCString();
}

162
163
164
165
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------

166
// Starts an empty string at the graphics state's current text position.
//
//   state    - source of the current position, font, fill colour and text matrix
//   fontSize - transformed font size used for the vertical extent
//   _fonts   - font accumulator in which this string's font is registered
//
// Sets yMin/yMax from the clamped font ascent/descent, registers the font
// (with a normalised rotation matrix when the text matrix is rotated or
// skewed) and resets all text/list members.  xMin/xMax are not set here;
// HtmlString::addChar assigns them.
HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) {
  GfxFont *font;
  double x, y;

  state->transform(state->getCurX(), state->getCurY(), &x, &y);
  if ((font = state->getFont())) {
    // Clamp implausible font metrics so one bad font cannot inflate the box.
    double ascent = font->getAscent();
    double descent = font->getDescent();
    if( ascent > 1.05 ){
        //printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
        ascent = 1.05;
    }
    if( descent < -0.4 ){
        //printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
        descent = -0.4;
    }
    yMin = y - ascent * fontSize;
    yMax = y - descent * fontSize;
    GfxRGB rgb;
    state->getFillRGB(&rgb);
    HtmlFont hfont=HtmlFont(font, static_cast<int>(fontSize-1), rgb);
    if (isMatRotOrSkew(state->getTextMat())) {
      // Six elements, matching what print_matrix() reads; the former
      // four-element array was read out of bounds by the debug traces.
      // Only the first four entries are rotation/skew terms.
      double normalizedMatrix[6];
      memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
      // browser rotates the opposite way
      // so flip the sign of the angle -> sin() components change sign
      if (debug)
        std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
      normalizedMatrix[1] *= -1;
      normalizedMatrix[2] *= -1;
      if (debug)
        std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
      normalizeRotMat(normalizedMatrix);
      if (debug)
        std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
      hfont.setRotMat(normalizedMatrix);
    }
    fontpos = fonts->AddFont(hfont);
  } else {
    // this means that the PDF file draws text without a current font,
    // which should never happen
    yMin = y - 0.95 * fontSize;
    yMax = y + 0.35 * fontSize;
    fontpos=0;
  }
  if (yMin == yMax) {
    // this is a sanity check for a case that shouldn't happen -- but
    // if it does happen, we want to avoid dividing by zero later
    yMin = y;
    yMax = y + 1;
  }
  col = 0;
  text = NULL;
  xRight = NULL;
  link = NULL;
  len = size = 0;
  yxNext = NULL;
  xyNext = NULL;
  htext=new GooString();
  dir = textDirUnknown;
}


// Releases the buffers this string holds: the code-point array and the
// right-edge array (both grown with grealloc, so freed with gfree) and the
// HTML text.
HtmlString::~HtmlString() {
  gfree(text);
  delete htext;
  gfree(xRight);
}

// Appends one code point to the string.
//
//   x, dx - left edge and advance of the glyph; x + dx becomes both the
//           glyph's right edge and the string's new xMax
//   u     - the code point
//
// state, y and dy are not used.  The first character also fixes xMin.
void HtmlString::addChar(GfxState *state, double x, double y,
			 double dx, double dy, Unicode u) {
  // Direction detection is disabled; every string is treated as left-to-right.
  if (dir == textDirUnknown)
    dir = textDirLeftRight;

  // Grow both parallel arrays in steps of 16 entries.
  if (len == size) {
    size += 16;
    text = static_cast<Unicode *>(grealloc(text, size * sizeof(Unicode)));
    xRight = static_cast<double *>(grealloc(xRight, size * sizeof(double)));
  }

  const double rightEdge = x + dx;
  if (len == 0)
    xMin = x;
  text[len] = u;
  xRight[len] = rightEdge;
  xMax = rightEdge;
  len = len + 1;
}

void HtmlString::endString()
{
  if( dir == textDirRightLeft && len > 1 )
  {
    //printf("will reverse!\n");
    for (int i = 0; i < len / 2; i++)
    {
      Unicode ch = text[i];
      text[i] = text[len - i - 1];
      text[len - i - 1] = ch;
    }
  }
}

//------------------------------------------------------------------------
// HtmlPage
//------------------------------------------------------------------------

// Creates an empty page.
//
//   rawOrder  - when true, strings are kept in the order they arrive instead
//               of being sorted into the y-major list (see endString)
//   imgExtVal - file extension used for page images; copied into imgExt
//
// Allocates the font accumulator, link list and image list, which the
// destructor releases.
HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
  this->rawOrder = rawOrder;
  curStr = NULL;
  yxStrings = NULL;
  xyStrings = NULL;
  yxCur1 = yxCur2 = NULL;
  fonts=new HtmlFontAccu();
  links=new HtmlLinks();
  imgList=new GooList();
  pageWidth=0;
  pageHeight=0;
  fontsPageMarker = 0;
  DocName=NULL;
  firstPage = -1;   // -1 = no page dumped yet
  imgExt = new GooString(imgExtVal);
}

// Clears the page contents via clear(), then releases the objects allocated
// by the constructor plus DocName and every HtmlImage still in imgList.
HtmlPage::~HtmlPage() {
  clear();
  delete DocName;
  delete fonts;
  delete links;
  delete imgExt;
  deleteGooList(imgList, HtmlImage);
}

// Recomputes fontSize from the graphics state.  For Type 3 fonts the size is
// additionally rescaled by a heuristic, because the font's own coordinate
// system cannot be known without rendering it.
void HtmlPage::updateFont(GfxState *state) {
  // adjust the font size
  fontSize = state->getTransformedFontSize();

  GfxFont *font = state->getFont();
  if (!font || font->getType() != fontType3)
    return;

  // This is a hack which makes it possible to deal with some Type 3
  // fonts: guess the scale from the width of the glyph named "m"
  // (which breaks if the font is a subset that doesn't contain 'm').
  Gfx8BitFont *font8 = (Gfx8BitFont *)font;
  int mCode = 0;
  while (mCode < 256) {
    char *glyphName = font8->getCharName(mCode);
    if (glyphName && glyphName[0] == 'm' && glyphName[1] == '\0')
      break;
    ++mCode;
  }
  if (mCode < 256) {
    const double mWidth = font8->getWidth(mCode);
    if (mWidth != 0) {
      // 0.6 is a generic average 'm' width -- yes, this is a hack
      fontSize *= mWidth / 0.6;
    }
  }

  // Fold in the font matrix's vertical/horizontal scale ratio.
  double *matrix = font->getFontMatrix();
  if (matrix[0] != 0)
    fontSize *= fabs(matrix[3] / matrix[0]);
}

// Opens a new current string at the state's text position, using the page's
// current fontSize and font accumulator.  `s` is not used.  Any previous
// curStr pointer is overwritten without being freed here.
void HtmlPage::beginString(GfxState *state, GooString *s) {
  HtmlString *opened = new HtmlString(state, fontSize, fonts);
  curStr = opened;
}


void HtmlPage::conv(){
  HtmlString *tmp;

  int linkIndex = 0;
  HtmlFont* h;
  for(tmp=yxStrings;tmp;tmp=tmp->yxNext){
     int pos=tmp->fontpos;
     //  printf("%d\n",pos);
     h=fonts->Get(pos);

     if (tmp->htext) delete tmp->htext; 
     tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len);

     if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){
       tmp->link = links->getLink(linkIndex);
       /*GooString *t=tmp->htext;
       tmp->htext=links->getLink(k)->Link(tmp->htext);
       delete t;*/
     }
  }

}


// Adds the code points of one drawn character to the current string,
// starting a new string first when the character is far from the current
// string's end and the text rotation differs.
//
//   x, y     - character origin, transformed here through `state`
//   dx, dy   - advance; character spacing is subtracted before use
//   ox, oy   - not used
//   u, uLen  - code points produced by the character; the advance is split
//              evenly between them
//
// curStr must be non-NULL on entry.
void HtmlPage::addChar(GfxState *state, double x, double y,
		       double dx, double dy, 
			double ox, double oy, Unicode *u, int uLen) {
  double x1, y1, w1, h1, dx2, dy2;
  int n, i;
  state->transform(x, y, &x1, &y1);
  n = curStr->len;
 
  // check that new character is in the same direction as current string
  // and is not too far away from it before adding 
  //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || 
  // XXX
  if (debug) {
    double *text_mat = state->getTextMat();
    // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
    // sin q is zero iff there is no rotation, or 180 deg. rotation;
    // for 180 rotation, cos q will be negative
    if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
      std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
      std::cerr << "text " << print_matrix(state->getTextMat());
    }
  }
  if (n > 0 && // don't start a new string, unless there is already a string
      // TODO: the following line assumes that text is flowing left to
      // right, which will not necessarily be the case, e.g. if rotated;
      // It assesses whether or not two characters are close enough to
      // be part of the same string
      fabs(x1 - curStr->xRight[n-1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) &&
      // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
      // sin q is zero iff there is no rotation, or 180 deg. rotation;
      // for 180 rotation, cos q will be negative
      !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat()))
  {
    endString();
    beginString(state, NULL);
  }
  // Remove the character-spacing component from the advance.
  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
			    0, &dx2, &dy2);
  dx -= dx2;
  dy -= dy2;
  state->transformDelta(dx, dy, &w1, &h1);
  // Share the advance equally between the code points of this character.
  if (uLen != 0) {
    w1 /= uLen;
    h1 /= uLen;
  }
  for (i = 0; i < uLen; ++i) {
    curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
  }
}

void HtmlPage::endString() {
  HtmlString *p1, *p2;
  double h, y1, y2;

  // throw away zero-length strings -- they don't have valid xMin/xMax
  // values, and they're useless anyway
  if (curStr->len == 0) {
    delete curStr;
    curStr = NULL;
    return;
  }

  curStr->endString();

#if 0 //~tmp
  if (curStr->yMax - curStr->yMin > 20) {
    delete curStr;
    curStr = NULL;
    return;
  }
#endif

  // insert string in y-major list
  h = curStr->yMax - curStr->yMin;
  y1 = curStr->yMin + 0.5 * h;
  y2 = curStr->yMin + 0.8 * h;
  if (rawOrder) {
    p1 = yxCur1;
    p2 = NULL;
  } else if ((!yxCur1 ||
              (y1 >= yxCur1->yMin &&
               (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
             (!yxCur2 ||
              (y1 < yxCur2->yMin ||
               (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
    p1 = yxCur1;
    p2 = yxCur2;
  } else {
    for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
      if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
        break;
    }
    yxCur2 = p2;
  }
  yxCur1 = curStr;
  if (p1)
    p1->yxNext = curStr;
  else
    yxStrings = curStr;
  curStr->yxNext = p2;
  curStr = NULL;
}

Boris Toloknov's avatar
Boris Toloknov committed
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
// Returns a pointer to the last occurrence of `ss` in `s`, or NULL when `ss`
// does not occur.  An empty `ss` matches at the end of `s` (the terminator);
// previously that case stepped past the end of the string.
static const char *strrstr( const char *s, const char *ss )
{
  if( *ss == '\0' )
    return s + strlen( s );

  const char *last = NULL;
  // A hit on a non-empty needle never starts at the terminator, so hit + 1
  // always stays inside the string.
  for( const char *hit = strstr( s, ss ); hit != NULL; hit = strstr( hit + 1, ss ) ){
    last = hit;
  }
  return last;
}

// Appends the closing tags requested by the three flags to `htext`.
//
// When more than one tag has to be closed, the position of each tag's last
// opening ("<a ", "<i>", "<b>") decides whether </a> or </i> must be emitted
// before the default order </b>, </i>, </a>.
//
// The flags are in/out: finish_a and finish_italic are reset to false when
// their tag is closed early by the ordering logic; finish_bold is never
// modified.
static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
{
  // Opening-tag positions are only looked up when ordering can matter,
  // i.e. when at least two tags are being closed.
  const char *last_italic = NULL;
  const char *last_bold = NULL;
  const char *last_a = NULL;

  if( finish_italic && ( finish_bold || finish_a ) )
    last_italic = strrstr( htext->getCString(), "<i>" );
  if( finish_bold && ( finish_italic || finish_a ) )
    last_bold = strrstr( htext->getCString(), "<b>" );
  if( finish_a && ( finish_italic || finish_bold ) ){
    last_a = strrstr( htext->getCString(), "<a " );
    // The link was opened after both font tags: close it first.
    const char *last_font_tag = last_italic > last_bold ? last_italic : last_bold;
    if( last_a > last_font_tag ){
      htext->append("</a>", 4);
      finish_a = false;
    }
  }

  // Italic was opened after bold: close it before bold.
  if( finish_italic && finish_bold && last_italic > last_bold ){
    htext->append("</i>", 4);
    finish_italic = false;
  }

  if( finish_bold ){
    htext->append("</b>", 4);
  }
  if( finish_italic ){
    htext->append("</i>", 4);
  }
  if( finish_a ){
    htext->append("</a>");
  }
}

498
499
500
// Strings are lines of text;
// This function aims to combine strings into lines and paragraphs if !noMerge
// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
void HtmlPage::coalesce() {
  HtmlString *str1, *str2;
  HtmlFont *hfont1, *hfont2;
  double space, horSpace, vertSpace, vertOverlap;
  GBool addSpace, addLineBreak;
  int n, i;
  double curX, curY;

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%f..%f  y=%f..%f  size=%2d '",
	   str1->xMin, str1->xMax, str1->yMin, str1->yMax,
	   (int)(str1->yMax - str1->yMin));
    for (i = 0; i < str1->len; ++i) {
      fputc(str1->text[i] & 0xff, stdout);
    }
    printf("'\n");
  }
  printf("\n------------------------------------------------------------\n\n");
#endif
  str1 = yxStrings;

  if( !str1 ) return;

  //----- discard duplicated text (fake boldface, drop shadows)
  if( !complexMode )
  {	/* if not in complex mode get rid of duplicate strings */
	HtmlString *str3;
	GBool found;
  	while (str1)
	{
		double size = str1->yMax - str1->yMin;
		double xLimit = str1->xMin + size * 0.2;
		found = gFalse;
		for (str2 = str1, str3 = str1->yxNext;
			str3 && str3->xMin < xLimit;
			str2 = str3, str3 = str2->yxNext)
		{
			if (str3->len == str1->len &&
				!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
				fabs(str3->yMin - str1->yMin) < size * 0.2 &&
				fabs(str3->yMax - str1->yMax) < size * 0.2 &&
				fabs(str3->xMax - str1->xMax) < size * 0.2)
			{
				found = gTrue;
				//printf("found duplicate!\n");
				break;
			}
		}
		if (found)
		{
			str2->xyNext = str3->xyNext;
			str2->yxNext = str3->yxNext;
			delete str3;
		}
		else
		{
			str1 = str1->yxNext;
		}
	}		
  }	/*- !complexMode */
  
  str1 = yxStrings;
  
  hfont1 = getFont(str1);
  if( hfont1->isBold() )
    str1->htext->insert(0,"<b>",3);
  if( hfont1->isItalic() )
    str1->htext->insert(0,"<i>",3);
  if( str1->getLink() != NULL ) {
    GooString *ls = str1->getLink()->getLinkStart();
    str1->htext->insert(0, ls);
    delete ls;
  }
  curX = str1->xMin; curY = str1->yMin;

  while (str1 && (str2 = str1->yxNext)) {
    hfont2 = getFont(str2);
579
    space = str1->yMax - str1->yMin; // the height of the font's bounding box
580
    horSpace = str2->xMin - str1->xMax;
581
582
    // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
    addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
    vertSpace = str2->yMin - str1->yMax;

//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);

    if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
    {
	vertOverlap = str1->yMax - str2->yMin;
    } else
    if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
    {
	vertOverlap = str2->yMax - str1->yMin;
    } else
    {
    	vertOverlap = 0;
    } 
    
599
600
601
602
603
604
605
606
607
    // Combine strings if:
    //  They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
    //  1.  They appear to be part of the same line of text
    //  2.  They appear to be subsequent lines of a paragraph
    //  We assume (1) or (2) above, respectively, based on:
    //  (1)  strings overlap vertically AND
    //       horizontal space between end of str1 and start of str2 is consistent with a single space or less;
    //       when rawOrder, the strings have to overlap vertically by at least 50%
    //  (2)  Strings flow down the page, but the space between them is not too great, and they are lined up on the left
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
    if (
	(
	 (
	  (
	   (rawOrder && vertOverlap > 0.5 * space) 
	   ||
	   (!rawOrder && str2->yMin < str1->yMax)
	  ) &&
	  (horSpace > -0.5 * space && horSpace < space)
	 ) ||
       	 (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
	) &&
	(!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter
	str1->dir == str2->dir // text direction the same
       ) 
    {
//      printf("yes\n");
      n = str1->len + str2->len;
626
      if ((addSpace = horSpace > wordBreakThreshold * space)) {
627
628
629
630
631
632
633
634
635
636
637
638
639
        ++n;
      }
      if (addLineBreak) {
        ++n;
      }
  
      str1->size = (n + 15) & ~15;
      str1->text = (Unicode *)grealloc(str1->text,
				       str1->size * sizeof(Unicode));
      str1->xRight = (double *)grealloc(str1->xRight,
					str1->size * sizeof(double));
      if (addSpace) {
		  str1->text[str1->len] = 0x20;
640
		  str1->htext->append(xml?" ":"&#160;");
641
642
643
644
645
		  str1->xRight[str1->len] = str2->xMin;
		  ++str1->len;
      }
      if (addLineBreak) {
	  str1->text[str1->len] = '\n';
646
	  str1->htext->append("<br/>");
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
	  str1->xRight[str1->len] = str2->xMin;
	  ++str1->len;
	  str1->yMin = str2->yMin;
	  str1->yMax = str2->yMax;
	  str1->xMax = str2->xMax;
	  int fontLineSize = hfont1->getLineSize();
	  int curLineSize = (int)(vertSpace + space); 
	  if( curLineSize != fontLineSize )
	  {
	      HtmlFont *newfnt = new HtmlFont(*hfont1);
	      newfnt->setLineSize(curLineSize);
	      str1->fontpos = fonts->AddFont(*newfnt);
	      delete newfnt;
	      hfont1 = getFont(str1);
	      // we have to reget hfont2 because it's location could have
	      // changed on resize
	      hfont2 = getFont(str2); 
	  }
      }
      for (i = 0; i < str2->len; ++i) {
	str1->text[str1->len] = str2->text[i];
	str1->xRight[str1->len] = str2->xRight[i];
	++str1->len;
      }

Boris Toloknov's avatar
Boris Toloknov committed
672
      /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
673
674
      HtmlLink *hlink1 = str1->getLink();
      HtmlLink *hlink2 = str2->getLink();
Boris Toloknov's avatar
Boris Toloknov committed
675
676
677
678
679
680
681
682
683
      bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
      GBool finish_a = switch_links && hlink1 != NULL;
      GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
      GBool finish_bold   = hfont1->isBold()   && ( !hfont2->isBold()   || finish_a || finish_italic );
      CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
      if( switch_links && hlink2 != NULL ) {
        GooString *ls = hlink2->getLinkStart();
        str1->htext->append(ls);
        delete ls;
684
      }
Boris Toloknov's avatar
Boris Toloknov committed
685
686
687
688
689
      if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
	    str1->htext->append("<i>", 3);
      if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
	    str1->htext->append("<b>", 3);

690
691
692
693
694
695
696
697
698
699
700
701
702
703
704

      str1->htext->append(str2->htext);
      // str1 now contains href for link of str2 (if it is defined)
      str1->link = str2->link; 
      hfont1 = hfont2;
      if (str2->xMax > str1->xMax) {
	str1->xMax = str2->xMax;
      }
      if (str2->yMax > str1->yMax) {
	str1->yMax = str2->yMax;
      }
      str1->yxNext = str2->yxNext;
      delete str2;
    } else { // keep strings separate
//      printf("no\n"); 
Boris Toloknov's avatar
Boris Toloknov committed
705
706
707
708
      GBool finish_a = str1->getLink() != NULL;
      GBool finish_bold   = hfont1->isBold();
      GBool finish_italic = hfont1->isItalic();
      CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
     
      str1->xMin = curX; str1->yMin = curY; 
      str1 = str2;
      curX = str1->xMin; curY = str1->yMin;
      hfont1 = hfont2;
      if( hfont1->isBold() )
	str1->htext->insert(0,"<b>",3);
      if( hfont1->isItalic() )
	str1->htext->insert(0,"<i>",3);
      if( str1->getLink() != NULL ) {
	GooString *ls = str1->getLink()->getLinkStart();
	str1->htext->insert(0, ls);
	delete ls;
      }
    }
  }
  str1->xMin = curX; str1->yMin = curY;
Boris Toloknov's avatar
Boris Toloknov committed
726
727
728
729
730

  GBool finish_bold   = hfont1->isBold();
  GBool finish_italic = hfont1->isItalic();
  GBool finish_a = str1->getLink() != NULL;
  CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%3d..%3d  y=%3d..%3d  size=%2d ",
	   (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
	   (int)(str1->yMax - str1->yMin));
    printf("'%s'\n", str1->htext->getCString());  
  }
  printf("\n------------------------------------------------------------\n\n");
#endif

}

void HtmlPage::dumpAsXML(FILE* f,int page){  
  fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
  fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
    
  for(int i=fontsPageMarker;i < fonts->size();i++) {
    GooString *fontCSStyle = fonts->CSStyle(i);
    fprintf(f,"\t%s\n",fontCSStyle->getCString());
    delete fontCSStyle;
  }
  
754
  int listlen=imgList->getLength();
755
  for (int i = 0; i < listlen; i++) {
756
    HtmlImage *img = (HtmlImage*)imgList->del(0);
757
758
759
760
761
762
    fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
    fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
    fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
    delete img;
  }

763
764
765
766
767
  for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
    if (tmp->htext){
      fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
      fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
      fprintf(f,"font=\"%d\">", tmp->fontpos);
768
      fputs(tmp->htext->getCString(),f);
769
770
771
772
773
774
      fputs("</text>\n",f);
    }
  }
  fputs("</page>\n",f);
}

Ihar Filipau's avatar
Ihar Filipau committed
775
776
777
778
779
780
781
// Writes a fixed <style> block to `f` defining the .xflip, .yflip and .xyflip
// classes, which mirror an element horizontally, vertically or both.
static void printCSS(FILE *f)
{
  // Image flip/flop CSS
  // Source:
  // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
  // tested in Chrome, Fx (Linux) and IE9 (W7)
  static const char css[] = 
    "<style type=\"text/css\">" "\n"
    "<!--" "\n"
    ".xflip {" "\n"
    "    -moz-transform: scaleX(-1);" "\n"
    "    -webkit-transform: scaleX(-1);" "\n"
    "    -o-transform: scaleX(-1);" "\n"
    "    transform: scaleX(-1);" "\n"
    "    filter: fliph;" "\n"
    "}" "\n"
    ".yflip {" "\n"
    "    -moz-transform: scaleY(-1);" "\n"
    "    -webkit-transform: scaleY(-1);" "\n"
    "    -o-transform: scaleY(-1);" "\n"
    "    transform: scaleY(-1);" "\n"
    "    filter: flipv;" "\n"
    "}" "\n"
    ".xyflip {" "\n"
    "    -moz-transform: scaleX(-1) scaleY(-1);" "\n"
    "    -webkit-transform: scaleX(-1) scaleY(-1);" "\n"
    "    -o-transform: scaleX(-1) scaleY(-1);" "\n"
    "    transform: scaleX(-1) scaleY(-1);" "\n"
    "    filter: fliph + flipv;" "\n"
    "}" "\n"
    "-->" "\n"
    "</style>" "\n";

  // sizeof(css)-1 leaves out the terminating NUL.
  fwrite( css, sizeof(css)-1, 1, f );
}

811
int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) {
812
813
814
815
816
817
  GooString* tmp;

  if( !noframes )
  {
      GooString* pgNum=GooString::fromInt(page);
      tmp = new GooString(DocName);
Albert Astals Cid's avatar
Albert Astals Cid committed
818
819
820
821
822
823
824
      if (!singleHtml){
            tmp->append('-')->append(pgNum)->append(".html");
            pageFile = fopen(tmp->getCString(), "w");
      } else {
            tmp->append("-html")->append(".html");
            pageFile = fopen(tmp->getCString(), "a");
      }
825
      delete pgNum;
Albert Astals Cid's avatar
Albert Astals Cid committed
826
      if (!pageFile) {
827
	  error(errIO, -1, "Couldn't open html file '{0:t}'", tmp);
828
	  delete tmp;
829
	  return 1;
830
831
      } 

Albert Astals Cid's avatar
Albert Astals Cid committed
832
      if (!singleHtml)
833
        fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
Albert Astals Cid's avatar
Albert Astals Cid committed
834
      else
835
        fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, tmp->getCString());
Albert Astals Cid's avatar
Albert Astals Cid committed
836
837

      delete tmp;
838

839
      GooString *htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
Albert Astals Cid's avatar
Albert Astals Cid committed
840
      if (!singleHtml)
841
        fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
Albert Astals Cid's avatar
Albert Astals Cid committed
842
      else
843
        fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString());
844
      delete htmlEncoding;
845
846
847
848
849
850
851
852
  }
  else 
  {
      pageFile = file;
      fprintf(pageFile,"<!-- Page %d -->\n", page);
      fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
  } 
  
853
854
855
856
857
858
859
860
861
  return 0;
}

void HtmlPage::dumpComplex(FILE *file, int page){
  FILE* pageFile;
  GooString* tmp;

  if( firstPage == -1 ) firstPage = page; 
  
862
  if (dumpComplexHeaders(file, pageFile, page)) { error(errIO, -1, "Couldn't write headers."); return; }
863
864
865

  tmp=basename(DocName);
   
866
  fputs("<style type=\"text/css\">\n<!--\n",pageFile);
867
  fputs("\tp {margin: 0; padding: 0;}",pageFile);
868
  for(int i=fontsPageMarker;i!=fonts->size();i++) {
Albert Astals Cid's avatar
Albert Astals Cid committed
869
870
871
872
873
    GooString *fontCSStyle;
    if (!singleHtml)
         fontCSStyle = fonts->CSStyle(i);
    else
         fontCSStyle = fonts->CSStyle(i,page);
874
875
876
877
    fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
    delete fontCSStyle;
  }
 
878
  fputs("-->\n</style>\n",pageFile);
879
880
881
  
  if( !noframes )
  {  
882
      fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); 
883
884
  }
  
885
  fprintf(pageFile,"<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
886
887
      page, pageWidth, pageHeight);

888
889
890
  if( !ignore ) 
  {
    fprintf(pageFile,
891
	    "<img width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
892
893
894
895
896
897
898
899
900
	    pageWidth, pageHeight, tmp->getCString(), 
		(page-firstPage+1), imgExt->getCString());
  }
  
  delete tmp;
  
  for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
    if (tmp1->htext){
      fprintf(pageFile,
901
	      "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
902
903
	      xoutRound(tmp1->yMin),
	      xoutRound(tmp1->xMin));
904
905
906
907
908
909
910
      if (!singleHtml) {
          fputc('0', pageFile);
      } else {
          fprintf(pageFile, "%d", page);
      }
      fprintf(pageFile,"%d\">", tmp1->fontpos);
      fputs(tmp1->htext->getCString(), pageFile);
911
      fputs("</p>\n", pageFile);
912
913
914
    }
  }

915
  fputs("</div>\n", pageFile);
916
917
918
  
  if( !noframes )
  {
919
      fputs("</body>\n</html>\n",pageFile);
920
921
922
923
924
925
926
      fclose(pageFile);
  }
}


void HtmlPage::dump(FILE *f, int pageNum) 
{
Albert Astals Cid's avatar
Albert Astals Cid committed
927
  if (complexMode || singleHtml)
928
929
930
931
932
933
  {
    if (xml) dumpAsXML(f, pageNum);
    if (!xml) dumpComplex(f, pageNum);  
  }
  else
  {
934
    fprintf(f,"<a name=%d></a>",pageNum);
935
    // Loop over the list of image names on this page
936
    int listlen=imgList->getLength();
937
    for (int i = 0; i < listlen; i++) {
938
      HtmlImage *img = (HtmlImage*)imgList->del(0);
Ihar Filipau's avatar
Ihar Filipau committed
939
940
941
942
943
944
945

      // see printCSS() for class names
      const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
      int style_index=0;
      if (img->xMin > img->xMax) style_index += 1; // xFlip
      if (img->yMin > img->yMax) style_index += 2; // yFlip

946
      fprintf(f,"<img%s src=\"%s\"/><br/>\n",styles[style_index],img->fName->getCString());
947
      delete img;
948
    }
949
950
951
952
953
954
955

    GooString* str;
    for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
      if (tmp->htext){
		str=new GooString(tmp->htext); 
		fputs(str->getCString(),f);
		delete str;      
956
		fputs("<br/>\n",f);
957
958
      }
    }
959
	fputs("<hr/>\n",f);  
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
  }
}



void HtmlPage::clear() {
  HtmlString *p1, *p2;

  if (curStr) {
    delete curStr;
    curStr = NULL;
  }
  for (p1 = yxStrings; p1; p1 = p2) {
    p2 = p1->yxNext;
    delete p1;
  }
  yxStrings = NULL;
  xyStrings = NULL;
  yxCur1 = yxCur2 = NULL;

  if( !noframes )
  {
      delete fonts;
      fonts=new HtmlFontAccu();
      fontsPageMarker = 0;
  }
  else
  {
      fontsPageMarker = fonts->size();
  }

  delete links;
  links=new HtmlLinks();
 

}

// Stores a newly allocated copy of fname as the document name.
// NOTE(review): a previously stored DocName is not released here, so this
// appears to be intended to be called only once per page object - confirm.
void HtmlPage::setDocName(char *fname)
{
  DocName = new GooString(fname);
}

1001
1002
1003
1004
1005
// Wraps the image file name and graphics state in a new HtmlImage and
// appends it to this page's image list.
void HtmlPage::addImage(GooString *fname, GfxState *state)
{
  imgList->append(new HtmlImage(fname, state));
}

1006
1007
1008
1009
//------------------------------------------------------------------------
// HtmlMetaVar
//------------------------------------------------------------------------

1010
// Builds a <meta> name/content pair; both strings are copied into newly
// allocated GooStrings owned by this object.
HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
{
  this->name = new GooString(_name);
  this->content = new GooString(_content);
}

// Releases the name and content strings allocated by the constructor.
HtmlMetaVar::~HtmlMetaVar()
{
  delete name;
  delete content;
}
    
// Returns a newly allocated string of the form
//   <meta name="NAME" content="CONTENT"/>
// The caller owns the result.  name and content are inserted verbatim.
GooString* HtmlMetaVar::toString()
{
  GooString *tag = new GooString("<meta name=\"");
  tag->append(name)->append("\" content=\"")->append(content)->append("\"/>");
  return tag;
}

//------------------------------------------------------------------------
// HtmlOutputDev
//------------------------------------------------------------------------

1036
// Lookup table of { encoding name, HTML charset name } pairs.
// The list is terminated by a { NULL, NULL } entry.
static const char* HtmlEncodings[][2] = {
  { "Latin1", "ISO-8859-1" },
  { NULL,     NULL         }
};

1041
// Translates an encoding name into the charset name used in HTML output.
//
// If the name is found in HtmlEncodings, the argument is deleted and a
// newly allocated string holding the mapped name is returned.  Otherwise
// the argument itself is returned unchanged.  Either way the caller must
// treat `encoding` as consumed and owns the returned string.
GooString* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
{
  int idx = 0;
  while (HtmlEncodings[idx][0] != NULL) {
    if (encoding->cmp(HtmlEncodings[idx][0]) == 0) {
      delete encoding;
      return new GooString(HtmlEncodings[idx][1]);
    }
    ++idx;
  }
  // No mapping known: hand the original string back.
  return encoding;
}

void HtmlOutputDev::doFrame(int firstPage){
  GooString* fName=new GooString(Docname);
1057
  GooString* htmlEncoding;
1058
1059
  fName->append(".html");

1060
  if (!(fContentsFrame = fopen(fName->getCString(), "w"))){
1061
    error(errIO, -1, "Couldn't open html file '{0:t}'", fName);
1062
    delete fName;
1063
1064
1065
1066
1067
1068
    return;
  }
  
  delete fName;
    
  fName=basename(Docname);
1069
  fputs(DOCTYPE, fContentsFrame);
1070
1071
1072
  fputs("\n<html>",fContentsFrame);
  fputs("\n<head>",fContentsFrame);
  fprintf(fContentsFrame,"\n<title>%s</title>",docTitle->getCString());
1073
  htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1074
  fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
1075
  dumpMetaVars(fContentsFrame);
1076
1077
1078
1079
  fprintf(fContentsFrame, "</head>\n");
  fputs("<frameset cols=\"100,*\">\n",fContentsFrame);
  fprintf(fContentsFrame,"<frame name=\"links\" src=\"%s_ind.html\"/>\n",fName->getCString());
  fputs("<frame name=\"contents\" src=",fContentsFrame); 
1080
1081
1082
1083
1084
  if (complexMode) 
      fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
  else
      fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
  
1085
  fputs("/>\n</frameset>\n</html>\n",fContentsFrame);
1086
1087
 
  delete fName;
1088
  delete htmlEncoding;
1089
1090
1091
  fclose(fContentsFrame);  
}

1092
// Creates the HTML/XML output device and opens the output files that are
// needed up front.
//
//   catalogA  - document catalog, stored in `catalog`
//   fileName  - base path used to derive every output file name
//   title     - document title, copied into docTitle
//   author, keywords, subject, date
//             - optional metadata; each non-NULL value becomes a <meta>
//               entry in glMetaVars
//   extension - passed on to the HtmlPage constructor
//   rawOrder  - stored and passed on to the HtmlPage constructor
//   firstPage - page number handed to doFrame() for the contents frame
//   outline   - stored in doOutline; adds an "Outline" link to the index
//
// `ok` is gTrue only when construction ran to completion.  On any fopen()
// failure an error is reported and the constructor returns early with
// `ok` still gFalse.
HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title, 
	char *author, char *keywords, char *subject, char *date,
	char *extension,
	GBool rawOrder, int firstPage, GBool outline) 
{
  catalog = catalogA;
  fContentsFrame = NULL;
  // The destructor tests `page`, and not every path below assigns it
  // (early returns; complex mode with frames), so give it a defined value.
  page = NULL;
  docTitle = new GooString(title);
  pages = NULL;
  dumpJPEG=gTrue;
  this->rawOrder = rawOrder;
  this->doOutline = outline;
  ok = gFalse;
  needClose = gFalse;
  pages = new HtmlPage(rawOrder, extension);
  
  glMetaVars = new GooList();
  glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));  
  if( author ) glMetaVars->append(new HtmlMetaVar("author", author));  
  if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));  
  if( date ) glMetaVars->append(new HtmlMetaVar("date", date));  
  if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
 
  maxPageWidth = 0;
  maxPageHeight = 0;

  pages->setDocName(fileName);
  Docname=new GooString (fileName);

  // for non-xml output (complex or simple) with frames generate the left frame
  if(!xml && !noframes)
  {
     if (!singleHtml)
     {
         GooString* left=new GooString(fileName);
         left->append("_ind.html");

         // Write the frameset document first; it uses (and closes)
         // fContentsFrame, which is then reopened for the index below.
         doFrame(firstPage);

         if (!(fContentsFrame = fopen(left->getCString(), "w")))
         {
             error(errIO, -1, "Couldn't open html file '{0:t}'", left);
             delete left;
             return;
         }
         delete left;
         fputs(DOCTYPE, fContentsFrame);
         fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);

         if (doOutline)
         {
             GooString *str = basename(Docname);
             fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
             delete str;
         }
     }
     if (!complexMode)
     {
       // Simple mode: all pages go into a single "<fileName>s.html".
       GooString* right=new GooString(fileName);
       right->append("s.html");

       if (!(page=fopen(right->getCString(),"w"))){
         error(errIO, -1, "Couldn't open html file '{0:t}'", right);
         delete right;
         return;
       }
       delete right;
       fputs(DOCTYPE, page);
       fputs("<html>\n<head>\n<title></title>\n",page);
       printCSS(page);
       fputs("</head>\n<body>\n",page);
     }
  }

  if (noframes) {
    // Single output document: stdout, or "<fileName>.html" / ".xml".
    if (stout) page=stdout;
    else {
      GooString* right=new GooString(fileName);
      if (xml) {
        right->append(".xml");
      } else {
        right->append(".html");
      }
      if (!(page=fopen(right->getCString(),"w"))){
        error(errIO, -1, "Couldn't open html file '{0:t}'", right);
        delete right;
        return;
      }  
      delete right;
    }

    GooString *htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); 
    if (xml) 
    {
      fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding->getCString());
      fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
      fprintf(page,"<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
    } 
    else 
    {
      fprintf(page,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->getCString());
      
      fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
      
      dumpMetaVars(page);
      printCSS(page);
      fprintf(page,"</head>\n");
      fprintf(page,"<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
    }
    delete htmlEncoding;
  }
  ok = gTrue; 
}

// Releases owned strings and metadata, writes the closing tags to any
// output stream that is still open, closes it, and destroys the page
// object.
HtmlOutputDev::~HtmlOutputDev() {
    HtmlFont::clear();

    delete Docname;
    delete docTitle;

    deleteGooList(glMetaVars, HtmlMetaVar);

    // Link index frame.
    if (fContentsFrame) {
      fputs("</body>\n</html>\n",fContentsFrame);
      fclose(fContentsFrame);
    }

    // Main output stream: XML gets its root element closed; HTML is
    // finished here except in complex mode with frames.
    if (page != NULL) {
      if (xml) {
        fputs("</pdf2xml>\n",page);
        fclose(page);
      } else if (!complexMode || noframes) {
        fputs("</body>\n</html>\n",page);
        fclose(page);
      }
    }

    // delete on a null pointer is a no-op.
    delete pages;
}

Thomas Freitag's avatar
Thomas Freitag committed
1235
void HtmlOutputDev::startPage(int pageNum, GfxState *state, XRef *xref) {
1236
1237
#if 0
  if (mode&&!xml){
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
    if (write){
      write=gFalse;
      GooString* fname=Dirname(Docname);
      fname->append("image.log");
      if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){
	printf("Error : can not open %s",fname);
	exit(1);
      }
      delete fname;
    // if(state->getRotation()!=0) 
    //  fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
    // else 
      fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());  
    }
1252
1253
  }
#endif
1254
1255
1256
1257
1258
1259
1260
1261
1262

  this->pageNum = pageNum;
  GooString *str=basename(Docname);
  pages->clear(); 
  if(!noframes)
  {
    if (fContentsFrame)
	{
      if (complexMode)
1263
		fprintf(fContentsFrame,"<a href=\"%s-%d.html\"",str->getCString(),pageNum);
1264
      else 
1265
		fprintf(fContentsFrame,"<a href=\"%ss.html#%d\"",str->getCString(),pageNum);
1266
      fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum);
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
    }
  }

  pages->pageWidth=static_cast<int>(state->getPageWidth());
  pages->pageHeight=static_cast<int>(state->getPageHeight());

  delete str;
} 


void HtmlOutputDev::endPage() {
1278
  Links *linksList = docPage->getLinks();
1279
1280
  for (int i = 0; i < linksList->getNumLinks(); ++i)
  {
Albert Astals Cid's avatar
Albert Astals Cid committed
1281
      doProcessLink(linksList->getLink(i));
1282
  }
Tomas Are Haavet's avatar
Tomas Are Haavet committed
1283
  delete linksList;
1284

1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
  pages->conv();
  pages->coalesce();
  pages->dump(page, pageNum);
  
  // I don't yet know what to do in the case when there are pages of different
  // sizes and we want complex output: running ghostscript many times 
  // seems very inefficient. So for now I'll just use last page's size
  maxPageWidth = pages->pageWidth;
  maxPageHeight = pages->pageHeight;
  
1295
  //if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
  if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum));
}

// Forwards a font change to the current page object.
void HtmlOutputDev::updateFont(GfxState *state)
{
  pages->updateFont(state);
}

// Forwards the start of a text string to the current page object.
void HtmlOutputDev::beginString(GfxState *state, GooString *s)
{
  pages->beginString(state, s);
}

void HtmlOutputDev::endString(GfxState *state) {
  pages->endString();
}

void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
	      double dx, double dy,
	      double originX, double originY,
Albert Astals Cid's avatar
Albert Astals Cid committed
1314
	      CharCode code, int /*nBytes*/, Unicode *u, int uLen) 
1315
1316
1317
1318
1319
1320
1321
{
  if ( !showHidden && (state->getRender() & 3) == 3) {
    return;
  }
  pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
}

1322
1323
1324
1325
1326
1327
// Copies an embedded JPEG stream byte-for-byte into a new image file and
// registers that file with the current page.
//
//   state - graphics state passed on to HtmlPage::addImage()
//   str   - image stream; the data is read from str->getNextStream()
//
// If the image file cannot be opened an error is reported and the image
// is skipped.
void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
{
  // open the image file
  GooString *fName = createImageFileName("jpg");
  FILE *f1 = fopen(fName->getCString(), "wb");
  if (!f1) {
    // error() takes GooString-style placeholders, not printf ones; use
    // the same '{0:t}' form as the other file-open errors in this file.
    error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
    delete fName;
    return;
  }

  // initialize stream
  str = str->getNextStream();
  str->reset();

  // copy the stream
  int c;
  while ((c = str->getChar()) != EOF)
    fputc(c, f1);

  fclose(f1);

  // fName was already dereferenced above, so it cannot be NULL here.
  pages->addImage(fName, state);
}

1350
1351
1352
1353
1354
void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height,
                                 GfxImageColorMap *colorMap, GBool isMask)
{
#ifdef ENABLE_LIBPNG
  FILE *f1;
1355

1356
1357
  if (!colorMap && !isMask) {
    error(errInternal, -1, "Can't have color image without a color map");
1358
1359
1360
    return;
  }

1361
1362
1363
1364
1365
  // open the image file
  GooString *fName=createImageFileName("png");
  if (!(f1 = fopen(fName->getCString(), "wb"))) {
    error(errIO, -1, "Couldn't open image file '%s'", fName->getCString());
    delete fName;
1366
1367
    return;
  }
1368
1369
1370
1371
1372
1373
1374
1375

  PNGWriter *writer = new PNGWriter( isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB );
  // TODO can we calculate the resolution of the image?
  if (!writer->init(f1, width, height, 72, 72)) {
    error(errInternal, -1, "Can't init PNG for image '%s'", fName->getCString());
    delete writer;
    fclose(f1);
    return;
1376
  }
1377
1378

  if (!isMask) {
1379
1380
    Guchar *p;
    GfxRGB rgb;
1381
    png_byte *row = (png_byte *) gmalloc(3 * width);   // 3 bytes/pixel: RGB
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
    png_bytep *row_pointer= &row;

    // Initialize the image stream
    ImageStream *imgStr = new ImageStream(str, width,
                        colorMap->getNumPixelComps(), colorMap->getBits());
    imgStr->reset();

    // For each line...
    for (int y = 0; y < height; y++) {

      // Convert into a PNG row
      p = imgStr->getLine();
      for (int x = 0; x < width; x++) {
        colorMap->getRGB(p, &rgb);
1396
1397
1398
1399
1400
        // Write the RGB pixels into the row
        row[3*x]= colToByte(rgb.r);
        row[3*x+1]= colToByte(rgb.g);
        row[3*x+2]= colToByte(rgb.b);
        p += colorMap->getNumPixelComps();
1401
1402
      }

1403
      if (!writer->writeRow(row_pointer)) {
1404
        error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString());
1405
        delete writer;
1406
        delete imgStr;
1407
        fclose(f1);
1408
1409
1410
        return;
      }
    }
1411
1412
1413
1414
1415
    gfree(row);
    imgStr->close();
    delete imgStr;
  }
  else { // isMask == true
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
    int size = (width + 7)/8;

    // PDF masks use 0 = draw current color, 1 = leave unchanged.
    // We invert this to provide the standard interpretation of alpha
    // (0 = transparent, 1 = opaque). If the colorMap already inverts
    // the mask we leave the data unchanged.
    int invert_bits = 0xff;
    if (colorMap) {
      GfxGray gray;
      Guchar zero = 0;
      colorMap->getGray(&zero, &gray);
      if (colToByte(gray) == 0)
        invert_bits = 0x00;
    }
1430

1431
1432
    str->reset();
    Guchar *png_row = (Guchar *)gmalloc(size);
1433
1434
1435

    for (int ri = 0; ri < height; ++ri)
    {
1436
1437
      for(int i = 0; i < size; i++)
        png_row[i] = str->getChar() ^ invert_bits;
1438

1439
1440
1441
1442
1443
1444
1445
1446
1447
      if (!writer->writeRow( &png_row ))
      {
        error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString());
        delete writer;
        fclose(f1);
        gfree(png_row);
        return;
      }
    }
1448
    str->close();