Commit bcd89bc0 authored by Thibaut Brard, committed by Albert Astals Cid

pdftohtml: Add option to not round coordinates

when outputting as XML
parent 6ef17493
......@@ -41,6 +41,7 @@
// Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com>
// Copyright (C) 2017 Caolán McNamara <caolanm@redhat.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -109,6 +110,7 @@ extern GBool printHtml;
extern GBool noframes;
extern GBool stout;
extern GBool xml;
extern GBool noRoundedCoordinates;
extern GBool showHidden;
extern GBool noMerge;
......@@ -760,16 +762,28 @@ void HtmlPage::dumpAsXML(FILE* f,int page){
int listlen=imgList->getLength();
for (int i = 0; i < listlen; i++) {
HtmlImage *img = (HtmlImage*)imgList->del(0);
fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
if (!noRoundedCoordinates) {
fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
}
else {
fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
}
fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
delete img;
}
for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
if (tmp->htext){
fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
if (!noRoundedCoordinates) {
fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
}
else {
fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
}
fprintf(f,"font=\"%d\">", tmp->fontpos);
fputs(tmp->htext->getCString(),f);
fputs("</text>\n",f);
......
......@@ -58,6 +58,9 @@ zoom the PDF document (default 1.5)
.B \-xml
output for XML post-processing
.TP
.B \-noroundcoord
do not round coordinates (with XML output only)
.TP
.B \-enc <string>
output text encoding name
.TP
......
......@@ -26,6 +26,7 @@
// Copyright (C) 2015 William Bader <williambader@hotmail.com>
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -81,6 +82,7 @@ static double scale=1.5;
GBool noframes=gFalse;
GBool stout=gFalse;
GBool xml=gFalse;
GBool noRoundedCoordinates = gFalse;
static GBool errQuiet=gFalse;
static GBool noDrm=gFalse;
double wordBreakThreshold=10; // 10%, below converted into a coefficient - 0.1
......@@ -130,6 +132,8 @@ static const ArgDesc argDesc[] = {
"zoom the pdf document (default 1.5)"},
{"-xml", argFlag, &xml, 0,
"output for XML post-processing"},
{"-noroundcoord", argFlag, &noRoundedCoordinates, 0,
"do not round coordinates (with XML output only)"},
{"-hidden", argFlag, &showHidden, 0,
"output hidden text"},
{"-nomerge", argFlag, &noMerge, 0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment