Commit e8e95d2c authored by Sanchit Anand, committed by Albert Astals Cid

pdftotext: Fix -bbox-layout option outputting only the first page's content

Issue #88
parent 09cc5fd1
......@@ -37,6 +37,7 @@
// Copyright (C) 2013 Ed Catmur <ed@catmur.co.uk>
// Copyright (C) 2016 Khaled Hosny <khaledhosny@eglug.org>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -5878,3 +5879,7 @@ TextPage *TextOutputDev::takeText() {
text = new TextPage(rawOrder);
return ret;
}
// Return whatever the TextPage currently held in `text` reports from its
// own getFlows(), i.e. the head of that page's TextFlow list.
// NOTE(review): the pointer is handed back as-is; presumably the flows
// stay owned by the TextPage -- confirm before storing or freeing it.
TextFlow *TextOutputDev::getFlows() {
  TextFlow *firstFlow = text->getFlows();
  return firstFlow;
}
......@@ -22,6 +22,7 @@
// Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <jason@aquaticape.us>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -890,6 +891,10 @@ public:
// Turn extra processing for HTML conversion on or off.
void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
// Get the head of the linked list of TextFlows for the
// most recently displayed page.
TextFlow *getFlows();
private:
TextOutputFunc outputFunc; // output function
......
......@@ -28,6 +28,7 @@
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -511,7 +512,6 @@ static void printLine(FILE *f, TextLine *line) {
void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int last) {
double xMin, yMin, xMax, yMax;
TextPage *textPage;
TextFlow *flow;
TextBlock *blk;
TextLine *line;
......@@ -520,8 +520,7 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
for (int page = first; page <= last; ++page) {
fprintf(f, " <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page));
doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse);
textPage = textOut->takeText();
for (flow = textPage->getFlows(); flow; flow = flow->getNext()) {
for (flow = textOut->getFlows(); flow; flow = flow->getNext()) {
fprintf(f, " <flow>\n");
for (blk = flow->getBlocks(); blk; blk = blk->getNext()) {
blk->getBBox(&xMin, &yMin, &xMax, &yMax);
......@@ -534,7 +533,6 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
fprintf(f, " </flow>\n");
}
fprintf(f, " </page>\n");
textPage->decRefCnt();
}
fprintf(f, "</doc>\n");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment