Commit 91ab53fa authored by Greg Knight, committed by Albert Astals Cid

pdftohtml: add support for dataUrls argument

eliminate the 'extension' field used to regenerate background images; replace with a list of background images
parent 44da4d78
......@@ -60,6 +60,8 @@
#include "goo/GooString.h"
#include "goo/gbasename.h"
#include "goo/GooList.h"
#include "goo/gbase64.h"
#include "goo/gbasename.h"
#include "UnicodeMap.h"
#include "goo/gmem.h"
#include "Error.h"
......@@ -71,6 +73,7 @@
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
#include "HtmlUtils.h"
#include "InMemoryFile.h"
#include "Outline.h"
#include "PDFDoc.h"
......@@ -102,6 +105,7 @@ static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) <
extern bool complexMode;
extern bool singleHtml;
extern bool dataUrls;
extern bool ignore;
extern bool printCommands;
extern bool printHtml;
......@@ -267,7 +271,7 @@ void HtmlString::endString()
// HtmlPage
//------------------------------------------------------------------------
HtmlPage::HtmlPage(bool rawOrder, const char *imgExtVal) {
HtmlPage::HtmlPage(bool rawOrder) {
this->rawOrder = rawOrder;
curStr = nullptr;
yxStrings = nullptr;
......@@ -281,7 +285,6 @@ HtmlPage::HtmlPage(bool rawOrder, const char *imgExtVal) {
fontsPageMarker = 0;
DocName=nullptr;
firstPage = -1;
imgExt = new GooString(imgExtVal);
}
HtmlPage::~HtmlPage() {
......@@ -289,7 +292,6 @@ HtmlPage::~HtmlPage() {
delete DocName;
delete fonts;
delete links;
delete imgExt;
deleteGooList<HtmlImage>(imgList);
}
......@@ -849,14 +851,12 @@ int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page)
return 0;
}
void HtmlPage::dumpComplex(FILE *file, int page){
void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string>& backgroundImages) {
FILE* pageFile;
if( firstPage == -1 ) firstPage = page;
if (dumpComplexHeaders(file, pageFile, page)) { error(errIO, -1, "Couldn't write headers."); return; }
const std::string str = gbasename(DocName->c_str());
fputs("<style type=\"text/css\">\n<!--\n",pageFile);
fputs("\tp {margin: 0; padding: 0;}",pageFile);
......@@ -880,12 +880,11 @@ void HtmlPage::dumpComplex(FILE *file, int page){
fprintf(pageFile,"<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
page, pageWidth, pageHeight);
if( !ignore )
if(!ignore && (size_t) (page - firstPage) < backgroundImages.size())
{
fprintf(pageFile,
"<img width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
pageWidth, pageHeight, str.c_str(),
(page-firstPage+1), imgExt->c_str());
"<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n",
pageWidth, pageHeight, backgroundImages[page - firstPage].c_str());
}
for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
......@@ -915,12 +914,12 @@ void HtmlPage::dumpComplex(FILE *file, int page){
}
void HtmlPage::dump(FILE *f, int pageNum)
void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string>& backgroundImages)
{
if (complexMode || singleHtml)
{
if (xml) dumpAsXML(f, pageNum);
if (!xml) dumpComplex(f, pageNum);
if (!xml) dumpComplex(f, pageNum, backgroundImages);
}
else
{
......@@ -1083,7 +1082,6 @@ void HtmlOutputDev::doFrame(int firstPage){
HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title,
const char *author, const char *keywords, const char *subject, const char *date,
const char *extension,
bool rawOrder, int firstPage, bool outline)
{
catalog = catalogA;
......@@ -1099,7 +1097,7 @@ HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char
//pageNum=firstPage;
// open file
needClose = false;
pages = new HtmlPage(rawOrder, extension);
pages = new HtmlPage(rawOrder);
glMetaVars = new GooList();
glMetaVars->push_back(new HtmlMetaVar("generator", "pdftohtml 0.36"));
......@@ -1107,7 +1105,7 @@ HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char
if( keywords ) glMetaVars->push_back(new HtmlMetaVar("keywords", keywords));
if( date ) glMetaVars->push_back(new HtmlMetaVar("date", date));
if( subject ) glMetaVars->push_back(new HtmlMetaVar("subject", subject));
maxPageWidth = 0;
maxPageHeight = 0;
......@@ -1272,7 +1270,7 @@ void HtmlOutputDev::endPage() {
pages->conv();
pages->coalesce();
pages->dump(page, pageNum);
pages->dump(page, pageNum, backgroundImages);
// I don't yet know what to do in the case when there are pages of different
// sizes and we want complex output: running ghostscript many times
......@@ -1284,6 +1282,10 @@ void HtmlOutputDev::endPage() {
if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum));
}
// Appends img to the backgroundImages list. push_back stores its own copy
// of the string, so the caller's argument is left untouched.
// img is written verbatim into an <img src="..."> attribute by
// HtmlPage::dumpComplex, so it is either a file name or a data: URL.
void HtmlOutputDev::addBackgroundImage(const std::string& img) {
backgroundImages.push_back(img);
}
// Forwards the font update for the given graphics state to the HtmlPage
// object held in `pages`.
void HtmlOutputDev::updateFont(GfxState *state) {
pages->updateFont(state);
}
......@@ -1309,12 +1311,14 @@ void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
{
FILE *f1;
InMemoryFile ims;
FILE *f1 = nullptr;
int c;
// open the image file
GooString *fName=createImageFileName("jpg");
if (!(f1 = fopen(fName->c_str(), "wb"))) {
GooString *fName = createImageFileName("jpg");
f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
if (!f1) {
error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
delete fName;
return;
......@@ -1330,9 +1334,11 @@ void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
fclose(f1);
if (fName) {
pages->addImage(fName, state);
if (dataUrls) {
delete fName;
fName = new GooString(std::string("data:image/jpeg;base64,") + gbase64Encode(ims.getBuffer()));
}
pages->addImage(fName, state);
}
void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height,
......@@ -1340,6 +1346,7 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he
{
#ifdef ENABLE_LIBPNG
FILE *f1;
InMemoryFile ims;
if (!colorMap && !isMask) {
error(errInternal, -1, "Can't have color image without a color map");
......@@ -1348,7 +1355,8 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he
// open the image file
GooString *fName=createImageFileName("png");
if (!(f1 = fopen(fName->c_str(), "wb"))) {
f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
if (!f1) {
error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
delete fName;
return;
......@@ -1453,6 +1461,10 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he
delete writer;
fclose(f1);
if (dataUrls) {
delete fName;
fName = new GooString(std::string("data:image/png;base64,") + gbase64Encode(ims.getBuffer()));
}
pages->addImage(fName, state);
#else
return;
......@@ -1461,16 +1473,7 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he
// Builds the name for the next image on the current page, laid out as
// "<Docname>-<pageNum>_<N>.<ext>" where N is pages->getNumImages() + 1.
// Returns a GooString pointer; the callers visible in this listing either
// delete it themselves or hand it to pages->addImage().
// NOTE(review): two bodies appear back to back here - the manual append
// sequence ending in `return fName;` and the single GooString::format()
// call. They look like the before/after versions from the commit; both
// spell out the same name layout. Confirm against the real file.
GooString *HtmlOutputDev::createImageFileName(const char *ext)
{
GooString *fName=new GooString(Docname);
fName->append("-");
GooString *pgNum= GooString::fromInt(pageNum);
GooString *imgnum= GooString::fromInt(pages->getNumImages()+1);
fName->append(pgNum)->append("_")->append(imgnum)->append(".")->append(ext);
// the temporary number strings are no longer needed once appended
delete pgNum;
delete imgnum;
return fName;
return GooString::format("{0:s}-{1:d}_{2:d}.{3:s}", Docname->c_str(), pageNum, pages->getNumImages() + 1, ext);
}
void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
......
......@@ -36,6 +36,7 @@
#include <stdio.h>
#include "goo/GooList.h"
#include "goo/gbasename.h"
#include "GfxFont.h"
#include "OutputDev.h"
#include "HtmlLinks.h"
......@@ -63,7 +64,6 @@ enum UnicodeTextDirection {
textDirTopBottom
};
class HtmlString {
public:
......@@ -116,7 +116,7 @@ class HtmlPage {
public:
// Constructor.
HtmlPage(bool rawOrder, const char *imgExtVal);
HtmlPage(bool rawOrder);
// Destructor.
~HtmlPage();
......@@ -159,7 +159,7 @@ public:
// number of images on the current page
int getNumImages() { return imgList->getLength(); }
void dump(FILE *f, int pageNum);
void dump(FILE *f, int pageNum, const std::vector<std::string>& backgroundImages);
// Clear the page.
void clear();
......@@ -179,7 +179,7 @@ private:
void setDocName(const char* fname);
void dumpAsXML(FILE* f,int page);
void dumpComplex(FILE* f, int page);
void dumpComplex(FILE* f, int page, const std::vector<std::string>& backgroundImages);
int dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page);
// marks the position of the fonts that belong to current page (for noframes)
......@@ -189,7 +189,6 @@ private:
GooList *imgList;
GooString *DocName;
GooString *imgExt;
int pageWidth;
int pageHeight;
int firstPage; // used to begin the numeration of pages
......@@ -234,7 +233,6 @@ public:
const char *keywords,
const char *subject,
const char *date,
const char *extension,
bool rawOrder,
int firstPage = 1,
bool outline = 0);
......@@ -283,6 +281,10 @@ public:
// End a page.
void endPage() override;
// add a background image to the list of background images,
// as this seems to be done outside other processing. img is copied into the list.
void addBackgroundImage(const std::string& img);
//----- update text state
void updateFont(GfxState *state) override;
......@@ -345,6 +347,7 @@ private:
GooList *glMetaVars;
Catalog *catalog;
Page *docPage;
std::vector<std::string> backgroundImages;
friend class HtmlPage;
};
......
......@@ -46,6 +46,8 @@
#include <time.h>
#include "parseargs.h"
#include "goo/GooString.h"
#include "goo/gbase64.h"
#include "goo/gbasename.h"
#include "goo/gmem.h"
#include "Object.h"
#include "Stream.h"
......@@ -68,6 +70,7 @@
#include "DateInfo.h"
#include "goo/gfile.h"
#include "Win32Console.h"
#include "InMemoryFile.h"
static int firstPage = 1;
static int lastPage = 0;
......@@ -77,6 +80,7 @@ static bool printHelp = false;
bool printHtml = false;
bool complexMode=false;
bool singleHtml=false; // singleHtml
bool dataUrls = false;
bool ignore=false;
static char extension[5]="png";
static double scale=1.5;
......@@ -123,6 +127,10 @@ static const ArgDesc argDesc[] = {
"generate complex document"},
{"-s", argFlag, &singleHtml, 0,
"generate single document that includes all pages"},
#ifdef HAVE_IN_MEMORY_FILE
{"-dataurls", argFlag, &dataUrls, 0,
"use data URLs instead of external images in HTML"},
#endif
{"-i", argFlag, &ignore, 0,
"ignore images"},
{"-noframes", argFlag, &noframes, 0,
......@@ -366,7 +374,6 @@ int main(int argc, char *argv[]) {
keywords ? keywords->c_str() : nullptr,
subject ? subject->c_str() : nullptr,
date ? date->c_str() : nullptr,
extension,
rawOrder,
firstPage,
doOutline);
......@@ -387,13 +394,6 @@ int main(int argc, char *argv[]) {
{
delete date;
}
if (htmlOut->isOk())
{
doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0,
true, false, false);
htmlOut->dumpDocOutline(doc);
}
if ((complexMode || singleHtml) && !xml && !ignore) {
#ifdef HAVE_SPLASH
......@@ -409,6 +409,7 @@ int main(int argc, char *argv[]) {
splashOut->startDoc(doc);
for (int pg = firstPage; pg <= lastPage; ++pg) {
InMemoryFile imf;
doc->displayPage(splashOut, pg,
72 * scale, 72 * scale,
0, true, false, false);
......@@ -416,10 +417,22 @@ int main(int argc, char *argv[]) {
imgFileName = GooString::format("{0:s}{1:03d}.{2:s}",
htmlFileName->c_str(), pg, extension);
bitmap->writeImgFile(format, imgFileName->c_str(),
72 * scale, 72 * scale);
auto f1 = dataUrls ? imf.open("wb") : fopen(imgFileName->c_str(), "wb");
if (!f1) {
fprintf(stderr, "Could not open %s\n", imgFileName->c_str());
delete imgFileName;
continue;
}
bitmap->writeImgFile(format, f1, 72 * scale, 72 * scale);
fclose(f1);
if (dataUrls) {
htmlOut->addBackgroundImage(
std::string((format == splashFormatJpeg) ? "data:image/jpeg;base64," : "data:image/png;base64,") +
gbase64Encode(imf.getBuffer())
);
} else {
htmlOut->addBackgroundImage(gbasename(imgFileName->c_str()));
}
delete imgFileName;
}
......@@ -434,7 +447,14 @@ int main(int argc, char *argv[]) {
return -1;
#endif
}
if (htmlOut->isOk())
{
doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0,
true, false, false);
htmlOut->dumpDocOutline(doc);
}
delete htmlOut;
exit_status = EXIT_SUCCESS;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment