Commit 722b2bd8 authored by Jeff Muizelaar

2007-04-03 Jeff Muizelaar <jeff@infidigm.net>

	* configure.ac:
	* poppler/ABWOutputDev.cc:
	* poppler/ABWOutputDev.h:
	* poppler/Makefile.am:
	* utils/Makefile.am:
	* utils/pdftoabw.cc: Add AbiWord output device and pdftoabw program.
	Patch by Jauco Noordzij. Autotools stuff by Dominic Lachowicz.
parent c9b467da
2007-04-03 Jeff Muizelaar <jeff@infidigm.net>
* configure.ac:
* poppler/ABWOutputDev.cc:
* poppler/ABWOutputDev.h:
* poppler/Makefile.am:
* utils/Makefile.am:
* utils/pdftoabw.cc: Add AbiWord output device and pdftoabw program.
Patch by Jauco Noordzij. Autotools stuff by Dominic Lachowicz.
2007-03-22 Albert Astals Cid <aacid@kde.org>
* poppler/Gfx.cc: Accept reals for width and height of images. Fixes
......
......@@ -251,6 +251,20 @@ elif test x$enable_gtk_test = xtry; then
fi
AM_CONDITIONAL(BUILD_GTK_TEST, test x$enable_gtk_test = xyes)
# Optional AbiWord output backend. It depends on libxml-2.0, which is
# looked up through pkg-config under the ABIWORD prefix.
# enable_abiword_output is taken from the command line when the option is
# given and defaults to "try" otherwise.
AC_ARG_ENABLE(abiword-output,
AC_HELP_STRING([--disable-abiword-output],
[Don't build the abiword backend.]),
enable_abiword_output=$enableval,
enable_abiword_output="try")
# "yes": the pkg-config check is made without fallback actions, so the
#        default failure handling of PKG_CHECK_MODULES applies.
# "try": the result of the check decides whether the backend is built;
#        a missing libxml-2.0 just turns the backend off.
# Any other value leaves the backend disabled without running the check.
if test x$enable_abiword_output = xyes; then
PKG_CHECK_MODULES(ABIWORD, libxml-2.0)
elif test x$enable_abiword_output = xtry; then
PKG_CHECK_MODULES(ABIWORD, libxml-2.0,
[enable_abiword_output="yes"],
[enable_abiword_output="no"])
fi
# Exported to the Makefiles; true only when the value ended up as "yes".
AM_CONDITIONAL(BUILD_ABIWORD_OUTPUT, test x$enable_abiword_output = xyes)
AC_ARG_ENABLE(utils,
AC_HELP_STRING([--disable-utils],
......@@ -317,6 +331,7 @@ echo ""
echo "Building poppler with support for:"
echo " splash output: $enable_splash_output"
echo " cairo output: $enable_cairo_output"
echo " abiword output: $enable_abiword_output"
echo " qt wrapper: $enable_poppler_qt"
echo " qt4 wrapper: $enable_poppler_qt4"
echo " glib wrapper: $enable_poppler_glib"
......
This diff is collapsed.
//========================================================================
//
// ABWOutputDev.h
//
// Copyright 2006 Jauco Noordzij
//
//========================================================================
#ifndef ABWOUTPUTDEV_H
#define ABWOUTPUTDEV_H
#ifdef __GNUC__
#pragma interface
#endif
#include <stdio.h>
#include "goo/gtypes.h"
#include "goo/GooList.h"
#include "GfxFont.h"
#include "OutputDev.h"
#include "Link.h"
#include "Catalog.h"
#include "UnicodeMap.h"
#include "PDFDoc.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
// Path separator character: backslash when WIN32 is defined, forward
// slash everywhere else.
#if !defined(WIN32)
#define SLASH '/'
#else
#define SLASH '\\'
#endif
// Round <x> to the nearest integer by adding 0.5 and truncating toward
// zero (so the result is only a true "nearest" for non-negative values).
// The argument is parenthesized so that compound expressions such as
// `cond ? a : b` are evaluated as a whole before 0.5 is added.
#define xoutRound(x) ((int)((x) + 0.5))
class GfxState;
class GooString;
//------------------------------------------------------------------------
// ABWOutputDev
//------------------------------------------------------------------------
class ABWOutputDev: public OutputDev {
public:

  // Create an output device that works on the libxml2 document <ext_doc>
  // supplied by the caller.
  // NOTE(review): the caller (pdftoabw) saves and frees the document
  // itself, so this class presumably does not own it -- confirm against
  // ABWOutputDev.cc.
  ABWOutputDev(xmlDocPtr ext_doc);

  // Destructor.
  virtual ~ABWOutputDev();

  // Check whether the device is usable.  This device always reports
  // success.
  virtual GBool isOk() { return gTrue; }

  //---- get info about output device

  // Does this device use upside-down coordinates?
  // (Upside-down means (0,0) is the top left corner of the page.)
  virtual GBool upsideDown() { return gTrue; }

  // Does this device use drawChar() or drawString()?
  virtual GBool useDrawChar() { return gTrue; }

  // Does this device use beginType3Char/endType3Char?  Otherwise,
  // text in Type 3 fonts will be drawn with drawChar/drawString.
  virtual GBool interpretType3Chars() { return gFalse; }

  // Does this device need non-text content?
  virtual GBool needNonText() { return gFalse; }

  //----- initialization and control

  // Start a page.
  virtual void startPage(int pageNum, GfxState *state);

  // End a page.
  virtual void endPage();

  //----- update text state
  virtual void updateFont(GfxState *state);

  //----- text drawing

  //new feature
  virtual int DevType() {return 1234;}

  // Read-only accessors for maxPageWidth / maxPageHeight.  They do not
  // modify the device, so they are callable on const objects as well.
  int getPageWidth() const { return maxPageWidth; }
  int getPageHeight() const { return maxPageHeight; }

  //----- layout analysis
  // NOTE(review): the functions in this section and the next are
  // implemented in ABWOutputDev.cc, which is not visible from this
  // header; their exact behaviour must be confirmed there.
  float getBiggestSeperator(xmlNodePtr N_set, unsigned int direction, float * C1, float * C2);
  void recursiveXYC(xmlNodePtr nodeset);
  void splitNodes(float splitValue, unsigned int direction, xmlNodePtr N_parent, double extravalue);

  //----- OutputDev text callbacks
  virtual void beginString(GfxState *state, GooString *s);
  virtual void endString(GfxState *state);
  virtual void drawChar(GfxState *state, double x, double y,
			double dx, double dy,
			double originX, double originY,
			CharCode code, int nBytes, Unicode *u, int uLen);

  //----- word / text block bookkeeping and tree post-processing
  void beginWord(GfxState *state, double x, double y);
  void endWord();
  void beginTextBlock(GfxState *state, double x, double y);
  void endTextBlock();
  void interpretXYTree();
  void ATP_recursive(xmlNodePtr N_cur);
  void cleanUpNode(xmlNodePtr N_parent, bool aggregateInfo);
  void transformPage(xmlNodePtr N_parent);
  void generateParagraphs();
  void addAlignment(xmlNodePtr N_parent);

  // Hand the device the PDFDoc it is converting.  pdftoabw calls this
  // right after constructing the device.
  void setPDFDoc(PDFDoc *priv_pdfdoc);

  // pdftoabw calls this once, after all pages have been displayed and
  // before the XML document is saved.
  void createABW();

private:

  int maxPageWidth;
  int maxPageHeight;
  int G_pageNum;
  int Style, maxStyle;

  // A lot of values are nice to have around.  Keeping them in member
  // variables is faster and easier than reading them back from the xml
  // tree every time.
  double height;
  double wordSpace, charSpace;
  double X1,X2,Y1,Y2,horDist, verDist, curDx, curDy;
  bool mightBreak;

  xmlDocPtr doc;

  /* node pointers */
  xmlNodePtr N_root, N_content, N_page, N_style, N_text, N_styleset, N_Block, N_word, N_column, N_colset;
  xmlNodePtr outputDoc;
  xmlXPathContextPtr xpathCtx;

  // Values for the <direction> arguments of getBiggestSeperator() and
  // splitNodes().
  static const unsigned int HORIZONTAL = 0;
  static const unsigned int VERTICAL = 1;

  UnicodeMap *uMap;
  PDFDoc *pdfdoc;
};
#endif
......@@ -73,12 +73,27 @@ zlib_libs = \
endif
# AbiWord output device.  BUILD_ABIWORD_OUTPUT comes from configure.ac;
# when it is false the three variables below stay empty, so their uses in
# INCLUDES, libpoppler_la_LIBADD and libpoppler_la_SOURCES add nothing.
if BUILD_ABIWORD_OUTPUT
abiword_sources = \
ABWOutputDev.h \
ABWOutputDev.cc
abiword_includes = \
$(ABIWORD_CFLAGS)
abiword_libs = \
$(ABIWORD_LIBS)
endif
INCLUDES = \
-I$(top_srcdir) \
-I$(top_srcdir)/goo \
$(splash_includes) \
$(cairo_includes) \
$(arthur_includes) \
$(abiword_includes) \
$(FREETYPE_CFLAGS) \
$(FONTCONFIG_CFLAGS)
......@@ -94,6 +109,7 @@ libpoppler_la_LIBADD = \
$(cairo_libs) \
$(libjpeg_libs) \
$(zlib_libs) \
$(abiword_libs) \
$(FREETYPE_LIBS) \
$(FONTCONFIG_LIBS)
......@@ -169,6 +185,7 @@ libpoppler_la_SOURCES = \
$(arthur_sources) \
$(libjpeg_sources) \
$(zlib_sources) \
$(abiword_sources) \
Annot.cc \
Array.cc \
BuiltinFont.cc \
......
......@@ -10,12 +10,23 @@ pdftoppm_manpage = pdftoppm.1
endif
# pdftoabw is only built when the AbiWord output device is enabled.
# pdftoabw_binary is listed in bin_PROGRAMS and is empty otherwise.
if BUILD_ABIWORD_OUTPUT
pdftoabw_SOURCES = \
pdftoabw.cc \
$(common)
pdftoabw_binary = pdftoabw
endif
INCLUDES = \
-I$(top_srcdir) \
-I$(top_srcdir)/utils \
-I$(top_srcdir)/poppler \
$(UTILS_CFLAGS) \
$(FONTCONFIG_CFLAGS)
$(FONTCONFIG_CFLAGS) \
$(ABIWORD_CFLAGS)
LDADD = \
$(top_builddir)/poppler/libpoppler.la \
......@@ -29,7 +40,8 @@ bin_PROGRAMS = \
pdftops \
pdftotext \
pdftohtml \
$(pdftoppm_binary)
$(pdftoppm_binary) \
$(pdftoabw_binary)
dist_man1_MANS = \
pdffonts.1 \
......
//========================================================================
//
// pdftoabw.cc
//
//
// Copyright 1999-2000 G. Ovtcharov
//========================================================================
#include "config.h"
#include <poppler-config.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <dirent.h>
#include <time.h>
#include "parseargs.h"
#include "goo/GooString.h"
#include "goo/gmem.h"
#include "Object.h"
#include "Stream.h"
#include "Array.h"
#include "Dict.h"
#include "XRef.h"
#include "Catalog.h"
#include "Page.h"
#include "PDFDoc.h"
#include "ABWOutputDev.h"
#include "PSOutputDev.h"
#include "GlobalParams.h"
#include "Error.h"
#include "UGooString.h"
#include "goo/gfile.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
// Page range selected with -f / -l.  A lastPage of 0 means "up to the
// last page of the document" (main() replaces it with the page count).
static int firstPage = 1;
static int lastPage = 0;

// NOTE(review): printCommands is not referenced anywhere in the visible
// part of this file; it has external linkage, so it may be used from
// another translation unit -- confirm before removing.
GBool printCommands = gTrue;

// Set by --format and --stdout respectively.
// NOTE(review): main() does not appear to read either flag.
GBool prettyPrint = gFalse;
static GBool printHelp = gFalse;
GBool stout=gFalse;

// Buffers filled by --opw / --upw; empty means "no password given".
static char ownerPassword[33] = "";
static char userPassword[33] = "";

// NOTE(review): these two helpers are declared but neither defined nor
// called in the visible part of this file.
static GooString* getInfoString(Dict *infoDict, char *key);
static GooString* getInfoDate(Dict *infoDict, char *key);

// The XML document the output device fills in and main() saves.
xmlDocPtr XMLdoc;

static char textEncName[128] = "";

// Command-line option table handed to parseArgs(); the {NULL} entry
// terminates the list.
static ArgDesc argDesc[] = {
  {"-f",      argInt,      &firstPage,     0,
   "first page to convert"},
  {"-l",      argInt,      &lastPage,      0,
   "last page to convert"},
  {"-h",      argFlag,     &printHelp,     0,
   "print usage information"},
  {"--help",  argFlag,     &printHelp,     0,
   "print usage information"},
  // The help text here used to be a copy of the -h/--help text.
  {"--format", argFlag,    &prettyPrint,   0,
   "pretty-print the output"},
  {"--stdout" ,argFlag,    &stout,         0,
   "use standard output"},
  {"--opw",   argString,   ownerPassword,  sizeof(ownerPassword),
   "owner password (for encrypted files)"},
  {"--upw",   argString,   userPassword,   sizeof(userPassword),
   "user password (for encrypted files)"},
  {NULL}
};
int main(int argc, char *argv[]) {
PDFDoc *doc = NULL;
GooString *fileName = NULL;
GooString *docTitle = NULL;
GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
GooString *htmlFileName = NULL;
GooString *psFileName = NULL;
ABWOutputDev *htmlOut = NULL;
PSOutputDev *psOut = NULL;
GBool ok;
char *p;
char extension[16] = "png";
GooString *ownerPW, *userPW;
Object info;
char * outpName;
// parse args
parseArgs(argDesc, &argc, argv);
globalParams = new GlobalParams();
fileName = new GooString(argv[1]);
/*
if (stout){*/
outpName = "-";
/* }
else {
//FIXME: add outputfilename stuff
}
*/
doc = new PDFDoc(fileName);
XMLdoc = xmlNewDoc(BAD_CAST "1.0");
htmlOut = new ABWOutputDev(XMLdoc);
htmlOut->setPDFDoc(doc);
/* check for copy permission
if (!doc->okToCopy()) {
error(-1, "Copying of text from this document is not allowed.");
goto error;
}*/
// write text file
if (lastPage == 0) lastPage = doc->getNumPages();
if (htmlOut->isOk())
{
doc->displayPages(htmlOut, 1, lastPage, 72, 72, 0, gTrue, gFalse, gFalse);
htmlOut->createABW();
}
xmlSaveFormatFileEnc(outpName, XMLdoc, "UTF-8", 1);
// clean up
error:
if(globalParams) delete globalParams;
//if(fileName) delete fileName;
if(doc) delete doc;
if(XMLdoc) xmlFreeDoc(XMLdoc);
if(htmlOut) delete htmlOut;
// check for memory leaks
Object::memCheck(stderr);
gMemReport(stderr);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment