Commit 481db9d9 authored by Kristian Høgsberg

2005-07-29 Kristian Høgsberg <krh@redhat.com>

        * poppler/TextOutputDev.cc: Finish TextSelectionDumper class for
        extracting the text from a selection.  Add
        TextPage::getSelectionText() and TextOutputDev::getSelectionText()
        methods to expose the new functionality.

        * glib/poppler-page.cc (poppler_page_get_text): Use
        TextOutputDev::getSelectionText() to get the text from the
        selection.

        * glib/poppler-document.cc (poppler_document_new_from_file):
        * glib/poppler-page.cc (_poppler_page_new): Add extra NULL to
        g_object_new() constructor to silence gcc warning about missing
        sentinel.
parent ad312dbd
2005-07-29 Kristian Høgsberg <krh@redhat.com>
* poppler/TextOutputDev.cc: Finish TextSelectionDumper class for
extracting the text from a selection. Add
TextPage::getSelectionText() and TextOutputDev::getSelectionText()
methods to expose the new functionality.
* glib/poppler-page.cc (poppler_page_get_text): Use
TextOutputDev::getSelectionText() to get the text from the
selection.
* glib/poppler-document.cc (poppler_document_new_from_file):
* glib/poppler-page.cc (_poppler_page_new): Add extra NULL to
g_object_new() constructor to silence gcc warning about missing
sentinel.
2005-07-28 Albert Astals Cid <aacid@kde.org>
* poppler/PageLabelInfo.[cc|h]: Fix memory leaks
......
......@@ -71,7 +71,7 @@ poppler_document_new_from_file (const char *uri,
int err;
char *filename;
document = (PopplerDocument *) g_object_new (POPPLER_TYPE_DOCUMENT, NULL);
document = (PopplerDocument *) g_object_new (POPPLER_TYPE_DOCUMENT, NULL, NULL);
if (!globalParams) {
globalParams = new GlobalParams("/etc/xpdfrc");
......
......@@ -75,7 +75,7 @@ _poppler_page_new (PopplerDocument *document, Page *page, int index)
g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
poppler_page = (PopplerPage *) g_object_new (POPPLER_TYPE_PAGE, NULL);
poppler_page = (PopplerPage *) g_object_new (POPPLER_TYPE_PAGE, NULL, NULL);
poppler_page->document = document;
poppler_page->page = page;
poppler_page->index = index;
......@@ -592,32 +592,31 @@ poppler_page_get_thumbnail_size (PopplerPage *page,
**/
char *
poppler_page_get_text (PopplerPage *page,
PopplerRectangle *rect)
PopplerRectangle *selection)
{
TextOutputDev *output_dev;
TextOutputDev *text_dev;
PDFDoc *doc;
GooString *sel_text = new GooString;
double height, y1, y2;
char *result;
PDFRectangle pdf_selection;
g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
g_return_val_if_fail (rect != NULL, NULL);
output_dev = new TextOutputDev (NULL, gTrue, gFalse, gFalse);
doc = page->document->doc;
g_return_val_if_fail (selection != NULL, NULL);
text_dev = poppler_page_get_text_output_dev (page);
height = page->page->getHeight ();
page->page->display(output_dev, 72, 72, poppler_page_get_rotate (page),
gTrue, NULL, doc->getCatalog());
y1 = height - rect->y2;
y2 = height - rect->y1;
sel_text = output_dev->getText (rect->x1, y1, rect->x2, y2);
result = sel_text->getCString ();
delete output_dev;
pdf_selection.x1 = selection->x1;
pdf_selection.y1 = height - selection->y2;
pdf_selection.x2 = selection->x2;
pdf_selection.y2 = height - selection->y1;
sel_text = text_dev->getSelectionText (&pdf_selection);
result = g_strdup (sel_text->getCString ());
delete sel_text;
return result ? g_strdup (result) : NULL;
return result;
}
/**
......
......@@ -3008,11 +3008,11 @@ TextSelectionVisitor::TextSelectionVisitor (TextPage *page)
}
#if 0
class TextSelectionDumper : public TextSelectionVisitor {
public:
TextSelectionDumper(TextPage *page);
virtual ~TextSelectionDumper();
virtual void visitBlock (TextBlock *block,
TextLine *begin,
TextLine *end,
......@@ -3022,58 +3022,116 @@ public:
TextWord *end,
int edge_begin,
int edge_end,
PDFRectangle *selection) { };
virtual void visitWord (TextWord *word, int begin, int end,
PDFRectangle *selection);
virtual void visitWord (TextWord *word, int begin, int end,
PDFRectangle *selection) { };
GooString *TextSelectionDumper::getText(void);
private:
GooString *result;
UnicodeMap *uMap;
char space[8], eol[16];
int spaceLen, eolLen;
double height;
TextLineFrag *frags;
int nFrags, fragsSize;
};
TextSelectionDumper::TextSelectionDumper()
TextSelectionDumper::TextSelectionDumper(TextPage *page)
: TextSelectionVisitor(page)
{
result = new GooString();
uMap = globalParams->getTextEncoding();
// get the output encoding
if (data.uMap == NULL)
return data.result;
data.spaceLen = data.uMap->mapUnicode(0x20, data.space, sizeof(data.space));
data.eolLen = 0; // make gcc happy
switch (globalParams->getTextEOL()) {
case eolUnix:
data.eolLen = data.uMap->mapUnicode(0x0a, data.eol, sizeof(data.eol));
break;
case eolDOS:
data.eolLen = data.uMap->mapUnicode(0x0d, data.eol, sizeof(data.eol));
data.eolLen += data.uMap->mapUnicode(0x0a, data.eol + data.eolLen,
sizeof(data.eol) - data.eolLen);
break;
case eolMac:
data.eolLen = data.uMap->mapUnicode(0x0d, data.eol, sizeof(data.eol));
break;
}
fragsSize = 256;
frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag));
nFrags = 0;
}
TextSelectionDumper::~TextSelectionDumper()
{
data.uMap->decRefCnt();
gfree(frags);
}
void TextSelectionDumper::visitWord(TextWord *word, int first, int last,
PDFRectangle *selection)
void TextSelectionDumper::visitLine (TextLine *line,
TextWord *begin,
TextWord *end,
int edge_begin,
int edge_end,
PDFRectangle *selection)
{
for (i = first; i <= last; i++)
printf ("%c", word->text[i]);
printf ("\n");
if (nFrags == fragsSize) {
fragsSize *= 2;
frags = (TextLineFrag *) grealloc(frags, fragsSize * sizeof(TextLineFrag));
}
frags[nFrags].init(line, edge_begin, edge_end - edge_begin);
++nFrags;
}
#endif
// Build the text of the selection from the line fragments collected so far.
//
// Returns a newly allocated GooString.  The string is empty when no text
// encoding map is available or when no fragments have been collected.
GooString *TextSelectionDumper::getText (void)
{
  GooString *out = new GooString();

  // Without an output encoding nothing can be converted; hand back the
  // empty string.
  UnicodeMap *encoding = globalParams->getTextEncoding();
  if (encoding == NULL) {
    return out;
  }

  // Encoded forms of the space and line-feed characters.
  char spaceBuf[8];
  char eolBuf[16];
  int spaceBytes = encoding->mapUnicode(0x20, spaceBuf, sizeof(spaceBuf));
  int eolBytes = encoding->mapUnicode(0x0a, eolBuf, sizeof(eolBuf));

  if (nFrags > 0) {
    // Always gTrue here, so the line-rotation comparator is the one
    // that ends up being selected below.
    GBool oneRot = gTrue;

    for (int k = 0; k < nFrags; ++k) {
      frags[k].computeCoords(oneRot);
    }
    page->assignColumns(frags, nFrags, oneRot);

    // Same rotation for every line: sort by it; otherwise sort by the
    // page's primary rotation.
    int (*compare)(const void *, const void *) =
        oneRot ? &TextLineFrag::cmpYXLineRot
               : &TextLineFrag::cmpYXPrimaryRot;
    qsort(frags, nFrags, sizeof(TextLineFrag), compare);

    int column = 0;
    GBool sawLineBreak = gFalse;

    for (int k = 0; k < nFrags; ++k) {
      TextLineFrag *cur = &frags[k];

      // A line break is needed when the fragment starts left of the
      // current column, or when its baseline is too far from the
      // previous fragment's baseline.
      GBool needBreak = cur->col < column;
      if (!needBreak && k > 0) {
        TextLineFrag *prev = &frags[k - 1];
        needBreak = fabs(cur->base - prev->base) >
                    maxIntraLineDelta * prev->line->words->fontSize;
      }
      if (needBreak) {
        out->append(eolBuf, eolBytes);
        column = 0;
        sawLineBreak = gTrue;
      }

      // Pad with spaces up to the fragment's column.
      while (column < cur->col) {
        out->append(spaceBuf, spaceBytes);
        ++column;
      }

      // Append the fragment's characters and advance the column by the
      // amount dumpFragment reports.
      column += page->dumpFragment(cur->line->text + cur->start, cur->len,
                                   encoding, out);
    }

    // Multi-line output is terminated with a final end-of-line.
    if (sawLineBreak) {
      out->append(eolBuf, eolBytes);
    }
  }

  // NOTE(review): presumably balances the reference handed out by
  // getTextEncoding() -- confirm against GlobalParams.
  encoding->decRefCnt();

  return out;
}
class TextSelectionSizer : public TextSelectionVisitor {
public:
......@@ -3259,6 +3317,10 @@ void TextWord::visitSelection(TextSelectionVisitor *visitor,
end = i + 1;
}
/* Skip empty selection. */
if (end <= begin)
return;
visitor->visitWord (this, begin, end, selection);
}
......@@ -3290,6 +3352,10 @@ void TextLine::visitSelection(TextSelectionVisitor *visitor,
edge_end = i + 1;
}
/* Skip empty selection. */
if (edge_end <= edge_begin)
return;
visitor->visitLine (this, begin, end, edge_begin, edge_end, selection);
for (p = begin; p != end; p = p->next)
......@@ -3343,6 +3409,10 @@ void TextBlock::visitSelection(TextSelectionVisitor *visitor,
end = p->next;
}
/* Skip empty selection. */
if (end == begin)
return;
visitor->visitBlock (this, begin, end, selection);
for (p = begin; p != end; p = p->next) {
......@@ -3459,6 +3529,15 @@ GooList *TextPage::getSelectionRegion(PDFRectangle *selection,
return sizer.getRegion();
}
// Return the text covered by `selection` on this page, as produced by a
// TextSelectionDumper that is run over the selection.
GooString *TextPage::getSelectionText(PDFRectangle *selection)
{
  TextSelectionDumper collector(this);

  // Let the visitor gather the selected pieces, then have it render them.
  this->visitSelection(&collector, selection);
  GooString *selected = collector.getText();

  return selected;
}
GBool TextPage::findCharRange(int pos, int length,
double *xMin, double *yMin,
double *xMax, double *yMax) {
......@@ -4012,6 +4091,11 @@ GooList *TextOutputDev::getSelectionRegion(PDFRectangle *selection,
return text->getSelectionRegion(selection, scale);
}
// Return the text covered by `selection`; the work is forwarded to the
// `text` member's getSelectionText().
GooString *TextOutputDev::getSelectionText(PDFRectangle *selection)
{
  GooString *selected = text->getSelectionText(selection);
  return selected;
}
GBool TextOutputDev::findCharRange(int pos, int length,
double *xMin, double *yMin,
double *xMax, double *yMax) {
......
......@@ -139,7 +139,9 @@ private:
friend class TextFlow;
friend class TextWordList;
friend class TextPage;
friend class TextSelectionPainter;
friend class TextSelectionDumper;
};
//------------------------------------------------------------------------
......@@ -235,6 +237,7 @@ private:
friend class TextSelectionPainter;
friend class TextSelectionSizer;
friend class TextSelectionDumper;
};
//------------------------------------------------------------------------
......@@ -425,6 +428,8 @@ public:
GooList *getSelectionRegion(PDFRectangle *selection, double scale);
GooString *getSelectionText(PDFRectangle *selection);
// Find a string by character position and length. If found, sets
// the text bounding rectangle and returns true; otherwise returns
// false.
......@@ -487,6 +492,7 @@ private:
friend class TextFlow;
friend class TextWordList;
friend class TextSelectionPainter;
friend class TextSelectionDumper;
};
//------------------------------------------------------------------------
......@@ -582,6 +588,8 @@ public:
GooList *getSelectionRegion(PDFRectangle *selection, double scale);
GooString *getSelectionText(PDFRectangle *selection);
#if TEXTOUT_WORD_LIST
// Build a flat word list, in content stream order (if
// this->rawOrder is true), physical layout order (if
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment