Commit 967a21b5, authored by sgerwk, committed by Albert Astals Cid

bounding box of graphics in the page

parent 04c5fec1
......@@ -404,6 +404,7 @@ set(poppler_SRCS
poppler/Movie.cc
poppler/Rendition.cc
poppler/CertificateInfo.cc
poppler/BBoxOutputDev.cc
)
set(poppler_LIBS ${FREETYPE_LIBRARIES})
if(ENABLE_SPLASH)
......@@ -614,6 +615,7 @@ if(ENABLE_UNSTABLE_API_ABI_HEADERS)
poppler/SecurityHandler.h
poppler/StdinCachedFile.h
poppler/StdinPDFDocBuilder.h
poppler/BBoxOutputDev.h
poppler/UTF.h
poppler/Sound.h
${CMAKE_CURRENT_BINARY_DIR}/poppler/poppler-config.h
......
......@@ -20,6 +20,7 @@ configure_file(poppler-features.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/poppler-feat
if (GTK_FOUND AND BUILD_GTK_TESTS)
add_subdirectory(demo)
add_subdirectory(tests)
endif ()
set(poppler_glib_public_headers
......
......@@ -27,6 +27,7 @@
#include <UnicodeMap.h>
#include <GfxState.h>
#include <PageTransition.h>
#include <BBoxOutputDev.h>
#endif
#include "poppler.h"
......@@ -2187,6 +2188,53 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect)
rect->y2 = cropBox->y2;
}
/**
 * poppler_page_get_bounding_box:
 * @page: A #PopplerPage
 * @rect: (out): return location for the bounding box of the page
 *
 * Returns the bounding box of the page, a rectangle enclosing all text, vector
 * graphics (lines, rectangles and curves) and raster images in the page.
 * Includes invisible text but not (yet) annotations like highlights and form
 * elements.
 *
 * @rect is only written when the page contains graphics; it is left
 * untouched when %FALSE is returned.
 *
 * Return value: %TRUE if the page contains graphics, %FALSE otherwise
 *
 * Since: 0.88
 */
gboolean
poppler_page_get_bounding_box (PopplerPage *page,
                               PopplerRectangle *rect) {
  g_return_val_if_fail(POPPLER_IS_PAGE (page), FALSE);
  g_return_val_if_fail(rect != nullptr, FALSE);

  /* The output device only records extents, so it can live on the stack;
   * it must outlive the Gfx object that draws into it. */
  BBoxOutputDev bb_out(page->page->getCropBox());

  Gfx *gfx = page->page->createGfx(&bb_out,
                                   72.0, 72.0, 0,
                                   false, /* useMediaBox */
                                   true, /* Crop */
                                   -1, -1, -1, -1,
                                   false, /* printing */
                                   nullptr, nullptr);
  page->page->display(gfx);

  const bool hasGraphics = bb_out.getHasGraphics();
  if (hasGraphics) {
    rect->x1 = bb_out.getX1();
    rect->y1 = bb_out.getY1();
    rect->x2 = bb_out.getX2();
    rect->y2 = bb_out.getY2();
  }

  /* gfx is released before bb_out goes out of scope */
  delete gfx;

  return hasGraphics;
}
/**
* poppler_page_get_text_layout:
* @page: A #PopplerPage
......
......@@ -132,6 +132,9 @@ POPPLER_PUBLIC
void poppler_page_get_crop_box (PopplerPage *page,
PopplerRectangle *rect);
POPPLER_PUBLIC
gboolean poppler_page_get_bounding_box (PopplerPage *page,
PopplerRectangle *rect);
POPPLER_PUBLIC
gboolean poppler_page_get_text_layout (PopplerPage *page,
PopplerRectangle **rectangles,
guint *n_rectangles);
......
......@@ -53,6 +53,7 @@ poppler_page_free_image_mapping
poppler_page_free_link_mapping
poppler_page_free_text_attributes
poppler_page_get_annot_mapping
poppler_page_get_bounding_box
poppler_page_get_crop_box
poppler_page_get_duration
poppler_page_get_form_field_mapping
......
include_directories(
${GTK3_INCLUDE_DIRS}
)
# Register one CTest case named <exe>-<arg1> that runs the test executable
# <exe> on ${TESTDATADIR}/unittestcases/<arg1>; any further arguments are
# forwarded to the executable unchanged.
# NOTE(review): ${EXE} is not set anywhere in this file; presumably it is empty
# or a launcher provided by the parent scope -- confirm before removing it.
macro(POPPLER_ADD_TESTCASE exe arg1)
  add_test(${exe}-${arg1} ${EXE} ${EXECUTABLE_OUTPUT_PATH}/${exe} ${TESTDATADIR}/unittestcases/${arg1} ${ARGN})
endmacro(POPPLER_ADD_TESTCASE)
add_definitions(${GTK3_CFLAGS_OTHER})
add_definitions(-DTESTDATADIR=\"${TESTDATADIR}\")
set(poppler_check_text_SRCS
check_text.c
)
poppler_add_unittest(poppler-check-text BUILD_GTK_TESTS ${poppler_check_text_SRCS})
target_link_libraries(poppler-check-text poppler-glib ${GTK3_LIBRARIES})
set(poppler_check_bb_SRCS
check_bb.c
)
poppler_add_test(poppler-check-bb BUILD_GTK_TESTS ${poppler_check_bb_SRCS})
target_link_libraries(poppler-check-bb poppler-glib ${GTK3_LIBRARIES})
# Expected bounding boxes, one test per PDF file.
# After the file name come four numbers per page, in page order:
#   x1 y1 x2 y2
# poppler-check-bb consumes them four at a time and fails when a page has no
# graphics, when fewer than four numbers remain for a page, or when a computed
# coordinate differs from the expected one by 0.01 or more.
poppler_add_testcase(poppler-check-bb shapes+attachments.pdf 42.5 42.5 557.5 557.5)
poppler_add_testcase(poppler-check-bb orientation.pdf 34 34 83.74 49 793 34 808 97.19 488.02 793 561 808 34 503.61 49 56)
poppler_add_testcase(poppler-check-bb xr01.pdf 148.71 127.85 308.11 704.57)
poppler_add_testcase(poppler-check-bb xr02.pdf 133.77 124.81 308.11 704.57 133.77 124.80 308.11 704.57)
poppler_add_testcase(poppler-check-bb russian.pdf 71.5 76.81 197.69 131.09)
poppler_add_testcase(poppler-check-bb vis_policy_test.pdf 90 77.93 312.01 265.13)
poppler_add_testcase(poppler-check-bb searchAcrossLines.pdf 107.15 105.23 523.85 691 85.04 94 538.59 762.19)
poppler_add_testcase(poppler-check-bb deseret.pdf 56.8 57.15 109.5 72.8)
poppler_add_testcase(poppler-check-bb fieldWithUtf16Names.pdf 56.65 56.65 264.55 83.05)
poppler_add_testcase(poppler-check-bb bug7063.pdf 56.8 57.46 244.29 118.79)
poppler_add_testcase(poppler-check-bb WithActualText.pdf 100 90.72 331.01 102.35)
poppler_add_testcase(poppler-check-bb Issue637.pdf 70.87 53 293 105.37)
poppler_add_testcase(poppler-check-bb truetype.pdf 17.5 17.5 577.5 225.62)
poppler_add_testcase(poppler-check-bb form_set_icon.pdf -0.5 -0.5 363.34 272.63)
poppler_add_testcase(poppler-check-bb imageretrieve+attachment.pdf 0 0 610.56 792)
poppler_add_testcase(poppler-check-bb checkbox_issue_159.pdf 2.84 14.17 553.18 840.87)
poppler_add_testcase(poppler-check-bb NestedLayers.pdf -1 191 613 793)
poppler_add_testcase(poppler-check-bb A6EmbeddedFiles.pdf 17.88 17.88 558.36 755.73)
add_executable(pdfdrawbb pdfdrawbb.c)
target_link_libraries(pdfdrawbb poppler-glib)
/*
* testing program for the boundingbox function
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <poppler.h>
/*
 * tell whether two floating-point coordinates coincide, that is, whether
 * they differ by less than 0.01; returns non-zero if so, zero otherwise
 */
int equal(double a, double b) {
    const double tolerance = 0.01;
    const double distance = fabs(a - b);

    return distance < tolerance;
}
/*
 * main
 *
 * usage: poppler-check-bb file.pdf x1 y1 x2 y2 [x1 y1 x2 y2 ...]
 *
 * Computes the bounding box of every page of the document and compares it,
 * within the tolerance of equal(), with the four expected coordinates given
 * for that page on the command line. Exits with EXIT_FAILURE when the file
 * cannot be opened, a page has no graphics, coordinates are missing for a
 * page, or a bounding box differs from the expected one.
 */
int main(int argc, char *argv[]) {
    GFile *infile;
    PopplerDocument *doc;
    PopplerPage *page;
    int npages, n;
    gboolean hg;
    PopplerRectangle bb, correct;
    GError *err = NULL;
    int argx;

    /* the file name is mandatory: argv[1] is used right below */
    if (argc < 2) {
        g_printerr("usage: poppler-check-bb file.pdf x1 y1 x2 y2 ...\n");
        exit(EXIT_FAILURE);
    }

    /* open file */
    g_print("file: %s\n", argv[1]);
    infile = g_file_new_for_path(argv[1]);
    if (! infile)
        exit(EXIT_FAILURE);

    doc = poppler_document_new_from_gfile(infile, NULL, NULL, &err);
    if (doc == NULL) {
        g_printerr("error opening pdf file: %s\n", err->message);
        g_error_free(err);
        exit(EXIT_FAILURE);
    }

    /* pages */
    npages = poppler_document_get_n_pages(doc);
    if (npages < 1) {
        g_printerr("no page in document\n");
        exit(EXIT_FAILURE);
    }

    /* check the bounding box; argx is the next unread expected coordinate */
    argx = 2;
    for (n = 0; n < npages; n++) {
        g_print(" page: %d\n", n + 1);

        page = poppler_document_get_page(doc, n);
        if (page == NULL) {
            g_printerr("cannot load page %d\n", n + 1);
            exit(EXIT_FAILURE);
        }

        hg = poppler_page_get_bounding_box(page, &bb);
        if (! hg) {
            g_printerr("no graphics in page\n");
            exit(EXIT_FAILURE);
        }
        g_print(" bounding box: %g,%g - %g,%g\n",
                bb.x1, bb.y1, bb.x2, bb.y2);

        /* the computed box is printed first so that running the program
         * without coordinates still shows it */
        if (argc - argx < 4) {
            g_print("not enough arguments\n");
            exit(EXIT_FAILURE);
        }
        correct.x1 = atof(argv[argx++]);
        correct.y1 = atof(argv[argx++]);
        correct.x2 = atof(argv[argx++]);
        correct.y2 = atof(argv[argx++]);
        g_print(" correct: %g,%g - %g,%g\n",
                correct.x1, correct.y1, correct.x2, correct.y2);

        /* all four coordinates have to match */
        if (! equal(bb.x1, correct.x1) ||
            ! equal(bb.y1, correct.y1) ||
            ! equal(bb.x2, correct.x2) ||
            ! equal(bb.y2, correct.y2)) {
            g_print("bounding box differs from expected\n");
            exit(EXIT_FAILURE);
        }

        g_object_unref(page);
    }

    g_object_unref(doc);
    g_object_unref(infile);

    return EXIT_SUCCESS;
}
/*
* testing program for the get_text function
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <poppler.h>
/*
 * main
 *
 * Opens unittestcases/WithActualText.pdf under TESTDATADIR, extracts the text
 * of its first page and asserts that it is the expected sentence.
 */
int main(int argc, char *argv[]) {
    GFile *infile;
    PopplerDocument *doc;
    PopplerPage *page;
    int npages, n;
    char *text;
    GError *err = NULL;

    /* open file */
    infile = g_file_new_for_path
        (TESTDATADIR "/unittestcases/WithActualText.pdf");
    if (! infile)
        exit(EXIT_FAILURE);

    doc = poppler_document_new_from_gfile(infile, NULL, NULL, &err);
    if (doc == NULL) {
        g_printerr("error opening pdf file: %s\n", err->message);
        g_error_free(err);
        exit(EXIT_FAILURE);
    }

    /* pages */
    npages = poppler_document_get_n_pages(doc);
    if (npages < 1) {
        g_printerr("no page in document\n");
        exit(EXIT_FAILURE);
    }

    /* check text */
    n = 0;
    page = poppler_document_get_page(doc, n);
    if (page == NULL) {
        g_printerr("cannot load page %d\n", n + 1);
        exit(EXIT_FAILURE);
    }

    text = poppler_page_get_text(page);
    g_print("%s\n", text);
    g_assert_cmpstr(text, ==, "The slow brown fox jumps over the black dog.");

    /* the returned string belongs to the caller */
    g_free(text);
    g_object_unref(page);
    g_object_unref(doc);
    g_object_unref(infile);

    return EXIT_SUCCESS;
}
/*
* pdfdrawbb.c
*
* draw the bounding box of each page
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <poppler.h>
#include <cairo.h>
#include <cairo-pdf.h>
/*
 * add suffix to a pdf filename
 *
 * Takes the basename of infile, removes a trailing ".pdf" or ".PDF" if
 * present, and appends "-", suffix and ".pdf". For example "dir/a.pdf" with
 * suffix "bb" gives "a-bb.pdf".
 *
 * Returns a string allocated with malloc() that the caller releases with
 * free(), or NULL if the allocation fails.
 */
char *pdfaddsuffix(char *infile, char *suffix) {
    char *basename;
    char *outfile;
    char *pos;
    size_t size;

    basename = g_path_get_basename(infile);

    /* basename + "-" + suffix + ".pdf" + terminating NUL; removing the
     * extension can only make the result shorter */
    size = strlen(basename) + 1 + strlen(suffix) + 4 + 1;
    outfile = malloc(size);
    if (outfile == NULL) {
        g_free(basename);
        return NULL;
    }

    strcpy(outfile, basename);
    g_free(basename);

    /* drop the extension only if it is the last component of the name */
    pos = strrchr(outfile, '.');
    if (pos != NULL && (! strcmp(pos, ".pdf") || ! strcmp(pos, ".PDF")))
        *pos = '\0';

    strcat(outfile, "-");
    strcat(outfile, suffix);
    strcat(outfile, ".pdf");
    return outfile;
}
/*
* main
*/
int main(int argc, char *argv[]) {
int opt;
gboolean usage = FALSE;
char *infilename, *outfilename;
GError *err = NULL;
GFile *infile;
PopplerDocument *doc;
PopplerPage *page;
int npages, n;
PopplerRectangle bb;
gboolean hg;
gdouble width, height;
cairo_surface_t *surface;
cairo_t *cr;
/* arguments */
while ((opt = getopt(argc, argv, "h")) != -1)
switch(opt) {
case 'h':
usage = TRUE;
break;
}
if (! usage && argc - 1 < optind) {
g_print("input file name missing\n");
usage = TRUE;
}
if (usage) {
g_print("usage:\n");
g_print("\tpdfdrawbb");
g_print("[-h] file.pdf\n");
g_print("\t\t-h\t\tthis help\n");
exit(EXIT_FAILURE);
}
infilename = argv[optind];
if (! infilename)
exit(EXIT_FAILURE);
outfilename = pdfaddsuffix(argv[optind], "bb");
/* open file */
infile = g_file_new_for_path(infilename);
if (infile == NULL)
exit(EXIT_FAILURE);
doc = poppler_document_new_from_gfile(infile, NULL, NULL, &err);
if (doc == NULL) {
g_printerr("error opening pdf file: %s\n", err->message);
g_error_free(err);
exit(EXIT_FAILURE);
}
/* pages */
npages = poppler_document_get_n_pages(doc);
if (npages < 1) {
g_print("no page in document\n");
exit(EXIT_FAILURE);
}
/* copy to destination */
surface = cairo_pdf_surface_create(outfilename, width, height);
g_print("infile: %s\n", infilename);
g_print("outfile: %s\n", outfilename);
for (n = 0; n < npages; n++) {
g_print("page %d:\n", n);
page = poppler_document_get_page(doc, n);
poppler_page_get_size(page, &width, &height);
cairo_pdf_surface_set_size(surface, width, height);
hg = poppler_page_get_bounding_box(page, &bb);
if (hg)
g_print("bounding box %g,%g - %g,%g",
bb.x1, bb.y1, bb.x2, bb.y2);
g_print("\n");
cr = cairo_create(surface);
poppler_page_render_for_printing(page, cr);
if (hg) {
cairo_set_source_rgb(cr, 0.6, 0.6, 1.0);
cairo_rectangle(cr,
bb.x1, bb.y1, bb.x2 - bb.x1, bb.y2 - bb.y1);
cairo_stroke(cr);
}
cairo_destroy(cr);
cairo_surface_show_page(surface);
g_object_unref(page);
}
cairo_surface_destroy(surface);
return EXIT_SUCCESS;
}
/*
* boundingbox output device
*/
#include <cmath>
#include <BBoxOutputDev.h>
#include <GfxFont.h>
/* Writing modes compared against GfxFont::getWMode() in drawChar().
 * NOTE(review): presumably 0/1 mirror the PDF WMode values -- confirm. */
#define writingModeHorizontal 0
#define writingModeVertical 1
/* Shorthand constructor: enables the text, vector and raster flags and
 * forwards to the four-argument constructor. */
BBoxOutputDev::BBoxOutputDev(const PDFRectangle *cropA)
    : BBoxOutputDev(cropA, /* textA */ true, /* vectorA */ true, /* rasterA */ true)
{
}
/* Same as the full constructor with the line-width flag switched on. */
BBoxOutputDev::BBoxOutputDev(const PDFRectangle *cropA,
                             bool textA, bool vectorA, bool rasterA)
    : BBoxOutputDev(cropA, textA, vectorA, rasterA, /* lwidthA */ true)
{
}
/* Full constructor: stores a copy of the crop rectangle and the four flags,
 * and starts with no graphics recorded. cropA is dereferenced and must not
 * be null. */
BBoxOutputDev::BBoxOutputDev(const PDFRectangle *cropA,
                             bool textA, bool vectorA, bool rasterA, bool lwidthA)
    : hasGraphics(false),
      crop(*cropA),
      text(textA),
      vector(vectorA),
      raster(rasterA),
      lwidth(lwidthA)
{
}
double BBoxOutputDev::getX1() const {
return bb.x1;
}
double BBoxOutputDev::getY1() const {
return bb.y1;
}
double BBoxOutputDev::getX2() const {
return bb.x2;
}
double BBoxOutputDev::getY2() const {
return bb.y2;
}
/* Value of the hasGraphics flag.
 * NOTE(review): the return type is double although the flag is assigned
 * true/false and callers store the result in a bool; switching it to bool
 * would also require changing the declaration in the header. */
double BBoxOutputDev::getHasGraphics() const
{
    return static_cast<double>(hasGraphics);
}
void BBoxOutputDev::endPage() {
bb.clipTo(&crop);
}
/* Stroked path: pass the current path to updatePath() for the bounding box. */
void BBoxOutputDev::stroke(GfxState *state)
{
    this->updatePath(&this->bb, state->getPath(), state);
}
/* Filled path: pass the current path to updatePath() for the bounding box. */
void BBoxOutputDev::fill(GfxState *state)
{
    this->updatePath(&this->bb, state->getPath(), state);
}
/* Even-odd filled path: handled exactly like fill(). */
void BBoxOutputDev::eoFill(GfxState *state)
{
    this->updatePath(&this->bb, state->getPath(), state);
}
/* Image mask: only `state` is used, through updateImage(); the image data
 * and the remaining arguments are ignored here. */
void BBoxOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
                                  int width, int height, bool invert,
                                  bool interpolate,
                                  bool inlineImg)
{
    this->updateImage(&this->bb, state);
}
/* Image: only `state` is used, through updateImage(); the image data and
 * the remaining arguments are ignored here. */
void BBoxOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
                              int width, int height,
                              GfxImageColorMap *colorMap, bool interpolate,
                              const int *maskColors, bool inlineImg)
{
    this->updateImage(&this->bb, state);
}
/* Masked image: only `state` is used, through updateImage(); image, mask
 * and the remaining arguments are ignored here. */
void BBoxOutputDev::drawMaskedImage(GfxState *state, Object *ref, Stream *str,
                                    int width, int height,
                                    GfxImageColorMap *colorMap, bool interpolate,
                                    Stream *maskStr, int maskWidth, int maskHeight,
                                    bool maskInvert, bool maskInterpolate)
{
    this->updateImage(&this->bb, state);
}
/* Soft-masked image: only `state` is used, through updateImage(); image,
 * mask and the remaining arguments are ignored here. */
void BBoxOutputDev::drawSoftMaskedImage(GfxState *state, Object *ref,
                                        Stream *str,
                                        int width, int height,
                                        GfxImageColorMap *colorMap,
                                        bool interpolate,
                                        Stream *maskStr,
                                        int maskWidth, int maskHeight,
                                        GfxImageColorMap *maskColorMap,
                                        bool maskInterpolate)
{
    this->updateImage(&this->bb, state);
}
/*
 * Extend the bounding box with one character.
 *
 * Four points are passed to updatePoint(): two relative to (x, y) and two
 * relative to (x + dx, y + dy). Their offsets come from the font: ascent and
 * descent for horizontal writing, the font bounding box (or -0.5/0.5 when it
 * is all zero) for vertical writing. Nothing is done when text is disabled,
 * when the state has no font, or when the character code is 0x20.
 */
void BBoxOutputDev::drawChar(GfxState *state,
double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes,
const Unicode *u, int uLen) {
GfxFont *font;
double leftent, rightent, ascent, descent;
const double *fm, *fb;
Matrix fmat;
double fontSize;
double fx, fy, nx, ny;
/* text was excluded by the constructor flags */
if (! text)
return;
font = state->getFont();
if (! font)
return;
/* character code 0x20 does not contribute to the box */
if (code == (CharCode) 0x20)
return;
fontSize = state->getFontSize();
/* only Type 3 fonts use their own font matrix; others use the identity */
if (font->getType() != fontType3)
fmat.init(1, 0, 0, 1, 0, 0);
else {
fm = font->getFontMatrix();
fmat.init(fm[0], fm[1], fm[2], fm[3], fm[4], fm[5]);
}
fb = font->getFontBBox();
if (font->getWMode() == writingModeHorizontal) {
/* horizontal writing: extent given by ascent and descent only */
leftent = 0;
rightent = 0;
ascent = font->getAscent();
descent = font->getDescent();
}
else {
/* other writing modes: extent taken from the font bounding box, with a
 * fixed -0.5/0.5 fallback when the box is all zero */
if (fb[0] == 0 && fb[1] == 0 && fb[2] == 0 && fb[3] == 0) {
leftent = -0.5;
rightent = 0.5;
}
else {
/* NOTE(review): entries 1 and 3 of the font bounding box are used as the
 * left/right extent -- confirm this is the intended pair */
leftent = fb[1];
rightent = fb[3];
}
ascent = 0;
descent = 0;
}
/* NOTE(review): presumably compensates for the units of the Type 3 font
 * matrix applied below -- confirm */
if (font->getType() == fontType3) {
ascent *= 1000;
descent *= 1000;
}
/* two points relative to (x, y).
 * NOTE(review): unlike the two points below, these offsets are not
 * multiplied by fontSize -- confirm this is intended */
fmat.transform(leftent, descent, &fx, &fy);
state->textTransformDelta(fx, fy, &nx, &ny);
updatePoint(&bb, nx + x, ny + y, state);
fmat.transform(rightent, ascent, &fx, &fy);
state->textTransformDelta(fx, fy, &nx, &ny);
updatePoint(&bb, nx + x, ny + y, state);
/* two points relative to (x + dx, y + dy), offsets scaled by fontSize */
fmat.transform(leftent * fontSize, descent * fontSize, &fx, &fy);
state->textTransformDelta(fx, fy, &nx, &ny);
updatePoint(&bb, nx + x + dx, ny + y + dy, state);
fmat.transform(rightent * fontSize, ascent * fontSize, &fx, &fy);
state->textTransformDelta(fx, fy, &nx, &ny);
updatePoint(&bb, nx + x + dx, ny + y + dy, state);
}
/* New clip path: narrow the crop rectangle through updateClip(). */
void BBoxOutputDev::clip(GfxState *state)
{
    this->updateClip(state);
}
/* New even-odd clip path: handled exactly like clip(). */
void BBoxOutputDev::eoClip(GfxState *state)
{
    this->updateClip(state);
}
/* Clip to a stroked path: handled exactly like clip(). */
void BBoxOutputDev::clipToStrokePath(GfxState *state)
{
    this->updateClip(state);
}
/* update the crop box with a new path */
void BBoxOutputDev::updateClip(const GfxState *state) {
PDFRectangle box;
bool hg;
hg = hasGraphics;
hasGraphics = true;
updatePath(&box, state->getPath(), state);
hasGraphics = hg;
crop.clipTo(&box);
}
/* update the bounding box with a new point */
void BBoxOutputDev::updatePoint(PDFRectangle *bbA,
double x, double y, const GfxState *state) {
Matrix o = {1, 0, 0, 1, 0, 0};
double tx, ty, fx, fy;
o.scale(1, -1);
o.translate(0, -state->getPageHeight());