Bug in void TextPage::visitSelection(TextSelectionVisitor *visitor, PDFRectangle *selection, SelectionStyle style)
Submitted by srinivas.adicherla
Assigned to poppler-bugs
Description
Created attachment 38683 pdf file
Hi,
I tried to use the function poppler_page_get_selection_region(), passing the page boundary as the rectangle and POPPLER_SELECTION_LINE as the style. But for some pages it does not return the rectangles of all lines. So I started debugging the code, and I found that there is a problem in the function TextPage::visitSelection() in TextOutputDev.cc.
In that function, after finding best_flow, we loop again to call visitSelection on each block within the flows. But because the for loop starts from (flow = best_flow[start]), it does not loop through all the flows for the given boundary, even though I passed the page boundary. If I instead start the loop with (flow = flows), it loops through all the flows. But if I do that, what is the use of finding best_flow? Any suggestions? Also, I don't understand why we need to calculate best_flow if we are going to loop through all the flows anyway (I think it may be for the case where the given rectangle is not the full page rectangle). Could you please clarify?
I have attached the PDF file that shows the problem. The problem occurs on the 8th page.
I pasted the function below.
for (flow = best_flow[start]; flow; flow = flow->next) { /* original */
//for (flow = flows; flow; flow = flow->next) { /* if we make this change, it works */
TextOutputDev.cc
// Visits every text block covered by a selection, in reading order.
//
// The two corners of `selection` are each mapped to a block: normally the
// block with the smallest Manhattan distance to the point, or the first/last
// block in reading order when the point lies at or beyond the corresponding
// corner of the text (see below).  The blocks from the earlier of the two
// up to and including the later one are then passed to
// TextBlock::visitSelection(), each with the sub-rectangle of the selection
// that applies to it: the whole block box for interior blocks, and a box
// clamped to the selection point for the two end blocks.
//
// Mapping the endpoints to blocks (rather than always walking every flow) is
// what lets a partial selection start and stop in the middle of the page.
//
// visitor   - receives the per-block callbacks (forwarded unchanged).
// selection - the two selection points (x1,y1) and (x2,y2); they may be
//             given in either order.
// style     - forwarded unchanged to each block.
//
// Does nothing when the page has no flows.
void TextPage::visitSelection(TextSelectionVisitor *visitor,
                              PDFRectangle *selection,
                              SelectionStyle style)
{
  PDFRectangle child_selection;
  double x[2], y[2], d, best_d[2];
  double xMin, yMin, xMax, yMax;
  TextFlow *flow, *best_flow[2];
  TextBlock *blk, *best_block[2];
  int i, count = 0, best_count[2], start, stop;

  if (!flows)
    return;

  x[0] = selection->x1;
  y[0] = selection->y1;
  x[1] = selection->x2;
  y[1] = selection->y2;

  // Bounding box of all blocks, grown while scanning below.
  xMin = pageWidth;
  yMin = pageHeight;
  xMax = 0.0;
  yMax = 0.0;

  for (i = 0; i < 2; i++) {
    best_block[i] = NULL;
    best_flow[i] = NULL;
    best_count[i] = 0;
    best_d[i] = 0;
  }

  // find the nearest blocks to the selection points
  // using the manhattan distance.
  for (flow = flows; flow; flow = flow->next) {
    for (blk = flow->blocks; blk; blk = blk->next) {
      count++;
      // the first/last blocks in reading order are
      // often not the closest to the page corners;
      // track the corners, force those blocks to
      // be selected if the selection runs across
      // multiple pages.
      xMin = fmin(xMin, blk->xMin);
      yMin = fmin(yMin, blk->yMin);
      xMax = fmax(xMax, blk->xMax);
      yMax = fmax(yMax, blk->yMax);
      for (i = 0; i < 2; i++) {
        d = fmax(blk->xMin - x[i], 0.0) +
            fmax(x[i] - blk->xMax, 0.0) +
            fmax(blk->yMin - y[i], 0.0) +
            fmax(y[i] - blk->yMax, 0.0);
        // The last block in reading order is forced when the point is at
        // or beyond the bottom-right corner.  The text bounds are clamped
        // to the page box and the comparison is inclusive so that a
        // selection equal to the page rectangle still reaches the last
        // block when some text touches or overflows the page edge.
        if (!best_block[i] ||
            d < best_d[i] ||
            (!blk->next && !flow->next &&
             x[i] >= fmin(xMax, pageWidth) &&
             y[i] >= fmin(yMax, pageHeight))) {
          best_block[i] = blk;
          best_flow[i] = flow;
          best_count[i] = count;
          best_d[i] = d;
        }
      }
    }
  }

  // Force the first block in reading order when the point is at or beyond
  // the starting corner: top-left when primaryLR is set, top-right
  // otherwise.  As above, the bounds are clamped to the page box and the
  // comparisons are inclusive, so a page-rectangle selection is not
  // defeated by text that touches or overflows the page edge.
  for (i = 0; i < 2; i++) {
    if (primaryLR) {
      if (x[i] <= fmax(xMin, 0.0) && y[i] <= fmax(yMin, 0.0)) {
        best_block[i] = flows->blocks;
        best_flow[i] = flows;
        best_count[i] = 1;
      }
    } else {
      if (x[i] >= fmin(xMax, pageWidth) && y[i] <= fmax(yMin, 0.0)) {
        best_block[i] = flows->blocks;
        best_flow[i] = flows;
        best_count[i] = 1;
      }
    }
  }

  // assert: best is always set.
  if (!best_block[0] || !best_block[1]) {
    return;
  }

  // Now decide which point was first: the one whose block comes earlier in
  // reading order, or, within the same block, the one with the smaller y.
  if (best_count[0] < best_count[1] ||
      (best_count[0] == best_count[1] && y[0] < y[1])) {
    start = 0;
    stop = 1;
  } else {
    start = 1;
    stop = 0;
  }

  // Walk from the start block to the stop block.  Blocks that precede the
  // start block inside its own flow are skipped; every later flow is
  // walked from its first block.
  for (flow = best_flow[start]; flow; flow = flow->next) {
    if (flow == best_flow[start]) {
      blk = best_block[start];
    } else {
      blk = flow->blocks;
    }
    for (; blk; blk = blk->next) {
      // Default: select the whole block, with x running in the primary
      // direction.
      if (primaryLR) {
        child_selection.x1 = blk->xMin;
        child_selection.x2 = blk->xMax;
      } else {
        child_selection.x1 = blk->xMax;
        child_selection.x2 = blk->xMin;
      }
      child_selection.y1 = blk->yMin;
      child_selection.y2 = blk->yMax;

      // In the end blocks, use the selection point itself, clamped into
      // the block's box.
      if (blk == best_block[start]) {
        child_selection.x1 = fmax(blk->xMin, fmin(blk->xMax, x[start]));
        child_selection.y1 = fmax(blk->yMin, fmin(blk->yMax, y[start]));
      }
      if (blk == best_block[stop]) {
        child_selection.x2 = fmax(blk->xMin, fmin(blk->xMax, x[stop]));
        child_selection.y2 = fmax(blk->yMin, fmin(blk->yMax, y[stop]));
        blk->visitSelection(visitor, &child_selection, style);
        return;
      }
      blk->visitSelection(visitor, &child_selection, style);
    }
  }
}
Attachment 38683, "pdf file":
elk.pdf