Commit d2618832 authored by Nelson Benítez León, committed by Christian Persch

find, glib: Enhance find to support multi-line matching

On the backend side, this adds 5 new parameters to TextPage::findText():
4 to return coords for the part of the match that falls on the next line,
and 1 to specify whether a hyphen was ignored at the end of the first line.

For the glib binding, this extends the public PopplerRectangle struct
by new members to hold additional information about whether the rectangle
belongs to a group of rectangles for the same match, and whether a hyphen
was ignored at the end of the line. Since PopplerRectangle is public
ABI, this is done by making the public PopplerRectangle API return the
enlarged struct, and internally casting to the new struct when required;
the new members are accessible only via accessor functions.

For Qt5 binding, this commit only implements the new flag
Poppler::Page::AcrossLines (but no new function and no new
return data type) and if this flag is passed, the returned
list of rectangles will also include rectangles for the
second part of across-line matches.

This minimal Qt5 binding still allows for the creation of
tests for this feature (using the Qt5 test framework), which
this commit *does include*. But a more complete binding (with
a new return type that includes 'next_line' and 'after_hyphen'
boolean fields) is left for the Qt5 binding maintainers to do
if they want to use this feature (e.g. in Okular).

So, as mentioned, this commit incorporates tests for the
implemented across-line matching feature, and the tests do
also check for two included aspects of this feature, which are:

 - Ignoring the hyphen character while matching when 1) it's the
   last character of the line and 2) its corresponding matching
   character in the search term is not a hyphen too.

 - Any whitespace characters in the search term will be allowed
   to match at the logical position where the lines split (i.e. what
   would normally be the newline character in a text file, but
   PDF text does not include newline characters between lines).

Regarding the enhancement to findText() function which implements
matching across lines, just two more notes:

 - It won't match on text spanning more than two lines, i.e. it
   only matches text spanning from end of one line to start of
   next line.

 - It does not support searching backwards: if findText() receives
   both <backward> and <matchAcrossLines> parameters as true, it
   will ignore the <matchAcrossLines> parameter. Implementing
   <matchAcrossLines> with backwards direction is possible, but
   it would make an already complex function like findText()
   even more complex, for little gain, as e.g. Evince does not even
   use the <backward> parameter of findText().

Fixes poppler issues #744 and #755
Related Evince issue https://gitlab.gnome.org/GNOME/evince/issues/333
parent 53c4afce
Pipeline #42748 failed with stage
in 3 minutes and 36 seconds
......@@ -88,6 +88,41 @@ pgd_find_update_progress (PgdFindDemo *demo,
g_free (str);
}
static void
pgd_find_append_match (PgdFindDemo *demo,
GtkTreeModel *model,
GtkTreeIter *iter_child,
PopplerRectangle *rect,
int match_id)
{
char *x1, *y1, *x2, *y2, *str;
str = g_strdup_printf ("Match %d", match_id + 1);
x1 = g_strdup_printf ("%.2f", rect->x1);
y1 = g_strdup_printf ("%.2f", rect->y1);
x2 = g_strdup_printf ("%.2f", rect->x2);
y2 = g_strdup_printf ("%.2f", rect->y2);
gtk_tree_store_set (GTK_TREE_STORE (model), iter_child,
TITLE_COLUMN, str,
X1_COLUMN, x1,
Y1_COLUMN, y1,
X2_COLUMN, x2,
Y2_COLUMN, y2,
VISIBLE_COLUMN, TRUE,
PAGE_COLUMN, demo->page_index,
PAGE_RECT, rect,
-1);
g_free (str);
g_free (x1);
g_free (y1);
g_free (x2);
g_free (y2);
g_object_weak_ref (G_OBJECT (model),
(GWeakNotify)poppler_rectangle_free,
rect);
}
static gboolean
pgd_find_find_text (PgdFindDemo *demo)
{
......@@ -107,63 +142,43 @@ pgd_find_find_text (PgdFindDemo *demo)
matches = poppler_page_find_text_with_options (page, gtk_entry_get_text (GTK_ENTRY (demo->entry)), demo->options);
g_timer_stop (timer);
if (matches) {
GtkTreeIter iter;
GtkTreeIter iter, iter_child;
gchar *str;
GList *l;
gdouble height;
gint n_match = 0;
str = g_strdup_printf ("%d matches found on page %d in %.4f seconds",
g_list_length (matches), demo->page_index + 1,
g_timer_elapsed (timer, NULL));
gtk_tree_store_append (GTK_TREE_STORE (model), &iter, NULL);
gtk_tree_store_set (GTK_TREE_STORE (model), &iter,
TITLE_COLUMN, str,
VISIBLE_COLUMN, FALSE,
PAGE_COLUMN, demo->page_index,
-1);
g_free (str);
poppler_page_get_size (page, NULL, &height);
for (l = matches; l && l->data; l = g_list_next (l)) {
PopplerRectangle *rect = (PopplerRectangle *)l->data;
GtkTreeIter iter_child;
gchar *x1, *y1, *x2, *y2;
gdouble tmp;
str = g_strdup_printf ("Match %d", ++n_match);
x1 = g_strdup_printf ("%.2f", rect->x1);
y1 = g_strdup_printf ("%.2f", rect->y1);
x2 = g_strdup_printf ("%.2f", rect->x2);
y2 = g_strdup_printf ("%.2f", rect->y2);
gdouble tmp;
tmp = rect->y1;
rect->y1 = height - rect->y2;
rect->y2 = height - tmp;
tmp = rect->y1;
rect->y1 = height - rect->y2;
rect->y2 = height - tmp;
gtk_tree_store_append (GTK_TREE_STORE (model), &iter_child, &iter);
gtk_tree_store_set (GTK_TREE_STORE (model), &iter_child,
TITLE_COLUMN, str,
X1_COLUMN, x1,
Y1_COLUMN, y1,
X2_COLUMN, x2,
Y2_COLUMN, y2,
VISIBLE_COLUMN, TRUE,
PAGE_COLUMN, demo->page_index,
PAGE_RECT, rect,
-1);
g_free (str);
g_free (x1);
g_free (y1);
g_free (x2);
g_free (y2);
g_object_weak_ref (G_OBJECT (model),
(GWeakNotify)poppler_rectangle_free,
rect);
pgd_find_append_match (demo, model, &iter_child, rect, n_match);
if (!poppler_rectangle_find_get_match_continued (rect))
++n_match;
}
g_list_free (matches);
str = g_strdup_printf ("%d matches found on page %d in %.4f seconds",
n_match, demo->page_index + 1,
g_timer_elapsed (timer, NULL));
gtk_tree_store_set (GTK_TREE_STORE (model), &iter,
TITLE_COLUMN, str,
VISIBLE_COLUMN, FALSE,
PAGE_COLUMN, demo->page_index,
-1);
g_free (str);
}
g_timer_destroy (timer);
......@@ -356,6 +371,16 @@ pgd_find_backwards_toggled (GtkToggleButton *togglebutton,
demo->options &= ~POPPLER_FIND_BACKWARDS;
}
/*
 * "toggled" handler for the "Multi-line" check button: mirrors the button
 * state into the POPPLER_FIND_MULTILINE bit of demo->options.
 */
static void
pgd_find_multiline_toggled (GtkToggleButton *togglebutton,
                            PgdFindDemo     *demo)
{
        gboolean multiline_enabled = gtk_toggle_button_get_active (togglebutton);

        if (!multiline_enabled) {
                demo->options &= ~POPPLER_FIND_MULTILINE;
                return;
        }

        demo->options |= POPPLER_FIND_MULTILINE;
}
static void
pgd_find_whole_words_toggled (GtkToggleButton *togglebutton,
PgdFindDemo *demo)
......@@ -421,6 +446,13 @@ pgd_find_create_widget (PopplerDocument *document)
hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
checkbutton = gtk_check_button_new_with_label ("Multi-line");
g_signal_connect (checkbutton, "toggled",
G_CALLBACK (pgd_find_multiline_toggled),
demo);
gtk_box_pack_start (GTK_BOX (hbox), checkbutton, FALSE, FALSE, 0);
gtk_widget_show (checkbutton);
checkbutton = gtk_check_button_new_with_label ("Case sensitive");
g_signal_connect (checkbutton, "toggled",
G_CALLBACK (pgd_find_case_sensitive_toggled),
......
......@@ -44,6 +44,8 @@ enum
PROP_LABEL
};
static PopplerRectangleExtended* poppler_rectangle_extended_new (void);
typedef struct _PopplerPageClass PopplerPageClass;
struct _PopplerPageClass
{
......@@ -697,13 +699,7 @@ poppler_page_get_selection_region (PopplerPage *page,
PDFRectangle *selection_rect = (*list)[i];
PopplerRectangle *rect;
rect = poppler_rectangle_new ();
rect->x1 = selection_rect->x1;
rect->y1 = selection_rect->y1;
rect->x2 = selection_rect->x2;
rect->y2 = selection_rect->y2;
rect = poppler_rectangle_new_from_pdf_rectangle (selection_rect);
region = g_list_prepend (region, rect);
delete selection_rect;
......@@ -907,7 +903,24 @@ poppler_page_get_text_for_area (PopplerPage *page,
* returns a #GList of rectangles for each occurrence of the text on the page.
* The coordinates are in PDF points.
*
* Return value: (element-type PopplerRectangle) (transfer full): a #GList of #PopplerRectangle,
* When %POPPLER_FIND_MULTILINE is passed in @options, matches may span more than
* one line. In this case, the returned list will contain one #PopplerRectangle
* for each part of a match. The function poppler_rectangle_find_get_match_continued()
* will return %TRUE for all rectangles belonging to the same match, except for
* the last one. If a hyphen was ignored at the end of the part of the match,
* poppler_rectangle_find_get_ignored_hyphen() will return %TRUE for that
* rectangle.
*
* Note that currently matches spanning more than two lines are not found.
* (This limitation may be lifted in a future version.)
*
* Note also that currently finding multi-line matches backwards is not
* implemented; if you pass %POPPLER_FIND_BACKWARDS and %POPPLER_FIND_MULTILINE
* together, %POPPLER_FIND_MULTILINE will be ignored.
*
* Return value: (element-type PopplerRectangle) (transfer full): a newly allocated list
* of newly allocated #PopplerRectangle. Free with
* g_list_free_full() using poppler_rectangle_free().
*
* Since: 0.22
**/
......@@ -916,9 +929,11 @@ poppler_page_find_text_with_options (PopplerPage *page,
const char *text,
PopplerFindFlags options)
{
PopplerRectangle *match;
PopplerRectangleExtended *match;
GList *matches;
double xMin, yMin, xMax, yMax;
double xMinNext, yMinNext, xMaxNext, yMaxNext;
bool afterHyphen;
gunichar *ucs4;
glong ucs4_len;
double height;
......@@ -934,10 +949,12 @@ poppler_page_find_text_with_options (PopplerPage *page,
ucs4 = g_utf8_to_ucs4_fast (text, -1, &ucs4_len);
poppler_page_get_size (page, nullptr, &height);
const bool multiline = (options & POPPLER_FIND_MULTILINE);
backwards = options & POPPLER_FIND_BACKWARDS;
matches = nullptr;
xMin = 0;
yMin = backwards ? height : 0;
xMinNext = G_MAXDOUBLE; //we use this to detect valid returned values
while (text_dev->findText (ucs4, ucs4_len,
false, true, // startAtTop, stopAtBottom
......@@ -945,17 +962,41 @@ poppler_page_find_text_with_options (PopplerPage *page,
false, //stopAtLast
options & POPPLER_FIND_CASE_SENSITIVE,
options & POPPLER_FIND_IGNORE_DIACRITICS,
options & POPPLER_FIND_MULTILINE,
backwards,
options & POPPLER_FIND_WHOLE_WORDS_ONLY,
&xMin, &yMin, &xMax, &yMax))
&xMin, &yMin, &xMax, &yMax,
&xMinNext, &yMinNext, &xMaxNext, &yMaxNext, &afterHyphen))
{
match = poppler_rectangle_new ();
match = poppler_rectangle_extended_new ();
match->x1 = xMin;
match->y1 = height - yMax;
match->x2 = xMax;
match->y2 = height - yMin;
match->match_continued = false;
match->ignored_hyphen = false;
matches = g_list_prepend (matches, match);
start_at_last = TRUE;
if (xMinNext != G_MAXDOUBLE) {
// received rect for next-line part of a across-lines match, add it.
if (multiline) {
match->match_continued = true;
match->ignored_hyphen = afterHyphen;
match = poppler_rectangle_extended_new ();
match->x1 = xMinNext;
match->y1 = height - yMaxNext;
match->x2 = xMaxNext;
match->y2 = height - yMinNext;
match->match_continued = false;
match->ignored_hyphen = false;
matches = g_list_prepend (matches, match);
}
xMinNext = G_MAXDOUBLE;
}
}
g_free (ucs4);
......@@ -1553,6 +1594,24 @@ POPPLER_DEFINE_BOXED_TYPE (PopplerRectangle, poppler_rectangle,
poppler_rectangle_copy,
poppler_rectangle_free)
/* Allocates a zero-filled PopplerRectangleExtended from the GLib slice
 * allocator; every member (coordinates and both flags) starts out as 0/false. */
static PopplerRectangleExtended *
poppler_rectangle_extended_new (void)
{
  PopplerRectangleExtended *ext_rectangle = g_slice_new0 (PopplerRectangleExtended);

  return ext_rectangle;
}
/* Builds a newly allocated rectangle whose four coordinates are copied from
 * @rect.  The storage is a PopplerRectangleExtended handed out through the
 * public PopplerRectangle pointer type; the extra members stay zeroed. */
PopplerRectangle *
poppler_rectangle_new_from_pdf_rectangle (const PDFRectangle *rect)
{
  PopplerRectangleExtended *ext_rectangle = poppler_rectangle_extended_new ();

  ext_rectangle->x1 = rect->x1;
  ext_rectangle->y1 = rect->y1;
  ext_rectangle->x2 = rect->x2;
  ext_rectangle->y2 = rect->y2;

  PopplerRectangle *public_rectangle = reinterpret_cast<PopplerRectangle *>(ext_rectangle);
  return public_rectangle;
}
/**
* poppler_rectangle_new:
*
......@@ -1563,14 +1622,18 @@ POPPLER_DEFINE_BOXED_TYPE (PopplerRectangle, poppler_rectangle,
PopplerRectangle *
poppler_rectangle_new (void)
{
  /* Always allocate the extended struct: poppler_rectangle_free() and the
   * poppler_rectangle_find_get_*() accessors cast the pointer to
   * PopplerRectangleExtended, so a plain PopplerRectangle-sized slice would be
   * read and released with the wrong size. */
  return reinterpret_cast<PopplerRectangle*>(poppler_rectangle_extended_new ());
}
/**
* poppler_rectangle_copy:
* @rectangle: a #PopplerRectangle to copy
*
* Creates a copy of @rectangle
* Creates a copy of @rectangle.
*
* Note that you must only use this function on an allocated PopplerRectangle, as
* returned by poppler_rectangle_new(), poppler_rectangle_copy(), or the list elements
* returned from poppler_page_find_text() or poppler_page_find_text_with_options().
*
* Returns: a new allocated copy of @rectangle
*/
......@@ -1579,22 +1642,104 @@ poppler_rectangle_copy (PopplerRectangle *rectangle)
{
g_return_val_if_fail (rectangle != nullptr, NULL);
return g_slice_dup (PopplerRectangle, rectangle);
auto ext_rectangle = reinterpret_cast<PopplerRectangleExtended*>(rectangle);
return reinterpret_cast<PopplerRectangle*>(g_slice_dup (PopplerRectangleExtended, ext_rectangle));
}
/**
 * poppler_rectangle_free:
 * @rectangle: a #PopplerRectangle
 *
 * Frees the given #PopplerRectangle.
 *
 * Note that you must only use this function on an allocated PopplerRectangle, as
 * returned by poppler_rectangle_new(), poppler_rectangle_copy(), or the list elements
 * returned from poppler_page_find_text() or poppler_page_find_text_with_options().
 */
void
poppler_rectangle_free (PopplerRectangle *rectangle)
{
  /* The slice is released exactly once, with the size it was allocated with:
   * allocated rectangles are really PopplerRectangleExtended instances. */
  auto ext_rectangle = reinterpret_cast<PopplerRectangleExtended*>(rectangle);
  g_slice_free (PopplerRectangleExtended, ext_rectangle);
}
/**
 * poppler_rectangle_to_cairo:
 * @rectangle: a #PopplerRectangle
 * @cairo_rectangle: (out): a #cairo_rectangle_t to fill in
 *
 * Transforms @rectangle into a #cairo_rectangle_t, i.e. converts the
 * (x1, y1)-(x2, y2) corner representation into an origin (x, y) taken
 * from (x1, y1) plus a width (x2 - x1) and a height (y2 - y1).
 *
 * Since: 0.78
 */
void
poppler_rectangle_to_cairo (const PopplerRectangle *rectangle,
                            cairo_rectangle_t      *cairo_rectangle)
{
  g_return_if_fail (rectangle != nullptr);
  g_return_if_fail (cairo_rectangle != nullptr);

  /* Origin first ... */
  const double origin_x = rectangle->x1;
  const double origin_y = rectangle->y1;
  cairo_rectangle->x = origin_x;
  cairo_rectangle->y = origin_y;

  /* ... then the extent measured from that origin. */
  cairo_rectangle->width = rectangle->x2 - origin_x;
  cairo_rectangle->height = rectangle->y2 - origin_y;
}
/* PopplerPoint type */
/**
 * poppler_rectangle_find_get_match_continued:
 * @rectangle: a #PopplerRectangle
 *
 * When using poppler_page_find_text_with_options() with the
 * %POPPLER_FIND_MULTILINE flag, a match may span more than one line
 * and thus consist of more than one rectangle. Every rectangle belonging
 * to the same match will return %TRUE from this function, except for
 * the last rectangle, where this function will return %FALSE.
 *
 * Note that you must only call this function on a #PopplerRectangle
 * returned in the list from poppler_page_find_text() or
 * poppler_page_find_text_with_options().
 *
 * Returns: whether there are more rectangles belonging to the same match
 *
 * Since: 0.78
 */
gboolean
poppler_rectangle_find_get_match_continued (const PopplerRectangle *rectangle)
{
  g_return_val_if_fail (rectangle != nullptr, false);

  /* The flag lives in the private extension behind the public struct. */
  const PopplerRectangleExtended *extended =
    reinterpret_cast<const PopplerRectangleExtended *>(rectangle);
  const bool continued = extended->match_continued;

  return continued;
}
/**
 * poppler_rectangle_find_get_ignored_hyphen:
 * @rectangle: a #PopplerRectangle
 *
 * When using poppler_page_find_text_with_options() with the
 * %POPPLER_FIND_MULTILINE flag, a match may span more than one line,
 * and may have been formed by ignoring a hyphen at the end of the line.
 * When this happens at the end of the line corresponding to @rectangle,
 * this function returns %TRUE (and then poppler_rectangle_find_get_match_continued()
 * will also return %TRUE); otherwise it returns %FALSE.
 *
 * Note that you must only call this function on a #PopplerRectangle
 * returned in the list from poppler_page_find_text() or
 * poppler_page_find_text_with_options().
 *
 * Returns: whether a hyphen was ignored at the end of the line corresponding
 *   to @rectangle.
 *
 * Since: 0.78
 */
gboolean
poppler_rectangle_find_get_ignored_hyphen (const PopplerRectangle *rectangle)
{
  g_return_val_if_fail (rectangle != nullptr, false);

  /* The flag lives in the private extension behind the public struct. */
  const PopplerRectangleExtended *extended =
    reinterpret_cast<const PopplerRectangleExtended *>(rectangle);
  const bool hyphen_ignored = extended->ignored_hyphen;

  return hyphen_ignored;
}
POPPLER_DEFINE_BOXED_TYPE (PopplerPoint, poppler_point,
poppler_point_copy,
......
......@@ -176,6 +176,13 @@ POPPLER_PUBLIC
PopplerRectangle *poppler_rectangle_copy (PopplerRectangle *rectangle);
POPPLER_PUBLIC
void poppler_rectangle_free (PopplerRectangle *rectangle);
POPPLER_PUBLIC
void poppler_rectangle_to_cairo (const PopplerRectangle *rectangle,
cairo_rectangle_t *cairo_rectangle);
POPPLER_PUBLIC
gboolean poppler_rectangle_find_get_match_continued (const PopplerRectangle *rectangle);
POPPLER_PUBLIC
gboolean poppler_rectangle_find_get_ignored_hyphen (const PopplerRectangle *rectangle);
/* A point on a page, with coordinates in PDF points. */
#define POPPLER_TYPE_POINT (poppler_point_get_type ())
......
......@@ -109,6 +109,24 @@ struct _PopplerStructureElement
const StructElement *elem;
};
/*
 * PopplerRectangleExtended:
 *
 * The real type behind the public PopplerRectangle.
 * Must be ABI compatible to it!
 *
 * Allocated rectangles are created as this struct and handed out through
 * PopplerRectangle pointers; code that needs the extra members casts the
 * pointer back. The two flags are exposed only through
 * poppler_rectangle_find_get_match_continued() and
 * poppler_rectangle_find_get_ignored_hyphen().
 */
typedef struct {
/*< private >*/
/* Coordinates, accessed through the public struct as x1/y1/x2/y2.
 * NOTE(review): these must stay first and in the same order as in the public
 * PopplerRectangle declaration -- verify against the public header. */
double x1;
double y1;
double x2;
double y2;
/* true when further rectangles of the same search match follow this one. */
bool match_continued;
/* true when a hyphen at the end of this rectangle's line was ignored to
 * form the match. */
bool ignored_hyphen;
} PopplerRectangleExtended;
PopplerRectangle* poppler_rectangle_new_from_pdf_rectangle (const PDFRectangle* rect);
GList *_poppler_document_get_layers (PopplerDocument *document);
GList *_poppler_document_get_layer_rbgroup (PopplerDocument *document,
Layer *layer);
......
......@@ -157,6 +157,10 @@ typedef enum /*< flags >*/
* @POPPLER_FIND_IGNORE_DIACRITICS: do diacritics insensitive search,
* i.e. ignore accents, umlauts, diaeresis,etc. while matching. This
* option will be ignored if the search term is not pure ascii. Since 0.73.
* @POPPLER_FIND_MULTILINE: allows matching text that spans from the
* end of one line to the start of the next line. (Currently it won't match text
* spanning more than two lines.) Automatically ignores a hyphen at the end of a
* line, and allows whitespace in the search term to match at the line break. Since: 0.78.
*
* Flags used while searching text in a page
*
......@@ -168,7 +172,8 @@ typedef enum /*< flags >*/
POPPLER_FIND_CASE_SENSITIVE = 1 << 0,
POPPLER_FIND_BACKWARDS = 1 << 1,
POPPLER_FIND_WHOLE_WORDS_ONLY = 1 << 2,
POPPLER_FIND_IGNORE_DIACRITICS = 1 << 3
POPPLER_FIND_IGNORE_DIACRITICS = 1 << 3,
POPPLER_FIND_MULTILINE = 1 << 4
} PopplerFindFlags;
typedef struct _PopplerDocument PopplerDocument;
......
......@@ -93,6 +93,9 @@ poppler_quadrilateral_new
poppler_rectangle_copy
poppler_rectangle_free
poppler_rectangle_new
poppler_rectangle_to_cairo
poppler_rectangle_find_get_match_continued
poppler_rectangle_find_get_ignored_hyphen
poppler_text_attributes_copy
poppler_text_attributes_free
poppler_text_attributes_new
......
......@@ -3871,8 +3871,8 @@ bool TextPage::findText(Unicode *s, int len,
double *xMin, double *yMin,
double *xMax, double *yMax) {
return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast,
caseSensitive, false, backward, wholeWord,
xMin, yMin, xMax, yMax);
caseSensitive, false, false, backward, wholeWord,
xMin, yMin, xMax, yMax, nullptr, nullptr, nullptr, nullptr, nullptr);
}
bool TextPage::findText(Unicode *s, int len,
......@@ -3882,10 +3882,28 @@ bool TextPage::findText(Unicode *s, int len,
bool backward, bool wholeWord,
double *xMin, double *yMin,
double *xMax, double *yMax) {
return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast,
caseSensitive, ignoreDiacritics, false, backward, wholeWord,
xMin, yMin, xMax, yMax, nullptr, nullptr, nullptr, nullptr, nullptr);
}
bool TextPage::findText(Unicode *s, int len,
bool startAtTop, bool stopAtBottom,
bool startAtLast, bool stopAtLast,
bool caseSensitive, bool ignoreDiacritics,
bool matchAcrossLines,
bool backward, bool wholeWord,
double *xMin, double *yMin,
double *xMax, double *yMax,
double *xMinNext, double *yMinNext,
double *xMaxNext, double *yMaxNext, bool *afterHyphen) {
TextBlock *blk;
TextLine *line;
Unicode *s2, *txt, *reordered;
Unicode *p;
Unicode *nextline;
int nextline_len;
bool nextlineAfterHyphen = false;
int txtSize, m, i, j, k;
double xStart, yStart, xStop, yStop;
double xMin0, yMin0, xMax0, yMax0;
......@@ -3896,6 +3914,10 @@ bool TextPage::findText(Unicode *s, int len,
return false;
}
if (matchAcrossLines && backward) {
// matchAcrossLines is unimplemented for backward search
matchAcrossLines = false;
}
// handle right-to-left text
reordered = (Unicode*)gmallocn(len, sizeof(Unicode));
reorderText(s, len, nullptr, primaryLR, nullptr, reordered);
......@@ -3988,6 +4010,16 @@ bool TextPage::findText(Unicode *s, int len,
&line->normalized_len,
&line->normalized_idx,
true);
if (matchAcrossLines && line->next && !line->next->normalized)
line->next->normalized = unicodeNormalizeNFKC(line->next->text, line->next->len,
&line->next->normalized_len,
&line->next->normalized_idx,
true);
nextline = nullptr;
nextline_len = 0;
// convert the line to uppercase
m = line->normalized_len;
......@@ -4003,6 +4035,14 @@ bool TextPage::findText(Unicode *s, int len,
m = line->ascii_len;
else
ignoreDiacritics = false;
if (matchAcrossLines && line->next && !line->next->ascii_translation)
unicodeToAscii7(line->next->normalized,
line->next->normalized_len,
&line->next->ascii_translation,
&line->next->ascii_len,
line->next->normalized_idx,
&line->next->ascii_idx);
}
if (!caseSensitive) {
if (m > txtSize) {
......@@ -4015,40 +4055,111 @@ bool TextPage::findText(Unicode *s, int len,
else
txt[k] = unicodeToUpper(line->normalized[k]);
}
if (matchAcrossLines && line->next) {
nextline_len = ignoreDiacritics ? line->next->ascii_len : line->next->normalized_len;
nextline = (Unicode *) gmallocn(nextline_len, sizeof(Unicode));
for (k = 0; k < nextline_len; ++k) {
nextline[k] = ignoreDiacritics ? unicodeToUpper(line->next->ascii_translation[k])
: unicodeToUpper(line->next->normalized[k]);
}
}
} else {
if (ignoreDiacritics)
txt = line->ascii_translation;
else
txt = line->normalized;
if (matchAcrossLines && line->next) {
nextline_len = ignoreDiacritics ? line->next->ascii_len : line->next->normalized_len;
nextline = ignoreDiacritics ? line->next->ascii_translation : line->next->normalized;
}
}
// search each position in this line
j = backward ? m - len : 0;
p = txt + j;
while (backward ? j >= 0 : j <= m - len) {
if (!wholeWord ||
((j == 0 || !unicodeTypeAlphaNum(txt[j - 1])) &&
(j + len == m || !unicodeTypeAlphaNum(txt[j + len])))) {
while (backward ? j >= 0 : j <= m - (nextline ? 1 : len)) {
bool wholeWordStartIsOk, wholeWordEndIsOk;
if (wholeWord) {
wholeWordStartIsOk = j == 0 || !unicodeTypeAlphaNum(txt[j - 1]);
if (nextline)
wholeWordEndIsOk = true; // word end may be in next line, so we'll check it later
else
wholeWordEndIsOk = j + len == m || !unicodeTypeAlphaNum(txt[j + len]);
}
if (!wholeWord || (wholeWordStartIsOk && wholeWordEndIsOk)) {
int n = 0;
bool spaceConsumedByNewline = false;
bool found_it;
// compare the strings
for (k = 0; k < len; ++k) {
if (p[k] != s2[k]) {
bool last_char_of_line = j + k == m - 1;
bool last_char_of_search_term = k == len - 1;
if (p[k] != s2[k] || (nextline && last_char_of_line && !last_char_of_search_term)) {
// now check if the comparison failed at the end-of-line hyphen,
// and if so, keep on comparing at the next line
nextlineAfterHyphen = false;
</