Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/stext-search.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2024 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 #include <string.h> | |
| 26 #include <limits.h> | |
| 27 #include <assert.h> | |
| 28 | |
| 29 /* Enumerate marked selection */ | |
| 30 | |
| 31 static float hdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 32 { | |
| 33 float dx = b->x - a->x; | |
| 34 float dy = b->y - a->y; | |
| 35 return fz_abs(dx * dir->x - dy * dir->y); | |
| 36 } | |
| 37 | |
| 38 static float vdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 39 { | |
| 40 float dx = b->x - a->x; | |
| 41 float dy = b->y - a->y; | |
| 42 return fz_abs(dx * dir->y - dy * dir->x); | |
| 43 } | |
| 44 | |
| 45 static float vecdot(fz_point a, fz_point b) | |
| 46 { | |
| 47 return a.x * b.x + a.y * b.y; | |
| 48 } | |
| 49 | |
| 50 static float linedist(fz_point origin, fz_point dir, fz_point q) | |
| 51 { | |
| 52 return vecdot(dir, fz_make_point(q.x - origin.x, q.y - origin.y)); | |
| 53 } | |
| 54 | |
| 55 static int line_length(fz_stext_line *line) | |
| 56 { | |
| 57 fz_stext_char *ch; | |
| 58 int n = 0; | |
| 59 for (ch = line->first_char; ch; ch = ch->next) | |
| 60 ++n; | |
| 61 return n; | |
| 62 } | |
| 63 | |
| 64 static float largest_size_in_line(fz_stext_line *line) | |
| 65 { | |
| 66 fz_stext_char *ch; | |
| 67 float size = 0; | |
| 68 for (ch = line->first_char; ch; ch = ch->next) | |
| 69 if (ch->size > size) | |
| 70 size = ch->size; | |
| 71 return size; | |
| 72 } | |
| 73 | |
| 74 static int find_closest_in_line(fz_stext_line *line, int idx, fz_point q) | |
| 75 { | |
| 76 fz_stext_char *ch; | |
| 77 float closest_dist = 1e30f; | |
| 78 int closest_idx = idx; | |
| 79 float d1, d2; | |
| 80 | |
| 81 float hsize = largest_size_in_line(line) / 2; | |
| 82 fz_point vdir = fz_make_point(-line->dir.y, line->dir.x); | |
| 83 fz_point hdir = line->dir; | |
| 84 | |
| 85 // Compute mid-line from quads! | |
| 86 fz_point p1 = fz_make_point( | |
| 87 (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2, | |
| 88 (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2 | |
| 89 ); | |
| 90 | |
| 91 // Signed distance perpendicular mid-line (positive is below) | |
| 92 float vdist = linedist(p1, vdir, q); | |
| 93 if (vdist < -hsize) | |
| 94 return idx; | |
| 95 if (vdist > hsize) | |
| 96 return idx + line_length(line); | |
| 97 | |
| 98 for (ch = line->first_char; ch; ch = ch->next) | |
| 99 { | |
| 100 if (ch->bidi & 1) | |
| 101 { | |
| 102 d1 = fz_abs(linedist(ch->quad.lr, hdir, q)); | |
| 103 d2 = fz_abs(linedist(ch->quad.ll, hdir, q)); | |
| 104 } | |
| 105 else | |
| 106 { | |
| 107 d1 = fz_abs(linedist(ch->quad.ll, hdir, q)); | |
| 108 d2 = fz_abs(linedist(ch->quad.lr, hdir, q)); | |
| 109 } | |
| 110 | |
| 111 if (d1 < closest_dist) | |
| 112 { | |
| 113 closest_dist = d1; | |
| 114 closest_idx = idx; | |
| 115 } | |
| 116 | |
| 117 if (d2 < closest_dist) | |
| 118 { | |
| 119 closest_dist = d2; | |
| 120 closest_idx = idx + 1; | |
| 121 } | |
| 122 | |
| 123 ++idx; | |
| 124 } | |
| 125 | |
| 126 return closest_idx; | |
| 127 } | |
| 128 | |
| 129 static int find_closest_in_page(fz_stext_page *page, fz_point q) | |
| 130 { | |
| 131 fz_stext_block *block; | |
| 132 fz_stext_line *line; | |
| 133 fz_stext_line *closest_line = NULL; | |
| 134 int closest_idx = 0; | |
| 135 float closest_vdist = 1e30f; | |
| 136 float closest_hdist = 1e30f; | |
| 137 int idx = 0; | |
| 138 | |
| 139 for (block = page->first_block; block; block = block->next) | |
| 140 { | |
| 141 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 142 continue; | |
| 143 for (line = block->u.t.first_line; line; line = line->next) | |
| 144 { | |
| 145 float hsize = largest_size_in_line(line) / 2; | |
| 146 fz_point hdir = line->dir; | |
| 147 fz_point vdir = fz_make_point(-line->dir.y, line->dir.x); | |
| 148 | |
| 149 // Compute mid-line from quads! | |
| 150 fz_point p1 = fz_make_point( | |
| 151 (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2, | |
| 152 (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2 | |
| 153 ); | |
| 154 fz_point p2 = fz_make_point( | |
| 155 (line->last_char->quad.lr.x + line->last_char->quad.ur.x) / 2, | |
| 156 (line->last_char->quad.lr.y + line->last_char->quad.ur.y) / 2 | |
| 157 ); | |
| 158 | |
| 159 // Signed distance perpendicular mid-line (positive is below) | |
| 160 float vdist = linedist(p1, vdir, q); | |
| 161 | |
| 162 // Signed distance tangent to mid-line from end points (positive is to end) | |
| 163 float hdist1 = linedist(p1, hdir, q); | |
| 164 float hdist2 = linedist(p2, hdir, q); | |
| 165 | |
| 166 // Within the line itself! | |
| 167 if (vdist >= -hsize && vdist <= hsize && (hdist1 > 0) != (hdist2 > 0)) | |
| 168 { | |
| 169 closest_vdist = 0; | |
| 170 closest_hdist = 0; | |
| 171 closest_line = line; | |
| 172 closest_idx = idx; | |
| 173 } | |
| 174 else | |
| 175 { | |
| 176 // Vertical distance from mid-line. | |
| 177 float avdist = fz_abs(vdist); | |
| 178 | |
| 179 // Horizontal distance from closest end-point | |
| 180 float ahdist = fz_min(fz_abs(hdist1), fz_abs(hdist2)); | |
| 181 | |
| 182 if (avdist < hsize) | |
| 183 { | |
| 184 // Within extended line | |
| 185 if (ahdist <= closest_hdist) | |
| 186 { | |
| 187 closest_vdist = 0; | |
| 188 closest_hdist = ahdist; | |
| 189 closest_line = line; | |
| 190 closest_idx = idx; | |
| 191 } | |
| 192 } | |
| 193 else | |
| 194 { | |
| 195 // Outside line | |
| 196 // TODO: closest column? | |
| 197 if (avdist <= closest_vdist) | |
| 198 { | |
| 199 closest_vdist = avdist; | |
| 200 closest_line = line; | |
| 201 closest_idx = idx; | |
| 202 } | |
| 203 } | |
| 204 } | |
| 205 | |
| 206 idx += line_length(line); | |
| 207 } | |
| 208 } | |
| 209 | |
| 210 if (closest_line) | |
| 211 return find_closest_in_line(closest_line, closest_idx, q); | |
| 212 | |
| 213 return 0; | |
| 214 } | |
| 215 | |
| 216 struct callbacks | |
| 217 { | |
| 218 void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch); | |
| 219 void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln); | |
| 220 void *arg; | |
| 221 }; | |
| 222 | |
| 223 static void | |
| 224 fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb) | |
| 225 { | |
| 226 fz_stext_block *block; | |
| 227 fz_stext_line *line; | |
| 228 fz_stext_char *ch; | |
| 229 int idx, start, end; | |
| 230 int inside; | |
| 231 | |
| 232 start = find_closest_in_page(page, a); | |
| 233 end = find_closest_in_page(page, b); | |
| 234 | |
| 235 if (start > end) | |
| 236 idx = start, start = end, end = idx; | |
| 237 | |
| 238 if (start == end) | |
| 239 return; | |
| 240 | |
| 241 inside = 0; | |
| 242 idx = 0; | |
| 243 for (block = page->first_block; block; block = block->next) | |
| 244 { | |
| 245 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 246 continue; | |
| 247 for (line = block->u.t.first_line; line; line = line->next) | |
| 248 { | |
| 249 for (ch = line->first_char; ch; ch = ch->next) | |
| 250 { | |
| 251 if (!inside) | |
| 252 if (idx == start) | |
| 253 inside = 1; | |
| 254 if (inside) | |
| 255 cb->on_char(ctx, cb->arg, line, ch); | |
| 256 if (++idx == end) | |
| 257 return; | |
| 258 } | |
| 259 if (inside) | |
| 260 cb->on_line(ctx, cb->arg, line); | |
| 261 } | |
| 262 } | |
| 263 } | |
| 264 | |
| 265 fz_quad | |
| 266 fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode) | |
| 267 { | |
| 268 fz_stext_block *block; | |
| 269 fz_stext_line *line; | |
| 270 fz_stext_char *ch; | |
| 271 fz_quad handles; | |
| 272 int idx, start, end; | |
| 273 int pc; | |
| 274 | |
| 275 start = find_closest_in_page(page, *a); | |
| 276 end = find_closest_in_page(page, *b); | |
| 277 | |
| 278 if (start > end) | |
| 279 idx = start, start = end, end = idx; | |
| 280 | |
| 281 handles.ll = handles.ul = *a; | |
| 282 handles.lr = handles.ur = *b; | |
| 283 | |
| 284 idx = 0; | |
| 285 for (block = page->first_block; block; block = block->next) | |
| 286 { | |
| 287 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 288 continue; | |
| 289 for (line = block->u.t.first_line; line; line = line->next) | |
| 290 { | |
| 291 pc = '\n'; | |
| 292 for (ch = line->first_char; ch; ch = ch->next) | |
| 293 { | |
| 294 if (idx <= start) | |
| 295 { | |
| 296 if (mode == FZ_SELECT_CHARS | |
| 297 || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n')) | |
| 298 || (mode == FZ_SELECT_LINES && (pc == '\n'))) | |
| 299 { | |
| 300 handles.ll = ch->quad.ll; | |
| 301 handles.ul = ch->quad.ul; | |
| 302 *a = ch->origin; | |
| 303 } | |
| 304 } | |
| 305 if (idx >= end) | |
| 306 { | |
| 307 if (mode == FZ_SELECT_CHARS | |
| 308 || (mode == FZ_SELECT_WORDS && (ch->c == ' '))) | |
| 309 { | |
| 310 handles.lr = ch->quad.ll; | |
| 311 handles.ur = ch->quad.ul; | |
| 312 *b = ch->origin; | |
| 313 return handles; | |
| 314 } | |
| 315 if (!ch->next) | |
| 316 { | |
| 317 handles.lr = ch->quad.lr; | |
| 318 handles.ur = ch->quad.ur; | |
| 319 *b = ch->quad.lr; | |
| 320 return handles; | |
| 321 } | |
| 322 } | |
| 323 pc = ch->c; | |
| 324 ++idx; | |
| 325 } | |
| 326 } | |
| 327 } | |
| 328 | |
| 329 return handles; | |
| 330 } | |
| 331 | |
| 332 /* Highlight selection */ | |
| 333 | |
| 334 struct highlight | |
| 335 { | |
| 336 int len, cap; | |
| 337 fz_quad *box; | |
| 338 float hfuzz, vfuzz; | |
| 339 }; | |
| 340 | |
| 341 int same_point(fz_point a, fz_point b) | |
| 342 { | |
| 343 int dx = fz_abs(a.x - b.x); | |
| 344 int dy = fz_abs(a.y - b.y); | |
| 345 return (dx < 0.1 && dy < 0.1); | |
| 346 } | |
| 347 | |
| 348 int is_near(float hfuzz, float vfuzz, fz_point hdir, fz_point end, fz_point p1, fz_point p2) | |
| 349 { | |
| 350 float v = fz_abs(linedist(end, fz_make_point(-hdir.y, hdir.x), p1)); | |
| 351 float d1 = fz_abs(linedist(end, hdir, p1)); | |
| 352 float d2 = fz_abs(linedist(end, hdir, p2)); | |
| 353 return (v < vfuzz && d1 < hfuzz && d1 < d2); | |
| 354 } | |
| 355 | |
| 356 static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) | |
| 357 { | |
| 358 struct highlight *hits = arg; | |
| 359 float vfuzz = hits->vfuzz * ch->size; | |
| 360 float hfuzz = hits->hfuzz * ch->size; | |
| 361 fz_point dir = line->dir; | |
| 362 | |
| 363 // Skip zero-extent quads | |
| 364 if (same_point(ch->quad.ll, ch->quad.lr)) | |
| 365 return; | |
| 366 | |
| 367 if (hits->len > 0) | |
| 368 { | |
| 369 fz_quad *end = &hits->box[hits->len-1]; | |
| 370 | |
| 371 if (is_near(hfuzz, vfuzz, dir, end->lr, ch->quad.ll, ch->quad.lr) && | |
| 372 is_near(hfuzz, vfuzz, dir, end->ur, ch->quad.ul, ch->quad.ur)) | |
| 373 { | |
| 374 end->ur = ch->quad.ur; | |
| 375 end->lr = ch->quad.lr; | |
| 376 return; | |
| 377 } | |
| 378 | |
| 379 if (is_near(hfuzz, vfuzz, dir, end->ll, ch->quad.lr, ch->quad.ll) && | |
| 380 is_near(hfuzz, vfuzz, dir, end->ul, ch->quad.ur, ch->quad.ul)) | |
| 381 { | |
| 382 end->ul = ch->quad.ul; | |
| 383 end->ll = ch->quad.ll; | |
| 384 return; | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 if (hits->len < hits->cap) | |
| 389 hits->box[hits->len++] = ch->quad; | |
| 390 } | |
| 391 | |
| 392 static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line) | |
| 393 { | |
| 394 } | |
| 395 | |
| 396 int | |
| 397 fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads) | |
| 398 { | |
| 399 struct callbacks cb; | |
| 400 struct highlight hits; | |
| 401 | |
| 402 hits.len = 0; | |
| 403 hits.cap = max_quads; | |
| 404 hits.box = quads; | |
| 405 hits.hfuzz = 0.5f; /* merge large gaps */ | |
| 406 hits.vfuzz = 0.1f; | |
| 407 | |
| 408 cb.on_char = on_highlight_char; | |
| 409 cb.on_line = on_highlight_line; | |
| 410 cb.arg = &hits; | |
| 411 | |
| 412 fz_enumerate_selection(ctx, page, a, b, &cb); | |
| 413 | |
| 414 return hits.len; | |
| 415 } | |
| 416 | |
| 417 /* Copy selection */ | |
| 418 | |
| 419 static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) | |
| 420 { | |
| 421 fz_buffer *buffer = arg; | |
| 422 int c = ch->c; | |
| 423 if (c < 32) | |
| 424 c = FZ_REPLACEMENT_CHARACTER; | |
| 425 fz_append_rune(ctx, buffer, c); | |
| 426 } | |
| 427 | |
| 428 static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line) | |
| 429 { | |
| 430 fz_buffer *buffer = arg; | |
| 431 fz_append_byte(ctx, buffer, '\r'); | |
| 432 fz_append_byte(ctx, buffer, '\n'); | |
| 433 } | |
| 434 | |
| 435 static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line) | |
| 436 { | |
| 437 fz_buffer *buffer = arg; | |
| 438 fz_append_byte(ctx, buffer, '\n'); | |
| 439 } | |
| 440 | |
| 441 char * | |
| 442 fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf) | |
| 443 { | |
| 444 struct callbacks cb; | |
| 445 fz_buffer *buffer; | |
| 446 unsigned char *s; | |
| 447 | |
| 448 buffer = fz_new_buffer(ctx, 1024); | |
| 449 fz_try(ctx) | |
| 450 { | |
| 451 cb.on_char = on_copy_char; | |
| 452 cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf; | |
| 453 cb.arg = buffer; | |
| 454 | |
| 455 fz_enumerate_selection(ctx, page, a, b, &cb); | |
| 456 fz_terminate_buffer(ctx, buffer); | |
| 457 } | |
| 458 fz_catch(ctx) | |
| 459 { | |
| 460 fz_drop_buffer(ctx, buffer); | |
| 461 fz_rethrow(ctx); | |
| 462 } | |
| 463 fz_buffer_extract(ctx, buffer, &s); /* take over the data */ | |
| 464 fz_drop_buffer(ctx, buffer); | |
| 465 return (char*)s; | |
| 466 } | |
| 467 | |
| 468 char * | |
| 469 fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf) | |
| 470 { | |
| 471 fz_stext_block *block; | |
| 472 fz_stext_line *line; | |
| 473 fz_stext_char *ch; | |
| 474 fz_buffer *buffer; | |
| 475 unsigned char *s; | |
| 476 | |
| 477 int need_new_line = 0; | |
| 478 | |
| 479 buffer = fz_new_buffer(ctx, 1024); | |
| 480 fz_try(ctx) | |
| 481 { | |
| 482 for (block = page->first_block; block; block = block->next) | |
| 483 { | |
| 484 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 485 continue; | |
| 486 for (line = block->u.t.first_line; line; line = line->next) | |
| 487 { | |
| 488 int line_had_text = 0; | |
| 489 for (ch = line->first_char; ch; ch = ch->next) | |
| 490 { | |
| 491 fz_rect r = fz_rect_from_quad(ch->quad); | |
| 492 if (!fz_is_empty_rect(fz_intersect_rect(r, area))) | |
| 493 { | |
| 494 line_had_text = 1; | |
| 495 if (need_new_line) | |
| 496 { | |
| 497 fz_append_string(ctx, buffer, crlf ? "\r\n" : "\n"); | |
| 498 need_new_line = 0; | |
| 499 } | |
| 500 fz_append_rune(ctx, buffer, ch->c < 32 ? FZ_REPLACEMENT_CHARACTER : ch->c); | |
| 501 } | |
| 502 } | |
| 503 if (line_had_text) | |
| 504 need_new_line = 1; | |
| 505 } | |
| 506 } | |
| 507 fz_terminate_buffer(ctx, buffer); | |
| 508 } | |
| 509 fz_catch(ctx) | |
| 510 { | |
| 511 fz_drop_buffer(ctx, buffer); | |
| 512 fz_rethrow(ctx); | |
| 513 } | |
| 514 | |
| 515 fz_buffer_extract(ctx, buffer, &s); /* take over the data */ | |
| 516 fz_drop_buffer(ctx, buffer); | |
| 517 return (char*)s; | |
| 518 } | |
| 519 | |
| 520 /* String search */ | |
| 521 | |
/*
	Canonicalise a code point for searching.

	Full-width ASCII forms (U+FF01..U+FF5E) are folded to U+0021..U+007E,
	several space and line-break characters become a plain space, and
	everything else is passed through fz_toupper().
*/
static inline int canon(int c)
{
	// Full-width form to ASCII: subtract the distance between the two ranges.
	if (c >= 0xFF01 && c <= 0xFF5E)
		c -= 0xFF01 - 0x21;

	switch (c)
	{
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case '\r':
	case '\n':
	case '\t':
		return ' ';
	default:
		return fz_toupper(c);
	}
}
| 536 | |
/*
	Decode one rune from s with fz_chartorune(), store its canonical
	form in *c, and return fz_chartorune()'s result (the amount the
	callers advance s by).
*/
static inline int chartocanon(int *c, const char *s)
{
	int rune;
	int used = fz_chartorune(&rune, s);

	*c = canon(rune);
	return used;
}
| 543 | |
/*
	Test whether needle n matches at the very start of haystack h.

	Both strings are compared rune by rune in canonical form (see
	canon()). A run of canonical spaces in either string is consumed as
	a whole, so any amount of whitespace matches any other amount.

	Returns a pointer just past the last haystack rune that was matched
	(a matched whitespace run counts as a single rune here), or NULL if
	the needle does not match.
*/
static const char *match_string(const char *h, const char *n)
{
	int hc, nc;
	const char *e = h;
	h += chartocanon(&hc, h);
	n += chartocanon(&nc, n);
	/* Stop as soon as the needle is exhausted. Without the nc check, a
	 * haystack that ends at the same time (hc == nc == 0) would keep
	 * the loop going and read past both terminators. */
	while (nc != 0 && hc == nc)
	{
		e = h;
		if (hc == ' ')
			do
				h += chartocanon(&hc, h);
			while (hc == ' ');
		else
			h += chartocanon(&hc, h);
		if (nc == ' ')
			do
				n += chartocanon(&nc, n);
			while (nc == ' ');
		else
			n += chartocanon(&nc, n);
	}
	return nc == 0 ? e : NULL;
}
| 568 | |
/*
	Find the first position in s at which needle matches.

	Candidate positions are tried one byte at a time. On success the
	start of the match is returned and *endp is set to the value
	returned by match_string(); on failure both are NULL.
*/
static const char *find_string(const char *s, const char *needle, const char **endp)
{
	for (; *s; ++s)
	{
		const char *stop = match_string(s, needle);
		if (stop)
		{
			*endp = stop;
			return s;
		}
	}

	*endp = NULL;
	return NULL;
}
| 581 | |
| 582 struct search_data | |
| 583 { | |
| 584 /* Number of hits so far.*/ | |
| 585 int count_quads; | |
| 586 int count_hits; | |
| 587 int max_quads; | |
| 588 int quad_fill; | |
| 589 fz_quad locals[32]; | |
| 590 fz_quad *quads; | |
| 591 float hfuzz, vfuzz; | |
| 592 fz_search_callback_fn *cb; | |
| 593 void *opaque; | |
| 594 }; | |
| 595 | |
| 596 static int hit_char(fz_context *ctx, struct search_data *hits, fz_stext_line *line, fz_stext_char *ch, int is_at_start) | |
| 597 { | |
| 598 float vfuzz = ch->size * hits->vfuzz; | |
| 599 float hfuzz = ch->size * hits->hfuzz; | |
| 600 | |
| 601 /* Can we continue an existing quad? */ | |
| 602 if (hits->quad_fill > 0 && !is_at_start) | |
| 603 { | |
| 604 fz_quad *end = &hits->quads[hits->quad_fill-1]; | |
| 605 if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz | |
| 606 && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz | |
| 607 && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz | |
| 608 && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz) | |
| 609 { | |
| 610 /* Yes */ | |
| 611 end->ur = ch->quad.ur; | |
| 612 end->lr = ch->quad.lr; | |
| 613 return 0; | |
| 614 } | |
| 615 } | |
| 616 | |
| 617 if (is_at_start && hits->quad_fill > 0) | |
| 618 { | |
| 619 /* Send the quads we have queued. */ | |
| 620 hits->count_hits++; | |
| 621 if (hits->cb && hits->cb(ctx, hits->opaque, hits->quad_fill, hits->quads)) | |
| 622 return 1; | |
| 623 hits->quad_fill = 0; | |
| 624 } | |
| 625 | |
| 626 if (hits->quad_fill == hits->max_quads) | |
| 627 { | |
| 628 int newmax = hits->max_quads * 2; | |
| 629 if (hits->quads == hits->locals) | |
| 630 { | |
| 631 hits->quads = fz_malloc(ctx, sizeof(hits->quads[0]) * newmax); | |
| 632 memcpy(hits->quads, hits->locals, sizeof(hits->quads[0]) * nelem(hits->locals)); | |
| 633 } | |
| 634 else | |
| 635 { | |
| 636 hits->quads = fz_realloc(ctx, hits->quads, sizeof(hits->quads[0]) * newmax); | |
| 637 } | |
| 638 hits->max_quads = newmax; | |
| 639 } | |
| 640 hits->quads[hits->quad_fill++] = ch->quad; | |
| 641 hits->count_quads++; | |
| 642 | |
| 643 return 0; | |
| 644 } | |
| 645 | |
| 646 int | |
| 647 fz_search_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_search_callback_fn *cb, void *opaque) | |
| 648 { | |
| 649 struct search_data hits; | |
| 650 fz_stext_block *block; | |
| 651 fz_stext_line *line; | |
| 652 fz_stext_char *ch; | |
| 653 fz_buffer *buffer; | |
| 654 const char *haystack, *begin, *end; | |
| 655 int c, inside; | |
| 656 | |
| 657 if (strlen(needle) == 0) | |
| 658 return 0; | |
| 659 | |
| 660 hits.count_quads = 0; | |
| 661 hits.count_hits = 0; | |
| 662 hits.quad_fill = 0; | |
| 663 hits.max_quads = nelem(hits.locals); | |
| 664 hits.quads = hits.locals; | |
| 665 hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ | |
| 666 hits.vfuzz = 0.1f; | |
| 667 hits.cb = cb; | |
| 668 hits.opaque = opaque; | |
| 669 | |
| 670 buffer = fz_new_buffer_from_stext_page(ctx, page); | |
| 671 fz_try(ctx) | |
| 672 { | |
| 673 haystack = fz_string_from_buffer(ctx, buffer); | |
| 674 begin = find_string(haystack, needle, &end); | |
| 675 if (!begin) | |
| 676 goto no_more_matches; | |
| 677 | |
| 678 inside = 0; | |
| 679 for (block = page->first_block; block; block = block->next) | |
| 680 { | |
| 681 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 682 continue; | |
| 683 for (line = block->u.t.first_line; line; line = line->next) | |
| 684 { | |
| 685 for (ch = line->first_char; ch; ch = ch->next) | |
| 686 { | |
| 687 try_new_match: | |
| 688 if (!inside) | |
| 689 { | |
| 690 if (haystack >= begin) | |
| 691 inside = 1; | |
| 692 } | |
| 693 if (inside) | |
| 694 { | |
| 695 if (haystack < end) | |
| 696 { | |
| 697 if (hit_char(ctx, &hits, line, ch, haystack == begin)) | |
| 698 goto no_more_matches; | |
| 699 } | |
| 700 else | |
| 701 { | |
| 702 inside = 0; | |
| 703 begin = find_string(haystack, needle, &end); | |
| 704 if (!begin) | |
| 705 goto no_more_matches; | |
| 706 else | |
| 707 goto try_new_match; | |
| 708 } | |
| 709 } | |
| 710 haystack += fz_chartorune(&c, haystack); | |
| 711 } | |
| 712 assert(*haystack == '\n'); | |
| 713 ++haystack; | |
| 714 } | |
| 715 assert(*haystack == '\n'); | |
| 716 ++haystack; | |
| 717 } | |
| 718 no_more_matches: | |
| 719 /* Send the quads we have queued. */ | |
| 720 if (hits.quad_fill) | |
| 721 { | |
| 722 hits.count_hits++; | |
| 723 if (hits.cb) | |
| 724 (void)hits.cb(ctx, hits.opaque, hits.quad_fill, hits.quads); | |
| 725 } | |
| 726 } | |
| 727 fz_always(ctx) | |
| 728 { | |
| 729 fz_drop_buffer(ctx, buffer); | |
| 730 if (hits.quads != hits.locals) | |
| 731 fz_free(ctx, hits.quads); | |
| 732 } | |
| 733 fz_catch(ctx) | |
| 734 fz_rethrow(ctx); | |
| 735 | |
| 736 return hits.count_hits; | |
| 737 } | |
| 738 | |
| 739 typedef struct | |
| 740 { | |
| 741 int *hit_mark; | |
| 742 fz_quad *quads; | |
| 743 int max_quads; | |
| 744 int fill; | |
| 745 int hit; | |
| 746 } oldsearch_data; | |
| 747 | |
| 748 static int | |
| 749 oldsearch_cb(fz_context *ctx, void *opaque, int num_quads, fz_quad *quads) | |
| 750 { | |
| 751 oldsearch_data *data = (oldsearch_data *)opaque; | |
| 752 int i; | |
| 753 int hit = data->hit++; | |
| 754 | |
| 755 for (i = 0; i < num_quads; i++) | |
| 756 { | |
| 757 if (data->fill == data->max_quads) | |
| 758 break; | |
| 759 if (data->hit_mark) | |
| 760 data->hit_mark[data->fill] = hit; | |
| 761 data->quads[data->fill] = quads[i]; | |
| 762 data->fill++; | |
| 763 } | |
| 764 | |
| 765 /* We never return 1 here, even if we fill up the buffer, as we | |
| 766 * want the old API to get the correct total number of quads. */ | |
| 767 return 0; | |
| 768 } | |
| 769 | |
| 770 int | |
| 771 fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, int *hit_mark, fz_quad *quads, int max_quads) | |
| 772 { | |
| 773 oldsearch_data data; | |
| 774 | |
| 775 data.hit_mark = hit_mark; | |
| 776 data.quads = quads; | |
| 777 data.max_quads = max_quads; | |
| 778 data.fill = 0; | |
| 779 data.hit = 0; | |
| 780 (void)fz_search_stext_page_cb(ctx, page, needle, oldsearch_cb, &data); | |
| 781 return data.fill; /* Return the number of quads we have read */ | |
| 782 } |
