Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/helper-stext.i @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 %{ | |
| 2 /* | |
| 3 # ------------------------------------------------------------------------ | |
| 4 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 5 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 6 # | |
| 7 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a | |
| 8 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 9 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 10 # ------------------------------------------------------------------------ | |
| 11 */ | |
| 12 // need own versions of ascender / descender | |
| 13 static const float | |
| 14 JM_font_ascender(fz_context *ctx, fz_font *font) | |
| 15 { | |
| 16 if (skip_quad_corrections) { | |
| 17 return 0.8f; | |
| 18 } | |
| 19 return fz_font_ascender(ctx, font); | |
| 20 } | |
| 21 | |
| 22 static const float | |
| 23 JM_font_descender(fz_context *ctx, fz_font *font) | |
| 24 { | |
| 25 if (skip_quad_corrections) { | |
| 26 return -0.2f; | |
| 27 } | |
| 28 return fz_font_descender(ctx, font); | |
| 29 } | |
| 30 | |
| 31 | |
| 32 //---------------------------------------------------------------- | |
| 33 // Return true if character is considered to be a word delimiter | |
| 34 //---------------------------------------------------------------- | |
| 35 static const int | |
| 36 JM_is_word_delimiter(int c, PyObject *delimiters) | |
| 37 { | |
| 38 if (c <= 32 || c == 160) return 1; // a standard delimiter | |
| 39 | |
| 40 // extra delimiters must be a non-empty sequence | |
| 41 if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) { | |
| 42 return 0; | |
| 43 } | |
| 44 | |
| 45 // convert to tuple for easier looping | |
| 46 PyObject *delims = PySequence_Tuple(delimiters); | |
| 47 if (!delims) { | |
| 48 PyErr_Clear(); | |
| 49 return 0; | |
| 50 } | |
| 51 | |
| 52 // Make 1-char PyObject from character given as integer | |
| 53 PyObject *cchar = Py_BuildValue("C", c); // single character PyObject | |
| 54 Py_ssize_t i, len = PyTuple_Size(delims); | |
| 55 for (i = 0; i < len; i++) { | |
| 56 int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i)); | |
| 57 if (rc == 0) { // equal to a delimiter character | |
| 58 Py_DECREF(cchar); | |
| 59 Py_DECREF(delims); | |
| 60 PyErr_Clear(); | |
| 61 return 1; | |
| 62 } | |
| 63 } | |
| 64 | |
| 65 Py_DECREF(delims); | |
| 66 PyErr_Clear(); | |
| 67 return 0; | |
| 68 } | |
| 69 | |
| 70 /* inactive | |
| 71 //----------------------------------------------------------------------------- | |
| 72 // Make OCR text page directly from an fz_page | |
| 73 //----------------------------------------------------------------------------- | |
| 74 fz_stext_page * | |
| 75 JM_new_stext_page_ocr_from_page(fz_context *ctx, fz_page *page, fz_rect rect, int flags, | |
| 76 const char *lang, const char *tessdata) | |
| 77 { | |
| 78 if (!page) return NULL; | |
| 79 int with_list = 1; | |
| 80 fz_stext_page *tp = NULL; | |
| 81 fz_device *dev = NULL, *ocr_dev = NULL; | |
| 82 fz_var(dev); | |
| 83 fz_var(ocr_dev); | |
| 84 fz_var(tp); | |
| 85 fz_stext_options options; | |
| 86 memset(&options, 0, sizeof options); | |
| 87 options.flags = flags; | |
| 88 //fz_matrix ctm = fz_identity; | |
| 89 fz_matrix ctm1 = fz_make_matrix(100/72, 0, 0, 100/72, 0, 0); | |
| 90 fz_matrix ctm2 = fz_make_matrix(400/72, 0, 0, 400/72, 0, 0); | |
| 91 | |
| 92 fz_try(ctx) { | |
| 93 tp = fz_new_stext_page(ctx, rect); | |
| 94 dev = fz_new_stext_device(ctx, tp, &options); | |
| 95 ocr_dev = fz_new_ocr_device(ctx, dev, fz_identity, rect, with_list, lang, tessdata, NULL); | |
| 96 fz_run_page(ctx, page, ocr_dev, fz_identity, NULL); | |
| 97 fz_close_device(ctx, ocr_dev); | |
| 98 fz_close_device(ctx, dev); | |
| 99 } | |
| 100 fz_always(ctx) { | |
| 101 fz_drop_device(ctx, dev); | |
| 102 fz_drop_device(ctx, ocr_dev); | |
| 103 } | |
| 104 fz_catch(ctx) { | |
| 105 fz_drop_stext_page(ctx, tp); | |
| 106 fz_rethrow(ctx); | |
| 107 } | |
| 108 return tp; | |
| 109 } | |
| 110 */ | |
| 111 | |
| 112 //--------------------------------------------------------------------------- | |
| 113 // APPEND non-ascii runes in unicode escape format to fz_buffer | |
| 114 //--------------------------------------------------------------------------- | |
| 115 void JM_append_rune(fz_context *ctx, fz_buffer *buff, int ch) | |
| 116 { | |
| 117 if (ch == 92) { // prevent accidental "\u" etc. | |
| 118 fz_append_string(ctx, buff, "\\u005c"); | |
| 119 } else if ((ch >= 32 && ch <= 255) || ch == 10) { | |
| 120 fz_append_byte(ctx, buff, ch); | |
| 121 } else if (ch >= 0xd800 && ch <= 0xdfff) { // surrogate Unicode range | |
| 122 fz_append_string(ctx, buff, "\\ufffd"); | |
| 123 } else if (ch <= 0xffff) { // 4 hex digits | |
| 124 fz_append_printf(ctx, buff, "\\u%04x", ch); | |
| 125 } else { // 8 hex digits | |
| 126 fz_append_printf(ctx, buff, "\\U%08x", ch); | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 | |
| 131 // re-compute char quad if ascender/descender values make no sense | |
| 132 static fz_quad | |
| 133 JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch) | |
| 134 { | |
| 135 if (skip_quad_corrections) { // no special handling | |
| 136 return ch->quad; | |
| 137 } | |
| 138 if (line->wmode) { // never touch vertical write mode | |
| 139 return ch->quad; | |
| 140 } | |
| 141 fz_font *font = ch->font; | |
| 142 float asc = JM_font_ascender(ctx, font); | |
| 143 float dsc = JM_font_descender(ctx, font); | |
| 144 float c, s, fsize = ch->size; | |
| 145 float asc_dsc = asc - dsc + FLT_EPSILON; | |
| 146 if (asc_dsc >= 1 && small_glyph_heights == 0) { // no problem | |
| 147 return ch->quad; | |
| 148 } | |
| 149 if (asc < 1e-3) { // probably Tesseract glyphless font | |
| 150 dsc = -0.1f; | |
| 151 asc = 0.9f; | |
| 152 asc_dsc = 1.0f; | |
| 153 } | |
| 154 | |
| 155 if (small_glyph_heights || asc_dsc < 1) { | |
| 156 dsc = dsc / asc_dsc; | |
| 157 asc = asc / asc_dsc; | |
| 158 } | |
| 159 asc_dsc = asc - dsc; | |
| 160 asc = asc * fsize / asc_dsc; | |
| 161 dsc = dsc * fsize / asc_dsc; | |
| 162 | |
| 163 /* ------------------------------ | |
| 164 Re-compute quad with the adjusted ascender / descender values: | |
| 165 Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, | |
| 166 re-rotate and move back to ch->origin location. | |
| 167 ------------------------------ */ | |
| 168 fz_matrix trm1, trm2, xlate1, xlate2; | |
| 169 fz_quad quad; | |
| 170 c = line->dir.x; // cosine | |
| 171 s = line->dir.y; // sine | |
| 172 trm1 = fz_make_matrix(c, -s, s, c, 0, 0); // derotate | |
| 173 trm2 = fz_make_matrix(c, s, -s, c, 0, 0); // rotate | |
| 174 if (c == -1) { // left-right flip | |
| 175 trm1.d = 1; | |
| 176 trm2.d = 1; | |
| 177 } | |
| 178 xlate1 = fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y); | |
| 179 xlate2 = fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y); | |
| 180 | |
| 181 quad = fz_transform_quad(ch->quad, xlate1); // move origin to (0,0) | |
| 182 quad = fz_transform_quad(quad, trm1); // de-rotate corners | |
| 183 | |
| 184 // adjust vertical coordinates | |
| 185 if (c == 1 && quad.ul.y > 0) { // up-down flip | |
| 186 quad.ul.y = asc; | |
| 187 quad.ur.y = asc; | |
| 188 quad.ll.y = dsc; | |
| 189 quad.lr.y = dsc; | |
| 190 } else { | |
| 191 quad.ul.y = -asc; | |
| 192 quad.ur.y = -asc; | |
| 193 quad.ll.y = -dsc; | |
| 194 quad.lr.y = -dsc; | |
| 195 } | |
| 196 | |
| 197 // adjust horizontal coordinates that are too crazy: | |
| 198 // (1) left x must be >= 0 | |
| 199 // (2) if bbox width is 0, lookup char advance in font. | |
| 200 if (quad.ll.x < 0) { | |
| 201 quad.ll.x = 0; | |
| 202 quad.ul.x = 0; | |
| 203 } | |
| 204 float cwidth = quad.lr.x - quad.ll.x; | |
| 205 if (cwidth < FLT_EPSILON) { | |
| 206 int glyph = fz_encode_character(ctx, font, ch->c); | |
| 207 if (glyph) { | |
| 208 float fwidth = fz_advance_glyph(ctx, font, glyph, line->wmode); | |
| 209 quad.lr.x = quad.ll.x + fwidth * fsize; | |
| 210 quad.ur.x = quad.lr.x; | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 quad = fz_transform_quad(quad, trm2); // rotate back | |
| 215 quad = fz_transform_quad(quad, xlate2); // translate back | |
| 216 return quad; | |
| 217 } | |
| 218 | |
| 219 | |
| 220 // return rect of char quad | |
| 221 static fz_rect | |
| 222 JM_char_bbox(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch) | |
| 223 { | |
| 224 fz_rect r = fz_rect_from_quad(JM_char_quad(ctx, line, ch)); | |
| 225 if (!line->wmode) { | |
| 226 return r; | |
| 227 } | |
| 228 if (r.y1 < r.y0 + ch->size) { | |
| 229 r.y0 = r.y1 - ch->size; | |
| 230 } | |
| 231 return r; | |
| 232 } | |
| 233 | |
| 234 | |
| 235 //------------------------------------------- | |
| 236 // make a buffer from an stext_page's text | |
| 237 //------------------------------------------- | |
| 238 fz_buffer * | |
| 239 JM_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *page) | |
| 240 { | |
| 241 fz_stext_block *block; | |
| 242 fz_stext_line *line; | |
| 243 fz_stext_char *ch; | |
| 244 fz_rect rect = page->mediabox; | |
| 245 fz_buffer *buf = NULL; | |
| 246 | |
| 247 fz_try(ctx) | |
| 248 { | |
| 249 buf = fz_new_buffer(ctx, 256); | |
| 250 for (block = page->first_block; block; block = block->next) { | |
| 251 if (block->type == FZ_STEXT_BLOCK_TEXT) { | |
| 252 for (line = block->u.t.first_line; line; line = line->next) { | |
| 253 for (ch = line->first_char; ch; ch = ch->next) { | |
| 254 if (!JM_rects_overlap(rect, JM_char_bbox(ctx, line, ch)) && | |
| 255 !fz_is_infinite_rect(rect)) { | |
| 256 continue; | |
| 257 } | |
| 258 fz_append_rune(ctx, buf, ch->c); | |
| 259 } | |
| 260 fz_append_byte(ctx, buf, '\n'); | |
| 261 } | |
| 262 fz_append_byte(ctx, buf, '\n'); | |
| 263 } | |
| 264 } | |
| 265 } | |
| 266 fz_catch(ctx) { | |
| 267 fz_drop_buffer(ctx, buf); | |
| 268 fz_rethrow(ctx); | |
| 269 } | |
| 270 return buf; | |
| 271 } | |
| 272 | |
| 273 | |
| 274 static float hdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 275 { | |
| 276 float dx = b->x - a->x; | |
| 277 float dy = b->y - a->y; | |
| 278 return fz_abs(dx * dir->x + dy * dir->y); | |
| 279 } | |
| 280 | |
| 281 | |
| 282 static float vdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 283 { | |
| 284 float dx = b->x - a->x; | |
| 285 float dy = b->y - a->y; | |
| 286 return fz_abs(dx * dir->y + dy * dir->x); | |
| 287 } | |
| 288 | |
| 289 | |
| 290 struct highlight | |
| 291 { | |
| 292 Py_ssize_t len; | |
| 293 PyObject *quads; | |
| 294 float hfuzz, vfuzz; | |
| 295 }; | |
| 296 | |
| 297 | |
| 298 static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) | |
| 299 { | |
| 300 struct highlight *hits = arg; | |
| 301 float vfuzz = ch->size * hits->vfuzz; | |
| 302 float hfuzz = ch->size * hits->hfuzz; | |
| 303 fz_quad ch_quad = JM_char_quad(ctx, line, ch); | |
| 304 if (hits->len > 0) { | |
| 305 PyObject *quad = PySequence_ITEM(hits->quads, hits->len - 1); | |
| 306 fz_quad end = JM_quad_from_py(quad); | |
| 307 Py_DECREF(quad); | |
| 308 if (hdist(&line->dir, &end.lr, &ch_quad.ll) < hfuzz | |
| 309 && vdist(&line->dir, &end.lr, &ch_quad.ll) < vfuzz | |
| 310 && hdist(&line->dir, &end.ur, &ch_quad.ul) < hfuzz | |
| 311 && vdist(&line->dir, &end.ur, &ch_quad.ul) < vfuzz) | |
| 312 { | |
| 313 end.ur = ch_quad.ur; | |
| 314 end.lr = ch_quad.lr; | |
| 315 quad = JM_py_from_quad(end); | |
| 316 PyList_SetItem(hits->quads, hits->len - 1, quad); | |
| 317 return; | |
| 318 } | |
| 319 } | |
| 320 LIST_APPEND_DROP(hits->quads, JM_py_from_quad(ch_quad)); | |
| 321 hits->len++; | |
| 322 } | |
| 323 | |
| 324 | |
// Canonicalize a code point for searching: several white space characters
// (NBSP, line / paragraph separator, CR, LF, TAB) become a blank and ASCII
// upper case letters become lower case. Anything else is returned as is.
static inline int canon(int c)
{
    /* TODO: proper unicode case folding */
    /* TODO: character equivalence (a matches ä, etc) */
    switch (c) {
    case 0xA0:
    case 0x2028:
    case 0x2029:
    case '\r':
    case '\n':
    case '\t':
        return ' ';
    default:
        break;
    }
    return (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c;
}
| 337 | |
| 338 | |
// Decode one rune from UTF-8 string 's' via fz_chartorune(), store its
// canonical form (see canon()) in '*c' and return the value reported by
// fz_chartorune().
static inline int chartocanon(int *c, const char *s)
{
    int rune;
    int used = fz_chartorune(&rune, s);
    *c = canon(rune);
    return used;
}
| 345 | |
| 346 | |
| 347 static const char *match_string(const char *h, const char *n) | |
| 348 { | |
| 349 int hc, nc; | |
| 350 const char *e = h; | |
| 351 h += chartocanon(&hc, h); | |
| 352 n += chartocanon(&nc, n); | |
| 353 while (hc == nc) | |
| 354 { | |
| 355 e = h; | |
| 356 if (hc == ' ') | |
| 357 do | |
| 358 h += chartocanon(&hc, h); | |
| 359 while (hc == ' '); | |
| 360 else | |
| 361 h += chartocanon(&hc, h); | |
| 362 if (nc == ' ') | |
| 363 do | |
| 364 n += chartocanon(&nc, n); | |
| 365 while (nc == ' '); | |
| 366 else | |
| 367 n += chartocanon(&nc, n); | |
| 368 } | |
| 369 return nc == 0 ? e : NULL; | |
| 370 } | |
| 371 | |
| 372 | |
| 373 static const char *find_string(const char *s, const char *needle, const char **endp) | |
| 374 { | |
| 375 const char *end; | |
| 376 while (*s) | |
| 377 { | |
| 378 end = match_string(s, needle); | |
| 379 if (end) | |
| 380 return *endp = end, s; | |
| 381 ++s; | |
| 382 } | |
| 383 return *endp = NULL, NULL; | |
| 384 } | |
| 385 | |
| 386 | |
| 387 PyObject * | |
| 388 JM_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle) | |
| 389 { | |
| 390 struct highlight hits; | |
| 391 fz_stext_block *block; | |
| 392 fz_stext_line *line; | |
| 393 fz_stext_char *ch; | |
| 394 fz_buffer *buffer = NULL; | |
| 395 const char *haystack, *begin, *end; | |
| 396 fz_rect rect = page->mediabox; | |
| 397 int c, inside; | |
| 398 | |
| 399 if (strlen(needle) == 0) Py_RETURN_NONE; | |
| 400 PyObject *quads = PyList_New(0); | |
| 401 hits.len = 0; | |
| 402 hits.quads = quads; | |
| 403 hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ | |
| 404 hits.vfuzz = 0.1f; | |
| 405 | |
| 406 fz_try(ctx) { | |
| 407 buffer = JM_new_buffer_from_stext_page(ctx, page); | |
| 408 haystack = fz_string_from_buffer(ctx, buffer); | |
| 409 begin = find_string(haystack, needle, &end); | |
| 410 if (!begin) goto no_more_matches; | |
| 411 | |
| 412 inside = 0; | |
| 413 for (block = page->first_block; block; block = block->next) { | |
| 414 if (block->type != FZ_STEXT_BLOCK_TEXT) { | |
| 415 continue; | |
| 416 } | |
| 417 for (line = block->u.t.first_line; line; line = line->next) { | |
| 418 for (ch = line->first_char; ch; ch = ch->next) { | |
| 419 if (!fz_is_infinite_rect(rect) && | |
| 420 !JM_rects_overlap(rect, JM_char_bbox(ctx, line, ch))) { | |
| 421 goto next_char; | |
| 422 } | |
| 423 try_new_match: | |
| 424 if (!inside) { | |
| 425 if (haystack >= begin) inside = 1; | |
| 426 } | |
| 427 if (inside) { | |
| 428 if (haystack < end) { | |
| 429 on_highlight_char(ctx, &hits, line, ch); | |
| 430 } else { | |
| 431 inside = 0; | |
| 432 begin = find_string(haystack, needle, &end); | |
| 433 if (!begin) goto no_more_matches; | |
| 434 else goto try_new_match; | |
| 435 } | |
| 436 } | |
| 437 haystack += fz_chartorune(&c, haystack); | |
| 438 next_char:; | |
| 439 } | |
| 440 assert(*haystack == '\n'); | |
| 441 ++haystack; | |
| 442 } | |
| 443 assert(*haystack == '\n'); | |
| 444 ++haystack; | |
| 445 } | |
| 446 no_more_matches:; | |
| 447 } | |
| 448 fz_always(ctx) | |
| 449 fz_drop_buffer(ctx, buffer); | |
| 450 fz_catch(ctx) | |
| 451 fz_rethrow(ctx); | |
| 452 | |
| 453 return quads; | |
| 454 } | |
| 455 | |
| 456 | |
| 457 //----------------------------------------------------------------------------- | |
| 458 // Plain text output. An identical copy of fz_print_stext_page_as_text, | |
| 459 // but lines within a block are concatenated by space instead a new-line | |
| 460 // character (which else leads to 2 new-lines). | |
| 461 //----------------------------------------------------------------------------- | |
| 462 void | |
| 463 JM_print_stext_page_as_text(fz_context *ctx, fz_buffer *buff, fz_stext_page *page) | |
| 464 { | |
| 465 fz_stext_block *block; | |
| 466 fz_stext_line *line; | |
| 467 fz_stext_char *ch; | |
| 468 fz_rect rect = page->mediabox; | |
| 469 fz_rect chbbox; | |
| 470 int last_char = 0; | |
| 471 char utf[10]; | |
| 472 int i, n; | |
| 473 | |
| 474 for (block = page->first_block; block; block = block->next) { | |
| 475 if (block->type == FZ_STEXT_BLOCK_TEXT) { | |
| 476 for (line = block->u.t.first_line; line; line = line->next) { | |
| 477 last_char = 0; | |
| 478 for (ch = line->first_char; ch; ch = ch->next) { | |
| 479 chbbox = JM_char_bbox(ctx, line, ch); | |
| 480 if (fz_is_infinite_rect(rect) || | |
| 481 JM_rects_overlap(rect, chbbox)) { | |
| 482 last_char = ch->c; | |
| 483 JM_append_rune(ctx, buff, ch->c); | |
| 484 } | |
| 485 } | |
| 486 if (last_char != 10 && last_char > 0) { | |
| 487 fz_append_string(ctx, buff, "\n"); | |
| 488 } | |
| 489 } | |
| 490 } | |
| 491 } | |
| 492 } | |
| 493 | |
| 494 //----------------------------------------------------------------------------- | |
| 495 // Functions for wordlist output | |
| 496 //----------------------------------------------------------------------------- | |
| 497 int JM_append_word(fz_context *ctx, PyObject *lines, fz_buffer *buff, fz_rect *wbbox, | |
| 498 int block_n, int line_n, int word_n) | |
| 499 { | |
| 500 PyObject *s = JM_EscapeStrFromBuffer(ctx, buff); | |
| 501 PyObject *litem = Py_BuildValue("ffffOiii", | |
| 502 wbbox->x0, | |
| 503 wbbox->y0, | |
| 504 wbbox->x1, | |
| 505 wbbox->y1, | |
| 506 s, | |
| 507 block_n, line_n, word_n); | |
| 508 LIST_APPEND_DROP(lines, litem); | |
| 509 Py_DECREF(s); | |
| 510 *wbbox = fz_empty_rect; | |
| 511 return word_n + 1; // word counter | |
| 512 } | |
| 513 | |
| 514 //----------------------------------------------------------------------------- | |
| 515 // Functions for dictionary output | |
| 516 //----------------------------------------------------------------------------- | |
| 517 | |
| 518 static int detect_super_script(fz_stext_line *line, fz_stext_char *ch) | |
| 519 { | |
| 520 if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0) | |
| 521 return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f; | |
| 522 return 0; | |
| 523 } | |
| 524 | |
| 525 static int JM_char_font_flags(fz_context *ctx, fz_font *font, fz_stext_line *line, fz_stext_char *ch) | |
| 526 { | |
| 527 int flags = detect_super_script(line, ch); | |
| 528 flags += fz_font_is_italic(ctx, font) * TEXT_FONT_ITALIC; | |
| 529 flags += fz_font_is_serif(ctx, font) * TEXT_FONT_SERIFED; | |
| 530 flags += fz_font_is_monospaced(ctx, font) * TEXT_FONT_MONOSPACED; | |
| 531 flags += fz_font_is_bold(ctx, font) * TEXT_FONT_BOLD; | |
| 532 return flags; | |
| 533 } | |
| 534 | |
| 535 static const char * | |
| 536 JM_font_name(fz_context *ctx, fz_font *font) | |
| 537 { | |
| 538 const char *name = fz_font_name(ctx, font); | |
| 539 const char *s = strchr(name, '+'); | |
| 540 if (subset_fontnames || s == NULL || s-name != 6) { | |
| 541 return name; | |
| 542 } | |
| 543 return s + 1; | |
| 544 } | |
| 545 | |
| 546 | |
| 547 static fz_rect | |
| 548 JM_make_spanlist(fz_context *ctx, PyObject *line_dict, | |
| 549 fz_stext_line *line, int raw, fz_buffer *buff, | |
| 550 fz_rect tp_rect) | |
| 551 { | |
| 552 PyObject *span = NULL, *char_list = NULL, *char_dict; | |
| 553 PyObject *span_list = PyList_New(0); | |
| 554 fz_clear_buffer(ctx, buff); | |
| 555 fz_stext_char *ch; | |
| 556 fz_rect span_rect = fz_empty_rect; | |
| 557 fz_rect line_rect = fz_empty_rect; | |
| 558 fz_point span_origin = {0, 0}; | |
| 559 typedef struct style_s { | |
| 560 float size; int flags; const char *font; int color; | |
| 561 float asc; float desc; | |
| 562 } char_style; | |
| 563 char_style old_style = { -1, -1, "", -1, 0, 0 }, style; | |
| 564 | |
| 565 for (ch = line->first_char; ch; ch = ch->next) { | |
| 566 fz_rect r = JM_char_bbox(ctx, line, ch); | |
| 567 if (!JM_rects_overlap(tp_rect, r) && | |
| 568 !fz_is_infinite_rect(tp_rect)) { | |
| 569 continue; | |
| 570 } | |
| 571 int flags = JM_char_font_flags(ctx, ch->font, line, ch); | |
| 572 fz_point origin = ch->origin; | |
| 573 style.size = ch->size; | |
| 574 style.flags = flags; | |
| 575 style.font = JM_font_name(ctx, ch->font); | |
| 576 style.color = ch->color; | |
| 577 style.asc = JM_font_ascender(ctx, ch->font); | |
| 578 style.desc = JM_font_descender(ctx, ch->font); | |
| 579 | |
| 580 if (style.size != old_style.size || | |
| 581 style.flags != old_style.flags || | |
| 582 style.color != old_style.color || | |
| 583 strcmp(style.font, old_style.font) != 0) { | |
| 584 | |
| 585 if (old_style.size >= 0) { | |
| 586 // not first one, output previous | |
| 587 if (raw) { | |
| 588 // put character list in the span | |
| 589 DICT_SETITEM_DROP(span, dictkey_chars, char_list); | |
| 590 char_list = NULL; | |
| 591 } else { | |
| 592 // put text string in the span | |
| 593 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(ctx, buff)); | |
| 594 fz_clear_buffer(ctx, buff); | |
| 595 } | |
| 596 | |
| 597 DICT_SETITEM_DROP(span, dictkey_origin, | |
| 598 JM_py_from_point(span_origin)); | |
| 599 DICT_SETITEM_DROP(span, dictkey_bbox, | |
| 600 JM_py_from_rect(span_rect)); | |
| 601 line_rect = fz_union_rect(line_rect, span_rect); | |
| 602 LIST_APPEND_DROP(span_list, span); | |
| 603 span = NULL; | |
| 604 } | |
| 605 | |
| 606 span = PyDict_New(); | |
| 607 float asc = style.asc, desc = style.desc; | |
| 608 if (style.asc < 1e-3) { | |
| 609 asc = 0.9f; | |
| 610 desc = -0.1f; | |
| 611 } | |
| 612 | |
| 613 DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size)); | |
| 614 DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("i", style.flags)); | |
| 615 DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font)); | |
| 616 DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("i", style.color)); | |
| 617 DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc)); | |
| 618 DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc)); | |
| 619 | |
| 620 old_style = style; | |
| 621 span_rect = r; | |
| 622 span_origin = origin; | |
| 623 | |
| 624 } | |
| 625 span_rect = fz_union_rect(span_rect, r); | |
| 626 | |
| 627 if (raw) { // make and append a char dict | |
| 628 char_dict = PyDict_New(); | |
| 629 DICT_SETITEM_DROP(char_dict, dictkey_origin, | |
| 630 JM_py_from_point(ch->origin)); | |
| 631 | |
| 632 DICT_SETITEM_DROP(char_dict, dictkey_bbox, | |
| 633 JM_py_from_rect(r)); | |
| 634 | |
| 635 DICT_SETITEM_DROP(char_dict, dictkey_c, | |
| 636 Py_BuildValue("C", ch->c)); | |
| 637 | |
| 638 if (!char_list) { | |
| 639 char_list = PyList_New(0); | |
| 640 } | |
| 641 LIST_APPEND_DROP(char_list, char_dict); | |
| 642 } else { // add character byte to buffer | |
| 643 JM_append_rune(ctx, buff, ch->c); | |
| 644 } | |
| 645 } | |
| 646 // all characters processed, now flush remaining span | |
| 647 if (span) { | |
| 648 if (raw) { | |
| 649 DICT_SETITEM_DROP(span, dictkey_chars, char_list); | |
| 650 char_list = NULL; | |
| 651 } else { | |
| 652 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(ctx, buff)); | |
| 653 fz_clear_buffer(ctx, buff); | |
| 654 } | |
| 655 DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin)); | |
| 656 DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect)); | |
| 657 | |
| 658 if (!fz_is_empty_rect(span_rect)) { | |
| 659 LIST_APPEND_DROP(span_list, span); | |
| 660 line_rect = fz_union_rect(line_rect, span_rect); | |
| 661 } else { | |
| 662 Py_DECREF(span); | |
| 663 } | |
| 664 span = NULL; | |
| 665 } | |
| 666 if (!fz_is_empty_rect(line_rect)) { | |
| 667 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); | |
| 668 } else { | |
| 669 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); | |
| 670 } | |
| 671 return line_rect; | |
| 672 } | |
| 673 | |
| 674 static void JM_make_image_block(fz_context *ctx, fz_stext_block *block, PyObject *block_dict) | |
| 675 { | |
| 676 fz_image *image = block->u.i.image; | |
| 677 fz_buffer *buf = NULL, *freebuf = NULL; | |
| 678 fz_compressed_buffer *buffer = fz_compressed_image_buffer(ctx, image); | |
| 679 fz_var(buf); | |
| 680 fz_var(freebuf); | |
| 681 int n = fz_colorspace_n(ctx, image->colorspace); | |
| 682 int w = image->w; | |
| 683 int h = image->h; | |
| 684 const char *ext = NULL; | |
| 685 int type = FZ_IMAGE_UNKNOWN; | |
| 686 if (buffer) | |
| 687 type = buffer->params.type; | |
| 688 if (type < FZ_IMAGE_BMP || type == FZ_IMAGE_JBIG2) | |
| 689 type = FZ_IMAGE_UNKNOWN; | |
| 690 PyObject *bytes = NULL; | |
| 691 fz_var(bytes); | |
| 692 fz_try(ctx) { | |
| 693 if (buffer && type != FZ_IMAGE_UNKNOWN) { | |
| 694 buf = buffer->buffer; | |
| 695 ext = JM_image_extension(type); | |
| 696 } else { | |
| 697 buf = freebuf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params); | |
| 698 ext = "png"; | |
| 699 } | |
| 700 bytes = JM_BinFromBuffer(ctx, buf); | |
| 701 } | |
| 702 fz_always(ctx) { | |
| 703 if (!bytes) | |
| 704 bytes = JM_BinFromChar(""); | |
| 705 | |
| 706 DICT_SETITEM_DROP(block_dict, dictkey_width, | |
| 707 Py_BuildValue("i", w)); | |
| 708 DICT_SETITEM_DROP(block_dict, dictkey_height, | |
| 709 Py_BuildValue("i", h)); | |
| 710 DICT_SETITEM_DROP(block_dict, dictkey_ext, | |
| 711 Py_BuildValue("s", ext)); | |
| 712 DICT_SETITEM_DROP(block_dict, dictkey_colorspace, | |
| 713 Py_BuildValue("i", n)); | |
| 714 DICT_SETITEM_DROP(block_dict, dictkey_xres, | |
| 715 Py_BuildValue("i", image->xres)); | |
| 716 DICT_SETITEM_DROP(block_dict, dictkey_yres, | |
| 717 Py_BuildValue("i", image->xres)); | |
| 718 DICT_SETITEM_DROP(block_dict, dictkey_bpc, | |
| 719 Py_BuildValue("i", (int) image->bpc)); | |
| 720 DICT_SETITEM_DROP(block_dict, dictkey_matrix, | |
| 721 JM_py_from_matrix(block->u.i.transform)); | |
| 722 DICT_SETITEM_DROP(block_dict, dictkey_size, | |
| 723 Py_BuildValue("n", PyBytes_Size(bytes))); | |
| 724 DICT_SETITEM_DROP(block_dict, dictkey_image, bytes); | |
| 725 | |
| 726 fz_drop_buffer(ctx, freebuf); | |
| 727 } | |
| 728 fz_catch(ctx) {;} | |
| 729 return; | |
| 730 } | |
| 731 | |
| 732 static void JM_make_text_block(fz_context *ctx, fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect) | |
| 733 { | |
| 734 fz_stext_line *line; | |
| 735 PyObject *line_list = PyList_New(0), *line_dict; | |
| 736 fz_rect block_rect = fz_empty_rect; | |
| 737 for (line = block->u.t.first_line; line; line = line->next) { | |
| 738 if (fz_is_empty_rect(fz_intersect_rect(tp_rect, line->bbox)) && | |
| 739 !fz_is_infinite_rect(tp_rect)) { | |
| 740 continue; | |
| 741 } | |
| 742 line_dict = PyDict_New(); | |
| 743 fz_rect line_rect = JM_make_spanlist(ctx, line_dict, line, raw, buff, tp_rect); | |
| 744 block_rect = fz_union_rect(block_rect, line_rect); | |
| 745 DICT_SETITEM_DROP(line_dict, dictkey_wmode, | |
| 746 Py_BuildValue("i", line->wmode)); | |
| 747 DICT_SETITEM_DROP(line_dict, dictkey_dir, JM_py_from_point(line->dir)); | |
| 748 DICT_SETITEM_DROP(line_dict, dictkey_bbox, | |
| 749 JM_py_from_rect(line_rect)); | |
| 750 LIST_APPEND_DROP(line_list, line_dict); | |
| 751 } | |
| 752 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block_rect)); | |
| 753 DICT_SETITEM_DROP(block_dict, dictkey_lines, line_list); | |
| 754 return; | |
| 755 } | |
| 756 | |
| 757 void JM_make_textpage_dict(fz_context *ctx, fz_stext_page *tp, PyObject *page_dict, int raw) | |
| 758 { | |
| 759 fz_stext_block *block; | |
| 760 fz_buffer *text_buffer = fz_new_buffer(ctx, 128); | |
| 761 PyObject *block_dict, *block_list = PyList_New(0); | |
| 762 fz_rect tp_rect = tp->mediabox; | |
| 763 int block_n = -1; | |
| 764 for (block = tp->first_block; block; block = block->next) { | |
| 765 block_n++; | |
| 766 if (!fz_contains_rect(tp_rect, block->bbox) && | |
| 767 !fz_is_infinite_rect(tp_rect) && | |
| 768 block->type == FZ_STEXT_BLOCK_IMAGE) { | |
| 769 continue; | |
| 770 } | |
| 771 if (!fz_is_infinite_rect(tp_rect) && | |
| 772 fz_is_empty_rect(fz_intersect_rect(tp_rect, block->bbox))) { | |
| 773 continue; | |
| 774 } | |
| 775 | |
| 776 block_dict = PyDict_New(); | |
| 777 DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); | |
| 778 DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); | |
| 779 if (block->type == FZ_STEXT_BLOCK_IMAGE) { | |
| 780 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); | |
| 781 JM_make_image_block(ctx, block, block_dict); | |
| 782 } else { | |
| 783 JM_make_text_block(ctx, block, block_dict, raw, text_buffer, tp_rect); | |
| 784 } | |
| 785 | |
| 786 LIST_APPEND_DROP(block_list, block_dict); | |
| 787 } | |
| 788 DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list); | |
| 789 fz_drop_buffer(ctx, text_buffer); | |
| 790 } | |
| 791 | |
| 792 | |
| 793 | |
| 794 //--------------------------------------------------------------------- | |
| 795 PyObject * | |
| 796 JM_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area) | |
| 797 { | |
| 798 fz_stext_block *block; | |
| 799 fz_stext_line *line; | |
| 800 fz_stext_char *ch; | |
| 801 fz_buffer *buffer; | |
| 802 int need_new_line = 0; | |
| 803 PyObject *rc = NULL; | |
| 804 fz_try(ctx) { | |
| 805 buffer = fz_new_buffer(ctx, 1024); | |
| 806 for (block = page->first_block; block; block = block->next) { | |
| 807 if (block->type != FZ_STEXT_BLOCK_TEXT) | |
| 808 continue; | |
| 809 for (line = block->u.t.first_line; line; line = line->next) { | |
| 810 int line_had_text = 0; | |
| 811 for (ch = line->first_char; ch; ch = ch->next) { | |
| 812 fz_rect r = JM_char_bbox(ctx, line, ch); | |
| 813 if (JM_rects_overlap(area, r)) { | |
| 814 line_had_text = 1; | |
| 815 if (need_new_line) { | |
| 816 fz_append_string(ctx, buffer, "\n"); | |
| 817 need_new_line = 0; | |
| 818 } | |
| 819 JM_append_rune(ctx, buffer, ch->c); | |
| 820 } | |
| 821 } | |
| 822 if (line_had_text) | |
| 823 need_new_line = 1; | |
| 824 } | |
| 825 } | |
| 826 fz_terminate_buffer(ctx, buffer); | |
| 827 rc = JM_EscapeStrFromBuffer(ctx, buffer); | |
| 828 if (!rc) { | |
| 829 rc = EMPTY_STRING; | |
| 830 PyErr_Clear(); | |
| 831 } | |
| 832 } | |
| 833 fz_always(ctx) { | |
| 834 fz_drop_buffer(ctx, buffer); | |
| 835 } | |
| 836 fz_catch(ctx) { | |
| 837 fz_rethrow(ctx); | |
| 838 } | |
| 839 return rc; | |
| 840 } | |
| 841 //--------------------------------------------------------------------- | |
| 842 | |
| 843 | |
| 844 | |
| 845 | |
| 846 fz_buffer *JM_object_to_buffer(fz_context *ctx, pdf_obj *what, int compress, int ascii) | |
| 847 { | |
| 848 fz_buffer *res=NULL; | |
| 849 fz_output *out=NULL; | |
| 850 fz_try(ctx) { | |
| 851 res = fz_new_buffer(ctx, 512); | |
| 852 out = fz_new_output_with_buffer(ctx, res); | |
| 853 pdf_print_obj(ctx, out, what, compress, ascii); | |
| 854 } | |
| 855 fz_always(ctx) { | |
| 856 fz_drop_output(ctx, out); | |
| 857 } | |
| 858 fz_catch(ctx) { | |
| 859 fz_rethrow(ctx); | |
| 860 } | |
| 861 fz_terminate_buffer(ctx, res); | |
| 862 return res; | |
| 863 } | |
| 864 | |
| 865 //----------------------------------------------------------------------------- | |
| 866 // Merge the /Resources object created by a text pdf device into the page. | |
| 867 // The device may have created multiple /ExtGState/Alp? and /Font/F? objects. | |
| 868 // These need to be renamed (renumbered) to not overwrite existing page | |
| 869 // objects from previous executions. | |
| 870 // Returns the next available numbers n, m for objects /Alp<n>, /F<m>. | |
| 871 //----------------------------------------------------------------------------- | |
| 872 PyObject *JM_merge_resources(fz_context *ctx, pdf_page *page, pdf_obj *temp_res) | |
| 873 { | |
| 874 // page objects /Resources, /Resources/ExtGState, /Resources/Font | |
| 875 pdf_obj *resources = pdf_dict_get(ctx, page->obj, PDF_NAME(Resources)); | |
| 876 pdf_obj *main_extg = pdf_dict_get(ctx, resources, PDF_NAME(ExtGState)); | |
| 877 pdf_obj *main_fonts = pdf_dict_get(ctx, resources, PDF_NAME(Font)); | |
| 878 | |
| 879 // text pdf device objects /ExtGState, /Font | |
| 880 pdf_obj *temp_extg = pdf_dict_get(ctx, temp_res, PDF_NAME(ExtGState)); | |
| 881 pdf_obj *temp_fonts = pdf_dict_get(ctx, temp_res, PDF_NAME(Font)); | |
| 882 | |
| 883 | |
| 884 int max_alp = -1, max_fonts = -1, i, n; | |
| 885 char text[20]; | |
| 886 | |
| 887 // Handle /Alp objects | |
| 888 if (pdf_is_dict(ctx, temp_extg)) // any created at all? | |
| 889 { | |
| 890 n = pdf_dict_len(ctx, temp_extg); | |
| 891 if (pdf_is_dict(ctx, main_extg)) { // does page have /ExtGState yet? | |
| 892 for (i = 0; i < pdf_dict_len(ctx, main_extg); i++) { | |
| 893 // get highest number of objects named /Alpxxx | |
| 894 char *alp = (char *) pdf_to_name(ctx, pdf_dict_get_key(ctx, main_extg, i)); | |
| 895 if (strncmp(alp, "Alp", 3) != 0) continue; | |
| 896 int j = fz_atoi(alp + 3); | |
| 897 if (j > max_alp) max_alp = j; | |
| 898 } | |
| 899 } | |
| 900 else // create a /ExtGState for the page | |
| 901 main_extg = pdf_dict_put_dict(ctx, resources, PDF_NAME(ExtGState), n); | |
| 902 | |
| 903 max_alp += 1; | |
| 904 for (i = 0; i < n; i++) // copy over renumbered /Alp objects | |
| 905 { | |
| 906 char *alp = (char *) pdf_to_name(ctx, pdf_dict_get_key(ctx, temp_extg, i)); | |
| 907 int j = fz_atoi(alp + 3) + max_alp; | |
| 908 fz_snprintf(text, sizeof(text), "Alp%d", j); // new name | |
| 909 pdf_obj *val = pdf_dict_get_val(ctx, temp_extg, i); | |
| 910 pdf_dict_puts(ctx, main_extg, text, val); | |
| 911 } | |
| 912 } | |
| 913 | |
| 914 | |
| 915 if (pdf_is_dict(ctx, main_fonts)) { // has page any fonts yet? | |
| 916 for (i = 0; i < pdf_dict_len(ctx, main_fonts); i++) { // get max font number | |
| 917 char *font = (char *) pdf_to_name(ctx, pdf_dict_get_key(ctx, main_fonts, i)); | |
| 918 if (strncmp(font, "F", 1) != 0) continue; | |
| 919 int j = fz_atoi(font + 1); | |
| 920 if (j > max_fonts) max_fonts = j; | |
| 921 } | |
| 922 } | |
| 923 else // create a Resources/Font for the page | |
| 924 main_fonts = pdf_dict_put_dict(ctx, resources, PDF_NAME(Font), 2); | |
| 925 | |
| 926 max_fonts += 1; | |
| 927 for (i = 0; i < pdf_dict_len(ctx, temp_fonts); i++) { // copy renumbered fonts | |
| 928 char *font = (char *) pdf_to_name(ctx, pdf_dict_get_key(ctx, temp_fonts, i)); | |
| 929 int j = fz_atoi(font + 1) + max_fonts; | |
| 930 fz_snprintf(text, sizeof(text), "F%d", j); | |
| 931 pdf_obj *val = pdf_dict_get_val(ctx, temp_fonts, i); | |
| 932 pdf_dict_puts(ctx, main_fonts, text, val); | |
| 933 } | |
| 934 return Py_BuildValue("ii", max_alp, max_fonts); // next available numbers | |
| 935 } | |
| 936 | |
| 937 | |
| 938 //----------------------------------------------------------------------------- | |
| 939 // version of fz_show_string, which covers SMALL CAPS | |
| 940 //----------------------------------------------------------------------------- | |
| 941 fz_matrix | |
| 942 JM_show_string_cs(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, | |
| 943 int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) | |
| 944 { | |
| 945 fz_font *font=NULL; | |
| 946 int gid, ucs; | |
| 947 float adv; | |
| 948 | |
| 949 while (*s) | |
| 950 { | |
| 951 s += fz_chartorune(&ucs, s); | |
| 952 gid = fz_encode_character_sc(ctx, user_font, ucs); | |
| 953 if (gid == 0) { | |
| 954 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); | |
| 955 } else { | |
| 956 font = user_font; | |
| 957 } | |
| 958 fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); | |
| 959 adv = fz_advance_glyph(ctx, font, gid, wmode); | |
| 960 if (wmode == 0) | |
| 961 trm = fz_pre_translate(trm, adv, 0); | |
| 962 else | |
| 963 trm = fz_pre_translate(trm, 0, -adv); | |
| 964 } | |
| 965 | |
| 966 return trm; | |
| 967 } | |
| 968 | |
| 969 | |
| 970 //----------------------------------------------------------------------------- | |
| 971 // version of fz_show_string, which also covers UCDN script | |
| 972 //----------------------------------------------------------------------------- | |
| 973 fz_matrix JM_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, int script) | |
| 974 { | |
| 975 fz_font *font; | |
| 976 int gid, ucs; | |
| 977 float adv; | |
| 978 | |
| 979 while (*s) { | |
| 980 s += fz_chartorune(&ucs, s); | |
| 981 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, script, language, &font); | |
| 982 fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); | |
| 983 adv = fz_advance_glyph(ctx, font, gid, wmode); | |
| 984 if (wmode == 0) | |
| 985 trm = fz_pre_translate(trm, adv, 0); | |
| 986 else | |
| 987 trm = fz_pre_translate(trm, 0, -adv); | |
| 988 } | |
| 989 return trm; | |
| 990 } | |
| 991 | |
| 992 | |
| 993 //----------------------------------------------------------------------------- | |
| 994 // return a fz_font from a number of parameters | |
| 995 //----------------------------------------------------------------------------- | |
| 996 fz_font *JM_get_font(fz_context *ctx, | |
| 997 char *fontname, | |
| 998 char *fontfile, | |
| 999 PyObject *fontbuffer, | |
| 1000 int script, | |
| 1001 int lang, | |
| 1002 int ordering, | |
| 1003 int is_bold, | |
| 1004 int is_italic, | |
| 1005 int is_serif, | |
| 1006 int embed) | |
| 1007 { | |
| 1008 const unsigned char *data = NULL; | |
| 1009 int size, index=0; | |
| 1010 fz_buffer *res = NULL; | |
| 1011 fz_font *font = NULL; | |
| 1012 fz_try(ctx) { | |
| 1013 if (fontfile) goto have_file; | |
| 1014 if (EXISTS(fontbuffer)) goto have_buffer; | |
| 1015 if (ordering > -1) goto have_cjk; | |
| 1016 if (fontname) goto have_base14; | |
| 1017 goto have_noto; | |
| 1018 | |
| 1019 // Base-14 or a MuPDF builtin font | |
| 1020 have_base14:; | |
| 1021 font = fz_new_base14_font(ctx, fontname); | |
| 1022 if (font) { | |
| 1023 goto fertig; | |
| 1024 } | |
| 1025 font = fz_new_builtin_font(ctx, fontname, is_bold, is_italic); | |
| 1026 goto fertig; | |
| 1027 | |
| 1028 // CJK font | |
| 1029 have_cjk:; | |
| 1030 font = fz_new_cjk_font(ctx, ordering); | |
| 1031 goto fertig; | |
| 1032 | |
| 1033 // fontfile | |
| 1034 have_file:; | |
| 1035 font = fz_new_font_from_file(ctx, NULL, fontfile, index, 0); | |
| 1036 goto fertig; | |
| 1037 | |
| 1038 // fontbuffer | |
| 1039 have_buffer:; | |
| 1040 res = JM_BufferFromBytes(ctx, fontbuffer); | |
| 1041 font = fz_new_font_from_buffer(ctx, NULL, res, index, 0); | |
| 1042 goto fertig; | |
| 1043 | |
| 1044 // Check for NOTO font | |
| 1045 have_noto:; | |
| 1046 data = fz_lookup_noto_font(ctx, script, lang, &size, &index); | |
| 1047 if (data) font = fz_new_font_from_memory(ctx, NULL, data, size, index, 0); | |
| 1048 if (font) goto fertig; | |
| 1049 font = fz_load_fallback_font(ctx, script, lang, is_serif, is_bold, is_italic); | |
| 1050 goto fertig; | |
| 1051 | |
| 1052 fertig:; | |
| 1053 if (!font) { | |
| 1054 RAISEPY(ctx, MSG_FONT_FAILED, PyExc_RuntimeError); | |
| 1055 } | |
| 1056 #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 | |
| 1057 // if font allows this, set embedding | |
| 1058 if (!font->flags.never_embed) { | |
| 1059 fz_set_font_embedding(ctx, font, embed); | |
| 1060 } | |
| 1061 #endif | |
| 1062 } | |
| 1063 fz_always(ctx) { | |
| 1064 fz_drop_buffer(ctx, res); | |
| 1065 } | |
| 1066 fz_catch(ctx) { | |
| 1067 fz_rethrow(ctx); | |
| 1068 } | |
| 1069 return font; | |
| 1070 } | |
| 1071 | |
| 1072 %} |
