comparison src/extra.i @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children a6bc019ac0b2
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 %module fitz_extra
2
3 %pythoncode %{
4 # pylint: disable=all
5 %}
6
7 %begin
8 %{
9 #define SWIG_PYTHON_INTERPRETER_NO_DEBUG
10
11 /* This seems to be necessary on some Windows machines with Py_LIMITED_API,
12 otherwise compilation can fail because free() and malloc() are not declared. */
13 #include <stdlib.h>
14 %}
15
16 %init
17 %{
18 /* Initialise some globals that require Python functions.
19
20 [Prior to 2023-08-18 we initialised these global variables inline,
21 but this causes a SEGV on Windows with Python-3.10 for `dictkey_c`
22 (actually any string of length 1 failed).] */
23
/* Each assignment below stores the result of PyUnicode_InternFromString() in
one of the file's `dictkey_*` globals. The results are not checked for NULL.

NOTE(review): `dictkey_desc` and `dictkey_descender` are both created from the
string "descender" - confirm that `dictkey_desc` is not meant to be "desc".

NOTE(review): `dictkey_char_bidi` is declared together with the other
`dictkey_*` globals in this file but is not assigned in this block - confirm
that it is either set elsewhere or unused. */
24 dictkey_align = PyUnicode_InternFromString("align");
25 dictkey_ascender = PyUnicode_InternFromString("ascender");
26 dictkey_bidi = PyUnicode_InternFromString("bidi");
27 dictkey_bbox = PyUnicode_InternFromString("bbox");
28 dictkey_blocks = PyUnicode_InternFromString("blocks");
29 dictkey_bpc = PyUnicode_InternFromString("bpc");
30 dictkey_c = PyUnicode_InternFromString("c");
31 dictkey_chars = PyUnicode_InternFromString("chars");
32 dictkey_color = PyUnicode_InternFromString("color");
33 dictkey_colorspace = PyUnicode_InternFromString("colorspace");
34 dictkey_content = PyUnicode_InternFromString("content");
35 dictkey_creationDate = PyUnicode_InternFromString("creationDate");
36 dictkey_cs_name = PyUnicode_InternFromString("cs-name");
37 dictkey_da = PyUnicode_InternFromString("da");
38 dictkey_dashes = PyUnicode_InternFromString("dashes");
39 dictkey_desc = PyUnicode_InternFromString("descender");
40 dictkey_descender = PyUnicode_InternFromString("descender");
41 dictkey_dir = PyUnicode_InternFromString("dir");
42 dictkey_effect = PyUnicode_InternFromString("effect");
43 dictkey_ext = PyUnicode_InternFromString("ext");
44 dictkey_filename = PyUnicode_InternFromString("filename");
45 dictkey_fill = PyUnicode_InternFromString("fill");
46 dictkey_flags = PyUnicode_InternFromString("flags");
47 dictkey_char_flags = PyUnicode_InternFromString("char_flags"); /* Only used with mupdf >= 1.25.2. */
48 dictkey_font = PyUnicode_InternFromString("font");
49 dictkey_glyph = PyUnicode_InternFromString("glyph");
50 dictkey_height = PyUnicode_InternFromString("height");
51 dictkey_id = PyUnicode_InternFromString("id");
52 dictkey_image = PyUnicode_InternFromString("image");
53 dictkey_items = PyUnicode_InternFromString("items");
54 dictkey_length = PyUnicode_InternFromString("length");
55 dictkey_lines = PyUnicode_InternFromString("lines");
/* Note: the key string of `dictkey_matrix` is "transform", not "matrix". */
56 dictkey_matrix = PyUnicode_InternFromString("transform");
57 dictkey_modDate = PyUnicode_InternFromString("modDate");
58 dictkey_name = PyUnicode_InternFromString("name");
59 dictkey_number = PyUnicode_InternFromString("number");
60 dictkey_origin = PyUnicode_InternFromString("origin");
61 dictkey_rect = PyUnicode_InternFromString("rect");
62 dictkey_size = PyUnicode_InternFromString("size");
63 dictkey_smask = PyUnicode_InternFromString("smask");
64 dictkey_spans = PyUnicode_InternFromString("spans");
65 dictkey_stroke = PyUnicode_InternFromString("stroke");
66 dictkey_style = PyUnicode_InternFromString("style");
67 dictkey_subject = PyUnicode_InternFromString("subject");
68 dictkey_text = PyUnicode_InternFromString("text");
69 dictkey_title = PyUnicode_InternFromString("title");
70 dictkey_type = PyUnicode_InternFromString("type");
71 dictkey_ufilename = PyUnicode_InternFromString("ufilename");
72 dictkey_width = PyUnicode_InternFromString("width");
73 dictkey_wmode = PyUnicode_InternFromString("wmode");
74 dictkey_xref = PyUnicode_InternFromString("xref");
75 dictkey_xres = PyUnicode_InternFromString("xres");
76 dictkey_yres = PyUnicode_InternFromString("yres");
77 %}
78
79 %include std_string.i
80
/* Exception translation applied to the generated wrappers: `$action` (the
wrapped call) runs inside a try block. A std::exception is reported through
SWIG_exception() as SWIG_RuntimeError with the text of e.what(); anything
else is reported as SWIG_RuntimeError with the text "Unknown exception". */
81 %include exception.i
82 %exception {
83 try {
84 $action
85 }
86
87 /* this might not be ok on windows.
88 catch (Swig::DirectorException &e) {
89 SWIG_fail;
90 }*/
91 catch(std::exception& e) {
92 SWIG_exception(SWIG_RuntimeError, e.what());
93 }
94 catch(...) {
95 SWIG_exception(SWIG_RuntimeError, "Unknown exception");
96 }
97 }
98
99 %{
100 #include "mupdf/classes2.h"
101 #include "mupdf/exceptions.h"
102 #include "mupdf/internal.h"
103
104 #include <algorithm>
105 #include <float.h>
106
107
/* Packs a version triple into one integer so that versions can be compared
numerically. Every argument is parenthesised so that expressions such as
`a | b` expand as intended. */
#define MAKE_MUPDF_VERSION_INT(major, minor, patch) \
        (((major) << 16) + ((minor) << 8) + ((patch) << 0))

/* Packed version built from FZ_VERSION_MAJOR/MINOR/PATCH. */
#define MUPDF_VERSION_INT MAKE_MUPDF_VERSION_INT(FZ_VERSION_MAJOR, FZ_VERSION_MINOR, FZ_VERSION_PATCH)

/* True if MUPDF_VERSION_INT is at least the given version. The expansion is
fully parenthesised so that `!MUPDF_VERSION_GE(...)` negates the comparison
and not just its left operand. */
#define MUPDF_VERSION_GE(major, minor, patch) \
        (MUPDF_VERSION_INT >= MAKE_MUPDF_VERSION_INT(major, minor, patch))

/* Define a wrapper for PDF_NAME that returns a mupdf::PdfObj instead of a
pdf_obj*. This avoids implicit construction of a mupdf::PdfObj, which is
deliberately prohibited (with `explicit` on constructors) by recent MuPDF. */
#define PDF_NAME2(X) mupdf::PdfObj(PDF_NAME(X))
119
120 /* Returns equivalent of `repr(x)`. */
121 static std::string repr(PyObject* x)
122 {
123 PyObject* repr = PyObject_Repr(x);
124 PyObject* repr_str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
125 #ifdef Py_LIMITED_API
126 const char* repr_str_s = PyBytes_AsString(repr_str);
127 #else
128 const char* repr_str_s = PyBytes_AS_STRING(repr_str);
129 #endif
130 std::string ret = repr_str_s;
131 Py_DECREF(repr_str);
132 Py_DECREF(repr);
133 return ret;
134 }
135
#ifdef Py_LIMITED_API
/* Stand-in for PySequence_ITEM() when building with Py_LIMITED_API
(presumably because the original is not available there - TODO confirm).
Forwards to PySequence_GetItem(). */
static PyObject* PySequence_ITEM(PyObject* o, Py_ssize_t i)
{
    return PySequence_GetItem(o, i);
}

/* Stand-in for PyUnicode_AsUTF8() when building with Py_LIMITED_API.

The UTF-8 bytes object is kept in a function-local static which is released
on the next call, so the returned pointer is only usable until this function
is called again. Returns nullptr if <o> cannot be encoded; the error set by
PyUnicode_AsUTF8String() is left pending for the caller. */
static const char* PyUnicode_AsUTF8(PyObject* o)
{
    static PyObject* string = nullptr;
    Py_XDECREF(string);
    string = PyUnicode_AsUTF8String(o);
    if (!string)
    {
        // Must not hand a null object to PyBytes_AsString().
        return nullptr;
    }
    return PyBytes_AsString(string);
}
#endif
150
151
152 /* These are also in pymupdf/__init__.py. */
/* Message texts; within the visible part of this file they are used as the
argument of std::runtime_error (see MSG_BAD_PAGENO). */
153 const char MSG_BAD_ANNOT_TYPE[] = "bad annot type";
154 const char MSG_BAD_APN[] = "bad or missing annot AP/N";
155 const char MSG_BAD_ARG_INK_ANNOT[] = "arg must be seq of seq of float pairs";
156 const char MSG_BAD_ARG_POINTS[] = "bad seq of points";
157 const char MSG_BAD_BUFFER[] = "bad type: 'buffer'";
158 const char MSG_BAD_COLOR_SEQ[] = "bad color sequence";
159 const char MSG_BAD_DOCUMENT[] = "cannot open broken document";
160 const char MSG_BAD_FILETYPE[] = "bad filetype";
161 const char MSG_BAD_LOCATION[] = "bad location";
162 const char MSG_BAD_OC_CONFIG[] = "bad config number";
163 const char MSG_BAD_OC_LAYER[] = "bad layer number";
164 const char MSG_BAD_OC_REF[] = "bad 'oc' reference";
165 const char MSG_BAD_PAGEID[] = "bad page id";
166 const char MSG_BAD_PAGENO[] = "bad page number(s)";
167 const char MSG_BAD_PDFROOT[] = "PDF has no root";
168 const char MSG_BAD_RECT[] = "rect is infinite or empty";
169 const char MSG_BAD_TEXT[] = "bad type: 'text'";
170 const char MSG_BAD_XREF[] = "bad xref";
171 const char MSG_COLOR_COUNT_FAILED[] = "color count failed";
172 const char MSG_FILE_OR_BUFFER[] = "need font file or buffer";
173 const char MSG_FONT_FAILED[] = "cannot create font";
174 const char MSG_IS_NO_ANNOT[] = "is no annotation";
175 const char MSG_IS_NO_IMAGE[] = "is no image";
176 const char MSG_IS_NO_PDF[] = "is no PDF";
177 const char MSG_IS_NO_DICT[] = "object is no PDF dict";
178 const char MSG_PIX_NOALPHA[] = "source pixmap has no alpha";
179 const char MSG_PIXEL_OUTSIDE[] = "pixel(s) outside image";
180
/* Converts a C truth value into a Python bool object via PyBool_FromLong(). */
181 #define JM_BOOL(x) PyBool_FromLong((long) (x))
182
/* Forward declaration; the definition is not in this part of the file. */
183 static PyObject *JM_UnicodeFromStr(const char *c);
184
185
#ifdef _WIN32

/* These functions are not provided on Windows. */

/* Formats <fmt> with the arguments in <ap> into a buffer allocated with
malloc() and stores the buffer in <*str>; the caller releases it with free().

Returns the number of characters written (excluding the terminating zero).
On any failure (formatting error or out of memory) returns -1 and sets
<*str> to nullptr, so that callers never see an indeterminate pointer. */
int vasprintf(char** str, const char* fmt, va_list ap)
{
    *str = nullptr;

    // First pass: determine the required length only.
    va_list ap2;
    va_copy(ap2, ap);
    int len = vsnprintf(nullptr, 0, fmt, ap2);
    va_end(ap2);
    if (len < 0)
    {
        return -1;
    }

    char* buffer = (char*) malloc((size_t) len + 1);
    if (!buffer)
    {
        return -1;
    }

    // Second pass: do the actual formatting.
    va_copy(ap2, ap);
    int len2 = vsnprintf(buffer, (size_t) len + 1, fmt, ap2);
    va_end(ap2);
    if (len2 != len)
    {
        // Checked at runtime because assert() is compiled out with NDEBUG.
        free(buffer);
        return -1;
    }
    *str = buffer;
    return len;
}

/* Variadic front end of vasprintf(); same return value and ownership rules. */
int asprintf(char** str, const char* fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    int ret = vasprintf(str, fmt, ap);
    va_end(ap);

    return ret;
}
#endif
222
223
224 static void messagev(const char* format, va_list va)
225 {
226 static PyObject* pymupdf_module = PyImport_ImportModule("pymupdf");
227 static PyObject* message_fn = PyObject_GetAttrString(pymupdf_module, "message");
228 char* text;
229 vasprintf(&text, format, va);
230 PyObject* text_py = PyString_FromString(text);
231 PyObject* args = PyTuple_Pack(1, text_py);
232 PyObject* ret = PyObject_CallObject(message_fn, args);
233 Py_XDECREF(ret);
234 Py_XDECREF(args);
235 Py_XDECREF(text_py);
236 free(text);
237 }
238
239 static void messagef(const char* format, ...)
240 {
241 va_list args;
242 va_start(args, format);
243 messagev(format, args);
244 va_end(args);
245 }
246
247 PyObject* JM_EscapeStrFromStr(const char* c)
248 {
249 if (!c) return PyUnicode_FromString("");
250 PyObject* val = PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace");
251 if (!val)
252 {
253 val = PyUnicode_FromString("");
254 PyErr_Clear();
255 }
256 return val;
257 }
258
259 PyObject* JM_EscapeStrFromBuffer(fz_buffer* buff)
260 {
261 if (!buff) return PyUnicode_FromString("");
262 unsigned char* s = nullptr;
263 size_t len = mupdf::ll_fz_buffer_storage(buff, &s);
264 PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace");
265 if (!val)
266 {
267 val = PyUnicode_FromString("");
268 PyErr_Clear();
269 }
270 return val;
271 }
272
273 //----------------------------------------------------------------------------
274 // Deep-copies a source page to the target.
275 // Modified version of function of pdfmerge.c: we also copy annotations, but
276 // we skip some subtypes. In addition we rotate output.
277 //----------------------------------------------------------------------------
278 static void page_merge(
279 mupdf::PdfDocument& doc_des,
280 mupdf::PdfDocument& doc_src,
281 int page_from,
282 int page_to,
283 int rotate,
284 int links,
285 int copy_annots,
286 mupdf::PdfGraftMap& graft_map
287 )
288 {
289 // list of object types (per page) we want to copy
290
291 /* Fixme: on linux these get destructed /after/
292 mupdf/platform/c++/implementation/internal.cpp:s_thread_state, which causes
293 problems - s_thread_state::m_ctx will have been freed. We have a hack
294 that sets s_thread_state::m_ctx when destructed, so it mostly works when
295 s_thread_state.get_context() is called after destruction, but this causes
296 memento leaks and is clearly incorrect.
297
298 Perhaps we could use pdf_obj* known_page_objs[] = {...} and create PdfObj
299 wrappers as used - this would avoid any cleanup at exit. And it's a general
300 solution to problem of ordering of cleanup of globals.
301 */
302 static pdf_obj* known_page_objs[] = {
303 PDF_NAME(Contents),
304 PDF_NAME(Resources),
305 PDF_NAME(MediaBox),
306 PDF_NAME(CropBox),
307 PDF_NAME(BleedBox),
308 PDF_NAME(TrimBox),
309 PDF_NAME(ArtBox),
310 PDF_NAME(Rotate),
311 PDF_NAME(UserUnit)
312 };
313 int known_page_objs_num = sizeof(known_page_objs) / sizeof(known_page_objs[0]);
314 mupdf::PdfObj page_ref = mupdf::pdf_lookup_page_obj(doc_src, page_from);
315
316 // make new page dict in dest doc
317 mupdf::PdfObj page_dict = mupdf::pdf_new_dict(doc_des, 4);
318 mupdf::pdf_dict_put(page_dict, PDF_NAME2(Type), PDF_NAME2(Page));
319
320 for (int i = 0; i < known_page_objs_num; ++i)
321 {
322 mupdf::PdfObj known_page_obj(known_page_objs[i]);
323 mupdf::PdfObj obj = mupdf::pdf_dict_get_inheritable(page_ref, known_page_obj);
324 if (obj.m_internal)
325 {
326 mupdf::pdf_dict_put(
327 page_dict,
328 known_page_obj,
329 mupdf::pdf_graft_mapped_object(graft_map, obj)
330 );
331 }
332 }
333
334 // Copy annotations, but skip Link, Popup, IRT, Widget types
335 // If selected, remove dict keys P (parent) and Popup
336 if (copy_annots)
337 {
338 mupdf::PdfObj old_annots = mupdf::pdf_dict_get(page_ref, PDF_NAME2(Annots));
339 int n = mupdf::pdf_array_len(old_annots);
340 if (n > 0)
341 {
342 mupdf::PdfObj new_annots = mupdf::pdf_dict_put_array(page_dict, PDF_NAME2(Annots), n);
343 for (int i = 0; i < n; i++)
344 {
345 mupdf::PdfObj o = mupdf::pdf_array_get(old_annots, i);
346 if (!o.m_internal || !mupdf::pdf_is_dict(o)) // skip non-dict items
347 {
348 continue; // skip invalid/null/non-dict items
349 }
350 if (mupdf::pdf_dict_get(o, PDF_NAME2(IRT)).m_internal) continue;
351 mupdf::PdfObj subtype = mupdf::pdf_dict_get(o, PDF_NAME2(Subtype));
352 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Link))) continue;
353 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Popup))) continue;
354 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Widget))) continue;
355 mupdf::pdf_dict_del(o, PDF_NAME2(Popup));
356 mupdf::pdf_dict_del(o, PDF_NAME2(P));
357 mupdf::PdfObj copy_o = mupdf::pdf_graft_mapped_object(graft_map, o);
358 mupdf::PdfObj annot = mupdf::pdf_new_indirect(
359 doc_des,
360 mupdf::pdf_to_num(copy_o),
361 0
362 );
363 mupdf::pdf_array_push(new_annots, annot);
364 }
365 }
366 }
367 // rotate the page
368 if (rotate != -1)
369 {
370 mupdf::pdf_dict_put_int(page_dict, PDF_NAME2(Rotate), rotate);
371 }
372 // Now add the page dictionary to dest PDF
373 mupdf::PdfObj ref = mupdf::pdf_add_object(doc_des, page_dict);
374
375 // Insert new page at specified location
376 mupdf::pdf_insert_page(doc_des, page_to, ref);
377 }
378
379 //-----------------------------------------------------------------------------
380 // Copy a range of pages (spage, epage) from a source PDF to a specified
381 // location (apage) of the target PDF.
382 // If spage > epage, the sequence of source pages is reversed.
383 //-----------------------------------------------------------------------------
384 static void JM_merge_range(
385 mupdf::PdfDocument& doc_des,
386 mupdf::PdfDocument& doc_src,
387 int spage,
388 int epage,
389 int apage,
390 int rotate,
391 int links,
392 int annots,
393 int show_progress,
394 mupdf::PdfGraftMap& graft_map
395 )
396 {
397 int afterpage = apage;
398 int counter = 0; // copied pages counter
399 int total = mupdf::ll_fz_absi(epage - spage) + 1; // total pages to copy
400
401 if (spage < epage)
402 {
403 for (int page = spage; page <= epage; page++, afterpage++)
404 {
405 page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map);
406 counter++;
407 if (show_progress > 0 && counter % show_progress == 0)
408 {
409 messagef("Inserted %i of %i pages.", counter, total);
410 }
411 }
412 }
413 else
414 {
415 for (int page = spage; page >= epage; page--, afterpage++)
416 {
417 page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map);
418 counter++;
419 if (show_progress > 0 && counter % show_progress == 0)
420 {
421 messagef("Inserted %i of %i pages.", counter, total);
422 }
423 }
424 }
425 }
426
427 static bool JM_have_operation(mupdf::PdfDocument& pdf)
428 {
429 // Ensure valid journalling state
430 if (pdf.m_internal->journal and !mupdf::pdf_undoredo_step(pdf, 0))
431 {
432 return 0;
433 }
434 return 1;
435 }
436
437 static void JM_ensure_operation(mupdf::PdfDocument& pdf)
438 {
439 if (!JM_have_operation(pdf))
440 {
441 throw std::runtime_error("No journalling operation started");
442 }
443 }
444
445
446 static void FzDocument_insert_pdf(
447 mupdf::FzDocument& doc,
448 mupdf::FzDocument& src,
449 int from_page,
450 int to_page,
451 int start_at,
452 int rotate,
453 int links,
454 int annots,
455 int show_progress,
456 int final,
457 mupdf::PdfGraftMap& graft_map
458 )
459 {
460 //std::cerr << __FILE__ << ":" << __LINE__ << ":" << __FUNCTION__ << "\n";
461 mupdf::PdfDocument pdfout = mupdf::pdf_specifics(doc);
462 mupdf::PdfDocument pdfsrc = mupdf::pdf_specifics(src);
463 int outCount = mupdf::fz_count_pages(doc);
464 int srcCount = mupdf::fz_count_pages(src);
465
466 // local copies of page numbers
467 int fp = from_page;
468 int tp = to_page;
469 int sa = start_at;
470
471 // normalize page numbers
472 fp = std::max(fp, 0); // -1 = first page
473 fp = std::min(fp, srcCount - 1); // but do not exceed last page
474
475 if (tp < 0) tp = srcCount - 1; // -1 = last page
476 tp = std::min(tp, srcCount - 1); // but do not exceed last page
477
478 if (sa < 0) sa = outCount; // -1 = behind last page
479 sa = std::min(sa, outCount); // but that is also the limit
480
481 if (!pdfout.m_internal || !pdfsrc.m_internal)
482 {
483 throw std::runtime_error("source or target not a PDF");
484 }
485 JM_ensure_operation(pdfout);
486 JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, graft_map);
487 }
488
489 static int page_xref(mupdf::FzDocument& this_doc, int pno)
490 {
491 int page_count = mupdf::fz_count_pages(this_doc);
492 int n = pno;
493 while (n < 0)
494 {
495 n += page_count;
496 }
497 mupdf::PdfDocument pdf = mupdf::pdf_specifics(this_doc);
498 assert(pdf.m_internal);
499 int xref = 0;
500 if (n >= page_count)
501 {
502 throw std::runtime_error(MSG_BAD_PAGENO);//, PyExc_ValueError);
503 }
504 xref = mupdf::pdf_to_num(mupdf::pdf_lookup_page_obj(pdf, n));
505 return xref;
506 }
507
508 static void _newPage(mupdf::PdfDocument& pdf, int pno=-1, float width=595, float height=842)
509 {
510 if (!pdf.m_internal)
511 {
512 throw std::runtime_error("is no PDF");
513 }
514 mupdf::FzRect mediabox(0, 0, width, height);
515 if (pno < -1)
516 {
517 throw std::runtime_error("bad page number(s)"); // Should somehow be Python ValueError
518 }
519 JM_ensure_operation(pdf);
520 // create /Resources and /Contents objects
521 mupdf::PdfObj resources = mupdf::pdf_add_new_dict(pdf, 1);
522 mupdf::FzBuffer contents;
523 mupdf::PdfObj page_obj = mupdf::pdf_add_page(pdf, mediabox, 0, resources, contents);
524 mupdf::pdf_insert_page(pdf, pno, page_obj);
525 }
526
527 static void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842)
528 {
529 mupdf::PdfDocument pdf = mupdf::pdf_specifics(self);
530 _newPage(pdf, pno, width, height);
531 }
532
533
534 //------------------------------------------------------------------------
535 // return the annotation names (list of /NM entries)
536 //------------------------------------------------------------------------
537 static std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page)
538 {
539 std::vector< std::string> names;
540 mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots));
541 if (!annots.m_internal) return names;
542 int n = mupdf::pdf_array_len(annots);
543 for (int i = 0; i < n; i++)
544 {
545 mupdf::PdfObj annot_obj = mupdf::pdf_array_get(annots, i);
546 mupdf::PdfObj name = mupdf::pdf_dict_gets(annot_obj, "NM");
547 if (name.m_internal)
548 {
549 names.push_back(mupdf::pdf_to_text_string(name));
550 }
551 }
552 return names;
553 }
554
555
556 //------------------------------------------------------------------------
557 // Add a unique /NM key to an annotation or widget.
558 // Append a number to 'stem' such that the result is a unique name.
559 //------------------------------------------------------------------------
560 static void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem)
561 {
562 mupdf::PdfPage page = mupdf::pdf_annot_page(annot);
563 mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot);
564 std::vector< std::string> names = JM_get_annot_id_list(page);
565 char* stem_id = nullptr;
566 for (int i=0; ; ++i)
567 {
568 free(stem_id);
569 asprintf(&stem_id, "fitz-%s%d", stem, i);
570 if (std::find(names.begin(), names.end(), stem_id) == names.end())
571 {
572 break;
573 }
574 }
575 mupdf::PdfObj name = mupdf::pdf_new_string(stem_id, strlen(stem_id));
576 free(stem_id);
577 mupdf::pdf_dict_puts(annot_obj, "NM", name);
578 page.m_internal->doc->resynth_required = 0;
579 }
580
581 //----------------------------------------------------------------
582 // page add_caret_annot
583 //----------------------------------------------------------------
584 static mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& page, mupdf::FzPoint& point)
585 {
586 mupdf::PdfAnnot annot = mupdf::pdf_create_annot(page, ::PDF_ANNOT_CARET);
587 mupdf::FzPoint p = point;
588 mupdf::FzRect r = mupdf::pdf_annot_rect(annot);
589 r = mupdf::fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0);
590 mupdf::pdf_set_annot_rect(annot, r);
591 mupdf::pdf_update_annot(annot);
592 JM_add_annot_id(annot, "A");
593 return annot;
594 }
595
596 static mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& page, mupdf::FzPoint& point)
597 {
598 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page);
599 return _add_caret_annot(pdf_page, point);
600 }
601
602 static const char* Tools_parse_da(mupdf::PdfAnnot& this_annot)
603 {
604 const char* da_str = nullptr;
605 mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot);
606 mupdf::PdfDocument pdf = mupdf::pdf_get_bound_document(this_annot_obj);
607 try
608 {
609 mupdf::PdfObj da = mupdf::pdf_dict_get_inheritable(this_annot_obj, PDF_NAME2(DA));
610 if (!da.m_internal)
611 {
612 mupdf::PdfObj trailer = mupdf::pdf_trailer(pdf);
613 da = mupdf::pdf_dict_getl(
614 &trailer,
615 PDF_NAME(Root),
616 PDF_NAME(AcroForm),
617 PDF_NAME(DA),
618 nullptr
619 );
620 }
621 da_str = mupdf::pdf_to_text_string(da);
622 }
623 catch (std::exception&)
624 {
625 return nullptr;
626 }
627 return da_str;
628 }
629
630 //----------------------------------------------------------------------------
631 // Turn fz_buffer into a Python bytes object
632 //----------------------------------------------------------------------------
633 static PyObject* JM_BinFromBuffer(fz_buffer* buffer)
634 {
635 if (!buffer)
636 {
637 return PyBytes_FromStringAndSize("", 0);
638 }
639 unsigned char* c = nullptr;
640 size_t len = mupdf::ll_fz_buffer_storage(buffer, &c);
641 return PyBytes_FromStringAndSize((const char*) c, len);
642 }
643 static PyObject* JM_BinFromBuffer(mupdf::FzBuffer& buffer)
644 {
645 return JM_BinFromBuffer( buffer.m_internal);
646 }
647
648 static PyObject* Annot_getAP(mupdf::PdfAnnot& annot)
649 {
650 mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot);
651 mupdf::PdfObj ap = mupdf::pdf_dict_getl(
652 &annot_obj,
653 PDF_NAME(AP),
654 PDF_NAME(N),
655 nullptr
656 );
657 if (mupdf::pdf_is_stream(ap))
658 {
659 mupdf::FzBuffer res = mupdf::pdf_load_stream(ap);
660 return JM_BinFromBuffer(res);
661 }
662 return PyBytes_FromStringAndSize("", 0);
663 }
664
665 void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str)
666 {
667 mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot);
668 mupdf::pdf_dict_put_text_string(this_annot_obj, PDF_NAME2(DA), da_str);
669 mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(DS)); /* not supported */
670 mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(RC)); /* not supported */
671 }
672
673 static int
674 jm_float_item(PyObject* obj, Py_ssize_t idx, double* result)
675 {
676 PyObject* temp = PySequence_ITEM(obj, idx);
677 if (!temp) return 1;
678 *result = PyFloat_AsDouble(temp);
679 Py_DECREF(temp);
680 if (PyErr_Occurred())
681 {
682 PyErr_Clear();
683 return 1;
684 }
685 return 0;
686 }
687
688
689 static mupdf::FzPoint JM_point_from_py(PyObject* p)
690 {
691 fz_point p0 = fz_make_point(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT);
692 if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2)
693 {
694 return p0;
695 }
696 double x;
697 double y;
698 if (jm_float_item(p, 0, &x) == 1) return p0;
699 if (jm_float_item(p, 1, &y) == 1) return p0;
700 if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT;
701 if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT;
702 if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT;
703 if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT;
704
705 return fz_make_point(x, y);
706 }
707
708 static int s_list_append_drop(PyObject* list, PyObject* item)
709 {
710 if (!list || !PyList_Check(list) || !item)
711 {
712 return -2;
713 }
714 int rc = PyList_Append(list, item);
715 Py_DECREF(item);
716 return rc;
717 }
718
719 static int LIST_APPEND_DROP(PyObject *list, PyObject *item)
720 {
721 if (!list || !PyList_Check(list) || !item) return -2;
722 int rc = PyList_Append(list, item);
723 Py_DECREF(item);
724 return rc;
725 }
726
727 static int LIST_APPEND(PyObject *list, PyObject *item)
728 {
729 if (!list || !PyList_Check(list) || !item) return -2;
730 int rc = PyList_Append(list, item);
731 return rc;
732 }
733
734 static int DICT_SETITEM_DROP(PyObject *dict, PyObject *key, PyObject *value)
735 {
736 if (!dict || !PyDict_Check(dict) || !key || !value) return -2;
737 int rc = PyDict_SetItem(dict, key, value);
738 Py_DECREF(value);
739 return rc;
740 }
741
742 static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value)
743 {
744 if (!dict || !PyDict_Check(dict) || !key || !value) return -2;
745 int rc = PyDict_SetItemString(dict, key, value);
746 Py_DECREF(value);
747 return rc;
748 }
749
750
751 //-----------------------------------------------------------------------------
752 // Functions converting between PySequences and pymupdf geometry objects
753 //-----------------------------------------------------------------------------
754 static int
755 jm_init_item(PyObject* obj, Py_ssize_t idx, int* result)
756 {
757 PyObject* temp = PySequence_ITEM(obj, idx);
758 if (!temp)
759 {
760 return 1;
761 }
762 if (PyLong_Check(temp))
763 {
764 *result = (int) PyLong_AsLong(temp);
765 Py_DECREF(temp);
766 }
767 else if (PyFloat_Check(temp))
768 {
769 *result = (int) PyFloat_AsDouble(temp);
770 Py_DECREF(temp);
771 }
772 else
773 {
774 Py_DECREF(temp);
775 return 1;
776 }
777 if (PyErr_Occurred())
778 {
779 PyErr_Clear();
780 return 1;
781 }
782 return 0;
783 }
784
785 // TODO: ------------------------------------------------------------------
786 // This is a temporary solution and should be replaced by a C++ extension:
787 // There is no way in Python specify an array of fz_point - as is required
788 // for function pdf_set_annot_callout_line().
789 static void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count)
790 {
791 fz_point points[3];
792 mupdf::FzPoint p;
793 for (int i = 0; i < count; i++)
794 {
795 p = JM_point_from_py(PyTuple_GetItem(callout, (Py_ssize_t) i));
796 points[i] = fz_make_point(p.x, p.y);
797 }
798 mupdf::pdf_set_annot_callout_line(annot, points, count);
799 }
800
801
802 //----------------------------------------------------------------------------
803 // Return list of outline xref numbers. Recursive function. Arguments:
804 // 'obj' first OL item
805 // 'xrefs' empty Python list
806 //----------------------------------------------------------------------------
// The xref numbers are appended to 'xrefs', and 'xrefs' itself is returned.
// For each item, the item's own number is appended first, then the function
// calls itself for the item's /First entry (if that is a dict), then it
// continues with the item's /Next entry.
807 static PyObject* JM_outline_xrefs(mupdf::PdfObj obj, PyObject* xrefs)
808 {
809 if (!obj.m_internal)
810 {
811 return xrefs;
812 }
813 PyObject* newxref = nullptr;
814 mupdf::PdfObj thisobj = obj;
815 while (thisobj.m_internal)
816 {
817 int nxr = mupdf::pdf_to_num(thisobj);
818 newxref = PyLong_FromLong((long) nxr);
// Stop if the number is already in the list or if the object has a /Type
// entry. The result of PySequence_Contains() is only tested for non-zero.
819 if (PySequence_Contains(xrefs, newxref)
820 or mupdf::pdf_dict_get(thisobj, PDF_NAME2(Type)).m_internal
821 )
822 {
823 // circular ref or top of chain: terminate
824 Py_DECREF(newxref);
825 break;
826 }
// s_list_append_drop() releases our reference to newxref.
827 s_list_append_drop(xrefs, newxref);
828 mupdf::PdfObj first = mupdf::pdf_dict_get(thisobj, PDF_NAME2(First)); // try go down
829 if (mupdf::pdf_is_dict(first))
830 {
831 xrefs = JM_outline_xrefs(first, xrefs);
832 }
833 thisobj = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Next)); // try go next
// NOTE(review): /Parent is read from the value that was just assigned to
// thisobj (the /Next entry), not from the item processed in this iteration -
// confirm that this is intended.
834 mupdf::PdfObj parent = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Parent)); // get parent
835 if (!mupdf::pdf_is_dict(thisobj))
836 {
837 thisobj = parent;
838 }
839 }
840 return xrefs;
841 }
842
843
/* Global Python string objects used as dictionary keys. All of them start as
NULL; the `%init` block of this file assigns interned strings to them.

NOTE(review): `dictkey_char_bidi` has no assignment in the `%init` block -
confirm that it is either set elsewhere or unused. */
844 PyObject* dictkey_align = NULL;
845 PyObject* dictkey_ascender = NULL;
846 PyObject* dictkey_bidi = NULL;
847 PyObject* dictkey_bbox = NULL;
848 PyObject* dictkey_blocks = NULL;
849 PyObject* dictkey_bpc = NULL;
850 PyObject* dictkey_c = NULL;
851 PyObject* dictkey_chars = NULL;
852 PyObject* dictkey_color = NULL;
853 PyObject* dictkey_colorspace = NULL;
854 PyObject* dictkey_content = NULL;
855 PyObject* dictkey_creationDate = NULL;
856 PyObject* dictkey_cs_name = NULL;
857 PyObject* dictkey_da = NULL;
858 PyObject* dictkey_dashes = NULL;
859 PyObject* dictkey_desc = NULL;
860 PyObject* dictkey_descender = NULL;
861 PyObject* dictkey_dir = NULL;
862 PyObject* dictkey_effect = NULL;
863 PyObject* dictkey_ext = NULL;
864 PyObject* dictkey_filename = NULL;
865 PyObject* dictkey_fill = NULL;
866 PyObject* dictkey_flags = NULL;
867 PyObject* dictkey_char_bidi = NULL;
868 PyObject* dictkey_char_flags = NULL;
869 PyObject* dictkey_font = NULL;
870 PyObject* dictkey_glyph = NULL;
871 PyObject* dictkey_height = NULL;
872 PyObject* dictkey_id = NULL;
873 PyObject* dictkey_image = NULL;
874 PyObject* dictkey_items = NULL;
875 PyObject* dictkey_length = NULL;
876 PyObject* dictkey_lines = NULL;
877 PyObject* dictkey_matrix = NULL;
878 PyObject* dictkey_modDate = NULL;
879 PyObject* dictkey_name = NULL;
880 PyObject* dictkey_number = NULL;
881 PyObject* dictkey_origin = NULL;
882 PyObject* dictkey_rect = NULL;
883 PyObject* dictkey_size = NULL;
884 PyObject* dictkey_smask = NULL;
885 PyObject* dictkey_spans = NULL;
886 PyObject* dictkey_stroke = NULL;
887 PyObject* dictkey_style = NULL;
888 PyObject* dictkey_subject = NULL;
889 PyObject* dictkey_text = NULL;
890 PyObject* dictkey_title = NULL;
891 PyObject* dictkey_type = NULL;
892 PyObject* dictkey_ufilename = NULL;
893 PyObject* dictkey_width = NULL;
894 PyObject* dictkey_wmode = NULL;
895 PyObject* dictkey_xref = NULL;
896 PyObject* dictkey_xres = NULL;
897 PyObject* dictkey_yres = NULL;
898
899 static int dict_setitem_drop(PyObject* dict, PyObject* key, PyObject* value)
900 {
901 if (!dict || !PyDict_Check(dict) || !key || !value)
902 {
903 return -2;
904 }
905 int rc = PyDict_SetItem(dict, key, value);
906 Py_DECREF(value);
907 return rc;
908 }
909
910 static int dict_setitemstr_drop(PyObject* dict, const char* key, PyObject* value)
911 {
912 if (!dict || !PyDict_Check(dict) || !key || !value)
913 {
914 return -2;
915 }
916 int rc = PyDict_SetItemString(dict, key, value);
917 Py_DECREF(value);
918 return rc;
919 }
920
921
922 static void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items)
923 {
924 PyObject* item=nullptr;
925 PyObject* itemdict=nullptr;
926 PyObject* xrefs=nullptr;
927
928 PyObject* bold = PyUnicode_FromString("bold");
929 PyObject* italic = PyUnicode_FromString("italic");
930 PyObject* collapse = PyUnicode_FromString("collapse");
931 PyObject* zoom = PyUnicode_FromString("zoom");
932
933 try
934 {
935 /* Need to define these things early because later code uses
936 `goto`; otherwise we get compiler warnings 'jump bypasses variable
937 initialization' */
938 int xref = 0;
939 mupdf::PdfObj root;
940 mupdf::PdfObj olroot;
941 mupdf::PdfObj first;
942 Py_ssize_t n;
943 Py_ssize_t m;
944
945 root = mupdf::pdf_dict_get(mupdf::pdf_trailer(pdf), PDF_NAME2(Root));
946 if (!root.m_internal) goto end;
947
948 olroot = mupdf::pdf_dict_get(root, PDF_NAME2(Outlines));
949 if (!olroot.m_internal) goto end;
950
951 first = mupdf::pdf_dict_get(olroot, PDF_NAME2(First));
952 if (!first.m_internal) goto end;
953
954 xrefs = PyList_New(0); // pre-allocate an empty list
955 xrefs = JM_outline_xrefs(first, xrefs);
956 n = PySequence_Size(xrefs);
957 m = PySequence_Size(items);
958 if (!n) goto end;
959
960 if (n != m)
961 {
962 throw std::runtime_error("internal error finding outline xrefs");
963 }
964
965 // update all TOC item dictionaries
966 for (int i = 0; i < n; i++)
967 {
968 jm_init_item(xrefs, i, &xref);
969 item = PySequence_ITEM(items, i);
970 itemdict = PySequence_ITEM(item, 3);
971 if (!itemdict || !PyDict_Check(itemdict))
972 {
973 throw std::runtime_error("need non-simple TOC format");
974 }
975 PyDict_SetItem(itemdict, dictkey_xref, PySequence_ITEM(xrefs, i));
976 mupdf::PdfObj bm = mupdf::pdf_load_object(pdf, xref);
977 int flags = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(F)));
978 if (flags == 1)
979 {
980 PyDict_SetItem(itemdict, italic, Py_True);
981 }
982 else if (flags == 2)
983 {
984 PyDict_SetItem(itemdict, bold, Py_True);
985 }
986 else if (flags == 3)
987 {
988 PyDict_SetItem(itemdict, italic, Py_True);
989 PyDict_SetItem(itemdict, bold, Py_True);
990 }
991 int count = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(Count)));
992 if (count < 0)
993 {
994 PyDict_SetItem(itemdict, collapse, Py_True);
995 }
996 else if (count > 0)
997 {
998 PyDict_SetItem(itemdict, collapse, Py_False);
999 }
1000 mupdf::PdfObj col = mupdf::pdf_dict_get(bm, PDF_NAME2(C));
1001 if (mupdf::pdf_is_array(col) && mupdf::pdf_array_len(col) == 3)
1002 {
1003 PyObject* color = PyTuple_New(3);
1004 PyTuple_SET_ITEM(color, 0, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 0))));
1005 PyTuple_SET_ITEM(color, 1, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 1))));
1006 PyTuple_SET_ITEM(color, 2, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 2))));
1007 dict_setitem_drop(itemdict, dictkey_color, color);
1008 }
1009 float z=0;
1010 mupdf::PdfObj obj = mupdf::pdf_dict_get(bm, PDF_NAME2(Dest));
1011 if (!obj.m_internal || !mupdf::pdf_is_array(obj))
1012 {
1013 obj = mupdf::pdf_dict_getl(&bm, PDF_NAME(A), PDF_NAME(D), nullptr);
1014 }
1015 if (mupdf::pdf_is_array(obj) && mupdf::pdf_array_len(obj) == 5)
1016 {
1017 z = mupdf::pdf_to_real(mupdf::pdf_array_get(obj, 4));
1018 }
1019 dict_setitem_drop(itemdict, zoom, Py_BuildValue("f", z));
1020 PyList_SetItem(item, 3, itemdict);
1021 PyList_SetItem(items, i, item);
1022 }
1023 end:;
1024 }
1025 catch (std::exception&)
1026 {
1027 }
1028 Py_CLEAR(xrefs);
1029 Py_CLEAR(bold);
1030 Py_CLEAR(italic);
1031 Py_CLEAR(collapse);
1032 Py_CLEAR(zoom);
1033 }
1034
1035 static void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items)
1036 {
1037 mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document);
1038 return Document_extend_toc_items(pdf, items);
1039 }
1040
1041 //-----------------------------------------------------------------------------
1042 // PySequence from fz_rect
1043 //-----------------------------------------------------------------------------
1044 static PyObject* JM_py_from_rect(fz_rect r)
1045 {
1046 return Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1);
1047 }
1048 static PyObject* JM_py_from_rect(mupdf::FzRect r)
1049 {
1050 return JM_py_from_rect(*r.internal());
1051 }
1052
1053 //-----------------------------------------------------------------------------
1054 // PySequence from fz_point
1055 //-----------------------------------------------------------------------------
1056 static PyObject* JM_py_from_point(fz_point p)
1057 {
1058 return Py_BuildValue("ff", p.x, p.y);
1059 }
1060
1061 //-----------------------------------------------------------------------------
1062 // PySequence from fz_quad.
1063 //-----------------------------------------------------------------------------
1064 static PyObject *
1065 JM_py_from_quad(fz_quad q)
1066 {
1067 return Py_BuildValue("((f,f),(f,f),(f,f),(f,f))",
1068 q.ul.x, q.ul.y, q.ur.x, q.ur.y,
1069 q.ll.x, q.ll.y, q.lr.x, q.lr.y);
1070 }
1071
1072 //----------------------------------------------------------------
1073 // annotation rectangle
1074 //----------------------------------------------------------------
1075 static mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot)
1076 {
1077 mupdf::FzRect rect = mupdf::pdf_bound_annot(annot);
1078 return rect;
1079 }
1080
1081 static PyObject* Annot_rect3(mupdf::PdfAnnot& annot)
1082 {
1083 fz_rect rect = mupdf::ll_pdf_bound_annot(annot.m_internal);
1084 return JM_py_from_rect(rect);
1085 }
1086
1087 //-----------------------------------------------------------------------------
1088 // PySequence to fz_rect. Default: infinite rect
1089 //-----------------------------------------------------------------------------
1090 static fz_rect JM_rect_from_py(PyObject* r)
1091 {
1092 if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4)
1093 {
1094 return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal();// fz_infinite_rect;
1095 }
1096 double f[4];
1097 for (int i = 0; i < 4; i++)
1098 {
1099 if (jm_float_item(r, i, &f[i]) == 1)
1100 {
1101 return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal();
1102 }
1103 if (f[i] < FZ_MIN_INF_RECT) f[i] = FZ_MIN_INF_RECT;
1104 if (f[i] > FZ_MAX_INF_RECT) f[i] = FZ_MAX_INF_RECT;
1105 }
1106 return mupdf::ll_fz_make_rect(
1107 (float) f[0],
1108 (float) f[1],
1109 (float) f[2],
1110 (float) f[3]
1111 );
1112 }
1113
1114 //-----------------------------------------------------------------------------
1115 // PySequence to fz_matrix. Default: fz_identity
1116 //-----------------------------------------------------------------------------
1117 static fz_matrix JM_matrix_from_py(PyObject* m)
1118 {
1119 double a[6];
1120
1121 if (!m || !PySequence_Check(m) || PySequence_Size(m) != 6)
1122 {
1123 return fz_identity;
1124 }
1125 for (int i = 0; i < 6; i++)
1126 {
1127 if (jm_float_item(m, i, &a[i]) == 1)
1128 {
1129 return *mupdf::FzMatrix().internal();
1130 }
1131 }
1132 return mupdf::ll_fz_make_matrix(
1133 (float) a[0],
1134 (float) a[1],
1135 (float) a[2],
1136 (float) a[3],
1137 (float) a[4],
1138 (float) a[5]
1139 );
1140 }
1141
1142 PyObject* util_transform_rect(PyObject* rect, PyObject* matrix)
1143 {
1144 return JM_py_from_rect(
1145 mupdf::ll_fz_transform_rect(
1146 JM_rect_from_py(rect),
1147 JM_matrix_from_py(matrix)
1148 )
1149 );
1150 }
1151
1152 //----------------------------------------------------------------------------
1153 // return normalized /Rotate value:one of 0, 90, 180, 270
1154 //----------------------------------------------------------------------------
// Normalize a rotation angle in degrees.
//
// The angle is first reduced to the range [0, 360). The reduced value is
// returned when it is a multiple of 90 (0, 90, 180 or 270); any other angle
// yields 0.
static int JM_norm_rotation(int rotate)
{
    // Modulo replaces the former add/subtract-360 loops: same result, but
    // constant time even for very large magnitudes.
    rotate %= 360;
    if (rotate < 0)
    {
        rotate += 360;
    }
    return (rotate % 90 == 0) ? rotate : 0;
}
1162
1163
1164 //----------------------------------------------------------------------------
1165 // return a PDF page's /Rotate value: one of (0, 90, 180, 270)
1166 //----------------------------------------------------------------------------
1167 static int JM_page_rotation(mupdf::PdfPage& page)
1168 {
1169 int rotate = 0;
1170 rotate = mupdf::pdf_to_int(
1171 mupdf::pdf_dict_get_inheritable(page.obj(), PDF_NAME2(Rotate))
1172 );
1173 rotate = JM_norm_rotation(rotate);
1174 return rotate;
1175 }
1176
1177
1178 //----------------------------------------------------------------------------
1179 // return a PDF page's MediaBox
1180 //----------------------------------------------------------------------------
1181 static mupdf::FzRect JM_mediabox(mupdf::PdfObj& page_obj)
1182 {
1183 mupdf::FzRect mediabox = mupdf::pdf_to_rect(
1184 mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(MediaBox))
1185 );
1186 if (mupdf::fz_is_empty_rect(mediabox) || mupdf::fz_is_infinite_rect(mediabox))
1187 {
1188 mediabox.x0 = 0;
1189 mediabox.y0 = 0;
1190 mediabox.x1 = 612;
1191 mediabox.y1 = 792;
1192 }
1193 mupdf::FzRect page_mediabox;
1194 page_mediabox.x0 = mupdf::fz_min(mediabox.x0, mediabox.x1);
1195 page_mediabox.y0 = mupdf::fz_min(mediabox.y0, mediabox.y1);
1196 page_mediabox.x1 = mupdf::fz_max(mediabox.x0, mediabox.x1);
1197 page_mediabox.y1 = mupdf::fz_max(mediabox.y0, mediabox.y1);
1198 if (0
1199 || page_mediabox.x1 - page_mediabox.x0 < 1
1200 || page_mediabox.y1 - page_mediabox.y0 < 1
1201 )
1202 {
1203 page_mediabox = *mupdf::FzRect(mupdf::FzRect::Fixed_UNIT).internal(); //fz_unit_rect;
1204 }
1205 return page_mediabox;
1206 }
1207
1208
1209 //----------------------------------------------------------------------------
1210 // return a PDF page's CropBox
1211 //----------------------------------------------------------------------------
1212 mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj)
1213 {
1214 mupdf::FzRect mediabox = JM_mediabox(page_obj);
1215 mupdf::FzRect cropbox = mupdf::pdf_to_rect(
1216 mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(CropBox))
1217 );
1218 if (mupdf::fz_is_infinite_rect(cropbox) || mupdf::fz_is_empty_rect(cropbox))
1219 {
1220 cropbox = mediabox;
1221 }
1222 float y0 = mediabox.y1 - cropbox.y1;
1223 float y1 = mediabox.y1 - cropbox.y0;
1224 cropbox.y0 = y0;
1225 cropbox.y1 = y1;
1226 return cropbox;
1227 }
1228
1229
1230 //----------------------------------------------------------------------------
1231 // calculate width and height of the UNROTATED page
1232 //----------------------------------------------------------------------------
1233 static mupdf::FzPoint JM_cropbox_size(mupdf::PdfObj& page_obj)
1234 {
1235 mupdf::FzPoint size;
1236 mupdf::FzRect rect = JM_cropbox(page_obj);
1237 float w = (rect.x0 < rect.x1) ? rect.x1 - rect.x0 : rect.x0 - rect.x1;
1238 float h = (rect.y0 < rect.y1) ? rect.y1 - rect.y0 : rect.y0 - rect.y1;
1239 size = fz_make_point(w, h);
1240 return size;
1241 }
1242
1243
1244 //----------------------------------------------------------------------------
1245 // calculate page rotation matrices
1246 //----------------------------------------------------------------------------
1247 static mupdf::FzMatrix JM_rotate_page_matrix(mupdf::PdfPage& page)
1248 {
1249 if (!page.m_internal)
1250 {
1251 return *mupdf::FzMatrix().internal(); // no valid pdf page given
1252 }
1253 int rotation = JM_page_rotation(page);
1254 if (rotation == 0)
1255 {
1256 return *mupdf::FzMatrix().internal(); // no rotation
1257 }
1258 auto po = page.obj();
1259 mupdf::FzPoint cb_size = JM_cropbox_size(po);
1260 float w = cb_size.x;
1261 float h = cb_size.y;
1262 mupdf::FzMatrix m;
1263 if (rotation == 90)
1264 {
1265 m = mupdf::fz_make_matrix(0, 1, -1, 0, h, 0);
1266 }
1267 else if (rotation == 180)
1268 {
1269 m = mupdf::fz_make_matrix(-1, 0, 0, -1, w, h);
1270 }
1271 else
1272 {
1273 m = mupdf::fz_make_matrix(0, -1, 1, 0, 0, w);
1274 }
1275 return m;
1276 }
1277
1278
1279 static mupdf::FzMatrix JM_derotate_page_matrix(mupdf::PdfPage& page)
1280 { // just the inverse of rotation
1281 return mupdf::fz_invert_matrix(JM_rotate_page_matrix(page));
1282 }
1283
1284 //-----------------------------------------------------------------------------
1285 // PySequence from fz_matrix
1286 //-----------------------------------------------------------------------------
1287 static PyObject* JM_py_from_matrix(mupdf::FzMatrix m)
1288 {
1289 return Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f);
1290 }
1291
1292 static mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage)
1293 {
1294 if (!pdfpage.m_internal)
1295 {
1296 return mupdf::FzMatrix();
1297 }
1298 return JM_derotate_page_matrix(pdfpage);
1299 }
1300
1301 static mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& page)
1302 {
1303 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page);
1304 return Page_derotate_matrix(pdf_page);
1305 }
1306
1307
1308 static PyObject *lll_JM_get_annot_xref_list(pdf_obj *page_obj)
1309 {
1310 fz_context* ctx = mupdf::internal_context_get();
1311 PyObject *names = PyList_New(0);
1312 pdf_obj *id, *subtype, *annots, *annot_obj;
1313 int xref, type, i, n;
1314 fz_try(ctx) {
1315 annots = pdf_dict_get(ctx, page_obj, PDF_NAME(Annots));
1316 n = pdf_array_len(ctx, annots);
1317 for (i = 0; i < n; i++) {
1318 annot_obj = pdf_array_get(ctx, annots, i);
1319 xref = pdf_to_num(ctx, annot_obj);
1320 subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype));
1321 if (!subtype) {
1322 continue; // subtype is required
1323 }
1324 type = pdf_annot_type_from_string(ctx, pdf_to_name(ctx, subtype));
1325 if (type == PDF_ANNOT_UNKNOWN) {
1326 continue; // only accept valid annot types
1327 }
1328 id = pdf_dict_gets(ctx, annot_obj, "NM");
1329 LIST_APPEND_DROP(names, Py_BuildValue("iis", xref, type, pdf_to_text_string(ctx, id)));
1330 }
1331 }
1332 fz_catch(ctx) {
1333 return names;
1334 }
1335 return names;
1336 }
1337 //------------------------------------------------------------------------
1338 // return the xrefs and /NM ids of a page's annots, links and fields
1339 //------------------------------------------------------------------------
1340 static PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj)
1341 {
1342 PyObject* names = PyList_New(0);
1343 if (!page_obj.m_internal)
1344 {
1345 return names;
1346 }
1347 return lll_JM_get_annot_xref_list( page_obj.m_internal);
1348 }
1349
1350 static mupdf::FzBuffer JM_object_to_buffer(const mupdf::PdfObj& what, int compress, int ascii)
1351 {
1352 mupdf::FzBuffer res = mupdf::fz_new_buffer(512);
1353 mupdf::FzOutput out(res);
1354 mupdf::pdf_print_obj(out, what, compress, ascii);
1355 out.fz_close_output();
1356 mupdf::fz_terminate_buffer(res);
1357 return res;
1358 }
1359
1360 static PyObject* JM_EscapeStrFromBuffer(mupdf::FzBuffer& buff)
1361 {
1362 if (!buff.m_internal)
1363 {
1364 return PyUnicode_FromString("");
1365 }
1366 unsigned char* s = nullptr;
1367 size_t len = mupdf::fz_buffer_storage(buff, &s);
1368 PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace");
1369 if (!val)
1370 {
1371 val = PyUnicode_FromString("");
1372 PyErr_Clear();
1373 }
1374 return val;
1375 }
1376
1377 static PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0)
1378 {
1379 if (!pdf.m_internal)
1380 {
1381 throw std::runtime_error(MSG_IS_NO_PDF);
1382 }
1383 int xreflen = mupdf::pdf_xref_len(pdf);
1384 if ((xref < 1 || xref >= xreflen) and xref != -1)
1385 {
1386 throw std::runtime_error(MSG_BAD_XREF);
1387 }
1388 mupdf::PdfObj obj = (xref > 0) ? mupdf::pdf_load_object(pdf, xref) : mupdf::pdf_trailer(pdf);
1389 mupdf::FzBuffer res = JM_object_to_buffer(mupdf::pdf_resolve_indirect(obj), compressed, ascii);
1390 PyObject* text = JM_EscapeStrFromBuffer(res);
1391 return text;
1392 }
1393
1394 static PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0)
1395 {
1396 mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document);
1397 return xref_object(pdf, xref, compressed, ascii);
1398 }
1399
1400
1401 //-------------------------------------
1402 // fz_output for Python file objects
1403 //-------------------------------------
1404
1405 static PyObject* Link_is_external(mupdf::FzLink& this_link)
1406 {
1407 const char* uri = this_link.m_internal->uri;
1408 if (!uri)
1409 {
1410 return PyBool_FromLong(0);
1411 }
1412 bool ret = mupdf::fz_is_external_link(uri);
1413 return PyBool_FromLong((long) ret);
1414 }
1415
1416 static mupdf::FzLink Link_next(mupdf::FzLink& this_link)
1417 {
1418 return this_link.next();
1419 }
1420
1421
1422 //-----------------------------------------------------------------------------
1423 // create PDF object from given string
1424 //-----------------------------------------------------------------------------
1425 static pdf_obj *lll_JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, const char *src)
1426 {
1427 pdf_obj *result = NULL;
1428 pdf_lexbuf lexbuf;
1429 fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src));
1430
1431 pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL);
1432
1433 fz_try(ctx) {
1434 result = pdf_parse_stm_obj(ctx, doc, stream, &lexbuf);
1435 }
1436
1437 fz_always(ctx) {
1438 pdf_lexbuf_fin(ctx, &lexbuf);
1439 fz_drop_stream(ctx, stream);
1440 }
1441
1442 fz_catch(ctx) {
1443 mupdf::internal_throw_exception(ctx);
1444 }
1445
1446 return result;
1447
1448 }
1449
1450 /*********************************************************************/
1451 // Page._addAnnot_FromString
1452 // Add new links provided as an array of string object definitions.
1453 /*********************************************************************/
1454 PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist)
1455 {
1456 PyObject* txtpy = nullptr;
1457 int lcount = (int) PySequence_Size(linklist); // link count
1458 //printf("Page_addAnnot_FromString(): lcount=%i\n", lcount);
1459 if (lcount < 1)
1460 {
1461 Py_RETURN_NONE;
1462 }
1463 try
1464 {
1465 // insert links from the provided sources
1466 if (!page.m_internal)
1467 {
1468 throw std::runtime_error(MSG_IS_NO_PDF);
1469 }
1470 if (!mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)).m_internal)
1471 {
1472 mupdf::pdf_dict_put_array(page.obj(), PDF_NAME2(Annots), lcount);
1473 }
1474 mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots));
1475 mupdf::PdfDocument doc = page.doc();
1476 //printf("lcount=%i\n", lcount);
1477 fz_context* ctx = mupdf::internal_context_get();
1478 for (int i = 0; i < lcount; i++)
1479 {
1480 const char* text = nullptr;
1481 txtpy = PySequence_ITEM(linklist, (Py_ssize_t) i);
1482 text = PyUnicode_AsUTF8(txtpy);
1483 Py_CLEAR(txtpy);
1484 if (!text)
1485 {
1486 messagef("skipping bad link / annot item %i.", i);
1487 continue;
1488 }
1489 try
1490 {
1491 pdf_obj* obj = lll_JM_pdf_obj_from_str(ctx, doc.m_internal, text);
1492 pdf_obj* annot = pdf_add_object_drop(
1493 ctx,
1494 doc.m_internal,
1495 obj
1496 );
1497 pdf_obj* ind_obj = pdf_new_indirect(ctx, doc.m_internal, pdf_to_num(ctx, annot), 0);
1498 pdf_array_push_drop(ctx, annots.m_internal, ind_obj);
1499 pdf_drop_obj(ctx, annot);
1500 }
1501 catch (std::exception&)
1502 {
1503 messagef("skipping bad link / annot item %i.", i);
1504 }
1505 }
1506 }
1507 catch (std::exception&)
1508 {
1509 PyErr_Clear();
1510 return nullptr;
1511 }
1512 Py_RETURN_NONE;
1513 }
1514
1515 PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist)
1516 {
1517 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page);
1518 return Page_addAnnot_FromString(pdf_page, linklist);
1519 }
1520
1521 static int page_count_fz2(void* document)
1522 {
1523 mupdf::FzDocument* document2 = (mupdf::FzDocument*) document;
1524 return mupdf::fz_count_pages(*document2);
1525 }
1526
1527 static int page_count_fz(mupdf::FzDocument& document)
1528 {
1529 return mupdf::fz_count_pages(document);
1530 }
1531
1532 static int page_count_pdf(mupdf::PdfDocument& pdf)
1533 {
1534 mupdf::FzDocument document = pdf.super();
1535 return page_count_fz(document);
1536 }
1537
1538 static int page_count(mupdf::FzDocument& document)
1539 {
1540 return mupdf::fz_count_pages(document);
1541 }
1542
1543 static int page_count(mupdf::PdfDocument& pdf)
1544 {
1545 mupdf::FzDocument document = pdf.super();
1546 return page_count(document);
1547 }
1548
1549 static PyObject* page_annot_xrefs(mupdf::FzDocument& document, mupdf::PdfDocument& pdf, int pno)
1550 {
1551 int page_count = mupdf::fz_count_pages(document);
1552 int n = pno;
1553 while (n < 0)
1554 {
1555 n += page_count;
1556 }
1557 PyObject* annots = nullptr;
1558 if (n >= page_count)
1559 {
1560 throw std::runtime_error(MSG_BAD_PAGENO);
1561 }
1562 if (!pdf.m_internal)
1563 {
1564 throw std::runtime_error(MSG_IS_NO_PDF);
1565 }
1566 annots = JM_get_annot_xref_list(mupdf::pdf_lookup_page_obj(pdf, n));
1567 return annots;
1568 }
1569
1570 static PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno)
1571 {
1572 mupdf::PdfDocument pdf = mupdf::pdf_specifics(document);
1573 return page_annot_xrefs(document, pdf, pno);
1574 }
1575
1576 static PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno)
1577 {
1578 mupdf::FzDocument document = pdf.super();
1579 return page_annot_xrefs(document, pdf, pno);
1580 }
1581
1582 static bool Outline_is_external(mupdf::FzOutline* outline)
1583 {
1584 if (!outline->m_internal->uri)
1585 {
1586 return false;
1587 }
1588 return mupdf::ll_fz_is_external_link(outline->m_internal->uri);
1589 }
1590
1591 int ll_fz_absi(int i)
1592 {
1593 return mupdf::ll_fz_absi(i);
1594 }
1595
// Bit flags describing font properties of text; they are combined by addition
// in the flag-building code of this file.
enum
{
    TEXT_FONT_SUPERSCRIPT = 1 << 0,  // 1
    TEXT_FONT_ITALIC      = 1 << 1,  // 2
    TEXT_FONT_SERIFED     = 1 << 2,  // 4
    TEXT_FONT_MONOSPACED  = 1 << 3,  // 8
    TEXT_FONT_BOLD        = 1 << 4,  // 16
};
1604
// Global switches, all off (0) by default.

// When non-zero, JM_font_ascender() / JM_font_descender() return the fixed
// values 0.8 / -0.2 instead of asking the font.
int g_skip_quad_corrections = 0;

// When non-zero, JM_font_name() returns font names unchanged, i.e. it keeps a
// six-character prefix ending in '+'.
int g_subset_fontnames = 0;

// Not read anywhere in this part of the file.
int g_small_glyph_heights = 0;

// Set g_skip_quad_corrections.
void set_skip_quad_corrections(int on)
{
    g_skip_quad_corrections = on;
}

// Set g_subset_fontnames.
void set_subset_fontnames(int on)
{
    g_subset_fontnames = on;
}

// Set g_small_glyph_heights.
void set_small_glyph_heights(int on)
{
    g_small_glyph_heights = on;
}
1623
1624 struct jm_lineart_device
1625 {
1626 fz_device super;
1627
1628 PyObject* out = {};
1629 PyObject* method = {};
1630 PyObject* pathdict = {};
1631 PyObject* scissors = {};
1632 float pathfactor = {};
1633 fz_matrix ctm = {};
1634 fz_matrix ptm = {};
1635 fz_matrix rot = {};
1636 fz_point lastpoint = {};
1637 fz_point firstpoint = {};
1638 int havemove = 0;
1639 fz_rect pathrect = {};
1640 int clips = {};
1641 int linecount = {};
1642 float linewidth = {};
1643 int path_type = {};
1644 long depth = {};
1645 size_t seqno = {};
1646 char* layer_name;
1647 };
1648
1649
1650 static void jm_lineart_drop_device(fz_context *ctx, fz_device *dev_)
1651 {
1652 jm_lineart_device *dev = (jm_lineart_device *)dev_;
1653 if (PyList_Check(dev->out)) {
1654 Py_CLEAR(dev->out);
1655 }
1656 Py_CLEAR(dev->method);
1657 Py_CLEAR(dev->scissors);
1658 mupdf::ll_fz_free(dev->layer_name);
1659 dev->layer_name = nullptr;
1660 }
1661
1662 typedef jm_lineart_device jm_tracedraw_device;
1663
1664 // need own versions of ascender / descender
1665 static float JM_font_ascender(fz_font* font)
1666 {
1667 if (g_skip_quad_corrections)
1668 {
1669 return 0.8f;
1670 }
1671 return mupdf::ll_fz_font_ascender(font);
1672 }
1673
1674 static float JM_font_descender(fz_font* font)
1675 {
1676 if (g_skip_quad_corrections)
1677 {
1678 return -0.2f;
1679 }
1680 return mupdf::ll_fz_font_descender(font);
1681 }
1682
1683
1684 //----------------------------------------------------------------
1685 // Return true if character is considered to be a word delimiter
1686 //----------------------------------------------------------------
1687 static int
1688 JM_is_word_delimiter(int c, PyObject *delimiters)
1689 {
1690 if (c <= 32 || c == 160) return 1; // a standard delimiter
1691 if (0x202a <= c && c <= 0x202e)
1692 {
1693 return 1; // change between writing directions
1694 }
1695
1696 // extra delimiters must be a non-empty sequence
1697 if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) {
1698 return 0;
1699 }
1700
1701 // convert to tuple for easier looping
1702 PyObject *delims = PySequence_Tuple(delimiters);
1703 if (!delims) {
1704 PyErr_Clear();
1705 return 0;
1706 }
1707
1708 // Make 1-char PyObject from character given as integer
1709 PyObject *cchar = Py_BuildValue("C", c); // single character PyObject
1710 Py_ssize_t i, len = PyTuple_Size(delims);
1711 for (i = 0; i < len; i++) {
1712 int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i));
1713 if (rc == 0) { // equal to a delimiter character
1714 Py_DECREF(cchar);
1715 Py_DECREF(delims);
1716 PyErr_Clear();
1717 return 1;
1718 }
1719 }
1720
1721 Py_DECREF(delims);
1722 PyErr_Clear();
1723 return 0;
1724 }
1725
// Return 1 when code point `c` lies in the range 0x590..0x900 (both ends
// included), which this file treats as right-to-left; otherwise 0.
static int
JM_is_rtl_char(int c)
{
    const bool in_range = (c >= 0x590) && (c <= 0x900);
    return in_range ? 1 : 0;
}
1732
1733 static const char* JM_font_name(fz_font* font)
1734 {
1735 const char* name = mupdf::ll_fz_font_name(font);
1736 const char* s = strchr(name, '+');
1737 if (g_subset_fontnames || !s || s-name != 6)
1738 {
1739 return name;
1740 }
1741 return s + 1;
1742 }
1743
1744 static int detect_super_script(fz_stext_line *line, fz_stext_char *ch)
1745 {
1746 if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0)
1747 {
1748 return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
1749 }
1750 return 0;
1751 }
1752
1753 static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch)
1754 {
1755 int flags = 0;
1756 if (line && ch)
1757 {
1758 flags += detect_super_script(line, ch) * TEXT_FONT_SUPERSCRIPT;
1759 }
1760 flags += mupdf::ll_fz_font_is_italic(font) * TEXT_FONT_ITALIC;
1761 flags += mupdf::ll_fz_font_is_serif(font) * TEXT_FONT_SERIFED;
1762 flags += mupdf::ll_fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED;
1763 flags += mupdf::ll_fz_font_is_bold(font) * TEXT_FONT_BOLD;
1764 return flags;
1765 }
1766
1767 static void jm_trace_text_span(
1768 jm_tracedraw_device* dev,
1769 fz_text_span* span,
1770 int type,
1771 fz_matrix ctm,
1772 fz_colorspace* colorspace,
1773 const float* color,
1774 float alpha,
1775 size_t seqno
1776 )
1777 {
1778 //printf("extra.jm_trace_text_span(): seqno=%zi\n", seqno);
1779 //fz_matrix join = mupdf::ll_fz_concat(span->trm, ctm);
1780 //double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d));
1781 fz_matrix mat = mupdf::ll_fz_concat(span->trm, ctm); // text transformation matrix
1782 fz_point dir = mupdf::ll_fz_transform_vector(mupdf::ll_fz_make_point(1, 0), mat); // writing direction
1783 double fsize = sqrt(dir.x * dir.x + dir.y * dir.y); // font size
1784
1785 dir = mupdf::ll_fz_normalize_vector(dir);
1786
1787 // compute effective ascender / descender
1788 double asc = (double) JM_font_ascender(span->font);
1789 double dsc = (double) JM_font_descender(span->font);
1790 if (asc < 1e-3) { // probably Tesseract font
1791 dsc = -0.1;
1792 asc = 0.9;
1793 }
1794
1795 double ascsize = asc * fsize / (asc - dsc);
1796 double dscsize = dsc * fsize / (asc - dsc);
1797 int fflags = 0; // font flags
1798 int mono = mupdf::ll_fz_font_is_monospaced(span->font);
1799 fflags += mono * TEXT_FONT_MONOSPACED;
1800 fflags += mupdf::ll_fz_font_is_italic(span->font) * TEXT_FONT_ITALIC;
1801 fflags += mupdf::ll_fz_font_is_serif(span->font) * TEXT_FONT_SERIFED;
1802 fflags += mupdf::ll_fz_font_is_bold(span->font) * TEXT_FONT_BOLD;
1803
1804 // walk through characters of span
1805 fz_matrix rot = mupdf::ll_fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0);
1806 if (dir.x == -1)
1807 {
1808 // left-right flip
1809 rot.d = 1;
1810 }
1811 PyObject* chars = PyTuple_New(span->len);
1812 double space_adv = 0;
1813 double last_adv = 0;
1814 fz_rect span_bbox;
1815
1816 for (int i = 0; i < span->len; i++)
1817 {
1818 double adv = 0;
1819 if (span->items[i].gid >= 0)
1820 {
1821 adv = (double) mupdf::ll_fz_advance_glyph(span->font, span->items[i].gid, span->wmode);
1822 }
1823 adv *= fsize;
1824 last_adv = adv;
1825 if (span->items[i].ucs == 32)
1826 {
1827 space_adv = adv;
1828 }
1829 fz_point char_orig;
1830 char_orig = fz_make_point(span->items[i].x, span->items[i].y);
1831 char_orig = fz_transform_point(char_orig, ctm);
1832 fz_matrix m1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y);
1833 m1 = mupdf::ll_fz_concat(m1, rot);
1834 m1 = mupdf::ll_fz_concat(m1, mupdf::ll_fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y));
1835 float x0 = char_orig.x;
1836 float x1 = x0 + adv;
1837 float y0;
1838 float y1;
1839 if (
1840 (mat.d > 0 && (dir.x == 1 || dir.x == -1))
1841 ||
1842 (mat.b !=0 && mat.b == -mat.c)
1843 ) // up-down flip
1844 {
1845 // up-down flip
1846 y0 = char_orig.y + dscsize;
1847 y1 = char_orig.y + ascsize;
1848 }
1849 else
1850 {
1851 y0 = char_orig.y - ascsize;
1852 y1 = char_orig.y - dscsize;
1853 }
1854 fz_rect char_bbox = mupdf::ll_fz_make_rect(x0, y0, x1, y1);
1855 char_bbox = mupdf::ll_fz_transform_rect(char_bbox, m1);
1856 PyTuple_SET_ITEM(
1857 chars,
1858 (Py_ssize_t) i,
1859 Py_BuildValue(
1860 "ii(ff)(ffff)",
1861 span->items[i].ucs,
1862 span->items[i].gid,
1863 char_orig.x,
1864 char_orig.y,
1865 char_bbox.x0,
1866 char_bbox.y0,
1867 char_bbox.x1,
1868 char_bbox.y1
1869 )
1870 );
1871 if (i > 0)
1872 {
1873 span_bbox = mupdf::ll_fz_union_rect(span_bbox, char_bbox);
1874 }
1875 else
1876 {
1877 span_bbox = char_bbox;
1878 }
1879 }
1880 if (!space_adv)
1881 {
1882 if (!(fflags & TEXT_FONT_MONOSPACED))
1883 {
1884 fz_font* out_font = nullptr;
1885 space_adv = mupdf::ll_fz_advance_glyph(
1886 span->font,
1887 mupdf::ll_fz_encode_character_with_fallback(span->font, 32, 0, 0, &out_font),
1888 span->wmode
1889 );
1890 space_adv *= fsize;
1891 if (!space_adv)
1892 {
1893 space_adv = last_adv;
1894 }
1895 }
1896 else
1897 {
1898 space_adv = last_adv; // for mono any char width suffices
1899 }
1900 }
1901 // make the span dictionary
1902 PyObject* span_dict = PyDict_New();
1903 dict_setitemstr_drop(span_dict, "dir", JM_py_from_point(dir));
1904 dict_setitem_drop(span_dict, dictkey_font, JM_EscapeStrFromStr(JM_font_name(span->font)));
1905 dict_setitem_drop(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode));
1906 dict_setitem_drop(span_dict, dictkey_flags, PyLong_FromLong((long) fflags));
1907 dict_setitemstr_drop(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level));
1908 dict_setitemstr_drop(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir));
1909 dict_setitem_drop(span_dict, dictkey_ascender, PyFloat_FromDouble(asc));
1910 dict_setitem_drop(span_dict, dictkey_descender, PyFloat_FromDouble(dsc));
1911 dict_setitem_drop(span_dict, dictkey_colorspace, PyLong_FromLong(3));
1912 float rgb[3];
1913 if (colorspace)
1914 {
1915 mupdf::ll_fz_convert_color(
1916 colorspace,
1917 color,
1918 mupdf::ll_fz_device_rgb(),
1919 rgb,
1920 nullptr,
1921 fz_default_color_params
1922 );
1923 }
1924 else
1925 {
1926 rgb[0] = rgb[1] = rgb[2] = 0;
1927 }
1928 double linewidth;
1929 if (dev->linewidth > 0) // width of character border
1930 {
1931 linewidth = (double) dev->linewidth;
1932 }
1933 else
1934 {
1935 linewidth = fsize * 0.05; // default: 5% of font size
1936 }
1937 if (0) std::cout
1938 << " dev->linewidth=" << dev->linewidth
1939 << " fsize=" << fsize
1940 << " linewidth=" << linewidth
1941 << "\n";
1942 dict_setitem_drop(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]));
1943 dict_setitem_drop(span_dict, dictkey_size, PyFloat_FromDouble(fsize));
1944 dict_setitemstr_drop(span_dict, "opacity", PyFloat_FromDouble((double) alpha));
1945 dict_setitemstr_drop(span_dict, "linewidth", PyFloat_FromDouble((double) linewidth));
1946 dict_setitemstr_drop(span_dict, "spacewidth", PyFloat_FromDouble(space_adv));
1947 dict_setitem_drop(span_dict, dictkey_type, PyLong_FromLong((long) type));
1948 dict_setitem_drop(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox));
1949 dict_setitemstr_drop(span_dict, "layer", JM_UnicodeFromStr(dev->layer_name));
1950 dict_setitemstr_drop(span_dict, "seqno", PyLong_FromSize_t(seqno));
1951 dict_setitem_drop(span_dict, dictkey_chars, chars);
1952 //std::cout << "span_dict=" << repr(span_dict) << "\n";
1953 s_list_append_drop(dev->out, span_dict);
1954 }
1955
1956 static inline void jm_increase_seqno(fz_context* ctx, fz_device* dev_)
1957 {
1958 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
1959 dev->seqno += 1;
1960 }
1961
1962 static void jm_fill_path(
1963 fz_context* ctx,
1964 fz_device* dev,
1965 const fz_path*,
1966 int even_odd,
1967 fz_matrix,
1968 fz_colorspace*,
1969 const float* color,
1970 float alpha,
1971 fz_color_params
1972 )
1973 {
1974 jm_increase_seqno(ctx, dev);
1975 }
1976
1977 static void jm_fill_shade(
1978 fz_context* ctx,
1979 fz_device* dev,
1980 fz_shade* shd,
1981 fz_matrix ctm,
1982 float alpha,
1983 fz_color_params color_params
1984 )
1985 {
1986 jm_increase_seqno(ctx, dev);
1987 }
1988
1989 static void jm_fill_image(
1990 fz_context* ctx,
1991 fz_device* dev,
1992 fz_image* img,
1993 fz_matrix ctm,
1994 float alpha,
1995 fz_color_params color_params
1996 )
1997 {
1998 jm_increase_seqno(ctx, dev);
1999 }
2000
2001 static void jm_fill_image_mask(
2002 fz_context* ctx,
2003 fz_device* dev,
2004 fz_image* img,
2005 fz_matrix ctm,
2006 fz_colorspace* cs,
2007 const float* color,
2008 float alpha,
2009 fz_color_params color_params
2010 )
2011 {
2012 jm_increase_seqno(ctx, dev);
2013 }
2014
2015 static void jm_dev_linewidth(
2016 fz_context* ctx,
2017 fz_device* dev_,
2018 const fz_path* path,
2019 const fz_stroke_state* stroke,
2020 fz_matrix ctm,
2021 fz_colorspace* colorspace,
2022 const float* color,
2023 float alpha,
2024 fz_color_params color_params
2025 )
2026 {
2027 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2028 if (0) std::cout << "jm_dev_linewidth(): changing dev->linewidth from " << dev->linewidth
2029 << " to stroke->linewidth=" << stroke->linewidth
2030 << "\n";
2031 dev->linewidth = stroke->linewidth;
2032 jm_increase_seqno(ctx, dev_);
2033 }
2034
2035 static void jm_trace_text(
2036 jm_tracedraw_device* dev,
2037 const fz_text* text,
2038 int type,
2039 fz_matrix ctm,
2040 fz_colorspace* colorspace,
2041 const float* color,
2042 float alpha,
2043 size_t seqno
2044 )
2045 {
2046 fz_text_span* span;
2047 for (span = text->head; span; span = span->next)
2048 {
2049 jm_trace_text_span(dev, span, type, ctm, colorspace, color, alpha, seqno);
2050 }
2051 }
2052
2053 /*---------------------------------------------------------
2054 There are 3 text trace types:
2055 0 - fill text (PDF Tr 0)
2056 1 - stroke text (PDF Tr 1)
2057 3 - ignore text (PDF Tr 3)
2058 ---------------------------------------------------------*/
2059 static void
2060 jm_tracedraw_fill_text(
2061 fz_context* ctx,
2062 fz_device* dev_,
2063 const fz_text* text,
2064 fz_matrix ctm,
2065 fz_colorspace* colorspace,
2066 const float* color,
2067 float alpha,
2068 fz_color_params color_params
2069 )
2070 {
2071 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2072 jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev->seqno);
2073 dev->seqno += 1;
2074 }
2075
2076 static void
2077 jm_tracedraw_stroke_text(
2078 fz_context* ctx,
2079 fz_device* dev_,
2080 const fz_text* text,
2081 const fz_stroke_state* stroke,
2082 fz_matrix ctm,
2083 fz_colorspace* colorspace,
2084 const float* color,
2085 float alpha,
2086 fz_color_params color_params
2087 )
2088 {
2089 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2090 jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev->seqno);
2091 dev->seqno += 1;
2092 }
2093
2094
2095 static void
2096 jm_tracedraw_ignore_text(
2097 fz_context* ctx,
2098 fz_device* dev_,
2099 const fz_text* text,
2100 fz_matrix ctm
2101 )
2102 {
2103 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2104 jm_trace_text(dev, text, 3, ctm, nullptr, nullptr, 1, dev->seqno);
2105 dev->seqno += 1;
2106 }
2107
2108 static void
2109 jm_lineart_begin_layer(fz_context *ctx, fz_device *dev_, const char *name)
2110 {
2111 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2112 mupdf::ll_fz_free(dev->layer_name);
2113 dev->layer_name = mupdf::ll_fz_strdup(name);
2114 }
2115
2116 static void
2117 jm_lineart_end_layer(fz_context *ctx, fz_device *dev_)
2118 {
2119 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
2120 mupdf::ll_fz_free(dev->layer_name);
2121 dev->layer_name = nullptr;
2122 }
2123
2124
2125 mupdf::FzDevice JM_new_texttrace_device(PyObject* out)
2126 {
2127 mupdf::FzDevice device(sizeof(jm_tracedraw_device));
2128 jm_tracedraw_device* dev = (jm_tracedraw_device*) device.m_internal;
2129
2130 dev->super.close_device = nullptr;
2131 dev->super.drop_device = jm_lineart_drop_device;
2132 dev->super.fill_path = jm_fill_path;
2133 dev->super.stroke_path = jm_dev_linewidth;
2134 dev->super.clip_path = nullptr;
2135 dev->super.clip_stroke_path = nullptr;
2136
2137 dev->super.fill_text = jm_tracedraw_fill_text;
2138 dev->super.stroke_text = jm_tracedraw_stroke_text;
2139 dev->super.clip_text = nullptr;
2140 dev->super.clip_stroke_text = nullptr;
2141 dev->super.ignore_text = jm_tracedraw_ignore_text;
2142
2143 dev->super.fill_shade = jm_fill_shade;
2144 dev->super.fill_image = jm_fill_image;
2145 dev->super.fill_image_mask = jm_fill_image_mask;
2146 dev->super.clip_image_mask = nullptr;
2147
2148 dev->super.pop_clip = nullptr;
2149
2150 dev->super.begin_mask = nullptr;
2151 dev->super.end_mask = nullptr;
2152 dev->super.begin_group = nullptr;
2153 dev->super.end_group = nullptr;
2154
2155 dev->super.begin_tile = nullptr;
2156 dev->super.end_tile = nullptr;
2157
2158 dev->super.begin_layer = jm_lineart_begin_layer;
2159 dev->super.end_layer = jm_lineart_end_layer;
2160
2161 dev->super.begin_structure = nullptr;
2162 dev->super.end_structure = nullptr;
2163
2164 dev->super.begin_metatext = nullptr;
2165 dev->super.end_metatext = nullptr;
2166
2167 dev->super.render_flags = nullptr;
2168 dev->super.set_default_colorspaces = nullptr;
2169
2170 Py_XINCREF(out);
2171 dev->out = out;
2172 dev->seqno = 0;
2173 return device;
2174 }
2175
2176
2177 static fz_quad
2178 JM_char_quad(fz_stext_line *line, fz_stext_char *ch)
2179 {
2180 if (g_skip_quad_corrections) { // no special handling
2181 return ch->quad;
2182 }
2183 if (line->wmode) { // never touch vertical write mode
2184 return ch->quad;
2185 }
2186 fz_font *font = ch->font;
2187 float asc = JM_font_ascender(font);
2188 float dsc = JM_font_descender(font);
2189 float c, s, fsize = ch->size;
2190 float asc_dsc = asc - dsc + FLT_EPSILON;
2191 if (asc_dsc >= 1 && g_small_glyph_heights == 0) { // no problem
2192 return ch->quad;
2193 }
2194 if (asc < 1e-3) { // probably Tesseract glyphless font
2195 dsc = -0.1f;
2196 asc = 0.9f;
2197 asc_dsc = 1.0f;
2198 }
2199
2200 if (g_small_glyph_heights || asc_dsc < 1) {
2201 dsc = dsc / asc_dsc;
2202 asc = asc / asc_dsc;
2203 }
2204 asc_dsc = asc - dsc;
2205 asc = asc * fsize / asc_dsc;
2206 dsc = dsc * fsize / asc_dsc;
2207
2208 /* ------------------------------
2209 Re-compute quad with the adjusted ascender / descender values:
2210 Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
2211 re-rotate and move back to ch->origin location.
2212 ------------------------------ */
2213 fz_matrix trm1, trm2, xlate1, xlate2;
2214 fz_quad quad;
2215 c = line->dir.x; // cosine
2216 s = line->dir.y; // sine
2217 trm1 = mupdf::ll_fz_make_matrix(c, -s, s, c, 0, 0); // derotate
2218 trm2 = mupdf::ll_fz_make_matrix(c, s, -s, c, 0, 0); // rotate
2219 if (c == -1) { // left-right flip
2220 trm1.d = 1;
2221 trm2.d = 1;
2222 }
2223 xlate1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y);
2224 xlate2 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y);
2225
2226 quad = mupdf::ll_fz_transform_quad(ch->quad, xlate1); // move origin to (0,0)
2227 quad = mupdf::ll_fz_transform_quad(quad, trm1); // de-rotate corners
2228
2229 // adjust vertical coordinates
2230 if (c == 1 && quad.ul.y > 0) { // up-down flip
2231 quad.ul.y = asc;
2232 quad.ur.y = asc;
2233 quad.ll.y = dsc;
2234 quad.lr.y = dsc;
2235 } else {
2236 quad.ul.y = -asc;
2237 quad.ur.y = -asc;
2238 quad.ll.y = -dsc;
2239 quad.lr.y = -dsc;
2240 }
2241
2242 // adjust horizontal coordinates that are too crazy:
2243 // (1) left x must be >= 0
2244 // (2) if bbox width is 0, lookup char advance in font.
2245 if (quad.ll.x < 0) {
2246 quad.ll.x = 0;
2247 quad.ul.x = 0;
2248 }
2249 float cwidth = quad.lr.x - quad.ll.x;
2250 if (cwidth < FLT_EPSILON) {
2251 int glyph = mupdf::ll_fz_encode_character( font, ch->c);
2252 if (glyph) {
2253 float fwidth = mupdf::ll_fz_advance_glyph( font, glyph, line->wmode);
2254 quad.lr.x = quad.ll.x + fwidth * fsize;
2255 quad.ur.x = quad.lr.x;
2256 }
2257 }
2258
2259 quad = mupdf::ll_fz_transform_quad(quad, trm2); // rotate back
2260 quad = mupdf::ll_fz_transform_quad(quad, xlate2); // translate back
2261 return quad;
2262 }
2263
2264
2265 static fz_rect JM_char_bbox(fz_stext_line* line, fz_stext_char* ch)
2266 {
2267 fz_rect r = mupdf::ll_fz_rect_from_quad(JM_char_quad( line, ch));
2268 if (!line->wmode) {
2269 return r;
2270 }
2271 if (r.y1 < r.y0 + ch->size) {
2272 r.y0 = r.y1 - ch->size;
2273 }
2274 return r;
2275 }
2276
2277 fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch)
2278 {
2279 return JM_char_bbox( line.m_internal, ch.m_internal);
2280 }
2281
2282 static int JM_rects_overlap(const fz_rect a, const fz_rect b)
2283 {
2284 if (0
2285 || a.x0 >= b.x1
2286 || a.y0 >= b.y1
2287 || a.x1 <= b.x0
2288 || a.y1 <= b.y0
2289 )
2290 return 0;
2291 return 1;
2292 }
2293
// Forward declaration of JM_append_rune(), which JM_print_stext_page_as_text() below calls.
2295 void JM_append_rune(fz_buffer *buff, int ch);
2296
2297 //-----------------------------------------------------------------------------
2298 // Plain text output. An identical copy of fz_print_stext_page_as_text,
2299 // but lines within a block are concatenated by space instead a new-line
2300 // character (which else leads to 2 new-lines).
2301 //-----------------------------------------------------------------------------
2302 void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page)
2303 {
2304 fz_rect rect = page.m_internal->mediabox;
2305
2306 for (auto block: page)
2307 {
2308 if (block.m_internal->type == FZ_STEXT_BLOCK_TEXT)
2309 {
2310 for (auto line: block)
2311 {
2312 int last_char = 0;
2313 for (auto ch: line)
2314 {
2315 fz_rect chbbox = JM_char_bbox( line, ch);
2316 if (mupdf::ll_fz_is_infinite_rect(rect)
2317 || JM_rects_overlap(rect, chbbox)
2318 )
2319 {
2320 last_char = ch.m_internal->c;
2321 JM_append_rune(res.m_internal, last_char);
2322 }
2323 }
2324 if (last_char != 10 && last_char > 0)
2325 {
2326 mupdf::ll_fz_append_string(res.m_internal, "\n");
2327 }
2328 }
2329 }
2330 }
2331 }
2332
2333
2334
// Values stored in the lineart device's `path_type` field; they identify
// which device callback is currently walking a path.
#define FILL_PATH        1
#define STROKE_PATH      2
#define CLIP_PATH        3
#define CLIP_STROKE_PATH 4
2340
2341 // Every scissor of a clip is a sub rectangle of the preceding clip scissor if
2342 // the clip level is larger.
2343 static fz_rect compute_scissor(jm_lineart_device *dev)
2344 {
2345 PyObject *last_scissor = NULL;
2346 fz_rect scissor;
2347 if (!dev->scissors) {
2348 dev->scissors = PyList_New(0);
2349 }
2350 Py_ssize_t num_scissors = PyList_Size(dev->scissors);
2351 if (num_scissors > 0) {
2352 last_scissor = PyList_GET_ITEM(dev->scissors, num_scissors-1);
2353 scissor = JM_rect_from_py(last_scissor);
2354 scissor = fz_intersect_rect(scissor, dev->pathrect);
2355 } else {
2356 scissor = dev->pathrect;
2357 }
2358 LIST_APPEND_DROP(dev->scissors, JM_py_from_rect(scissor));
2359 return scissor;
2360 }
2361
2362
2363 /*
2364 --------------------------------------------------------------------------
2365 Check whether the last 4 lines represent a quad.
2366 Because of how we count, the lines are a polyline already, i.e. last point
2367 of a line equals 1st point of next line.
2368 So we check for a polygon (last line's end point equals start point).
2369 If not true we return 0.
2370 --------------------------------------------------------------------------
2371 */
2372 static int
2373 jm_checkquad(jm_lineart_device* dev)
2374 {
2375 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);
2376 Py_ssize_t i, len = PyList_Size(items);
2377 float f[8]; // coordinates of the 4 corners
2378 mupdf::FzPoint temp, lp; // line = (temp, lp)
2379 PyObject *rect;
2380 PyObject *line;
2381 // fill the 8 floats in f, start from items[-4:]
2382 for (i = 0; i < 4; i++) { // store line start points
2383 line = PyList_GET_ITEM(items, len - 4 + i);
2384 temp = JM_point_from_py(PyTuple_GET_ITEM(line, 1));
2385 f[i * 2] = temp.x;
2386 f[i * 2 + 1] = temp.y;
2387 lp = JM_point_from_py(PyTuple_GET_ITEM(line, 2));
2388 }
2389 if (lp.x != f[0] || lp.y != f[1]) {
2390 // not a polygon!
2391 //dev_linecount -= 1;
2392 return 0;
2393 }
2394
2395 // we have detected a quad
2396 dev->linecount = 0; // reset this
2397 // a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items
2398 // are pairs of floats representing a quad corner each.
2399 rect = PyTuple_New(2);
2400 PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("qu"));
2401 /* ----------------------------------------------------
2402 * relationship of float array to quad points:
2403 * (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
2404 ---------------------------------------------------- */
2405 fz_quad q = fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5]);
2406 PyTuple_SET_ITEM(rect, 1, JM_py_from_quad(q));
2407 PyList_SetItem(items, len - 4, rect); // replace item -4 by rect
2408 PyList_SetSlice(items, len - 3, len, NULL); // delete remaining 3 items
2409 return 1;
2410 }
2411
2412
2413 /*
2414 --------------------------------------------------------------------------
2415 Check whether the last 3 path items represent a rectangle.
2416 Line 1 and 3 must be horizontal, line 2 must be vertical.
2417 Returns 1 if we have modified the path, otherwise 0.
2418 --------------------------------------------------------------------------
2419 */
2420 static int
2421 jm_checkrect(jm_lineart_device* dev)
2422 {
2423 dev->linecount = 0; // reset line count
2424 long orientation = 0; // area orientation of rectangle
2425 mupdf::FzPoint ll, lr, ur, ul;
2426 mupdf::FzRect r;
2427 PyObject *rect;
2428 PyObject *line0, *line2;
2429 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);
2430 Py_ssize_t len = PyList_Size(items);
2431
2432 line0 = PyList_GET_ITEM(items, len - 3);
2433 ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1));
2434 lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2));
2435 // no need to extract "line1"!
2436 line2 = PyList_GET_ITEM(items, len - 1);
2437 ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1));
2438 ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2));
2439
2440 /*
2441 ---------------------------------------------------------------------
2442 Assumption:
2443 When decomposing rects, MuPDF always starts with a horizontal line,
2444 followed by a vertical line, followed by a horizontal line.
2445 First line: (ll, lr), third line: (ul, ur).
2446 If 1st line is below 3rd line, we record anti-clockwise (+1), else
2447 clockwise (-1) orientation.
2448 ---------------------------------------------------------------------
2449 */
2450 if (ll.y != lr.y ||
2451 ll.x != ul.x ||
2452 ur.y != ul.y ||
2453 ur.x != lr.x) {
2454 goto drop_out; // not a rectangle
2455 }
2456
2457 // we have a rect, replace last 3 "l" items by one "re" item.
2458 if (ul.y < lr.y) {
2459 r = fz_make_rect(ul.x, ul.y, lr.x, lr.y);
2460 orientation = 1;
2461 } else {
2462 r = fz_make_rect(ll.x, ll.y, ur.x, ur.y);
2463 orientation = -1;
2464 }
2465 rect = PyTuple_New(3);
2466 PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re"));
2467 PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r));
2468 PyTuple_SET_ITEM(rect, 2, PyLong_FromLong(orientation));
2469 PyList_SetItem(items, len - 3, rect); // replace item -3 by rect
2470 PyList_SetSlice(items, len - 2, len, NULL); // delete remaining 2 items
2471 return 1;
2472 drop_out:;
2473 return 0;
2474 }
2475
2476 static PyObject *
2477 jm_lineart_color(fz_colorspace *colorspace, const float *color)
2478 {
2479 float rgb[3];
2480 if (colorspace) {
2481 mupdf::ll_fz_convert_color(colorspace, color, mupdf::ll_fz_device_rgb(),
2482 rgb, NULL, fz_default_color_params);
2483 return Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]);
2484 }
2485 return PyTuple_New(0);
2486 }
2487
2488 static void
2489 trace_moveto(fz_context *ctx, void *dev_, float x, float y)
2490 {
2491 jm_lineart_device* dev = (jm_lineart_device*) dev_;
2492 dev->lastpoint = mupdf::ll_fz_transform_point(fz_make_point(x, y), dev->ctm);
2493 if (mupdf::ll_fz_is_infinite_rect(dev->pathrect))
2494 {
2495 dev->pathrect = mupdf::ll_fz_make_rect(
2496 dev->lastpoint.x,
2497 dev->lastpoint.y,
2498 dev->lastpoint.x,
2499 dev->lastpoint.y
2500 );
2501 }
2502 dev->firstpoint = dev->lastpoint;
2503 dev->havemove = 1;
2504 dev->linecount = 0; // reset # of consec. lines
2505 }
2506
2507 static void
2508 trace_lineto(fz_context *ctx, void *dev_, float x, float y)
2509 {
2510 jm_lineart_device* dev = (jm_lineart_device*) dev_;
2511 fz_point p1 = fz_transform_point(fz_make_point(x, y), dev->ctm);
2512 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1);
2513 PyObject *list = PyTuple_New(3);
2514 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l"));
2515 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint));
2516 PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1));
2517 dev->lastpoint = p1;
2518 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);
2519 LIST_APPEND_DROP(items, list);
2520 dev->linecount += 1; // counts consecutive lines
2521 if (dev->linecount == 4 && dev->path_type != FILL_PATH) { // shrink to "re" or "qu" item
2522 jm_checkquad(dev);
2523 }
2524 }
2525
2526 static void
2527 trace_curveto(fz_context *ctx, void *dev_, float x1, float y1, float x2, float y2, float x3, float y3)
2528 {
2529 jm_lineart_device* dev = (jm_lineart_device*) dev_;
2530 dev->linecount = 0; // reset # of consec. lines
2531 fz_point p1 = fz_make_point(x1, y1);
2532 fz_point p2 = fz_make_point(x2, y2);
2533 fz_point p3 = fz_make_point(x3, y3);
2534 p1 = fz_transform_point(p1, dev->ctm);
2535 p2 = fz_transform_point(p2, dev->ctm);
2536 p3 = fz_transform_point(p3, dev->ctm);
2537 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1);
2538 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p2);
2539 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p3);
2540
2541 PyObject *list = PyTuple_New(5);
2542 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("c"));
2543 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint));
2544 PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1));
2545 PyTuple_SET_ITEM(list, 3, JM_py_from_point(p2));
2546 PyTuple_SET_ITEM(list, 4, JM_py_from_point(p3));
2547 dev->lastpoint = p3;
2548 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);
2549 LIST_APPEND_DROP(items, list);
2550 }
2551
2552 static void
2553 trace_close(fz_context *ctx, void *dev_)
2554 {
2555 jm_lineart_device* dev = (jm_lineart_device*) dev_;
2556 if (dev->linecount == 3) {
2557 if (jm_checkrect(dev)) {
2558 return;
2559 }
2560 }
2561 dev->linecount = 0; // reset # of consec. lines
2562 if (dev->havemove) {
2563 if (dev->firstpoint.x != dev->lastpoint.x || dev->firstpoint.y != dev->lastpoint.y) {
2564 PyObject *list = PyTuple_New(3);
2565 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l"));
2566 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint));
2567 PyTuple_SET_ITEM(list, 2, JM_py_from_point(dev->firstpoint));
2568 dev->lastpoint = dev->firstpoint;
2569 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);
2570 LIST_APPEND_DROP(items, list);
2571 }
2572 dev->havemove = 0;
2573 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0));
2574 } else {
2575 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(1));
2576 }
2577 }
2578
2579 static const fz_path_walker trace_path_walker =
2580 {
2581 trace_moveto,
2582 trace_lineto,
2583 trace_curveto,
2584 trace_close
2585 };
2586
2587 /*
2588 ---------------------------------------------------------------------
2589 Create the "items" list of the path dictionary
2590 * either create or empty the path dictionary
2591 * reset the end point of the path
2592 * reset count of consecutive lines
2593 * invoke fz_walk_path(), which create the single items
2594 * if no items detected, empty path dict again
2595 ---------------------------------------------------------------------
2596 */
2597 static void
2598 jm_lineart_path(jm_lineart_device *dev, const fz_path *path)
2599 {
2600 dev->pathrect = fz_infinite_rect;
2601 dev->linecount = 0;
2602 dev->lastpoint = fz_make_point(0, 0);
2603 dev->firstpoint = fz_make_point(0, 0);
2604 if (dev->pathdict) {
2605 Py_CLEAR(dev->pathdict);
2606 }
2607 dev->pathdict = PyDict_New();
2608 DICT_SETITEM_DROP(dev->pathdict, dictkey_items, PyList_New(0));
2609 mupdf::ll_fz_walk_path(path, &trace_path_walker, dev);
2610 // Check if any items were added ...
2611 if (!PyDict_GetItem(dev->pathdict, dictkey_items) || !PyList_Size(PyDict_GetItem(dev->pathdict, dictkey_items)))
2612 {
2613 Py_CLEAR(dev->pathdict);
2614 }
2615 }
2616
2617 //---------------------------------------------------------------------------
2618 // Append current path to list or merge into last path of the list.
2619 // (1) Append if first path, different item lists or not a 'stroke' version
2620 // of previous path
2621 // (2) If new path has the same items, merge its content into previous path
2622 // and change path["type"] to "fs".
2623 // (3) If "out" is callable, skip the previous and pass dictionary to it.
2624 //---------------------------------------------------------------------------
2625 static void
2626 // todo: remove `method` arg - it is dev->method.
2627 jm_append_merge(jm_lineart_device *dev)
2628 {
2629 Py_ssize_t len;
2630 int rc;
2631 PyObject *prev;
2632 PyObject *previtems;
2633 PyObject *thisitems;
2634 const char *thistype;
2635 const char *prevtype;
2636 if (PyCallable_Check(dev->out) || dev->method != Py_None) { // function or method
2637 goto callback;
2638 }
2639 len = PyList_Size(dev->out); // len of output list so far
2640 if (len == 0) { // always append first path
2641 goto append;
2642 }
2643 thistype = PyUnicode_AsUTF8(PyDict_GetItem(dev->pathdict, dictkey_type));
2644 if (strcmp(thistype, "s") != 0) { // if not stroke, then append
2645 goto append;
2646 }
2647 prev = PyList_GET_ITEM(dev->out, len - 1); // get prev path
2648 prevtype = PyUnicode_AsUTF8(PyDict_GetItem(prev, dictkey_type));
2649 if (strcmp(prevtype, "f") != 0) { // if previous not fill, append
2650 goto append;
2651 }
2652 // last check: there must be the same list of items for "f" and "s".
2653 previtems = PyDict_GetItem(prev, dictkey_items);
2654 thisitems = PyDict_GetItem(dev->pathdict, dictkey_items);
2655 if (PyObject_RichCompareBool(previtems, thisitems, Py_NE)) {
2656 goto append;
2657 }
2658 rc = PyDict_Merge(prev, dev->pathdict, 0); // merge, do not override
2659 if (rc == 0) {
2660 DICT_SETITEM_DROP(prev, dictkey_type, PyUnicode_FromString("fs"));
2661 goto postappend;
2662 } else {
2663 messagef("could not merge stroke and fill path");
2664 goto append;
2665 }
2666 append:;
2667 //printf("Appending to dev->out. len(dev->out)=%zi\n", PyList_Size(dev->out));
2668 PyList_Append(dev->out, dev->pathdict);
2669 postappend:;
2670 Py_CLEAR(dev->pathdict);
2671 return;
2672
2673 callback:; // callback function or method
2674 PyObject *resp = NULL;
2675 if (dev->method == Py_None) {
2676 resp = PyObject_CallFunctionObjArgs(dev->out, dev->pathdict, NULL);
2677 } else {
2678 resp = PyObject_CallMethodObjArgs(dev->out, dev->method, dev->pathdict, NULL);
2679 }
2680 if (resp) {
2681 Py_DECREF(resp);
2682 } else {
2683 messagef("calling cdrawings callback function/method failed!");
2684 PyErr_Clear();
2685 }
2686 Py_CLEAR(dev->pathdict);
2687 return;
2688 }
2689
2690 static void
2691 jm_lineart_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
2692 int even_odd, fz_matrix ctm, fz_colorspace *colorspace,
2693 const float *color, float alpha, fz_color_params color_params)
2694 {
2695 jm_lineart_device *dev = (jm_lineart_device *) dev_;
2696 //printf("extra.jm_lineart_fill_path(): dev->seqno=%zi\n", dev->seqno);
2697 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm);
2698 dev->path_type = FILL_PATH;
2699 jm_lineart_path(dev, path);
2700 if (!dev->pathdict) {
2701 return;
2702 }
2703 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("f"));
2704 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd));
2705 DICT_SETITEMSTR_DROP(dev->pathdict, "fill_opacity", Py_BuildValue("f", alpha));
2706 DICT_SETITEMSTR_DROP(dev->pathdict, "fill", jm_lineart_color(colorspace, color));
2707 DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect));
2708 DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno));
2709 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name));
2710 if (dev->clips) {
2711 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth));
2712 }
2713 jm_append_merge(dev);
2714 dev->seqno += 1;
2715 }
2716
2717 static void
2718 jm_lineart_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
2719 const fz_stroke_state *stroke, fz_matrix ctm,
2720 fz_colorspace *colorspace, const float *color, float alpha,
2721 fz_color_params color_params)
2722 {
2723 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2724 //printf("extra.jm_lineart_stroke_path(): dev->seqno=%zi\n", dev->seqno);
2725 int i;
2726 dev->pathfactor = 1;
2727 if (ctm.a != 0 && fz_abs(ctm.a) == fz_abs(ctm.d)) {
2728 dev->pathfactor = fz_abs(ctm.a);
2729 } else {
2730 if (ctm.b != 0 && fz_abs(ctm.b) == fz_abs(ctm.c)) {
2731 dev->pathfactor = fz_abs(ctm.b);
2732 }
2733 }
2734 dev->ctm = ctm; // fz_concat(ctm, trace_device_ptm);
2735 dev->path_type = STROKE_PATH;
2736
2737 jm_lineart_path(dev, path);
2738 if (!dev->pathdict) {
2739 return;
2740 }
2741 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("s"));
2742 DICT_SETITEMSTR_DROP(dev->pathdict, "stroke_opacity", Py_BuildValue("f", alpha));
2743 DICT_SETITEMSTR_DROP(dev->pathdict, "color", jm_lineart_color(colorspace, color));
2744 DICT_SETITEM_DROP(dev->pathdict, dictkey_width, Py_BuildValue("f", dev->pathfactor * stroke->linewidth));
2745 DICT_SETITEMSTR_DROP(dev->pathdict, "lineCap", Py_BuildValue("iii", stroke->start_cap, stroke->dash_cap, stroke->end_cap));
2746 DICT_SETITEMSTR_DROP(dev->pathdict, "lineJoin", Py_BuildValue("f", dev->pathfactor * stroke->linejoin));
2747 if (!PyDict_GetItemString(dev->pathdict, "closePath")) {
2748 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0));
2749 }
2750
2751 // output the "dashes" string
2752 if (stroke->dash_len) {
2753 mupdf::FzBuffer buff(256);
2754 mupdf::fz_append_string(buff, "[ "); // left bracket
2755 for (i = 0; i < stroke->dash_len; i++) {
2756 fz_append_printf(ctx, buff.m_internal, "%g ", dev->pathfactor * stroke->dash_list[i]);
2757 }
2758 fz_append_printf(ctx, buff.m_internal, "] %g", dev->pathfactor * stroke->dash_phase);
2759 DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", JM_EscapeStrFromBuffer(buff));
2760 } else {
2761 DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", PyUnicode_FromString("[] 0"));
2762 }
2763
2764 DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect));
2765 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name));
2766 DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno));
2767 if (dev->clips) {
2768 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth));
2769 }
2770 // output the dict - potentially merging it with a previous fill_path twin
2771 jm_append_merge(dev);
2772 dev->seqno += 1;
2773 }
2774
2775 static void
2776 jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor)
2777 {
2778 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2779 if (!dev->clips) return;
2780 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm);
2781 dev->path_type = CLIP_PATH;
2782 jm_lineart_path(dev, path);
2783 if (!dev->pathdict) {
2784 return;
2785 }
2786 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip"));
2787 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd));
2788 if (!PyDict_GetItemString(dev->pathdict, "closePath")) {
2789 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0));
2790 }
2791 DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev)));
2792 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth));
2793 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name));
2794 jm_append_merge(dev);
2795 dev->depth++;
2796 }
2797
2798 static void
2799 jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
2800 {
2801 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2802 if (!dev->clips) return;
2803 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm);
2804 dev->path_type = CLIP_STROKE_PATH;
2805 jm_lineart_path(dev, path);
2806 if (!dev->pathdict) {
2807 return;
2808 }
2809 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip"));
2810 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", Py_BuildValue("s", NULL));
2811 if (!PyDict_GetItemString(dev->pathdict, "closePath")) {
2812 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0));
2813 }
2814 DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev)));
2815 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth));
2816 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name));
2817 jm_append_merge(dev);
2818 dev->depth++;
2819 }
2820
2821
2822 static void
2823 jm_lineart_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
2824 {
2825 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2826 if (!dev->clips) return;
2827 compute_scissor(dev);
2828 dev->depth++;
2829 }
2830
2831 static void
2832 jm_lineart_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
2833 {
2834 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2835 if (!dev->clips) return;
2836 compute_scissor(dev);
2837 dev->depth++;
2838 }
2839
2840 static void
2841 jm_lineart_clip_image_mask(fz_context *ctx, fz_device *dev_, fz_image *image, fz_matrix ctm, fz_rect scissor)
2842 {
2843 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2844 if (!dev->clips) return;
2845 compute_scissor(dev);
2846 dev->depth++;
2847 }
2848
2849 static void
2850 jm_lineart_pop_clip(fz_context *ctx, fz_device *dev_)
2851 {
2852 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2853 if (!dev->clips) return;
2854 if (!dev->scissors) return;
2855 Py_ssize_t len = PyList_Size(dev->scissors);
2856 if (len < 1) return;
2857 PyList_SetSlice(dev->scissors, len - 1, len, NULL);
2858 dev->depth--;
2859 }
2860
2861
2862 static void
2863 jm_lineart_begin_group(fz_context *ctx, fz_device *dev_, fz_rect bbox, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha)
2864 {
2865 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2866 if (!dev->clips) return;
2867 dev->pathdict = Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
2868 "type", "group",
2869 "rect", JM_py_from_rect(bbox),
2870 "isolated", JM_BOOL(isolated),
2871 "knockout", JM_BOOL(knockout),
2872 "blendmode", fz_blendmode_name(blendmode),
2873 "opacity", alpha,
2874 "level", dev->depth,
2875 "layer", JM_UnicodeFromStr(dev->layer_name)
2876 );
2877 jm_append_merge(dev);
2878 dev->depth++;
2879 }
2880
2881 static void
2882 jm_lineart_end_group(fz_context *ctx, fz_device *dev_)
2883 {
2884 jm_lineart_device *dev = (jm_lineart_device *)dev_;
2885 if (!dev->clips) return;
2886 dev->depth--;
2887 }
2888
2889 static void jm_lineart_fill_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params)
2890 {
2891 jm_increase_seqno(ctx, dev);
2892 }
2893
2894 static void jm_lineart_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *, const fz_stroke_state *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params)
2895 {
2896 jm_increase_seqno(ctx, dev);
2897 }
2898
2899 static void jm_lineart_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shd, fz_matrix ctm, float alpha, fz_color_params color_params)
2900 {
2901 jm_increase_seqno(ctx, dev);
2902 }
2903
2904 static void jm_lineart_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
2905 {
2906 jm_increase_seqno(ctx, dev);
2907 }
2908
2909 static void jm_lineart_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, fz_colorspace *, const float *color, float alpha, fz_color_params color_params)
2910 {
2911 jm_increase_seqno(ctx, dev);
2912 }
2913
2914 static void jm_lineart_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix)
2915 {
2916 jm_increase_seqno(ctx, dev);
2917 }
2918
2919
2920 //-------------------------------------------------------------------
2921 // LINEART device for Python method Page.get_cdrawings()
2922 //-------------------------------------------------------------------
2923 mupdf::FzDevice JM_new_lineart_device(PyObject *out, int clips, PyObject *method)
2924 {
2925 //printf("extra.JM_new_lineart_device()\n");
2926 jm_lineart_device* dev = (jm_lineart_device*) mupdf::ll_fz_new_device_of_size(sizeof(jm_lineart_device));
2927
2928 dev->super.close_device = NULL;
2929 dev->super.drop_device = jm_lineart_drop_device;
2930 dev->super.fill_path = jm_lineart_fill_path;
2931 dev->super.stroke_path = jm_lineart_stroke_path;
2932 dev->super.clip_path = jm_lineart_clip_path;
2933 dev->super.clip_stroke_path = jm_lineart_clip_stroke_path;
2934
2935 dev->super.fill_text = jm_lineart_fill_text;
2936 dev->super.stroke_text = jm_lineart_stroke_text;
2937 dev->super.clip_text = jm_lineart_clip_text;
2938 dev->super.clip_stroke_text = jm_lineart_clip_stroke_text;
2939 dev->super.ignore_text = jm_lineart_ignore_text;
2940
2941 dev->super.fill_shade = jm_lineart_fill_shade;
2942 dev->super.fill_image = jm_lineart_fill_image;
2943 dev->super.fill_image_mask = jm_lineart_fill_image_mask;
2944 dev->super.clip_image_mask = jm_lineart_clip_image_mask;
2945
2946 dev->super.pop_clip = jm_lineart_pop_clip;
2947
2948 dev->super.begin_mask = NULL;
2949 dev->super.end_mask = NULL;
2950 dev->super.begin_group = jm_lineart_begin_group;
2951 dev->super.end_group = jm_lineart_end_group;
2952
2953 dev->super.begin_tile = NULL;
2954 dev->super.end_tile = NULL;
2955
2956 dev->super.begin_layer = jm_lineart_begin_layer;
2957 dev->super.end_layer = jm_lineart_end_layer;
2958
2959 dev->super.begin_structure = NULL;
2960 dev->super.end_structure = NULL;
2961
2962 dev->super.begin_metatext = NULL;
2963 dev->super.end_metatext = NULL;
2964
2965 dev->super.render_flags = NULL;
2966 dev->super.set_default_colorspaces = NULL;
2967
2968 if (PyList_Check(out)) {
2969 Py_INCREF(out);
2970 }
2971 Py_INCREF(method);
2972 dev->out = out;
2973 dev->seqno = 0;
2974 dev->depth = 0;
2975 dev->clips = clips;
2976 dev->method = method;
2977 dev->pathdict = nullptr;
2978
2979 return mupdf::FzDevice(&dev->super);
2980 }
2981
2982 PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL)
2983 {
2984 //fz_page *page = (fz_page *) $self;
2985 //fz_device *dev = NULL;
2986 PyObject *rc = NULL;
2987 int clips = PyObject_IsTrue(extended);
2988
2989 mupdf::FzDevice dev;
2990 if (PyCallable_Check(callback) || method != Py_None) {
2991 dev = JM_new_lineart_device(callback, clips, method);
2992 } else {
2993 rc = PyList_New(0);
2994 dev = JM_new_lineart_device(rc, clips, method);
2995 }
2996 mupdf::FzRect prect = mupdf::fz_bound_page(page);
2997 ((jm_lineart_device*) dev.m_internal)->ptm = mupdf::ll_fz_make_matrix(1, 0, 0, -1, 0, prect.y1);
2998
2999 mupdf::FzCookie cookie;
3000 mupdf::FzMatrix identity;
3001 mupdf::fz_run_page( page, dev, *identity.internal(), cookie);
3002 mupdf::fz_close_device( dev);
3003 if (PyCallable_Check(callback) || method != Py_None)
3004 {
3005 Py_RETURN_NONE;
3006 }
3007 return rc;
3008 }
3009
3010
3011 //---------------------------------------------------------------------------
3012 // APPEND non-ascii runes in unicode escape format to fz_buffer
3013 //---------------------------------------------------------------------------
3014 void JM_append_rune(fz_buffer *buff, int ch)
3015 {
3016 char text[32];
3017 if (ch == 92) // prevent accidental "\u", "\U" sequences
3018 {
3019 mupdf::ll_fz_append_string(buff, "\\u005c");
3020 }
3021 else if ((ch >= 32 && ch <= 127) || ch == 10)
3022 {
3023 mupdf::ll_fz_append_byte(buff, ch);
3024 }
3025 else if (ch >= 0xd800 && ch <= 0xdfff) // orphaned surrogate Unicodes
3026 {
3027 mupdf::ll_fz_append_string(buff, "\\ufffd");
3028 }
3029 else if (ch <= 0xffff)
3030 {
3031 // 4 hex digits
3032 snprintf(text, sizeof(text), "\\u%04x", ch);
3033 mupdf::ll_fz_append_string(buff, text);
3034 }
3035 else
3036 {
3037 // 8 hex digits
3038 snprintf(text, sizeof(text), "\\U%08x", ch);
3039 mupdf::ll_fz_append_string(buff, text);
3040 }
3041 }
3042
3043
3044 mupdf::FzRect JM_make_spanlist(
3045 PyObject *line_dict,
3046 mupdf::FzStextLine& line,
3047 int raw,
3048 mupdf::FzBuffer& buff,
3049 mupdf::FzRect& tp_rect
3050 )
3051 {
3052 PyObject *span = NULL, *char_list = NULL, *char_dict;
3053 PyObject *span_list = PyList_New(0);
3054 mupdf::fz_clear_buffer(buff);
3055 fz_rect span_rect = fz_empty_rect;
3056 fz_rect line_rect = fz_empty_rect;
3057 fz_point span_origin = {0, 0};
3058 struct char_style
3059 {
3060 float size = -1;
3061 unsigned flags = 0;
3062
3063 #if MUPDF_VERSION_GE(1, 25, 2)
3064 /* From mupdf:include/mupdf/fitz/structured-text.h:fz_stext_char::flags, which
3065 uses anonymous enum values:
3066 FZ_STEXT_STRIKEOUT = 1,
3067 FZ_STEXT_UNDERLINE = 2,
3068 FZ_STEXT_SYNTHETIC = 4,
3069 FZ_STEXT_FILLED = 16,
3070 FZ_STEXT_STROKED = 32,
3071 FZ_STEXT_CLIPPED = 64
3072 */
3073 unsigned char_flags = 0;
3074 #endif
3075
3076 const char *font = "";
3077 unsigned argb = 0;
3078 float asc = 0;
3079 float desc = 0;
3080 uint16_t bidi = 0;
3081 };
3082 char_style old_style;
3083 char_style style;
3084
3085 for (mupdf::FzStextChar ch: line)
3086 {
3087 fz_rect r = JM_char_bbox(line, ch);
3088 if (!JM_rects_overlap(*tp_rect.internal(), r) && !fz_is_infinite_rect(tp_rect))
3089 {
3090 continue;
3091 }
3092 /* Info from:
3093 detect_super_script()
3094 fz_font_is_italic()
3095 fz_font_is_serif()
3096 fz_font_is_monospaced()
3097 fz_font_is_bold()
3098 */
3099 int flags = JM_char_font_flags( ch.m_internal->font, line.m_internal, ch.m_internal);
3100 fz_point origin = ch.m_internal->origin;
3101 style.size = ch.m_internal->size;
3102 style.flags = flags;
3103 #if MUPDF_VERSION_GE(1, 25, 2)
3104 /* FZ_STEXT_SYNTHETIC is per-char, not per-span. */
3105 style.char_flags = ch.m_internal->flags & ~FZ_STEXT_SYNTHETIC;
3106 #endif
3107 style.font = JM_font_name(ch.m_internal->font);
3108 #if MUPDF_VERSION_GE(1, 25, 0)
3109 style.argb = ch.m_internal->argb;
3110 #else
3111 style.argb = ch.m_internal->color;
3112 #endif
3113 style.asc = JM_font_ascender(ch.m_internal->font);
3114 style.desc = JM_font_descender(ch.m_internal->font);
3115
3116 if (0
3117 || style.size != old_style.size
3118 || style.flags != old_style.flags
3119 #if MUPDF_VERSION_GE(1, 25, 2)
3120 || style.char_flags != old_style.char_flags
3121 #endif
3122 || style.argb != old_style.argb
3123 || strcmp(style.font, old_style.font) != 0
3124 || style.bidi != old_style.bidi
3125 )
3126 {
3127 if (old_style.size >= 0)
3128 {
3129 // not first one, output previous
3130 if (raw)
3131 {
3132 // put character list in the span
3133 DICT_SETITEM_DROP(span, dictkey_chars, char_list);
3134 char_list = NULL;
3135 }
3136 else
3137 {
3138 // put text string in the span
3139 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff));
3140 mupdf::fz_clear_buffer(buff);
3141 }
3142
3143 DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin));
3144 DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect));
3145 line_rect = mupdf::ll_fz_union_rect(line_rect, span_rect);
3146 LIST_APPEND_DROP(span_list, span);
3147 span = NULL;
3148 }
3149
3150 span = PyDict_New();
3151 float asc = style.asc, desc = style.desc;
3152 if (style.asc < 1e-3)
3153 {
3154 asc = 0.9f;
3155 desc = -0.1f;
3156 }
3157
3158 DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size));
3159 DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("I", style.flags));
3160 DICT_SETITEM_DROP(span, dictkey_bidi, Py_BuildValue("I", style.bidi));
3161 #if MUPDF_VERSION_GE(1, 25, 2)
3162 DICT_SETITEM_DROP(span, dictkey_char_flags, Py_BuildValue("I", style.char_flags));
3163 #endif
3164 DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font));
3165 DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("I", style.argb & 0xffffff));
3166 #if MUPDF_VERSION_GE(1, 25, 0)
3167 DICT_SETITEMSTR_DROP(span, "alpha", Py_BuildValue("I", style.argb >> 24));
3168 #endif
3169 DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc));
3170 DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc));
3171
3172 old_style = style;
3173 span_rect = r;
3174 span_origin = origin;
3175
3176 }
3177 span_rect = mupdf::ll_fz_union_rect(span_rect, r);
3178
3179 if (raw)
3180 {
3181 // make and append a char dict
3182 char_dict = PyDict_New();
3183 DICT_SETITEM_DROP(char_dict, dictkey_origin, JM_py_from_point(ch.m_internal->origin));
3184
3185 DICT_SETITEM_DROP(char_dict, dictkey_bbox, JM_py_from_rect(r));
3186
3187 DICT_SETITEM_DROP(char_dict, dictkey_c, Py_BuildValue("C", ch.m_internal->c));
3188 DICT_SETITEMSTR_DROP(char_dict, "synthetic", Py_BuildValue("O", (ch.m_internal->flags & FZ_STEXT_SYNTHETIC) ? Py_True : Py_False));
3189 if (!char_list)
3190 {
3191 char_list = PyList_New(0);
3192 }
3193 LIST_APPEND_DROP(char_list, char_dict);
3194 }
3195 else
3196 {
3197 // add character byte to buffer
3198 JM_append_rune(buff.m_internal, ch.m_internal->c);
3199 }
3200 }
3201 // all characters processed, now flush remaining span
3202 if (span)
3203 {
3204 if (raw)
3205 {
3206 DICT_SETITEM_DROP(span, dictkey_chars, char_list);
3207 char_list = NULL;
3208 }
3209 else
3210 {
3211 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff));
3212 mupdf::fz_clear_buffer(buff);
3213 }
3214 DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin));
3215 DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect));
3216
3217 if (!fz_is_empty_rect(span_rect))
3218 {
3219 LIST_APPEND_DROP(span_list, span);
3220 line_rect = fz_union_rect(line_rect, span_rect);
3221 }
3222 else
3223 {
3224 Py_DECREF(span);
3225 }
3226 span = NULL;
3227 }
3228 if (!mupdf::fz_is_empty_rect(line_rect))
3229 {
3230 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list);
3231 }
3232 else
3233 {
3234 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list);
3235 }
3236 return line_rect;
3237 }
3238
3239 //-----------------------------------------------------------------------------
3240 // Functions for wordlist output
3241 //-----------------------------------------------------------------------------
3242 int JM_append_word(
3243 PyObject* lines,
3244 fz_buffer* buff,
3245 fz_rect* wbbox,
3246 int block_n,
3247 int line_n,
3248 int word_n
3249 )
3250 {
3251 PyObject* s = JM_EscapeStrFromBuffer(buff);
3252 PyObject* litem = Py_BuildValue(
3253 "ffffOiii",
3254 wbbox->x0,
3255 wbbox->y0,
3256 wbbox->x1,
3257 wbbox->y1,
3258 s,
3259 block_n,
3260 line_n,
3261 word_n
3262 );
3263 LIST_APPEND_DROP(lines, litem);
3264 Py_DECREF(s);
3265 *wbbox = fz_empty_rect;
3266 return word_n + 1; // word counter
3267 }
3268
3269 PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters)
3270 {
3271 int block_n = -1;
3272 fz_rect wbbox = fz_empty_rect; // word bbox
3273 fz_rect tp_rect = this_tpage.m_internal->mediabox;
3274
3275 PyObject *lines = NULL;
3276 mupdf::FzBuffer buff = mupdf::fz_new_buffer(64);
3277 lines = PyList_New(0);
3278 for (mupdf::FzStextBlock block: this_tpage)
3279 {
3280 block_n++;
3281 if (block.m_internal->type != FZ_STEXT_BLOCK_TEXT)
3282 {
3283 continue;
3284 }
3285 int line_n = -1;
3286 for (mupdf::FzStextLine line: block)
3287 {
3288 line_n++;
3289 int word_n = 0; // word counter per line
3290 mupdf::fz_clear_buffer(buff); // reset word buffer
3291 size_t buflen = 0; // reset char counter
3292 int last_char_rtl = 0; // was last character RTL?
3293 for (mupdf::FzStextChar ch: line)
3294 {
3295 mupdf::FzRect cbbox = JM_char_bbox(line, ch);
3296 if (!JM_rects_overlap(tp_rect, *cbbox.internal()) && !fz_is_infinite_rect(tp_rect))
3297 {
3298 continue;
3299 }
3300
3301 int word_delimiter = JM_is_word_delimiter(ch.m_internal->c, delimiters);
3302 int this_char_rtl = JM_is_rtl_char(ch.m_internal->c);
3303 if (word_delimiter || this_char_rtl != last_char_rtl)
3304 {
3305 if (buflen == 0 && word_delimiter)
3306 {
3307 continue; // skip delimiters at line start
3308 }
3309 if (!fz_is_empty_rect(wbbox))
3310 {
3311 word_n = JM_append_word(
3312 lines,
3313 buff.m_internal,
3314 &wbbox,
3315 block_n,
3316 line_n,
3317 word_n
3318 );
3319 }
3320 mupdf::fz_clear_buffer(buff);
3321 buflen = 0; // reset char counter
3322 if (word_delimiter) continue;
3323 }
3324 // append one unicode character to the word
3325 JM_append_rune(buff.m_internal, ch.m_internal->c);
3326 last_char_rtl = this_char_rtl;
3327 buflen++;
3328 // enlarge word bbox
3329 wbbox = fz_union_rect(wbbox, JM_char_bbox(line, ch));
3330 }
3331 if (buflen && !fz_is_empty_rect(wbbox))
3332 {
3333 word_n = JM_append_word(
3334 lines,
3335 buff.m_internal,
3336 &wbbox,
3337 block_n,
3338 line_n,
3339 word_n
3340 );
3341 }
3342 mupdf::fz_clear_buffer(buff);
3343 buflen = 0;
3344 }
3345 }
3346 return lines;
3347 }
3348
3349
3350
3351 struct ScopedPyObject
3352 /* PyObject* wrapper, destructor calls Py_CLEAR() unless `release()` has been
3353 called. */
3354 {
3355 ScopedPyObject(PyObject* rhs=nullptr)
3356 :
3357 m_pyobject(rhs)
3358 {}
3359
3360 PyObject*& get()
3361 {
3362 return m_pyobject;
3363 }
3364
3365 ScopedPyObject& operator= (PyObject* rhs)
3366 {
3367 Py_CLEAR(m_pyobject);
3368 m_pyobject = rhs;
3369 return *this;
3370 }
3371
3372 PyObject* release()
3373 {
3374 PyObject* ret = m_pyobject;
3375 m_pyobject = nullptr;
3376 return ret;
3377 }
3378 ~ScopedPyObject()
3379 {
3380 Py_CLEAR(m_pyobject);
3381 }
3382
3383 PyObject* m_pyobject = nullptr;
3384 };
3385
3386
3387 PyObject* extractBLOCKS(mupdf::FzStextPage& self)
3388 {
3389 fz_stext_page *this_tpage = self.m_internal;
3390 fz_rect tp_rect = this_tpage->mediabox;
3391 mupdf::FzBuffer res(1024);
3392 ScopedPyObject lines( PyList_New(0));
3393 int block_n = -1;
3394 for (fz_stext_block* block = this_tpage->first_block; block; block = block->next)
3395 {
3396 ScopedPyObject text;
3397 block_n++;
3398 fz_rect blockrect = fz_empty_rect;
3399 if (block->type == FZ_STEXT_BLOCK_TEXT)
3400 {
3401 mupdf::fz_clear_buffer(res); // set text buffer to empty
3402 int line_n = -1;
3403 int last_char = 0;
3404 (void) line_n; /* Not actually used, but keeping in the code for now. */
3405 for (fz_stext_line* line = block->u.t.first_line; line; line = line->next)
3406 {
3407 line_n++;
3408 fz_rect linerect = fz_empty_rect;
3409 for (fz_stext_char* ch = line->first_char; ch; ch = ch->next)
3410 {
3411 fz_rect cbbox = JM_char_bbox(line, ch);
3412 if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect))
3413 {
3414 continue;
3415 }
3416 JM_append_rune(res.m_internal, ch->c);
3417 last_char = ch->c;
3418 linerect = fz_union_rect(linerect, cbbox);
3419 }
3420 if (last_char != 10 && !fz_is_empty_rect(linerect))
3421 {
3422 mupdf::fz_append_byte(res, 10);
3423 }
3424 blockrect = fz_union_rect(blockrect, linerect);
3425 }
3426 text = JM_EscapeStrFromBuffer(res);
3427 }
3428 else if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect))
3429 {
3430 fz_image *img = block->u.i.image;
3431 fz_colorspace *cs = img->colorspace;
3432 text = PyUnicode_FromFormat(
3433 "<image: %s, width: %d, height: %d, bpc: %d>",
3434 mupdf::ll_fz_colorspace_name(cs),
3435 img->w,
3436 img->h,
3437 img->bpc
3438 );
3439 blockrect = fz_union_rect(blockrect, block->bbox);
3440 }
3441 if (!fz_is_empty_rect(blockrect))
3442 {
3443 ScopedPyObject litem = PyTuple_New(7);
3444 PyTuple_SET_ITEM(litem.get(), 0, Py_BuildValue("f", blockrect.x0));
3445 PyTuple_SET_ITEM(litem.get(), 1, Py_BuildValue("f", blockrect.y0));
3446 PyTuple_SET_ITEM(litem.get(), 2, Py_BuildValue("f", blockrect.x1));
3447 PyTuple_SET_ITEM(litem.get(), 3, Py_BuildValue("f", blockrect.y1));
3448 PyTuple_SET_ITEM(litem.get(), 4, Py_BuildValue("O", text.get()));
3449 PyTuple_SET_ITEM(litem.get(), 5, Py_BuildValue("i", block_n));
3450 PyTuple_SET_ITEM(litem.get(), 6, Py_BuildValue("i", block->type));
3451 LIST_APPEND(lines.get(), litem.get());
3452 }
3453 }
3454 return lines.release();
3455 }
3456
3457 #define EMPTY_STRING PyUnicode_FromString("")
3458
3459 static PyObject *JM_UnicodeFromStr(const char *c)
3460 {
3461 if (!c) return EMPTY_STRING;
3462 PyObject *val = Py_BuildValue("s", c);
3463 if (!val) {
3464 val = EMPTY_STRING;
3465 PyErr_Clear();
3466 }
3467 return val;
3468 }
3469
3470 PyObject* link_uri(mupdf::FzLink& link)
3471 {
3472 return JM_UnicodeFromStr( link.m_internal->uri);
3473 }
3474
3475 fz_stext_page* page_get_textpage(
3476 mupdf::FzPage& self,
3477 PyObject* clip,
3478 int flags,
3479 PyObject* matrix
3480 )
3481 {
3482 fz_context* ctx = mupdf::internal_context_get();
3483 fz_stext_page *tpage=NULL;
3484 fz_page *page = self.m_internal;
3485 fz_device *dev = NULL;
3486 fz_stext_options options;
3487 memset(&options, 0, sizeof options);
3488 options.flags = flags;
3489 fz_try(ctx) {
3490 // Default to page's rect if `clip` not specified, for #2048.
3491 fz_rect rect = (clip==Py_None) ? fz_bound_page(ctx, page) : JM_rect_from_py(clip);
3492 fz_matrix ctm = JM_matrix_from_py(matrix);
3493 tpage = fz_new_stext_page(ctx, rect);
3494 dev = fz_new_stext_device(ctx, tpage, &options);
3495 fz_run_page(ctx, page, dev, ctm, NULL);
3496 fz_close_device(ctx, dev);
3497 }
3498 fz_always(ctx) {
3499 fz_drop_device(ctx, dev);
3500 }
3501 fz_catch(ctx) {
3502 mupdf::internal_throw_exception(ctx);
3503 }
3504 return tpage;
3505 }
3506
3507 // return extension for pymupdf image type
3508 const char *JM_image_extension(int type)
3509 {
3510 switch (type) {
3511 case(FZ_IMAGE_RAW): return "raw";
3512 case(FZ_IMAGE_FLATE): return "flate";
3513 case(FZ_IMAGE_LZW): return "lzw";
3514 case(FZ_IMAGE_RLD): return "rld";
3515 case(FZ_IMAGE_BMP): return "bmp";
3516 case(FZ_IMAGE_GIF): return "gif";
3517 case(FZ_IMAGE_JBIG2): return "jb2";
3518 case(FZ_IMAGE_JPEG): return "jpeg";
3519 case(FZ_IMAGE_JPX): return "jpx";
3520 case(FZ_IMAGE_JXR): return "jxr";
3521 case(FZ_IMAGE_PNG): return "png";
3522 case(FZ_IMAGE_PNM): return "pnm";
3523 case(FZ_IMAGE_TIFF): return "tiff";
3524 default: return "n/a";
3525 }
3526 }
3527
3528 void JM_make_image_block(fz_stext_block *block, PyObject *block_dict)
3529 {
3530 fz_context* ctx = mupdf::internal_context_get();
3531 fz_image *image = block->u.i.image;
3532 fz_buffer *buf = NULL, *freebuf = NULL, *mask_buf = NULL;
3533 fz_compressed_buffer *buffer = fz_compressed_image_buffer(ctx, image);
3534 fz_var(buf);
3535 fz_var(freebuf);
3536 fz_var(mask_buf);
3537 int n = fz_colorspace_n(ctx, image->colorspace);
3538 int w = image->w;
3539 int h = image->h;
3540 const char *ext = "";
3541 int type = FZ_IMAGE_UNKNOWN;
3542 if (buffer) {
3543 type = buffer->params.type;
3544 ext = JM_image_extension(type);
3545 }
3546 if (type < FZ_IMAGE_BMP || type == FZ_IMAGE_JBIG2)
3547 type = FZ_IMAGE_UNKNOWN;
3548 PyObject *bytes = NULL;
3549 fz_var(bytes);
3550 PyObject *mask_bytes = NULL;
3551 fz_var(mask_bytes);
3552 fz_try(ctx) {
3553 if (!buffer || type == FZ_IMAGE_UNKNOWN)
3554 {
3555 buf = freebuf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params);
3556 ext = "png";
3557 }
3558 else if (n == 4 && strcmp(ext, "jpeg") == 0) // JPEG CMYK needs another step
3559 {
3560 buf = freebuf = fz_new_buffer_from_image_as_jpeg(ctx, image, fz_default_color_params, 95, 1);
3561 }
3562 else
3563 {
3564 buf = buffer->buffer;
3565 }
3566 bytes = JM_BinFromBuffer(buf);
3567 if (image->mask) {
3568 mask_buf = fz_new_buffer_from_image_as_png(ctx, image->mask, fz_default_color_params);
3569 mask_bytes = JM_BinFromBuffer(mask_buf);
3570 } else {
3571 mask_bytes = Py_BuildValue("s", NULL);
3572 }
3573 }
3574 fz_always(ctx) {
3575 if (!bytes)
3576 bytes = PyBytes_FromString("");
3577 DICT_SETITEM_DROP(block_dict, dictkey_width,
3578 Py_BuildValue("i", w));
3579 DICT_SETITEM_DROP(block_dict, dictkey_height,
3580 Py_BuildValue("i", h));
3581 DICT_SETITEM_DROP(block_dict, dictkey_ext,
3582 Py_BuildValue("s", ext));
3583 DICT_SETITEM_DROP(block_dict, dictkey_colorspace,
3584 Py_BuildValue("i", n));
3585 DICT_SETITEM_DROP(block_dict, dictkey_xres,
3586 Py_BuildValue("i", image->xres));
3587 DICT_SETITEM_DROP(block_dict, dictkey_yres,
3588 Py_BuildValue("i", image->xres));
3589 DICT_SETITEM_DROP(block_dict, dictkey_bpc,
3590 Py_BuildValue("i", (int) image->bpc));
3591 DICT_SETITEM_DROP(block_dict, dictkey_matrix,
3592 JM_py_from_matrix(block->u.i.transform));
3593 DICT_SETITEM_DROP(block_dict, dictkey_size,
3594 Py_BuildValue("n", PyBytes_Size(bytes)));
3595 DICT_SETITEM_DROP(block_dict, dictkey_image, bytes);
3596 DICT_SETITEMSTR_DROP(block_dict, "mask", mask_bytes);
3597 fz_drop_buffer(ctx, mask_buf);
3598 fz_drop_buffer(ctx, freebuf);
3599 }
3600 fz_catch(ctx) {;}
3601 return;
3602 }
3603
3604 static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect)
3605 {
3606 fz_stext_line *line;
3607 PyObject *line_list = PyList_New(0), *line_dict;
3608 fz_rect block_rect = fz_empty_rect;
3609 for (line = block->u.t.first_line; line; line = line->next) {
3610 if (fz_is_empty_rect(fz_intersect_rect(tp_rect, line->bbox)) &&
3611 !fz_is_infinite_rect(tp_rect)) {
3612 continue;
3613 }
3614 line_dict = PyDict_New();
3615 mupdf::FzStextLine line2(line);
3616 mupdf::FzBuffer buff2( mupdf::ll_fz_keep_buffer( buff));
3617 mupdf::FzRect tp_rect2( tp_rect);
3618 mupdf::FzRect line_rect2 = JM_make_spanlist(
3619 line_dict,
3620 line2,
3621 raw,
3622 buff2,
3623 tp_rect2
3624 );
3625 fz_rect& line_rect = *line_rect2.internal();
3626 block_rect = fz_union_rect(block_rect, line_rect);
3627 DICT_SETITEM_DROP(line_dict, dictkey_wmode,
3628 Py_BuildValue("i", line->wmode));
3629 DICT_SETITEM_DROP(line_dict, dictkey_dir, JM_py_from_point(line->dir));
3630 DICT_SETITEM_DROP(line_dict, dictkey_bbox,
3631 JM_py_from_rect(line_rect));
3632 LIST_APPEND_DROP(line_list, line_dict);
3633 }
3634 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block_rect));
3635 DICT_SETITEM_DROP(block_dict, dictkey_lines, line_list);
3636 return;
3637 }
3638
3639 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw)
3640 {
3641 fz_context* ctx = mupdf::internal_context_get();
3642 fz_stext_block *block;
3643 fz_buffer *text_buffer = fz_new_buffer(ctx, 128);
3644 PyObject *block_dict, *block_list = PyList_New(0);
3645 fz_rect tp_rect = tp->mediabox;
3646 int block_n = -1;
3647 for (block = tp->first_block; block; block = block->next) {
3648 block_n++;
3649 if (!fz_contains_rect(tp_rect, block->bbox) &&
3650 !fz_is_infinite_rect(tp_rect) &&
3651 block->type == FZ_STEXT_BLOCK_IMAGE) {
3652 continue;
3653 }
3654 if (!fz_is_infinite_rect(tp_rect) &&
3655 fz_is_empty_rect(fz_intersect_rect(tp_rect, block->bbox))) {
3656 continue;
3657 }
3658
3659 block_dict = PyDict_New();
3660 DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
3661 DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
3662 if (block->type == FZ_STEXT_BLOCK_IMAGE) {
3663 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
3664 JM_make_image_block(block, block_dict);
3665 } else {
3666 JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect);
3667 }
3668
3669 LIST_APPEND_DROP(block_list, block_dict);
3670 }
3671 DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list);
3672 fz_drop_buffer(ctx, text_buffer);
3673 }
3674
3675 //-----------------------------------------------------------------
3676 // get one pixel as a list
3677 //-----------------------------------------------------------------
3678 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y)
3679 {
3680 fz_context* ctx = mupdf::internal_context_get();
3681 PyObject *p = NULL;
3682 if (0
3683 || x < 0
3684 || x >= pm->w
3685 || y < 0
3686 || y >= pm->h
3687 )
3688 {
3689 throw std::range_error( MSG_PIXEL_OUTSIDE);
3690 }
3691 int n = pm->n;
3692 int stride = fz_pixmap_stride(ctx, pm);
3693 int i = stride * y + n * x;
3694 p = PyTuple_New(n);
3695 for (int j = 0; j < n; j++)
3696 {
3697 PyTuple_SET_ITEM(p, j, Py_BuildValue("i", pm->samples[i + j]));
3698 }
3699 return p;
3700 }
3701
3702 int pixmap_n(mupdf::FzPixmap& pixmap)
3703 {
3704 return mupdf::fz_pixmap_components( pixmap);
3705 }
3706
3707 static int
3708 JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result)
3709 {
3710 PyObject *temp = PySequence_ITEM(obj, idx);
3711 if (!temp) return 1;
3712 if (PyLong_Check(temp)) {
3713 *result = (int) PyLong_AsLong(temp);
3714 Py_DECREF(temp);
3715 } else if (PyFloat_Check(temp)) {
3716 *result = (int) PyFloat_AsDouble(temp);
3717 Py_DECREF(temp);
3718 } else {
3719 Py_DECREF(temp);
3720 return 1;
3721 }
3722 if (PyErr_Occurred()) {
3723 PyErr_Clear();
3724 return 1;
3725 }
3726 return 0;
3727 }
3728
3729 PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color)
3730 {
3731 fz_context* ctx = mupdf::internal_context_get();
3732 if (0
3733 || x < 0
3734 || x >= pm->w
3735 || y < 0
3736 || y >= pm->h
3737 )
3738 {
3739 throw std::range_error( MSG_PIXEL_OUTSIDE);
3740 }
3741 int n = pm->n;
3742 if (!PySequence_Check(color) || PySequence_Size(color) != n) {
3743 throw std::range_error(MSG_BAD_COLOR_SEQ);
3744 }
3745 int i, j;
3746 unsigned char c[5];
3747 for (j = 0; j < n; j++) {
3748 if (JM_INT_ITEM(color, j, &i) == 1) {
3749 throw std::range_error(MSG_BAD_COLOR_SEQ);
3750 }
3751 if (i < 0 or i >= 256) {
3752 throw std::range_error(MSG_BAD_COLOR_SEQ);
3753 }
3754 c[j] = (unsigned char) i;
3755 }
3756 int stride = fz_pixmap_stride(ctx, pm);
3757 i = stride * y + n * x;
3758 for (j = 0; j < n; j++) {
3759 pm->samples[i + j] = c[j];
3760 }
3761 Py_RETURN_NONE;
3762 }
3763 //-------------------------------------------
3764 // make a buffer from an stext_page's text
3765 //-------------------------------------------
3766 fz_buffer *
3767 JM_new_buffer_from_stext_page(fz_stext_page *page)
3768 {
3769 fz_context* ctx = mupdf::internal_context_get();
3770 fz_stext_block *block;
3771 fz_stext_line *line;
3772 fz_stext_char *ch;
3773 fz_rect rect = page->mediabox;
3774 fz_buffer *buf = NULL;
3775
3776 fz_try(ctx)
3777 {
3778 buf = fz_new_buffer(ctx, 256);
3779 for (block = page->first_block; block; block = block->next) {
3780 if (block->type == FZ_STEXT_BLOCK_TEXT) {
3781 for (line = block->u.t.first_line; line; line = line->next) {
3782 for (ch = line->first_char; ch; ch = ch->next) {
3783 if (!JM_rects_overlap(rect, JM_char_bbox(line, ch)) &&
3784 !fz_is_infinite_rect(rect)) {
3785 continue;
3786 }
3787 fz_append_rune(ctx, buf, ch->c);
3788 }
3789 fz_append_byte(ctx, buf, '\n');
3790 }
3791 fz_append_byte(ctx, buf, '\n');
3792 }
3793 }
3794 }
3795 fz_catch(ctx) {
3796 fz_drop_buffer(ctx, buf);
3797 mupdf::internal_throw_exception(ctx);
3798 }
3799 return buf;
3800 }
3801
// Canonicalise a code point for text search:
// - NBSP, LINE SEPARATOR, PARAGRAPH SEPARATOR, CR, LF and TAB become a space
// - ASCII upper-case letters become lower case
// - everything else is returned unchanged
static inline int canon(int c)
{
    /* TODO: proper unicode case folding */
    /* TODO: character equivalence (a matches ä, etc) */
    switch (c)
    {
        case 0xA0:
        case 0x2028:
        case 0x2029:
        case '\r':
        case '\n':
        case '\t':
            return ' ';
        default:
            break;
    }
    const bool ascii_upper = ('A' <= c) && (c <= 'Z');
    return ascii_upper ? c + ('a' - 'A') : c;
}
3814
3815 static inline int chartocanon(int *c, const char *s)
3816 {
3817 int n = fz_chartorune(c, s);
3818 *c = canon(*c);
3819 return n;
3820 }
3821
3822 static const char *match_string(const char *h, const char *n)
3823 {
3824 int hc, nc;
3825 const char *e = h;
3826 h += chartocanon(&hc, h);
3827 n += chartocanon(&nc, n);
3828 while (hc == nc)
3829 {
3830 e = h;
3831 if (hc == ' ')
3832 do
3833 h += chartocanon(&hc, h);
3834 while (hc == ' ');
3835 else
3836 h += chartocanon(&hc, h);
3837 if (nc == ' ')
3838 do
3839 n += chartocanon(&nc, n);
3840 while (nc == ' ');
3841 else
3842 n += chartocanon(&nc, n);
3843 }
3844 return nc == 0 ? e : NULL;
3845 }
3846
3847
3848 static const char *find_string(const char *s, const char *needle, const char **endp)
3849 {
3850 const char *end;
3851 while (*s)
3852 {
3853 end = match_string(s, needle);
3854 if (end)
3855 {
3856 *endp = end;
3857 return s;
3858 }
3859 ++s;
3860 }
3861 *endp = NULL;
3862 return NULL;
3863 }
3864
3865 struct highlight
3866 {
3867 Py_ssize_t len;
3868 PyObject *quads;
3869 float hfuzz, vfuzz;
3870 };
3871
3872
3873 static int
3874 JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, double *result)
3875 {
3876 PyObject *temp = PySequence_ITEM(obj, idx);
3877 if (!temp) return 1;
3878 *result = PyFloat_AsDouble(temp);
3879 Py_DECREF(temp);
3880 if (PyErr_Occurred()) {
3881 PyErr_Clear();
3882 return 1;
3883 }
3884 return 0;
3885 }
3886
3887
3888 //-----------------------------------------------------------------------------
3889 // fz_quad from PySequence. Four floats are treated as rect.
3890 // Else must be four pairs of floats.
3891 //-----------------------------------------------------------------------------
3892 static fz_quad
3893 JM_quad_from_py(PyObject *r)
3894 {
3895 fz_quad q = fz_make_quad(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT,
3896 FZ_MAX_INF_RECT, FZ_MIN_INF_RECT,
3897 FZ_MIN_INF_RECT, FZ_MAX_INF_RECT,
3898 FZ_MAX_INF_RECT, FZ_MAX_INF_RECT);
3899 fz_point p[4];
3900 double test, x, y;
3901 Py_ssize_t i;
3902 PyObject *obj = NULL;
3903
3904 if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4)
3905 return q;
3906
3907 if (JM_FLOAT_ITEM(r, 0, &test) == 0)
3908 return fz_quad_from_rect(JM_rect_from_py(r));
3909
3910 for (i = 0; i < 4; i++) {
3911 obj = PySequence_ITEM(r, i); // next point item
3912 if (!obj || !PySequence_Check(obj) || PySequence_Size(obj) != 2)
3913 goto exit_result; // invalid: cancel the rest
3914
3915 if (JM_FLOAT_ITEM(obj, 0, &x) == 1) goto exit_result;
3916 if (JM_FLOAT_ITEM(obj, 1, &y) == 1) goto exit_result;
3917 if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT;
3918 if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT;
3919 if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT;
3920 if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT;
3921 p[i] = fz_make_point((float) x, (float) y);
3922
3923 Py_CLEAR(obj);
3924 }
3925 q.ul = p[0];
3926 q.ur = p[1];
3927 q.ll = p[2];
3928 q.lr = p[3];
3929 return q;
3930
3931 exit_result:;
3932 Py_CLEAR(obj);
3933 return q;
3934 }
3935
3936 static float hdist(fz_point *dir, fz_point *a, fz_point *b)
3937 {
3938 float dx = b->x - a->x;
3939 float dy = b->y - a->y;
3940 return fz_abs(dx * dir->x + dy * dir->y);
3941 }
3942
3943 static float vdist(fz_point *dir, fz_point *a, fz_point *b)
3944 {
3945 float dx = b->x - a->x;
3946 float dy = b->y - a->y;
3947 return fz_abs(dx * dir->y + dy * dir->x);
3948 }
3949
3950 static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
3951 {
3952 struct highlight* hits = (struct highlight*) arg;
3953 float vfuzz = ch->size * hits->vfuzz;
3954 float hfuzz = ch->size * hits->hfuzz;
3955 fz_quad ch_quad = JM_char_quad(line, ch);
3956 if (hits->len > 0) {
3957 PyObject *quad = PySequence_ITEM(hits->quads, hits->len - 1);
3958 fz_quad end = JM_quad_from_py(quad);
3959 Py_DECREF(quad);
3960 if (hdist(&line->dir, &end.lr, &ch_quad.ll) < hfuzz
3961 && vdist(&line->dir, &end.lr, &ch_quad.ll) < vfuzz
3962 && hdist(&line->dir, &end.ur, &ch_quad.ul) < hfuzz
3963 && vdist(&line->dir, &end.ur, &ch_quad.ul) < vfuzz)
3964 {
3965 end.ur = ch_quad.ur;
3966 end.lr = ch_quad.lr;
3967 quad = JM_py_from_quad(end);
3968 PyList_SetItem(hits->quads, hits->len - 1, quad);
3969 return;
3970 }
3971 }
3972 LIST_APPEND_DROP(hits->quads, JM_py_from_quad(ch_quad));
3973 hits->len++;
3974 }
3975
3976
3977 PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle)
3978 {
3979 fz_context* ctx = mupdf::internal_context_get();
3980 struct highlight hits;
3981 fz_stext_block *block;
3982 fz_stext_line *line;
3983 fz_stext_char *ch;
3984 fz_buffer *buffer = NULL;
3985 const char *haystack, *begin, *end;
3986 fz_rect rect = page->mediabox;
3987 int c, inside;
3988
3989 if (strlen(needle) == 0) Py_RETURN_NONE;
3990 PyObject *quads = PyList_New(0);
3991 hits.len = 0;
3992 hits.quads = quads;
3993 hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
3994 hits.vfuzz = 0.1f;
3995
3996 fz_try(ctx) {
3997 buffer = JM_new_buffer_from_stext_page( page);
3998 haystack = fz_string_from_buffer(ctx, buffer);
3999 begin = find_string(haystack, needle, &end);
4000 if (!begin) goto no_more_matches;
4001
4002 inside = 0;
4003 for (block = page->first_block; block; block = block->next) {
4004 if (block->type != FZ_STEXT_BLOCK_TEXT) {
4005 continue;
4006 }
4007 for (line = block->u.t.first_line; line; line = line->next) {
4008 for (ch = line->first_char; ch; ch = ch->next) {
4009 if (!fz_is_infinite_rect(rect) &&
4010 !JM_rects_overlap(rect, JM_char_bbox(line, ch))) {
4011 goto next_char;
4012 }
4013 try_new_match:
4014 if (!inside) {
4015 if (haystack >= begin) inside = 1;
4016 }
4017 if (inside) {
4018 if (haystack < end) {
4019 on_highlight_char(ctx, &hits, line, ch);
4020 } else {
4021 inside = 0;
4022 begin = find_string(haystack, needle, &end);
4023 if (!begin) goto no_more_matches;
4024 else goto try_new_match;
4025 }
4026 }
4027 haystack += fz_chartorune(&c, haystack);
4028 next_char:;
4029 }
4030 assert(*haystack == '\n');
4031 ++haystack;
4032 }
4033 assert(*haystack == '\n');
4034 ++haystack;
4035 }
4036 no_more_matches:;
4037 }
4038 fz_always(ctx)
4039 fz_drop_buffer(ctx, buffer);
4040 fz_catch(ctx)
4041 mupdf::internal_throw_exception(ctx);
4042
4043 return quads;
4044 }
4045
4046 void pixmap_copy( fz_pixmap* pm, const fz_pixmap* src, int n)
4047 {
4048 assert(pm->w == src->w);
4049 assert(pm->h == src->h);
4050 assert(n <= pm->n);
4051 assert(n <= src->n);
4052
4053 if (pm->n == src->n)
4054 {
4055 // identical samples
4056 assert(pm->stride == src->stride);
4057 memcpy(pm->samples, src->samples, pm->w * pm->h * pm->n);
4058 }
4059 else
4060 {
4061 int nn;
4062 int do_alpha;
4063 if (pm->n > src->n)
4064 {
4065 assert(pm->n == src->n + 1);
4066 nn = src->n;
4067 assert(!src->alpha);
4068 assert(pm->alpha);
4069 do_alpha = 1;
4070 }
4071 else
4072 {
4073 assert(src->n == pm->n + 1);
4074 nn = pm->n;
4075 assert(src->alpha);
4076 assert(!pm->alpha);
4077 do_alpha = 0;
4078 }
4079 for (int y=0; y<pm->h; ++y)
4080 {
4081 for (int x=0; x<pm->w; ++x)
4082 {
4083 memcpy(
4084 pm->samples + pm->stride * y + pm->n * x,
4085 src->samples + src->stride * y + src->n * x,
4086 nn
4087 );
4088 if (do_alpha)
4089 {
4090 pm->samples[pm->stride * y + pm->n * x + pm->n-1] = 255;
4091 }
4092 }
4093 }
4094 }
4095 }
4096
4097
4098 PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip)
4099 {
4100 fz_context* ctx = mupdf::internal_context_get();
4101 PyObject* rc = PyDict_New();
4102 fz_irect irect = fz_pixmap_bbox(ctx, pm);
4103 irect = fz_intersect_irect(irect, fz_round_rect(JM_rect_from_py(clip)));
4104 if (fz_is_empty_irect(irect))
4105 {
4106 return rc;
4107 }
4108 size_t stride = pm->stride;
4109 size_t width = irect.x1 - irect.x0;
4110 size_t height = irect.y1 - irect.y0;
4111 size_t n = (size_t) pm->n;
4112 size_t substride = width * n;
4113 unsigned char* s = pm->samples + stride * (irect.y0 - pm->y) + n * (irect.x0 - pm->x);
4114 // Cache previous pixel.
4115 char oldpix[10];
4116 assert(n <= sizeof(oldpix));
4117 memcpy(oldpix, s, n);
4118 long cnt = 0;
4119 for (size_t i = 0; i < height; i++)
4120 {
4121 for (size_t j = 0; j < substride; j += n)
4122 {
4123 const char* newpix = (const char*) s + j;
4124 if (memcmp(oldpix, newpix, n))
4125 {
4126 /* Pixel differs from previous pixel, so update results with
4127 last run of pixels. We get a PyObject representation of pixel
4128 so we can look up in Python dict <rc>. */
4129 PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n);
4130 PyObject* c = PyDict_GetItem(rc, pixel);
4131 if (c) cnt += PyLong_AsLong(c);
4132 DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt));
4133 Py_DECREF(pixel);
4134 /* Start next run of identical pixels. */
4135 cnt = 1;
4136 memcpy(oldpix, newpix, n);
4137 }
4138 else
4139 {
4140 cnt += 1;
4141 }
4142 }
4143 s += stride;
4144 }
4145 /* Update results with last pixel. */
4146 PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n);
4147 PyObject* c = PyDict_GetItem(rc, pixel);
4148 if (c) cnt += PyLong_AsLong(c);
4149 DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt));
4150 Py_DECREF(pixel);
4151 PyErr_Clear();
4152 return rc;
4153 }
4154
4155 %}
4156
4157 /* Declarations for functions defined above. */
4158
4159 void page_merge(
4160 mupdf::PdfDocument& doc_des,
4161 mupdf::PdfDocument& doc_src,
4162 int page_from,
4163 int page_to,
4164 int rotate,
4165 int links,
4166 int copy_annots,
4167 mupdf::PdfGraftMap& graft_map
4168 );
4169
4170 void JM_merge_range(
4171 mupdf::PdfDocument& doc_des,
4172 mupdf::PdfDocument& doc_src,
4173 int spage,
4174 int epage,
4175 int apage,
4176 int rotate,
4177 int links,
4178 int annots,
4179 int show_progress,
4180 mupdf::PdfGraftMap& graft_map
4181 );
4182
4183 void FzDocument_insert_pdf(
4184 mupdf::FzDocument& doc,
4185 mupdf::FzDocument& src,
4186 int from_page,
4187 int to_page,
4188 int start_at,
4189 int rotate,
4190 int links,
4191 int annots,
4192 int show_progress,
4193 int final,
4194 mupdf::PdfGraftMap& graft_map
4195 );
4196
4197 int page_xref(mupdf::FzDocument& this_doc, int pno);
4198 void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842);
4199 void _newPage(mupdf::PdfDocument& self, int pno=-1, float width=595, float height=842);
4200 void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem);
4201 void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count);
4202 std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page);
4203 mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& self, mupdf::FzPoint& point);
4204 mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& self, mupdf::FzPoint& point);
4205 const char* Tools_parse_da(mupdf::PdfAnnot& this_annot);
4206 PyObject* Annot_getAP(mupdf::PdfAnnot& annot);
4207 void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str);
4208 mupdf::FzPoint JM_point_from_py(PyObject* p);
4209 mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot);
4210 PyObject* util_transform_rect(PyObject* rect, PyObject* matrix);
4211 PyObject* Annot_rect3(mupdf::PdfAnnot& annot);
4212 mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage);
4213 mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& pdfpage);
4214 PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj);
4215 PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0);
4216 PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0);
4217
4218 PyObject* Link_is_external(mupdf::FzLink& this_link);
4219 PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist);
4220 PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist);
4221 mupdf::FzLink Link_next(mupdf::FzLink& this_link);
4222
4223 static int page_count_fz2(void* document);
4224 int page_count_fz(mupdf::FzDocument& document);
4225 int page_count_pdf(mupdf::PdfDocument& pdf);
4226 int page_count(mupdf::FzDocument& document);
4227 int page_count(mupdf::PdfDocument& pdf);
4228
4229 PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno);
4230 PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno);
4231 bool Outline_is_external(mupdf::FzOutline* outline);
4232 void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items);
4233 void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items);
4234
4235 int ll_fz_absi(int i);
4236
4237 mupdf::FzDevice JM_new_texttrace_device(PyObject* out);
4238
4239 fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch);
4240
4241 static fz_quad JM_char_quad( fz_stext_line *line, fz_stext_char *ch);
4242 void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page);
4243
4244 void set_skip_quad_corrections(int on);
4245 void set_subset_fontnames(int on);
4246 void set_small_glyph_heights(int on);
4247
4248 mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj);
4249 PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL);
4250
4251 mupdf::FzRect JM_make_spanlist(
4252 PyObject *line_dict,
4253 mupdf::FzStextLine& line,
4254 int raw,
4255 mupdf::FzBuffer& buff,
4256 mupdf::FzRect& tp_rect
4257 );
4258
4259 PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters);
4260 PyObject* extractBLOCKS(mupdf::FzStextPage& self);
4261
4262 PyObject* link_uri(mupdf::FzLink& link);
4263
4264 fz_stext_page* page_get_textpage(
4265 mupdf::FzPage& self,
4266 PyObject* clip,
4267 int flags,
4268 PyObject* matrix
4269 );
4270
4271 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw);
4272 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y);
4273 int pixmap_n(mupdf::FzPixmap& pixmap);
4274
4275 PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle);
4276
4277 PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color);
4278
/* Copies from <src> to <pm>, which must have the same width and height.
pm->n - src->n must be -1, 0 or +1. If 0, the samples are copied unchanged.
If -1, <src> must have alpha and <pm> must not have alpha, and we copy the
non-alpha bytes. If +1, <src> must not have alpha and <pm> must have alpha;
we copy the colour bytes and set all of <pm>'s alpha bytes to 255.
<n> must not exceed pm->n or src->n (checked by assert only). */
4283 void pixmap_copy(fz_pixmap* pm, const fz_pixmap* src, int n);
4284
4285 PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip);