Mercurial > hgrepos > Python2 > PyMuPDF
comparison src/extra.i @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 %module fitz_extra | |
| 2 | |
| 3 %pythoncode %{ | |
| 4 # pylint: disable=all | |
| 5 %} | |
| 6 | |
| 7 %begin | |
| 8 %{ | |
| 9 #define SWIG_PYTHON_INTERPRETER_NO_DEBUG | |
| 10 | |
| 11 /* This seems to be necessary on some Windows machines with Py_LIMITED_API, | |
| 12 otherwise compilation can fail because free() and malloc() are not declared. */ | |
| 13 #include <stdlib.h> | |
| 14 %} | |
| 15 | |
| 16 %init | |
| 17 %{ | |
| 18 /* Initialise some globals that require Python functions. | |
| 19 | |
| 20 [Prior to 2023-08-18 we initialised these global variables inline, | |
| 21 but this causes a SEGV on Windows with Python-3.10 for `dictkey_c` | |
| 22 (actually any string of length 1 failed).] */ | |
| 23 | |
| 24 dictkey_align = PyUnicode_InternFromString("align"); | |
| 25 dictkey_ascender = PyUnicode_InternFromString("ascender"); | |
| 26 dictkey_bidi = PyUnicode_InternFromString("bidi"); | |
| 27 dictkey_bbox = PyUnicode_InternFromString("bbox"); | |
| 28 dictkey_blocks = PyUnicode_InternFromString("blocks"); | |
| 29 dictkey_bpc = PyUnicode_InternFromString("bpc"); | |
| 30 dictkey_c = PyUnicode_InternFromString("c"); | |
| 31 dictkey_chars = PyUnicode_InternFromString("chars"); | |
| 32 dictkey_color = PyUnicode_InternFromString("color"); | |
| 33 dictkey_colorspace = PyUnicode_InternFromString("colorspace"); | |
| 34 dictkey_content = PyUnicode_InternFromString("content"); | |
| 35 dictkey_creationDate = PyUnicode_InternFromString("creationDate"); | |
| 36 dictkey_cs_name = PyUnicode_InternFromString("cs-name"); | |
| 37 dictkey_da = PyUnicode_InternFromString("da"); | |
| 38 dictkey_dashes = PyUnicode_InternFromString("dashes"); | |
| 39 dictkey_desc = PyUnicode_InternFromString("descender"); | |
| 40 dictkey_descender = PyUnicode_InternFromString("descender"); | |
| 41 dictkey_dir = PyUnicode_InternFromString("dir"); | |
| 42 dictkey_effect = PyUnicode_InternFromString("effect"); | |
| 43 dictkey_ext = PyUnicode_InternFromString("ext"); | |
| 44 dictkey_filename = PyUnicode_InternFromString("filename"); | |
| 45 dictkey_fill = PyUnicode_InternFromString("fill"); | |
| 46 dictkey_flags = PyUnicode_InternFromString("flags"); | |
| 47 dictkey_char_flags = PyUnicode_InternFromString("char_flags"); /* Only used with mupdf >= 1.25.2. */ | |
| 48 dictkey_font = PyUnicode_InternFromString("font"); | |
| 49 dictkey_glyph = PyUnicode_InternFromString("glyph"); | |
| 50 dictkey_height = PyUnicode_InternFromString("height"); | |
| 51 dictkey_id = PyUnicode_InternFromString("id"); | |
| 52 dictkey_image = PyUnicode_InternFromString("image"); | |
| 53 dictkey_items = PyUnicode_InternFromString("items"); | |
| 54 dictkey_length = PyUnicode_InternFromString("length"); | |
| 55 dictkey_lines = PyUnicode_InternFromString("lines"); | |
| 56 dictkey_matrix = PyUnicode_InternFromString("transform"); | |
| 57 dictkey_modDate = PyUnicode_InternFromString("modDate"); | |
| 58 dictkey_name = PyUnicode_InternFromString("name"); | |
| 59 dictkey_number = PyUnicode_InternFromString("number"); | |
| 60 dictkey_origin = PyUnicode_InternFromString("origin"); | |
| 61 dictkey_rect = PyUnicode_InternFromString("rect"); | |
| 62 dictkey_size = PyUnicode_InternFromString("size"); | |
| 63 dictkey_smask = PyUnicode_InternFromString("smask"); | |
| 64 dictkey_spans = PyUnicode_InternFromString("spans"); | |
| 65 dictkey_stroke = PyUnicode_InternFromString("stroke"); | |
| 66 dictkey_style = PyUnicode_InternFromString("style"); | |
| 67 dictkey_subject = PyUnicode_InternFromString("subject"); | |
| 68 dictkey_text = PyUnicode_InternFromString("text"); | |
| 69 dictkey_title = PyUnicode_InternFromString("title"); | |
| 70 dictkey_type = PyUnicode_InternFromString("type"); | |
| 71 dictkey_ufilename = PyUnicode_InternFromString("ufilename"); | |
| 72 dictkey_width = PyUnicode_InternFromString("width"); | |
| 73 dictkey_wmode = PyUnicode_InternFromString("wmode"); | |
| 74 dictkey_xref = PyUnicode_InternFromString("xref"); | |
| 75 dictkey_xres = PyUnicode_InternFromString("xres"); | |
| 76 dictkey_yres = PyUnicode_InternFromString("yres"); | |
| 77 %} | |
| 78 | |
| 79 %include std_string.i | |
| 80 | |
| 81 %include exception.i | |
| 82 %exception { | |
| 83 try { | |
| 84 $action | |
| 85 } | |
| 86 | |
| 87 /* this might not be ok on windows. | |
| 88 catch (Swig::DirectorException &e) { | |
| 89 SWIG_fail; | |
| 90 }*/ | |
| 91 catch(std::exception& e) { | |
| 92 SWIG_exception(SWIG_RuntimeError, e.what()); | |
| 93 } | |
| 94 catch(...) { | |
| 95 SWIG_exception(SWIG_RuntimeError, "Unknown exception"); | |
| 96 } | |
| 97 } | |
| 98 | |
| 99 %{ | |
| 100 #include "mupdf/classes2.h" | |
| 101 #include "mupdf/exceptions.h" | |
| 102 #include "mupdf/internal.h" | |
| 103 | |
| 104 #include <algorithm> | |
| 105 #include <float.h> | |
| 106 | |
| 107 | |
/* Packs a (major, minor, patch) triple into one integer so that versions can
be compared with ordinary integer operators. Every argument is parenthesised
so that expression arguments (e.g. `a | b`) are shifted as a whole. */
#define MAKE_MUPDF_VERSION_INT(major, minor, patch) \
        (((major) << 16) + ((minor) << 8) + ((patch) << 0))

/* Packed version built from the FZ_VERSION_* macros. */
#define MUPDF_VERSION_INT MAKE_MUPDF_VERSION_INT(FZ_VERSION_MAJOR, FZ_VERSION_MINOR, FZ_VERSION_PATCH)

/* Non-zero if MUPDF_VERSION_INT is at least major.minor.patch. The expansion
is wrapped in parentheses so it stays correct when negated or combined with
other operators, e.g. `#if !MUPDF_VERSION_GE(1, 25, 2)`. */
#define MUPDF_VERSION_GE(major, minor, patch) \
        (MUPDF_VERSION_INT >= MAKE_MUPDF_VERSION_INT(major, minor, patch))

/* Define a wrapper for PDF_NAME that returns a mupdf::PdfObj instead of a
pdf_obj*. This avoids implicit construction of a mupdf::PdfObj, which is
deliberately prohibited (with `explicit` on constructors) by recent MuPDF. */
#define PDF_NAME2(X) mupdf::PdfObj(PDF_NAME(X))
| 119 | |
| 120 /* Returns equivalent of `repr(x)`. */ | |
| 121 static std::string repr(PyObject* x) | |
| 122 { | |
| 123 PyObject* repr = PyObject_Repr(x); | |
| 124 PyObject* repr_str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~"); | |
| 125 #ifdef Py_LIMITED_API | |
| 126 const char* repr_str_s = PyBytes_AsString(repr_str); | |
| 127 #else | |
| 128 const char* repr_str_s = PyBytes_AS_STRING(repr_str); | |
| 129 #endif | |
| 130 std::string ret = repr_str_s; | |
| 131 Py_DECREF(repr_str); | |
| 132 Py_DECREF(repr); | |
| 133 return ret; | |
| 134 } | |
| 135 | |
| 136 #ifdef Py_LIMITED_API | |
| 137 static PyObject* PySequence_ITEM(PyObject* o, Py_ssize_t i) | |
| 138 { | |
| 139 return PySequence_GetItem(o, i); | |
| 140 } | |
| 141 | |
| 142 static const char* PyUnicode_AsUTF8(PyObject* o) | |
| 143 { | |
| 144 static PyObject* string = nullptr; | |
| 145 Py_XDECREF(string); | |
| 146 string = PyUnicode_AsUTF8String(o); | |
| 147 return PyBytes_AsString(string); | |
| 148 } | |
| 149 #endif | |
| 150 | |
| 151 | |
/* Error message texts. These are also in pymupdf/__init__.py. */
constexpr char MSG_BAD_ANNOT_TYPE[] = "bad annot type";
constexpr char MSG_BAD_APN[] = "bad or missing annot AP/N";
constexpr char MSG_BAD_ARG_INK_ANNOT[] = "arg must be seq of seq of float pairs";
constexpr char MSG_BAD_ARG_POINTS[] = "bad seq of points";
constexpr char MSG_BAD_BUFFER[] = "bad type: 'buffer'";
constexpr char MSG_BAD_COLOR_SEQ[] = "bad color sequence";
constexpr char MSG_BAD_DOCUMENT[] = "cannot open broken document";
constexpr char MSG_BAD_FILETYPE[] = "bad filetype";
constexpr char MSG_BAD_LOCATION[] = "bad location";
constexpr char MSG_BAD_OC_CONFIG[] = "bad config number";
constexpr char MSG_BAD_OC_LAYER[] = "bad layer number";
constexpr char MSG_BAD_OC_REF[] = "bad 'oc' reference";
constexpr char MSG_BAD_PAGEID[] = "bad page id";
constexpr char MSG_BAD_PAGENO[] = "bad page number(s)";
constexpr char MSG_BAD_PDFROOT[] = "PDF has no root";
constexpr char MSG_BAD_RECT[] = "rect is infinite or empty";
constexpr char MSG_BAD_TEXT[] = "bad type: 'text'";
constexpr char MSG_BAD_XREF[] = "bad xref";
constexpr char MSG_COLOR_COUNT_FAILED[] = "color count failed";
constexpr char MSG_FILE_OR_BUFFER[] = "need font file or buffer";
constexpr char MSG_FONT_FAILED[] = "cannot create font";
constexpr char MSG_IS_NO_ANNOT[] = "is no annotation";
constexpr char MSG_IS_NO_IMAGE[] = "is no image";
constexpr char MSG_IS_NO_PDF[] = "is no PDF";
constexpr char MSG_IS_NO_DICT[] = "object is no PDF dict";
constexpr char MSG_PIX_NOALPHA[] = "source pixmap has no alpha";
constexpr char MSG_PIXEL_OUTSIDE[] = "pixel(s) outside image";
| 180 | |
| 181 #define JM_BOOL(x) PyBool_FromLong((long) (x)) | |
| 182 | |
| 183 static PyObject *JM_UnicodeFromStr(const char *c); | |
| 184 | |
| 185 | |
#ifdef _WIN32

/* These functions are not provided on Windows. */

/* Formats into a freshly malloc()'ed buffer.

On success stores the buffer in *str (to be released with free()) and returns
the text length without the terminating zero. On any failure - formatting
error, out of memory, inconsistent second formatting pass - stores nullptr in
*str and returns -1. */
int vasprintf(char** str, const char* fmt, va_list ap)
{
    *str = nullptr;

    // First pass only measures; work on a copy because `ap` is needed again.
    va_list ap2;
    va_copy(ap2, ap);
    int len = vsnprintf(nullptr, 0, fmt, ap2);
    va_end(ap2);
    if (len < 0)
    {
        return -1;  // formatting error: do not allocate from a negative size
    }

    char* buffer = (char*) malloc((size_t) len + 1);
    if (!buffer)
    {
        return -1;
    }
    va_copy(ap2, ap);
    int len2 = vsnprintf(buffer, (size_t) len + 1, fmt, ap2);
    va_end(ap2);
    if (len2 != len)
    {
        // Checked at run time because assert() vanishes with NDEBUG.
        free(buffer);
        return -1;
    }
    *str = buffer;
    return len;
}

/* Variadic front end of vasprintf(); same result conventions. */
int asprintf(char** str, const char* fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    int ret = vasprintf(str, fmt, ap);
    va_end(ap);

    return ret;
}
#endif
| 222 | |
| 223 | |
| 224 static void messagev(const char* format, va_list va) | |
| 225 { | |
| 226 static PyObject* pymupdf_module = PyImport_ImportModule("pymupdf"); | |
| 227 static PyObject* message_fn = PyObject_GetAttrString(pymupdf_module, "message"); | |
| 228 char* text; | |
| 229 vasprintf(&text, format, va); | |
| 230 PyObject* text_py = PyString_FromString(text); | |
| 231 PyObject* args = PyTuple_Pack(1, text_py); | |
| 232 PyObject* ret = PyObject_CallObject(message_fn, args); | |
| 233 Py_XDECREF(ret); | |
| 234 Py_XDECREF(args); | |
| 235 Py_XDECREF(text_py); | |
| 236 free(text); | |
| 237 } | |
| 238 | |
| 239 static void messagef(const char* format, ...) | |
| 240 { | |
| 241 va_list args; | |
| 242 va_start(args, format); | |
| 243 messagev(format, args); | |
| 244 va_end(args); | |
| 245 } | |
| 246 | |
| 247 PyObject* JM_EscapeStrFromStr(const char* c) | |
| 248 { | |
| 249 if (!c) return PyUnicode_FromString(""); | |
| 250 PyObject* val = PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace"); | |
| 251 if (!val) | |
| 252 { | |
| 253 val = PyUnicode_FromString(""); | |
| 254 PyErr_Clear(); | |
| 255 } | |
| 256 return val; | |
| 257 } | |
| 258 | |
| 259 PyObject* JM_EscapeStrFromBuffer(fz_buffer* buff) | |
| 260 { | |
| 261 if (!buff) return PyUnicode_FromString(""); | |
| 262 unsigned char* s = nullptr; | |
| 263 size_t len = mupdf::ll_fz_buffer_storage(buff, &s); | |
| 264 PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace"); | |
| 265 if (!val) | |
| 266 { | |
| 267 val = PyUnicode_FromString(""); | |
| 268 PyErr_Clear(); | |
| 269 } | |
| 270 return val; | |
| 271 } | |
| 272 | |
| 273 //---------------------------------------------------------------------------- | |
| 274 // Deep-copies a source page to the target. | |
| 275 // Modified version of function of pdfmerge.c: we also copy annotations, but | |
| 276 // we skip some subtypes. In addition we rotate output. | |
| 277 //---------------------------------------------------------------------------- | |
| 278 static void page_merge( | |
| 279 mupdf::PdfDocument& doc_des, | |
| 280 mupdf::PdfDocument& doc_src, | |
| 281 int page_from, | |
| 282 int page_to, | |
| 283 int rotate, | |
| 284 int links, | |
| 285 int copy_annots, | |
| 286 mupdf::PdfGraftMap& graft_map | |
| 287 ) | |
| 288 { | |
| 289 // list of object types (per page) we want to copy | |
| 290 | |
| 291 /* Fixme: on linux these get destructed /after/ | |
| 292 mupdf/platform/c++/implementation/internal.cpp:s_thread_state, which causes | |
| 293 problems - s_thread_state::m_ctx will have been freed. We have a hack | |
| 294 that sets s_thread_state::m_ctx when destructed, so it mostly works when | |
| 295 s_thread_state.get_context() is called after destruction, but this causes | |
| 296 memento leaks and is clearly incorrect. | |
| 297 | |
| 298 Perhaps we could use pdf_obj* known_page_objs[] = {...} and create PdfObj | |
| 299 wrappers as used - this would avoid any cleanup at exit. And it's a general | |
| 300 solution to problem of ordering of cleanup of globals. | |
| 301 */ | |
| 302 static pdf_obj* known_page_objs[] = { | |
| 303 PDF_NAME(Contents), | |
| 304 PDF_NAME(Resources), | |
| 305 PDF_NAME(MediaBox), | |
| 306 PDF_NAME(CropBox), | |
| 307 PDF_NAME(BleedBox), | |
| 308 PDF_NAME(TrimBox), | |
| 309 PDF_NAME(ArtBox), | |
| 310 PDF_NAME(Rotate), | |
| 311 PDF_NAME(UserUnit) | |
| 312 }; | |
| 313 int known_page_objs_num = sizeof(known_page_objs) / sizeof(known_page_objs[0]); | |
| 314 mupdf::PdfObj page_ref = mupdf::pdf_lookup_page_obj(doc_src, page_from); | |
| 315 | |
| 316 // make new page dict in dest doc | |
| 317 mupdf::PdfObj page_dict = mupdf::pdf_new_dict(doc_des, 4); | |
| 318 mupdf::pdf_dict_put(page_dict, PDF_NAME2(Type), PDF_NAME2(Page)); | |
| 319 | |
| 320 for (int i = 0; i < known_page_objs_num; ++i) | |
| 321 { | |
| 322 mupdf::PdfObj known_page_obj(known_page_objs[i]); | |
| 323 mupdf::PdfObj obj = mupdf::pdf_dict_get_inheritable(page_ref, known_page_obj); | |
| 324 if (obj.m_internal) | |
| 325 { | |
| 326 mupdf::pdf_dict_put( | |
| 327 page_dict, | |
| 328 known_page_obj, | |
| 329 mupdf::pdf_graft_mapped_object(graft_map, obj) | |
| 330 ); | |
| 331 } | |
| 332 } | |
| 333 | |
| 334 // Copy annotations, but skip Link, Popup, IRT, Widget types | |
| 335 // If selected, remove dict keys P (parent) and Popup | |
| 336 if (copy_annots) | |
| 337 { | |
| 338 mupdf::PdfObj old_annots = mupdf::pdf_dict_get(page_ref, PDF_NAME2(Annots)); | |
| 339 int n = mupdf::pdf_array_len(old_annots); | |
| 340 if (n > 0) | |
| 341 { | |
| 342 mupdf::PdfObj new_annots = mupdf::pdf_dict_put_array(page_dict, PDF_NAME2(Annots), n); | |
| 343 for (int i = 0; i < n; i++) | |
| 344 { | |
| 345 mupdf::PdfObj o = mupdf::pdf_array_get(old_annots, i); | |
| 346 if (!o.m_internal || !mupdf::pdf_is_dict(o)) // skip non-dict items | |
| 347 { | |
| 348 continue; // skip invalid/null/non-dict items | |
| 349 } | |
| 350 if (mupdf::pdf_dict_get(o, PDF_NAME2(IRT)).m_internal) continue; | |
| 351 mupdf::PdfObj subtype = mupdf::pdf_dict_get(o, PDF_NAME2(Subtype)); | |
| 352 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Link))) continue; | |
| 353 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Popup))) continue; | |
| 354 if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Widget))) continue; | |
| 355 mupdf::pdf_dict_del(o, PDF_NAME2(Popup)); | |
| 356 mupdf::pdf_dict_del(o, PDF_NAME2(P)); | |
| 357 mupdf::PdfObj copy_o = mupdf::pdf_graft_mapped_object(graft_map, o); | |
| 358 mupdf::PdfObj annot = mupdf::pdf_new_indirect( | |
| 359 doc_des, | |
| 360 mupdf::pdf_to_num(copy_o), | |
| 361 0 | |
| 362 ); | |
| 363 mupdf::pdf_array_push(new_annots, annot); | |
| 364 } | |
| 365 } | |
| 366 } | |
| 367 // rotate the page | |
| 368 if (rotate != -1) | |
| 369 { | |
| 370 mupdf::pdf_dict_put_int(page_dict, PDF_NAME2(Rotate), rotate); | |
| 371 } | |
| 372 // Now add the page dictionary to dest PDF | |
| 373 mupdf::PdfObj ref = mupdf::pdf_add_object(doc_des, page_dict); | |
| 374 | |
| 375 // Insert new page at specified location | |
| 376 mupdf::pdf_insert_page(doc_des, page_to, ref); | |
| 377 } | |
| 378 | |
| 379 //----------------------------------------------------------------------------- | |
| 380 // Copy a range of pages (spage, epage) from a source PDF to a specified | |
| 381 // location (apage) of the target PDF. | |
| 382 // If spage > epage, the sequence of source pages is reversed. | |
| 383 //----------------------------------------------------------------------------- | |
| 384 static void JM_merge_range( | |
| 385 mupdf::PdfDocument& doc_des, | |
| 386 mupdf::PdfDocument& doc_src, | |
| 387 int spage, | |
| 388 int epage, | |
| 389 int apage, | |
| 390 int rotate, | |
| 391 int links, | |
| 392 int annots, | |
| 393 int show_progress, | |
| 394 mupdf::PdfGraftMap& graft_map | |
| 395 ) | |
| 396 { | |
| 397 int afterpage = apage; | |
| 398 int counter = 0; // copied pages counter | |
| 399 int total = mupdf::ll_fz_absi(epage - spage) + 1; // total pages to copy | |
| 400 | |
| 401 if (spage < epage) | |
| 402 { | |
| 403 for (int page = spage; page <= epage; page++, afterpage++) | |
| 404 { | |
| 405 page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); | |
| 406 counter++; | |
| 407 if (show_progress > 0 && counter % show_progress == 0) | |
| 408 { | |
| 409 messagef("Inserted %i of %i pages.", counter, total); | |
| 410 } | |
| 411 } | |
| 412 } | |
| 413 else | |
| 414 { | |
| 415 for (int page = spage; page >= epage; page--, afterpage++) | |
| 416 { | |
| 417 page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); | |
| 418 counter++; | |
| 419 if (show_progress > 0 && counter % show_progress == 0) | |
| 420 { | |
| 421 messagef("Inserted %i of %i pages.", counter, total); | |
| 422 } | |
| 423 } | |
| 424 } | |
| 425 } | |
| 426 | |
| 427 static bool JM_have_operation(mupdf::PdfDocument& pdf) | |
| 428 { | |
| 429 // Ensure valid journalling state | |
| 430 if (pdf.m_internal->journal and !mupdf::pdf_undoredo_step(pdf, 0)) | |
| 431 { | |
| 432 return 0; | |
| 433 } | |
| 434 return 1; | |
| 435 } | |
| 436 | |
| 437 static void JM_ensure_operation(mupdf::PdfDocument& pdf) | |
| 438 { | |
| 439 if (!JM_have_operation(pdf)) | |
| 440 { | |
| 441 throw std::runtime_error("No journalling operation started"); | |
| 442 } | |
| 443 } | |
| 444 | |
| 445 | |
| 446 static void FzDocument_insert_pdf( | |
| 447 mupdf::FzDocument& doc, | |
| 448 mupdf::FzDocument& src, | |
| 449 int from_page, | |
| 450 int to_page, | |
| 451 int start_at, | |
| 452 int rotate, | |
| 453 int links, | |
| 454 int annots, | |
| 455 int show_progress, | |
| 456 int final, | |
| 457 mupdf::PdfGraftMap& graft_map | |
| 458 ) | |
| 459 { | |
| 460 //std::cerr << __FILE__ << ":" << __LINE__ << ":" << __FUNCTION__ << "\n"; | |
| 461 mupdf::PdfDocument pdfout = mupdf::pdf_specifics(doc); | |
| 462 mupdf::PdfDocument pdfsrc = mupdf::pdf_specifics(src); | |
| 463 int outCount = mupdf::fz_count_pages(doc); | |
| 464 int srcCount = mupdf::fz_count_pages(src); | |
| 465 | |
| 466 // local copies of page numbers | |
| 467 int fp = from_page; | |
| 468 int tp = to_page; | |
| 469 int sa = start_at; | |
| 470 | |
| 471 // normalize page numbers | |
| 472 fp = std::max(fp, 0); // -1 = first page | |
| 473 fp = std::min(fp, srcCount - 1); // but do not exceed last page | |
| 474 | |
| 475 if (tp < 0) tp = srcCount - 1; // -1 = last page | |
| 476 tp = std::min(tp, srcCount - 1); // but do not exceed last page | |
| 477 | |
| 478 if (sa < 0) sa = outCount; // -1 = behind last page | |
| 479 sa = std::min(sa, outCount); // but that is also the limit | |
| 480 | |
| 481 if (!pdfout.m_internal || !pdfsrc.m_internal) | |
| 482 { | |
| 483 throw std::runtime_error("source or target not a PDF"); | |
| 484 } | |
| 485 JM_ensure_operation(pdfout); | |
| 486 JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, graft_map); | |
| 487 } | |
| 488 | |
| 489 static int page_xref(mupdf::FzDocument& this_doc, int pno) | |
| 490 { | |
| 491 int page_count = mupdf::fz_count_pages(this_doc); | |
| 492 int n = pno; | |
| 493 while (n < 0) | |
| 494 { | |
| 495 n += page_count; | |
| 496 } | |
| 497 mupdf::PdfDocument pdf = mupdf::pdf_specifics(this_doc); | |
| 498 assert(pdf.m_internal); | |
| 499 int xref = 0; | |
| 500 if (n >= page_count) | |
| 501 { | |
| 502 throw std::runtime_error(MSG_BAD_PAGENO);//, PyExc_ValueError); | |
| 503 } | |
| 504 xref = mupdf::pdf_to_num(mupdf::pdf_lookup_page_obj(pdf, n)); | |
| 505 return xref; | |
| 506 } | |
| 507 | |
| 508 static void _newPage(mupdf::PdfDocument& pdf, int pno=-1, float width=595, float height=842) | |
| 509 { | |
| 510 if (!pdf.m_internal) | |
| 511 { | |
| 512 throw std::runtime_error("is no PDF"); | |
| 513 } | |
| 514 mupdf::FzRect mediabox(0, 0, width, height); | |
| 515 if (pno < -1) | |
| 516 { | |
| 517 throw std::runtime_error("bad page number(s)"); // Should somehow be Python ValueError | |
| 518 } | |
| 519 JM_ensure_operation(pdf); | |
| 520 // create /Resources and /Contents objects | |
| 521 mupdf::PdfObj resources = mupdf::pdf_add_new_dict(pdf, 1); | |
| 522 mupdf::FzBuffer contents; | |
| 523 mupdf::PdfObj page_obj = mupdf::pdf_add_page(pdf, mediabox, 0, resources, contents); | |
| 524 mupdf::pdf_insert_page(pdf, pno, page_obj); | |
| 525 } | |
| 526 | |
| 527 static void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842) | |
| 528 { | |
| 529 mupdf::PdfDocument pdf = mupdf::pdf_specifics(self); | |
| 530 _newPage(pdf, pno, width, height); | |
| 531 } | |
| 532 | |
| 533 | |
| 534 //------------------------------------------------------------------------ | |
| 535 // return the annotation names (list of /NM entries) | |
| 536 //------------------------------------------------------------------------ | |
| 537 static std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page) | |
| 538 { | |
| 539 std::vector< std::string> names; | |
| 540 mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)); | |
| 541 if (!annots.m_internal) return names; | |
| 542 int n = mupdf::pdf_array_len(annots); | |
| 543 for (int i = 0; i < n; i++) | |
| 544 { | |
| 545 mupdf::PdfObj annot_obj = mupdf::pdf_array_get(annots, i); | |
| 546 mupdf::PdfObj name = mupdf::pdf_dict_gets(annot_obj, "NM"); | |
| 547 if (name.m_internal) | |
| 548 { | |
| 549 names.push_back(mupdf::pdf_to_text_string(name)); | |
| 550 } | |
| 551 } | |
| 552 return names; | |
| 553 } | |
| 554 | |
| 555 | |
| 556 //------------------------------------------------------------------------ | |
| 557 // Add a unique /NM key to an annotation or widget. | |
| 558 // Append a number to 'stem' such that the result is a unique name. | |
| 559 //------------------------------------------------------------------------ | |
| 560 static void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem) | |
| 561 { | |
| 562 mupdf::PdfPage page = mupdf::pdf_annot_page(annot); | |
| 563 mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot); | |
| 564 std::vector< std::string> names = JM_get_annot_id_list(page); | |
| 565 char* stem_id = nullptr; | |
| 566 for (int i=0; ; ++i) | |
| 567 { | |
| 568 free(stem_id); | |
| 569 asprintf(&stem_id, "fitz-%s%d", stem, i); | |
| 570 if (std::find(names.begin(), names.end(), stem_id) == names.end()) | |
| 571 { | |
| 572 break; | |
| 573 } | |
| 574 } | |
| 575 mupdf::PdfObj name = mupdf::pdf_new_string(stem_id, strlen(stem_id)); | |
| 576 free(stem_id); | |
| 577 mupdf::pdf_dict_puts(annot_obj, "NM", name); | |
| 578 page.m_internal->doc->resynth_required = 0; | |
| 579 } | |
| 580 | |
| 581 //---------------------------------------------------------------- | |
| 582 // page add_caret_annot | |
| 583 //---------------------------------------------------------------- | |
| 584 static mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& page, mupdf::FzPoint& point) | |
| 585 { | |
| 586 mupdf::PdfAnnot annot = mupdf::pdf_create_annot(page, ::PDF_ANNOT_CARET); | |
| 587 mupdf::FzPoint p = point; | |
| 588 mupdf::FzRect r = mupdf::pdf_annot_rect(annot); | |
| 589 r = mupdf::fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); | |
| 590 mupdf::pdf_set_annot_rect(annot, r); | |
| 591 mupdf::pdf_update_annot(annot); | |
| 592 JM_add_annot_id(annot, "A"); | |
| 593 return annot; | |
| 594 } | |
| 595 | |
| 596 static mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& page, mupdf::FzPoint& point) | |
| 597 { | |
| 598 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); | |
| 599 return _add_caret_annot(pdf_page, point); | |
| 600 } | |
| 601 | |
| 602 static const char* Tools_parse_da(mupdf::PdfAnnot& this_annot) | |
| 603 { | |
| 604 const char* da_str = nullptr; | |
| 605 mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot); | |
| 606 mupdf::PdfDocument pdf = mupdf::pdf_get_bound_document(this_annot_obj); | |
| 607 try | |
| 608 { | |
| 609 mupdf::PdfObj da = mupdf::pdf_dict_get_inheritable(this_annot_obj, PDF_NAME2(DA)); | |
| 610 if (!da.m_internal) | |
| 611 { | |
| 612 mupdf::PdfObj trailer = mupdf::pdf_trailer(pdf); | |
| 613 da = mupdf::pdf_dict_getl( | |
| 614 &trailer, | |
| 615 PDF_NAME(Root), | |
| 616 PDF_NAME(AcroForm), | |
| 617 PDF_NAME(DA), | |
| 618 nullptr | |
| 619 ); | |
| 620 } | |
| 621 da_str = mupdf::pdf_to_text_string(da); | |
| 622 } | |
| 623 catch (std::exception&) | |
| 624 { | |
| 625 return nullptr; | |
| 626 } | |
| 627 return da_str; | |
| 628 } | |
| 629 | |
| 630 //---------------------------------------------------------------------------- | |
| 631 // Turn fz_buffer into a Python bytes object | |
| 632 //---------------------------------------------------------------------------- | |
| 633 static PyObject* JM_BinFromBuffer(fz_buffer* buffer) | |
| 634 { | |
| 635 if (!buffer) | |
| 636 { | |
| 637 return PyBytes_FromStringAndSize("", 0); | |
| 638 } | |
| 639 unsigned char* c = nullptr; | |
| 640 size_t len = mupdf::ll_fz_buffer_storage(buffer, &c); | |
| 641 return PyBytes_FromStringAndSize((const char*) c, len); | |
| 642 } | |
| 643 static PyObject* JM_BinFromBuffer(mupdf::FzBuffer& buffer) | |
| 644 { | |
| 645 return JM_BinFromBuffer( buffer.m_internal); | |
| 646 } | |
| 647 | |
| 648 static PyObject* Annot_getAP(mupdf::PdfAnnot& annot) | |
| 649 { | |
| 650 mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot); | |
| 651 mupdf::PdfObj ap = mupdf::pdf_dict_getl( | |
| 652 &annot_obj, | |
| 653 PDF_NAME(AP), | |
| 654 PDF_NAME(N), | |
| 655 nullptr | |
| 656 ); | |
| 657 if (mupdf::pdf_is_stream(ap)) | |
| 658 { | |
| 659 mupdf::FzBuffer res = mupdf::pdf_load_stream(ap); | |
| 660 return JM_BinFromBuffer(res); | |
| 661 } | |
| 662 return PyBytes_FromStringAndSize("", 0); | |
| 663 } | |
| 664 | |
| 665 void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str) | |
| 666 { | |
| 667 mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot); | |
| 668 mupdf::pdf_dict_put_text_string(this_annot_obj, PDF_NAME2(DA), da_str); | |
| 669 mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(DS)); /* not supported */ | |
| 670 mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(RC)); /* not supported */ | |
| 671 } | |
| 672 | |
| 673 static int | |
| 674 jm_float_item(PyObject* obj, Py_ssize_t idx, double* result) | |
| 675 { | |
| 676 PyObject* temp = PySequence_ITEM(obj, idx); | |
| 677 if (!temp) return 1; | |
| 678 *result = PyFloat_AsDouble(temp); | |
| 679 Py_DECREF(temp); | |
| 680 if (PyErr_Occurred()) | |
| 681 { | |
| 682 PyErr_Clear(); | |
| 683 return 1; | |
| 684 } | |
| 685 return 0; | |
| 686 } | |
| 687 | |
| 688 | |
| 689 static mupdf::FzPoint JM_point_from_py(PyObject* p) | |
| 690 { | |
| 691 fz_point p0 = fz_make_point(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT); | |
| 692 if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2) | |
| 693 { | |
| 694 return p0; | |
| 695 } | |
| 696 double x; | |
| 697 double y; | |
| 698 if (jm_float_item(p, 0, &x) == 1) return p0; | |
| 699 if (jm_float_item(p, 1, &y) == 1) return p0; | |
| 700 if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT; | |
| 701 if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT; | |
| 702 if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT; | |
| 703 if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT; | |
| 704 | |
| 705 return fz_make_point(x, y); | |
| 706 } | |
| 707 | |
| 708 static int s_list_append_drop(PyObject* list, PyObject* item) | |
| 709 { | |
| 710 if (!list || !PyList_Check(list) || !item) | |
| 711 { | |
| 712 return -2; | |
| 713 } | |
| 714 int rc = PyList_Append(list, item); | |
| 715 Py_DECREF(item); | |
| 716 return rc; | |
| 717 } | |
| 718 | |
| 719 static int LIST_APPEND_DROP(PyObject *list, PyObject *item) | |
| 720 { | |
| 721 if (!list || !PyList_Check(list) || !item) return -2; | |
| 722 int rc = PyList_Append(list, item); | |
| 723 Py_DECREF(item); | |
| 724 return rc; | |
| 725 } | |
| 726 | |
| 727 static int LIST_APPEND(PyObject *list, PyObject *item) | |
| 728 { | |
| 729 if (!list || !PyList_Check(list) || !item) return -2; | |
| 730 int rc = PyList_Append(list, item); | |
| 731 return rc; | |
| 732 } | |
| 733 | |
| 734 static int DICT_SETITEM_DROP(PyObject *dict, PyObject *key, PyObject *value) | |
| 735 { | |
| 736 if (!dict || !PyDict_Check(dict) || !key || !value) return -2; | |
| 737 int rc = PyDict_SetItem(dict, key, value); | |
| 738 Py_DECREF(value); | |
| 739 return rc; | |
| 740 } | |
| 741 | |
| 742 static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value) | |
| 743 { | |
| 744 if (!dict || !PyDict_Check(dict) || !key || !value) return -2; | |
| 745 int rc = PyDict_SetItemString(dict, key, value); | |
| 746 Py_DECREF(value); | |
| 747 return rc; | |
| 748 } | |
| 749 | |
| 750 | |
| 751 //----------------------------------------------------------------------------- | |
| 752 // Functions converting between PySequences and pymupdf geometry objects | |
| 753 //----------------------------------------------------------------------------- | |
| 754 static int | |
| 755 jm_init_item(PyObject* obj, Py_ssize_t idx, int* result) | |
| 756 { | |
| 757 PyObject* temp = PySequence_ITEM(obj, idx); | |
| 758 if (!temp) | |
| 759 { | |
| 760 return 1; | |
| 761 } | |
| 762 if (PyLong_Check(temp)) | |
| 763 { | |
| 764 *result = (int) PyLong_AsLong(temp); | |
| 765 Py_DECREF(temp); | |
| 766 } | |
| 767 else if (PyFloat_Check(temp)) | |
| 768 { | |
| 769 *result = (int) PyFloat_AsDouble(temp); | |
| 770 Py_DECREF(temp); | |
| 771 } | |
| 772 else | |
| 773 { | |
| 774 Py_DECREF(temp); | |
| 775 return 1; | |
| 776 } | |
| 777 if (PyErr_Occurred()) | |
| 778 { | |
| 779 PyErr_Clear(); | |
| 780 return 1; | |
| 781 } | |
| 782 return 0; | |
| 783 } | |
| 784 | |
| 785 // TODO: ------------------------------------------------------------------ | |
| 786 // This is a temporary solution and should be replaced by a C++ extension: | |
| 787 // There is no way in Python specify an array of fz_point - as is required | |
| 788 // for function pdf_set_annot_callout_line(). | |
| 789 static void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count) | |
| 790 { | |
| 791 fz_point points[3]; | |
| 792 mupdf::FzPoint p; | |
| 793 for (int i = 0; i < count; i++) | |
| 794 { | |
| 795 p = JM_point_from_py(PyTuple_GetItem(callout, (Py_ssize_t) i)); | |
| 796 points[i] = fz_make_point(p.x, p.y); | |
| 797 } | |
| 798 mupdf::pdf_set_annot_callout_line(annot, points, count); | |
| 799 } | |
| 800 | |
| 801 | |
| 802 //---------------------------------------------------------------------------- | |
| 803 // Return list of outline xref numbers. Recursive function. Arguments: | |
| 804 // 'obj' first OL item | |
| 805 // 'xrefs' empty Python list | |
| 806 //---------------------------------------------------------------------------- | |
| 807 static PyObject* JM_outline_xrefs(mupdf::PdfObj obj, PyObject* xrefs) | |
| 808 { | |
| 809 if (!obj.m_internal) | |
| 810 { | |
| 811 return xrefs; | |
| 812 } | |
| 813 PyObject* newxref = nullptr; | |
| 814 mupdf::PdfObj thisobj = obj; | |
| 815 while (thisobj.m_internal) | |
| 816 { | |
| 817 int nxr = mupdf::pdf_to_num(thisobj); | |
| 818 newxref = PyLong_FromLong((long) nxr); | |
| 819 if (PySequence_Contains(xrefs, newxref) | |
| 820 or mupdf::pdf_dict_get(thisobj, PDF_NAME2(Type)).m_internal | |
| 821 ) | |
| 822 { | |
| 823 // circular ref or top of chain: terminate | |
| 824 Py_DECREF(newxref); | |
| 825 break; | |
| 826 } | |
| 827 s_list_append_drop(xrefs, newxref); | |
| 828 mupdf::PdfObj first = mupdf::pdf_dict_get(thisobj, PDF_NAME2(First)); // try go down | |
| 829 if (mupdf::pdf_is_dict(first)) | |
| 830 { | |
| 831 xrefs = JM_outline_xrefs(first, xrefs); | |
| 832 } | |
| 833 thisobj = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Next)); // try go next | |
| 834 mupdf::PdfObj parent = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Parent)); // get parent | |
| 835 if (!mupdf::pdf_is_dict(thisobj)) | |
| 836 { | |
| 837 thisobj = parent; | |
| 838 } | |
| 839 } | |
| 840 return xrefs; | |
| 841 } | |
| 842 | |
| 843 | |
| 844 PyObject* dictkey_align = NULL; | |
| 845 PyObject* dictkey_ascender = NULL; | |
| 846 PyObject* dictkey_bidi = NULL; | |
| 847 PyObject* dictkey_bbox = NULL; | |
| 848 PyObject* dictkey_blocks = NULL; | |
| 849 PyObject* dictkey_bpc = NULL; | |
| 850 PyObject* dictkey_c = NULL; | |
| 851 PyObject* dictkey_chars = NULL; | |
| 852 PyObject* dictkey_color = NULL; | |
| 853 PyObject* dictkey_colorspace = NULL; | |
| 854 PyObject* dictkey_content = NULL; | |
| 855 PyObject* dictkey_creationDate = NULL; | |
| 856 PyObject* dictkey_cs_name = NULL; | |
| 857 PyObject* dictkey_da = NULL; | |
| 858 PyObject* dictkey_dashes = NULL; | |
| 859 PyObject* dictkey_desc = NULL; | |
| 860 PyObject* dictkey_descender = NULL; | |
| 861 PyObject* dictkey_dir = NULL; | |
| 862 PyObject* dictkey_effect = NULL; | |
| 863 PyObject* dictkey_ext = NULL; | |
| 864 PyObject* dictkey_filename = NULL; | |
| 865 PyObject* dictkey_fill = NULL; | |
| 866 PyObject* dictkey_flags = NULL; | |
| 867 PyObject* dictkey_char_bidi = NULL; | |
| 868 PyObject* dictkey_char_flags = NULL; | |
| 869 PyObject* dictkey_font = NULL; | |
| 870 PyObject* dictkey_glyph = NULL; | |
| 871 PyObject* dictkey_height = NULL; | |
| 872 PyObject* dictkey_id = NULL; | |
| 873 PyObject* dictkey_image = NULL; | |
| 874 PyObject* dictkey_items = NULL; | |
| 875 PyObject* dictkey_length = NULL; | |
| 876 PyObject* dictkey_lines = NULL; | |
| 877 PyObject* dictkey_matrix = NULL; | |
| 878 PyObject* dictkey_modDate = NULL; | |
| 879 PyObject* dictkey_name = NULL; | |
| 880 PyObject* dictkey_number = NULL; | |
| 881 PyObject* dictkey_origin = NULL; | |
| 882 PyObject* dictkey_rect = NULL; | |
| 883 PyObject* dictkey_size = NULL; | |
| 884 PyObject* dictkey_smask = NULL; | |
| 885 PyObject* dictkey_spans = NULL; | |
| 886 PyObject* dictkey_stroke = NULL; | |
| 887 PyObject* dictkey_style = NULL; | |
| 888 PyObject* dictkey_subject = NULL; | |
| 889 PyObject* dictkey_text = NULL; | |
| 890 PyObject* dictkey_title = NULL; | |
| 891 PyObject* dictkey_type = NULL; | |
| 892 PyObject* dictkey_ufilename = NULL; | |
| 893 PyObject* dictkey_width = NULL; | |
| 894 PyObject* dictkey_wmode = NULL; | |
| 895 PyObject* dictkey_xref = NULL; | |
| 896 PyObject* dictkey_xres = NULL; | |
| 897 PyObject* dictkey_yres = NULL; | |
| 898 | |
| 899 static int dict_setitem_drop(PyObject* dict, PyObject* key, PyObject* value) | |
| 900 { | |
| 901 if (!dict || !PyDict_Check(dict) || !key || !value) | |
| 902 { | |
| 903 return -2; | |
| 904 } | |
| 905 int rc = PyDict_SetItem(dict, key, value); | |
| 906 Py_DECREF(value); | |
| 907 return rc; | |
| 908 } | |
| 909 | |
| 910 static int dict_setitemstr_drop(PyObject* dict, const char* key, PyObject* value) | |
| 911 { | |
| 912 if (!dict || !PyDict_Check(dict) || !key || !value) | |
| 913 { | |
| 914 return -2; | |
| 915 } | |
| 916 int rc = PyDict_SetItemString(dict, key, value); | |
| 917 Py_DECREF(value); | |
| 918 return rc; | |
| 919 } | |
| 920 | |
| 921 | |
| 922 static void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items) | |
| 923 { | |
| 924 PyObject* item=nullptr; | |
| 925 PyObject* itemdict=nullptr; | |
| 926 PyObject* xrefs=nullptr; | |
| 927 | |
| 928 PyObject* bold = PyUnicode_FromString("bold"); | |
| 929 PyObject* italic = PyUnicode_FromString("italic"); | |
| 930 PyObject* collapse = PyUnicode_FromString("collapse"); | |
| 931 PyObject* zoom = PyUnicode_FromString("zoom"); | |
| 932 | |
| 933 try | |
| 934 { | |
| 935 /* Need to define these things early because later code uses | |
| 936 `goto`; otherwise we get compiler warnings 'jump bypasses variable | |
| 937 initialization' */ | |
| 938 int xref = 0; | |
| 939 mupdf::PdfObj root; | |
| 940 mupdf::PdfObj olroot; | |
| 941 mupdf::PdfObj first; | |
| 942 Py_ssize_t n; | |
| 943 Py_ssize_t m; | |
| 944 | |
| 945 root = mupdf::pdf_dict_get(mupdf::pdf_trailer(pdf), PDF_NAME2(Root)); | |
| 946 if (!root.m_internal) goto end; | |
| 947 | |
| 948 olroot = mupdf::pdf_dict_get(root, PDF_NAME2(Outlines)); | |
| 949 if (!olroot.m_internal) goto end; | |
| 950 | |
| 951 first = mupdf::pdf_dict_get(olroot, PDF_NAME2(First)); | |
| 952 if (!first.m_internal) goto end; | |
| 953 | |
| 954 xrefs = PyList_New(0); // pre-allocate an empty list | |
| 955 xrefs = JM_outline_xrefs(first, xrefs); | |
| 956 n = PySequence_Size(xrefs); | |
| 957 m = PySequence_Size(items); | |
| 958 if (!n) goto end; | |
| 959 | |
| 960 if (n != m) | |
| 961 { | |
| 962 throw std::runtime_error("internal error finding outline xrefs"); | |
| 963 } | |
| 964 | |
| 965 // update all TOC item dictionaries | |
| 966 for (int i = 0; i < n; i++) | |
| 967 { | |
| 968 jm_init_item(xrefs, i, &xref); | |
| 969 item = PySequence_ITEM(items, i); | |
| 970 itemdict = PySequence_ITEM(item, 3); | |
| 971 if (!itemdict || !PyDict_Check(itemdict)) | |
| 972 { | |
| 973 throw std::runtime_error("need non-simple TOC format"); | |
| 974 } | |
| 975 PyDict_SetItem(itemdict, dictkey_xref, PySequence_ITEM(xrefs, i)); | |
| 976 mupdf::PdfObj bm = mupdf::pdf_load_object(pdf, xref); | |
| 977 int flags = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(F))); | |
| 978 if (flags == 1) | |
| 979 { | |
| 980 PyDict_SetItem(itemdict, italic, Py_True); | |
| 981 } | |
| 982 else if (flags == 2) | |
| 983 { | |
| 984 PyDict_SetItem(itemdict, bold, Py_True); | |
| 985 } | |
| 986 else if (flags == 3) | |
| 987 { | |
| 988 PyDict_SetItem(itemdict, italic, Py_True); | |
| 989 PyDict_SetItem(itemdict, bold, Py_True); | |
| 990 } | |
| 991 int count = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(Count))); | |
| 992 if (count < 0) | |
| 993 { | |
| 994 PyDict_SetItem(itemdict, collapse, Py_True); | |
| 995 } | |
| 996 else if (count > 0) | |
| 997 { | |
| 998 PyDict_SetItem(itemdict, collapse, Py_False); | |
| 999 } | |
| 1000 mupdf::PdfObj col = mupdf::pdf_dict_get(bm, PDF_NAME2(C)); | |
| 1001 if (mupdf::pdf_is_array(col) && mupdf::pdf_array_len(col) == 3) | |
| 1002 { | |
| 1003 PyObject* color = PyTuple_New(3); | |
| 1004 PyTuple_SET_ITEM(color, 0, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 0)))); | |
| 1005 PyTuple_SET_ITEM(color, 1, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 1)))); | |
| 1006 PyTuple_SET_ITEM(color, 2, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 2)))); | |
| 1007 dict_setitem_drop(itemdict, dictkey_color, color); | |
| 1008 } | |
| 1009 float z=0; | |
| 1010 mupdf::PdfObj obj = mupdf::pdf_dict_get(bm, PDF_NAME2(Dest)); | |
| 1011 if (!obj.m_internal || !mupdf::pdf_is_array(obj)) | |
| 1012 { | |
| 1013 obj = mupdf::pdf_dict_getl(&bm, PDF_NAME(A), PDF_NAME(D), nullptr); | |
| 1014 } | |
| 1015 if (mupdf::pdf_is_array(obj) && mupdf::pdf_array_len(obj) == 5) | |
| 1016 { | |
| 1017 z = mupdf::pdf_to_real(mupdf::pdf_array_get(obj, 4)); | |
| 1018 } | |
| 1019 dict_setitem_drop(itemdict, zoom, Py_BuildValue("f", z)); | |
| 1020 PyList_SetItem(item, 3, itemdict); | |
| 1021 PyList_SetItem(items, i, item); | |
| 1022 } | |
| 1023 end:; | |
| 1024 } | |
| 1025 catch (std::exception&) | |
| 1026 { | |
| 1027 } | |
| 1028 Py_CLEAR(xrefs); | |
| 1029 Py_CLEAR(bold); | |
| 1030 Py_CLEAR(italic); | |
| 1031 Py_CLEAR(collapse); | |
| 1032 Py_CLEAR(zoom); | |
| 1033 } | |
| 1034 | |
| 1035 static void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items) | |
| 1036 { | |
| 1037 mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document); | |
| 1038 return Document_extend_toc_items(pdf, items); | |
| 1039 } | |
| 1040 | |
| 1041 //----------------------------------------------------------------------------- | |
| 1042 // PySequence from fz_rect | |
| 1043 //----------------------------------------------------------------------------- | |
| 1044 static PyObject* JM_py_from_rect(fz_rect r) | |
| 1045 { | |
| 1046 return Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1); | |
| 1047 } | |
| 1048 static PyObject* JM_py_from_rect(mupdf::FzRect r) | |
| 1049 { | |
| 1050 return JM_py_from_rect(*r.internal()); | |
| 1051 } | |
| 1052 | |
| 1053 //----------------------------------------------------------------------------- | |
| 1054 // PySequence from fz_point | |
| 1055 //----------------------------------------------------------------------------- | |
| 1056 static PyObject* JM_py_from_point(fz_point p) | |
| 1057 { | |
| 1058 return Py_BuildValue("ff", p.x, p.y); | |
| 1059 } | |
| 1060 | |
| 1061 //----------------------------------------------------------------------------- | |
| 1062 // PySequence from fz_quad. | |
| 1063 //----------------------------------------------------------------------------- | |
| 1064 static PyObject * | |
| 1065 JM_py_from_quad(fz_quad q) | |
| 1066 { | |
| 1067 return Py_BuildValue("((f,f),(f,f),(f,f),(f,f))", | |
| 1068 q.ul.x, q.ul.y, q.ur.x, q.ur.y, | |
| 1069 q.ll.x, q.ll.y, q.lr.x, q.lr.y); | |
| 1070 } | |
| 1071 | |
| 1072 //---------------------------------------------------------------- | |
| 1073 // annotation rectangle | |
| 1074 //---------------------------------------------------------------- | |
| 1075 static mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot) | |
| 1076 { | |
| 1077 mupdf::FzRect rect = mupdf::pdf_bound_annot(annot); | |
| 1078 return rect; | |
| 1079 } | |
| 1080 | |
| 1081 static PyObject* Annot_rect3(mupdf::PdfAnnot& annot) | |
| 1082 { | |
| 1083 fz_rect rect = mupdf::ll_pdf_bound_annot(annot.m_internal); | |
| 1084 return JM_py_from_rect(rect); | |
| 1085 } | |
| 1086 | |
| 1087 //----------------------------------------------------------------------------- | |
| 1088 // PySequence to fz_rect. Default: infinite rect | |
| 1089 //----------------------------------------------------------------------------- | |
| 1090 static fz_rect JM_rect_from_py(PyObject* r) | |
| 1091 { | |
| 1092 if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) | |
| 1093 { | |
| 1094 return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal();// fz_infinite_rect; | |
| 1095 } | |
| 1096 double f[4]; | |
| 1097 for (int i = 0; i < 4; i++) | |
| 1098 { | |
| 1099 if (jm_float_item(r, i, &f[i]) == 1) | |
| 1100 { | |
| 1101 return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal(); | |
| 1102 } | |
| 1103 if (f[i] < FZ_MIN_INF_RECT) f[i] = FZ_MIN_INF_RECT; | |
| 1104 if (f[i] > FZ_MAX_INF_RECT) f[i] = FZ_MAX_INF_RECT; | |
| 1105 } | |
| 1106 return mupdf::ll_fz_make_rect( | |
| 1107 (float) f[0], | |
| 1108 (float) f[1], | |
| 1109 (float) f[2], | |
| 1110 (float) f[3] | |
| 1111 ); | |
| 1112 } | |
| 1113 | |
| 1114 //----------------------------------------------------------------------------- | |
| 1115 // PySequence to fz_matrix. Default: fz_identity | |
| 1116 //----------------------------------------------------------------------------- | |
| 1117 static fz_matrix JM_matrix_from_py(PyObject* m) | |
| 1118 { | |
| 1119 double a[6]; | |
| 1120 | |
| 1121 if (!m || !PySequence_Check(m) || PySequence_Size(m) != 6) | |
| 1122 { | |
| 1123 return fz_identity; | |
| 1124 } | |
| 1125 for (int i = 0; i < 6; i++) | |
| 1126 { | |
| 1127 if (jm_float_item(m, i, &a[i]) == 1) | |
| 1128 { | |
| 1129 return *mupdf::FzMatrix().internal(); | |
| 1130 } | |
| 1131 } | |
| 1132 return mupdf::ll_fz_make_matrix( | |
| 1133 (float) a[0], | |
| 1134 (float) a[1], | |
| 1135 (float) a[2], | |
| 1136 (float) a[3], | |
| 1137 (float) a[4], | |
| 1138 (float) a[5] | |
| 1139 ); | |
| 1140 } | |
| 1141 | |
| 1142 PyObject* util_transform_rect(PyObject* rect, PyObject* matrix) | |
| 1143 { | |
| 1144 return JM_py_from_rect( | |
| 1145 mupdf::ll_fz_transform_rect( | |
| 1146 JM_rect_from_py(rect), | |
| 1147 JM_matrix_from_py(matrix) | |
| 1148 ) | |
| 1149 ); | |
| 1150 } | |
| 1151 | |
//----------------------------------------------------------------------------
// Normalize a /Rotate value to one of 0, 90, 180, 270.
// Values that are no multiple of 90 yield 0.
//----------------------------------------------------------------------------
static int JM_norm_rotation(int rotate)
{
    int angle = rotate % 360;   // now in the open range (-360, 360)
    if (angle < 0)
    {
        angle += 360;
    }
    return (angle % 90 == 0) ? angle : 0;
}
| 1162 | |
| 1163 | |
| 1164 //---------------------------------------------------------------------------- | |
| 1165 // return a PDF page's /Rotate value: one of (0, 90, 180, 270) | |
| 1166 //---------------------------------------------------------------------------- | |
| 1167 static int JM_page_rotation(mupdf::PdfPage& page) | |
| 1168 { | |
| 1169 int rotate = 0; | |
| 1170 rotate = mupdf::pdf_to_int( | |
| 1171 mupdf::pdf_dict_get_inheritable(page.obj(), PDF_NAME2(Rotate)) | |
| 1172 ); | |
| 1173 rotate = JM_norm_rotation(rotate); | |
| 1174 return rotate; | |
| 1175 } | |
| 1176 | |
| 1177 | |
| 1178 //---------------------------------------------------------------------------- | |
| 1179 // return a PDF page's MediaBox | |
| 1180 //---------------------------------------------------------------------------- | |
| 1181 static mupdf::FzRect JM_mediabox(mupdf::PdfObj& page_obj) | |
| 1182 { | |
| 1183 mupdf::FzRect mediabox = mupdf::pdf_to_rect( | |
| 1184 mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(MediaBox)) | |
| 1185 ); | |
| 1186 if (mupdf::fz_is_empty_rect(mediabox) || mupdf::fz_is_infinite_rect(mediabox)) | |
| 1187 { | |
| 1188 mediabox.x0 = 0; | |
| 1189 mediabox.y0 = 0; | |
| 1190 mediabox.x1 = 612; | |
| 1191 mediabox.y1 = 792; | |
| 1192 } | |
| 1193 mupdf::FzRect page_mediabox; | |
| 1194 page_mediabox.x0 = mupdf::fz_min(mediabox.x0, mediabox.x1); | |
| 1195 page_mediabox.y0 = mupdf::fz_min(mediabox.y0, mediabox.y1); | |
| 1196 page_mediabox.x1 = mupdf::fz_max(mediabox.x0, mediabox.x1); | |
| 1197 page_mediabox.y1 = mupdf::fz_max(mediabox.y0, mediabox.y1); | |
| 1198 if (0 | |
| 1199 || page_mediabox.x1 - page_mediabox.x0 < 1 | |
| 1200 || page_mediabox.y1 - page_mediabox.y0 < 1 | |
| 1201 ) | |
| 1202 { | |
| 1203 page_mediabox = *mupdf::FzRect(mupdf::FzRect::Fixed_UNIT).internal(); //fz_unit_rect; | |
| 1204 } | |
| 1205 return page_mediabox; | |
| 1206 } | |
| 1207 | |
| 1208 | |
| 1209 //---------------------------------------------------------------------------- | |
| 1210 // return a PDF page's CropBox | |
| 1211 //---------------------------------------------------------------------------- | |
| 1212 mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj) | |
| 1213 { | |
| 1214 mupdf::FzRect mediabox = JM_mediabox(page_obj); | |
| 1215 mupdf::FzRect cropbox = mupdf::pdf_to_rect( | |
| 1216 mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(CropBox)) | |
| 1217 ); | |
| 1218 if (mupdf::fz_is_infinite_rect(cropbox) || mupdf::fz_is_empty_rect(cropbox)) | |
| 1219 { | |
| 1220 cropbox = mediabox; | |
| 1221 } | |
| 1222 float y0 = mediabox.y1 - cropbox.y1; | |
| 1223 float y1 = mediabox.y1 - cropbox.y0; | |
| 1224 cropbox.y0 = y0; | |
| 1225 cropbox.y1 = y1; | |
| 1226 return cropbox; | |
| 1227 } | |
| 1228 | |
| 1229 | |
| 1230 //---------------------------------------------------------------------------- | |
| 1231 // calculate width and height of the UNROTATED page | |
| 1232 //---------------------------------------------------------------------------- | |
| 1233 static mupdf::FzPoint JM_cropbox_size(mupdf::PdfObj& page_obj) | |
| 1234 { | |
| 1235 mupdf::FzPoint size; | |
| 1236 mupdf::FzRect rect = JM_cropbox(page_obj); | |
| 1237 float w = (rect.x0 < rect.x1) ? rect.x1 - rect.x0 : rect.x0 - rect.x1; | |
| 1238 float h = (rect.y0 < rect.y1) ? rect.y1 - rect.y0 : rect.y0 - rect.y1; | |
| 1239 size = fz_make_point(w, h); | |
| 1240 return size; | |
| 1241 } | |
| 1242 | |
| 1243 | |
| 1244 //---------------------------------------------------------------------------- | |
| 1245 // calculate page rotation matrices | |
| 1246 //---------------------------------------------------------------------------- | |
| 1247 static mupdf::FzMatrix JM_rotate_page_matrix(mupdf::PdfPage& page) | |
| 1248 { | |
| 1249 if (!page.m_internal) | |
| 1250 { | |
| 1251 return *mupdf::FzMatrix().internal(); // no valid pdf page given | |
| 1252 } | |
| 1253 int rotation = JM_page_rotation(page); | |
| 1254 if (rotation == 0) | |
| 1255 { | |
| 1256 return *mupdf::FzMatrix().internal(); // no rotation | |
| 1257 } | |
| 1258 auto po = page.obj(); | |
| 1259 mupdf::FzPoint cb_size = JM_cropbox_size(po); | |
| 1260 float w = cb_size.x; | |
| 1261 float h = cb_size.y; | |
| 1262 mupdf::FzMatrix m; | |
| 1263 if (rotation == 90) | |
| 1264 { | |
| 1265 m = mupdf::fz_make_matrix(0, 1, -1, 0, h, 0); | |
| 1266 } | |
| 1267 else if (rotation == 180) | |
| 1268 { | |
| 1269 m = mupdf::fz_make_matrix(-1, 0, 0, -1, w, h); | |
| 1270 } | |
| 1271 else | |
| 1272 { | |
| 1273 m = mupdf::fz_make_matrix(0, -1, 1, 0, 0, w); | |
| 1274 } | |
| 1275 return m; | |
| 1276 } | |
| 1277 | |
| 1278 | |
| 1279 static mupdf::FzMatrix JM_derotate_page_matrix(mupdf::PdfPage& page) | |
| 1280 { // just the inverse of rotation | |
| 1281 return mupdf::fz_invert_matrix(JM_rotate_page_matrix(page)); | |
| 1282 } | |
| 1283 | |
| 1284 //----------------------------------------------------------------------------- | |
| 1285 // PySequence from fz_matrix | |
| 1286 //----------------------------------------------------------------------------- | |
| 1287 static PyObject* JM_py_from_matrix(mupdf::FzMatrix m) | |
| 1288 { | |
| 1289 return Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f); | |
| 1290 } | |
| 1291 | |
| 1292 static mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage) | |
| 1293 { | |
| 1294 if (!pdfpage.m_internal) | |
| 1295 { | |
| 1296 return mupdf::FzMatrix(); | |
| 1297 } | |
| 1298 return JM_derotate_page_matrix(pdfpage); | |
| 1299 } | |
| 1300 | |
| 1301 static mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& page) | |
| 1302 { | |
| 1303 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); | |
| 1304 return Page_derotate_matrix(pdf_page); | |
| 1305 } | |
| 1306 | |
| 1307 | |
| 1308 static PyObject *lll_JM_get_annot_xref_list(pdf_obj *page_obj) | |
| 1309 { | |
| 1310 fz_context* ctx = mupdf::internal_context_get(); | |
| 1311 PyObject *names = PyList_New(0); | |
| 1312 pdf_obj *id, *subtype, *annots, *annot_obj; | |
| 1313 int xref, type, i, n; | |
| 1314 fz_try(ctx) { | |
| 1315 annots = pdf_dict_get(ctx, page_obj, PDF_NAME(Annots)); | |
| 1316 n = pdf_array_len(ctx, annots); | |
| 1317 for (i = 0; i < n; i++) { | |
| 1318 annot_obj = pdf_array_get(ctx, annots, i); | |
| 1319 xref = pdf_to_num(ctx, annot_obj); | |
| 1320 subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype)); | |
| 1321 if (!subtype) { | |
| 1322 continue; // subtype is required | |
| 1323 } | |
| 1324 type = pdf_annot_type_from_string(ctx, pdf_to_name(ctx, subtype)); | |
| 1325 if (type == PDF_ANNOT_UNKNOWN) { | |
| 1326 continue; // only accept valid annot types | |
| 1327 } | |
| 1328 id = pdf_dict_gets(ctx, annot_obj, "NM"); | |
| 1329 LIST_APPEND_DROP(names, Py_BuildValue("iis", xref, type, pdf_to_text_string(ctx, id))); | |
| 1330 } | |
| 1331 } | |
| 1332 fz_catch(ctx) { | |
| 1333 return names; | |
| 1334 } | |
| 1335 return names; | |
| 1336 } | |
| 1337 //------------------------------------------------------------------------ | |
| 1338 // return the xrefs and /NM ids of a page's annots, links and fields | |
| 1339 //------------------------------------------------------------------------ | |
| 1340 static PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj) | |
| 1341 { | |
| 1342 PyObject* names = PyList_New(0); | |
| 1343 if (!page_obj.m_internal) | |
| 1344 { | |
| 1345 return names; | |
| 1346 } | |
| 1347 return lll_JM_get_annot_xref_list( page_obj.m_internal); | |
| 1348 } | |
| 1349 | |
| 1350 static mupdf::FzBuffer JM_object_to_buffer(const mupdf::PdfObj& what, int compress, int ascii) | |
| 1351 { | |
| 1352 mupdf::FzBuffer res = mupdf::fz_new_buffer(512); | |
| 1353 mupdf::FzOutput out(res); | |
| 1354 mupdf::pdf_print_obj(out, what, compress, ascii); | |
| 1355 out.fz_close_output(); | |
| 1356 mupdf::fz_terminate_buffer(res); | |
| 1357 return res; | |
| 1358 } | |
| 1359 | |
| 1360 static PyObject* JM_EscapeStrFromBuffer(mupdf::FzBuffer& buff) | |
| 1361 { | |
| 1362 if (!buff.m_internal) | |
| 1363 { | |
| 1364 return PyUnicode_FromString(""); | |
| 1365 } | |
| 1366 unsigned char* s = nullptr; | |
| 1367 size_t len = mupdf::fz_buffer_storage(buff, &s); | |
| 1368 PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace"); | |
| 1369 if (!val) | |
| 1370 { | |
| 1371 val = PyUnicode_FromString(""); | |
| 1372 PyErr_Clear(); | |
| 1373 } | |
| 1374 return val; | |
| 1375 } | |
| 1376 | |
| 1377 static PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0) | |
| 1378 { | |
| 1379 if (!pdf.m_internal) | |
| 1380 { | |
| 1381 throw std::runtime_error(MSG_IS_NO_PDF); | |
| 1382 } | |
| 1383 int xreflen = mupdf::pdf_xref_len(pdf); | |
| 1384 if ((xref < 1 || xref >= xreflen) and xref != -1) | |
| 1385 { | |
| 1386 throw std::runtime_error(MSG_BAD_XREF); | |
| 1387 } | |
| 1388 mupdf::PdfObj obj = (xref > 0) ? mupdf::pdf_load_object(pdf, xref) : mupdf::pdf_trailer(pdf); | |
| 1389 mupdf::FzBuffer res = JM_object_to_buffer(mupdf::pdf_resolve_indirect(obj), compressed, ascii); | |
| 1390 PyObject* text = JM_EscapeStrFromBuffer(res); | |
| 1391 return text; | |
| 1392 } | |
| 1393 | |
| 1394 static PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0) | |
| 1395 { | |
| 1396 mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document); | |
| 1397 return xref_object(pdf, xref, compressed, ascii); | |
| 1398 } | |
| 1399 | |
| 1400 | |
| 1401 //------------------------------------- | |
| 1402 // fz_output for Python file objects | |
| 1403 //------------------------------------- | |
| 1404 | |
| 1405 static PyObject* Link_is_external(mupdf::FzLink& this_link) | |
| 1406 { | |
| 1407 const char* uri = this_link.m_internal->uri; | |
| 1408 if (!uri) | |
| 1409 { | |
| 1410 return PyBool_FromLong(0); | |
| 1411 } | |
| 1412 bool ret = mupdf::fz_is_external_link(uri); | |
| 1413 return PyBool_FromLong((long) ret); | |
| 1414 } | |
| 1415 | |
| 1416 static mupdf::FzLink Link_next(mupdf::FzLink& this_link) | |
| 1417 { | |
| 1418 return this_link.next(); | |
| 1419 } | |
| 1420 | |
| 1421 | |
| 1422 //----------------------------------------------------------------------------- | |
| 1423 // create PDF object from given string | |
| 1424 //----------------------------------------------------------------------------- | |
| 1425 static pdf_obj *lll_JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, const char *src) | |
| 1426 { | |
| 1427 pdf_obj *result = NULL; | |
| 1428 pdf_lexbuf lexbuf; | |
| 1429 fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src)); | |
| 1430 | |
| 1431 pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL); | |
| 1432 | |
| 1433 fz_try(ctx) { | |
| 1434 result = pdf_parse_stm_obj(ctx, doc, stream, &lexbuf); | |
| 1435 } | |
| 1436 | |
| 1437 fz_always(ctx) { | |
| 1438 pdf_lexbuf_fin(ctx, &lexbuf); | |
| 1439 fz_drop_stream(ctx, stream); | |
| 1440 } | |
| 1441 | |
| 1442 fz_catch(ctx) { | |
| 1443 mupdf::internal_throw_exception(ctx); | |
| 1444 } | |
| 1445 | |
| 1446 return result; | |
| 1447 | |
| 1448 } | |
| 1449 | |
| 1450 /*********************************************************************/ | |
| 1451 // Page._addAnnot_FromString | |
| 1452 // Add new links provided as an array of string object definitions. | |
| 1453 /*********************************************************************/ | |
| 1454 PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist) | |
| 1455 { | |
| 1456 PyObject* txtpy = nullptr; | |
| 1457 int lcount = (int) PySequence_Size(linklist); // link count | |
| 1458 //printf("Page_addAnnot_FromString(): lcount=%i\n", lcount); | |
| 1459 if (lcount < 1) | |
| 1460 { | |
| 1461 Py_RETURN_NONE; | |
| 1462 } | |
| 1463 try | |
| 1464 { | |
| 1465 // insert links from the provided sources | |
| 1466 if (!page.m_internal) | |
| 1467 { | |
| 1468 throw std::runtime_error(MSG_IS_NO_PDF); | |
| 1469 } | |
| 1470 if (!mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)).m_internal) | |
| 1471 { | |
| 1472 mupdf::pdf_dict_put_array(page.obj(), PDF_NAME2(Annots), lcount); | |
| 1473 } | |
| 1474 mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)); | |
| 1475 mupdf::PdfDocument doc = page.doc(); | |
| 1476 //printf("lcount=%i\n", lcount); | |
| 1477 fz_context* ctx = mupdf::internal_context_get(); | |
| 1478 for (int i = 0; i < lcount; i++) | |
| 1479 { | |
| 1480 const char* text = nullptr; | |
| 1481 txtpy = PySequence_ITEM(linklist, (Py_ssize_t) i); | |
| 1482 text = PyUnicode_AsUTF8(txtpy); | |
| 1483 Py_CLEAR(txtpy); | |
| 1484 if (!text) | |
| 1485 { | |
| 1486 messagef("skipping bad link / annot item %i.", i); | |
| 1487 continue; | |
| 1488 } | |
| 1489 try | |
| 1490 { | |
| 1491 pdf_obj* obj = lll_JM_pdf_obj_from_str(ctx, doc.m_internal, text); | |
| 1492 pdf_obj* annot = pdf_add_object_drop( | |
| 1493 ctx, | |
| 1494 doc.m_internal, | |
| 1495 obj | |
| 1496 ); | |
| 1497 pdf_obj* ind_obj = pdf_new_indirect(ctx, doc.m_internal, pdf_to_num(ctx, annot), 0); | |
| 1498 pdf_array_push_drop(ctx, annots.m_internal, ind_obj); | |
| 1499 pdf_drop_obj(ctx, annot); | |
| 1500 } | |
| 1501 catch (std::exception&) | |
| 1502 { | |
| 1503 messagef("skipping bad link / annot item %i.", i); | |
| 1504 } | |
| 1505 } | |
| 1506 } | |
| 1507 catch (std::exception&) | |
| 1508 { | |
| 1509 PyErr_Clear(); | |
| 1510 return nullptr; | |
| 1511 } | |
| 1512 Py_RETURN_NONE; | |
| 1513 } | |
| 1514 | |
| 1515 PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist) | |
| 1516 { | |
| 1517 mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); | |
| 1518 return Page_addAnnot_FromString(pdf_page, linklist); | |
| 1519 } | |
| 1520 | |
| 1521 static int page_count_fz2(void* document) | |
| 1522 { | |
| 1523 mupdf::FzDocument* document2 = (mupdf::FzDocument*) document; | |
| 1524 return mupdf::fz_count_pages(*document2); | |
| 1525 } | |
| 1526 | |
| 1527 static int page_count_fz(mupdf::FzDocument& document) | |
| 1528 { | |
| 1529 return mupdf::fz_count_pages(document); | |
| 1530 } | |
| 1531 | |
| 1532 static int page_count_pdf(mupdf::PdfDocument& pdf) | |
| 1533 { | |
| 1534 mupdf::FzDocument document = pdf.super(); | |
| 1535 return page_count_fz(document); | |
| 1536 } | |
| 1537 | |
| 1538 static int page_count(mupdf::FzDocument& document) | |
| 1539 { | |
| 1540 return mupdf::fz_count_pages(document); | |
| 1541 } | |
| 1542 | |
| 1543 static int page_count(mupdf::PdfDocument& pdf) | |
| 1544 { | |
| 1545 mupdf::FzDocument document = pdf.super(); | |
| 1546 return page_count(document); | |
| 1547 } | |
| 1548 | |
| 1549 static PyObject* page_annot_xrefs(mupdf::FzDocument& document, mupdf::PdfDocument& pdf, int pno) | |
| 1550 { | |
| 1551 int page_count = mupdf::fz_count_pages(document); | |
| 1552 int n = pno; | |
| 1553 while (n < 0) | |
| 1554 { | |
| 1555 n += page_count; | |
| 1556 } | |
| 1557 PyObject* annots = nullptr; | |
| 1558 if (n >= page_count) | |
| 1559 { | |
| 1560 throw std::runtime_error(MSG_BAD_PAGENO); | |
| 1561 } | |
| 1562 if (!pdf.m_internal) | |
| 1563 { | |
| 1564 throw std::runtime_error(MSG_IS_NO_PDF); | |
| 1565 } | |
| 1566 annots = JM_get_annot_xref_list(mupdf::pdf_lookup_page_obj(pdf, n)); | |
| 1567 return annots; | |
| 1568 } | |
| 1569 | |
| 1570 static PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno) | |
| 1571 { | |
| 1572 mupdf::PdfDocument pdf = mupdf::pdf_specifics(document); | |
| 1573 return page_annot_xrefs(document, pdf, pno); | |
| 1574 } | |
| 1575 | |
| 1576 static PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno) | |
| 1577 { | |
| 1578 mupdf::FzDocument document = pdf.super(); | |
| 1579 return page_annot_xrefs(document, pdf, pno); | |
| 1580 } | |
| 1581 | |
| 1582 static bool Outline_is_external(mupdf::FzOutline* outline) | |
| 1583 { | |
| 1584 if (!outline->m_internal->uri) | |
| 1585 { | |
| 1586 return false; | |
| 1587 } | |
| 1588 return mupdf::ll_fz_is_external_link(outline->m_internal->uri); | |
| 1589 } | |
| 1590 | |
| 1591 int ll_fz_absi(int i) | |
| 1592 { | |
| 1593 return mupdf::ll_fz_absi(i); | |
| 1594 } | |
| 1595 | |
// Font property flags; every flag occupies its own bit.
enum
{
    TEXT_FONT_SUPERSCRIPT = 1 << 0,
    TEXT_FONT_ITALIC      = 1 << 1,
    TEXT_FONT_SERIFED     = 1 << 2,
    TEXT_FONT_MONOSPACED  = 1 << 3,
    TEXT_FONT_BOLD        = 1 << 4,
};
| 1604 | |
// Global switches, all off (0) initially. Each has a setter function.
int g_skip_quad_corrections{0};
int g_subset_fontnames{0};
int g_small_glyph_heights{0};
| 1608 | |
| 1609 void set_skip_quad_corrections(int on) | |
| 1610 { | |
| 1611 g_skip_quad_corrections = on; | |
| 1612 } | |
| 1613 | |
| 1614 void set_subset_fontnames(int on) | |
| 1615 { | |
| 1616 g_subset_fontnames = on; | |
| 1617 } | |
| 1618 | |
| 1619 void set_small_glyph_heights(int on) | |
| 1620 { | |
| 1621 g_small_glyph_heights = on; | |
| 1622 } | |
| 1623 | |
| 1624 struct jm_lineart_device | |
| 1625 { | |
| 1626 fz_device super; | |
| 1627 | |
| 1628 PyObject* out = {}; | |
| 1629 PyObject* method = {}; | |
| 1630 PyObject* pathdict = {}; | |
| 1631 PyObject* scissors = {}; | |
| 1632 float pathfactor = {}; | |
| 1633 fz_matrix ctm = {}; | |
| 1634 fz_matrix ptm = {}; | |
| 1635 fz_matrix rot = {}; | |
| 1636 fz_point lastpoint = {}; | |
| 1637 fz_point firstpoint = {}; | |
| 1638 int havemove = 0; | |
| 1639 fz_rect pathrect = {}; | |
| 1640 int clips = {}; | |
| 1641 int linecount = {}; | |
| 1642 float linewidth = {}; | |
| 1643 int path_type = {}; | |
| 1644 long depth = {}; | |
| 1645 size_t seqno = {}; | |
| 1646 char* layer_name; | |
| 1647 }; | |
| 1648 | |
| 1649 | |
| 1650 static void jm_lineart_drop_device(fz_context *ctx, fz_device *dev_) | |
| 1651 { | |
| 1652 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 1653 if (PyList_Check(dev->out)) { | |
| 1654 Py_CLEAR(dev->out); | |
| 1655 } | |
| 1656 Py_CLEAR(dev->method); | |
| 1657 Py_CLEAR(dev->scissors); | |
| 1658 mupdf::ll_fz_free(dev->layer_name); | |
| 1659 dev->layer_name = nullptr; | |
| 1660 } | |
| 1661 | |
| 1662 typedef jm_lineart_device jm_tracedraw_device; | |
| 1663 | |
| 1664 // need own versions of ascender / descender | |
| 1665 static float JM_font_ascender(fz_font* font) | |
| 1666 { | |
| 1667 if (g_skip_quad_corrections) | |
| 1668 { | |
| 1669 return 0.8f; | |
| 1670 } | |
| 1671 return mupdf::ll_fz_font_ascender(font); | |
| 1672 } | |
| 1673 | |
| 1674 static float JM_font_descender(fz_font* font) | |
| 1675 { | |
| 1676 if (g_skip_quad_corrections) | |
| 1677 { | |
| 1678 return -0.2f; | |
| 1679 } | |
| 1680 return mupdf::ll_fz_font_descender(font); | |
| 1681 } | |
| 1682 | |
| 1683 | |
| 1684 //---------------------------------------------------------------- | |
| 1685 // Return true if character is considered to be a word delimiter | |
| 1686 //---------------------------------------------------------------- | |
| 1687 static int | |
| 1688 JM_is_word_delimiter(int c, PyObject *delimiters) | |
| 1689 { | |
| 1690 if (c <= 32 || c == 160) return 1; // a standard delimiter | |
| 1691 if (0x202a <= c && c <= 0x202e) | |
| 1692 { | |
| 1693 return 1; // change between writing directions | |
| 1694 } | |
| 1695 | |
| 1696 // extra delimiters must be a non-empty sequence | |
| 1697 if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) { | |
| 1698 return 0; | |
| 1699 } | |
| 1700 | |
| 1701 // convert to tuple for easier looping | |
| 1702 PyObject *delims = PySequence_Tuple(delimiters); | |
| 1703 if (!delims) { | |
| 1704 PyErr_Clear(); | |
| 1705 return 0; | |
| 1706 } | |
| 1707 | |
| 1708 // Make 1-char PyObject from character given as integer | |
| 1709 PyObject *cchar = Py_BuildValue("C", c); // single character PyObject | |
| 1710 Py_ssize_t i, len = PyTuple_Size(delims); | |
| 1711 for (i = 0; i < len; i++) { | |
| 1712 int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i)); | |
| 1713 if (rc == 0) { // equal to a delimiter character | |
| 1714 Py_DECREF(cchar); | |
| 1715 Py_DECREF(delims); | |
| 1716 PyErr_Clear(); | |
| 1717 return 1; | |
| 1718 } | |
| 1719 } | |
| 1720 | |
| 1721 Py_DECREF(delims); | |
| 1722 PyErr_Clear(); | |
| 1723 return 0; | |
| 1724 } | |
| 1725 | |
/* Return 1 if code point `c` lies within 0x590..0x900 (both inclusive),
   the range this file treats as right-to-left text, else 0. */
static int
JM_is_rtl_char(int c)
{
    return (c >= 0x590 && c <= 0x900) ? 1 : 0;
}
| 1732 | |
| 1733 static const char* JM_font_name(fz_font* font) | |
| 1734 { | |
| 1735 const char* name = mupdf::ll_fz_font_name(font); | |
| 1736 const char* s = strchr(name, '+'); | |
| 1737 if (g_subset_fontnames || !s || s-name != 6) | |
| 1738 { | |
| 1739 return name; | |
| 1740 } | |
| 1741 return s + 1; | |
| 1742 } | |
| 1743 | |
| 1744 static int detect_super_script(fz_stext_line *line, fz_stext_char *ch) | |
| 1745 { | |
| 1746 if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0) | |
| 1747 { | |
| 1748 return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f; | |
| 1749 } | |
| 1750 return 0; | |
| 1751 } | |
| 1752 | |
| 1753 static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch) | |
| 1754 { | |
| 1755 int flags = 0; | |
| 1756 if (line && ch) | |
| 1757 { | |
| 1758 flags += detect_super_script(line, ch) * TEXT_FONT_SUPERSCRIPT; | |
| 1759 } | |
| 1760 flags += mupdf::ll_fz_font_is_italic(font) * TEXT_FONT_ITALIC; | |
| 1761 flags += mupdf::ll_fz_font_is_serif(font) * TEXT_FONT_SERIFED; | |
| 1762 flags += mupdf::ll_fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED; | |
| 1763 flags += mupdf::ll_fz_font_is_bold(font) * TEXT_FONT_BOLD; | |
| 1764 return flags; | |
| 1765 } | |
| 1766 | |
| 1767 static void jm_trace_text_span( | |
| 1768 jm_tracedraw_device* dev, | |
| 1769 fz_text_span* span, | |
| 1770 int type, | |
| 1771 fz_matrix ctm, | |
| 1772 fz_colorspace* colorspace, | |
| 1773 const float* color, | |
| 1774 float alpha, | |
| 1775 size_t seqno | |
| 1776 ) | |
| 1777 { | |
| 1778 //printf("extra.jm_trace_text_span(): seqno=%zi\n", seqno); | |
| 1779 //fz_matrix join = mupdf::ll_fz_concat(span->trm, ctm); | |
| 1780 //double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d)); | |
| 1781 fz_matrix mat = mupdf::ll_fz_concat(span->trm, ctm); // text transformation matrix | |
| 1782 fz_point dir = mupdf::ll_fz_transform_vector(mupdf::ll_fz_make_point(1, 0), mat); // writing direction | |
| 1783 double fsize = sqrt(dir.x * dir.x + dir.y * dir.y); // font size | |
| 1784 | |
| 1785 dir = mupdf::ll_fz_normalize_vector(dir); | |
| 1786 | |
| 1787 // compute effective ascender / descender | |
| 1788 double asc = (double) JM_font_ascender(span->font); | |
| 1789 double dsc = (double) JM_font_descender(span->font); | |
| 1790 if (asc < 1e-3) { // probably Tesseract font | |
| 1791 dsc = -0.1; | |
| 1792 asc = 0.9; | |
| 1793 } | |
| 1794 | |
| 1795 double ascsize = asc * fsize / (asc - dsc); | |
| 1796 double dscsize = dsc * fsize / (asc - dsc); | |
| 1797 int fflags = 0; // font flags | |
| 1798 int mono = mupdf::ll_fz_font_is_monospaced(span->font); | |
| 1799 fflags += mono * TEXT_FONT_MONOSPACED; | |
| 1800 fflags += mupdf::ll_fz_font_is_italic(span->font) * TEXT_FONT_ITALIC; | |
| 1801 fflags += mupdf::ll_fz_font_is_serif(span->font) * TEXT_FONT_SERIFED; | |
| 1802 fflags += mupdf::ll_fz_font_is_bold(span->font) * TEXT_FONT_BOLD; | |
| 1803 | |
| 1804 // walk through characters of span | |
| 1805 fz_matrix rot = mupdf::ll_fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0); | |
| 1806 if (dir.x == -1) | |
| 1807 { | |
| 1808 // left-right flip | |
| 1809 rot.d = 1; | |
| 1810 } | |
| 1811 PyObject* chars = PyTuple_New(span->len); | |
| 1812 double space_adv = 0; | |
| 1813 double last_adv = 0; | |
| 1814 fz_rect span_bbox; | |
| 1815 | |
| 1816 for (int i = 0; i < span->len; i++) | |
| 1817 { | |
| 1818 double adv = 0; | |
| 1819 if (span->items[i].gid >= 0) | |
| 1820 { | |
| 1821 adv = (double) mupdf::ll_fz_advance_glyph(span->font, span->items[i].gid, span->wmode); | |
| 1822 } | |
| 1823 adv *= fsize; | |
| 1824 last_adv = adv; | |
| 1825 if (span->items[i].ucs == 32) | |
| 1826 { | |
| 1827 space_adv = adv; | |
| 1828 } | |
| 1829 fz_point char_orig; | |
| 1830 char_orig = fz_make_point(span->items[i].x, span->items[i].y); | |
| 1831 char_orig = fz_transform_point(char_orig, ctm); | |
| 1832 fz_matrix m1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y); | |
| 1833 m1 = mupdf::ll_fz_concat(m1, rot); | |
| 1834 m1 = mupdf::ll_fz_concat(m1, mupdf::ll_fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y)); | |
| 1835 float x0 = char_orig.x; | |
| 1836 float x1 = x0 + adv; | |
| 1837 float y0; | |
| 1838 float y1; | |
| 1839 if ( | |
| 1840 (mat.d > 0 && (dir.x == 1 || dir.x == -1)) | |
| 1841 || | |
| 1842 (mat.b !=0 && mat.b == -mat.c) | |
| 1843 ) // up-down flip | |
| 1844 { | |
| 1845 // up-down flip | |
| 1846 y0 = char_orig.y + dscsize; | |
| 1847 y1 = char_orig.y + ascsize; | |
| 1848 } | |
| 1849 else | |
| 1850 { | |
| 1851 y0 = char_orig.y - ascsize; | |
| 1852 y1 = char_orig.y - dscsize; | |
| 1853 } | |
| 1854 fz_rect char_bbox = mupdf::ll_fz_make_rect(x0, y0, x1, y1); | |
| 1855 char_bbox = mupdf::ll_fz_transform_rect(char_bbox, m1); | |
| 1856 PyTuple_SET_ITEM( | |
| 1857 chars, | |
| 1858 (Py_ssize_t) i, | |
| 1859 Py_BuildValue( | |
| 1860 "ii(ff)(ffff)", | |
| 1861 span->items[i].ucs, | |
| 1862 span->items[i].gid, | |
| 1863 char_orig.x, | |
| 1864 char_orig.y, | |
| 1865 char_bbox.x0, | |
| 1866 char_bbox.y0, | |
| 1867 char_bbox.x1, | |
| 1868 char_bbox.y1 | |
| 1869 ) | |
| 1870 ); | |
| 1871 if (i > 0) | |
| 1872 { | |
| 1873 span_bbox = mupdf::ll_fz_union_rect(span_bbox, char_bbox); | |
| 1874 } | |
| 1875 else | |
| 1876 { | |
| 1877 span_bbox = char_bbox; | |
| 1878 } | |
| 1879 } | |
| 1880 if (!space_adv) | |
| 1881 { | |
| 1882 if (!(fflags & TEXT_FONT_MONOSPACED)) | |
| 1883 { | |
| 1884 fz_font* out_font = nullptr; | |
| 1885 space_adv = mupdf::ll_fz_advance_glyph( | |
| 1886 span->font, | |
| 1887 mupdf::ll_fz_encode_character_with_fallback(span->font, 32, 0, 0, &out_font), | |
| 1888 span->wmode | |
| 1889 ); | |
| 1890 space_adv *= fsize; | |
| 1891 if (!space_adv) | |
| 1892 { | |
| 1893 space_adv = last_adv; | |
| 1894 } | |
| 1895 } | |
| 1896 else | |
| 1897 { | |
| 1898 space_adv = last_adv; // for mono any char width suffices | |
| 1899 } | |
| 1900 } | |
| 1901 // make the span dictionary | |
| 1902 PyObject* span_dict = PyDict_New(); | |
| 1903 dict_setitemstr_drop(span_dict, "dir", JM_py_from_point(dir)); | |
| 1904 dict_setitem_drop(span_dict, dictkey_font, JM_EscapeStrFromStr(JM_font_name(span->font))); | |
| 1905 dict_setitem_drop(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode)); | |
| 1906 dict_setitem_drop(span_dict, dictkey_flags, PyLong_FromLong((long) fflags)); | |
| 1907 dict_setitemstr_drop(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level)); | |
| 1908 dict_setitemstr_drop(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir)); | |
| 1909 dict_setitem_drop(span_dict, dictkey_ascender, PyFloat_FromDouble(asc)); | |
| 1910 dict_setitem_drop(span_dict, dictkey_descender, PyFloat_FromDouble(dsc)); | |
| 1911 dict_setitem_drop(span_dict, dictkey_colorspace, PyLong_FromLong(3)); | |
| 1912 float rgb[3]; | |
| 1913 if (colorspace) | |
| 1914 { | |
| 1915 mupdf::ll_fz_convert_color( | |
| 1916 colorspace, | |
| 1917 color, | |
| 1918 mupdf::ll_fz_device_rgb(), | |
| 1919 rgb, | |
| 1920 nullptr, | |
| 1921 fz_default_color_params | |
| 1922 ); | |
| 1923 } | |
| 1924 else | |
| 1925 { | |
| 1926 rgb[0] = rgb[1] = rgb[2] = 0; | |
| 1927 } | |
| 1928 double linewidth; | |
| 1929 if (dev->linewidth > 0) // width of character border | |
| 1930 { | |
| 1931 linewidth = (double) dev->linewidth; | |
| 1932 } | |
| 1933 else | |
| 1934 { | |
| 1935 linewidth = fsize * 0.05; // default: 5% of font size | |
| 1936 } | |
| 1937 if (0) std::cout | |
| 1938 << " dev->linewidth=" << dev->linewidth | |
| 1939 << " fsize=" << fsize | |
| 1940 << " linewidth=" << linewidth | |
| 1941 << "\n"; | |
| 1942 dict_setitem_drop(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2])); | |
| 1943 dict_setitem_drop(span_dict, dictkey_size, PyFloat_FromDouble(fsize)); | |
| 1944 dict_setitemstr_drop(span_dict, "opacity", PyFloat_FromDouble((double) alpha)); | |
| 1945 dict_setitemstr_drop(span_dict, "linewidth", PyFloat_FromDouble((double) linewidth)); | |
| 1946 dict_setitemstr_drop(span_dict, "spacewidth", PyFloat_FromDouble(space_adv)); | |
| 1947 dict_setitem_drop(span_dict, dictkey_type, PyLong_FromLong((long) type)); | |
| 1948 dict_setitem_drop(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox)); | |
| 1949 dict_setitemstr_drop(span_dict, "layer", JM_UnicodeFromStr(dev->layer_name)); | |
| 1950 dict_setitemstr_drop(span_dict, "seqno", PyLong_FromSize_t(seqno)); | |
| 1951 dict_setitem_drop(span_dict, dictkey_chars, chars); | |
| 1952 //std::cout << "span_dict=" << repr(span_dict) << "\n"; | |
| 1953 s_list_append_drop(dev->out, span_dict); | |
| 1954 } | |
| 1955 | |
| 1956 static inline void jm_increase_seqno(fz_context* ctx, fz_device* dev_) | |
| 1957 { | |
| 1958 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 1959 dev->seqno += 1; | |
| 1960 } | |
| 1961 | |
| 1962 static void jm_fill_path( | |
| 1963 fz_context* ctx, | |
| 1964 fz_device* dev, | |
| 1965 const fz_path*, | |
| 1966 int even_odd, | |
| 1967 fz_matrix, | |
| 1968 fz_colorspace*, | |
| 1969 const float* color, | |
| 1970 float alpha, | |
| 1971 fz_color_params | |
| 1972 ) | |
| 1973 { | |
| 1974 jm_increase_seqno(ctx, dev); | |
| 1975 } | |
| 1976 | |
| 1977 static void jm_fill_shade( | |
| 1978 fz_context* ctx, | |
| 1979 fz_device* dev, | |
| 1980 fz_shade* shd, | |
| 1981 fz_matrix ctm, | |
| 1982 float alpha, | |
| 1983 fz_color_params color_params | |
| 1984 ) | |
| 1985 { | |
| 1986 jm_increase_seqno(ctx, dev); | |
| 1987 } | |
| 1988 | |
| 1989 static void jm_fill_image( | |
| 1990 fz_context* ctx, | |
| 1991 fz_device* dev, | |
| 1992 fz_image* img, | |
| 1993 fz_matrix ctm, | |
| 1994 float alpha, | |
| 1995 fz_color_params color_params | |
| 1996 ) | |
| 1997 { | |
| 1998 jm_increase_seqno(ctx, dev); | |
| 1999 } | |
| 2000 | |
| 2001 static void jm_fill_image_mask( | |
| 2002 fz_context* ctx, | |
| 2003 fz_device* dev, | |
| 2004 fz_image* img, | |
| 2005 fz_matrix ctm, | |
| 2006 fz_colorspace* cs, | |
| 2007 const float* color, | |
| 2008 float alpha, | |
| 2009 fz_color_params color_params | |
| 2010 ) | |
| 2011 { | |
| 2012 jm_increase_seqno(ctx, dev); | |
| 2013 } | |
| 2014 | |
| 2015 static void jm_dev_linewidth( | |
| 2016 fz_context* ctx, | |
| 2017 fz_device* dev_, | |
| 2018 const fz_path* path, | |
| 2019 const fz_stroke_state* stroke, | |
| 2020 fz_matrix ctm, | |
| 2021 fz_colorspace* colorspace, | |
| 2022 const float* color, | |
| 2023 float alpha, | |
| 2024 fz_color_params color_params | |
| 2025 ) | |
| 2026 { | |
| 2027 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2028 if (0) std::cout << "jm_dev_linewidth(): changing dev->linewidth from " << dev->linewidth | |
| 2029 << " to stroke->linewidth=" << stroke->linewidth | |
| 2030 << "\n"; | |
| 2031 dev->linewidth = stroke->linewidth; | |
| 2032 jm_increase_seqno(ctx, dev_); | |
| 2033 } | |
| 2034 | |
| 2035 static void jm_trace_text( | |
| 2036 jm_tracedraw_device* dev, | |
| 2037 const fz_text* text, | |
| 2038 int type, | |
| 2039 fz_matrix ctm, | |
| 2040 fz_colorspace* colorspace, | |
| 2041 const float* color, | |
| 2042 float alpha, | |
| 2043 size_t seqno | |
| 2044 ) | |
| 2045 { | |
| 2046 fz_text_span* span; | |
| 2047 for (span = text->head; span; span = span->next) | |
| 2048 { | |
| 2049 jm_trace_text_span(dev, span, type, ctm, colorspace, color, alpha, seqno); | |
| 2050 } | |
| 2051 } | |
| 2052 | |
| 2053 /*--------------------------------------------------------- | |
| 2054 There are 3 text trace types: | |
| 2055 0 - fill text (PDF Tr 0) | |
| 2056 1 - stroke text (PDF Tr 1) | |
| 2057 3 - ignore text (PDF Tr 3) | |
| 2058 ---------------------------------------------------------*/ | |
| 2059 static void | |
| 2060 jm_tracedraw_fill_text( | |
| 2061 fz_context* ctx, | |
| 2062 fz_device* dev_, | |
| 2063 const fz_text* text, | |
| 2064 fz_matrix ctm, | |
| 2065 fz_colorspace* colorspace, | |
| 2066 const float* color, | |
| 2067 float alpha, | |
| 2068 fz_color_params color_params | |
| 2069 ) | |
| 2070 { | |
| 2071 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2072 jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev->seqno); | |
| 2073 dev->seqno += 1; | |
| 2074 } | |
| 2075 | |
| 2076 static void | |
| 2077 jm_tracedraw_stroke_text( | |
| 2078 fz_context* ctx, | |
| 2079 fz_device* dev_, | |
| 2080 const fz_text* text, | |
| 2081 const fz_stroke_state* stroke, | |
| 2082 fz_matrix ctm, | |
| 2083 fz_colorspace* colorspace, | |
| 2084 const float* color, | |
| 2085 float alpha, | |
| 2086 fz_color_params color_params | |
| 2087 ) | |
| 2088 { | |
| 2089 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2090 jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev->seqno); | |
| 2091 dev->seqno += 1; | |
| 2092 } | |
| 2093 | |
| 2094 | |
| 2095 static void | |
| 2096 jm_tracedraw_ignore_text( | |
| 2097 fz_context* ctx, | |
| 2098 fz_device* dev_, | |
| 2099 const fz_text* text, | |
| 2100 fz_matrix ctm | |
| 2101 ) | |
| 2102 { | |
| 2103 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2104 jm_trace_text(dev, text, 3, ctm, nullptr, nullptr, 1, dev->seqno); | |
| 2105 dev->seqno += 1; | |
| 2106 } | |
| 2107 | |
| 2108 static void | |
| 2109 jm_lineart_begin_layer(fz_context *ctx, fz_device *dev_, const char *name) | |
| 2110 { | |
| 2111 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2112 mupdf::ll_fz_free(dev->layer_name); | |
| 2113 dev->layer_name = mupdf::ll_fz_strdup(name); | |
| 2114 } | |
| 2115 | |
| 2116 static void | |
| 2117 jm_lineart_end_layer(fz_context *ctx, fz_device *dev_) | |
| 2118 { | |
| 2119 jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; | |
| 2120 mupdf::ll_fz_free(dev->layer_name); | |
| 2121 dev->layer_name = nullptr; | |
| 2122 } | |
| 2123 | |
| 2124 | |
| 2125 mupdf::FzDevice JM_new_texttrace_device(PyObject* out) | |
| 2126 { | |
| 2127 mupdf::FzDevice device(sizeof(jm_tracedraw_device)); | |
| 2128 jm_tracedraw_device* dev = (jm_tracedraw_device*) device.m_internal; | |
| 2129 | |
| 2130 dev->super.close_device = nullptr; | |
| 2131 dev->super.drop_device = jm_lineart_drop_device; | |
| 2132 dev->super.fill_path = jm_fill_path; | |
| 2133 dev->super.stroke_path = jm_dev_linewidth; | |
| 2134 dev->super.clip_path = nullptr; | |
| 2135 dev->super.clip_stroke_path = nullptr; | |
| 2136 | |
| 2137 dev->super.fill_text = jm_tracedraw_fill_text; | |
| 2138 dev->super.stroke_text = jm_tracedraw_stroke_text; | |
| 2139 dev->super.clip_text = nullptr; | |
| 2140 dev->super.clip_stroke_text = nullptr; | |
| 2141 dev->super.ignore_text = jm_tracedraw_ignore_text; | |
| 2142 | |
| 2143 dev->super.fill_shade = jm_fill_shade; | |
| 2144 dev->super.fill_image = jm_fill_image; | |
| 2145 dev->super.fill_image_mask = jm_fill_image_mask; | |
| 2146 dev->super.clip_image_mask = nullptr; | |
| 2147 | |
| 2148 dev->super.pop_clip = nullptr; | |
| 2149 | |
| 2150 dev->super.begin_mask = nullptr; | |
| 2151 dev->super.end_mask = nullptr; | |
| 2152 dev->super.begin_group = nullptr; | |
| 2153 dev->super.end_group = nullptr; | |
| 2154 | |
| 2155 dev->super.begin_tile = nullptr; | |
| 2156 dev->super.end_tile = nullptr; | |
| 2157 | |
| 2158 dev->super.begin_layer = jm_lineart_begin_layer; | |
| 2159 dev->super.end_layer = jm_lineart_end_layer; | |
| 2160 | |
| 2161 dev->super.begin_structure = nullptr; | |
| 2162 dev->super.end_structure = nullptr; | |
| 2163 | |
| 2164 dev->super.begin_metatext = nullptr; | |
| 2165 dev->super.end_metatext = nullptr; | |
| 2166 | |
| 2167 dev->super.render_flags = nullptr; | |
| 2168 dev->super.set_default_colorspaces = nullptr; | |
| 2169 | |
| 2170 Py_XINCREF(out); | |
| 2171 dev->out = out; | |
| 2172 dev->seqno = 0; | |
| 2173 return device; | |
| 2174 } | |
| 2175 | |
| 2176 | |
| 2177 static fz_quad | |
| 2178 JM_char_quad(fz_stext_line *line, fz_stext_char *ch) | |
| 2179 { | |
| 2180 if (g_skip_quad_corrections) { // no special handling | |
| 2181 return ch->quad; | |
| 2182 } | |
| 2183 if (line->wmode) { // never touch vertical write mode | |
| 2184 return ch->quad; | |
| 2185 } | |
| 2186 fz_font *font = ch->font; | |
| 2187 float asc = JM_font_ascender(font); | |
| 2188 float dsc = JM_font_descender(font); | |
| 2189 float c, s, fsize = ch->size; | |
| 2190 float asc_dsc = asc - dsc + FLT_EPSILON; | |
| 2191 if (asc_dsc >= 1 && g_small_glyph_heights == 0) { // no problem | |
| 2192 return ch->quad; | |
| 2193 } | |
| 2194 if (asc < 1e-3) { // probably Tesseract glyphless font | |
| 2195 dsc = -0.1f; | |
| 2196 asc = 0.9f; | |
| 2197 asc_dsc = 1.0f; | |
| 2198 } | |
| 2199 | |
| 2200 if (g_small_glyph_heights || asc_dsc < 1) { | |
| 2201 dsc = dsc / asc_dsc; | |
| 2202 asc = asc / asc_dsc; | |
| 2203 } | |
| 2204 asc_dsc = asc - dsc; | |
| 2205 asc = asc * fsize / asc_dsc; | |
| 2206 dsc = dsc * fsize / asc_dsc; | |
| 2207 | |
| 2208 /* ------------------------------ | |
| 2209 Re-compute quad with the adjusted ascender / descender values: | |
| 2210 Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, | |
| 2211 re-rotate and move back to ch->origin location. | |
| 2212 ------------------------------ */ | |
| 2213 fz_matrix trm1, trm2, xlate1, xlate2; | |
| 2214 fz_quad quad; | |
| 2215 c = line->dir.x; // cosine | |
| 2216 s = line->dir.y; // sine | |
| 2217 trm1 = mupdf::ll_fz_make_matrix(c, -s, s, c, 0, 0); // derotate | |
| 2218 trm2 = mupdf::ll_fz_make_matrix(c, s, -s, c, 0, 0); // rotate | |
| 2219 if (c == -1) { // left-right flip | |
| 2220 trm1.d = 1; | |
| 2221 trm2.d = 1; | |
| 2222 } | |
| 2223 xlate1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y); | |
| 2224 xlate2 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y); | |
| 2225 | |
| 2226 quad = mupdf::ll_fz_transform_quad(ch->quad, xlate1); // move origin to (0,0) | |
| 2227 quad = mupdf::ll_fz_transform_quad(quad, trm1); // de-rotate corners | |
| 2228 | |
| 2229 // adjust vertical coordinates | |
| 2230 if (c == 1 && quad.ul.y > 0) { // up-down flip | |
| 2231 quad.ul.y = asc; | |
| 2232 quad.ur.y = asc; | |
| 2233 quad.ll.y = dsc; | |
| 2234 quad.lr.y = dsc; | |
| 2235 } else { | |
| 2236 quad.ul.y = -asc; | |
| 2237 quad.ur.y = -asc; | |
| 2238 quad.ll.y = -dsc; | |
| 2239 quad.lr.y = -dsc; | |
| 2240 } | |
| 2241 | |
| 2242 // adjust horizontal coordinates that are too crazy: | |
| 2243 // (1) left x must be >= 0 | |
| 2244 // (2) if bbox width is 0, lookup char advance in font. | |
| 2245 if (quad.ll.x < 0) { | |
| 2246 quad.ll.x = 0; | |
| 2247 quad.ul.x = 0; | |
| 2248 } | |
| 2249 float cwidth = quad.lr.x - quad.ll.x; | |
| 2250 if (cwidth < FLT_EPSILON) { | |
| 2251 int glyph = mupdf::ll_fz_encode_character( font, ch->c); | |
| 2252 if (glyph) { | |
| 2253 float fwidth = mupdf::ll_fz_advance_glyph( font, glyph, line->wmode); | |
| 2254 quad.lr.x = quad.ll.x + fwidth * fsize; | |
| 2255 quad.ur.x = quad.lr.x; | |
| 2256 } | |
| 2257 } | |
| 2258 | |
| 2259 quad = mupdf::ll_fz_transform_quad(quad, trm2); // rotate back | |
| 2260 quad = mupdf::ll_fz_transform_quad(quad, xlate2); // translate back | |
| 2261 return quad; | |
| 2262 } | |
| 2263 | |
| 2264 | |
| 2265 static fz_rect JM_char_bbox(fz_stext_line* line, fz_stext_char* ch) | |
| 2266 { | |
| 2267 fz_rect r = mupdf::ll_fz_rect_from_quad(JM_char_quad( line, ch)); | |
| 2268 if (!line->wmode) { | |
| 2269 return r; | |
| 2270 } | |
| 2271 if (r.y1 < r.y0 + ch->size) { | |
| 2272 r.y0 = r.y1 - ch->size; | |
| 2273 } | |
| 2274 return r; | |
| 2275 } | |
| 2276 | |
| 2277 fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch) | |
| 2278 { | |
| 2279 return JM_char_bbox( line.m_internal, ch.m_internal); | |
| 2280 } | |
| 2281 | |
| 2282 static int JM_rects_overlap(const fz_rect a, const fz_rect b) | |
| 2283 { | |
| 2284 if (0 | |
| 2285 || a.x0 >= b.x1 | |
| 2286 || a.y0 >= b.y1 | |
| 2287 || a.x1 <= b.x0 | |
| 2288 || a.y1 <= b.y0 | |
| 2289 ) | |
| 2290 return 0; | |
| 2291 return 1; | |
| 2292 } | |
| 2293 | |
| 2294 // | |
| 2295 void JM_append_rune(fz_buffer *buff, int ch); | |
| 2296 | |
//-----------------------------------------------------------------------------
// Plain text output. A variant of fz_print_stext_page_as_text: only characters
// whose bbox overlaps the page's mediabox are written, and every line is
// terminated by exactly one new-line character - none is added if the line
// already ends with a new-line (which else leads to 2 new-lines).
//-----------------------------------------------------------------------------
| 2302 void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page) | |
| 2303 { | |
| 2304 fz_rect rect = page.m_internal->mediabox; | |
| 2305 | |
| 2306 for (auto block: page) | |
| 2307 { | |
| 2308 if (block.m_internal->type == FZ_STEXT_BLOCK_TEXT) | |
| 2309 { | |
| 2310 for (auto line: block) | |
| 2311 { | |
| 2312 int last_char = 0; | |
| 2313 for (auto ch: line) | |
| 2314 { | |
| 2315 fz_rect chbbox = JM_char_bbox( line, ch); | |
| 2316 if (mupdf::ll_fz_is_infinite_rect(rect) | |
| 2317 || JM_rects_overlap(rect, chbbox) | |
| 2318 ) | |
| 2319 { | |
| 2320 last_char = ch.m_internal->c; | |
| 2321 JM_append_rune(res.m_internal, last_char); | |
| 2322 } | |
| 2323 } | |
| 2324 if (last_char != 10 && last_char > 0) | |
| 2325 { | |
| 2326 mupdf::ll_fz_append_string(res.m_internal, "\n"); | |
| 2327 } | |
| 2328 } | |
| 2329 } | |
| 2330 } | |
| 2331 } | |
| 2332 | |
| 2333 | |
| 2334 | |
| 2335 // path_type is one of: | |
| 2336 #define FILL_PATH 1 | |
| 2337 #define STROKE_PATH 2 | |
| 2338 #define CLIP_PATH 3 | |
| 2339 #define CLIP_STROKE_PATH 4 | |
| 2340 | |
| 2341 // Every scissor of a clip is a sub rectangle of the preceding clip scissor if | |
| 2342 // the clip level is larger. | |
| 2343 static fz_rect compute_scissor(jm_lineart_device *dev) | |
| 2344 { | |
| 2345 PyObject *last_scissor = NULL; | |
| 2346 fz_rect scissor; | |
| 2347 if (!dev->scissors) { | |
| 2348 dev->scissors = PyList_New(0); | |
| 2349 } | |
| 2350 Py_ssize_t num_scissors = PyList_Size(dev->scissors); | |
| 2351 if (num_scissors > 0) { | |
| 2352 last_scissor = PyList_GET_ITEM(dev->scissors, num_scissors-1); | |
| 2353 scissor = JM_rect_from_py(last_scissor); | |
| 2354 scissor = fz_intersect_rect(scissor, dev->pathrect); | |
| 2355 } else { | |
| 2356 scissor = dev->pathrect; | |
| 2357 } | |
| 2358 LIST_APPEND_DROP(dev->scissors, JM_py_from_rect(scissor)); | |
| 2359 return scissor; | |
| 2360 } | |
| 2361 | |
| 2362 | |
| 2363 /* | |
| 2364 -------------------------------------------------------------------------- | |
| 2365 Check whether the last 4 lines represent a quad. | |
| 2366 Because of how we count, the lines are a polyline already, i.e. last point | |
| 2367 of a line equals 1st point of next line. | |
| 2368 So we check for a polygon (last line's end point equals start point). | |
| 2369 If not true we return 0. | |
| 2370 -------------------------------------------------------------------------- | |
| 2371 */ | |
| 2372 static int | |
| 2373 jm_checkquad(jm_lineart_device* dev) | |
| 2374 { | |
| 2375 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2376 Py_ssize_t i, len = PyList_Size(items); | |
| 2377 float f[8]; // coordinates of the 4 corners | |
| 2378 mupdf::FzPoint temp, lp; // line = (temp, lp) | |
| 2379 PyObject *rect; | |
| 2380 PyObject *line; | |
| 2381 // fill the 8 floats in f, start from items[-4:] | |
| 2382 for (i = 0; i < 4; i++) { // store line start points | |
| 2383 line = PyList_GET_ITEM(items, len - 4 + i); | |
| 2384 temp = JM_point_from_py(PyTuple_GET_ITEM(line, 1)); | |
| 2385 f[i * 2] = temp.x; | |
| 2386 f[i * 2 + 1] = temp.y; | |
| 2387 lp = JM_point_from_py(PyTuple_GET_ITEM(line, 2)); | |
| 2388 } | |
| 2389 if (lp.x != f[0] || lp.y != f[1]) { | |
| 2390 // not a polygon! | |
| 2391 //dev_linecount -= 1; | |
| 2392 return 0; | |
| 2393 } | |
| 2394 | |
| 2395 // we have detected a quad | |
| 2396 dev->linecount = 0; // reset this | |
| 2397 // a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items | |
| 2398 // are pairs of floats representing a quad corner each. | |
| 2399 rect = PyTuple_New(2); | |
| 2400 PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("qu")); | |
| 2401 /* ---------------------------------------------------- | |
| 2402 * relationship of float array to quad points: | |
| 2403 * (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr | |
| 2404 ---------------------------------------------------- */ | |
| 2405 fz_quad q = fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5]); | |
| 2406 PyTuple_SET_ITEM(rect, 1, JM_py_from_quad(q)); | |
| 2407 PyList_SetItem(items, len - 4, rect); // replace item -4 by rect | |
| 2408 PyList_SetSlice(items, len - 3, len, NULL); // delete remaining 3 items | |
| 2409 return 1; | |
| 2410 } | |
| 2411 | |
| 2412 | |
| 2413 /* | |
| 2414 -------------------------------------------------------------------------- | |
| 2415 Check whether the last 3 path items represent a rectangle. | |
| 2416 Line 1 and 3 must be horizontal, line 2 must be vertical. | |
| 2417 Returns 1 if we have modified the path, otherwise 0. | |
| 2418 -------------------------------------------------------------------------- | |
| 2419 */ | |
| 2420 static int | |
| 2421 jm_checkrect(jm_lineart_device* dev) | |
| 2422 { | |
| 2423 dev->linecount = 0; // reset line count | |
| 2424 long orientation = 0; // area orientation of rectangle | |
| 2425 mupdf::FzPoint ll, lr, ur, ul; | |
| 2426 mupdf::FzRect r; | |
| 2427 PyObject *rect; | |
| 2428 PyObject *line0, *line2; | |
| 2429 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2430 Py_ssize_t len = PyList_Size(items); | |
| 2431 | |
| 2432 line0 = PyList_GET_ITEM(items, len - 3); | |
| 2433 ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1)); | |
| 2434 lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2)); | |
| 2435 // no need to extract "line1"! | |
| 2436 line2 = PyList_GET_ITEM(items, len - 1); | |
| 2437 ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1)); | |
| 2438 ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2)); | |
| 2439 | |
| 2440 /* | |
| 2441 --------------------------------------------------------------------- | |
| 2442 Assumption: | |
| 2443 When decomposing rects, MuPDF always starts with a horizontal line, | |
| 2444 followed by a vertical line, followed by a horizontal line. | |
| 2445 First line: (ll, lr), third line: (ul, ur). | |
| 2446 If 1st line is below 3rd line, we record anti-clockwise (+1), else | |
| 2447 clockwise (-1) orientation. | |
| 2448 --------------------------------------------------------------------- | |
| 2449 */ | |
| 2450 if (ll.y != lr.y || | |
| 2451 ll.x != ul.x || | |
| 2452 ur.y != ul.y || | |
| 2453 ur.x != lr.x) { | |
| 2454 goto drop_out; // not a rectangle | |
| 2455 } | |
| 2456 | |
| 2457 // we have a rect, replace last 3 "l" items by one "re" item. | |
| 2458 if (ul.y < lr.y) { | |
| 2459 r = fz_make_rect(ul.x, ul.y, lr.x, lr.y); | |
| 2460 orientation = 1; | |
| 2461 } else { | |
| 2462 r = fz_make_rect(ll.x, ll.y, ur.x, ur.y); | |
| 2463 orientation = -1; | |
| 2464 } | |
| 2465 rect = PyTuple_New(3); | |
| 2466 PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re")); | |
| 2467 PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r)); | |
| 2468 PyTuple_SET_ITEM(rect, 2, PyLong_FromLong(orientation)); | |
| 2469 PyList_SetItem(items, len - 3, rect); // replace item -3 by rect | |
| 2470 PyList_SetSlice(items, len - 2, len, NULL); // delete remaining 2 items | |
| 2471 return 1; | |
| 2472 drop_out:; | |
| 2473 return 0; | |
| 2474 } | |
| 2475 | |
| 2476 static PyObject * | |
| 2477 jm_lineart_color(fz_colorspace *colorspace, const float *color) | |
| 2478 { | |
| 2479 float rgb[3]; | |
| 2480 if (colorspace) { | |
| 2481 mupdf::ll_fz_convert_color(colorspace, color, mupdf::ll_fz_device_rgb(), | |
| 2482 rgb, NULL, fz_default_color_params); | |
| 2483 return Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]); | |
| 2484 } | |
| 2485 return PyTuple_New(0); | |
| 2486 } | |
| 2487 | |
| 2488 static void | |
| 2489 trace_moveto(fz_context *ctx, void *dev_, float x, float y) | |
| 2490 { | |
| 2491 jm_lineart_device* dev = (jm_lineart_device*) dev_; | |
| 2492 dev->lastpoint = mupdf::ll_fz_transform_point(fz_make_point(x, y), dev->ctm); | |
| 2493 if (mupdf::ll_fz_is_infinite_rect(dev->pathrect)) | |
| 2494 { | |
| 2495 dev->pathrect = mupdf::ll_fz_make_rect( | |
| 2496 dev->lastpoint.x, | |
| 2497 dev->lastpoint.y, | |
| 2498 dev->lastpoint.x, | |
| 2499 dev->lastpoint.y | |
| 2500 ); | |
| 2501 } | |
| 2502 dev->firstpoint = dev->lastpoint; | |
| 2503 dev->havemove = 1; | |
| 2504 dev->linecount = 0; // reset # of consec. lines | |
| 2505 } | |
| 2506 | |
| 2507 static void | |
| 2508 trace_lineto(fz_context *ctx, void *dev_, float x, float y) | |
| 2509 { | |
| 2510 jm_lineart_device* dev = (jm_lineart_device*) dev_; | |
| 2511 fz_point p1 = fz_transform_point(fz_make_point(x, y), dev->ctm); | |
| 2512 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1); | |
| 2513 PyObject *list = PyTuple_New(3); | |
| 2514 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l")); | |
| 2515 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); | |
| 2516 PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1)); | |
| 2517 dev->lastpoint = p1; | |
| 2518 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2519 LIST_APPEND_DROP(items, list); | |
| 2520 dev->linecount += 1; // counts consecutive lines | |
| 2521 if (dev->linecount == 4 && dev->path_type != FILL_PATH) { // shrink to "re" or "qu" item | |
| 2522 jm_checkquad(dev); | |
| 2523 } | |
| 2524 } | |
| 2525 | |
| 2526 static void | |
| 2527 trace_curveto(fz_context *ctx, void *dev_, float x1, float y1, float x2, float y2, float x3, float y3) | |
| 2528 { | |
| 2529 jm_lineart_device* dev = (jm_lineart_device*) dev_; | |
| 2530 dev->linecount = 0; // reset # of consec. lines | |
| 2531 fz_point p1 = fz_make_point(x1, y1); | |
| 2532 fz_point p2 = fz_make_point(x2, y2); | |
| 2533 fz_point p3 = fz_make_point(x3, y3); | |
| 2534 p1 = fz_transform_point(p1, dev->ctm); | |
| 2535 p2 = fz_transform_point(p2, dev->ctm); | |
| 2536 p3 = fz_transform_point(p3, dev->ctm); | |
| 2537 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1); | |
| 2538 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p2); | |
| 2539 dev->pathrect = fz_include_point_in_rect(dev->pathrect, p3); | |
| 2540 | |
| 2541 PyObject *list = PyTuple_New(5); | |
| 2542 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("c")); | |
| 2543 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); | |
| 2544 PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1)); | |
| 2545 PyTuple_SET_ITEM(list, 3, JM_py_from_point(p2)); | |
| 2546 PyTuple_SET_ITEM(list, 4, JM_py_from_point(p3)); | |
| 2547 dev->lastpoint = p3; | |
| 2548 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2549 LIST_APPEND_DROP(items, list); | |
| 2550 } | |
| 2551 | |
| 2552 static void | |
| 2553 trace_close(fz_context *ctx, void *dev_) | |
| 2554 { | |
| 2555 jm_lineart_device* dev = (jm_lineart_device*) dev_; | |
| 2556 if (dev->linecount == 3) { | |
| 2557 if (jm_checkrect(dev)) { | |
| 2558 return; | |
| 2559 } | |
| 2560 } | |
| 2561 dev->linecount = 0; // reset # of consec. lines | |
| 2562 if (dev->havemove) { | |
| 2563 if (dev->firstpoint.x != dev->lastpoint.x || dev->firstpoint.y != dev->lastpoint.y) { | |
| 2564 PyObject *list = PyTuple_New(3); | |
| 2565 PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l")); | |
| 2566 PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); | |
| 2567 PyTuple_SET_ITEM(list, 2, JM_py_from_point(dev->firstpoint)); | |
| 2568 dev->lastpoint = dev->firstpoint; | |
| 2569 PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2570 LIST_APPEND_DROP(items, list); | |
| 2571 } | |
| 2572 dev->havemove = 0; | |
| 2573 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); | |
| 2574 } else { | |
| 2575 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(1)); | |
| 2576 } | |
| 2577 } | |
| 2578 | |
| 2579 static const fz_path_walker trace_path_walker = | |
| 2580 { | |
| 2581 trace_moveto, | |
| 2582 trace_lineto, | |
| 2583 trace_curveto, | |
| 2584 trace_close | |
| 2585 }; | |
| 2586 | |
| 2587 /* | |
| 2588 --------------------------------------------------------------------- | |
| 2589 Create the "items" list of the path dictionary | |
| 2590 * either create or empty the path dictionary | |
| 2591 * reset the end point of the path | |
| 2592 * reset count of consecutive lines | |
| 2593 * invoke fz_walk_path(), which create the single items | |
| 2594 * if no items detected, empty path dict again | |
| 2595 --------------------------------------------------------------------- | |
| 2596 */ | |
| 2597 static void | |
| 2598 jm_lineart_path(jm_lineart_device *dev, const fz_path *path) | |
| 2599 { | |
| 2600 dev->pathrect = fz_infinite_rect; | |
| 2601 dev->linecount = 0; | |
| 2602 dev->lastpoint = fz_make_point(0, 0); | |
| 2603 dev->firstpoint = fz_make_point(0, 0); | |
| 2604 if (dev->pathdict) { | |
| 2605 Py_CLEAR(dev->pathdict); | |
| 2606 } | |
| 2607 dev->pathdict = PyDict_New(); | |
| 2608 DICT_SETITEM_DROP(dev->pathdict, dictkey_items, PyList_New(0)); | |
| 2609 mupdf::ll_fz_walk_path(path, &trace_path_walker, dev); | |
| 2610 // Check if any items were added ... | |
| 2611 if (!PyDict_GetItem(dev->pathdict, dictkey_items) || !PyList_Size(PyDict_GetItem(dev->pathdict, dictkey_items))) | |
| 2612 { | |
| 2613 Py_CLEAR(dev->pathdict); | |
| 2614 } | |
| 2615 } | |
| 2616 | |
| 2617 //--------------------------------------------------------------------------- | |
| 2618 // Append current path to list or merge into last path of the list. | |
| 2619 // (1) Append if first path, different item lists or not a 'stroke' version | |
| 2620 // of previous path | |
| 2621 // (2) If new path has the same items, merge its content into previous path | |
| 2622 // and change path["type"] to "fs". | |
| 2623 // (3) If "out" is callable, skip the previous and pass dictionary to it. | |
| 2624 //--------------------------------------------------------------------------- | |
| 2625 static void | |
| 2626 // todo: remove `method` arg - it is dev->method. | |
| 2627 jm_append_merge(jm_lineart_device *dev) | |
| 2628 { | |
| 2629 Py_ssize_t len; | |
| 2630 int rc; | |
| 2631 PyObject *prev; | |
| 2632 PyObject *previtems; | |
| 2633 PyObject *thisitems; | |
| 2634 const char *thistype; | |
| 2635 const char *prevtype; | |
| 2636 if (PyCallable_Check(dev->out) || dev->method != Py_None) { // function or method | |
| 2637 goto callback; | |
| 2638 } | |
| 2639 len = PyList_Size(dev->out); // len of output list so far | |
| 2640 if (len == 0) { // always append first path | |
| 2641 goto append; | |
| 2642 } | |
| 2643 thistype = PyUnicode_AsUTF8(PyDict_GetItem(dev->pathdict, dictkey_type)); | |
| 2644 if (strcmp(thistype, "s") != 0) { // if not stroke, then append | |
| 2645 goto append; | |
| 2646 } | |
| 2647 prev = PyList_GET_ITEM(dev->out, len - 1); // get prev path | |
| 2648 prevtype = PyUnicode_AsUTF8(PyDict_GetItem(prev, dictkey_type)); | |
| 2649 if (strcmp(prevtype, "f") != 0) { // if previous not fill, append | |
| 2650 goto append; | |
| 2651 } | |
| 2652 // last check: there must be the same list of items for "f" and "s". | |
| 2653 previtems = PyDict_GetItem(prev, dictkey_items); | |
| 2654 thisitems = PyDict_GetItem(dev->pathdict, dictkey_items); | |
| 2655 if (PyObject_RichCompareBool(previtems, thisitems, Py_NE)) { | |
| 2656 goto append; | |
| 2657 } | |
| 2658 rc = PyDict_Merge(prev, dev->pathdict, 0); // merge, do not override | |
| 2659 if (rc == 0) { | |
| 2660 DICT_SETITEM_DROP(prev, dictkey_type, PyUnicode_FromString("fs")); | |
| 2661 goto postappend; | |
| 2662 } else { | |
| 2663 messagef("could not merge stroke and fill path"); | |
| 2664 goto append; | |
| 2665 } | |
| 2666 append:; | |
| 2667 //printf("Appending to dev->out. len(dev->out)=%zi\n", PyList_Size(dev->out)); | |
| 2668 PyList_Append(dev->out, dev->pathdict); | |
| 2669 postappend:; | |
| 2670 Py_CLEAR(dev->pathdict); | |
| 2671 return; | |
| 2672 | |
| 2673 callback:; // callback function or method | |
| 2674 PyObject *resp = NULL; | |
| 2675 if (dev->method == Py_None) { | |
| 2676 resp = PyObject_CallFunctionObjArgs(dev->out, dev->pathdict, NULL); | |
| 2677 } else { | |
| 2678 resp = PyObject_CallMethodObjArgs(dev->out, dev->method, dev->pathdict, NULL); | |
| 2679 } | |
| 2680 if (resp) { | |
| 2681 Py_DECREF(resp); | |
| 2682 } else { | |
| 2683 messagef("calling cdrawings callback function/method failed!"); | |
| 2684 PyErr_Clear(); | |
| 2685 } | |
| 2686 Py_CLEAR(dev->pathdict); | |
| 2687 return; | |
| 2688 } | |
| 2689 | |
| 2690 static void | |
| 2691 jm_lineart_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, | |
| 2692 int even_odd, fz_matrix ctm, fz_colorspace *colorspace, | |
| 2693 const float *color, float alpha, fz_color_params color_params) | |
| 2694 { | |
| 2695 jm_lineart_device *dev = (jm_lineart_device *) dev_; | |
| 2696 //printf("extra.jm_lineart_fill_path(): dev->seqno=%zi\n", dev->seqno); | |
| 2697 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); | |
| 2698 dev->path_type = FILL_PATH; | |
| 2699 jm_lineart_path(dev, path); | |
| 2700 if (!dev->pathdict) { | |
| 2701 return; | |
| 2702 } | |
| 2703 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("f")); | |
| 2704 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd)); | |
| 2705 DICT_SETITEMSTR_DROP(dev->pathdict, "fill_opacity", Py_BuildValue("f", alpha)); | |
| 2706 DICT_SETITEMSTR_DROP(dev->pathdict, "fill", jm_lineart_color(colorspace, color)); | |
| 2707 DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect)); | |
| 2708 DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); | |
| 2709 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); | |
| 2710 if (dev->clips) { | |
| 2711 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); | |
| 2712 } | |
| 2713 jm_append_merge(dev); | |
| 2714 dev->seqno += 1; | |
| 2715 } | |
| 2716 | |
| 2717 static void | |
| 2718 jm_lineart_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, | |
| 2719 const fz_stroke_state *stroke, fz_matrix ctm, | |
| 2720 fz_colorspace *colorspace, const float *color, float alpha, | |
| 2721 fz_color_params color_params) | |
| 2722 { | |
| 2723 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2724 //printf("extra.jm_lineart_stroke_path(): dev->seqno=%zi\n", dev->seqno); | |
| 2725 int i; | |
| 2726 dev->pathfactor = 1; | |
| 2727 if (ctm.a != 0 && fz_abs(ctm.a) == fz_abs(ctm.d)) { | |
| 2728 dev->pathfactor = fz_abs(ctm.a); | |
| 2729 } else { | |
| 2730 if (ctm.b != 0 && fz_abs(ctm.b) == fz_abs(ctm.c)) { | |
| 2731 dev->pathfactor = fz_abs(ctm.b); | |
| 2732 } | |
| 2733 } | |
| 2734 dev->ctm = ctm; // fz_concat(ctm, trace_device_ptm); | |
| 2735 dev->path_type = STROKE_PATH; | |
| 2736 | |
| 2737 jm_lineart_path(dev, path); | |
| 2738 if (!dev->pathdict) { | |
| 2739 return; | |
| 2740 } | |
| 2741 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("s")); | |
| 2742 DICT_SETITEMSTR_DROP(dev->pathdict, "stroke_opacity", Py_BuildValue("f", alpha)); | |
| 2743 DICT_SETITEMSTR_DROP(dev->pathdict, "color", jm_lineart_color(colorspace, color)); | |
| 2744 DICT_SETITEM_DROP(dev->pathdict, dictkey_width, Py_BuildValue("f", dev->pathfactor * stroke->linewidth)); | |
| 2745 DICT_SETITEMSTR_DROP(dev->pathdict, "lineCap", Py_BuildValue("iii", stroke->start_cap, stroke->dash_cap, stroke->end_cap)); | |
| 2746 DICT_SETITEMSTR_DROP(dev->pathdict, "lineJoin", Py_BuildValue("f", dev->pathfactor * stroke->linejoin)); | |
| 2747 if (!PyDict_GetItemString(dev->pathdict, "closePath")) { | |
| 2748 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); | |
| 2749 } | |
| 2750 | |
| 2751 // output the "dashes" string | |
| 2752 if (stroke->dash_len) { | |
| 2753 mupdf::FzBuffer buff(256); | |
| 2754 mupdf::fz_append_string(buff, "[ "); // left bracket | |
| 2755 for (i = 0; i < stroke->dash_len; i++) { | |
| 2756 fz_append_printf(ctx, buff.m_internal, "%g ", dev->pathfactor * stroke->dash_list[i]); | |
| 2757 } | |
| 2758 fz_append_printf(ctx, buff.m_internal, "] %g", dev->pathfactor * stroke->dash_phase); | |
| 2759 DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", JM_EscapeStrFromBuffer(buff)); | |
| 2760 } else { | |
| 2761 DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", PyUnicode_FromString("[] 0")); | |
| 2762 } | |
| 2763 | |
| 2764 DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect)); | |
| 2765 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); | |
| 2766 DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); | |
| 2767 if (dev->clips) { | |
| 2768 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); | |
| 2769 } | |
| 2770 // output the dict - potentially merging it with a previous fill_path twin | |
| 2771 jm_append_merge(dev); | |
| 2772 dev->seqno += 1; | |
| 2773 } | |
| 2774 | |
| 2775 static void | |
| 2776 jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor) | |
| 2777 { | |
| 2778 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2779 if (!dev->clips) return; | |
| 2780 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); | |
| 2781 dev->path_type = CLIP_PATH; | |
| 2782 jm_lineart_path(dev, path); | |
| 2783 if (!dev->pathdict) { | |
| 2784 return; | |
| 2785 } | |
| 2786 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip")); | |
| 2787 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd)); | |
| 2788 if (!PyDict_GetItemString(dev->pathdict, "closePath")) { | |
| 2789 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); | |
| 2790 } | |
| 2791 DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev))); | |
| 2792 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); | |
| 2793 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); | |
| 2794 jm_append_merge(dev); | |
| 2795 dev->depth++; | |
| 2796 } | |
| 2797 | |
| 2798 static void | |
| 2799 jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) | |
| 2800 { | |
| 2801 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2802 if (!dev->clips) return; | |
| 2803 dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); | |
| 2804 dev->path_type = CLIP_STROKE_PATH; | |
| 2805 jm_lineart_path(dev, path); | |
| 2806 if (!dev->pathdict) { | |
| 2807 return; | |
| 2808 } | |
| 2809 DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip")); | |
| 2810 DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", Py_BuildValue("s", NULL)); | |
| 2811 if (!PyDict_GetItemString(dev->pathdict, "closePath")) { | |
| 2812 DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); | |
| 2813 } | |
| 2814 DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev))); | |
| 2815 DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); | |
| 2816 DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); | |
| 2817 jm_append_merge(dev); | |
| 2818 dev->depth++; | |
| 2819 } | |
| 2820 | |
| 2821 | |
| 2822 static void | |
| 2823 jm_lineart_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) | |
| 2824 { | |
| 2825 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2826 if (!dev->clips) return; | |
| 2827 compute_scissor(dev); | |
| 2828 dev->depth++; | |
| 2829 } | |
| 2830 | |
| 2831 static void | |
| 2832 jm_lineart_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor) | |
| 2833 { | |
| 2834 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2835 if (!dev->clips) return; | |
| 2836 compute_scissor(dev); | |
| 2837 dev->depth++; | |
| 2838 } | |
| 2839 | |
| 2840 static void | |
| 2841 jm_lineart_clip_image_mask(fz_context *ctx, fz_device *dev_, fz_image *image, fz_matrix ctm, fz_rect scissor) | |
| 2842 { | |
| 2843 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2844 if (!dev->clips) return; | |
| 2845 compute_scissor(dev); | |
| 2846 dev->depth++; | |
| 2847 } | |
| 2848 | |
| 2849 static void | |
| 2850 jm_lineart_pop_clip(fz_context *ctx, fz_device *dev_) | |
| 2851 { | |
| 2852 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2853 if (!dev->clips) return; | |
| 2854 if (!dev->scissors) return; | |
| 2855 Py_ssize_t len = PyList_Size(dev->scissors); | |
| 2856 if (len < 1) return; | |
| 2857 PyList_SetSlice(dev->scissors, len - 1, len, NULL); | |
| 2858 dev->depth--; | |
| 2859 } | |
| 2860 | |
| 2861 | |
| 2862 static void | |
| 2863 jm_lineart_begin_group(fz_context *ctx, fz_device *dev_, fz_rect bbox, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha) | |
| 2864 { | |
| 2865 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2866 if (!dev->clips) return; | |
| 2867 dev->pathdict = Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}", | |
| 2868 "type", "group", | |
| 2869 "rect", JM_py_from_rect(bbox), | |
| 2870 "isolated", JM_BOOL(isolated), | |
| 2871 "knockout", JM_BOOL(knockout), | |
| 2872 "blendmode", fz_blendmode_name(blendmode), | |
| 2873 "opacity", alpha, | |
| 2874 "level", dev->depth, | |
| 2875 "layer", JM_UnicodeFromStr(dev->layer_name) | |
| 2876 ); | |
| 2877 jm_append_merge(dev); | |
| 2878 dev->depth++; | |
| 2879 } | |
| 2880 | |
| 2881 static void | |
| 2882 jm_lineart_end_group(fz_context *ctx, fz_device *dev_) | |
| 2883 { | |
| 2884 jm_lineart_device *dev = (jm_lineart_device *)dev_; | |
| 2885 if (!dev->clips) return; | |
| 2886 dev->depth--; | |
| 2887 } | |
| 2888 | |
| 2889 static void jm_lineart_fill_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params) | |
| 2890 { | |
| 2891 jm_increase_seqno(ctx, dev); | |
| 2892 } | |
| 2893 | |
| 2894 static void jm_lineart_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *, const fz_stroke_state *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params) | |
| 2895 { | |
| 2896 jm_increase_seqno(ctx, dev); | |
| 2897 } | |
| 2898 | |
| 2899 static void jm_lineart_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shd, fz_matrix ctm, float alpha, fz_color_params color_params) | |
| 2900 { | |
| 2901 jm_increase_seqno(ctx, dev); | |
| 2902 } | |
| 2903 | |
| 2904 static void jm_lineart_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params) | |
| 2905 { | |
| 2906 jm_increase_seqno(ctx, dev); | |
| 2907 } | |
| 2908 | |
| 2909 static void jm_lineart_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, fz_colorspace *, const float *color, float alpha, fz_color_params color_params) | |
| 2910 { | |
| 2911 jm_increase_seqno(ctx, dev); | |
| 2912 } | |
| 2913 | |
| 2914 static void jm_lineart_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix) | |
| 2915 { | |
| 2916 jm_increase_seqno(ctx, dev); | |
| 2917 } | |
| 2918 | |
| 2919 | |
| 2920 //------------------------------------------------------------------- | |
| 2921 // LINEART device for Python method Page.get_cdrawings() | |
| 2922 //------------------------------------------------------------------- | |
| 2923 mupdf::FzDevice JM_new_lineart_device(PyObject *out, int clips, PyObject *method) | |
| 2924 { | |
| 2925 //printf("extra.JM_new_lineart_device()\n"); | |
| 2926 jm_lineart_device* dev = (jm_lineart_device*) mupdf::ll_fz_new_device_of_size(sizeof(jm_lineart_device)); | |
| 2927 | |
| 2928 dev->super.close_device = NULL; | |
| 2929 dev->super.drop_device = jm_lineart_drop_device; | |
| 2930 dev->super.fill_path = jm_lineart_fill_path; | |
| 2931 dev->super.stroke_path = jm_lineart_stroke_path; | |
| 2932 dev->super.clip_path = jm_lineart_clip_path; | |
| 2933 dev->super.clip_stroke_path = jm_lineart_clip_stroke_path; | |
| 2934 | |
| 2935 dev->super.fill_text = jm_lineart_fill_text; | |
| 2936 dev->super.stroke_text = jm_lineart_stroke_text; | |
| 2937 dev->super.clip_text = jm_lineart_clip_text; | |
| 2938 dev->super.clip_stroke_text = jm_lineart_clip_stroke_text; | |
| 2939 dev->super.ignore_text = jm_lineart_ignore_text; | |
| 2940 | |
| 2941 dev->super.fill_shade = jm_lineart_fill_shade; | |
| 2942 dev->super.fill_image = jm_lineart_fill_image; | |
| 2943 dev->super.fill_image_mask = jm_lineart_fill_image_mask; | |
| 2944 dev->super.clip_image_mask = jm_lineart_clip_image_mask; | |
| 2945 | |
| 2946 dev->super.pop_clip = jm_lineart_pop_clip; | |
| 2947 | |
| 2948 dev->super.begin_mask = NULL; | |
| 2949 dev->super.end_mask = NULL; | |
| 2950 dev->super.begin_group = jm_lineart_begin_group; | |
| 2951 dev->super.end_group = jm_lineart_end_group; | |
| 2952 | |
| 2953 dev->super.begin_tile = NULL; | |
| 2954 dev->super.end_tile = NULL; | |
| 2955 | |
| 2956 dev->super.begin_layer = jm_lineart_begin_layer; | |
| 2957 dev->super.end_layer = jm_lineart_end_layer; | |
| 2958 | |
| 2959 dev->super.begin_structure = NULL; | |
| 2960 dev->super.end_structure = NULL; | |
| 2961 | |
| 2962 dev->super.begin_metatext = NULL; | |
| 2963 dev->super.end_metatext = NULL; | |
| 2964 | |
| 2965 dev->super.render_flags = NULL; | |
| 2966 dev->super.set_default_colorspaces = NULL; | |
| 2967 | |
| 2968 if (PyList_Check(out)) { | |
| 2969 Py_INCREF(out); | |
| 2970 } | |
| 2971 Py_INCREF(method); | |
| 2972 dev->out = out; | |
| 2973 dev->seqno = 0; | |
| 2974 dev->depth = 0; | |
| 2975 dev->clips = clips; | |
| 2976 dev->method = method; | |
| 2977 dev->pathdict = nullptr; | |
| 2978 | |
| 2979 return mupdf::FzDevice(&dev->super); | |
| 2980 } | |
| 2981 | |
| 2982 PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL) | |
| 2983 { | |
| 2984 //fz_page *page = (fz_page *) $self; | |
| 2985 //fz_device *dev = NULL; | |
| 2986 PyObject *rc = NULL; | |
| 2987 int clips = PyObject_IsTrue(extended); | |
| 2988 | |
| 2989 mupdf::FzDevice dev; | |
| 2990 if (PyCallable_Check(callback) || method != Py_None) { | |
| 2991 dev = JM_new_lineart_device(callback, clips, method); | |
| 2992 } else { | |
| 2993 rc = PyList_New(0); | |
| 2994 dev = JM_new_lineart_device(rc, clips, method); | |
| 2995 } | |
| 2996 mupdf::FzRect prect = mupdf::fz_bound_page(page); | |
| 2997 ((jm_lineart_device*) dev.m_internal)->ptm = mupdf::ll_fz_make_matrix(1, 0, 0, -1, 0, prect.y1); | |
| 2998 | |
| 2999 mupdf::FzCookie cookie; | |
| 3000 mupdf::FzMatrix identity; | |
| 3001 mupdf::fz_run_page( page, dev, *identity.internal(), cookie); | |
| 3002 mupdf::fz_close_device( dev); | |
| 3003 if (PyCallable_Check(callback) || method != Py_None) | |
| 3004 { | |
| 3005 Py_RETURN_NONE; | |
| 3006 } | |
| 3007 return rc; | |
| 3008 } | |
| 3009 | |
| 3010 | |
| 3011 //--------------------------------------------------------------------------- | |
| 3012 // APPEND non-ascii runes in unicode escape format to fz_buffer | |
| 3013 //--------------------------------------------------------------------------- | |
| 3014 void JM_append_rune(fz_buffer *buff, int ch) | |
| 3015 { | |
| 3016 char text[32]; | |
| 3017 if (ch == 92) // prevent accidental "\u", "\U" sequences | |
| 3018 { | |
| 3019 mupdf::ll_fz_append_string(buff, "\\u005c"); | |
| 3020 } | |
| 3021 else if ((ch >= 32 && ch <= 127) || ch == 10) | |
| 3022 { | |
| 3023 mupdf::ll_fz_append_byte(buff, ch); | |
| 3024 } | |
| 3025 else if (ch >= 0xd800 && ch <= 0xdfff) // orphaned surrogate Unicodes | |
| 3026 { | |
| 3027 mupdf::ll_fz_append_string(buff, "\\ufffd"); | |
| 3028 } | |
| 3029 else if (ch <= 0xffff) | |
| 3030 { | |
| 3031 // 4 hex digits | |
| 3032 snprintf(text, sizeof(text), "\\u%04x", ch); | |
| 3033 mupdf::ll_fz_append_string(buff, text); | |
| 3034 } | |
| 3035 else | |
| 3036 { | |
| 3037 // 8 hex digits | |
| 3038 snprintf(text, sizeof(text), "\\U%08x", ch); | |
| 3039 mupdf::ll_fz_append_string(buff, text); | |
| 3040 } | |
| 3041 } | |
| 3042 | |
| 3043 | |
| 3044 mupdf::FzRect JM_make_spanlist( | |
| 3045 PyObject *line_dict, | |
| 3046 mupdf::FzStextLine& line, | |
| 3047 int raw, | |
| 3048 mupdf::FzBuffer& buff, | |
| 3049 mupdf::FzRect& tp_rect | |
| 3050 ) | |
| 3051 { | |
| 3052 PyObject *span = NULL, *char_list = NULL, *char_dict; | |
| 3053 PyObject *span_list = PyList_New(0); | |
| 3054 mupdf::fz_clear_buffer(buff); | |
| 3055 fz_rect span_rect = fz_empty_rect; | |
| 3056 fz_rect line_rect = fz_empty_rect; | |
| 3057 fz_point span_origin = {0, 0}; | |
| 3058 struct char_style | |
| 3059 { | |
| 3060 float size = -1; | |
| 3061 unsigned flags = 0; | |
| 3062 | |
| 3063 #if MUPDF_VERSION_GE(1, 25, 2) | |
| 3064 /* From mupdf:include/mupdf/fitz/structured-text.h:fz_stext_char::flags, which | |
| 3065 uses anonymous enum values: | |
| 3066 FZ_STEXT_STRIKEOUT = 1, | |
| 3067 FZ_STEXT_UNDERLINE = 2, | |
| 3068 FZ_STEXT_SYNTHETIC = 4, | |
| 3069 FZ_STEXT_FILLED = 16, | |
| 3070 FZ_STEXT_STROKED = 32, | |
| 3071 FZ_STEXT_CLIPPED = 64 | |
| 3072 */ | |
| 3073 unsigned char_flags = 0; | |
| 3074 #endif | |
| 3075 | |
| 3076 const char *font = ""; | |
| 3077 unsigned argb = 0; | |
| 3078 float asc = 0; | |
| 3079 float desc = 0; | |
| 3080 uint16_t bidi = 0; | |
| 3081 }; | |
| 3082 char_style old_style; | |
| 3083 char_style style; | |
| 3084 | |
| 3085 for (mupdf::FzStextChar ch: line) | |
| 3086 { | |
| 3087 fz_rect r = JM_char_bbox(line, ch); | |
| 3088 if (!JM_rects_overlap(*tp_rect.internal(), r) && !fz_is_infinite_rect(tp_rect)) | |
| 3089 { | |
| 3090 continue; | |
| 3091 } | |
| 3092 /* Info from: | |
| 3093 detect_super_script() | |
| 3094 fz_font_is_italic() | |
| 3095 fz_font_is_serif() | |
| 3096 fz_font_is_monospaced() | |
| 3097 fz_font_is_bold() | |
| 3098 */ | |
| 3099 int flags = JM_char_font_flags( ch.m_internal->font, line.m_internal, ch.m_internal); | |
| 3100 fz_point origin = ch.m_internal->origin; | |
| 3101 style.size = ch.m_internal->size; | |
| 3102 style.flags = flags; | |
| 3103 #if MUPDF_VERSION_GE(1, 25, 2) | |
| 3104 /* FZ_STEXT_SYNTHETIC is per-char, not per-span. */ | |
| 3105 style.char_flags = ch.m_internal->flags & ~FZ_STEXT_SYNTHETIC; | |
| 3106 #endif | |
| 3107 style.font = JM_font_name(ch.m_internal->font); | |
| 3108 #if MUPDF_VERSION_GE(1, 25, 0) | |
| 3109 style.argb = ch.m_internal->argb; | |
| 3110 #else | |
| 3111 style.argb = ch.m_internal->color; | |
| 3112 #endif | |
| 3113 style.asc = JM_font_ascender(ch.m_internal->font); | |
| 3114 style.desc = JM_font_descender(ch.m_internal->font); | |
| 3115 | |
| 3116 if (0 | |
| 3117 || style.size != old_style.size | |
| 3118 || style.flags != old_style.flags | |
| 3119 #if MUPDF_VERSION_GE(1, 25, 2) | |
| 3120 || style.char_flags != old_style.char_flags | |
| 3121 #endif | |
| 3122 || style.argb != old_style.argb | |
| 3123 || strcmp(style.font, old_style.font) != 0 | |
| 3124 || style.bidi != old_style.bidi | |
| 3125 ) | |
| 3126 { | |
| 3127 if (old_style.size >= 0) | |
| 3128 { | |
| 3129 // not first one, output previous | |
| 3130 if (raw) | |
| 3131 { | |
| 3132 // put character list in the span | |
| 3133 DICT_SETITEM_DROP(span, dictkey_chars, char_list); | |
| 3134 char_list = NULL; | |
| 3135 } | |
| 3136 else | |
| 3137 { | |
| 3138 // put text string in the span | |
| 3139 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff)); | |
| 3140 mupdf::fz_clear_buffer(buff); | |
| 3141 } | |
| 3142 | |
| 3143 DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin)); | |
| 3144 DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect)); | |
| 3145 line_rect = mupdf::ll_fz_union_rect(line_rect, span_rect); | |
| 3146 LIST_APPEND_DROP(span_list, span); | |
| 3147 span = NULL; | |
| 3148 } | |
| 3149 | |
| 3150 span = PyDict_New(); | |
| 3151 float asc = style.asc, desc = style.desc; | |
| 3152 if (style.asc < 1e-3) | |
| 3153 { | |
| 3154 asc = 0.9f; | |
| 3155 desc = -0.1f; | |
| 3156 } | |
| 3157 | |
| 3158 DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size)); | |
| 3159 DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("I", style.flags)); | |
| 3160 DICT_SETITEM_DROP(span, dictkey_bidi, Py_BuildValue("I", style.bidi)); | |
| 3161 #if MUPDF_VERSION_GE(1, 25, 2) | |
| 3162 DICT_SETITEM_DROP(span, dictkey_char_flags, Py_BuildValue("I", style.char_flags)); | |
| 3163 #endif | |
| 3164 DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font)); | |
| 3165 DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("I", style.argb & 0xffffff)); | |
| 3166 #if MUPDF_VERSION_GE(1, 25, 0) | |
| 3167 DICT_SETITEMSTR_DROP(span, "alpha", Py_BuildValue("I", style.argb >> 24)); | |
| 3168 #endif | |
| 3169 DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc)); | |
| 3170 DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc)); | |
| 3171 | |
| 3172 old_style = style; | |
| 3173 span_rect = r; | |
| 3174 span_origin = origin; | |
| 3175 | |
| 3176 } | |
| 3177 span_rect = mupdf::ll_fz_union_rect(span_rect, r); | |
| 3178 | |
| 3179 if (raw) | |
| 3180 { | |
| 3181 // make and append a char dict | |
| 3182 char_dict = PyDict_New(); | |
| 3183 DICT_SETITEM_DROP(char_dict, dictkey_origin, JM_py_from_point(ch.m_internal->origin)); | |
| 3184 | |
| 3185 DICT_SETITEM_DROP(char_dict, dictkey_bbox, JM_py_from_rect(r)); | |
| 3186 | |
| 3187 DICT_SETITEM_DROP(char_dict, dictkey_c, Py_BuildValue("C", ch.m_internal->c)); | |
| 3188 DICT_SETITEMSTR_DROP(char_dict, "synthetic", Py_BuildValue("O", (ch.m_internal->flags & FZ_STEXT_SYNTHETIC) ? Py_True : Py_False)); | |
| 3189 if (!char_list) | |
| 3190 { | |
| 3191 char_list = PyList_New(0); | |
| 3192 } | |
| 3193 LIST_APPEND_DROP(char_list, char_dict); | |
| 3194 } | |
| 3195 else | |
| 3196 { | |
| 3197 // add character byte to buffer | |
| 3198 JM_append_rune(buff.m_internal, ch.m_internal->c); | |
| 3199 } | |
| 3200 } | |
| 3201 // all characters processed, now flush remaining span | |
| 3202 if (span) | |
| 3203 { | |
| 3204 if (raw) | |
| 3205 { | |
| 3206 DICT_SETITEM_DROP(span, dictkey_chars, char_list); | |
| 3207 char_list = NULL; | |
| 3208 } | |
| 3209 else | |
| 3210 { | |
| 3211 DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff)); | |
| 3212 mupdf::fz_clear_buffer(buff); | |
| 3213 } | |
| 3214 DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin)); | |
| 3215 DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect)); | |
| 3216 | |
| 3217 if (!fz_is_empty_rect(span_rect)) | |
| 3218 { | |
| 3219 LIST_APPEND_DROP(span_list, span); | |
| 3220 line_rect = fz_union_rect(line_rect, span_rect); | |
| 3221 } | |
| 3222 else | |
| 3223 { | |
| 3224 Py_DECREF(span); | |
| 3225 } | |
| 3226 span = NULL; | |
| 3227 } | |
| 3228 if (!mupdf::fz_is_empty_rect(line_rect)) | |
| 3229 { | |
| 3230 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); | |
| 3231 } | |
| 3232 else | |
| 3233 { | |
| 3234 DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); | |
| 3235 } | |
| 3236 return line_rect; | |
| 3237 } | |
| 3238 | |
| 3239 //----------------------------------------------------------------------------- | |
| 3240 // Functions for wordlist output | |
| 3241 //----------------------------------------------------------------------------- | |
| 3242 int JM_append_word( | |
| 3243 PyObject* lines, | |
| 3244 fz_buffer* buff, | |
| 3245 fz_rect* wbbox, | |
| 3246 int block_n, | |
| 3247 int line_n, | |
| 3248 int word_n | |
| 3249 ) | |
| 3250 { | |
| 3251 PyObject* s = JM_EscapeStrFromBuffer(buff); | |
| 3252 PyObject* litem = Py_BuildValue( | |
| 3253 "ffffOiii", | |
| 3254 wbbox->x0, | |
| 3255 wbbox->y0, | |
| 3256 wbbox->x1, | |
| 3257 wbbox->y1, | |
| 3258 s, | |
| 3259 block_n, | |
| 3260 line_n, | |
| 3261 word_n | |
| 3262 ); | |
| 3263 LIST_APPEND_DROP(lines, litem); | |
| 3264 Py_DECREF(s); | |
| 3265 *wbbox = fz_empty_rect; | |
| 3266 return word_n + 1; // word counter | |
| 3267 } | |
| 3268 | |
| 3269 PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters) | |
| 3270 { | |
| 3271 int block_n = -1; | |
| 3272 fz_rect wbbox = fz_empty_rect; // word bbox | |
| 3273 fz_rect tp_rect = this_tpage.m_internal->mediabox; | |
| 3274 | |
| 3275 PyObject *lines = NULL; | |
| 3276 mupdf::FzBuffer buff = mupdf::fz_new_buffer(64); | |
| 3277 lines = PyList_New(0); | |
| 3278 for (mupdf::FzStextBlock block: this_tpage) | |
| 3279 { | |
| 3280 block_n++; | |
| 3281 if (block.m_internal->type != FZ_STEXT_BLOCK_TEXT) | |
| 3282 { | |
| 3283 continue; | |
| 3284 } | |
| 3285 int line_n = -1; | |
| 3286 for (mupdf::FzStextLine line: block) | |
| 3287 { | |
| 3288 line_n++; | |
| 3289 int word_n = 0; // word counter per line | |
| 3290 mupdf::fz_clear_buffer(buff); // reset word buffer | |
| 3291 size_t buflen = 0; // reset char counter | |
| 3292 int last_char_rtl = 0; // was last character RTL? | |
| 3293 for (mupdf::FzStextChar ch: line) | |
| 3294 { | |
| 3295 mupdf::FzRect cbbox = JM_char_bbox(line, ch); | |
| 3296 if (!JM_rects_overlap(tp_rect, *cbbox.internal()) && !fz_is_infinite_rect(tp_rect)) | |
| 3297 { | |
| 3298 continue; | |
| 3299 } | |
| 3300 | |
| 3301 int word_delimiter = JM_is_word_delimiter(ch.m_internal->c, delimiters); | |
| 3302 int this_char_rtl = JM_is_rtl_char(ch.m_internal->c); | |
| 3303 if (word_delimiter || this_char_rtl != last_char_rtl) | |
| 3304 { | |
| 3305 if (buflen == 0 && word_delimiter) | |
| 3306 { | |
| 3307 continue; // skip delimiters at line start | |
| 3308 } | |
| 3309 if (!fz_is_empty_rect(wbbox)) | |
| 3310 { | |
| 3311 word_n = JM_append_word( | |
| 3312 lines, | |
| 3313 buff.m_internal, | |
| 3314 &wbbox, | |
| 3315 block_n, | |
| 3316 line_n, | |
| 3317 word_n | |
| 3318 ); | |
| 3319 } | |
| 3320 mupdf::fz_clear_buffer(buff); | |
| 3321 buflen = 0; // reset char counter | |
| 3322 if (word_delimiter) continue; | |
| 3323 } | |
| 3324 // append one unicode character to the word | |
| 3325 JM_append_rune(buff.m_internal, ch.m_internal->c); | |
| 3326 last_char_rtl = this_char_rtl; | |
| 3327 buflen++; | |
| 3328 // enlarge word bbox | |
| 3329 wbbox = fz_union_rect(wbbox, JM_char_bbox(line, ch)); | |
| 3330 } | |
| 3331 if (buflen && !fz_is_empty_rect(wbbox)) | |
| 3332 { | |
| 3333 word_n = JM_append_word( | |
| 3334 lines, | |
| 3335 buff.m_internal, | |
| 3336 &wbbox, | |
| 3337 block_n, | |
| 3338 line_n, | |
| 3339 word_n | |
| 3340 ); | |
| 3341 } | |
| 3342 mupdf::fz_clear_buffer(buff); | |
| 3343 buflen = 0; | |
| 3344 } | |
| 3345 } | |
| 3346 return lines; | |
| 3347 } | |
| 3348 | |
| 3349 | |
| 3350 | |
| 3351 struct ScopedPyObject | |
| 3352 /* PyObject* wrapper, destructor calls Py_CLEAR() unless `release()` has been | |
| 3353 called. */ | |
| 3354 { | |
| 3355 ScopedPyObject(PyObject* rhs=nullptr) | |
| 3356 : | |
| 3357 m_pyobject(rhs) | |
| 3358 {} | |
| 3359 | |
| 3360 PyObject*& get() | |
| 3361 { | |
| 3362 return m_pyobject; | |
| 3363 } | |
| 3364 | |
| 3365 ScopedPyObject& operator= (PyObject* rhs) | |
| 3366 { | |
| 3367 Py_CLEAR(m_pyobject); | |
| 3368 m_pyobject = rhs; | |
| 3369 return *this; | |
| 3370 } | |
| 3371 | |
| 3372 PyObject* release() | |
| 3373 { | |
| 3374 PyObject* ret = m_pyobject; | |
| 3375 m_pyobject = nullptr; | |
| 3376 return ret; | |
| 3377 } | |
| 3378 ~ScopedPyObject() | |
| 3379 { | |
| 3380 Py_CLEAR(m_pyobject); | |
| 3381 } | |
| 3382 | |
| 3383 PyObject* m_pyobject = nullptr; | |
| 3384 }; | |
| 3385 | |
| 3386 | |
| 3387 PyObject* extractBLOCKS(mupdf::FzStextPage& self) | |
| 3388 { | |
| 3389 fz_stext_page *this_tpage = self.m_internal; | |
| 3390 fz_rect tp_rect = this_tpage->mediabox; | |
| 3391 mupdf::FzBuffer res(1024); | |
| 3392 ScopedPyObject lines( PyList_New(0)); | |
| 3393 int block_n = -1; | |
| 3394 for (fz_stext_block* block = this_tpage->first_block; block; block = block->next) | |
| 3395 { | |
| 3396 ScopedPyObject text; | |
| 3397 block_n++; | |
| 3398 fz_rect blockrect = fz_empty_rect; | |
| 3399 if (block->type == FZ_STEXT_BLOCK_TEXT) | |
| 3400 { | |
| 3401 mupdf::fz_clear_buffer(res); // set text buffer to empty | |
| 3402 int line_n = -1; | |
| 3403 int last_char = 0; | |
| 3404 (void) line_n; /* Not actually used, but keeping in the code for now. */ | |
| 3405 for (fz_stext_line* line = block->u.t.first_line; line; line = line->next) | |
| 3406 { | |
| 3407 line_n++; | |
| 3408 fz_rect linerect = fz_empty_rect; | |
| 3409 for (fz_stext_char* ch = line->first_char; ch; ch = ch->next) | |
| 3410 { | |
| 3411 fz_rect cbbox = JM_char_bbox(line, ch); | |
| 3412 if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect)) | |
| 3413 { | |
| 3414 continue; | |
| 3415 } | |
| 3416 JM_append_rune(res.m_internal, ch->c); | |
| 3417 last_char = ch->c; | |
| 3418 linerect = fz_union_rect(linerect, cbbox); | |
| 3419 } | |
| 3420 if (last_char != 10 && !fz_is_empty_rect(linerect)) | |
| 3421 { | |
| 3422 mupdf::fz_append_byte(res, 10); | |
| 3423 } | |
| 3424 blockrect = fz_union_rect(blockrect, linerect); | |
| 3425 } | |
| 3426 text = JM_EscapeStrFromBuffer(res); | |
| 3427 } | |
| 3428 else if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) | |
| 3429 { | |
| 3430 fz_image *img = block->u.i.image; | |
| 3431 fz_colorspace *cs = img->colorspace; | |
| 3432 text = PyUnicode_FromFormat( | |
| 3433 "<image: %s, width: %d, height: %d, bpc: %d>", | |
| 3434 mupdf::ll_fz_colorspace_name(cs), | |
| 3435 img->w, | |
| 3436 img->h, | |
| 3437 img->bpc | |
| 3438 ); | |
| 3439 blockrect = fz_union_rect(blockrect, block->bbox); | |
| 3440 } | |
| 3441 if (!fz_is_empty_rect(blockrect)) | |
| 3442 { | |
| 3443 ScopedPyObject litem = PyTuple_New(7); | |
| 3444 PyTuple_SET_ITEM(litem.get(), 0, Py_BuildValue("f", blockrect.x0)); | |
| 3445 PyTuple_SET_ITEM(litem.get(), 1, Py_BuildValue("f", blockrect.y0)); | |
| 3446 PyTuple_SET_ITEM(litem.get(), 2, Py_BuildValue("f", blockrect.x1)); | |
| 3447 PyTuple_SET_ITEM(litem.get(), 3, Py_BuildValue("f", blockrect.y1)); | |
| 3448 PyTuple_SET_ITEM(litem.get(), 4, Py_BuildValue("O", text.get())); | |
| 3449 PyTuple_SET_ITEM(litem.get(), 5, Py_BuildValue("i", block_n)); | |
| 3450 PyTuple_SET_ITEM(litem.get(), 6, Py_BuildValue("i", block->type)); | |
| 3451 LIST_APPEND(lines.get(), litem.get()); | |
| 3452 } | |
| 3453 } | |
| 3454 return lines.release(); | |
| 3455 } | |
| 3456 | |
| 3457 #define EMPTY_STRING PyUnicode_FromString("") | |
| 3458 | |
| 3459 static PyObject *JM_UnicodeFromStr(const char *c) | |
| 3460 { | |
| 3461 if (!c) return EMPTY_STRING; | |
| 3462 PyObject *val = Py_BuildValue("s", c); | |
| 3463 if (!val) { | |
| 3464 val = EMPTY_STRING; | |
| 3465 PyErr_Clear(); | |
| 3466 } | |
| 3467 return val; | |
| 3468 } | |
| 3469 | |
| 3470 PyObject* link_uri(mupdf::FzLink& link) | |
| 3471 { | |
| 3472 return JM_UnicodeFromStr( link.m_internal->uri); | |
| 3473 } | |
| 3474 | |
| 3475 fz_stext_page* page_get_textpage( | |
| 3476 mupdf::FzPage& self, | |
| 3477 PyObject* clip, | |
| 3478 int flags, | |
| 3479 PyObject* matrix | |
| 3480 ) | |
| 3481 { | |
| 3482 fz_context* ctx = mupdf::internal_context_get(); | |
| 3483 fz_stext_page *tpage=NULL; | |
| 3484 fz_page *page = self.m_internal; | |
| 3485 fz_device *dev = NULL; | |
| 3486 fz_stext_options options; | |
| 3487 memset(&options, 0, sizeof options); | |
| 3488 options.flags = flags; | |
| 3489 fz_try(ctx) { | |
| 3490 // Default to page's rect if `clip` not specified, for #2048. | |
| 3491 fz_rect rect = (clip==Py_None) ? fz_bound_page(ctx, page) : JM_rect_from_py(clip); | |
| 3492 fz_matrix ctm = JM_matrix_from_py(matrix); | |
| 3493 tpage = fz_new_stext_page(ctx, rect); | |
| 3494 dev = fz_new_stext_device(ctx, tpage, &options); | |
| 3495 fz_run_page(ctx, page, dev, ctm, NULL); | |
| 3496 fz_close_device(ctx, dev); | |
| 3497 } | |
| 3498 fz_always(ctx) { | |
| 3499 fz_drop_device(ctx, dev); | |
| 3500 } | |
| 3501 fz_catch(ctx) { | |
| 3502 mupdf::internal_throw_exception(ctx); | |
| 3503 } | |
| 3504 return tpage; | |
| 3505 } | |
| 3506 | |
| 3507 // return extension for pymupdf image type | |
| 3508 const char *JM_image_extension(int type) | |
| 3509 { | |
| 3510 switch (type) { | |
| 3511 case(FZ_IMAGE_RAW): return "raw"; | |
| 3512 case(FZ_IMAGE_FLATE): return "flate"; | |
| 3513 case(FZ_IMAGE_LZW): return "lzw"; | |
| 3514 case(FZ_IMAGE_RLD): return "rld"; | |
| 3515 case(FZ_IMAGE_BMP): return "bmp"; | |
| 3516 case(FZ_IMAGE_GIF): return "gif"; | |
| 3517 case(FZ_IMAGE_JBIG2): return "jb2"; | |
| 3518 case(FZ_IMAGE_JPEG): return "jpeg"; | |
| 3519 case(FZ_IMAGE_JPX): return "jpx"; | |
| 3520 case(FZ_IMAGE_JXR): return "jxr"; | |
| 3521 case(FZ_IMAGE_PNG): return "png"; | |
| 3522 case(FZ_IMAGE_PNM): return "pnm"; | |
| 3523 case(FZ_IMAGE_TIFF): return "tiff"; | |
| 3524 default: return "n/a"; | |
| 3525 } | |
| 3526 } | |
| 3527 | |
| 3528 void JM_make_image_block(fz_stext_block *block, PyObject *block_dict) | |
| 3529 { | |
| 3530 fz_context* ctx = mupdf::internal_context_get(); | |
| 3531 fz_image *image = block->u.i.image; | |
| 3532 fz_buffer *buf = NULL, *freebuf = NULL, *mask_buf = NULL; | |
| 3533 fz_compressed_buffer *buffer = fz_compressed_image_buffer(ctx, image); | |
| 3534 fz_var(buf); | |
| 3535 fz_var(freebuf); | |
| 3536 fz_var(mask_buf); | |
| 3537 int n = fz_colorspace_n(ctx, image->colorspace); | |
| 3538 int w = image->w; | |
| 3539 int h = image->h; | |
| 3540 const char *ext = ""; | |
| 3541 int type = FZ_IMAGE_UNKNOWN; | |
| 3542 if (buffer) { | |
| 3543 type = buffer->params.type; | |
| 3544 ext = JM_image_extension(type); | |
| 3545 } | |
| 3546 if (type < FZ_IMAGE_BMP || type == FZ_IMAGE_JBIG2) | |
| 3547 type = FZ_IMAGE_UNKNOWN; | |
| 3548 PyObject *bytes = NULL; | |
| 3549 fz_var(bytes); | |
| 3550 PyObject *mask_bytes = NULL; | |
| 3551 fz_var(mask_bytes); | |
| 3552 fz_try(ctx) { | |
| 3553 if (!buffer || type == FZ_IMAGE_UNKNOWN) | |
| 3554 { | |
| 3555 buf = freebuf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params); | |
| 3556 ext = "png"; | |
| 3557 } | |
| 3558 else if (n == 4 && strcmp(ext, "jpeg") == 0) // JPEG CMYK needs another step | |
| 3559 { | |
| 3560 buf = freebuf = fz_new_buffer_from_image_as_jpeg(ctx, image, fz_default_color_params, 95, 1); | |
| 3561 } | |
| 3562 else | |
| 3563 { | |
| 3564 buf = buffer->buffer; | |
| 3565 } | |
| 3566 bytes = JM_BinFromBuffer(buf); | |
| 3567 if (image->mask) { | |
| 3568 mask_buf = fz_new_buffer_from_image_as_png(ctx, image->mask, fz_default_color_params); | |
| 3569 mask_bytes = JM_BinFromBuffer(mask_buf); | |
| 3570 } else { | |
| 3571 mask_bytes = Py_BuildValue("s", NULL); | |
| 3572 } | |
| 3573 } | |
| 3574 fz_always(ctx) { | |
| 3575 if (!bytes) | |
| 3576 bytes = PyBytes_FromString(""); | |
| 3577 DICT_SETITEM_DROP(block_dict, dictkey_width, | |
| 3578 Py_BuildValue("i", w)); | |
| 3579 DICT_SETITEM_DROP(block_dict, dictkey_height, | |
| 3580 Py_BuildValue("i", h)); | |
| 3581 DICT_SETITEM_DROP(block_dict, dictkey_ext, | |
| 3582 Py_BuildValue("s", ext)); | |
| 3583 DICT_SETITEM_DROP(block_dict, dictkey_colorspace, | |
| 3584 Py_BuildValue("i", n)); | |
| 3585 DICT_SETITEM_DROP(block_dict, dictkey_xres, | |
| 3586 Py_BuildValue("i", image->xres)); | |
| 3587 DICT_SETITEM_DROP(block_dict, dictkey_yres, | |
| 3588 Py_BuildValue("i", image->xres)); | |
| 3589 DICT_SETITEM_DROP(block_dict, dictkey_bpc, | |
| 3590 Py_BuildValue("i", (int) image->bpc)); | |
| 3591 DICT_SETITEM_DROP(block_dict, dictkey_matrix, | |
| 3592 JM_py_from_matrix(block->u.i.transform)); | |
| 3593 DICT_SETITEM_DROP(block_dict, dictkey_size, | |
| 3594 Py_BuildValue("n", PyBytes_Size(bytes))); | |
| 3595 DICT_SETITEM_DROP(block_dict, dictkey_image, bytes); | |
| 3596 DICT_SETITEMSTR_DROP(block_dict, "mask", mask_bytes); | |
| 3597 fz_drop_buffer(ctx, mask_buf); | |
| 3598 fz_drop_buffer(ctx, freebuf); | |
| 3599 } | |
| 3600 fz_catch(ctx) {;} | |
| 3601 return; | |
| 3602 } | |
| 3603 | |
| 3604 static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect) | |
| 3605 { | |
| 3606 fz_stext_line *line; | |
| 3607 PyObject *line_list = PyList_New(0), *line_dict; | |
| 3608 fz_rect block_rect = fz_empty_rect; | |
| 3609 for (line = block->u.t.first_line; line; line = line->next) { | |
| 3610 if (fz_is_empty_rect(fz_intersect_rect(tp_rect, line->bbox)) && | |
| 3611 !fz_is_infinite_rect(tp_rect)) { | |
| 3612 continue; | |
| 3613 } | |
| 3614 line_dict = PyDict_New(); | |
| 3615 mupdf::FzStextLine line2(line); | |
| 3616 mupdf::FzBuffer buff2( mupdf::ll_fz_keep_buffer( buff)); | |
| 3617 mupdf::FzRect tp_rect2( tp_rect); | |
| 3618 mupdf::FzRect line_rect2 = JM_make_spanlist( | |
| 3619 line_dict, | |
| 3620 line2, | |
| 3621 raw, | |
| 3622 buff2, | |
| 3623 tp_rect2 | |
| 3624 ); | |
| 3625 fz_rect& line_rect = *line_rect2.internal(); | |
| 3626 block_rect = fz_union_rect(block_rect, line_rect); | |
| 3627 DICT_SETITEM_DROP(line_dict, dictkey_wmode, | |
| 3628 Py_BuildValue("i", line->wmode)); | |
| 3629 DICT_SETITEM_DROP(line_dict, dictkey_dir, JM_py_from_point(line->dir)); | |
| 3630 DICT_SETITEM_DROP(line_dict, dictkey_bbox, | |
| 3631 JM_py_from_rect(line_rect)); | |
| 3632 LIST_APPEND_DROP(line_list, line_dict); | |
| 3633 } | |
| 3634 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block_rect)); | |
| 3635 DICT_SETITEM_DROP(block_dict, dictkey_lines, line_list); | |
| 3636 return; | |
| 3637 } | |
| 3638 | |
| 3639 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw) | |
| 3640 { | |
| 3641 fz_context* ctx = mupdf::internal_context_get(); | |
| 3642 fz_stext_block *block; | |
| 3643 fz_buffer *text_buffer = fz_new_buffer(ctx, 128); | |
| 3644 PyObject *block_dict, *block_list = PyList_New(0); | |
| 3645 fz_rect tp_rect = tp->mediabox; | |
| 3646 int block_n = -1; | |
| 3647 for (block = tp->first_block; block; block = block->next) { | |
| 3648 block_n++; | |
| 3649 if (!fz_contains_rect(tp_rect, block->bbox) && | |
| 3650 !fz_is_infinite_rect(tp_rect) && | |
| 3651 block->type == FZ_STEXT_BLOCK_IMAGE) { | |
| 3652 continue; | |
| 3653 } | |
| 3654 if (!fz_is_infinite_rect(tp_rect) && | |
| 3655 fz_is_empty_rect(fz_intersect_rect(tp_rect, block->bbox))) { | |
| 3656 continue; | |
| 3657 } | |
| 3658 | |
| 3659 block_dict = PyDict_New(); | |
| 3660 DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); | |
| 3661 DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); | |
| 3662 if (block->type == FZ_STEXT_BLOCK_IMAGE) { | |
| 3663 DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); | |
| 3664 JM_make_image_block(block, block_dict); | |
| 3665 } else { | |
| 3666 JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect); | |
| 3667 } | |
| 3668 | |
| 3669 LIST_APPEND_DROP(block_list, block_dict); | |
| 3670 } | |
| 3671 DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list); | |
| 3672 fz_drop_buffer(ctx, text_buffer); | |
| 3673 } | |
| 3674 | |
| 3675 //----------------------------------------------------------------- | |
| 3676 // get one pixel as a list | |
| 3677 //----------------------------------------------------------------- | |
| 3678 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y) | |
| 3679 { | |
| 3680 fz_context* ctx = mupdf::internal_context_get(); | |
| 3681 PyObject *p = NULL; | |
| 3682 if (0 | |
| 3683 || x < 0 | |
| 3684 || x >= pm->w | |
| 3685 || y < 0 | |
| 3686 || y >= pm->h | |
| 3687 ) | |
| 3688 { | |
| 3689 throw std::range_error( MSG_PIXEL_OUTSIDE); | |
| 3690 } | |
| 3691 int n = pm->n; | |
| 3692 int stride = fz_pixmap_stride(ctx, pm); | |
| 3693 int i = stride * y + n * x; | |
| 3694 p = PyTuple_New(n); | |
| 3695 for (int j = 0; j < n; j++) | |
| 3696 { | |
| 3697 PyTuple_SET_ITEM(p, j, Py_BuildValue("i", pm->samples[i + j])); | |
| 3698 } | |
| 3699 return p; | |
| 3700 } | |
| 3701 | |
| 3702 int pixmap_n(mupdf::FzPixmap& pixmap) | |
| 3703 { | |
| 3704 return mupdf::fz_pixmap_components( pixmap); | |
| 3705 } | |
| 3706 | |
| 3707 static int | |
| 3708 JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result) | |
| 3709 { | |
| 3710 PyObject *temp = PySequence_ITEM(obj, idx); | |
| 3711 if (!temp) return 1; | |
| 3712 if (PyLong_Check(temp)) { | |
| 3713 *result = (int) PyLong_AsLong(temp); | |
| 3714 Py_DECREF(temp); | |
| 3715 } else if (PyFloat_Check(temp)) { | |
| 3716 *result = (int) PyFloat_AsDouble(temp); | |
| 3717 Py_DECREF(temp); | |
| 3718 } else { | |
| 3719 Py_DECREF(temp); | |
| 3720 return 1; | |
| 3721 } | |
| 3722 if (PyErr_Occurred()) { | |
| 3723 PyErr_Clear(); | |
| 3724 return 1; | |
| 3725 } | |
| 3726 return 0; | |
| 3727 } | |
| 3728 | |
| 3729 PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color) | |
| 3730 { | |
| 3731 fz_context* ctx = mupdf::internal_context_get(); | |
| 3732 if (0 | |
| 3733 || x < 0 | |
| 3734 || x >= pm->w | |
| 3735 || y < 0 | |
| 3736 || y >= pm->h | |
| 3737 ) | |
| 3738 { | |
| 3739 throw std::range_error( MSG_PIXEL_OUTSIDE); | |
| 3740 } | |
| 3741 int n = pm->n; | |
| 3742 if (!PySequence_Check(color) || PySequence_Size(color) != n) { | |
| 3743 throw std::range_error(MSG_BAD_COLOR_SEQ); | |
| 3744 } | |
| 3745 int i, j; | |
| 3746 unsigned char c[5]; | |
| 3747 for (j = 0; j < n; j++) { | |
| 3748 if (JM_INT_ITEM(color, j, &i) == 1) { | |
| 3749 throw std::range_error(MSG_BAD_COLOR_SEQ); | |
| 3750 } | |
| 3751 if (i < 0 or i >= 256) { | |
| 3752 throw std::range_error(MSG_BAD_COLOR_SEQ); | |
| 3753 } | |
| 3754 c[j] = (unsigned char) i; | |
| 3755 } | |
| 3756 int stride = fz_pixmap_stride(ctx, pm); | |
| 3757 i = stride * y + n * x; | |
| 3758 for (j = 0; j < n; j++) { | |
| 3759 pm->samples[i + j] = c[j]; | |
| 3760 } | |
| 3761 Py_RETURN_NONE; | |
| 3762 } | |
| 3763 //------------------------------------------- | |
| 3764 // make a buffer from an stext_page's text | |
| 3765 //------------------------------------------- | |
| 3766 fz_buffer * | |
| 3767 JM_new_buffer_from_stext_page(fz_stext_page *page) | |
| 3768 { | |
| 3769 fz_context* ctx = mupdf::internal_context_get(); | |
| 3770 fz_stext_block *block; | |
| 3771 fz_stext_line *line; | |
| 3772 fz_stext_char *ch; | |
| 3773 fz_rect rect = page->mediabox; | |
| 3774 fz_buffer *buf = NULL; | |
| 3775 | |
| 3776 fz_try(ctx) | |
| 3777 { | |
| 3778 buf = fz_new_buffer(ctx, 256); | |
| 3779 for (block = page->first_block; block; block = block->next) { | |
| 3780 if (block->type == FZ_STEXT_BLOCK_TEXT) { | |
| 3781 for (line = block->u.t.first_line; line; line = line->next) { | |
| 3782 for (ch = line->first_char; ch; ch = ch->next) { | |
| 3783 if (!JM_rects_overlap(rect, JM_char_bbox(line, ch)) && | |
| 3784 !fz_is_infinite_rect(rect)) { | |
| 3785 continue; | |
| 3786 } | |
| 3787 fz_append_rune(ctx, buf, ch->c); | |
| 3788 } | |
| 3789 fz_append_byte(ctx, buf, '\n'); | |
| 3790 } | |
| 3791 fz_append_byte(ctx, buf, '\n'); | |
| 3792 } | |
| 3793 } | |
| 3794 } | |
| 3795 fz_catch(ctx) { | |
| 3796 fz_drop_buffer(ctx, buf); | |
| 3797 mupdf::internal_throw_exception(ctx); | |
| 3798 } | |
| 3799 return buf; | |
| 3800 } | |
| 3801 | |
/* Canonical form of a character for searching: NBSP (U+00A0), the line and
   paragraph separators (U+2028, U+2029), CR, LF and TAB all become a blank;
   ASCII upper case letters become lower case; everything else is unchanged. */
static inline int canon(int c)
{
    /* TODO: proper unicode case folding */
    /* TODO: character equivalence (a matches ä, etc) */
    switch (c)
    {
        case 0xA0:
        case 0x2028:
        case 0x2029:
        case '\r':
        case '\n':
        case '\t':
            return ' ';
        default:
            break;
    }
    const bool ascii_upper = (c >= 'A' && c <= 'Z');
    return ascii_upper ? c + ('a' - 'A') : c;
}
| 3814 | |
| 3815 static inline int chartocanon(int *c, const char *s) | |
| 3816 { | |
| 3817 int n = fz_chartorune(c, s); | |
| 3818 *c = canon(*c); | |
| 3819 return n; | |
| 3820 } | |
| 3821 | |
| 3822 static const char *match_string(const char *h, const char *n) | |
| 3823 { | |
| 3824 int hc, nc; | |
| 3825 const char *e = h; | |
| 3826 h += chartocanon(&hc, h); | |
| 3827 n += chartocanon(&nc, n); | |
| 3828 while (hc == nc) | |
| 3829 { | |
| 3830 e = h; | |
| 3831 if (hc == ' ') | |
| 3832 do | |
| 3833 h += chartocanon(&hc, h); | |
| 3834 while (hc == ' '); | |
| 3835 else | |
| 3836 h += chartocanon(&hc, h); | |
| 3837 if (nc == ' ') | |
| 3838 do | |
| 3839 n += chartocanon(&nc, n); | |
| 3840 while (nc == ' '); | |
| 3841 else | |
| 3842 n += chartocanon(&nc, n); | |
| 3843 } | |
| 3844 return nc == 0 ? e : NULL; | |
| 3845 } | |
| 3846 | |
| 3847 | |
| 3848 static const char *find_string(const char *s, const char *needle, const char **endp) | |
| 3849 { | |
| 3850 const char *end; | |
| 3851 while (*s) | |
| 3852 { | |
| 3853 end = match_string(s, needle); | |
| 3854 if (end) | |
| 3855 { | |
| 3856 *endp = end; | |
| 3857 return s; | |
| 3858 } | |
| 3859 ++s; | |
| 3860 } | |
| 3861 *endp = NULL; | |
| 3862 return NULL; | |
| 3863 } | |
| 3864 | |
| 3865 struct highlight | |
| 3866 { | |
| 3867 Py_ssize_t len; | |
| 3868 PyObject *quads; | |
| 3869 float hfuzz, vfuzz; | |
| 3870 }; | |
| 3871 | |
| 3872 | |
| 3873 static int | |
| 3874 JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, double *result) | |
| 3875 { | |
| 3876 PyObject *temp = PySequence_ITEM(obj, idx); | |
| 3877 if (!temp) return 1; | |
| 3878 *result = PyFloat_AsDouble(temp); | |
| 3879 Py_DECREF(temp); | |
| 3880 if (PyErr_Occurred()) { | |
| 3881 PyErr_Clear(); | |
| 3882 return 1; | |
| 3883 } | |
| 3884 return 0; | |
| 3885 } | |
| 3886 | |
| 3887 | |
| 3888 //----------------------------------------------------------------------------- | |
| 3889 // fz_quad from PySequence. Four floats are treated as rect. | |
| 3890 // Else must be four pairs of floats. | |
| 3891 //----------------------------------------------------------------------------- | |
| 3892 static fz_quad | |
| 3893 JM_quad_from_py(PyObject *r) | |
| 3894 { | |
| 3895 fz_quad q = fz_make_quad(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, | |
| 3896 FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, | |
| 3897 FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, | |
| 3898 FZ_MAX_INF_RECT, FZ_MAX_INF_RECT); | |
| 3899 fz_point p[4]; | |
| 3900 double test, x, y; | |
| 3901 Py_ssize_t i; | |
| 3902 PyObject *obj = NULL; | |
| 3903 | |
| 3904 if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) | |
| 3905 return q; | |
| 3906 | |
| 3907 if (JM_FLOAT_ITEM(r, 0, &test) == 0) | |
| 3908 return fz_quad_from_rect(JM_rect_from_py(r)); | |
| 3909 | |
| 3910 for (i = 0; i < 4; i++) { | |
| 3911 obj = PySequence_ITEM(r, i); // next point item | |
| 3912 if (!obj || !PySequence_Check(obj) || PySequence_Size(obj) != 2) | |
| 3913 goto exit_result; // invalid: cancel the rest | |
| 3914 | |
| 3915 if (JM_FLOAT_ITEM(obj, 0, &x) == 1) goto exit_result; | |
| 3916 if (JM_FLOAT_ITEM(obj, 1, &y) == 1) goto exit_result; | |
| 3917 if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT; | |
| 3918 if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT; | |
| 3919 if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT; | |
| 3920 if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT; | |
| 3921 p[i] = fz_make_point((float) x, (float) y); | |
| 3922 | |
| 3923 Py_CLEAR(obj); | |
| 3924 } | |
| 3925 q.ul = p[0]; | |
| 3926 q.ur = p[1]; | |
| 3927 q.ll = p[2]; | |
| 3928 q.lr = p[3]; | |
| 3929 return q; | |
| 3930 | |
| 3931 exit_result:; | |
| 3932 Py_CLEAR(obj); | |
| 3933 return q; | |
| 3934 } | |
| 3935 | |
| 3936 static float hdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 3937 { | |
| 3938 float dx = b->x - a->x; | |
| 3939 float dy = b->y - a->y; | |
| 3940 return fz_abs(dx * dir->x + dy * dir->y); | |
| 3941 } | |
| 3942 | |
| 3943 static float vdist(fz_point *dir, fz_point *a, fz_point *b) | |
| 3944 { | |
| 3945 float dx = b->x - a->x; | |
| 3946 float dy = b->y - a->y; | |
| 3947 return fz_abs(dx * dir->y + dy * dir->x); | |
| 3948 } | |
| 3949 | |
| 3950 static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) | |
| 3951 { | |
| 3952 struct highlight* hits = (struct highlight*) arg; | |
| 3953 float vfuzz = ch->size * hits->vfuzz; | |
| 3954 float hfuzz = ch->size * hits->hfuzz; | |
| 3955 fz_quad ch_quad = JM_char_quad(line, ch); | |
| 3956 if (hits->len > 0) { | |
| 3957 PyObject *quad = PySequence_ITEM(hits->quads, hits->len - 1); | |
| 3958 fz_quad end = JM_quad_from_py(quad); | |
| 3959 Py_DECREF(quad); | |
| 3960 if (hdist(&line->dir, &end.lr, &ch_quad.ll) < hfuzz | |
| 3961 && vdist(&line->dir, &end.lr, &ch_quad.ll) < vfuzz | |
| 3962 && hdist(&line->dir, &end.ur, &ch_quad.ul) < hfuzz | |
| 3963 && vdist(&line->dir, &end.ur, &ch_quad.ul) < vfuzz) | |
| 3964 { | |
| 3965 end.ur = ch_quad.ur; | |
| 3966 end.lr = ch_quad.lr; | |
| 3967 quad = JM_py_from_quad(end); | |
| 3968 PyList_SetItem(hits->quads, hits->len - 1, quad); | |
| 3969 return; | |
| 3970 } | |
| 3971 } | |
| 3972 LIST_APPEND_DROP(hits->quads, JM_py_from_quad(ch_quad)); | |
| 3973 hits->len++; | |
| 3974 } | |
| 3975 | |
| 3976 | |
| 3977 PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle) | |
| 3978 { | |
| 3979 fz_context* ctx = mupdf::internal_context_get(); | |
| 3980 struct highlight hits; | |
| 3981 fz_stext_block *block; | |
| 3982 fz_stext_line *line; | |
| 3983 fz_stext_char *ch; | |
| 3984 fz_buffer *buffer = NULL; | |
| 3985 const char *haystack, *begin, *end; | |
| 3986 fz_rect rect = page->mediabox; | |
| 3987 int c, inside; | |
| 3988 | |
| 3989 if (strlen(needle) == 0) Py_RETURN_NONE; | |
| 3990 PyObject *quads = PyList_New(0); | |
| 3991 hits.len = 0; | |
| 3992 hits.quads = quads; | |
| 3993 hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ | |
| 3994 hits.vfuzz = 0.1f; | |
| 3995 | |
| 3996 fz_try(ctx) { | |
| 3997 buffer = JM_new_buffer_from_stext_page( page); | |
| 3998 haystack = fz_string_from_buffer(ctx, buffer); | |
| 3999 begin = find_string(haystack, needle, &end); | |
| 4000 if (!begin) goto no_more_matches; | |
| 4001 | |
| 4002 inside = 0; | |
| 4003 for (block = page->first_block; block; block = block->next) { | |
| 4004 if (block->type != FZ_STEXT_BLOCK_TEXT) { | |
| 4005 continue; | |
| 4006 } | |
| 4007 for (line = block->u.t.first_line; line; line = line->next) { | |
| 4008 for (ch = line->first_char; ch; ch = ch->next) { | |
| 4009 if (!fz_is_infinite_rect(rect) && | |
| 4010 !JM_rects_overlap(rect, JM_char_bbox(line, ch))) { | |
| 4011 goto next_char; | |
| 4012 } | |
| 4013 try_new_match: | |
| 4014 if (!inside) { | |
| 4015 if (haystack >= begin) inside = 1; | |
| 4016 } | |
| 4017 if (inside) { | |
| 4018 if (haystack < end) { | |
| 4019 on_highlight_char(ctx, &hits, line, ch); | |
| 4020 } else { | |
| 4021 inside = 0; | |
| 4022 begin = find_string(haystack, needle, &end); | |
| 4023 if (!begin) goto no_more_matches; | |
| 4024 else goto try_new_match; | |
| 4025 } | |
| 4026 } | |
| 4027 haystack += fz_chartorune(&c, haystack); | |
| 4028 next_char:; | |
| 4029 } | |
| 4030 assert(*haystack == '\n'); | |
| 4031 ++haystack; | |
| 4032 } | |
| 4033 assert(*haystack == '\n'); | |
| 4034 ++haystack; | |
| 4035 } | |
| 4036 no_more_matches:; | |
| 4037 } | |
| 4038 fz_always(ctx) | |
| 4039 fz_drop_buffer(ctx, buffer); | |
| 4040 fz_catch(ctx) | |
| 4041 mupdf::internal_throw_exception(ctx); | |
| 4042 | |
| 4043 return quads; | |
| 4044 } | |
| 4045 | |
| 4046 void pixmap_copy( fz_pixmap* pm, const fz_pixmap* src, int n) | |
| 4047 { | |
| 4048 assert(pm->w == src->w); | |
| 4049 assert(pm->h == src->h); | |
| 4050 assert(n <= pm->n); | |
| 4051 assert(n <= src->n); | |
| 4052 | |
| 4053 if (pm->n == src->n) | |
| 4054 { | |
| 4055 // identical samples | |
| 4056 assert(pm->stride == src->stride); | |
| 4057 memcpy(pm->samples, src->samples, pm->w * pm->h * pm->n); | |
| 4058 } | |
| 4059 else | |
| 4060 { | |
| 4061 int nn; | |
| 4062 int do_alpha; | |
| 4063 if (pm->n > src->n) | |
| 4064 { | |
| 4065 assert(pm->n == src->n + 1); | |
| 4066 nn = src->n; | |
| 4067 assert(!src->alpha); | |
| 4068 assert(pm->alpha); | |
| 4069 do_alpha = 1; | |
| 4070 } | |
| 4071 else | |
| 4072 { | |
| 4073 assert(src->n == pm->n + 1); | |
| 4074 nn = pm->n; | |
| 4075 assert(src->alpha); | |
| 4076 assert(!pm->alpha); | |
| 4077 do_alpha = 0; | |
| 4078 } | |
| 4079 for (int y=0; y<pm->h; ++y) | |
| 4080 { | |
| 4081 for (int x=0; x<pm->w; ++x) | |
| 4082 { | |
| 4083 memcpy( | |
| 4084 pm->samples + pm->stride * y + pm->n * x, | |
| 4085 src->samples + src->stride * y + src->n * x, | |
| 4086 nn | |
| 4087 ); | |
| 4088 if (do_alpha) | |
| 4089 { | |
| 4090 pm->samples[pm->stride * y + pm->n * x + pm->n-1] = 255; | |
| 4091 } | |
| 4092 } | |
| 4093 } | |
| 4094 } | |
| 4095 } | |
| 4096 | |
| 4097 | |
| 4098 PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip) | |
| 4099 { | |
| 4100 fz_context* ctx = mupdf::internal_context_get(); | |
| 4101 PyObject* rc = PyDict_New(); | |
| 4102 fz_irect irect = fz_pixmap_bbox(ctx, pm); | |
| 4103 irect = fz_intersect_irect(irect, fz_round_rect(JM_rect_from_py(clip))); | |
| 4104 if (fz_is_empty_irect(irect)) | |
| 4105 { | |
| 4106 return rc; | |
| 4107 } | |
| 4108 size_t stride = pm->stride; | |
| 4109 size_t width = irect.x1 - irect.x0; | |
| 4110 size_t height = irect.y1 - irect.y0; | |
| 4111 size_t n = (size_t) pm->n; | |
| 4112 size_t substride = width * n; | |
| 4113 unsigned char* s = pm->samples + stride * (irect.y0 - pm->y) + n * (irect.x0 - pm->x); | |
| 4114 // Cache previous pixel. | |
| 4115 char oldpix[10]; | |
| 4116 assert(n <= sizeof(oldpix)); | |
| 4117 memcpy(oldpix, s, n); | |
| 4118 long cnt = 0; | |
| 4119 for (size_t i = 0; i < height; i++) | |
| 4120 { | |
| 4121 for (size_t j = 0; j < substride; j += n) | |
| 4122 { | |
| 4123 const char* newpix = (const char*) s + j; | |
| 4124 if (memcmp(oldpix, newpix, n)) | |
| 4125 { | |
| 4126 /* Pixel differs from previous pixel, so update results with | |
| 4127 last run of pixels. We get a PyObject representation of pixel | |
| 4128 so we can look up in Python dict <rc>. */ | |
| 4129 PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n); | |
| 4130 PyObject* c = PyDict_GetItem(rc, pixel); | |
| 4131 if (c) cnt += PyLong_AsLong(c); | |
| 4132 DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt)); | |
| 4133 Py_DECREF(pixel); | |
| 4134 /* Start next run of identical pixels. */ | |
| 4135 cnt = 1; | |
| 4136 memcpy(oldpix, newpix, n); | |
| 4137 } | |
| 4138 else | |
| 4139 { | |
| 4140 cnt += 1; | |
| 4141 } | |
| 4142 } | |
| 4143 s += stride; | |
| 4144 } | |
| 4145 /* Update results with last pixel. */ | |
| 4146 PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n); | |
| 4147 PyObject* c = PyDict_GetItem(rc, pixel); | |
| 4148 if (c) cnt += PyLong_AsLong(c); | |
| 4149 DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt)); | |
| 4150 Py_DECREF(pixel); | |
| 4151 PyErr_Clear(); | |
| 4152 return rc; | |
| 4153 } | |
| 4154 | |
| 4155 %} | |
| 4156 | |
| 4157 /* Declarations for functions defined above. */ | |
| 4158 | |
| 4159 void page_merge( | |
| 4160 mupdf::PdfDocument& doc_des, | |
| 4161 mupdf::PdfDocument& doc_src, | |
| 4162 int page_from, | |
| 4163 int page_to, | |
| 4164 int rotate, | |
| 4165 int links, | |
| 4166 int copy_annots, | |
| 4167 mupdf::PdfGraftMap& graft_map | |
| 4168 ); | |
| 4169 | |
| 4170 void JM_merge_range( | |
| 4171 mupdf::PdfDocument& doc_des, | |
| 4172 mupdf::PdfDocument& doc_src, | |
| 4173 int spage, | |
| 4174 int epage, | |
| 4175 int apage, | |
| 4176 int rotate, | |
| 4177 int links, | |
| 4178 int annots, | |
| 4179 int show_progress, | |
| 4180 mupdf::PdfGraftMap& graft_map | |
| 4181 ); | |
| 4182 | |
| 4183 void FzDocument_insert_pdf( | |
| 4184 mupdf::FzDocument& doc, | |
| 4185 mupdf::FzDocument& src, | |
| 4186 int from_page, | |
| 4187 int to_page, | |
| 4188 int start_at, | |
| 4189 int rotate, | |
| 4190 int links, | |
| 4191 int annots, | |
| 4192 int show_progress, | |
| 4193 int final, | |
| 4194 mupdf::PdfGraftMap& graft_map | |
| 4195 ); | |
| 4196 | |
| 4197 int page_xref(mupdf::FzDocument& this_doc, int pno); | |
| 4198 void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842); | |
| 4199 void _newPage(mupdf::PdfDocument& self, int pno=-1, float width=595, float height=842); | |
| 4200 void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem); | |
| 4201 void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count); | |
| 4202 std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page); | |
| 4203 mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& self, mupdf::FzPoint& point); | |
| 4204 mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& self, mupdf::FzPoint& point); | |
| 4205 const char* Tools_parse_da(mupdf::PdfAnnot& this_annot); | |
| 4206 PyObject* Annot_getAP(mupdf::PdfAnnot& annot); | |
| 4207 void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str); | |
| 4208 mupdf::FzPoint JM_point_from_py(PyObject* p); | |
| 4209 mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot); | |
| 4210 PyObject* util_transform_rect(PyObject* rect, PyObject* matrix); | |
| 4211 PyObject* Annot_rect3(mupdf::PdfAnnot& annot); | |
| 4212 mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage); | |
| 4213 mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& pdfpage); | |
| 4214 PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj); | |
| 4215 PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0); | |
| 4216 PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0); | |
| 4217 | |
| 4218 PyObject* Link_is_external(mupdf::FzLink& this_link); /* Returns a PyObject*, whereas Outline_is_external returns bool. */ | |
| 4219 PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist); /* PdfPage overload. */ | |
| 4220 PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist); /* FzPage overload. */ | |
| 4221 mupdf::FzLink Link_next(mupdf::FzLink& this_link); /* Returns an FzLink by value. */ | |
| 4222 | |
| 4223 static int page_count_fz2(void* document); /* Declared static; takes the document as an untyped void*. */ | |
| 4224 int page_count_fz(mupdf::FzDocument& document); /* Type-specific name for the FzDocument variant. */ | |
| 4225 int page_count_pdf(mupdf::PdfDocument& pdf); /* Type-specific name for the PdfDocument variant. */ | |
| 4226 int page_count(mupdf::FzDocument& document); /* Overloaded name: FzDocument variant. */ | |
| 4227 int page_count(mupdf::PdfDocument& pdf); /* Overloaded name: PdfDocument variant. */ | |
| 4228 | |
| 4229 PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno); /* PdfDocument overload. */ | |
| 4230 PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno); /* FzDocument overload. */ | |
| 4231 bool Outline_is_external(mupdf::FzOutline* outline); /* Takes a pointer rather than a reference; NOTE(review): null handling is not visible here -- confirm. */ | |
| 4232 void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items); /* PdfDocument overload; returns nothing. NOTE(review): presumably updates <items> in place -- confirm. */ | |
| 4233 void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items); /* FzDocument overload. */ | |
| 4234 | |
| 4235 int ll_fz_absi(int i); /* NOTE(review): presumably the absolute value of <i> via MuPDF fz_absi -- confirm. */ | |
| 4236 | |
| 4237 mupdf::FzDevice JM_new_texttrace_device(PyObject* out); /* Returns an FzDevice by value. NOTE(review): <out> presumably receives the trace output -- confirm. */ | |
| 4238 | |
| 4239 fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch); /* Takes mupdf:: class types by const reference and returns a plain fz_rect. */ | |
| 4240 | |
| 4241 static fz_quad JM_char_quad( fz_stext_line *line, fz_stext_char *ch); /* Declared static; works on the raw fz_stext_line and fz_stext_char pointers. */ | |
| 4242 void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page); /* Returns nothing. NOTE(review): presumably the text is written into <res> -- confirm. */ | |
| 4243 | |
| 4244 void set_skip_quad_corrections(int on); /* NOTE(review): <on> is presumably a boolean switch (non-zero enables) -- confirm. */ | |
| 4245 void set_subset_fontnames(int on); /* Same int <on> parameter as set_skip_quad_corrections. */ | |
| 4246 void set_small_glyph_heights(int on); /* Same int <on> parameter as set_skip_quad_corrections. */ | |
| 4247 | |
| 4248 mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj); /* Returns an FzRect by value. */ | |
| 4249 PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL); /* <extended>, <callback> and <method> are optional and default to NULL. */ | |
| 4250 | |
| 4251 mupdf::FzRect JM_make_spanlist( /* Returns an FzRect by value. NOTE(review): presumably the spans are stored into <line_dict> -- confirm. */ | |
| 4252 PyObject *line_dict, | |
| 4253 mupdf::FzStextLine& line, | |
| 4254 int raw, | |
| 4255 mupdf::FzBuffer& buff, | |
| 4256 mupdf::FzRect& tp_rect | |
| 4257 ); | |
| 4258 | |
| 4259 PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters); /* NOTE(review): <delimiters> is presumably a Python object naming extra word-delimiting characters -- confirm. */ | |
| 4260 PyObject* extractBLOCKS(mupdf::FzStextPage& self); /* Takes only the text page. */ | |
| 4261 | |
| 4262 PyObject* link_uri(mupdf::FzLink& link); /* Returns a PyObject* rather than a C string. */ | |
| 4263 | |
| 4264 fz_stext_page* page_get_textpage( /* Returns a raw fz_stext_page* rather than a mupdf:: class. NOTE(review): ownership of the returned pointer is not visible here -- confirm. */ | |
| 4265 mupdf::FzPage& self, | |
| 4266 PyObject* clip, | |
| 4267 int flags, | |
| 4268 PyObject* matrix | |
| 4269 ); | |
| 4270 | |
| 4271 void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw); /* Returns nothing. NOTE(review): presumably the result is stored into <page_dict> -- confirm. */ | |
| 4272 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y); /* Works on the raw fz_pixmap pointer. */ | |
| 4273 int pixmap_n(mupdf::FzPixmap& pixmap); /* Takes the mupdf::FzPixmap class, unlike pixmap_pixel and set_pixel. */ | |
| 4274 | |
| 4275 PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle); /* <needle> is passed as a C string. */ | |
| 4276 | |
| 4277 PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color); /* Works on the raw fz_pixmap pointer; <color> is a Python object. */ | |
| 4278 | |
| 4279 /* Copies from <src> to <pm>, which must have the same width and height. | |
| 4280 pm->n - src->n must be -1, 0 or +1. If -1, <src> must have alpha and <pm> must | |
| 4281 not have alpha, and we copy the non-alpha bytes. If +1, <src> must not have alpha | |
| 4282 and <pm> must have alpha, and we set <pm>'s alpha bytes all to 255. NOTE(review): the role of the <n> argument is not described here -- confirm against the definition. */ | |
| 4283 void pixmap_copy(fz_pixmap* pm, const fz_pixmap* src, int n); | |
| 4284 | |
| 4285 PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip); /* Works on the raw fz_pixmap pointer. NOTE(review): <clip> presumably restricts the counted area -- confirm. */ | |
