Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/helper-other.i @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 %{ | |
| 2 /* | |
| 3 # ------------------------------------------------------------------------ | |
| 4 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 5 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 6 # | |
| 7 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a | |
| 8 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 9 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 10 # ------------------------------------------------------------------------ | |
| 11 */ | |
| 12 fz_buffer *JM_object_to_buffer(fz_context *ctx, pdf_obj *val, int a, int b); | |
| 13 PyObject *JM_EscapeStrFromBuffer(fz_context *ctx, fz_buffer *buff); | |
| 14 pdf_obj *JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, char *src); | |
| 15 | |
| 16 // exception handling | |
| 17 void *JM_ReturnException(fz_context *ctx) | |
| 18 { | |
| 19 PyErr_SetString(JM_Exc_CurrentException, fz_caught_message(ctx)); | |
| 20 JM_Exc_CurrentException = PyExc_RuntimeError; | |
| 21 return NULL; | |
| 22 } | |
| 23 | |
| 24 | |
| 25 static int LIST_APPEND_DROP(PyObject *list, PyObject *item) | |
| 26 { | |
| 27 if (!list || !PyList_Check(list) || !item) return -2; | |
| 28 int rc = PyList_Append(list, item); | |
| 29 Py_DECREF(item); | |
| 30 return rc; | |
| 31 } | |
| 32 | |
| 33 static int DICT_SETITEM_DROP(PyObject *dict, PyObject *key, PyObject *value) | |
| 34 { | |
| 35 if (!dict || !PyDict_Check(dict) || !key || !value) return -2; | |
| 36 int rc = PyDict_SetItem(dict, key, value); | |
| 37 Py_DECREF(value); | |
| 38 return rc; | |
| 39 } | |
| 40 | |
| 41 static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value) | |
| 42 { | |
| 43 if (!dict || !PyDict_Check(dict) || !key || !value) return -2; | |
| 44 int rc = PyDict_SetItemString(dict, key, value); | |
| 45 Py_DECREF(value); | |
| 46 return rc; | |
| 47 } | |
| 48 | |
| 49 | |
| 50 //-------------------------------------- | |
| 51 // Ensure valid journalling state | |
| 52 //-------------------------------------- | |
| 53 int JM_have_operation(fz_context *ctx, pdf_document *pdf) | |
| 54 { | |
| 55 if (pdf->journal && !pdf_undoredo_step(ctx, pdf, 0)) { | |
| 56 return 0; | |
| 57 } | |
| 58 return 1; | |
| 59 } | |
| 60 | |
| 61 //---------------------------------- | |
| 62 // Set a PDF dict key to some value | |
| 63 //---------------------------------- | |
| 64 static pdf_obj | |
| 65 *JM_set_object_value(fz_context *ctx, pdf_obj *obj, const char *key, char *value) | |
| 66 { | |
| 67 fz_buffer *res = NULL; | |
| 68 pdf_obj *new_obj = NULL, *testkey = NULL; | |
| 69 PyObject *skey = PyUnicode_FromString(key); // Python version of dict key | |
| 70 PyObject *slash = PyUnicode_FromString("/"); // PDF path separator | |
| 71 PyObject *list = NULL, *newval=NULL, *newstr=NULL, *nullval=NULL; | |
| 72 const char eyecatcher[] = "fitz: replace me!"; | |
| 73 pdf_document *pdf = NULL; | |
| 74 fz_try(ctx) | |
| 75 { | |
| 76 pdf = pdf_get_bound_document(ctx, obj); | |
| 77 // split PDF key at path seps and take last key part | |
| 78 list = PyUnicode_Split(skey, slash, -1); | |
| 79 Py_ssize_t len = PySequence_Size(list); | |
| 80 Py_ssize_t i = len - 1; | |
| 81 Py_DECREF(skey); | |
| 82 skey = PySequence_GetItem(list, i); | |
| 83 | |
| 84 PySequence_DelItem(list, i); // del the last sub-key | |
| 85 len = PySequence_Size(list); // remaining length | |
| 86 testkey = pdf_dict_getp(ctx, obj, key); // check if key already exists | |
| 87 if (!testkey) { | |
| 88 /*----------------------------------------------------------------- | |
| 89 No, it will be created here. But we cannot allow this happening if | |
| 90 indirect objects are referenced. So we check all higher level | |
| 91 sub-paths for indirect references. | |
| 92 -----------------------------------------------------------------*/ | |
| 93 while (len > 0) { | |
| 94 PyObject *t = PyUnicode_Join(slash, list); // next high level | |
| 95 if (pdf_is_indirect(ctx, pdf_dict_getp(ctx, obj, JM_StrAsChar(t)))) { | |
| 96 Py_DECREF(t); | |
| 97 fz_throw(ctx, FZ_ERROR_GENERIC, "path to '%s' has indirects", JM_StrAsChar(skey)); | |
| 98 } | |
| 99 PySequence_DelItem(list, len - 1); // del last sub-key | |
| 100 len = PySequence_Size(list); // remaining length | |
| 101 Py_DECREF(t); | |
| 102 } | |
| 103 } | |
| 104 // Insert our eyecatcher. Will create all sub-paths in the chain, or | |
| 105 // respectively remove old value of key-path. | |
| 106 pdf_dict_putp_drop(ctx, obj, key, pdf_new_text_string(ctx, eyecatcher)); | |
| 107 testkey = pdf_dict_getp(ctx, obj, key); | |
| 108 if (!pdf_is_string(ctx, testkey)) { | |
| 109 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot insert value for '%s'", key); | |
| 110 } | |
| 111 const char *temp = pdf_to_text_string(ctx, testkey); | |
| 112 if (strcmp(temp, eyecatcher) != 0) { | |
| 113 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot insert value for '%s'", key); | |
| 114 } | |
| 115 // read the result as a string | |
| 116 res = JM_object_to_buffer(ctx, obj, 1, 0); | |
| 117 PyObject *objstr = JM_EscapeStrFromBuffer(ctx, res); | |
| 118 | |
| 119 // replace 'eyecatcher' by desired 'value' | |
| 120 nullval = PyUnicode_FromFormat("/%s(%s)", JM_StrAsChar(skey), eyecatcher); | |
| 121 newval = PyUnicode_FromFormat("/%s %s", JM_StrAsChar(skey), value); | |
| 122 newstr = PyUnicode_Replace(objstr, nullval, newval, 1); | |
| 123 | |
| 124 // make PDF object from resulting string | |
| 125 new_obj = JM_pdf_obj_from_str(ctx, pdf, JM_StrAsChar(newstr)); | |
| 126 } | |
| 127 fz_always(ctx) { | |
| 128 fz_drop_buffer(ctx, res); | |
| 129 Py_CLEAR(skey); | |
| 130 Py_CLEAR(slash); | |
| 131 Py_CLEAR(list); | |
| 132 Py_CLEAR(newval); | |
| 133 Py_CLEAR(newstr); | |
| 134 Py_CLEAR(nullval); | |
| 135 } | |
| 136 fz_catch(ctx) { | |
| 137 fz_rethrow(ctx); | |
| 138 } | |
| 139 return new_obj; | |
| 140 } | |
| 141 | |
| 142 | |
| 143 static void | |
| 144 JM_get_page_labels(fz_context *ctx, PyObject *liste, pdf_obj *nums) | |
| 145 { | |
| 146 int pno, i, n = pdf_array_len(ctx, nums); | |
| 147 char *c = NULL; | |
| 148 pdf_obj *val; | |
| 149 fz_buffer *res = NULL; | |
| 150 for (i = 0; i < n; i += 2) { | |
| 151 pdf_obj *key = pdf_resolve_indirect(ctx, pdf_array_get(ctx, nums, i)); | |
| 152 pno = pdf_to_int(ctx, key); | |
| 153 val = pdf_resolve_indirect(ctx, pdf_array_get(ctx, nums, i + 1)); | |
| 154 res = JM_object_to_buffer(ctx, val, 1, 0); | |
| 155 fz_buffer_storage(ctx, res, &c); | |
| 156 LIST_APPEND_DROP(liste, Py_BuildValue("is", pno, c)); | |
| 157 fz_drop_buffer(ctx, res); | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 | |
| 162 PyObject *JM_EscapeStrFromBuffer(fz_context *ctx, fz_buffer *buff) | |
| 163 { | |
| 164 if (!buff) return EMPTY_STRING; | |
| 165 unsigned char *s = NULL; | |
| 166 size_t len = fz_buffer_storage(ctx, buff, &s); | |
| 167 PyObject *val = PyUnicode_DecodeRawUnicodeEscape((const char *) s, (Py_ssize_t) len, "replace"); | |
| 168 if (!val) { | |
| 169 val = EMPTY_STRING; | |
| 170 PyErr_Clear(); | |
| 171 } | |
| 172 return val; | |
| 173 } | |
| 174 | |
| 175 PyObject *JM_UnicodeFromBuffer(fz_context *ctx, fz_buffer *buff) | |
| 176 { | |
| 177 unsigned char *s = NULL; | |
| 178 Py_ssize_t len = (Py_ssize_t) fz_buffer_storage(ctx, buff, &s); | |
| 179 PyObject *val = PyUnicode_DecodeUTF8((const char *) s, len, "replace"); | |
| 180 if (!val) { | |
| 181 val = EMPTY_STRING; | |
| 182 PyErr_Clear(); | |
| 183 } | |
| 184 return val; | |
| 185 } | |
| 186 | |
| 187 PyObject *JM_UnicodeFromStr(const char *c) | |
| 188 { | |
| 189 if (!c) return EMPTY_STRING; | |
| 190 PyObject *val = Py_BuildValue("s", c); | |
| 191 if (!val) { | |
| 192 val = EMPTY_STRING; | |
| 193 PyErr_Clear(); | |
| 194 } | |
| 195 return val; | |
| 196 } | |
| 197 | |
| 198 PyObject *JM_EscapeStrFromStr(const char *c) | |
| 199 { | |
| 200 if (!c) return EMPTY_STRING; | |
| 201 PyObject *val = PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace"); | |
| 202 if (!val) { | |
| 203 val = EMPTY_STRING; | |
| 204 PyErr_Clear(); | |
| 205 } | |
| 206 return val; | |
| 207 } | |
| 208 | |
| 209 | |
| 210 // list of valid unicodes of a fz_font | |
| 211 void JM_valid_chars(fz_context *ctx, fz_font *font, void *arr) | |
| 212 { | |
| 213 FT_Face face = font->ft_face; | |
| 214 FT_ULong ucs; | |
| 215 FT_UInt gid; | |
| 216 long *table = (long *)arr; | |
| 217 fz_lock(ctx, FZ_LOCK_FREETYPE); | |
| 218 ucs = FT_Get_First_Char(face, &gid); | |
| 219 while (gid > 0) | |
| 220 { | |
| 221 if (gid < (FT_ULong)face->num_glyphs && face->num_glyphs > 0) | |
| 222 table[gid] = (long)ucs; | |
| 223 ucs = FT_Get_Next_Char(face, ucs, &gid); | |
| 224 } | |
| 225 fz_unlock(ctx, FZ_LOCK_FREETYPE); | |
| 226 return; | |
| 227 } | |
| 228 | |
| 229 | |
| 230 // redirect MuPDF warnings | |
| 231 void JM_mupdf_warning(void *user, const char *message) | |
| 232 { | |
| 233 LIST_APPEND_DROP(JM_mupdf_warnings_store, JM_EscapeStrFromStr(message)); | |
| 234 if (JM_mupdf_show_warnings) { | |
| 235 PySys_WriteStderr("mupdf: %s\n", message); | |
| 236 } | |
| 237 } | |
| 238 | |
| 239 // redirect MuPDF errors | |
| 240 void JM_mupdf_error(void *user, const char *message) | |
| 241 { | |
| 242 LIST_APPEND_DROP(JM_mupdf_warnings_store, JM_EscapeStrFromStr(message)); | |
| 243 if (JM_mupdf_show_errors) { | |
| 244 PySys_WriteStderr("mupdf: %s\n", message); | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 // a simple tracer | |
// Debugging aid: write 'id' followed by a newline to Python's stdout.
void JM_TRACE(const char *id)
{
    const char *text = id;
    PySys_WriteStdout("%s\n", text);
}
| 253 | |
| 254 | |
| 255 // put a warning on Python-stdout | |
// Write 'id', prefixed by "warning: " and followed by a newline, to Python's
// stdout.
void JM_Warning(const char *id)
{
    const char *text = id;
    PySys_WriteStdout("warning: %s\n", text);
}
| 260 | |
| 261 #if JM_MEMORY == 1 | |
| 262 //----------------------------------------------------------------------------- | |
| 263 // The following 3 functions replace MuPDF standard memory allocation. | |
| 264 // This will ensure, that MuPDF memory handling becomes part of Python's | |
| 265 // memory management. | |
| 266 //----------------------------------------------------------------------------- | |
| 267 static void *JM_Py_Malloc(void *opaque, size_t size) | |
| 268 { | |
| 269 void *mem = PyMem_Malloc((Py_ssize_t) size); | |
| 270 if (mem) return mem; | |
| 271 fz_throw(gctx, FZ_ERROR_MEMORY, "malloc of %zu bytes failed", size); | |
| 272 } | |
| 273 | |
| 274 static void *JM_Py_Realloc(void *opaque, void *old, size_t size) | |
| 275 { | |
| 276 void *mem = PyMem_Realloc(old, (Py_ssize_t) size); | |
| 277 if (mem) return mem; | |
| 278 fz_throw(gctx, FZ_ERROR_MEMORY, "realloc of %zu bytes failed", size); | |
| 279 } | |
| 280 | |
// free callback for MuPDF's fz_alloc_context: releases 'ptr' through
// PyMem_Free. 'opaque' is not used.
static void JM_PY_Free(void *opaque, void *ptr)
{
    void *block = ptr;
    PyMem_Free(block);
}
| 285 | |
| 286 const fz_alloc_context JM_Alloc_Context = | |
| 287 { | |
| 288 NULL, | |
| 289 JM_Py_Malloc, | |
| 290 JM_Py_Realloc, | |
| 291 JM_PY_Free | |
| 292 }; | |
| 293 #endif | |
| 294 | |
| 295 PyObject *JM_fitz_config() | |
| 296 { | |
| 297 #if defined(TOFU) | |
| 298 #define have_TOFU JM_BOOL(0) | |
| 299 #else | |
| 300 #define have_TOFU JM_BOOL(1) | |
| 301 #endif | |
| 302 #if defined(TOFU_CJK) | |
| 303 #define have_TOFU_CJK JM_BOOL(0) | |
| 304 #else | |
| 305 #define have_TOFU_CJK JM_BOOL(1) | |
| 306 #endif | |
| 307 #if defined(TOFU_CJK_EXT) | |
| 308 #define have_TOFU_CJK_EXT JM_BOOL(0) | |
| 309 #else | |
| 310 #define have_TOFU_CJK_EXT JM_BOOL(1) | |
| 311 #endif | |
| 312 #if defined(TOFU_CJK_LANG) | |
| 313 #define have_TOFU_CJK_LANG JM_BOOL(0) | |
| 314 #else | |
| 315 #define have_TOFU_CJK_LANG JM_BOOL(1) | |
| 316 #endif | |
| 317 #if defined(TOFU_EMOJI) | |
| 318 #define have_TOFU_EMOJI JM_BOOL(0) | |
| 319 #else | |
| 320 #define have_TOFU_EMOJI JM_BOOL(1) | |
| 321 #endif | |
| 322 #if defined(TOFU_HISTORIC) | |
| 323 #define have_TOFU_HISTORIC JM_BOOL(0) | |
| 324 #else | |
| 325 #define have_TOFU_HISTORIC JM_BOOL(1) | |
| 326 #endif | |
| 327 #if defined(TOFU_SYMBOL) | |
| 328 #define have_TOFU_SYMBOL JM_BOOL(0) | |
| 329 #else | |
| 330 #define have_TOFU_SYMBOL JM_BOOL(1) | |
| 331 #endif | |
| 332 #if defined(TOFU_SIL) | |
| 333 #define have_TOFU_SIL JM_BOOL(0) | |
| 334 #else | |
| 335 #define have_TOFU_SIL JM_BOOL(1) | |
| 336 #endif | |
| 337 #if defined(TOFU_BASE14) | |
| 338 #define have_TOFU_BASE14 JM_BOOL(0) | |
| 339 #else | |
| 340 #define have_TOFU_BASE14 JM_BOOL(1) | |
| 341 #endif | |
| 342 PyObject *dict = PyDict_New(); | |
| 343 DICT_SETITEMSTR_DROP(dict, "plotter-g", JM_BOOL(FZ_PLOTTERS_G)); | |
| 344 DICT_SETITEMSTR_DROP(dict, "plotter-rgb", JM_BOOL(FZ_PLOTTERS_RGB)); | |
| 345 DICT_SETITEMSTR_DROP(dict, "plotter-cmyk", JM_BOOL(FZ_PLOTTERS_CMYK)); | |
| 346 DICT_SETITEMSTR_DROP(dict, "plotter-n", JM_BOOL(FZ_PLOTTERS_N)); | |
| 347 DICT_SETITEMSTR_DROP(dict, "pdf", JM_BOOL(FZ_ENABLE_PDF)); | |
| 348 DICT_SETITEMSTR_DROP(dict, "xps", JM_BOOL(FZ_ENABLE_XPS)); | |
| 349 DICT_SETITEMSTR_DROP(dict, "svg", JM_BOOL(FZ_ENABLE_SVG)); | |
| 350 DICT_SETITEMSTR_DROP(dict, "cbz", JM_BOOL(FZ_ENABLE_CBZ)); | |
| 351 DICT_SETITEMSTR_DROP(dict, "img", JM_BOOL(FZ_ENABLE_IMG)); | |
| 352 DICT_SETITEMSTR_DROP(dict, "html", JM_BOOL(FZ_ENABLE_HTML)); | |
| 353 DICT_SETITEMSTR_DROP(dict, "epub", JM_BOOL(FZ_ENABLE_EPUB)); | |
| 354 DICT_SETITEMSTR_DROP(dict, "jpx", JM_BOOL(FZ_ENABLE_JPX)); | |
| 355 DICT_SETITEMSTR_DROP(dict, "js", JM_BOOL(FZ_ENABLE_JS)); | |
| 356 DICT_SETITEMSTR_DROP(dict, "tofu", have_TOFU); | |
| 357 DICT_SETITEMSTR_DROP(dict, "tofu-cjk", have_TOFU_CJK); | |
| 358 DICT_SETITEMSTR_DROP(dict, "tofu-cjk-ext", have_TOFU_CJK_EXT); | |
| 359 DICT_SETITEMSTR_DROP(dict, "tofu-cjk-lang", have_TOFU_CJK_LANG); | |
| 360 DICT_SETITEMSTR_DROP(dict, "tofu-emoji", have_TOFU_EMOJI); | |
| 361 DICT_SETITEMSTR_DROP(dict, "tofu-historic", have_TOFU_HISTORIC); | |
| 362 DICT_SETITEMSTR_DROP(dict, "tofu-symbol", have_TOFU_SYMBOL); | |
| 363 DICT_SETITEMSTR_DROP(dict, "tofu-sil", have_TOFU_SIL); | |
| 364 DICT_SETITEMSTR_DROP(dict, "icc", JM_BOOL(FZ_ENABLE_ICC)); | |
| 365 DICT_SETITEMSTR_DROP(dict, "base14", have_TOFU_BASE14); | |
| 366 DICT_SETITEMSTR_DROP(dict, "py-memory", JM_BOOL(JM_MEMORY)); | |
| 367 return dict; | |
| 368 } | |
| 369 | |
| 370 //---------------------------------------------------------------------------- | |
| 371 // Update a color float array with values from a Python sequence. | |
| 372 // Any error condition is treated as a no-op. | |
| 373 //---------------------------------------------------------------------------- | |
| 374 void JM_color_FromSequence(PyObject *color, int *n, float col[4]) | |
| 375 { | |
| 376 if (!color || color == Py_None) { | |
| 377 *n = -1; | |
| 378 return; | |
| 379 } | |
| 380 if (PyFloat_Check(color)) { // maybe just a single float | |
| 381 *n = 1; | |
| 382 float c = (float) PyFloat_AsDouble(color); | |
| 383 if (!INRANGE(c, 0, 1)) { | |
| 384 c = 1; | |
| 385 } | |
| 386 col[0] = c; | |
| 387 return; | |
| 388 } | |
| 389 | |
| 390 if (!PySequence_Check(color)) { | |
| 391 *n = -1; | |
| 392 return; | |
| 393 } | |
| 394 int len = (int) PySequence_Size(color), rc; | |
| 395 if (len == 0) { | |
| 396 *n = 0; | |
| 397 return; | |
| 398 } | |
| 399 if (!INRANGE(len, 1, 4) || len == 2) { | |
| 400 *n = -1; | |
| 401 return; | |
| 402 } | |
| 403 | |
| 404 double mcol[4] = {0,0,0,0}; // local color storage | |
| 405 Py_ssize_t i; | |
| 406 for (i = 0; i < len; i++) { | |
| 407 rc = JM_FLOAT_ITEM(color, i, &mcol[i]); | |
| 408 if (!INRANGE(mcol[i], 0, 1) || rc == 1) mcol[i] = 1; | |
| 409 } | |
| 410 | |
| 411 *n = len; | |
| 412 for (i = 0; i < len; i++) | |
| 413 col[i] = (float) mcol[i]; | |
| 414 return; | |
| 415 } | |
| 416 | |
| 417 // return extension for fitz image type | |
| 418 const char *JM_image_extension(int type) | |
| 419 { | |
| 420 switch (type) { | |
| 421 case(FZ_IMAGE_FAX): return "fax"; | |
| 422 case(FZ_IMAGE_RAW): return "raw"; | |
| 423 case(FZ_IMAGE_FLATE): return "flate"; | |
| 424 case(FZ_IMAGE_LZW): return "lzw"; | |
| 425 case(FZ_IMAGE_RLD): return "rld"; | |
| 426 case(FZ_IMAGE_BMP): return "bmp"; | |
| 427 case(FZ_IMAGE_GIF): return "gif"; | |
| 428 case(FZ_IMAGE_JBIG2): return "jb2"; | |
| 429 case(FZ_IMAGE_JPEG): return "jpeg"; | |
| 430 case(FZ_IMAGE_JPX): return "jpx"; | |
| 431 case(FZ_IMAGE_JXR): return "jxr"; | |
| 432 case(FZ_IMAGE_PNG): return "png"; | |
| 433 case(FZ_IMAGE_PNM): return "pnm"; | |
| 434 case(FZ_IMAGE_TIFF): return "tiff"; | |
| 435 // case(FZ_IMAGE_PSD): return "psd"; | |
| 436 case(FZ_IMAGE_UNKNOWN): return "n/a"; | |
| 437 default: return "n/a"; | |
| 438 } | |
| 439 } | |
| 440 | |
| 441 //---------------------------------------------------------------------------- | |
| 442 // Turn fz_buffer into a Python bytes object | |
| 443 //---------------------------------------------------------------------------- | |
| 444 PyObject *JM_BinFromBuffer(fz_context *ctx, fz_buffer *buffer) | |
| 445 { | |
| 446 if (!buffer) { | |
| 447 return PyBytes_FromString(""); | |
| 448 } | |
| 449 unsigned char *c = NULL; | |
| 450 size_t len = fz_buffer_storage(ctx, buffer, &c); | |
| 451 return PyBytes_FromStringAndSize((const char *) c, (Py_ssize_t) len); | |
| 452 } | |
| 453 | |
| 454 //---------------------------------------------------------------------------- | |
| 455 // Turn fz_buffer into a Python bytearray object | |
| 456 //---------------------------------------------------------------------------- | |
| 457 PyObject *JM_BArrayFromBuffer(fz_context *ctx, fz_buffer *buffer) | |
| 458 { | |
| 459 if (!buffer) { | |
| 460 return PyByteArray_FromStringAndSize("", 0); | |
| 461 } | |
| 462 unsigned char *c = NULL; | |
| 463 size_t len = fz_buffer_storage(ctx, buffer, &c); | |
| 464 return PyByteArray_FromStringAndSize((const char *) c, (Py_ssize_t) len); | |
| 465 } | |
| 466 | |
| 467 | |
| 468 //---------------------------------------------------------------------------- | |
| 469 // compress an fz_buffer into a new buffer | |
| 470 //---------------------------------------------------------------------------- | |
| 471 fz_buffer *JM_compress_buffer(fz_context *ctx, fz_buffer *inbuffer) | |
| 472 { | |
| 473 fz_buffer *buf = NULL; | |
| 474 fz_try(ctx) { | |
| 475 size_t compressed_length = 0; | |
| 476 unsigned char *data = fz_new_deflated_data_from_buffer(ctx, | |
| 477 &compressed_length, inbuffer, FZ_DEFLATE_BEST); | |
| 478 if (data == NULL || compressed_length == 0) | |
| 479 return NULL; | |
| 480 buf = fz_new_buffer_from_data(ctx, data, compressed_length); | |
| 481 fz_resize_buffer(ctx, buf, compressed_length); | |
| 482 } | |
| 483 fz_catch(ctx) { | |
| 484 fz_drop_buffer(ctx, buf); | |
| 485 fz_rethrow(ctx); | |
| 486 } | |
| 487 return buf; | |
| 488 } | |
| 489 | |
| 490 //---------------------------------------------------------------------------- | |
| 491 // update a stream object | |
| 492 // compress stream when beneficial | |
| 493 //---------------------------------------------------------------------------- | |
| 494 void JM_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *buffer, int compress) | |
| 495 { | |
| 496 | |
| 497 fz_buffer *nres = NULL; | |
| 498 size_t len = fz_buffer_storage(ctx, buffer, NULL); | |
| 499 size_t nlen = len; | |
| 500 | |
| 501 if (compress == 1 && len > 30) { // ignore small stuff | |
| 502 nres = JM_compress_buffer(ctx, buffer); | |
| 503 nlen = fz_buffer_storage(ctx, nres, NULL); | |
| 504 } | |
| 505 | |
| 506 if (nlen < len && nres && compress==1) { // was it worth the effort? | |
| 507 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode)); | |
| 508 pdf_update_stream(ctx, doc, obj, nres, 1); | |
| 509 } else { | |
| 510 pdf_update_stream(ctx, doc, obj, buffer, 0); | |
| 511 } | |
| 512 fz_drop_buffer(ctx, nres); | |
| 513 } | |
| 514 | |
| 515 //----------------------------------------------------------------------------- | |
| 516 // return hex characters for n characters in input 'in' | |
| 517 //----------------------------------------------------------------------------- | |
// Write the lowercase hexadecimal representation of the first 'n' bytes of
// 'in' to 'out', followed by a terminating NUL byte. 'out' must provide room
// for 2*n + 1 bytes. A negative 'n' is treated as 0.
void hexlify(int n, unsigned char *in, unsigned char *out)
{
    // Digit table in correct order ('d' before 'e').
    static const unsigned char hdigit[] = "0123456789abcdef";
    int i;

    if (n < 0) {
        n = 0;
    }
    for (i = 0; i < n; i++) {
        out[2*i] = hdigit[in[i] >> 4];        // high nibble
        out[2*i + 1] = hdigit[in[i] & 0x0f];  // low nibble
    }
    out[2*n] = 0;
}
| 530 | |
| 531 //---------------------------------------------------------------------------- | |
| 532 // Make fz_buffer from a PyBytes, PyByteArray, or io.BytesIO object | |
| 533 //---------------------------------------------------------------------------- | |
| 534 fz_buffer *JM_BufferFromBytes(fz_context *ctx, PyObject *stream) | |
| 535 { | |
| 536 char *c = NULL; | |
| 537 PyObject *mybytes = NULL; | |
| 538 size_t len = 0; | |
| 539 fz_buffer *res = NULL; | |
| 540 fz_var(res); | |
| 541 fz_try(ctx) { | |
| 542 if (PyBytes_Check(stream)) { | |
| 543 c = PyBytes_AS_STRING(stream); | |
| 544 len = (size_t) PyBytes_GET_SIZE(stream); | |
| 545 } else if (PyByteArray_Check(stream)) { | |
| 546 c = PyByteArray_AS_STRING(stream); | |
| 547 len = (size_t) PyByteArray_GET_SIZE(stream); | |
| 548 } else if (PyObject_HasAttrString(stream, "getvalue")) { | |
| 549 // we assume here that this delivers what we expect | |
| 550 mybytes = PyObject_CallMethod(stream, "getvalue", NULL); | |
| 551 c = PyBytes_AS_STRING(mybytes); | |
| 552 len = (size_t) PyBytes_GET_SIZE(mybytes); | |
| 553 } | |
| 554 // if none of the above, c is NULL and we return an empty buffer | |
| 555 if (c) { | |
| 556 res = fz_new_buffer_from_copied_data(ctx, (const unsigned char *) c, len); | |
| 557 } else { | |
| 558 res = fz_new_buffer(ctx, 1); | |
| 559 fz_append_byte(ctx, res, 10); | |
| 560 } | |
| 561 fz_terminate_buffer(ctx, res); | |
| 562 } | |
| 563 fz_always(ctx) { | |
| 564 Py_CLEAR(mybytes); | |
| 565 PyErr_Clear(); | |
| 566 } | |
| 567 fz_catch(ctx) { | |
| 568 fz_drop_buffer(ctx, res); | |
| 569 fz_rethrow(ctx); | |
| 570 } | |
| 571 return res; | |
| 572 } | |
| 573 | |
| 574 | |
| 575 //---------------------------------------------------------------------------- | |
| 576 // Deep-copies a source page to the target. | |
| 577 // Modified version of function of pdfmerge.c: we also copy annotations, but | |
| 578 // we skip some subtypes. In addition we rotate output. | |
| 579 //---------------------------------------------------------------------------- | |
| 580 static void | |
| 581 page_merge(fz_context *ctx, pdf_document *doc_des, pdf_document *doc_src, int page_from, int page_to, int rotate, int links, int copy_annots, pdf_graft_map *graft_map) | |
| 582 { | |
| 583 pdf_obj *page_ref = NULL; | |
| 584 pdf_obj *page_dict = NULL; | |
| 585 pdf_obj *obj = NULL, *ref = NULL; | |
| 586 | |
| 587 // list of object types (per page) we want to copy | |
| 588 static pdf_obj * const known_page_objs[] = { | |
| 589 PDF_NAME(Contents), | |
| 590 PDF_NAME(Resources), | |
| 591 PDF_NAME(MediaBox), | |
| 592 PDF_NAME(CropBox), | |
| 593 PDF_NAME(BleedBox), | |
| 594 PDF_NAME(TrimBox), | |
| 595 PDF_NAME(ArtBox), | |
| 596 PDF_NAME(Rotate), | |
| 597 PDF_NAME(UserUnit) | |
| 598 }; | |
| 599 | |
| 600 int i, n; | |
| 601 | |
| 602 fz_var(ref); | |
| 603 fz_var(page_dict); | |
| 604 | |
| 605 fz_try(ctx) { | |
| 606 page_ref = pdf_lookup_page_obj(ctx, doc_src, page_from); | |
| 607 | |
| 608 // make new page dict in dest doc | |
| 609 page_dict = pdf_new_dict(ctx, doc_des, 4); | |
| 610 pdf_dict_put(ctx, page_dict, PDF_NAME(Type), PDF_NAME(Page)); | |
| 611 | |
| 612 for (i = 0; i < (int) nelem(known_page_objs); i++) { | |
| 613 obj = pdf_dict_get_inheritable(ctx, page_ref, known_page_objs[i]); | |
| 614 if (obj != NULL) { | |
| 615 pdf_dict_put_drop(ctx, page_dict, known_page_objs[i], pdf_graft_mapped_object(ctx, graft_map, obj)); | |
| 616 } | |
| 617 } | |
| 618 | |
| 619 // Copy annotations, but skip Link, Popup, IRT, Widget types | |
| 620 // If selected, remove dict keys P (parent) and Popup | |
| 621 if (copy_annots) { | |
| 622 pdf_obj *old_annots = pdf_dict_get(ctx, page_ref, PDF_NAME(Annots)); | |
| 623 n = pdf_array_len(ctx, old_annots); | |
| 624 if (n > 0) { | |
| 625 pdf_obj *new_annots = pdf_dict_put_array(ctx, page_dict, PDF_NAME(Annots), n); | |
| 626 for (i = 0; i < n; i++) { | |
| 627 pdf_obj *o = pdf_array_get(ctx, old_annots, i); | |
| 628 if (!pdf_is_dict(ctx, o)) continue; // skip non-dict items | |
| 629 if (pdf_dict_get(ctx, o, PDF_NAME(IRT))) continue; | |
| 630 pdf_obj *subtype = pdf_dict_get(ctx, o, PDF_NAME(Subtype)); | |
| 631 if (pdf_name_eq(ctx, subtype, PDF_NAME(Link))) continue; | |
| 632 if (pdf_name_eq(ctx, subtype, PDF_NAME(Popup))) continue; | |
| 633 if (pdf_name_eq(ctx, subtype, PDF_NAME(Widget))) continue; | |
| 634 pdf_dict_del(ctx, o, PDF_NAME(Popup)); | |
| 635 pdf_dict_del(ctx, o, PDF_NAME(P)); | |
| 636 pdf_obj *copy_o = pdf_graft_mapped_object(ctx, graft_map, o); | |
| 637 pdf_obj *annot = pdf_new_indirect(ctx, doc_des, | |
| 638 pdf_to_num(ctx, copy_o), 0); | |
| 639 pdf_array_push_drop(ctx, new_annots, annot); | |
| 640 pdf_drop_obj(ctx, copy_o); | |
| 641 } | |
| 642 } | |
| 643 } | |
| 644 // rotate the page | |
| 645 if (rotate != -1) { | |
| 646 pdf_dict_put_int(ctx, page_dict, PDF_NAME(Rotate), (int64_t) rotate); | |
| 647 } | |
| 648 // Now add the page dictionary to dest PDF | |
| 649 ref = pdf_add_object(ctx, doc_des, page_dict); | |
| 650 | |
| 651 // Insert new page at specified location | |
| 652 pdf_insert_page(ctx, doc_des, page_to, ref); | |
| 653 | |
| 654 } | |
| 655 fz_always(ctx) { | |
| 656 pdf_drop_obj(ctx, page_dict); | |
| 657 pdf_drop_obj(ctx, ref); | |
| 658 } | |
| 659 fz_catch(ctx) { | |
| 660 fz_rethrow(ctx); | |
| 661 } | |
| 662 } | |
| 663 | |
| 664 //----------------------------------------------------------------------------- | |
| 665 // Copy a range of pages (spage, epage) from a source PDF to a specified | |
| 666 // location (apage) of the target PDF. | |
| 667 // If spage > epage, the sequence of source pages is reversed. | |
| 668 //----------------------------------------------------------------------------- | |
| 669 void JM_merge_range(fz_context *ctx, pdf_document *doc_des, pdf_document *doc_src, int spage, int epage, int apage, int rotate, int links, int annots, int show_progress, pdf_graft_map *graft_map) | |
| 670 { | |
| 671 int page, afterpage; | |
| 672 afterpage = apage; | |
| 673 int counter = 0; // copied pages counter | |
| 674 int total = fz_absi(epage - spage) + 1; // total pages to copy | |
| 675 | |
| 676 fz_try(ctx) { | |
| 677 if (spage < epage) { | |
| 678 for (page = spage; page <= epage; page++, afterpage++) { | |
| 679 page_merge(ctx, doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); | |
| 680 counter++; | |
| 681 if (show_progress > 0 && counter % show_progress == 0) { | |
| 682 PySys_WriteStdout("Inserted %i of %i pages.\n", counter, total); | |
| 683 } | |
| 684 } | |
| 685 } else { | |
| 686 for (page = spage; page >= epage; page--, afterpage++) { | |
| 687 page_merge(ctx, doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); | |
| 688 counter++; | |
| 689 if (show_progress > 0 && counter % show_progress == 0) { | |
| 690 PySys_WriteStdout("Inserted %i of %i pages.\n", counter, total); | |
| 691 } | |
| 692 } | |
| 693 } | |
| 694 } | |
| 695 | |
| 696 fz_catch(ctx) { | |
| 697 fz_rethrow(ctx); | |
| 698 } | |
| 699 } | |
| 700 | |
| 701 //---------------------------------------------------------------------------- | |
| 702 // Return list of outline xref numbers. Recursive function. Arguments: | |
| 703 // 'obj' first OL item | |
| 704 // 'xrefs' empty Python list | |
| 705 //---------------------------------------------------------------------------- | |
| 706 PyObject *JM_outline_xrefs(fz_context *ctx, pdf_obj *obj, PyObject *xrefs) | |
| 707 { | |
| 708 pdf_obj *first, *parent, *thisobj; | |
| 709 if (!obj) return xrefs; | |
| 710 PyObject *newxref = NULL; | |
| 711 thisobj = obj; | |
| 712 while (thisobj) { | |
| 713 newxref = PyLong_FromLong((long) pdf_to_num(ctx, thisobj)); | |
| 714 if (PySequence_Contains(xrefs, newxref) || | |
| 715 pdf_dict_get(ctx, thisobj, PDF_NAME(Type))) { | |
| 716 // circular ref or top of chain: terminate | |
| 717 Py_DECREF(newxref); | |
| 718 break; | |
| 719 } | |
| 720 LIST_APPEND_DROP(xrefs, newxref); | |
| 721 first = pdf_dict_get(ctx, thisobj, PDF_NAME(First)); // try go down | |
| 722 if (pdf_is_dict(ctx, first)) xrefs = JM_outline_xrefs(ctx, first, xrefs); | |
| 723 thisobj = pdf_dict_get(ctx, thisobj, PDF_NAME(Next)); // try go next | |
| 724 parent = pdf_dict_get(ctx, thisobj, PDF_NAME(Parent)); // get parent | |
| 725 if (!pdf_is_dict(ctx, thisobj)) { | |
| 726 thisobj = parent; | |
| 727 } | |
| 728 } | |
| 729 return xrefs; | |
| 730 } | |
| 731 | |
| 732 | |
| 733 //------------------------------------------------------------------- | |
| 734 // Return the contents of a font file, identified by xref | |
| 735 //------------------------------------------------------------------- | |
| 736 fz_buffer *JM_get_fontbuffer(fz_context *ctx, pdf_document *doc, int xref) | |
| 737 { | |
| 738 if (xref < 1) return NULL; | |
| 739 pdf_obj *o, *obj = NULL, *desft, *stream = NULL; | |
| 740 o = pdf_load_object(ctx, doc, xref); | |
| 741 desft = pdf_dict_get(ctx, o, PDF_NAME(DescendantFonts)); | |
| 742 if (desft) { | |
| 743 obj = pdf_resolve_indirect(ctx, pdf_array_get(ctx, desft, 0)); | |
| 744 obj = pdf_dict_get(ctx, obj, PDF_NAME(FontDescriptor)); | |
| 745 } else { | |
| 746 obj = pdf_dict_get(ctx, o, PDF_NAME(FontDescriptor)); | |
| 747 } | |
| 748 | |
| 749 if (!obj) { | |
| 750 pdf_drop_obj(ctx, o); | |
| 751 PySys_WriteStdout("invalid font - FontDescriptor missing"); | |
| 752 return NULL; | |
| 753 } | |
| 754 pdf_drop_obj(ctx, o); | |
| 755 o = obj; | |
| 756 | |
| 757 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile)); | |
| 758 if (obj) stream = obj; // ext = "pfa" | |
| 759 | |
| 760 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile2)); | |
| 761 if (obj) stream = obj; // ext = "ttf" | |
| 762 | |
| 763 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile3)); | |
| 764 if (obj) { | |
| 765 stream = obj; | |
| 766 | |
| 767 obj = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)); | |
| 768 if (obj && !pdf_is_name(ctx, obj)) { | |
| 769 PySys_WriteStdout("invalid font descriptor subtype"); | |
| 770 return NULL; | |
| 771 } | |
| 772 | |
| 773 if (pdf_name_eq(ctx, obj, PDF_NAME(Type1C))) | |
| 774 ; /*Prev code did: ext = "cff", but this has no effect. */ | |
| 775 else if (pdf_name_eq(ctx, obj, PDF_NAME(CIDFontType0C))) | |
| 776 ; /*Prev code did: ext = "cid", but this has no effect. */ | |
| 777 else if (pdf_name_eq(ctx, obj, PDF_NAME(OpenType))) | |
| 778 ; /*Prev code did: ext = "otf", but this has no effect. */ | |
| 779 else | |
| 780 PySys_WriteStdout("warning: unhandled font type '%s'", pdf_to_name(ctx, obj)); | |
| 781 } | |
| 782 | |
| 783 if (!stream) { | |
| 784 PySys_WriteStdout("warning: unhandled font type"); | |
| 785 return NULL; | |
| 786 } | |
| 787 | |
| 788 return pdf_load_stream(ctx, stream); | |
| 789 } | |
| 790 | |
| 791 //----------------------------------------------------------------------------- | |
| 792 // Return the file extension of a font file, identified by xref | |
| 793 //----------------------------------------------------------------------------- | |
| 794 char *JM_get_fontextension(fz_context *ctx, pdf_document *doc, int xref) | |
| 795 { | |
| 796 if (xref < 1) return "n/a"; | |
| 797 pdf_obj *o, *obj = NULL, *desft; | |
| 798 o = pdf_load_object(ctx, doc, xref); | |
| 799 desft = pdf_dict_get(ctx, o, PDF_NAME(DescendantFonts)); | |
| 800 if (desft) { | |
| 801 obj = pdf_resolve_indirect(ctx, pdf_array_get(ctx, desft, 0)); | |
| 802 obj = pdf_dict_get(ctx, obj, PDF_NAME(FontDescriptor)); | |
| 803 } else { | |
| 804 obj = pdf_dict_get(ctx, o, PDF_NAME(FontDescriptor)); | |
| 805 } | |
| 806 | |
| 807 pdf_drop_obj(ctx, o); | |
| 808 if (!obj) return "n/a"; // this is a base-14 font | |
| 809 | |
| 810 o = obj; // we have the FontDescriptor | |
| 811 | |
| 812 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile)); | |
| 813 if (obj) return "pfa"; | |
| 814 | |
| 815 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile2)); | |
| 816 if (obj) return "ttf"; | |
| 817 | |
| 818 obj = pdf_dict_get(ctx, o, PDF_NAME(FontFile3)); | |
| 819 if (obj) { | |
| 820 obj = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)); | |
| 821 if (obj && !pdf_is_name(ctx, obj)) { | |
| 822 PySys_WriteStdout("invalid font descriptor subtype"); | |
| 823 return "n/a"; | |
| 824 } | |
| 825 if (pdf_name_eq(ctx, obj, PDF_NAME(Type1C))) | |
| 826 return "cff"; | |
| 827 else if (pdf_name_eq(ctx, obj, PDF_NAME(CIDFontType0C))) | |
| 828 return "cid"; | |
| 829 else if (pdf_name_eq(ctx, obj, PDF_NAME(OpenType))) | |
| 830 return "otf"; | |
| 831 else | |
| 832 PySys_WriteStdout("unhandled font type '%s'", pdf_to_name(ctx, obj)); | |
| 833 } | |
| 834 | |
| 835 return "n/a"; | |
| 836 } | |
| 837 | |
| 838 | |
| 839 //----------------------------------------------------------------------------- | |
| 840 // create PDF object from given string (new in v1.14.0: MuPDF dropped it) | |
| 841 //----------------------------------------------------------------------------- | |
| 842 pdf_obj *JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, char *src) | |
| 843 { | |
| 844 pdf_obj *result = NULL; | |
| 845 pdf_lexbuf lexbuf; | |
| 846 fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src)); | |
| 847 | |
| 848 pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL); | |
| 849 | |
| 850 fz_try(ctx) { | |
| 851 result = pdf_parse_stm_obj(ctx, doc, stream, &lexbuf); | |
| 852 } | |
| 853 | |
| 854 fz_always(ctx) { | |
| 855 pdf_lexbuf_fin(ctx, &lexbuf); | |
| 856 fz_drop_stream(ctx, stream); | |
| 857 } | |
| 858 | |
| 859 fz_catch(ctx) { | |
| 860 fz_rethrow(ctx); | |
| 861 } | |
| 862 | |
| 863 return result; | |
| 864 | |
| 865 } | |
| 866 | |
//----------------------------------------------------------------------------
// Normalize a /Rotate value: reduce it into [0, 360) and return it if it is
// a multiple of 90 (i.e. one of 0, 90, 180, 270); otherwise return 0.
//----------------------------------------------------------------------------
int JM_norm_rotation(int rotate)
{
    int angle = rotate % 360;       // C remainder keeps the sign: (-360, 360)
    if (angle < 0) angle += 360;    // shift negatives into [0, 360)
    return (angle % 90 == 0) ? angle : 0;
}
| 877 | |
| 878 | |
| 879 //---------------------------------------------------------------------------- | |
| 880 // return a PDF page's /Rotate value: one of (0, 90, 180, 270) | |
| 881 //---------------------------------------------------------------------------- | |
| 882 int JM_page_rotation(fz_context *ctx, pdf_page *page) | |
| 883 { | |
| 884 int rotate = 0; | |
| 885 fz_try(ctx) | |
| 886 { | |
| 887 rotate = pdf_to_int(ctx, | |
| 888 pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(Rotate))); | |
| 889 rotate = JM_norm_rotation(rotate); | |
| 890 } | |
| 891 fz_catch(ctx) return 0; | |
| 892 return rotate; | |
| 893 } | |
| 894 | |
| 895 | |
| 896 //---------------------------------------------------------------------------- | |
| 897 // return a PDF page's MediaBox | |
| 898 //---------------------------------------------------------------------------- | |
| 899 fz_rect JM_mediabox(fz_context *ctx, pdf_obj *page_obj) | |
| 900 { | |
| 901 fz_rect mediabox, page_mediabox; | |
| 902 | |
| 903 mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, page_obj, | |
| 904 PDF_NAME(MediaBox))); | |
| 905 if (fz_is_empty_rect(mediabox) || fz_is_infinite_rect(mediabox)) | |
| 906 { | |
| 907 mediabox.x0 = 0; | |
| 908 mediabox.y0 = 0; | |
| 909 mediabox.x1 = 612; | |
| 910 mediabox.y1 = 792; | |
| 911 } | |
| 912 | |
| 913 page_mediabox.x0 = fz_min(mediabox.x0, mediabox.x1); | |
| 914 page_mediabox.y0 = fz_min(mediabox.y0, mediabox.y1); | |
| 915 page_mediabox.x1 = fz_max(mediabox.x0, mediabox.x1); | |
| 916 page_mediabox.y1 = fz_max(mediabox.y0, mediabox.y1); | |
| 917 | |
| 918 if (page_mediabox.x1 - page_mediabox.x0 < 1 || | |
| 919 page_mediabox.y1 - page_mediabox.y0 < 1) | |
| 920 page_mediabox = fz_unit_rect; | |
| 921 | |
| 922 return page_mediabox; | |
| 923 } | |
| 924 | |
| 925 | |
| 926 //---------------------------------------------------------------------------- | |
| 927 // return a PDF page's CropBox | |
| 928 //---------------------------------------------------------------------------- | |
| 929 fz_rect JM_cropbox(fz_context *ctx, pdf_obj *page_obj) | |
| 930 { | |
| 931 fz_rect mediabox = JM_mediabox(ctx, page_obj); | |
| 932 fz_rect cropbox = pdf_to_rect(ctx, | |
| 933 pdf_dict_get_inheritable(ctx, page_obj, PDF_NAME(CropBox))); | |
| 934 if (fz_is_infinite_rect(cropbox) || fz_is_empty_rect(cropbox)) | |
| 935 cropbox = mediabox; | |
| 936 float y0 = mediabox.y1 - cropbox.y1; | |
| 937 float y1 = mediabox.y1 - cropbox.y0; | |
| 938 cropbox.y0 = y0; | |
| 939 cropbox.y1 = y1; | |
| 940 return cropbox; | |
| 941 } | |
| 942 | |
| 943 | |
| 944 //---------------------------------------------------------------------------- | |
| 945 // calculate width and height of the UNROTATED page | |
| 946 //---------------------------------------------------------------------------- | |
| 947 fz_point JM_cropbox_size(fz_context *ctx, pdf_obj *page_obj) | |
| 948 { | |
| 949 fz_point size; | |
| 950 fz_try(ctx) | |
| 951 { | |
| 952 fz_rect rect = JM_cropbox(ctx, page_obj); | |
| 953 float w = (rect.x0 < rect.x1 ? rect.x1 - rect.x0 : rect.x0 - rect.x1); | |
| 954 float h = (rect.y0 < rect.y1 ? rect.y1 - rect.y0 : rect.y0 - rect.y1); | |
| 955 size = fz_make_point(w, h); | |
| 956 } | |
| 957 fz_catch(ctx) fz_rethrow(ctx); | |
| 958 return size; | |
| 959 } | |
| 960 | |
| 961 | |
| 962 //---------------------------------------------------------------------------- | |
| 963 // calculate page rotation matrices | |
| 964 //---------------------------------------------------------------------------- | |
| 965 fz_matrix JM_rotate_page_matrix(fz_context *ctx, pdf_page *page) | |
| 966 { | |
| 967 if (!page) return fz_identity; // no valid pdf page given | |
| 968 int rotation = JM_page_rotation(ctx, page); | |
| 969 if (rotation == 0) return fz_identity; // no rotation | |
| 970 fz_matrix m; | |
| 971 fz_point cb_size = JM_cropbox_size(ctx, page->obj); | |
| 972 float w = cb_size.x; | |
| 973 float h = cb_size.y; | |
| 974 if (rotation == 90) | |
| 975 m = fz_make_matrix(0, 1, -1, 0, h, 0); | |
| 976 else if (rotation == 180) | |
| 977 m = fz_make_matrix(-1, 0, 0, -1, w, h); | |
| 978 else | |
| 979 m = fz_make_matrix(0, -1, 1, 0, 0, w); | |
| 980 return m; | |
| 981 } | |
| 982 | |
| 983 | |
| 984 fz_matrix JM_derotate_page_matrix(fz_context *ctx, pdf_page *page) | |
| 985 { // just the inverse of rotation | |
| 986 return fz_invert_matrix(JM_rotate_page_matrix(ctx, page)); | |
| 987 } | |
| 988 | |
| 989 | |
| 990 //----------------------------------------------------------------------------- | |
| 991 // Insert a font in a PDF | |
| 992 //----------------------------------------------------------------------------- | |
| 993 PyObject * | |
| 994 JM_insert_font(fz_context *ctx, pdf_document *pdf, char *bfname, char *fontfile, | |
| 995 PyObject *fontbuffer, int set_simple, int idx, int wmode, int serif, | |
| 996 int encoding, int ordering) | |
| 997 { | |
| 998 pdf_obj *font_obj = NULL; | |
| 999 fz_font *font = NULL; | |
| 1000 fz_buffer *res = NULL; | |
| 1001 const unsigned char *data = NULL; | |
| 1002 int size, ixref = 0, index = 0, simple = 0; | |
| 1003 PyObject *value=NULL, *name=NULL, *subt=NULL, *exto = NULL; | |
| 1004 | |
| 1005 fz_var(exto); | |
| 1006 fz_var(name); | |
| 1007 fz_var(subt); | |
| 1008 fz_var(res); | |
| 1009 fz_var(font); | |
| 1010 fz_var(font_obj); | |
| 1011 fz_try(ctx) { | |
| 1012 ENSURE_OPERATION(ctx, pdf); | |
| 1013 //------------------------------------------------------------- | |
| 1014 // check for CJK font | |
| 1015 //------------------------------------------------------------- | |
| 1016 if (ordering > -1) { | |
| 1017 data = fz_lookup_cjk_font(ctx, ordering, &size, &index); | |
| 1018 } | |
| 1019 if (data) { | |
| 1020 font = fz_new_font_from_memory(ctx, NULL, data, size, index, 0); | |
| 1021 font_obj = pdf_add_cjk_font(ctx, pdf, font, ordering, wmode, serif); | |
| 1022 exto = JM_UnicodeFromStr("n/a"); | |
| 1023 simple = 0; | |
| 1024 goto weiter; | |
| 1025 } | |
| 1026 | |
| 1027 //------------------------------------------------------------- | |
| 1028 // check for PDF Base-14 font | |
| 1029 //------------------------------------------------------------- | |
| 1030 if (bfname) { | |
| 1031 data = fz_lookup_base14_font(ctx, bfname, &size); | |
| 1032 } | |
| 1033 if (data) { | |
| 1034 font = fz_new_font_from_memory(ctx, bfname, data, size, 0, 0); | |
| 1035 font_obj = pdf_add_simple_font(ctx, pdf, font, encoding); | |
| 1036 exto = JM_UnicodeFromStr("n/a"); | |
| 1037 simple = 1; | |
| 1038 goto weiter; | |
| 1039 } | |
| 1040 | |
| 1041 if (fontfile) { | |
| 1042 font = fz_new_font_from_file(ctx, NULL, fontfile, idx, 0); | |
| 1043 } else { | |
| 1044 res = JM_BufferFromBytes(ctx, fontbuffer); | |
| 1045 if (!res) { | |
| 1046 RAISEPY(ctx, MSG_FILE_OR_BUFFER, PyExc_ValueError); | |
| 1047 } | |
| 1048 font = fz_new_font_from_buffer(ctx, NULL, res, idx, 0); | |
| 1049 } | |
| 1050 | |
| 1051 if (!set_simple) { | |
| 1052 font_obj = pdf_add_cid_font(ctx, pdf, font); | |
| 1053 simple = 0; | |
| 1054 } else { | |
| 1055 font_obj = pdf_add_simple_font(ctx, pdf, font, encoding); | |
| 1056 simple = 2; | |
| 1057 } | |
| 1058 | |
| 1059 weiter: ; | |
| 1060 ixref = pdf_to_num(ctx, font_obj); | |
| 1061 name = JM_EscapeStrFromStr(pdf_to_name(ctx, | |
| 1062 pdf_dict_get(ctx, font_obj, PDF_NAME(BaseFont)))); | |
| 1063 | |
| 1064 subt = JM_UnicodeFromStr(pdf_to_name(ctx, | |
| 1065 pdf_dict_get(ctx, font_obj, PDF_NAME(Subtype)))); | |
| 1066 | |
| 1067 if (!exto) | |
| 1068 exto = JM_UnicodeFromStr(JM_get_fontextension(ctx, pdf, ixref)); | |
| 1069 | |
| 1070 float asc = fz_font_ascender(ctx, font); | |
| 1071 float dsc = fz_font_descender(ctx, font); | |
| 1072 value = Py_BuildValue("[i,{s:O,s:O,s:O,s:O,s:i,s:f,s:f}]", | |
| 1073 ixref, | |
| 1074 "name", name, // base font name | |
| 1075 "type", subt, // subtype | |
| 1076 "ext", exto, // file extension | |
| 1077 "simple", JM_BOOL(simple), // simple font? | |
| 1078 "ordering", ordering, // CJK font? | |
| 1079 "ascender", asc, | |
| 1080 "descender", dsc | |
| 1081 ); | |
| 1082 } | |
| 1083 fz_always(ctx) { | |
| 1084 Py_CLEAR(exto); | |
| 1085 Py_CLEAR(name); | |
| 1086 Py_CLEAR(subt); | |
| 1087 fz_drop_buffer(ctx, res); | |
| 1088 fz_drop_font(ctx, font); | |
| 1089 pdf_drop_obj(ctx, font_obj); | |
| 1090 } | |
| 1091 fz_catch(ctx) { | |
| 1092 fz_rethrow(ctx); | |
| 1093 } | |
| 1094 return value; | |
| 1095 } | |
| 1096 | |
| 1097 | |
| 1098 //----------------------------------------------------------------------------- | |
| 1099 // compute image insertion matrix | |
| 1100 //----------------------------------------------------------------------------- | |
| 1101 fz_matrix | |
| 1102 calc_image_matrix(int width, int height, PyObject *tr, int rotate, int keep) | |
| 1103 { | |
| 1104 float large, small, fw, fh, trw, trh, f, w, h; | |
| 1105 fz_rect trect = JM_rect_from_py(tr); | |
| 1106 fz_matrix rot = fz_rotate((float) rotate); | |
| 1107 trw = trect.x1 - trect.x0; | |
| 1108 trh = trect.y1 - trect.y0; | |
| 1109 w = trw; | |
| 1110 h = trh; | |
| 1111 if (keep) { | |
| 1112 large = (float) Py_MAX(width, height); | |
| 1113 fw = (float) width / large; | |
| 1114 fh = (float) height / large; | |
| 1115 } else { | |
| 1116 fw = fh = 1; | |
| 1117 } | |
| 1118 small = Py_MIN(fw, fh); | |
| 1119 if (rotate != 0 && rotate != 180) { | |
| 1120 f = fw; | |
| 1121 fw = fh; | |
| 1122 fh = f; | |
| 1123 } | |
| 1124 if (fw < 1) { | |
| 1125 if ((trw / fw) > (trh / fh)) { | |
| 1126 w = trh * small; | |
| 1127 h = trh; | |
| 1128 } else { | |
| 1129 w = trw; | |
| 1130 h = trw / small; | |
| 1131 } | |
| 1132 } else if (fw != fh) { | |
| 1133 if ((trw / fw) > (trh / fh)) { | |
| 1134 w = trh / small; | |
| 1135 h = trh; | |
| 1136 } else { | |
| 1137 w = trw; | |
| 1138 h = trw * small; | |
| 1139 } | |
| 1140 } else { | |
| 1141 w = trw; | |
| 1142 h = trh; | |
| 1143 } | |
| 1144 fz_point tmp = fz_make_point((trect.x0 + trect.x1) / 2, | |
| 1145 (trect.y0 + trect.y1) / 2); | |
| 1146 fz_matrix mat = fz_make_matrix(1, 0, 0, 1, -0.5, -0.5); | |
| 1147 mat = fz_concat(mat, rot); | |
| 1148 mat = fz_concat(mat, fz_scale(w, h)); | |
| 1149 mat = fz_concat(mat, fz_translate(tmp.x, tmp.y)); | |
| 1150 return mat; | |
| 1151 } | |
| 1152 | |
| 1153 // -------------------------------------------------------- | |
| 1154 // Callback function for the Story class | |
| 1155 // -------------------------------------------------------- | |
| 1156 static PyObject *make_story_elpos = NULL; // Py function returning object | |
| 1157 void Story_Callback(fz_context *ctx, void *opaque, fz_story_element_position *pos) | |
| 1158 { | |
| 1159 #define SETATTR(a, v) PyObject_SetAttrString(arg, a, v);Py_DECREF(v) | |
| 1160 // ------------------------------------------------------------------------ | |
| 1161 // 'opaque' is a tuple (userfunc, userdict), where 'userfunc' is a function | |
| 1162 // in the user's script and 'userdict' is a dictionary containing any | |
| 1163 // additional parameters of the user | |
| 1164 // userfunc will be called with the joined info of userdict and pos. | |
| 1165 // ------------------------------------------------------------------------ | |
| 1166 PyObject *callarg = (PyObject *) opaque; | |
| 1167 PyObject *userfunc = PyTuple_GET_ITEM(callarg, 0); | |
| 1168 PyObject *userdict = PyTuple_GET_ITEM(callarg, 1); | |
| 1169 | |
| 1170 PyObject *this_module = PyImport_AddModule("fitz"); // get our module | |
| 1171 if (!make_story_elpos) { // locate ElementPosition maker once | |
| 1172 make_story_elpos = Py_BuildValue("s", "make_story_elpos"); | |
| 1173 } | |
| 1174 // get access to ElementPosition() object | |
| 1175 PyObject *arg = PyObject_CallMethodObjArgs(this_module, make_story_elpos, NULL); | |
| 1176 Py_INCREF(arg); | |
| 1177 SETATTR("depth", Py_BuildValue("i", pos->depth)); | |
| 1178 SETATTR("heading", Py_BuildValue("i", pos->heading)); | |
| 1179 SETATTR("id", Py_BuildValue("s", pos->id)); | |
| 1180 SETATTR("rect", JM_py_from_rect(pos->rect)); | |
| 1181 SETATTR("text", Py_BuildValue("s", pos->text)); | |
| 1182 SETATTR("open_close", Py_BuildValue("i", pos->open_close)); | |
| 1183 SETATTR("rect_num", Py_BuildValue("i", pos->rectangle_num)); | |
| 1184 SETATTR("href", Py_BuildValue("s", pos->href)); | |
| 1185 | |
| 1186 // iterate over userdict items and set their attributes | |
| 1187 PyObject *pkey = NULL; | |
| 1188 PyObject *pval = NULL; | |
| 1189 Py_ssize_t ppos = 0; | |
| 1190 while (PyDict_Next(userdict, &ppos, &pkey, &pval)) { | |
| 1191 PyObject_SetAttr(arg, pkey, pval); | |
| 1192 } | |
| 1193 PyObject_CallFunctionObjArgs(userfunc, arg, NULL); | |
| 1194 #undef SETATTR | |
| 1195 } | |
| 1196 | |
| 1197 // ----------------------------------------------------------- | |
| 1198 // Return last archive if it is a tree and mount points match | |
| 1199 // ----------------------------------------------------------- | |
| 1200 fz_archive *JM_last_tree(fz_context *ctx, fz_archive *arch, const char *mount) | |
| 1201 { | |
| 1202 typedef struct | |
| 1203 { | |
| 1204 fz_archive *arch; | |
| 1205 char *dir; | |
| 1206 } multi_archive_entry; | |
| 1207 | |
| 1208 typedef struct | |
| 1209 { | |
| 1210 fz_archive super; | |
| 1211 int len; | |
| 1212 int max; | |
| 1213 multi_archive_entry *sub; | |
| 1214 } fz_multi_archive; | |
| 1215 | |
| 1216 if (!arch) { | |
| 1217 return NULL; | |
| 1218 } | |
| 1219 | |
| 1220 fz_multi_archive *multi = (fz_multi_archive *) arch; | |
| 1221 if (multi->len == 0) { // archive is empty | |
| 1222 return NULL; | |
| 1223 } | |
| 1224 int i = multi->len - 1; // read last sub archive | |
| 1225 multi_archive_entry *e = &multi->sub[i]; | |
| 1226 fz_archive *arch_ = e->arch; | |
| 1227 const char *mount_ = e->dir; | |
| 1228 const char *fmt = fz_archive_format(ctx, arch_); | |
| 1229 if (strcmp(fmt, "tree") != 0) { // not a tree archive | |
| 1230 return NULL; | |
| 1231 } | |
| 1232 if ((mount_ && mount && strcmp(mount, mount_) == 0) || (!mount && !mount_)) { // last sub archive is eligible! | |
| 1233 return arch_; | |
| 1234 } | |
| 1235 return NULL; | |
| 1236 } | |
| 1237 | |
| 1238 fz_archive *JM_archive_from_py(fz_context *ctx, fz_archive *arch, PyObject *path, const char *mount, int *drop_sub) | |
| 1239 { | |
| 1240 fz_stream *stream = NULL; | |
| 1241 fz_buffer *buff = NULL; | |
| 1242 *drop_sub = 1; | |
| 1243 fz_archive *sub = NULL; | |
| 1244 const char *my_mount = mount; | |
| 1245 fz_try(ctx) { | |
| 1246 // tree archive: tuple of memory items | |
| 1247 // check if we can add to last sub-archive | |
| 1248 sub = JM_last_tree(ctx, arch, my_mount); | |
| 1249 if (!sub) { | |
| 1250 sub = fz_new_tree_archive(ctx, NULL); | |
| 1251 } else { | |
| 1252 *drop_sub = 0; // never drop last sub-archive | |
| 1253 } | |
| 1254 | |
| 1255 // a single tree item | |
| 1256 if (PyBytes_Check(path) || PyByteArray_Check(path) || PyObject_HasAttrString(path, "getvalue")) { | |
| 1257 buff = JM_BufferFromBytes(ctx, path); | |
| 1258 fz_tree_archive_add_buffer(ctx, sub, mount, buff); | |
| 1259 goto finished; | |
| 1260 } | |
| 1261 | |
| 1262 // a tuple of tree items | |
| 1263 Py_ssize_t i, n = PyTuple_Size(path); | |
| 1264 for (i = 0; i < n; i++) { | |
| 1265 PyObject *item = PyTuple_GET_ITEM(path, i); | |
| 1266 PyObject *i0 = PySequence_GetItem(item, 0); // data | |
| 1267 PyObject *i1 = PySequence_GetItem(item, 1); // name | |
| 1268 buff = JM_BufferFromBytes(ctx, i0); | |
| 1269 fz_tree_archive_add_buffer(ctx, sub, PyUnicode_AsUTF8(i1), buff); | |
| 1270 fz_drop_buffer(ctx, buff); | |
| 1271 Py_DECREF(i0); | |
| 1272 Py_DECREF(i1); | |
| 1273 } | |
| 1274 buff = NULL; | |
| 1275 goto finished; | |
| 1276 | |
| 1277 finished:; | |
| 1278 } | |
| 1279 | |
| 1280 fz_always(ctx) { | |
| 1281 fz_drop_buffer(ctx, buff); | |
| 1282 fz_drop_stream(ctx, stream); | |
| 1283 } | |
| 1284 | |
| 1285 fz_catch(ctx) { | |
| 1286 fz_rethrow(ctx); | |
| 1287 } | |
| 1288 | |
| 1289 return sub; | |
| 1290 } | |
| 1291 | |
| 1292 | |
| 1293 int JM_rects_overlap(const fz_rect a, const fz_rect b) | |
| 1294 { | |
| 1295 if (0 | |
| 1296 || a.x0 >= b.x1 | |
| 1297 || a.y0 >= b.y1 | |
| 1298 || a.x1 <= b.x0 | |
| 1299 || a.y1 <= b.y0 | |
| 1300 ) | |
| 1301 return 0; | |
| 1302 return 1; | |
| 1303 } | |
| 1304 | |
//-----------------------------------------------------------------------------
// dummy structure for various tools and utilities
//-----------------------------------------------------------------------------
struct Tools {int index;};

// NOTE(review): fz_item and fz_store below are declared locally; presumably
// they replicate MuPDF's private store layout so that helper code can read
// store fields directly. If so, field order and types must match the MuPDF
// version that is linked - confirm whenever MuPDF is upgraded.
typedef struct fz_item fz_item;

// One entry of the store's doubly linked list.
struct fz_item
{
    void *key;
    fz_storable *val;
    size_t size;
    fz_item *next;
    fz_item *prev;
    fz_store *store;
    const fz_store_type *type;
};

struct fz_store
{
    int refs;

    /* Every item in the store is kept in a doubly linked list, ordered
     * by usage (so LRU entries are at the end). */
    fz_item *head;
    fz_item *tail;

    /* We have a hash table that allows to quickly find a subset of the
     * entries (those whose keys are indirect objects). */
    fz_hash_table *hash;

    /* We keep track of the size of the store, and keep it below max. */
    size_t max;
    size_t size;

    int defer_reap_count;
    int needs_reaping;
};
| 1343 | |
| 1344 %} |
