Mercurial > hgrepos > Python2 > PyMuPDF
view src_classic/fitz_old.i @ 40:aa33339d6b8a upstream
ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 11:31:38 +0200 |
| parents | 1d09e1dec1d9 |
| children |
line wrap: on
line source
%module fitz %pythonbegin %{ %} //------------------------------------------------------------------------ // SWIG macros: handle fitz exceptions //------------------------------------------------------------------------ %define FITZEXCEPTION(meth, cond) %exception meth { $action if (cond) { return JM_ReturnException(gctx); } } %enddef %define FITZEXCEPTION2(meth, cond) %exception meth { $action if (cond) { const char *msg = fz_caught_message(gctx); if (strcmp(msg, MSG_BAD_FILETYPE) == 0) { PyErr_SetString(PyExc_ValueError, msg); } else { PyErr_SetString(JM_Exc_FileDataError, MSG_BAD_DOCUMENT); } return NULL; } } %enddef //------------------------------------------------------------------------ // SWIG macro: check that a document is not closed / encrypted //------------------------------------------------------------------------ %define CLOSECHECK(meth, doc) %pythonprepend meth %{doc if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted")%} %enddef %define CLOSECHECK0(meth, doc) %pythonprepend meth%{doc if self.is_closed: raise ValueError("document closed")%} %enddef //------------------------------------------------------------------------ // SWIG macro: check if object has a valid parent //------------------------------------------------------------------------ %define PARENTCHECK(meth, doc) %pythonprepend meth %{doc CheckParent(self)%} %enddef //------------------------------------------------------------------------ // SWIG macro: ensure object still exists //------------------------------------------------------------------------ %define ENSURE_OWNERSHIP(meth, doc) %pythonprepend meth %{doc EnsureOwnership(self)%} %enddef %include "mupdf/fitz/version.h" %{ #define MEMDEBUG 0 #if MEMDEBUG == 1 #define DEBUGMSG1(x) PySys_WriteStderr("[DEBUG] free %s ", x) #define DEBUGMSG2 PySys_WriteStderr("... done!\n") #else #define DEBUGMSG1(x) #define DEBUGMSG2 #endif #ifndef FLT_EPSILON #define FLT_EPSILON 1e-5 #endif #define SWIG_FILE_WITH_INIT // JM_MEMORY controls what allocators we tell MuPDF to use when we call // fz_new_context(): // // JM_MEMORY=0: MuPDF uses malloc()/free(). // JM_MEMORY=1: MuPDF uses PyMem_Malloc()/PyMem_Free(). // // There are also a small number of places where we call malloc() or // PyMem_Malloc() ourselves, depending on JM_MEMORY. // #define JM_MEMORY 0 #if JM_MEMORY == 1 #define JM_Alloc(type, len) PyMem_New(type, len) #define JM_Free(x) PyMem_Del(x) #else #define JM_Alloc(type, len) (type *) malloc(sizeof(type)*len) #define JM_Free(x) free(x) #endif #define EMPTY_STRING PyUnicode_FromString("") #define EXISTS(x) (x != NULL && PyObject_IsTrue(x)==1) #define RAISEPY(context, msg, exc) {JM_Exc_CurrentException=exc; fz_throw(context, FZ_ERROR_GENERIC, msg);} #define ASSERT_PDF(cond) if (cond == NULL) RAISEPY(gctx, MSG_IS_NO_PDF, PyExc_RuntimeError) #define ENSURE_OPERATION(ctx, pdf) if (!JM_have_operation(ctx, pdf)) RAISEPY(ctx, "No journalling operation started", PyExc_RuntimeError) #define INRANGE(v, low, high) ((low) <= v && v <= (high)) #define JM_BOOL(x) PyBool_FromLong((long) (x)) #define JM_PyErr_Clear if (PyErr_Occurred()) PyErr_Clear() #define JM_StrAsChar(x) (char *)PyUnicode_AsUTF8(x) #define JM_BinFromChar(x) PyBytes_FromString(x) #define JM_BinFromCharSize(x, y) PyBytes_FromStringAndSize(x, (Py_ssize_t) y) #include <mupdf/fitz.h> #include <mupdf/pdf.h> #include <time.h> // freetype includes >> -------------------------------------------------- #include <ft2build.h> #include FT_FREETYPE_H #ifdef FT_FONT_FORMATS_H #include FT_FONT_FORMATS_H #else #include FT_XFREE86_H #endif #include FT_TRUETYPE_TABLES_H #ifndef FT_SFNT_HEAD #define FT_SFNT_HEAD ft_sfnt_head #endif // << freetype includes -------------------------------------------------- void JM_delete_widget(fz_context *ctx, pdf_page *page, pdf_annot *annot); static void JM_get_page_labels(fz_context *ctx, PyObject *liste, pdf_obj *nums); static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value); static int LIST_APPEND_DROP(PyObject *list, PyObject *item); static int LIST_APPEND_DROP(PyObject *list, PyObject *item); static fz_irect JM_irect_from_py(PyObject *r); static fz_matrix JM_matrix_from_py(PyObject *m); static fz_point JM_normalize_vector(float x, float y); static fz_point JM_point_from_py(PyObject *p); static fz_quad JM_quad_from_py(PyObject *r); static fz_rect JM_rect_from_py(PyObject *r); static int JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, double *result); static int JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result); static PyObject *JM_py_from_irect(fz_irect r); static PyObject *JM_py_from_matrix(fz_matrix m); static PyObject *JM_py_from_point(fz_point p); static PyObject *JM_py_from_quad(fz_quad q); static PyObject *JM_py_from_rect(fz_rect r); static void show(const char* prefix, PyObject* obj); // additional headers ---------------------------------------------- #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR == 23 && FZ_VERSION_PATCH < 8 pdf_obj *pdf_lookup_page_loc(fz_context *ctx, pdf_document *doc, int needle, pdf_obj **parentp, int *indexp); fz_pixmap *fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip); int fz_pixmap_size(fz_context *ctx, fz_pixmap *src); void fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor); void fz_copy_pixmap_rect(fz_context *ctx, fz_pixmap *dest, fz_pixmap *src, fz_irect b, const fz_default_colorspaces *default_cs); void fz_write_pixmap_as_jpeg(fz_context *ctx, fz_output *out, fz_pixmap *pix, int jpg_quality); #endif static const float JM_font_ascender(fz_context *ctx, fz_font *font); static const float JM_font_descender(fz_context *ctx, fz_font *font); // end of additional headers -------------------------------------------- static PyObject *JM_mupdf_warnings_store; static int JM_mupdf_show_errors; static int JM_mupdf_show_warnings; static PyObject *JM_Exc_FileDataError; static PyObject *JM_Exc_CurrentException; %} //------------------------------------------------------------------------ // global context //------------------------------------------------------------------------ %init %{ #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 /* Stop Memento backtraces if we reach the Python interpreter. `cfunction_call()` isn't the only way that Python calls C though, so we might need extra calls to Memento_addBacktraceLimitFnname(). We put this inside `#ifdef MEMENTO` because memento.h's disabling macro causes "warning: statement with no effect" from cc. */ #ifdef MEMENTO Memento_addBacktraceLimitFnname("cfunction_call"); #endif #endif /* We end up with Memento leaks from fz_new_context()'s allocs even when our atexit handler calls fz_drop_context(), so remove these from Memento's accounting. */ Memento_startLeaking(); #if JM_MEMORY == 1 gctx = fz_new_context(&JM_Alloc_Context, NULL, FZ_STORE_DEFAULT); #else gctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT); #endif Memento_stopLeaking(); if(!gctx) { PyErr_SetString(PyExc_RuntimeError, "Fatal error: cannot create global context."); return NULL; } fz_register_document_handlers(gctx); //------------------------------------------------------------------------ // START redirect stdout/stderr //------------------------------------------------------------------------ JM_mupdf_warnings_store = PyList_New(0); JM_mupdf_show_errors = 1; JM_mupdf_show_warnings = 0; char user[] = "PyMuPDF"; fz_set_warning_callback(gctx, JM_mupdf_warning, &user); fz_set_error_callback(gctx, JM_mupdf_error, &user); JM_Exc_FileDataError = NULL; JM_Exc_CurrentException = PyExc_RuntimeError; //------------------------------------------------------------------------ // STOP redirect stdout/stderr //------------------------------------------------------------------------ // init global constants //------------------------------------------------------------------------ dictkey_align = PyUnicode_InternFromString("align"); dictkey_ascender = PyUnicode_InternFromString("ascender"); dictkey_bbox = PyUnicode_InternFromString("bbox"); dictkey_blocks = PyUnicode_InternFromString("blocks"); dictkey_bpc = PyUnicode_InternFromString("bpc"); dictkey_c = PyUnicode_InternFromString("c"); dictkey_chars = PyUnicode_InternFromString("chars"); dictkey_color = PyUnicode_InternFromString("color"); dictkey_colorspace = PyUnicode_InternFromString("colorspace"); dictkey_content = PyUnicode_InternFromString("content"); dictkey_creationDate = PyUnicode_InternFromString("creationDate"); dictkey_cs_name = PyUnicode_InternFromString("cs-name"); dictkey_da = PyUnicode_InternFromString("da"); dictkey_dashes = PyUnicode_InternFromString("dashes"); dictkey_desc = PyUnicode_InternFromString("desc"); dictkey_desc = PyUnicode_InternFromString("descender"); dictkey_descender = PyUnicode_InternFromString("descender"); dictkey_dir = PyUnicode_InternFromString("dir"); dictkey_effect = PyUnicode_InternFromString("effect"); dictkey_ext = PyUnicode_InternFromString("ext"); dictkey_filename = PyUnicode_InternFromString("filename"); dictkey_fill = PyUnicode_InternFromString("fill"); dictkey_flags = PyUnicode_InternFromString("flags"); dictkey_font = PyUnicode_InternFromString("font"); dictkey_glyph = PyUnicode_InternFromString("glyph"); dictkey_height = PyUnicode_InternFromString("height"); dictkey_id = PyUnicode_InternFromString("id"); dictkey_image = PyUnicode_InternFromString("image"); dictkey_items = PyUnicode_InternFromString("items"); dictkey_length = PyUnicode_InternFromString("length"); dictkey_lines = PyUnicode_InternFromString("lines"); dictkey_matrix = PyUnicode_InternFromString("transform"); dictkey_modDate = PyUnicode_InternFromString("modDate"); dictkey_name = PyUnicode_InternFromString("name"); dictkey_number = PyUnicode_InternFromString("number"); dictkey_origin = PyUnicode_InternFromString("origin"); dictkey_rect = PyUnicode_InternFromString("rect"); dictkey_size = PyUnicode_InternFromString("size"); dictkey_smask = PyUnicode_InternFromString("smask"); dictkey_spans = PyUnicode_InternFromString("spans"); dictkey_stroke = PyUnicode_InternFromString("stroke"); dictkey_style = PyUnicode_InternFromString("style"); dictkey_subject = PyUnicode_InternFromString("subject"); dictkey_text = PyUnicode_InternFromString("text"); dictkey_title = PyUnicode_InternFromString("title"); dictkey_type = PyUnicode_InternFromString("type"); dictkey_ufilename = PyUnicode_InternFromString("ufilename"); dictkey_width = PyUnicode_InternFromString("width"); dictkey_wmode = PyUnicode_InternFromString("wmode"); dictkey_xref = PyUnicode_InternFromString("xref"); dictkey_xres = PyUnicode_InternFromString("xres"); dictkey_yres = PyUnicode_InternFromString("yres"); atexit( cleanup); %} %header %{ fz_context *gctx; static void cleanup() { fz_drop_context( gctx); } static int JM_UNIQUE_ID = 0; struct DeviceWrapper { fz_device *device; fz_display_list *list; }; %} //------------------------------------------------------------------------ // include version information and several other helpers //------------------------------------------------------------------------ %pythoncode %{ import sys import io import math import os import weakref import hashlib import typing import binascii import re import tarfile import zipfile import pathlib import string # PDF names must not contain these characters: INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0)) TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX") point_like = "point_like" rect_like = "rect_like" matrix_like = "matrix_like" quad_like = "quad_like" # ByteString is gone from typing in 3.14. # collections.abc.Buffer available from 3.12 only try: ByteString = typing.ByteString except AttributeError: ByteString = bytes | bytearray | memoryview AnyType = typing.Any OptInt = typing.Union[int, None] OptFloat = typing.Optional[float] OptStr = typing.Optional[str] OptDict = typing.Optional[dict] OptBytes = typing.Optional[ByteString] OptSeq = typing.Optional[typing.Sequence] try: from pymupdf_fonts import fontdescriptors, fontbuffers fitz_fontdescriptors = fontdescriptors.copy() for k in fitz_fontdescriptors.keys(): fitz_fontdescriptors[k]["loader"] = fontbuffers[k] del fontdescriptors, fontbuffers except ImportError: fitz_fontdescriptors = {} %} %include version.i %include helper-git-versions.i %include helper-defines.i %include helper-globals.i %include helper-geo-c.i %include helper-other.i %include helper-pixmap.i %include helper-geo-py.i %include helper-annot.i %include helper-fields.i %include helper-python.i %include helper-portfolio.i %include helper-select.i %include helper-stext.i %include helper-xobject.i %include helper-pdfinfo.i %include helper-convert.i %include helper-fileobj.i %include helper-devices.i %{ // Declaring these structs here prevents gcc from generating warnings like: // // warning: 'struct Document' declared inside parameter list will not be visible outside of this definition or declaration // struct Colorspace; struct Document; struct Font; struct Graftmap; struct TextPage; struct TextWriter; struct DocumentWriter; struct Xml; struct Archive; struct Story; %} //------------------------------------------------------------------------ // fz_document //------------------------------------------------------------------------ struct Document { %extend { ~Document() { DEBUGMSG1("Document"); fz_document *this_doc = (fz_document *) $self; fz_drop_document(gctx, this_doc); DEBUGMSG2; } FITZEXCEPTION2(Document, !result) %pythonprepend Document %{ """Creates a document. Use 'open' as a synonym. Notes: Basic usages: open() - new PDF document open(filename) - string, pathlib.Path, or file object. open(filename, fileype=type) - overwrite filename extension. open(type, buffer) - type: extension, buffer: bytes object. open(stream=buffer, filetype=type) - keyword version of previous. Parameters rect, width, height, fontsize: layout reflowable document on open (e.g. EPUB). Ignored if n/a. """ self.is_closed = False self.is_encrypted = False self.isEncrypted = False self.metadata = None self.FontInfos = [] self.Graftmaps = {} self.ShownPages = {} self.InsertedImages = {} self._page_refs = weakref.WeakValueDictionary() if not filename or type(filename) is str: pass elif hasattr(filename, "absolute"): filename = str(filename) elif hasattr(filename, "name"): filename = filename.name else: msg = "bad filename" raise TypeError(msg) if stream != None: if type(stream) is bytes: self.stream = stream elif type(stream) is bytearray: self.stream = bytes(stream) elif type(stream) is io.BytesIO: self.stream = stream.getvalue() else: msg = "bad type: 'stream'" raise TypeError(msg) stream = self.stream if not (filename or filetype): filename = "pdf" else: self.stream = None if filename and self.stream == None: self.name = filename from_file = True else: from_file = False self.name = "" if from_file: if not os.path.exists(filename): msg = f"no such file: '{filename}'" raise FileNotFoundError(msg) elif not os.path.isfile(filename): msg = f"'{filename}' is no file" raise FileDataError(msg) if from_file and os.path.getsize(filename) == 0 or type(self.stream) is bytes and len(self.stream) == 0: msg = "cannot open empty document" raise EmptyFileError(msg) %} %pythonappend Document %{ if self.thisown: self._graft_id = TOOLS.gen_id() if self.needs_pass is True: self.is_encrypted = True self.isEncrypted = True else: # we won't init until doc is decrypted self.init_doc() # the following hack detects invalid/empty SVG files, which else may lead # to interpreter crashes if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower(): try: _ = self.convert_to_pdf() # this seems to always work except: raise FileDataError("cannot open broken document") from None %} Document(const char *filename=NULL, PyObject *stream=NULL, const char *filetype=NULL, PyObject *rect=NULL, float width=0, float height=0, float fontsize=11) { int old_msg_option = JM_mupdf_show_errors; JM_mupdf_show_errors = 0; fz_document *doc = NULL; const fz_document_handler *handler; char *c = NULL; char *magic = NULL; size_t len = 0; fz_stream *data = NULL; float w = width, h = height; fz_rect r = JM_rect_from_py(rect); if (!fz_is_infinite_rect(r)) { w = r.x1 - r.x0; h = r.y1 - r.y0; } fz_try(gctx) { if (stream != Py_None) { // stream given, **MUST** be bytes! c = PyBytes_AS_STRING(stream); // just a pointer, no new obj len = (size_t) PyBytes_Size(stream); data = fz_open_memory(gctx, (const unsigned char *) c, len); magic = (char *)filename; if (!magic) magic = (char *)filetype; handler = fz_recognize_document(gctx, magic); if (!handler) { RAISEPY(gctx, MSG_BAD_FILETYPE, PyExc_ValueError); } doc = fz_open_document_with_stream(gctx, magic, data); } else { if (filename && strlen(filename)) { if (!filetype || strlen(filetype) == 0) { doc = fz_open_document(gctx, filename); } else { handler = fz_recognize_document(gctx, filetype); if (!handler) { RAISEPY(gctx, MSG_BAD_FILETYPE, PyExc_ValueError); } #if FZ_VERSION_MINOR >= 24 if (handler->open) { fz_stream* filename_stream = fz_open_file(gctx, filename); fz_try(gctx) { doc = handler->open(gctx, filename_stream, NULL, NULL); } fz_always(gctx) { fz_drop_stream(gctx, filename_stream); } fz_catch(gctx) { fz_rethrow(gctx); } } #else if (handler->open) { doc = handler->open(gctx, filename); } else if (handler->open_with_stream) { data = fz_open_file(gctx, filename); doc = handler->open_with_stream(gctx, data); } #endif } } else { pdf_document *pdf = pdf_create_document(gctx); doc = (fz_document *) pdf; } } } fz_always(gctx) { fz_drop_stream(gctx, data); } fz_catch(gctx) { JM_mupdf_show_errors = old_msg_option; return NULL; } if (w > 0 && h > 0) { fz_layout_document(gctx, doc, w, h, fontsize); } else if (fz_is_document_reflowable(gctx, doc)) { fz_layout_document(gctx, doc, 400, 600, 11); } return (struct Document *) doc; } FITZEXCEPTION(load_page, !result) %pythonprepend load_page %{ """Load a page. 'page_id' is either a 0-based page number or a tuple (chapter, pno), with chapter number and page number within that chapter. """ if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if page_id is None: page_id = 0 if page_id not in self: raise ValueError("page not in document") if type(page_id) is int and page_id < 0: np = self.page_count while page_id < 0: page_id += np %} %pythonappend load_page %{ val.thisown = True val.parent = weakref.proxy(self) self._page_refs[id(val)] = val val._annot_refs = weakref.WeakValueDictionary() val.number = page_id %} struct Page * load_page(PyObject *page_id) { fz_page *page = NULL; fz_document *doc = (fz_document *) $self; int pno = 0, chapter = 0; fz_try(gctx) { if (PySequence_Check(page_id)) { if (JM_INT_ITEM(page_id, 0, &chapter) == 1) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } if (JM_INT_ITEM(page_id, 1, &pno) == 1) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } page = fz_load_chapter_page(gctx, doc, chapter, pno); } else { pno = (int) PyLong_AsLong(page_id); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } page = fz_load_page(gctx, doc, pno); } } fz_catch(gctx) { PyErr_Clear(); return NULL; } PyErr_Clear(); return (struct Page *) page; } FITZEXCEPTION(_remove_links_to, !result) PyObject *_remove_links_to(PyObject *numbers) { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); remove_dest_range(gctx, pdf, numbers); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } CLOSECHECK0(_loadOutline, """Load first outline.""") struct Outline *_loadOutline() { fz_outline *ol = NULL; fz_document *doc = (fz_document *) $self; fz_try(gctx) { ol = fz_load_outline(gctx, doc); } fz_catch(gctx) { return NULL; } return (struct Outline *) ol; } void _dropOutline(struct Outline *ol) { DEBUGMSG1("Outline"); fz_outline *this_ol = (fz_outline *) ol; fz_drop_outline(gctx, this_ol); DEBUGMSG2; } FITZEXCEPTION(_insert_font, !result) CLOSECHECK0(_insert_font, """Utility: insert font from file or binary.""") PyObject * _insert_font(char *fontfile=NULL, PyObject *fontbuffer=NULL) { PyObject *value=NULL; pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); fz_try(gctx) { ASSERT_PDF(pdf); if (!fontfile && !EXISTS(fontbuffer)) { RAISEPY(gctx, MSG_FILE_OR_BUFFER, PyExc_ValueError); } value = JM_insert_font(gctx, pdf, NULL, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1); } fz_catch(gctx) { return NULL; } return value; } FITZEXCEPTION(get_outline_xrefs, !result) CLOSECHECK0(get_outline_xrefs, """Get list of outline xref numbers.""") PyObject * get_outline_xrefs() { PyObject *xrefs = PyList_New(0); pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); if (!pdf) { return xrefs; } fz_try(gctx) { pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); if (!root) goto finished; pdf_obj *olroot = pdf_dict_get(gctx, root, PDF_NAME(Outlines)); if (!olroot) goto finished; pdf_obj *first = pdf_dict_get(gctx, olroot, PDF_NAME(First)); if (!first) goto finished; xrefs = JM_outline_xrefs(gctx, first, xrefs); finished:; } fz_catch(gctx) { Py_DECREF(xrefs); return NULL; } return xrefs; } FITZEXCEPTION(xref_get_keys, !result) CLOSECHECK0(xref_get_keys, """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.""") PyObject * xref_get_keys(int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); pdf_obj *obj=NULL; PyObject *rc = NULL; int i, n; fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref > 0) { obj = pdf_load_object(gctx, pdf, xref); } else { obj = pdf_trailer(gctx, pdf); } n = pdf_dict_len(gctx, obj); rc = PyTuple_New(n); if (!n) goto finished; for (i = 0; i < n; i++) { const char *key = pdf_to_name(gctx, pdf_dict_get_key(gctx, obj, i)); PyTuple_SET_ITEM(rc, i, Py_BuildValue("s", key)); } finished:; } fz_always(gctx) { if (xref > 0) { pdf_drop_obj(gctx, obj); } } fz_catch(gctx) { return NULL; } return rc; } FITZEXCEPTION(xref_get_key, !result) CLOSECHECK0(xref_get_key, """Get PDF dict key value of object at 'xref'.""") PyObject * xref_get_key(int xref, const char *key) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); pdf_obj *obj=NULL, *subobj=NULL; PyObject *rc = NULL; fz_buffer *res = NULL; PyObject *text = NULL; fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref > 0) { obj = pdf_load_object(gctx, pdf, xref); } else { obj = pdf_trailer(gctx, pdf); } if (!obj) { goto not_found; } subobj = pdf_dict_getp(gctx, obj, key); if (!subobj) { goto not_found; } char *type; if (pdf_is_indirect(gctx, subobj)) { type = "xref"; text = PyUnicode_FromFormat("%i 0 R", pdf_to_num(gctx, subobj)); } else if (pdf_is_array(gctx, subobj)) { type = "array"; } else if (pdf_is_dict(gctx, subobj)) { type = "dict"; } else if (pdf_is_int(gctx, subobj)) { type = "int"; text = PyUnicode_FromFormat("%i", pdf_to_int(gctx, subobj)); } else if (pdf_is_real(gctx, subobj)) { type = "float"; } else if (pdf_is_null(gctx, subobj)) { type = "null"; text = PyUnicode_FromString("null"); } else if (pdf_is_bool(gctx, subobj)) { type = "bool"; if (pdf_to_bool(gctx, subobj)) { text = PyUnicode_FromString("true"); } else { text = PyUnicode_FromString("false"); } } else if (pdf_is_name(gctx, subobj)) { type = "name"; text = PyUnicode_FromFormat("/%s", pdf_to_name(gctx, subobj)); } else if (pdf_is_string(gctx, subobj)) { type = "string"; text = JM_UnicodeFromStr(pdf_to_text_string(gctx, subobj)); } else { type = "unknown"; } if (!text) { res = JM_object_to_buffer(gctx, subobj, 1, 0); text = JM_UnicodeFromBuffer(gctx, res); } rc = Py_BuildValue("sO", type, text); Py_DECREF(text); goto finished; not_found:; rc = Py_BuildValue("ss", "null", "null"); finished:; } fz_always(gctx) { if (xref > 0) { pdf_drop_obj(gctx, obj); } fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return rc; } FITZEXCEPTION(xref_set_key, !result) %pythonprepend xref_set_key %{ """Set the value of a PDF dictionary key.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if not key or not isinstance(key, str) or INVALID_NAME_CHARS.intersection(key) not in (set(), {"/"}): raise ValueError("bad 'key'") if not isinstance(value, str) or not value or value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set(): raise ValueError("bad 'value'") %} PyObject * xref_set_key(int xref, const char *key, char *value) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); pdf_obj *obj = NULL, *new_obj = NULL; int i, n; fz_try(gctx) { ASSERT_PDF(pdf); if (!key || strlen(key) == 0) { RAISEPY(gctx, "bad 'key'", PyExc_ValueError); } if (!value || strlen(value) == 0) { RAISEPY(gctx, "bad 'value'", PyExc_ValueError); } int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref != -1) { obj = pdf_load_object(gctx, pdf, xref); } else { obj = pdf_trailer(gctx, pdf); } // if val=="null" and no path hierarchy, delete "key" from object // chr(47) = "/" if (strcmp(value, "null") == 0 && strchr(key, 47) == NULL) { pdf_dict_dels(gctx, obj, key); goto finished; } new_obj = JM_set_object_value(gctx, obj, key, value); if (!new_obj) { goto finished; // did not work: skip update } if (xref != -1) { pdf_drop_obj(gctx, obj); obj = NULL; pdf_update_object(gctx, pdf, xref, new_obj); } else { n = pdf_dict_len(gctx, new_obj); for (i = 0; i < n; i++) { pdf_dict_put(gctx, obj, pdf_dict_get_key(gctx, new_obj, i), pdf_dict_get_val(gctx, new_obj, i)); } } finished:; } fz_always(gctx) { if (xref != -1) { pdf_drop_obj(gctx, obj); } pdf_drop_obj(gctx, new_obj); PyErr_Clear(); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_extend_toc_items, !result) CLOSECHECK0(_extend_toc_items, """Add color info to all items of an extended TOC list.""") PyObject * _extend_toc_items(PyObject *items) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *)$self); pdf_obj *bm, *col, *obj; int count, flags; PyObject *item=NULL, *itemdict=NULL, *xrefs, *bold, *italic, *collapse, *zoom; zoom = PyUnicode_FromString("zoom"); bold = PyUnicode_FromString("bold"); italic = PyUnicode_FromString("italic"); collapse = PyUnicode_FromString("collapse"); fz_try(gctx) { pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); if (!root) goto finished; pdf_obj *olroot = pdf_dict_get(gctx, root, PDF_NAME(Outlines)); if (!olroot) goto finished; pdf_obj *first = pdf_dict_get(gctx, olroot, PDF_NAME(First)); if (!first) goto finished; xrefs = PyList_New(0); // pre-allocate an empty list xrefs = JM_outline_xrefs(gctx, first, xrefs); Py_ssize_t i, n = PySequence_Size(xrefs), m = PySequence_Size(items); if (!n) goto finished; if (n != m) { RAISEPY(gctx, "internal error finding outline xrefs", PyExc_IndexError); } int xref; // update all TOC item dictionaries for (i = 0; i < n; i++) { JM_INT_ITEM(xrefs, i, &xref); item = PySequence_ITEM(items, i); itemdict = PySequence_ITEM(item, 3); if (!itemdict || !PyDict_Check(itemdict)) { RAISEPY(gctx, "need non-simple TOC format", PyExc_ValueError); } PyDict_SetItem(itemdict, dictkey_xref, PySequence_ITEM(xrefs, i)); bm = pdf_load_object(gctx, pdf, xref); flags = pdf_to_int(gctx, (pdf_dict_get(gctx, bm, PDF_NAME(F)))); if (flags == 1) { PyDict_SetItem(itemdict, italic, Py_True); } else if (flags == 2) { PyDict_SetItem(itemdict, bold, Py_True); } else if (flags == 3) { PyDict_SetItem(itemdict, italic, Py_True); PyDict_SetItem(itemdict, bold, Py_True); } count = pdf_to_int(gctx, (pdf_dict_get(gctx, bm, PDF_NAME(Count)))); if (count < 0) { PyDict_SetItem(itemdict, collapse, Py_True); } else if (count > 0) { PyDict_SetItem(itemdict, collapse, Py_False); } col = pdf_dict_get(gctx, bm, PDF_NAME(C)); if (pdf_is_array(gctx, col) && pdf_array_len(gctx, col) == 3) { PyObject *color = PyTuple_New(3); PyTuple_SET_ITEM(color, 0, Py_BuildValue("f", pdf_to_real(gctx, pdf_array_get(gctx, col, 0)))); PyTuple_SET_ITEM(color, 1, Py_BuildValue("f", pdf_to_real(gctx, pdf_array_get(gctx, col, 1)))); PyTuple_SET_ITEM(color, 2, Py_BuildValue("f", pdf_to_real(gctx, pdf_array_get(gctx, col, 2)))); DICT_SETITEM_DROP(itemdict, dictkey_color, color); } float z=0; obj = pdf_dict_get(gctx, bm, PDF_NAME(Dest)); if (!obj || !pdf_is_array(gctx, obj)) { obj = pdf_dict_getl(gctx, bm, PDF_NAME(A), PDF_NAME(D), NULL); } if (pdf_is_array(gctx, obj) && pdf_array_len(gctx, obj) == 5) { z = pdf_to_real(gctx, pdf_array_get(gctx, obj, 4)); } DICT_SETITEM_DROP(itemdict, zoom, Py_BuildValue("f", z)); PyList_SetItem(item, 3, itemdict); PyList_SetItem(items, i, item); pdf_drop_obj(gctx, bm); bm = NULL; } finished:; } fz_always(gctx) { Py_CLEAR(xrefs); Py_CLEAR(bold); Py_CLEAR(italic); Py_CLEAR(collapse); Py_CLEAR(zoom); pdf_drop_obj(gctx, bm); PyErr_Clear(); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // EmbeddedFiles utility functions //---------------------------------------------------------------- FITZEXCEPTION(_embfile_names, !result) CLOSECHECK0(_embfile_names, """Get list of embedded file names.""") PyObject *_embfile_names(PyObject *namelist) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); fz_try(gctx) { ASSERT_PDF(pdf); PyObject *val; pdf_obj *names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); if (pdf_is_array(gctx, names)) { int i, n = pdf_array_len(gctx, names); for (i=0; i < n; i+=2) { val = JM_EscapeStrFromStr(pdf_to_text_string(gctx, pdf_array_get(gctx, names, i))); LIST_APPEND_DROP(namelist, val); } } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_embfile_del, !result) PyObject *_embfile_del(int idx) { fz_try(gctx) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); pdf_obj *names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); pdf_array_delete(gctx, names, idx + 1); pdf_array_delete(gctx, names, idx); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_embfile_info, !result) PyObject *_embfile_info(int idx, PyObject *infodict) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); char *name; int xref = 0, ci_xref=0; fz_try(gctx) { pdf_obj *names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); pdf_obj *o = pdf_array_get(gctx, names, 2*idx+1); pdf_obj *ci = pdf_dict_get(gctx, o, PDF_NAME(CI)); if (ci) { ci_xref = pdf_to_num(gctx, ci); } DICT_SETITEMSTR_DROP(infodict, "collection", Py_BuildValue("i", ci_xref)); name = (char *) pdf_to_text_string(gctx, pdf_dict_get(gctx, o, PDF_NAME(F))); DICT_SETITEM_DROP(infodict, dictkey_filename, JM_EscapeStrFromStr(name)); name = (char *) pdf_to_text_string(gctx, pdf_dict_get(gctx, o, PDF_NAME(UF))); DICT_SETITEM_DROP(infodict, dictkey_ufilename, JM_EscapeStrFromStr(name)); name = (char *) pdf_to_text_string(gctx, pdf_dict_get(gctx, o, PDF_NAME(Desc))); DICT_SETITEM_DROP(infodict, dictkey_desc, JM_UnicodeFromStr(name)); int len = -1, DL = -1; pdf_obj *fileentry = pdf_dict_getl(gctx, o, PDF_NAME(EF), PDF_NAME(F), NULL); xref = pdf_to_num(gctx, fileentry); o = pdf_dict_get(gctx, fileentry, PDF_NAME(Length)); if (o) len = pdf_to_int(gctx, o); o = pdf_dict_get(gctx, fileentry, PDF_NAME(DL)); if (o) { DL = pdf_to_int(gctx, o); } else { o = pdf_dict_getl(gctx, fileentry, PDF_NAME(Params), PDF_NAME(Size), NULL); if (o) DL = pdf_to_int(gctx, o); } DICT_SETITEM_DROP(infodict, dictkey_size, Py_BuildValue("i", DL)); DICT_SETITEM_DROP(infodict, dictkey_length, Py_BuildValue("i", len)); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } FITZEXCEPTION(_embfile_upd, !result) PyObject *_embfile_upd(int idx, PyObject *buffer = NULL, char *filename = NULL, char *ufilename = NULL, char *desc = NULL) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); fz_buffer *res = NULL; fz_var(res); int xref = 0; fz_try(gctx) { pdf_obj *names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); pdf_obj *entry = pdf_array_get(gctx, names, 2*idx+1); pdf_obj *filespec = pdf_dict_getl(gctx, entry, PDF_NAME(EF), PDF_NAME(F), NULL); if (!filespec) { RAISEPY(gctx, "bad PDF: no /EF object", JM_Exc_FileDataError); } res = JM_BufferFromBytes(gctx, buffer); if (EXISTS(buffer) && !res) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_TypeError); } if (res && buffer != Py_None) { JM_update_stream(gctx, pdf, filespec, res, 1); // adjust /DL and /Size parameters int64_t len = (int64_t) fz_buffer_storage(gctx, res, NULL); pdf_obj *l = pdf_new_int(gctx, len); pdf_dict_put(gctx, filespec, PDF_NAME(DL), l); pdf_dict_putl(gctx, filespec, l, PDF_NAME(Params), PDF_NAME(Size), NULL); } xref = pdf_to_num(gctx, filespec); if (filename) pdf_dict_put_text_string(gctx, entry, PDF_NAME(F), filename); if (ufilename) pdf_dict_put_text_string(gctx, entry, PDF_NAME(UF), ufilename); if (desc) pdf_dict_put_text_string(gctx, entry, PDF_NAME(Desc), desc); } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) return NULL; return Py_BuildValue("i", xref); } FITZEXCEPTION(_embeddedFileGet, !result) PyObject *_embeddedFileGet(int idx) { fz_document *doc = (fz_document *) $self; PyObject *cont = NULL; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); fz_buffer *buf = NULL; fz_var(buf); fz_try(gctx) { pdf_obj *names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); pdf_obj *entry = pdf_array_get(gctx, names, 2*idx+1); pdf_obj *filespec = pdf_dict_getl(gctx, entry, PDF_NAME(EF), PDF_NAME(F), NULL); buf = pdf_load_stream(gctx, filespec); cont = JM_BinFromBuffer(gctx, buf); } fz_always(gctx) { fz_drop_buffer(gctx, buf); } fz_catch(gctx) { return NULL; } return cont; } FITZEXCEPTION(_embfile_add, !result) PyObject *_embfile_add(const char *name, PyObject *buffer, char *filename=NULL, char *ufilename=NULL, char *desc=NULL) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); fz_buffer *data = NULL; fz_var(data); pdf_obj *names = NULL; int xref = 0; // xref of file entry fz_try(gctx) { ASSERT_PDF(pdf); data = JM_BufferFromBytes(gctx, buffer); if (!data) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_TypeError); } names = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); if (!pdf_is_array(gctx, names)) { pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); names = pdf_new_array(gctx, pdf, 6); // an even number! pdf_dict_putl_drop(gctx, root, names, PDF_NAME(Names), PDF_NAME(EmbeddedFiles), PDF_NAME(Names), NULL); } pdf_obj *fileentry = JM_embed_file(gctx, pdf, data, filename, ufilename, desc, 1); xref = pdf_to_num(gctx, pdf_dict_getl(gctx, fileentry, PDF_NAME(EF), PDF_NAME(F), NULL)); pdf_array_push_drop(gctx, names, pdf_new_text_string(gctx, name)); pdf_array_push_drop(gctx, names, fileentry); } fz_always(gctx) { fz_drop_buffer(gctx, data); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } %pythoncode %{ def embfile_names(self) -> list: """Get list of names of EmbeddedFiles.""" filenames = [] self._embfile_names(filenames) return filenames def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int: filenames = self.embfile_names() msg = "'%s' not in EmbeddedFiles array." % str(item) if item in filenames: idx = filenames.index(item) elif item in range(len(filenames)): idx = item else: raise ValueError(msg) return idx def embfile_count(self) -> int: """Get number of EmbeddedFiles.""" return len(self.embfile_names()) def embfile_del(self, item: typing.Union[int, str]): """Delete an entry from EmbeddedFiles. Notes: The argument must be name or index of an EmbeddedFiles item. Physical deletion of data will happen on save to a new file with appropriate garbage option. Args: item: name or number of item. Returns: None """ idx = self._embeddedFileIndex(item) return self._embfile_del(idx) def embfile_info(self, item: typing.Union[int, str]) -> dict: """Get information of an item in the EmbeddedFiles array. Args: item: number or name of item. Returns: Information dictionary. """ idx = self._embeddedFileIndex(item) infodict = {"name": self.embfile_names()[idx]} xref = self._embfile_info(idx, infodict) t, date = self.xref_get_key(xref, "Params/CreationDate") if t != "null": infodict["creationDate"] = date t, date = self.xref_get_key(xref, "Params/ModDate") if t != "null": infodict["modDate"] = date t, md5 = self.xref_get_key(xref, "Params/CheckSum") if t != "null": infodict["checksum"] = binascii.hexlify(md5.encode()).decode() return infodict def embfile_get(self, item: typing.Union[int, str]) -> bytes: """Get the content of an item in the EmbeddedFiles array. Args: item: number or name of item. Returns: (bytes) The file content. """ idx = self._embeddedFileIndex(item) return self._embeddedFileGet(idx) def embfile_upd(self, item: typing.Union[int, str], buffer: OptBytes =None, filename: OptStr =None, ufilename: OptStr =None, desc: OptStr =None,) -> None: """Change an item of the EmbeddedFiles array. Notes: Only provided parameters are changed. If all are omitted, the method is a no-op. Args: item: number or name of item. buffer: (binary data) the new file content. filename: (str) the new file name. ufilename: (unicode) the new filen ame. desc: (str) the new description. """ idx = self._embeddedFileIndex(item) xref = self._embfile_upd(idx, buffer=buffer, filename=filename, ufilename=ufilename, desc=desc) date = get_pdf_now() self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) return xref def embfile_add(self, name: str, buffer: ByteString, filename: OptStr =None, ufilename: OptStr =None, desc: OptStr =None,) -> None: """Add an item to the EmbeddedFiles array. Args: name: name of the new item, must not already exist. buffer: (binary data) the file content. filename: (str) the file name, default: the name ufilename: (unicode) the file name, default: filename desc: (str) the description. """ filenames = self.embfile_names() msg = "Name '%s' already exists." % str(name) if name in filenames: raise ValueError(msg) if filename is None: filename = name if ufilename is None: ufilename = unicode(filename, "utf8") if str is bytes else filename if desc is None: desc = name xref = self._embfile_add(name, buffer=buffer, filename=filename, ufilename=ufilename, desc=desc) date = get_pdf_now() self.xref_set_key(xref, "Type", "/EmbeddedFile") self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date)) self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) return xref %} FITZEXCEPTION(convert_to_pdf, !result) %pythonprepend convert_to_pdf %{ """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") %} PyObject *convert_to_pdf(int from_page=0, int to_page=-1, int rotate=0) { PyObject *doc = NULL; fz_document *fz_doc = (fz_document *) $self; fz_try(gctx) { int fp = from_page, tp = to_page, srcCount = fz_count_pages(gctx, fz_doc); if (fp < 0) fp = 0; if (fp > srcCount - 1) fp = srcCount - 1; if (tp < 0) tp = srcCount - 1; if (tp > srcCount - 1) tp = srcCount - 1; Py_ssize_t len0 = PyList_Size(JM_mupdf_warnings_store); doc = JM_convert_to_pdf(gctx, fz_doc, fp, tp, rotate); Py_ssize_t len1 = PyList_Size(JM_mupdf_warnings_store); Py_ssize_t i = len0; while (i < len1) { PySys_WriteStderr("%s\n", JM_StrAsChar(PyList_GetItem(JM_mupdf_warnings_store, i))); i++; } } fz_catch(gctx) { return NULL; } if (doc) { return doc; } Py_RETURN_NONE; } FITZEXCEPTION(page_count, !result) CLOSECHECK0(page_count, """Number of pages.""") %pythoncode%{@property%} PyObject *page_count() { PyObject *ret; fz_try(gctx) { ret = PyLong_FromLong((long) fz_count_pages(gctx, (fz_document *) $self)); } fz_catch(gctx) { PyErr_Clear(); return NULL; } return ret; } FITZEXCEPTION(chapter_count, !result) CLOSECHECK0(chapter_count, """Number of chapters.""") %pythoncode%{@property%} PyObject *chapter_count() { PyObject *ret; fz_try(gctx) { ret = PyLong_FromLong((long) fz_count_chapters(gctx, (fz_document *) $self)); } fz_catch(gctx) { return NULL; } return ret; } FITZEXCEPTION(last_location, !result) CLOSECHECK0(last_location, """Id (chapter, page) of last page.""") %pythoncode%{@property%} PyObject *last_location() { fz_document *this_doc = (fz_document *) $self; fz_location last_loc; fz_try(gctx) { last_loc = fz_last_page(gctx, this_doc); } fz_catch(gctx) { return NULL; } return Py_BuildValue("ii", last_loc.chapter, last_loc.page); } FITZEXCEPTION(chapter_page_count, !result) CLOSECHECK0(chapter_page_count, """Page count of chapter.""") PyObject *chapter_page_count(int chapter) { long pages = 0; fz_try(gctx) { int chapters = fz_count_chapters(gctx, (fz_document *) $self); if (chapter < 0 || chapter >= chapters) { RAISEPY(gctx, "bad chapter number", PyExc_ValueError); } pages = (long) fz_count_chapter_pages(gctx, (fz_document *) $self, chapter); } fz_catch(gctx) { return NULL; } return PyLong_FromLong(pages); } FITZEXCEPTION(prev_location, !result) %pythonprepend prev_location %{ """Get (chapter, page) of previous page.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if type(page_id) is int: page_id = (0, page_id) if page_id not in self: raise ValueError("page id not in document") if page_id == (0, 0): return () %} PyObject *prev_location(PyObject *page_id) { fz_document *this_doc = (fz_document *) $self; fz_location prev_loc, loc; PyObject *val; int pno; fz_try(gctx) { val = PySequence_GetItem(page_id, 0); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } int chapter = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } val = PySequence_GetItem(page_id, 1); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } pno = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } loc = fz_make_location(chapter, pno); prev_loc = fz_previous_page(gctx, this_doc, loc); } fz_catch(gctx) { PyErr_Clear(); return NULL; } return Py_BuildValue("ii", prev_loc.chapter, prev_loc.page); } FITZEXCEPTION(next_location, !result) %pythonprepend next_location %{ """Get (chapter, page) of next page.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if type(page_id) is int: page_id = (0, page_id) if page_id not in self: raise ValueError("page id not in document") if tuple(page_id) == self.last_location: return () %} PyObject *next_location(PyObject *page_id) { fz_document *this_doc = (fz_document *) $self; fz_location next_loc, loc; PyObject *val; int pno; fz_try(gctx) { val = PySequence_GetItem(page_id, 0); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } int chapter = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } val = PySequence_GetItem(page_id, 1); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } pno = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } loc = fz_make_location(chapter, pno); next_loc = fz_next_page(gctx, this_doc, loc); } fz_catch(gctx) { PyErr_Clear(); return NULL; } return Py_BuildValue("ii", next_loc.chapter, next_loc.page); } FITZEXCEPTION(location_from_page_number, !result) CLOSECHECK0(location_from_page_number, """Convert pno to (chapter, page).""") PyObject *location_from_page_number(int pno) { fz_document *this_doc = (fz_document *) $self; fz_location loc = fz_make_location(-1, -1); int page_count = fz_count_pages(gctx, this_doc); while (pno < 0) pno += page_count; fz_try(gctx) { if (pno >= page_count) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } loc = fz_location_from_page_number(gctx, this_doc, pno); } fz_catch(gctx) { return NULL; } return Py_BuildValue("ii", loc.chapter, loc.page); } FITZEXCEPTION(page_number_from_location, !result) %pythonprepend page_number_from_location%{ """Convert (chapter, pno) to page number.""" if type(page_id) is int: np = self.page_count while page_id < 0: page_id += np page_id = (0, page_id) if page_id not in self: raise ValueError("page id not in document") %} PyObject *page_number_from_location(PyObject *page_id) { fz_document *this_doc = (fz_document *) $self; fz_location loc; long page_n = -1; PyObject *val; int pno; fz_try(gctx) { val = PySequence_GetItem(page_id, 0); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } int chapter = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } val = PySequence_GetItem(page_id, 1); if (!val) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } pno = (int) PyLong_AsLong(val); Py_DECREF(val); if (PyErr_Occurred()) { RAISEPY(gctx, MSG_BAD_PAGEID, PyExc_ValueError); } loc = fz_make_location(chapter, pno); page_n = (long) fz_page_number_from_location(gctx, this_doc, loc); } fz_catch(gctx) { PyErr_Clear(); return NULL; } return PyLong_FromLong(page_n); } FITZEXCEPTION(_getMetadata, !result) CLOSECHECK0(_getMetadata, """Get metadata.""") PyObject * _getMetadata(const char *key) { PyObject *res = NULL; fz_document *doc = (fz_document *) $self; int vsize; char *value; fz_try(gctx) { vsize = fz_lookup_metadata(gctx, doc, key, NULL, 0)+1; if(vsize > 1) { value = JM_Alloc(char, vsize); fz_lookup_metadata(gctx, doc, key, value, vsize); res = JM_UnicodeFromStr(value); JM_Free(value); } else { res = EMPTY_STRING; } } fz_always(gctx) { PyErr_Clear(); } fz_catch(gctx) { return EMPTY_STRING; } return res; } CLOSECHECK0(needs_pass, """Indicate password required.""") %pythoncode%{@property%} PyObject *needs_pass() { return JM_BOOL(fz_needs_password(gctx, (fz_document *) $self)); } %pythoncode%{@property%} CLOSECHECK0(language, """Document language.""") PyObject *language() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_NONE; fz_text_language lang = pdf_document_language(gctx, pdf); char buf[8]; if (lang == FZ_LANG_UNSET) Py_RETURN_NONE; return PyUnicode_FromString(fz_string_from_text_language(buf, lang)); } FITZEXCEPTION(set_language, !result) PyObject *set_language(char *language=NULL) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); fz_text_language lang; if (!language) lang = FZ_LANG_UNSET; else lang = fz_text_language_from_string(language); pdf_set_document_language(gctx, pdf, lang); } fz_catch(gctx) { return NULL; } Py_RETURN_TRUE; } %pythonprepend resolve_link %{ """Calculate internal link destination. Args: uri: (str) some Link.uri chapters: (bool) whether to use (chapter, page) format Returns: (page_id, x, y) where x, y are point coordinates on the page. page_id is either page number (if chapters=0), or (chapter, pno). """ %} PyObject *resolve_link(char *uri=NULL, int chapters=0) { if (!uri) { if (chapters) return Py_BuildValue("(ii)ff", -1, -1, 0, 0); return Py_BuildValue("iff", -1, 0, 0); } fz_document *this_doc = (fz_document *) $self; float xp = 0, yp = 0; fz_location loc = {0, 0}; fz_try(gctx) { loc = fz_resolve_link(gctx, (fz_document *) $self, uri, &xp, &yp); } fz_catch(gctx) { if (chapters) return Py_BuildValue("(ii)ff", -1, -1, 0, 0); return Py_BuildValue("iff", -1, 0, 0); } if (chapters) return Py_BuildValue("(ii)ff", loc.chapter, loc.page, xp, yp); int pno = fz_page_number_from_location(gctx, this_doc, loc); return Py_BuildValue("iff", pno, xp, yp); } FITZEXCEPTION(layout, !result) CLOSECHECK(layout, """Re-layout a reflowable document.""") %pythonappend layout %{ self._reset_page_refs() self.init_doc()%} PyObject *layout(PyObject *rect = NULL, float width = 0, float height = 0, float fontsize = 11) { fz_document *doc = (fz_document *) $self; if (!fz_is_document_reflowable(gctx, doc)) Py_RETURN_NONE; fz_try(gctx) { float w = width, h = height; fz_rect r = JM_rect_from_py(rect); if (!fz_is_infinite_rect(r)) { w = r.x1 - r.x0; h = r.y1 - r.y0; } if (w <= 0.0f || h <= 0.0f) { RAISEPY(gctx, "bad page size", PyExc_ValueError); } fz_layout_document(gctx, doc, w, h, fontsize); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(make_bookmark, !result) CLOSECHECK(make_bookmark, """Make a page pointer before layouting document.""") PyObject *make_bookmark(PyObject *loc) { fz_document *doc = (fz_document *) $self; fz_location location; fz_bookmark mark; fz_try(gctx) { if (JM_INT_ITEM(loc, 0, &location.chapter) == 1) { RAISEPY(gctx, MSG_BAD_LOCATION, PyExc_ValueError); } if (JM_INT_ITEM(loc, 1, &location.page) == 1) { RAISEPY(gctx, MSG_BAD_LOCATION, PyExc_ValueError); } mark = fz_make_bookmark(gctx, doc, location); if (!mark) { RAISEPY(gctx, MSG_BAD_LOCATION, PyExc_ValueError); } } fz_catch(gctx) { return NULL; } return PyLong_FromVoidPtr((void *) mark); } FITZEXCEPTION(find_bookmark, !result) CLOSECHECK(find_bookmark, """Find new location after layouting a document.""") PyObject *find_bookmark(PyObject *bm) { fz_document *doc = (fz_document *) $self; fz_location location; fz_try(gctx) { intptr_t mark = (intptr_t) PyLong_AsVoidPtr(bm); location = fz_lookup_bookmark(gctx, doc, mark); } fz_catch(gctx) { return NULL; } return Py_BuildValue("ii", location.chapter, location.page); } CLOSECHECK0(is_reflowable, """Check if document is layoutable.""") %pythoncode%{@property%} PyObject *is_reflowable() { return JM_BOOL(fz_is_document_reflowable(gctx, (fz_document *) $self)); } FITZEXCEPTION(_deleteObject, !result) CLOSECHECK0(_deleteObject, """Delete object.""") PyObject *_deleteObject(int xref) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); fz_try(gctx) { ASSERT_PDF(pdf); if (!INRANGE(xref, 1, pdf_xref_len(gctx, pdf)-1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } pdf_delete_object(gctx, pdf, xref); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(pdf_catalog, !result) CLOSECHECK0(pdf_catalog, """Get xref of PDF catalog.""") PyObject *pdf_catalog() { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); int xref = 0; if (!pdf) return Py_BuildValue("i", xref); fz_try(gctx) { pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); xref = pdf_to_num(gctx, root); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } FITZEXCEPTION(_getPDFfileid, !result) CLOSECHECK0(_getPDFfileid, """Get PDF file id.""") PyObject *_getPDFfileid() { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); if (!pdf) Py_RETURN_NONE; PyObject *idlist = PyList_New(0); fz_buffer *buffer = NULL; unsigned char *hex; pdf_obj *o; int n, i, len; PyObject *bytes; fz_try(gctx) { pdf_obj *identity = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(ID)); if (identity) { n = pdf_array_len(gctx, identity); for (i = 0; i < n; i++) { o = pdf_array_get(gctx, identity, i); len = (int) pdf_to_str_len(gctx, o); buffer = fz_new_buffer(gctx, 2 * len); fz_buffer_storage(gctx, buffer, &hex); hexlify(len, (unsigned char *) pdf_to_text_string(gctx, o), hex); LIST_APPEND_DROP(idlist, JM_UnicodeFromStr(hex)); Py_CLEAR(bytes); fz_drop_buffer(gctx, buffer); buffer = NULL; } } } fz_catch(gctx) { fz_drop_buffer(gctx, buffer); } return idlist; } CLOSECHECK0(version_count, """Count versions of PDF document.""") %pythoncode%{@property%} PyObject *version_count() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) return Py_BuildValue("i", 0); return Py_BuildValue("i", pdf_count_versions(gctx, pdf)); } CLOSECHECK0(is_pdf, """Check for PDF.""") %pythoncode%{@property%} PyObject *is_pdf() { if (pdf_specifics(gctx, (fz_document *) $self)) Py_RETURN_TRUE; else Py_RETURN_FALSE; } #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR <= 21 /* The underlying struct members that these methods give access to, are not available. */ CLOSECHECK0(has_xref_streams, """Check if xref table is a stream.""") %pythoncode%{@property%} PyObject *has_xref_streams() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; if (pdf->has_xref_streams) Py_RETURN_TRUE; Py_RETURN_FALSE; } CLOSECHECK0(has_old_style_xrefs, """Check if xref table is old style.""") %pythoncode%{@property%} PyObject *has_old_style_xrefs() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; if (pdf->has_old_style_xrefs) Py_RETURN_TRUE; Py_RETURN_FALSE; } #endif CLOSECHECK0(is_dirty, """True if PDF has unsaved changes.""") %pythoncode%{@property%} PyObject *is_dirty() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; return JM_BOOL(pdf_has_unsaved_changes(gctx, pdf)); } CLOSECHECK0(can_save_incrementally, """Check whether incremental saves are possible.""") PyObject *can_save_incrementally() { pdf_document *pdf = pdf_document_from_fz_document(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; // gracefully handle non-PDF return JM_BOOL(pdf_can_be_saved_incrementally(gctx, pdf)); } CLOSECHECK0(is_fast_webaccess, """Check whether we have a linearized PDF.""") %pythoncode%{@property%} PyObject *is_fast_webaccess() { pdf_document *pdf = pdf_document_from_fz_document(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; // gracefully handle non-PDF return JM_BOOL(pdf_doc_was_linearized(gctx, pdf)); } CLOSECHECK0(is_repaired, """Check whether PDF was repaired.""") %pythoncode%{@property%} PyObject *is_repaired() { pdf_document *pdf = pdf_document_from_fz_document(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; // gracefully handle non-PDF return JM_BOOL(pdf_was_repaired(gctx, pdf)); } FITZEXCEPTION(save_snapshot, !result) %pythonprepend save_snapshot %{ """Save a file snapshot suitable for journalling.""" if self.is_closed: raise ValueError("doc is closed") if type(filename) == str: pass elif hasattr(filename, "open"): # assume: pathlib.Path filename = str(filename) elif hasattr(filename, "name"): # assume: file object filename = filename.name else: raise ValueError("filename must be str, Path or file object") if filename == self.name: raise ValueError("cannot snapshot to original") %} PyObject *save_snapshot(const char *filename) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); pdf_save_snapshot(gctx, pdf, filename); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } CLOSECHECK0(authenticate, """Decrypt document.""") %pythonappend authenticate %{ if val: # the doc is decrypted successfully and we init the outline self.is_encrypted = False self.isEncrypted = False self.init_doc() self.thisown = True %} PyObject *authenticate(char *password) { return Py_BuildValue("i", fz_authenticate_password(gctx, (fz_document *) $self, (const char *) password)); } //------------------------------------------------------------------ // save a PDF //------------------------------------------------------------------ FITZEXCEPTION(save, !result) %pythonprepend save %{ """Save PDF to file, pathlib.Path or file pointer.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if type(filename) == str: pass elif hasattr(filename, "open"): # assume: pathlib.Path filename = str(filename) elif hasattr(filename, "name"): # assume: file object filename = filename.name elif not hasattr(filename, "seek"): # assume file object raise ValueError("filename must be str, Path or file object") if filename == self.name and not incremental: raise ValueError("save to original must be incremental") if self.page_count < 1: raise ValueError("cannot save with zero pages") if incremental: if self.name != filename or self.stream: raise ValueError("incremental needs original file") if user_pw and len(user_pw) > 40 or owner_pw and len(owner_pw) > 40: raise ValueError("password length must not exceed 40") %} PyObject * save(PyObject *filename, int garbage=0, int clean=0, int deflate=0, int deflate_images=0, int deflate_fonts=0, int incremental=0, int ascii=0, int expand=0, int linear=0, int no_new_id=0, int appearance=0, int pretty=0, int encryption=1, int permissions=4095, char *owner_pw=NULL, char *user_pw=NULL) { pdf_write_options opts = pdf_default_write_options; opts.do_incremental = incremental; opts.do_ascii = ascii; opts.do_compress = deflate; opts.do_compress_images = deflate_images; opts.do_compress_fonts = deflate_fonts; opts.do_decompress = expand; opts.do_garbage = garbage; opts.do_pretty = pretty; opts.do_linear = linear; opts.do_clean = clean; opts.do_sanitize = clean; opts.dont_regenerate_id = no_new_id; opts.do_appearance = appearance; opts.do_encrypt = encryption; opts.permissions = permissions; if (owner_pw) { memcpy(&opts.opwd_utf8, owner_pw, strlen(owner_pw)+1); } else if (user_pw) { memcpy(&opts.opwd_utf8, user_pw, strlen(user_pw)+1); } if (user_pw) { memcpy(&opts.upwd_utf8, user_pw, strlen(user_pw)+1); } fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); fz_output *out = NULL; fz_try(gctx) { ASSERT_PDF(pdf); pdf->resynth_required = 0; JM_embedded_clean(gctx, pdf); if (no_new_id == 0) { JM_ensure_identity(gctx, pdf); } if (PyUnicode_Check(filename)) { pdf_save_document(gctx, pdf, JM_StrAsChar(filename), &opts); } else { out = JM_new_output_fileptr(gctx, filename); pdf_write_document(gctx, pdf, out, &opts); } } fz_always(gctx) { fz_drop_output(gctx, out); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def write(self, garbage=False, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, incremental=False, ascii=False, expand=False, linear=False, no_new_id=False, appearance=False, pretty=False, encryption=1, permissions=4095, owner_pw=None, user_pw=None): from io import BytesIO bio = BytesIO() self.save(bio, garbage=garbage, clean=clean, no_new_id=no_new_id, appearance=appearance, deflate=deflate, deflate_images=deflate_images, deflate_fonts=deflate_fonts, incremental=incremental, ascii=ascii, expand=expand, linear=linear, pretty=pretty, encryption=encryption, permissions=permissions, owner_pw=owner_pw, user_pw=user_pw) return bio.getvalue() %} //---------------------------------------------------------------- // Insert pages from a source PDF into this PDF. // For reconstructing the links (_do_links method), we must save the // insertion point (start_at) if it was specified as -1. //---------------------------------------------------------------- FITZEXCEPTION(insert_pdf, !result) %pythonprepend insert_pdf %{ """Insert a page range from another PDF. Args: docsrc: PDF to copy from. Must be different object, but may be same file. from_page: (int) first source page to copy, 0-based, default 0. to_page: (int) last source page to copy, 0-based, default last page. start_at: (int) from_page will become this page number in target. rotate: (int) rotate copied pages, default -1 is no change. links: (int/bool) whether to also copy links. annots: (int/bool) whether to also copy annotations. show_progress: (int) progress message interval, 0 is no messages. final: (bool) indicates last insertion from this source PDF. _gmap: internal use only Copy sequence reversed if from_page > to_page.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if self._graft_id == docsrc._graft_id: raise ValueError("source and target cannot be same object") sa = start_at if sa < 0: sa = self.page_count if len(docsrc) > show_progress > 0: inname = os.path.basename(docsrc.name) if not inname: inname = "memory PDF" outname = os.path.basename(self.name) if not outname: outname = "memory PDF" print("Inserting '%s' at '%s'" % (inname, outname)) # retrieve / make a Graftmap to avoid duplicate objects isrt = docsrc._graft_id _gmap = self.Graftmaps.get(isrt, None) if _gmap is None: _gmap = Graftmap(self) self.Graftmaps[isrt] = _gmap %} %pythonappend insert_pdf %{ self._reset_page_refs() if links: self._do_links(docsrc, from_page = from_page, to_page = to_page, start_at = sa) if final == 1: self.Graftmaps[isrt] = None%} PyObject * insert_pdf(struct Document *docsrc, int from_page=-1, int to_page=-1, int start_at=-1, int rotate=-1, int links=1, int annots=1, int show_progress=0, int final = 1, struct Graftmap *_gmap=NULL) { fz_document *doc = (fz_document *) $self; fz_document *src = (fz_document *) docsrc; pdf_document *pdfout = pdf_specifics(gctx, doc); pdf_document *pdfsrc = pdf_specifics(gctx, src); int outCount = fz_count_pages(gctx, doc); int srcCount = fz_count_pages(gctx, src); // local copies of page numbers int fp = from_page, tp = to_page, sa = start_at; // normalize page numbers fp = Py_MAX(fp, 0); // -1 = first page fp = Py_MIN(fp, srcCount - 1); // but do not exceed last page if (tp < 0) tp = srcCount - 1; // -1 = last page tp = Py_MIN(tp, srcCount - 1); // but do not exceed last page if (sa < 0) sa = outCount; // -1 = behind last page sa = Py_MIN(sa, outCount); // but that is also the limit fz_try(gctx) { if (!pdfout || !pdfsrc) { RAISEPY(gctx, "source or target not a PDF", PyExc_TypeError); } ENSURE_OPERATION(gctx, pdfout); JM_merge_range(gctx, pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, (pdf_graft_map *) _gmap); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def insert_file(self, infile, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True,show_progress=0, final=1): """Insert an arbitrary supported document to an existing PDF. The infile may be given as a filename, a Document or a Pixmap. Other paramters - where applicable - equal those of insert_pdf(). """ src = None if isinstance(infile, Pixmap): if infile.colorspace.n > 3: infile = Pixmap(csRGB, infile) src = Document("png", infile.tobytes()) elif isinstance(infile, Document): src = infile else: src = Document(infile) if not src: raise ValueError("bad infile parameter") if not src.is_pdf: pdfbytes = src.convert_to_pdf() src = Document("pdf", pdfbytes) return self.insert_pdf(src, from_page=from_page, to_page=to_page, start_at=start_at, rotate=rotate,links=links, annots=annots, show_progress=show_progress, final=final) %} //------------------------------------------------------------------ // Create and insert a new page (PDF) //------------------------------------------------------------------ FITZEXCEPTION(_newPage, !result) CLOSECHECK(_newPage, """Make a new PDF page.""") %pythonappend _newPage %{self._reset_page_refs()%} PyObject *_newPage(int pno=-1, float width=595, float height=842) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_rect mediabox = fz_unit_rect; mediabox.x1 = width; mediabox.y1 = height; pdf_obj *resources = NULL, *page_obj = NULL; fz_buffer *contents = NULL; fz_var(contents); fz_var(page_obj); fz_var(resources); fz_try(gctx) { ASSERT_PDF(pdf); if (pno < -1) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } ENSURE_OPERATION(gctx, pdf); // create /Resources and /Contents objects resources = pdf_add_new_dict(gctx, pdf, 1); page_obj = pdf_add_page(gctx, pdf, mediabox, 0, resources, contents); pdf_insert_page(gctx, pdf, pno, page_obj); } fz_always(gctx) { fz_drop_buffer(gctx, contents); pdf_drop_obj(gctx, page_obj); pdf_drop_obj(gctx, resources); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // Create sub-document to keep only selected pages. // Parameter is a Python sequence of the wanted page numbers. //------------------------------------------------------------------ FITZEXCEPTION(select, !result) %pythonprepend select %{"""Build sub-pdf with page numbers in the list.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if not self.is_pdf: raise ValueError("is no PDF") if not hasattr(pyliste, "__getitem__"): raise ValueError("sequence required") if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not in range(len(self)): raise ValueError("bad page number(s)") pyliste = tuple(pyliste)%} %pythonappend select %{self._reset_page_refs()%} PyObject *select(PyObject *pyliste) { // preparatory stuff: // (1) get underlying pdf document, // (2) transform Python list into integer array pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); int *pages = NULL; fz_try(gctx) { // call retainpages (code copy of fz_clean_file.c) int i, len = (int) PyTuple_Size(pyliste); pages = fz_realloc_array(gctx, pages, len, int); for (i = 0; i < len; i++) { pages[i] = (int) PyLong_AsLong(PyTuple_GET_ITEM(pyliste, (Py_ssize_t) i)); } pdf_rearrange_pages(gctx, pdf, len, pages); if (pdf->rev_page_map) { pdf_drop_page_tree(gctx, pdf); } } fz_always(gctx) { fz_free(gctx, pages); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // remove one page //------------------------------------------------------------------ FITZEXCEPTION(_delete_page, !result) PyObject *_delete_page(int pno) { fz_try(gctx) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); pdf_delete_page(gctx, pdf, pno); if (pdf->rev_page_map) { pdf_drop_page_tree(gctx, pdf); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // get document permissions //------------------------------------------------------------------ %pythoncode%{@property%} %pythonprepend permissions %{ """Document permissions.""" if self.is_encrypted: return 0 %} PyObject *permissions() { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_document_from_fz_document(gctx, doc); // for PDF return result of standard function if (pdf) return Py_BuildValue("i", pdf_document_permissions(gctx, pdf)); // otherwise simulate the PDF return value int perm = (int) 0xFFFFFFFC; // all permissions granted // now switch off where needed if (!fz_has_permission(gctx, doc, FZ_PERMISSION_PRINT)) perm = perm ^ PDF_PERM_PRINT; if (!fz_has_permission(gctx, doc, FZ_PERMISSION_EDIT)) perm = perm ^ PDF_PERM_MODIFY; if (!fz_has_permission(gctx, doc, FZ_PERMISSION_COPY)) perm = perm ^ PDF_PERM_COPY; if (!fz_has_permission(gctx, doc, FZ_PERMISSION_ANNOTATE)) perm = perm ^ PDF_PERM_ANNOTATE; return Py_BuildValue("i", perm); } FITZEXCEPTION(journal_enable, !result) CLOSECHECK(journal_enable, """Activate document journalling.""") PyObject *journal_enable() { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); pdf_enable_journal(gctx, pdf); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(journal_start_op, !result) CLOSECHECK(journal_start_op, """Begin a journalling operation.""") PyObject *journal_start_op(const char *name=NULL) { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); if (!pdf->journal) { RAISEPY(gctx, "Journalling not enabled", PyExc_RuntimeError); } if (name) { pdf_begin_operation(gctx, pdf, name); } else { pdf_begin_implicit_operation(gctx, pdf); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(journal_stop_op, !result) CLOSECHECK(journal_stop_op, """End a journalling operation.""") PyObject *journal_stop_op() { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); pdf_end_operation(gctx, pdf); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(journal_position, !result) CLOSECHECK(journal_position, """Show journalling state.""") PyObject *journal_position() { int rc, steps=0; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); rc = pdf_undoredo_state(gctx, pdf, &steps); } fz_catch(gctx) { return NULL; } return Py_BuildValue("ii", rc, steps); } FITZEXCEPTION(journal_op_name, !result) CLOSECHECK(journal_op_name, """Show operation name for given step.""") PyObject *journal_op_name(int step) { const char *name=NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); name = pdf_undoredo_step(gctx, pdf, step); } fz_catch(gctx) { return NULL; } if (name) { return PyUnicode_FromString(name); } else { Py_RETURN_NONE; } } FITZEXCEPTION(journal_can_do, !result) CLOSECHECK(journal_can_do, """Show if undo and / or redo are possible.""") PyObject *journal_can_do() { int undo=0, redo=0; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); undo = pdf_can_undo(gctx, pdf); redo = pdf_can_redo(gctx, pdf); } fz_catch(gctx) { return NULL; } return Py_BuildValue("{s:N,s:N}", "undo", JM_BOOL(undo), "redo", JM_BOOL(redo)); } FITZEXCEPTION(journal_undo, !result) CLOSECHECK(journal_undo, """Move backwards in the journal.""") PyObject *journal_undo() { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); pdf_undo(gctx, pdf); } fz_catch(gctx) { return NULL; } Py_RETURN_TRUE; } FITZEXCEPTION(journal_redo, !result) CLOSECHECK(journal_redo, """Move forward in the journal.""") PyObject *journal_redo() { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); pdf_redo(gctx, pdf); } fz_catch(gctx) { return NULL; } Py_RETURN_TRUE; } FITZEXCEPTION(journal_save, !result) CLOSECHECK(journal_save, """Save journal to a file.""") PyObject *journal_save(PyObject *filename) { fz_output *out = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); if (PyUnicode_Check(filename)) { pdf_save_journal(gctx, pdf, (const char *) PyUnicode_AsUTF8(filename)); } else { out = JM_new_output_fileptr(gctx, filename); pdf_write_journal(gctx, pdf, out); } } fz_always(gctx) { fz_drop_output(gctx, out); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(journal_load, !result) CLOSECHECK(journal_load, """Load a journal from a file.""") PyObject *journal_load(PyObject *filename) { fz_buffer *res = NULL; fz_stream *stm = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); if (PyUnicode_Check(filename)) { pdf_load_journal(gctx, pdf, PyUnicode_AsUTF8(filename)); } else { res = JM_BufferFromBytes(gctx, filename); stm = fz_open_buffer(gctx, res); pdf_deserialise_journal(gctx, pdf, stm); } if (!pdf->journal) { RAISEPY(gctx, "Journal and document do not match", JM_Exc_FileDataError); } } fz_always(gctx) { fz_drop_stream(gctx, stm); fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(journal_is_enabled, !result) CLOSECHECK(journal_is_enabled, """Check if journalling is enabled.""") PyObject *journal_is_enabled() { int enabled = 0; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); enabled = pdf && pdf->journal; } fz_catch(gctx) { return NULL; } return JM_BOOL(enabled); } FITZEXCEPTION(_get_char_widths, !result) CLOSECHECK(_get_char_widths, """Return list of glyphs and glyph widths of a font.""") PyObject *_get_char_widths(int xref, char *bfname, char *ext, int ordering, int limit, int idx = 0) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); PyObject *wlist = NULL; int i, glyph, mylimit; mylimit = limit; if (mylimit < 256) mylimit = 256; const unsigned char *data; int size, index; fz_font *font = NULL; fz_buffer *buf = NULL; fz_try(gctx) { ASSERT_PDF(pdf); if (ordering >= 0) { data = fz_lookup_cjk_font(gctx, ordering, &size, &index); font = fz_new_font_from_memory(gctx, NULL, data, size, index, 0); goto weiter; } data = fz_lookup_base14_font(gctx, bfname, &size); if (data) { font = fz_new_font_from_memory(gctx, bfname, data, size, 0, 0); goto weiter; } buf = JM_get_fontbuffer(gctx, pdf, xref); if (!buf) { fz_throw(gctx, FZ_ERROR_GENERIC, "font at xref %d is not supported", xref); } font = fz_new_font_from_buffer(gctx, NULL, buf, idx, 0); weiter:; wlist = PyList_New(0); float adv; for (i = 0; i < mylimit; i++) { glyph = fz_encode_character(gctx, font, i); adv = fz_advance_glyph(gctx, font, glyph, 0); if (ordering >= 0) { glyph = i; } if (glyph > 0) { LIST_APPEND_DROP(wlist, Py_BuildValue("if", glyph, adv)); } else { LIST_APPEND_DROP(wlist, Py_BuildValue("if", glyph, 0.0)); } } } fz_always(gctx) { fz_drop_buffer(gctx, buf); fz_drop_font(gctx, font); } fz_catch(gctx) { return NULL; } return wlist; } FITZEXCEPTION(page_xref, !result) CLOSECHECK0(page_xref, """Get xref of page number.""") PyObject *page_xref(int pno) { fz_document *this_doc = (fz_document *) $self; int page_count = fz_count_pages(gctx, this_doc); int n = pno; while (n < 0) n += page_count; pdf_document *pdf = pdf_specifics(gctx, this_doc); int xref = 0; fz_try(gctx) { if (n >= page_count) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } ASSERT_PDF(pdf); xref = pdf_to_num(gctx, pdf_lookup_page_obj(gctx, pdf, n)); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } FITZEXCEPTION(page_annot_xrefs, !result) CLOSECHECK0(page_annot_xrefs, """Get list annotations of page number.""") PyObject *page_annot_xrefs(int pno) { fz_document *this_doc = (fz_document *) $self; int page_count = fz_count_pages(gctx, this_doc); int n = pno; while (n < 0) n += page_count; pdf_document *pdf = pdf_specifics(gctx, this_doc); PyObject *annots = NULL; fz_try(gctx) { if (n >= page_count) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } ASSERT_PDF(pdf); annots = JM_get_annot_xref_list(gctx, pdf_lookup_page_obj(gctx, pdf, n)); } fz_catch(gctx) { return NULL; } return annots; } FITZEXCEPTION(page_cropbox, !result) CLOSECHECK0(page_cropbox, """Get CropBox of page number (without loading page).""") %pythonappend page_cropbox %{val = Rect(JM_TUPLE3(val))%} PyObject *page_cropbox(int pno) { fz_document *this_doc = (fz_document *) $self; int page_count = fz_count_pages(gctx, this_doc); int n = pno; while (n < 0) n += page_count; pdf_obj *pageref = NULL; fz_var(pageref); pdf_document *pdf = pdf_specifics(gctx, this_doc); fz_try(gctx) { if (n >= page_count) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } ASSERT_PDF(pdf); pageref = pdf_lookup_page_obj(gctx, pdf, n); } fz_catch(gctx) { return NULL; } return JM_py_from_rect(JM_cropbox(gctx, pageref)); } FITZEXCEPTION(_getPageInfo, !result) CLOSECHECK(_getPageInfo, """List fonts, images, XObjects used on a page.""") PyObject *_getPageInfo(int pno, int what) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); pdf_obj *pageref, *rsrc; PyObject *liste = NULL, *tracer = NULL; fz_var(liste); fz_var(tracer); fz_try(gctx) { int page_count = fz_count_pages(gctx, doc); int n = pno; // pno < 0 is allowed while (n < 0) n += page_count; // make it non-negative if (n >= page_count) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } ASSERT_PDF(pdf); pageref = pdf_lookup_page_obj(gctx, pdf, n); rsrc = pdf_dict_get_inheritable(gctx, pageref, PDF_NAME(Resources)); liste = PyList_New(0); tracer = PyList_New(0); if (rsrc) { JM_scan_resources(gctx, pdf, rsrc, liste, what, 0, tracer); } } fz_always(gctx) { Py_CLEAR(tracer); } fz_catch(gctx) { Py_CLEAR(liste); return NULL; } return liste; } FITZEXCEPTION(extract_font, !result) CLOSECHECK(extract_font, """Get a font by xref. Returns a tuple or dictionary.""") PyObject *extract_font(int xref=0, int info_only=0, PyObject *named=NULL) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); } fz_catch(gctx) { return NULL; } fz_buffer *buffer = NULL; pdf_obj *obj, *basefont, *bname; PyObject *bytes = NULL; char *ext = NULL; PyObject *rc; fz_try(gctx) { obj = pdf_load_object(gctx, pdf, xref); pdf_obj *type = pdf_dict_get(gctx, obj, PDF_NAME(Type)); pdf_obj *subtype = pdf_dict_get(gctx, obj, PDF_NAME(Subtype)); if(pdf_name_eq(gctx, type, PDF_NAME(Font)) && strncmp(pdf_to_name(gctx, subtype), "CIDFontType", 11) != 0) { basefont = pdf_dict_get(gctx, obj, PDF_NAME(BaseFont)); if (!basefont || pdf_is_null(gctx, basefont)) { bname = pdf_dict_get(gctx, obj, PDF_NAME(Name)); } else { bname = basefont; } ext = JM_get_fontextension(gctx, pdf, xref); if (strcmp(ext, "n/a") != 0 && !info_only) { buffer = JM_get_fontbuffer(gctx, pdf, xref); bytes = JM_BinFromBuffer(gctx, buffer); fz_drop_buffer(gctx, buffer); } else { bytes = Py_BuildValue("y", ""); } if (PyObject_Not(named)) { rc = PyTuple_New(4); PyTuple_SET_ITEM(rc, 0, JM_EscapeStrFromStr(pdf_to_name(gctx, bname))); PyTuple_SET_ITEM(rc, 1, JM_UnicodeFromStr(ext)); PyTuple_SET_ITEM(rc, 2, JM_UnicodeFromStr(pdf_to_name(gctx, subtype))); PyTuple_SET_ITEM(rc, 3, bytes); } else { rc = PyDict_New(); DICT_SETITEM_DROP(rc, dictkey_name, JM_EscapeStrFromStr(pdf_to_name(gctx, bname))); DICT_SETITEM_DROP(rc, dictkey_ext, JM_UnicodeFromStr(ext)); DICT_SETITEM_DROP(rc, dictkey_type, JM_UnicodeFromStr(pdf_to_name(gctx, subtype))); DICT_SETITEM_DROP(rc, dictkey_content, bytes); } } else { if (PyObject_Not(named)) { rc = Py_BuildValue("sssy", "", "", "", ""); } else { rc = PyDict_New(); DICT_SETITEM_DROP(rc, dictkey_name, Py_BuildValue("s", "")); DICT_SETITEM_DROP(rc, dictkey_ext, Py_BuildValue("s", "")); DICT_SETITEM_DROP(rc, dictkey_type, Py_BuildValue("s", "")); DICT_SETITEM_DROP(rc, dictkey_content, Py_BuildValue("y", "")); } } } fz_always(gctx) { pdf_drop_obj(gctx, obj); JM_PyErr_Clear; } fz_catch(gctx) { if (PyObject_Not(named)) { rc = Py_BuildValue("sssy", "invalid-name", "", "", ""); } else { rc = PyDict_New(); DICT_SETITEM_DROP(rc, dictkey_name, Py_BuildValue("s", "invalid-name")); DICT_SETITEM_DROP(rc, dictkey_ext, Py_BuildValue("s", "")); DICT_SETITEM_DROP(rc, dictkey_type, Py_BuildValue("s", "")); DICT_SETITEM_DROP(rc, dictkey_content, Py_BuildValue("y", "")); } } return rc; } FITZEXCEPTION(extract_image, !result) CLOSECHECK(extract_image, """Get image by xref. Returns a dictionary.""") PyObject *extract_image(int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); pdf_obj *obj = NULL; fz_buffer *res = NULL; fz_image *img = NULL; PyObject *rc = NULL; const char *ext = NULL; const char *cs_name = NULL; int img_type = 0, xres, yres, colorspace; int smask = 0, width, height, bpc; fz_compressed_buffer *cbuf = NULL; fz_var(img); fz_var(res); fz_var(obj); fz_try(gctx) { ASSERT_PDF(pdf); if (!INRANGE(xref, 1, pdf_xref_len(gctx, pdf)-1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } obj = pdf_new_indirect(gctx, pdf, xref, 0); pdf_obj *subtype = pdf_dict_get(gctx, obj, PDF_NAME(Subtype)); if (!pdf_name_eq(gctx, subtype, PDF_NAME(Image))) { RAISEPY(gctx, "not an image", PyExc_ValueError); } pdf_obj *o = pdf_dict_geta(gctx, obj, PDF_NAME(SMask), PDF_NAME(Mask)); if (o) smask = pdf_to_num(gctx, o); if (pdf_is_jpx_image(gctx, obj)) { img_type = FZ_IMAGE_JPX; res = pdf_load_stream(gctx, obj); ext = "jpx"; } if (JM_is_jbig2_image(gctx, obj)) { img_type = FZ_IMAGE_JBIG2; res = pdf_load_stream(gctx, obj); ext = "jb2"; } if (img_type == FZ_IMAGE_UNKNOWN) { res = pdf_load_raw_stream(gctx, obj); unsigned char *c = NULL; fz_buffer_storage(gctx, res, &c); img_type = fz_recognize_image_format(gctx, c); ext = JM_image_extension(img_type); } if (img_type == FZ_IMAGE_UNKNOWN) { fz_drop_buffer(gctx, res); res = NULL; img = pdf_load_image(gctx, pdf, obj); cbuf = fz_compressed_image_buffer(gctx, img); if (cbuf && cbuf->params.type != FZ_IMAGE_RAW && cbuf->params.type != FZ_IMAGE_FAX && cbuf->params.type != FZ_IMAGE_FLATE && cbuf->params.type != FZ_IMAGE_LZW && cbuf->params.type != FZ_IMAGE_RLD) { img_type = cbuf->params.type; ext = JM_image_extension(img_type); res = cbuf->buffer; } else { res = fz_new_buffer_from_image_as_png(gctx, img, fz_default_color_params); ext = "png"; } } else { img = fz_new_image_from_buffer(gctx, res); } fz_image_resolution(img, &xres, &yres); width = img->w; height = img->h; colorspace = img->n; bpc = img->bpc; cs_name = fz_colorspace_name(gctx, img->colorspace); rc = PyDict_New(); DICT_SETITEM_DROP(rc, dictkey_ext, JM_UnicodeFromStr(ext)); DICT_SETITEM_DROP(rc, dictkey_smask, Py_BuildValue("i", smask)); DICT_SETITEM_DROP(rc, dictkey_width, Py_BuildValue("i", width)); DICT_SETITEM_DROP(rc, dictkey_height, Py_BuildValue("i", height)); DICT_SETITEM_DROP(rc, dictkey_colorspace, Py_BuildValue("i", colorspace)); DICT_SETITEM_DROP(rc, dictkey_bpc, Py_BuildValue("i", bpc)); DICT_SETITEM_DROP(rc, dictkey_xres, Py_BuildValue("i", xres)); DICT_SETITEM_DROP(rc, dictkey_yres, Py_BuildValue("i", yres)); DICT_SETITEM_DROP(rc, dictkey_cs_name, JM_UnicodeFromStr(cs_name)); DICT_SETITEM_DROP(rc, dictkey_image, JM_BinFromBuffer(gctx, res)); } fz_always(gctx) { fz_drop_image(gctx, img); if (!cbuf) fz_drop_buffer(gctx, res); pdf_drop_obj(gctx, obj); } fz_catch(gctx) { Py_CLEAR(rc); fz_warn(gctx, "%s", fz_caught_message(gctx)); Py_RETURN_FALSE; } if (!rc) Py_RETURN_NONE; return rc; } //------------------------------------------------------------------ // Delete all bookmarks (table of contents) // returns list of deleted (now available) xref numbers //------------------------------------------------------------------ CLOSECHECK(_delToC, """Delete the TOC.""") %pythonappend _delToC %{self.init_doc()%} PyObject *_delToC() { PyObject *xrefs = PyList_New(0); // create Python list pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) return xrefs; // not a pdf pdf_obj *root, *olroot, *first; int xref_count, olroot_xref, i, xref; // get the main root root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); // get the outline root olroot = pdf_dict_get(gctx, root, PDF_NAME(Outlines)); if (!olroot) return xrefs; // no outlines or some problem first = pdf_dict_get(gctx, olroot, PDF_NAME(First)); // first outline xrefs = JM_outline_xrefs(gctx, first, xrefs); xref_count = (int) PyList_Size(xrefs); olroot_xref = pdf_to_num(gctx, olroot); // delete OL root pdf_delete_object(gctx, pdf, olroot_xref); // delete OL root pdf_dict_del(gctx, root, PDF_NAME(Outlines)); // delete OL root for (i = 0; i < xref_count; i++) { JM_INT_ITEM(xrefs, i, &xref); pdf_delete_object(gctx, pdf, xref); // delete outline item } LIST_APPEND_DROP(xrefs, Py_BuildValue("i", olroot_xref)); return xrefs; } //------------------------------------------------------------------ // Check: is xref a stream object? //------------------------------------------------------------------ CLOSECHECK0(xref_is_stream, """Check if xref is a stream object.""") PyObject *xref_is_stream(int xref=0) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; // not a PDF return JM_BOOL(pdf_obj_num_is_stream(gctx, pdf, xref)); } //------------------------------------------------------------------ // Return or set NeedAppearances //------------------------------------------------------------------ %pythonprepend need_appearances %{"""Get/set the NeedAppearances value.""" if self.is_closed: raise ValueError("document closed") if not self.is_form_pdf: return None %} PyObject *need_appearances(PyObject *value=NULL) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); int oldval = -1; pdf_obj *app = NULL; char appkey[] = "NeedAppearances"; fz_try(gctx) { pdf_obj *form = pdf_dict_getp(gctx, pdf_trailer(gctx, pdf), "Root/AcroForm"); app = pdf_dict_gets(gctx, form, appkey); if (pdf_is_bool(gctx, app)) { oldval = pdf_to_bool(gctx, app); } if (EXISTS(value)) { pdf_dict_puts_drop(gctx, form, appkey, PDF_TRUE); } else if (value == Py_False) { pdf_dict_puts_drop(gctx, form, appkey, PDF_FALSE); } } fz_catch(gctx) { Py_RETURN_NONE; } if (value != Py_None) { return value; } if (oldval >= 0) { return JM_BOOL(oldval); } Py_RETURN_NONE; } //------------------------------------------------------------------ // Return the /SigFlags value //------------------------------------------------------------------ CLOSECHECK0(get_sigflags, """Get the /SigFlags value.""") int get_sigflags() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) return -1; // not a PDF int sigflag = -1; fz_try(gctx) { pdf_obj *sigflags = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(AcroForm), PDF_NAME(SigFlags), NULL); if (sigflags) { sigflag = (int) pdf_to_int(gctx, sigflags); } } fz_catch(gctx) { return -1; // any problem } return sigflag; } //------------------------------------------------------------------ // Check: is this an AcroForm with at least one field? //------------------------------------------------------------------ CLOSECHECK0(is_form_pdf, """Either False or PDF field count.""") %pythoncode%{@property%} PyObject *is_form_pdf() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_FALSE; // not a PDF int count = -1; // init count fz_try(gctx) { pdf_obj *fields = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(AcroForm), PDF_NAME(Fields), NULL); if (pdf_is_array(gctx, fields)) { count = pdf_array_len(gctx, fields); } } fz_catch(gctx) { Py_RETURN_FALSE; } if (count >= 0) { return Py_BuildValue("i", count); } else { Py_RETURN_FALSE; } } //------------------------------------------------------------------ // Return the list of field font resource names //------------------------------------------------------------------ CLOSECHECK0(FormFonts, """Get list of field font resource names.""") %pythoncode%{@property%} PyObject *FormFonts() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_NONE; // not a PDF pdf_obj *fonts = NULL; PyObject *liste = PyList_New(0); fz_var(liste); fz_try(gctx) { fonts = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(AcroForm), PDF_NAME(DR), PDF_NAME(Font), NULL); if (fonts && pdf_is_dict(gctx, fonts)) // fonts exist { int i, n = pdf_dict_len(gctx, fonts); for (i = 0; i < n; i++) { pdf_obj *f = pdf_dict_get_key(gctx, fonts, i); LIST_APPEND_DROP(liste, JM_UnicodeFromStr(pdf_to_name(gctx, f))); } } } fz_catch(gctx) { Py_DECREF(liste); Py_RETURN_NONE; // any problem yields None } return liste; } //------------------------------------------------------------------ // Add a field font //------------------------------------------------------------------ FITZEXCEPTION(_addFormFont, !result) CLOSECHECK(_addFormFont, """Add new form font.""") PyObject *_addFormFont(char *name, char *font) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_NONE; // not a PDF pdf_obj *fonts = NULL; fz_try(gctx) { fonts = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(AcroForm), PDF_NAME(DR), PDF_NAME(Font), NULL); if (!fonts || !pdf_is_dict(gctx, fonts)) { RAISEPY(gctx, "PDF has no form fonts yet", PyExc_RuntimeError); } pdf_obj *k = pdf_new_name(gctx, (const char *) name); pdf_obj *v = JM_pdf_obj_from_str(gctx, pdf, font); pdf_dict_put(gctx, fonts, k, v); } fz_catch(gctx) NULL; Py_RETURN_NONE; } //------------------------------------------------------------------ // Get Xref Number of Outline Root, create it if missing //------------------------------------------------------------------ FITZEXCEPTION(_getOLRootNumber, !result) CLOSECHECK(_getOLRootNumber, """Get xref of Outline Root, create it if missing.""") PyObject *_getOLRootNumber() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); pdf_obj *ind_obj = NULL; pdf_obj *olroot2 = NULL; int ret; fz_var(ind_obj); fz_var(olroot2); fz_try(gctx) { ASSERT_PDF(pdf); // get main root pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); // get outline root pdf_obj *olroot = pdf_dict_get(gctx, root, PDF_NAME(Outlines)); if (!olroot) { olroot2 = pdf_new_dict(gctx, pdf, 4); pdf_dict_put(gctx, olroot2, PDF_NAME(Type), PDF_NAME(Outlines)); ind_obj = pdf_add_object(gctx, pdf, olroot2); pdf_dict_put(gctx, root, PDF_NAME(Outlines), ind_obj); olroot = pdf_dict_get(gctx, root, PDF_NAME(Outlines)); } ret = pdf_to_num(gctx, olroot); } fz_always(gctx) { pdf_drop_obj(gctx, ind_obj); pdf_drop_obj(gctx, olroot2); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", ret); } //------------------------------------------------------------------ // Get a new Xref number //------------------------------------------------------------------ FITZEXCEPTION(get_new_xref, !result) CLOSECHECK(get_new_xref, """Make a new xref.""") PyObject *get_new_xref() { int xref = 0; fz_try(gctx) { fz_document *doc = (fz_document *) $self; pdf_document *pdf = pdf_specifics(gctx, doc); ASSERT_PDF(pdf); ENSURE_OPERATION(gctx, pdf); xref = pdf_create_object(gctx, pdf); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } //------------------------------------------------------------------ // Get Length of XREF table //------------------------------------------------------------------ FITZEXCEPTION(xref_length, !result) CLOSECHECK0(xref_length, """Get length of xref table.""") PyObject *xref_length() { int xreflen = 0; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (pdf) xreflen = pdf_xref_len(gctx, pdf); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xreflen); } //------------------------------------------------------------------ // Get XML Metadata //------------------------------------------------------------------ CLOSECHECK0(get_xml_metadata, """Get document XML metadata.""") PyObject *get_xml_metadata() { PyObject *rc = NULL; fz_buffer *buff = NULL; pdf_obj *xml = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (pdf) { xml = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(Metadata), NULL); } if (xml) { buff = pdf_load_stream(gctx, xml); rc = JM_UnicodeFromBuffer(gctx, buff); } else { rc = EMPTY_STRING; } } fz_always(gctx) { fz_drop_buffer(gctx, buff); PyErr_Clear(); } fz_catch(gctx) { return EMPTY_STRING; } return rc; } //------------------------------------------------------------------ // Get XML Metadata xref //------------------------------------------------------------------ FITZEXCEPTION(xref_xml_metadata, !result) CLOSECHECK0(xref_xml_metadata, """Get xref of document XML metadata.""") PyObject *xref_xml_metadata() { int xref = 0; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); ASSERT_PDF(pdf); pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); if (!root) { RAISEPY(gctx, MSG_BAD_PDFROOT, JM_Exc_FileDataError); } pdf_obj *xml = pdf_dict_get(gctx, root, PDF_NAME(Metadata)); if (xml) xref = pdf_to_num(gctx, xml); } fz_catch(gctx) {;} return Py_BuildValue("i", xref); } //------------------------------------------------------------------ // Delete XML Metadata //------------------------------------------------------------------ FITZEXCEPTION(del_xml_metadata, !result) CLOSECHECK(del_xml_metadata, """Delete XML metadata.""") PyObject *del_xml_metadata() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); if (root) pdf_dict_del(gctx, root, PDF_NAME(Metadata)); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // Set XML-based Metadata //------------------------------------------------------------------ FITZEXCEPTION(set_xml_metadata, !result) CLOSECHECK(set_xml_metadata, """Store XML document level metadata.""") PyObject *set_xml_metadata(char *metadata) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_buffer *res = NULL; fz_try(gctx) { ASSERT_PDF(pdf); pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); if (!root) { RAISEPY(gctx, MSG_BAD_PDFROOT, JM_Exc_FileDataError); } res = fz_new_buffer_from_copied_data(gctx, (const unsigned char *) metadata, strlen(metadata)); pdf_obj *xml = pdf_dict_get(gctx, root, PDF_NAME(Metadata)); if (xml) { JM_update_stream(gctx, pdf, xml, res, 0); } else { xml = pdf_add_stream(gctx, pdf, res, NULL, 0); pdf_dict_put(gctx, xml, PDF_NAME(Type), PDF_NAME(Metadata)); pdf_dict_put(gctx, xml, PDF_NAME(Subtype), PDF_NAME(XML)); pdf_dict_put_drop(gctx, root, PDF_NAME(Metadata), xml); } } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // Get Object String of xref //------------------------------------------------------------------ FITZEXCEPTION(xref_object, !result) CLOSECHECK0(xref_object, """Get xref object source as a string.""") PyObject *xref_object(int xref, int compressed=0, int ascii=0) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); pdf_obj *obj = NULL; PyObject *text = NULL; fz_buffer *res=NULL; fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref > 0) { obj = pdf_load_object(gctx, pdf, xref); } else { obj = pdf_trailer(gctx, pdf); } res = JM_object_to_buffer(gctx, pdf_resolve_indirect(gctx, obj), compressed, ascii); text = JM_EscapeStrFromBuffer(gctx, res); } fz_always(gctx) { if (xref > 0) { pdf_drop_obj(gctx, obj); } fz_drop_buffer(gctx, res); } fz_catch(gctx) return EMPTY_STRING; return text; } %pythoncode %{ def pdf_trailer(self, compressed: bool=False, ascii:bool=False)->str: """Get PDF trailer as a string.""" return self.xref_object(-1, compressed=compressed, ascii=ascii)%} //------------------------------------------------------------------ // Get compressed stream of an object by xref // Py_RETURN_NONE if not stream //------------------------------------------------------------------ FITZEXCEPTION(xref_stream_raw, !result) CLOSECHECK(xref_stream_raw, """Get xref stream without decompression.""") PyObject *xref_stream_raw(int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); PyObject *r = NULL; pdf_obj *obj = NULL; fz_var(obj); fz_buffer *res = NULL; fz_var(res); fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref >= 0) { obj = pdf_new_indirect(gctx, pdf, xref, 0); } else { obj = pdf_trailer(gctx, pdf); } if (pdf_is_stream(gctx, obj)) { res = pdf_load_raw_stream_number(gctx, pdf, xref); r = JM_BinFromBuffer(gctx, res); } } fz_always(gctx) { fz_drop_buffer(gctx, res); if (xref >= 0) { pdf_drop_obj(gctx, obj); } } fz_catch(gctx) { Py_CLEAR(r); return NULL; } if (!r) Py_RETURN_NONE; return r; } //------------------------------------------------------------------ // Get decompressed stream of an object by xref // Py_RETURN_NONE if not stream //------------------------------------------------------------------ FITZEXCEPTION(xref_stream, !result) CLOSECHECK(xref_stream, """Get decompressed xref stream.""") PyObject *xref_stream(int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); PyObject *r = Py_None; pdf_obj *obj = NULL; fz_var(obj); fz_buffer *res = NULL; fz_var(res); fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1) && xref != -1) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } if (xref >= 0) { obj = pdf_new_indirect(gctx, pdf, xref, 0); } else { obj = pdf_trailer(gctx, pdf); } if (pdf_is_stream(gctx, obj)) { res = pdf_load_stream_number(gctx, pdf, xref); r = JM_BinFromBuffer(gctx, res); } } fz_always(gctx) { fz_drop_buffer(gctx, res); if (xref >= 0) { pdf_drop_obj(gctx, obj); } } fz_catch(gctx) { Py_CLEAR(r); return NULL; } return r; } //------------------------------------------------------------------ // Update an Xref number with a new object given as a string //------------------------------------------------------------------ FITZEXCEPTION(update_object, !result) CLOSECHECK(update_object, """Replace object definition source.""") PyObject *update_object(int xref, char *text, struct Page *page = NULL) { pdf_obj *new_obj; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } ENSURE_OPERATION(gctx, pdf); // create new object with passed-in string new_obj = JM_pdf_obj_from_str(gctx, pdf, text); pdf_update_object(gctx, pdf, xref, new_obj); pdf_drop_obj(gctx, new_obj); if (page) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); JM_refresh_links(gctx, pdfpage); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // Update a stream identified by its xref //------------------------------------------------------------------ FITZEXCEPTION(update_stream, !result) CLOSECHECK(update_stream, """Replace xref stream part.""") PyObject *update_stream(int xref=0, PyObject *stream=NULL, int new=1, int compress=1) { pdf_obj *obj = NULL; fz_var(obj); fz_buffer *res = NULL; fz_var(res); pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } ENSURE_OPERATION(gctx, pdf); // get the object obj = pdf_new_indirect(gctx, pdf, xref, 0); if (!pdf_is_dict(gctx, obj)) { RAISEPY(gctx, MSG_IS_NO_DICT, PyExc_ValueError); } res = JM_BufferFromBytes(gctx, stream); if (!res) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_TypeError); } JM_update_stream(gctx, pdf, obj, res, compress); } fz_always(gctx) { fz_drop_buffer(gctx, res); pdf_drop_obj(gctx, obj); } fz_catch(gctx) return NULL; Py_RETURN_NONE; } //------------------------------------------------------------------ // create / refresh the page map //------------------------------------------------------------------ FITZEXCEPTION(_make_page_map, !result) CLOSECHECK0(_make_page_map, """Make an array page number -> page object.""") PyObject *_make_page_map() { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); if (!pdf) Py_RETURN_NONE; fz_try(gctx) { pdf_drop_page_tree(gctx, pdf); pdf_load_page_tree(gctx, pdf); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", pdf->map_page_count); } //------------------------------------------------------------------ // full (deep) copy of one page //------------------------------------------------------------------ FITZEXCEPTION(fullcopy_page, !result) CLOSECHECK0(fullcopy_page, """Make a full page duplicate.""") %pythonappend fullcopy_page %{self._reset_page_refs()%} PyObject *fullcopy_page(int pno, int to = -1) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); int page_count = pdf_count_pages(gctx, pdf); fz_buffer *res = NULL, *nres=NULL; fz_buffer *contents_buffer = NULL; fz_var(pdf); fz_var(res); fz_var(nres); fz_var(contents_buffer); fz_try(gctx) { ASSERT_PDF(pdf); if (!INRANGE(pno, 0, page_count - 1) || !INRANGE(to, -1, page_count - 1)) { RAISEPY(gctx, MSG_BAD_PAGENO, PyExc_ValueError); } pdf_obj *page1 = pdf_resolve_indirect(gctx, pdf_lookup_page_obj(gctx, pdf, pno)); pdf_obj *page2 = pdf_deep_copy_obj(gctx, page1); pdf_obj *old_annots = pdf_dict_get(gctx, page2, PDF_NAME(Annots)); // copy annotations, but remove Popup and IRT types if (old_annots) { int i, n = pdf_array_len(gctx, old_annots); pdf_obj *new_annots = pdf_new_array(gctx, pdf, n); for (i = 0; i < n; i++) { pdf_obj *o = pdf_array_get(gctx, old_annots, i); pdf_obj *subtype = pdf_dict_get(gctx, o, PDF_NAME(Subtype)); if (pdf_name_eq(gctx, subtype, PDF_NAME(Popup))) continue; if (pdf_dict_gets(gctx, o, "IRT")) continue; pdf_obj *copy_o = pdf_deep_copy_obj(gctx, pdf_resolve_indirect(gctx, o)); int xref = pdf_create_object(gctx, pdf); pdf_update_object(gctx, pdf, xref, copy_o); pdf_drop_obj(gctx, copy_o); copy_o = pdf_new_indirect(gctx, pdf, xref, 0); pdf_dict_del(gctx, copy_o, PDF_NAME(Popup)); pdf_dict_del(gctx, copy_o, PDF_NAME(P)); pdf_array_push_drop(gctx, new_annots, copy_o); } pdf_dict_put_drop(gctx, page2, PDF_NAME(Annots), new_annots); } // copy the old contents stream(s) res = JM_read_contents(gctx, page1); // create new /Contents object for page2 if (res) { contents_buffer = fz_new_buffer_from_copied_data(gctx, " ", 1); pdf_obj *contents = pdf_add_stream(gctx, pdf, contents_buffer, NULL, 0); JM_update_stream(gctx, pdf, contents, res, 1); pdf_dict_put_drop(gctx, page2, PDF_NAME(Contents), contents); } // now insert target page, making sure it is an indirect object int xref = pdf_create_object(gctx, pdf); // get new xref pdf_update_object(gctx, pdf, xref, page2); // store new page pdf_drop_obj(gctx, page2); // give up this object for now page2 = pdf_new_indirect(gctx, pdf, xref, 0); // reread object pdf_insert_page(gctx, pdf, to, page2); // and store the page pdf_drop_obj(gctx, page2); } fz_always(gctx) { pdf_drop_page_tree(gctx, pdf); fz_drop_buffer(gctx, res); fz_drop_buffer(gctx, nres); fz_drop_buffer(gctx, contents_buffer); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // move or copy one page //------------------------------------------------------------------ FITZEXCEPTION(_move_copy_page, !result) CLOSECHECK0(_move_copy_page, """Move or copy a PDF page reference.""") %pythonappend _move_copy_page %{self._reset_page_refs()%} PyObject *_move_copy_page(int pno, int nb, int before, int copy) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); int i1, i2, pos, count, same = 0; pdf_obj *parent1 = NULL, *parent2 = NULL, *parent = NULL; pdf_obj *kids1, *kids2; fz_try(gctx) { ASSERT_PDF(pdf); // get the two page objects ----------------------------------- // locate the /Kids arrays and indices in each pdf_obj *page1 = pdf_lookup_page_loc(gctx, pdf, pno, &parent1, &i1); kids1 = pdf_dict_get(gctx, parent1, PDF_NAME(Kids)); pdf_obj *page2 = pdf_lookup_page_loc(gctx, pdf, nb, &parent2, &i2); (void) page2; kids2 = pdf_dict_get(gctx, parent2, PDF_NAME(Kids)); if (before) // calc index of source page in target /Kids pos = i2; else pos = i2 + 1; // same /Kids array? ------------------------------------------ same = pdf_objcmp(gctx, kids1, kids2); // put source page in target /Kids array ---------------------- if (!copy && same != 0) // update parent in page object { pdf_dict_put(gctx, page1, PDF_NAME(Parent), parent2); } pdf_array_insert(gctx, kids2, page1, pos); if (same != 0) // different /Kids arrays ---------------------- { parent = parent2; while (parent) // increase /Count objects in parents { count = pdf_dict_get_int(gctx, parent, PDF_NAME(Count)); pdf_dict_put_int(gctx, parent, PDF_NAME(Count), count + 1); parent = pdf_dict_get(gctx, parent, PDF_NAME(Parent)); } if (!copy) // delete original item { pdf_array_delete(gctx, kids1, i1); parent = parent1; while (parent) // decrease /Count objects in parents { count = pdf_dict_get_int(gctx, parent, PDF_NAME(Count)); pdf_dict_put_int(gctx, parent, PDF_NAME(Count), count - 1); parent = pdf_dict_get(gctx, parent, PDF_NAME(Parent)); } } } else { // same /Kids array if (copy) { // source page is copied parent = parent2; while (parent) // increase /Count object in parents { count = pdf_dict_get_int(gctx, parent, PDF_NAME(Count)); pdf_dict_put_int(gctx, parent, PDF_NAME(Count), count + 1); parent = pdf_dict_get(gctx, parent, PDF_NAME(Parent)); } } else { if (i1 < pos) pdf_array_delete(gctx, kids1, i1); else pdf_array_delete(gctx, kids1, i1 + 1); } } if (pdf->rev_page_map) { // page map no longer valid: drop it pdf_drop_page_tree(gctx, pdf); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_remove_toc_item, !result) PyObject *_remove_toc_item(int xref) { // "remove" bookmark by letting it point to nowhere pdf_obj *item = NULL, *color; int i; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { item = pdf_new_indirect(gctx, pdf, xref, 0); pdf_dict_del(gctx, item, PDF_NAME(Dest)); pdf_dict_del(gctx, item, PDF_NAME(A)); color = pdf_new_array(gctx, pdf, 3); for (i=0; i < 3; i++) { pdf_array_push_real(gctx, color, 0.8); } pdf_dict_put_drop(gctx, item, PDF_NAME(C), color); } fz_always(gctx) { pdf_drop_obj(gctx, item); } fz_catch(gctx){ return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_update_toc_item, !result) PyObject *_update_toc_item(int xref, char *action=NULL, char *title=NULL, int flags=0, PyObject *collapse=NULL, PyObject *color=NULL) { // "update" bookmark by letting it point to nowhere pdf_obj *item = NULL; pdf_obj *obj = NULL; Py_ssize_t i; double f; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { item = pdf_new_indirect(gctx, pdf, xref, 0); if (title) { pdf_dict_put_text_string(gctx, item, PDF_NAME(Title), title); } if (action) { pdf_dict_del(gctx, item, PDF_NAME(Dest)); obj = JM_pdf_obj_from_str(gctx, pdf, action); pdf_dict_put_drop(gctx, item, PDF_NAME(A), obj); } pdf_dict_put_int(gctx, item, PDF_NAME(F), flags); if (EXISTS(color)) { pdf_obj *c = pdf_new_array(gctx, pdf, 3); for (i = 0; i < 3; i++) { JM_FLOAT_ITEM(color, i, &f); pdf_array_push_real(gctx, c, f); } pdf_dict_put_drop(gctx, item, PDF_NAME(C), c); } else if (color != Py_None) { pdf_dict_del(gctx, item, PDF_NAME(C)); } if (collapse != Py_None) { if (pdf_dict_get(gctx, item, PDF_NAME(Count))) { i = pdf_dict_get_int(gctx, item, PDF_NAME(Count)); if ((i < 0 && collapse == Py_False) || (i > 0 && collapse == Py_True)) { i = i * (-1); pdf_dict_put_int(gctx, item, PDF_NAME(Count), i); } } } } fz_always(gctx) { pdf_drop_obj(gctx, item); } fz_catch(gctx){ return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // PDF page label getting / setting //------------------------------------------------------------------ FITZEXCEPTION(_get_page_labels, !result) PyObject * _get_page_labels() { pdf_obj *obj, *nums, *kids; PyObject *rc = NULL; int i, n; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); pdf_obj *pagelabels = NULL; fz_var(pagelabels); fz_try(gctx) { ASSERT_PDF(pdf); rc = PyList_New(0); pagelabels = pdf_new_name(gctx, "PageLabels"); obj = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), pagelabels, NULL); if (!obj) { goto finished; } // simple case: direct /Nums object nums = pdf_resolve_indirect(gctx, pdf_dict_get(gctx, obj, PDF_NAME(Nums))); if (nums) { JM_get_page_labels(gctx, rc, nums); goto finished; } // case: /Kids/Nums nums = pdf_resolve_indirect(gctx, pdf_dict_getl(gctx, obj, PDF_NAME(Kids), PDF_NAME(Nums), NULL) ); if (nums) { JM_get_page_labels(gctx, rc, nums); goto finished; } // case: /Kids is an array of multiple /Nums kids = pdf_resolve_indirect(gctx, pdf_dict_get(gctx, obj, PDF_NAME(Kids))); if (!kids || !pdf_is_array(gctx, kids)) { goto finished; } n = pdf_array_len(gctx, kids); for (i = 0; i < n; i++) { nums = pdf_resolve_indirect(gctx, pdf_dict_get(gctx, pdf_array_get(gctx, kids, i), PDF_NAME(Nums))); JM_get_page_labels(gctx, rc, nums); } finished:; } fz_always(gctx) { PyErr_Clear(); pdf_drop_obj(gctx, pagelabels); } fz_catch(gctx){ Py_CLEAR(rc); return NULL; } return rc; } FITZEXCEPTION(_set_page_labels, !result) %pythonappend _set_page_labels %{ xref = self.pdf_catalog() text = self.xref_object(xref, compressed=True) text = text.replace("/Nums[]", "/Nums[%s]" % labels) self.update_object(xref, text)%} PyObject * _set_page_labels(char *labels) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); pdf_obj *pagelabels = NULL; fz_var(pagelabels); fz_try(gctx) { ASSERT_PDF(pdf); pagelabels = pdf_new_name(gctx, "PageLabels"); pdf_obj *root = pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)); pdf_dict_del(gctx, root, pagelabels); pdf_dict_putl_drop(gctx, root, pdf_new_array(gctx, pdf, 0), pagelabels, PDF_NAME(Nums), NULL); } fz_always(gctx) { PyErr_Clear(); pdf_drop_obj(gctx, pagelabels); } fz_catch(gctx){ return NULL; } Py_RETURN_NONE; } //------------------------------------------------------------------ // PDF Optional Content functions //------------------------------------------------------------------ FITZEXCEPTION(get_layers, !result) CLOSECHECK0(get_layers, """Show optional OC layers.""") PyObject * get_layers() { PyObject *rc = NULL; pdf_layer_config info = {NULL, NULL}; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); int i, n = pdf_count_layer_configs(gctx, pdf); if (n == 1) { pdf_obj *obj = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(OCProperties), PDF_NAME(Configs), NULL); if (!pdf_is_array(gctx, obj)) n = 0; } rc = PyTuple_New(n); for (i = 0; i < n; i++) { pdf_layer_config_info(gctx, pdf, i, &info); PyObject *item = Py_BuildValue("{s:i,s:s,s:s}", "number", i, "name", info.name, "creator", info.creator); PyTuple_SET_ITEM(rc, i, item); info.name = NULL; info.creator = NULL; } } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } return rc; } FITZEXCEPTION(switch_layer, !result) CLOSECHECK0(switch_layer, """Activate an OC layer.""") PyObject * switch_layer(int config, int as_default=0) { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); pdf_obj *cfgs = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(OCProperties), PDF_NAME(Configs), NULL); if (!pdf_is_array(gctx, cfgs) || !pdf_array_len(gctx, cfgs)) { if (config < 1) goto finished; RAISEPY(gctx, MSG_BAD_OC_LAYER, PyExc_ValueError); } if (config < 0) goto finished; pdf_select_layer_config(gctx, pdf, config); if (as_default) { pdf_set_layer_config_as_default(gctx, pdf); pdf_read_ocg(gctx, pdf); } finished:; } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(get_layer, !result) CLOSECHECK0(get_layer, """Content of ON, OFF, RBGroups of an OC layer.""") PyObject * get_layer(int config=-1) { PyObject *rc; pdf_obj *obj = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); pdf_obj *ocp = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(OCProperties), NULL); if (!ocp) { rc = Py_BuildValue("s", NULL); goto finished; } if (config == -1) { obj = pdf_dict_get(gctx, ocp, PDF_NAME(D)); } else { obj = pdf_array_get(gctx, pdf_dict_get(gctx, ocp, PDF_NAME(Configs)), config); } if (!obj) { RAISEPY(gctx, MSG_BAD_OC_CONFIG, PyExc_ValueError); } rc = JM_get_ocg_arrays(gctx, obj); finished:; } fz_catch(gctx) { Py_CLEAR(rc); PyErr_Clear(); return NULL; } return rc; } FITZEXCEPTION(set_layer, !result) %pythonprepend set_layer %{"""Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.""" if self.is_closed: raise ValueError("document closed") ocgs = set(self.get_ocgs().keys()) if ocgs == set(): raise ValueError("document has no optional content") if on: if type(on) not in (list, tuple): raise ValueError("bad type: 'on'") s = set(on).difference(ocgs) if s != set(): raise ValueError("bad OCGs in 'on': %s" % s) if off: if type(off) not in (list, tuple): raise ValueError("bad type: 'off'") s = set(off).difference(ocgs) if s != set(): raise ValueError("bad OCGs in 'off': %s" % s) if locked: if type(locked) not in (list, tuple): raise ValueError("bad type: 'locked'") s = set(locked).difference(ocgs) if s != set(): raise ValueError("bad OCGs in 'locked': %s" % s) if rbgroups: if type(rbgroups) not in (list, tuple): raise ValueError("bad type: 'rbgroups'") for x in rbgroups: if not type(x) in (list, tuple): raise ValueError("bad RBGroup '%s'" % x) s = set(x).difference(ocgs) if s != set(): raise ValueError("bad OCGs in RBGroup: %s" % s) if basestate: basestate = str(basestate).upper() if basestate == "UNCHANGED": basestate = "Unchanged" if basestate not in ("ON", "OFF", "Unchanged"): raise ValueError("bad 'basestate'") %} PyObject * set_layer(int config, const char *basestate=NULL, PyObject *on=NULL, PyObject *off=NULL, PyObject *rbgroups=NULL, PyObject *locked=NULL) { pdf_obj *obj = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); pdf_obj *ocp = pdf_dict_getl(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(OCProperties), NULL); if (!ocp) { goto finished; } if (config == -1) { obj = pdf_dict_get(gctx, ocp, PDF_NAME(D)); } else { obj = pdf_array_get(gctx, pdf_dict_get(gctx, ocp, PDF_NAME(Configs)), config); } if (!obj) { RAISEPY(gctx, MSG_BAD_OC_CONFIG, PyExc_ValueError); } JM_set_ocg_arrays(gctx, obj, basestate, on, off, rbgroups, locked); pdf_read_ocg(gctx, pdf); finished:; } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(add_layer, !result) CLOSECHECK0(add_layer, """Add a new OC layer.""") PyObject *add_layer(char *name, char *creator=NULL, PyObject *on=NULL) { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); JM_add_layer_config(gctx, pdf, name, creator, on); pdf_read_ocg(gctx, pdf); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(layer_ui_configs, !result) CLOSECHECK0(layer_ui_configs, """Show OC visibility status modifiable by user.""") PyObject *layer_ui_configs() { typedef struct { const char *text; int depth; pdf_layer_config_ui_type type; int selected; int locked; } pdf_layer_config_ui; PyObject *rc = NULL; fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); pdf_layer_config_ui info; int i, n = pdf_count_layer_config_ui(gctx, pdf); rc = PyTuple_New(n); char *type = NULL; for (i = 0; i < n; i++) { pdf_layer_config_ui_info(gctx, pdf, i, (void *) &info); switch (info.type) { case (1): type = "checkbox"; break; case (2): type = "radiobox"; break; default: type = "label"; break; } PyObject *item = Py_BuildValue("{s:i,s:N,s:i,s:s,s:N,s:N}", "number", i, "text", JM_UnicodeFromStr(info.text), "depth", info.depth, "type", type, "on", JM_BOOL(info.selected), "locked", JM_BOOL(info.locked)); PyTuple_SET_ITEM(rc, i, item); } } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } return rc; } FITZEXCEPTION(set_layer_ui_config, !result) CLOSECHECK0(set_layer_ui_config, ) %pythonprepend set_layer_ui_config %{ """Set / unset OC intent configuration.""" # The user might have given the name instead of sequence number, # so select by that name and continue with corresp. number if isinstance(number, str): select = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number] if select == []: raise ValueError(f"bad OCG '{number}'.") number = select[0] # this is the number for the name %} PyObject *set_layer_ui_config(int number, int action=0) { fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); switch (action) { case (1): pdf_toggle_layer_config_ui(gctx, pdf, number); break; case (2): pdf_deselect_layer_config_ui(gctx, pdf, number); break; default: pdf_select_layer_config_ui(gctx, pdf, number); break; } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(get_ocgs, !result) CLOSECHECK0(get_ocgs, """Show existing optional content groups.""") PyObject * get_ocgs() { PyObject *rc = NULL; pdf_obj *ci = pdf_new_name(gctx, "CreatorInfo"); fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); pdf_obj *ocgs = pdf_dict_getl(gctx, pdf_dict_get(gctx, pdf_trailer(gctx, pdf), PDF_NAME(Root)), PDF_NAME(OCProperties), PDF_NAME(OCGs), NULL); rc = PyDict_New(); if (!pdf_is_array(gctx, ocgs)) goto fertig; int i, n = pdf_array_len(gctx, ocgs); for (i = 0; i < n; i++) { pdf_obj *ocg = pdf_array_get(gctx, ocgs, i); int xref = pdf_to_num(gctx, ocg); const char *name = pdf_to_text_string(gctx, pdf_dict_get(gctx, ocg, PDF_NAME(Name))); pdf_obj *obj = pdf_dict_getl(gctx, ocg, PDF_NAME(Usage), ci, PDF_NAME(Subtype), NULL); const char *usage = NULL; if (obj) usage = pdf_to_name(gctx, obj); PyObject *intents = PyList_New(0); pdf_obj *intent = pdf_dict_get(gctx, ocg, PDF_NAME(Intent)); if (intent) { if (pdf_is_name(gctx, intent)) { LIST_APPEND_DROP(intents, Py_BuildValue("s", pdf_to_name(gctx, intent))); } else if (pdf_is_array(gctx, intent)) { int j, m = pdf_array_len(gctx, intent); for (j = 0; j < m; j++) { pdf_obj *o = pdf_array_get(gctx, intent, j); if (pdf_is_name(gctx, o)) LIST_APPEND_DROP(intents, Py_BuildValue("s", pdf_to_name(gctx, o))); } } } int hidden = pdf_is_ocg_hidden(gctx, pdf, NULL, usage, ocg); PyObject *item = Py_BuildValue("{s:s,s:O,s:O,s:s}", "name", name, "intent", intents, "on", JM_BOOL(!hidden), "usage", usage); Py_DECREF(intents); PyObject *temp = Py_BuildValue("i", xref); DICT_SETITEM_DROP(rc, temp, item); Py_DECREF(temp); } fertig:; } fz_always(gctx) { pdf_drop_obj(gctx, ci); } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } return rc; } FITZEXCEPTION(add_ocg, !result) CLOSECHECK0(add_ocg, """Add new optional content group.""") PyObject * add_ocg(char *name, int config=-1, int on=1, PyObject *intent=NULL, const char *usage=NULL) { int xref = 0; pdf_obj *obj = NULL, *cfg = NULL; pdf_obj *indocg = NULL; pdf_obj *ocg = NULL; pdf_obj *ci_name = NULL; fz_var(indocg); fz_var(ocg); fz_var(ci_name); fz_try(gctx) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) self); ASSERT_PDF(pdf); // ------------------------------ // make the OCG // ------------------------------ ocg = pdf_add_new_dict(gctx, pdf, 3); pdf_dict_put(gctx, ocg, PDF_NAME(Type), PDF_NAME(OCG)); pdf_dict_put_text_string(gctx, ocg, PDF_NAME(Name), name); pdf_obj *intents = pdf_dict_put_array(gctx, ocg, PDF_NAME(Intent), 2); if (!EXISTS(intent)) { pdf_array_push(gctx, intents, PDF_NAME(View)); } else if (!PyUnicode_Check(intent)) { int i, n = PySequence_Size(intent); for (i = 0; i < n; i++) { PyObject *item = PySequence_ITEM(intent, i); char *c = JM_StrAsChar(item); if (c) { pdf_array_push_drop(gctx, intents, pdf_new_name(gctx, c)); } Py_DECREF(item); } } else { char *c = JM_StrAsChar(intent); if (c) { pdf_array_push_drop(gctx, intents, pdf_new_name(gctx, c)); } } pdf_obj *use_for = pdf_dict_put_dict(gctx, ocg, PDF_NAME(Usage), 3); ci_name = pdf_new_name(gctx, "CreatorInfo"); pdf_obj *cre_info = pdf_dict_put_dict(gctx, use_for, ci_name, 2); pdf_dict_put_text_string(gctx, cre_info, PDF_NAME(Creator), "PyMuPDF"); if (usage) { pdf_dict_put_name(gctx, cre_info, PDF_NAME(Subtype), usage); } else { pdf_dict_put_name(gctx, cre_info, PDF_NAME(Subtype), "Artwork"); } indocg = pdf_add_object(gctx, pdf, ocg); // ------------------------------ // Insert OCG in the right config // ------------------------------ pdf_obj *ocp = JM_ensure_ocproperties(gctx, pdf); obj = pdf_dict_get(gctx, ocp, PDF_NAME(OCGs)); pdf_array_push(gctx, obj, indocg); if (config > -1) { obj = pdf_dict_get(gctx, ocp, PDF_NAME(Configs)); if (!pdf_is_array(gctx, obj)) { RAISEPY(gctx, MSG_BAD_OC_CONFIG, PyExc_ValueError); } cfg = pdf_array_get(gctx, obj, config); if (!cfg) { RAISEPY(gctx, MSG_BAD_OC_CONFIG, PyExc_ValueError); } } else { cfg = pdf_dict_get(gctx, ocp, PDF_NAME(D)); } obj = pdf_dict_get(gctx, cfg, PDF_NAME(Order)); if (!obj) { obj = pdf_dict_put_array(gctx, cfg, PDF_NAME(Order), 1); } pdf_array_push(gctx, obj, indocg); if (on) { obj = pdf_dict_get(gctx, cfg, PDF_NAME(ON)); if (!obj) { obj = pdf_dict_put_array(gctx, cfg, PDF_NAME(ON), 1); } } else { obj = pdf_dict_get(gctx, cfg, PDF_NAME(OFF)); if (!obj) { obj = pdf_dict_put_array(gctx, cfg, PDF_NAME(OFF), 1); } } pdf_array_push(gctx, obj, indocg); // let MuPDF take note: re-read OCProperties pdf_read_ocg(gctx, pdf); xref = pdf_to_num(gctx, indocg); } fz_always(gctx) { pdf_drop_obj(gctx, indocg); pdf_drop_obj(gctx, ocg); pdf_drop_obj(gctx, ci_name); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } struct Annot; void internal_keep_annot(struct Annot* annot) { pdf_keep_annot(gctx, (pdf_annot*) annot); } //------------------------------------------------------------------ // Initialize document: set outline and metadata properties //------------------------------------------------------------------ %pythoncode %{ def init_doc(self): if self.is_encrypted: raise ValueError("cannot initialize - document still encrypted") self._outline = self._loadOutline() if self._outline: self._outline.thisown = True self.metadata = dict([(k,self._getMetadata(v)) for k,v in {'format':'format', 'title':'info:Title', 'author':'info:Author','subject':'info:Subject', 'keywords':'info:Keywords','creator':'info:Creator', 'producer':'info:Producer', 'creationDate':'info:CreationDate', 'modDate':'info:ModDate', 'trapped':'info:Trapped'}.items()]) self.metadata['encryption'] = None if self._getMetadata('encryption')=='None' else self._getMetadata('encryption') outline = property(lambda self: self._outline) def get_page_fonts(self, pno: int, full: bool =False) -> list: """Retrieve a list of fonts used on a page. """ if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if not self.is_pdf: return () if type(pno) is not int: try: pno = pno.number except: raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 1) if full is False: return [v[:-1] for v in val] return val def get_page_images(self, pno: int, full: bool =False) -> list: """Retrieve a list of images used on a page. """ if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if not self.is_pdf: return () if type(pno) is not int: try: pno = pno.number except: raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 2) if full is False: return [v[:-1] for v in val] return val def get_page_xobjects(self, pno: int) -> list: """Retrieve a list of XObjects used on a page. """ if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if not self.is_pdf: return () if type(pno) is not int: try: pno = pno.number except: raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 3) rc = [(v[0], v[1], v[2], Rect(v[3])) for v in val] return rc def xref_is_image(self, xref): """Check if xref is an image object.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if self.xref_get_key(xref, "Subtype")[1] == "/Image": return True return False def xref_is_font(self, xref): """Check if xref is a font object.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if self.xref_get_key(xref, "Type")[1] == "/Font": return True return False def xref_is_xobject(self, xref): """Check if xref is a form xobject.""" if self.is_closed or self.is_encrypted: raise ValueError("document closed or encrypted") if self.xref_get_key(xref, "Subtype")[1] == "/Form": return True return False def copy_page(self, pno: int, to: int =-1): """Copy a page within a PDF document. This will only create another reference of the same page object. Args: pno: source page number to: put before this page, '-1' means after last page. """ if self.is_closed: raise ValueError("document closed") page_count = len(self) if ( pno not in range(page_count) or to not in range(-1, page_count) ): raise ValueError("bad page number(s)") before = 1 copy = 1 if to == -1: to = page_count - 1 before = 0 return self._move_copy_page(pno, to, before, copy) def move_page(self, pno: int, to: int =-1): """Move a page within a PDF document. Args: pno: source page number. to: put before this page, '-1' means after last page. """ if self.is_closed: raise ValueError("document closed") page_count = len(self) if ( pno not in range(page_count) or to not in range(-1, page_count) ): raise ValueError("bad page number(s)") before = 1 copy = 0 if to == -1: to = page_count - 1 before = 0 return self._move_copy_page(pno, to, before, copy) def delete_page(self, pno: int =-1): """ Delete one page from a PDF. """ if not self.is_pdf: raise ValueError("is no PDF") if self.is_closed: raise ValueError("document closed") page_count = self.page_count while pno < 0: pno += page_count if pno >= page_count: raise ValueError("bad page number(s)") # remove TOC bookmarks pointing to deleted page toc = self.get_toc() ol_xrefs = self.get_outline_xrefs() for i, item in enumerate(toc): if item[2] == pno + 1: self._remove_toc_item(ol_xrefs[i]) self._remove_links_to(frozenset((pno,))) self._delete_page(pno) self._reset_page_refs() def delete_pages(self, *args, **kw): """Delete pages from a PDF. Args: Either keywords 'from_page'/'to_page', or two integers to specify the first/last page to delete. Or a list/tuple/range object, which can contain arbitrary page numbers. """ if not self.is_pdf: raise ValueError("is no PDF") if self.is_closed: raise ValueError("document closed") page_count = self.page_count # page count of document f = t = -1 if kw: # check if keywords were used if args: # then no positional args are allowed raise ValueError("cannot mix keyword and positional argument") f = kw.get("from_page", -1) # first page to delete t = kw.get("to_page", -1) # last page to delete while f < 0: f += page_count while t < 0: t += page_count if not f <= t < page_count: raise ValueError("bad page number(s)") numbers = tuple(range(f, t + 1)) else: if len(args) > 2 or args == []: raise ValueError("need 1 or 2 positional arguments") if len(args) == 2: f, t = args if not (type(f) is int and type(t) is int): raise ValueError("both arguments must be int") if f > t: f, t = t, f if not f <= t < page_count: raise ValueError("bad page number(s)") numbers = tuple(range(f, t + 1)) else: r = args[0] if type(r) not in (int, range, list, tuple): raise ValueError("need int or sequence if one argument") numbers = tuple(r) numbers = list(map(int, set(numbers))) # ensure unique integers if numbers == []: print("nothing to delete") return numbers.sort() if numbers[0] < 0 or numbers[-1] >= page_count: raise ValueError("bad page number(s)") frozen_numbers = frozenset(numbers) toc = self.get_toc() for i, xref in enumerate(self.get_outline_xrefs()): if toc[i][2] - 1 in frozen_numbers: self._remove_toc_item(xref) # remove target in PDF object self._remove_links_to(frozen_numbers) for i in reversed(numbers): # delete pages, last to first self._delete_page(i) self._reset_page_refs() def saveIncr(self): """ Save PDF incrementally""" return self.save(self.name, incremental=True, encryption=PDF_ENCRYPT_KEEP) def ez_save(self, filename, garbage=3, clean=False, deflate=True, deflate_images=True, deflate_fonts=True, incremental=False, ascii=False, expand=False, linear=False, pretty=False, encryption=1, permissions=4095, owner_pw=None, user_pw=None, no_new_id=True): """ Save PDF using some different defaults""" return self.save(filename, garbage=garbage, clean=clean, deflate=deflate, deflate_images=deflate_images, deflate_fonts=deflate_fonts, incremental=incremental, ascii=ascii, expand=expand, linear=linear, pretty=pretty, encryption=encryption, permissions=permissions, owner_pw=owner_pw, user_pw=user_pw, no_new_id=no_new_id,) def reload_page(self, page: "struct Page *") -> "struct Page *": """Make a fresh copy of a page.""" old_annots = {} # copy annot references to here pno = page.number # save the page number for k, v in page._annot_refs.items(): # save the annot dictionary # We need to call pdf_keep_annot() here, otherwise `v`'s # refcount can reach zero even if there is an external # reference. self.internal_keep_annot(v) old_annots[k] = v page._erase() # remove the page page = None TOOLS.store_shrink(100) page = self.load_page(pno) # reload the page # copy annot refs over to the new dictionary page_proxy = weakref.proxy(page) for k, v in old_annots.items(): annot = old_annots[k] annot.parent = page_proxy # refresh parent to new page page._annot_refs[k] = annot return page @property def pagemode(self) -> str: """Return the PDF PageMode value. """ xref = self.pdf_catalog() if xref == 0: return None rc = self.xref_get_key(xref, "PageMode") if rc[0] == "null": return "UseNone" if rc[0] == "name": return rc[1][1:] return "UseNone" def set_pagemode(self, pagemode: str): """Set the PDF PageMode value.""" valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments") xref = self.pdf_catalog() if xref == 0: raise ValueError("not a PDF") if not pagemode: raise ValueError("bad PageMode value") if pagemode[0] == "/": pagemode = pagemode[1:] for v in valid: if pagemode.lower() == v.lower(): self.xref_set_key(xref, "PageMode", f"/{v}") return True raise ValueError("bad PageMode value") @property def pagelayout(self) -> str: """Return the PDF PageLayout value. """ xref = self.pdf_catalog() if xref == 0: return None rc = self.xref_get_key(xref, "PageLayout") if rc[0] == "null": return "SinglePage" if rc[0] == "name": return rc[1][1:] return "SinglePage" def set_pagelayout(self, pagelayout: str): """Set the PDF PageLayout value.""" valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight") xref = self.pdf_catalog() if xref == 0: raise ValueError("not a PDF") if not pagelayout: raise ValueError("bad PageLayout value") if pagelayout[0] == "/": pagelayout = pagelayout[1:] for v in valid: if pagelayout.lower() == v.lower(): self.xref_set_key(xref, "PageLayout", f"/{v}") return True raise ValueError("bad PageLayout value") @property def markinfo(self) -> dict: """Return the PDF MarkInfo value.""" xref = self.pdf_catalog() if xref == 0: return None rc = self.xref_get_key(xref, "MarkInfo") if rc[0] == "null": return {} if rc[0] == "xref": xref = int(rc[1].split()[0]) val = self.xref_object(xref, compressed=True) elif rc[0] == "dict": val = rc[1] else: val = None if val == None or not (val[:2] == "<<" and val[-2:] == ">>"): return {} valid = {"Marked": False, "UserProperties": False, "Suspects": False} val = val[2:-2].split("/") for v in val[1:]: try: key, value = v.split() except: return valid if value == "true": valid[key] = True return valid def set_markinfo(self, markinfo: dict) -> bool: """Set the PDF MarkInfo values.""" xref = self.pdf_catalog() if xref == 0: raise ValueError("not a PDF") if not markinfo or not isinstance(markinfo, dict): return False valid = {"Marked": False, "UserProperties": False, "Suspects": False} if not set(valid.keys()).issuperset(markinfo.keys()): badkeys = f"bad MarkInfo key(s): {set(markinfo.keys()).difference(valid.keys())}" raise ValueError(badkeys) pdfdict = "<<" valid.update(markinfo) for key, value in valid.items(): value=str(value).lower() if not value in ("true", "false"): raise ValueError(f"bad key value '{key}': '{value}'") pdfdict += f"/{key} {value}" pdfdict += ">>" self.xref_set_key(xref, "MarkInfo", pdfdict) return True def __repr__(self) -> str: m = "closed " if self.is_closed else "" if self.stream is None: if self.name == "": return m + "Document(<new PDF, doc# %i>)" % self._graft_id return m + "Document('%s')" % (self.name,) return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id) def __contains__(self, loc) -> bool: if type(loc) is int: if loc < self.page_count: return True return False if type(loc) not in (tuple, list) or len(loc) != 2: return False chapter, pno = loc if (type(chapter) != int or chapter < 0 or chapter >= self.chapter_count ): return False if (type(pno) != int or pno < 0 or pno >= self.chapter_page_count(chapter) ): return False return True def __getitem__(self, i: int =0)->"Page": assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)) if i not in self: raise IndexError("page not in document") return self.load_page(i) def __delitem__(self, i: AnyType)->None: if not self.is_pdf: raise ValueError("is no PDF") if type(i) is int: return self.delete_page(i) if type(i) in (list, tuple, range): return self.delete_pages(i) if type(i) is not slice: raise ValueError("bad argument type") pc = self.page_count start = i.start if i.start else 0 stop = i.stop if i.stop else pc step = i.step if i.step else 1 while start < 0: start += pc if start >= pc: raise ValueError("bad page number(s)") while stop < 0: stop += pc if stop > pc: raise ValueError("bad page number(s)") return self.delete_pages(range(start, stop, step)) def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None): """Return a generator iterator over a page range. Arguments have the same meaning as for the range() built-in. """ # set the start value start = start or 0 while start < 0: start += self.page_count if start not in range(self.page_count): raise ValueError("bad start page number") # set the stop value stop = stop if stop is not None and stop <= self.page_count else self.page_count # set the step value if step == 0: raise ValueError("arg 3 must not be zero") if step is None: if start > stop: step = -1 else: step = 1 for pno in range(start, stop, step): yield (self.load_page(pno)) def __len__(self) -> int: return self.page_count def _forget_page(self, page: "struct Page *"): """Remove a page from document page dict.""" pid = id(page) if pid in self._page_refs: self._page_refs[pid] = None def _reset_page_refs(self): """Invalidate all pages in document dictionary.""" if getattr(self, "is_closed", True): return for page in self._page_refs.values(): if page: page._erase() page = None self._page_refs.clear() def _cleanup(self): self._reset_page_refs() for k in self.Graftmaps.keys(): self.Graftmaps[k] = None self.Graftmaps = {} self.ShownPages = {} self.InsertedImages = {} self.FontInfos = [] self.metadata = None self.stream = None self.is_closed = True def close(self): """Close the document.""" if getattr(self, "is_closed", False): raise ValueError("document closed") self._cleanup() if getattr(self, "thisown", False): self.__swig_destroy__(self) return else: raise RuntimeError("document object unavailable") def __del__(self): if not type(self) is Document: return self._cleanup() if getattr(self, "thisown", False): self.__swig_destroy__(self) def __enter__(self): return self def __exit__(self, *args): self.close() %} } }; /*****************************************************************************/ // fz_page /*****************************************************************************/ %nodefaultctor; struct Page { %extend { ~Page() { DEBUGMSG1("Page"); fz_page *this_page = (fz_page *) $self; fz_drop_page(gctx, this_page); DEBUGMSG2; } //---------------------------------------------------------------- // bound() //---------------------------------------------------------------- FITZEXCEPTION(bound, !result) PARENTCHECK(bound, """Get page rectangle.""") %pythonappend bound %{ val = Rect(val) if val.is_infinite and self.parent.is_pdf: cb = self.cropbox w, h = cb.width, cb.height if self.rotation not in (0, 180): w, h = h, w val = Rect(0, 0, w, h) msg = TOOLS.mupdf_warnings(reset=False).splitlines()[-1] print(msg, file=sys.stderr) %} PyObject *bound() { fz_rect rect = fz_infinite_rect; fz_try(gctx) { rect = fz_bound_page(gctx, (fz_page *) $self); } fz_catch(gctx) { ; } return JM_py_from_rect(rect); } %pythoncode %{rect = property(bound, doc="page rectangle")%} //---------------------------------------------------------------- // Page.get_image_bbox //---------------------------------------------------------------- %pythonprepend get_image_bbox %{ """Get rectangle occupied by image 'name'. 'name' is either an item of the image list, or the referencing name string - elem[7] of the resp. item. Option 'transform' also returns the image transformation matrix. """ CheckParent(self) doc = self.parent if doc.is_closed or doc.is_encrypted: raise ValueError("document closed or encrypted") inf_rect = Rect(1, 1, -1, -1) null_mat = Matrix() if transform: rc = (inf_rect, null_mat) else: rc = inf_rect if type(name) in (list, tuple): if not type(name[-1]) is int: raise ValueError("need item of full page image list") item = name else: imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]] if len(imglist) == 1: item = imglist[0] elif imglist == []: raise ValueError("bad image name") else: raise ValueError("found multiple images named '%s'." % name) xref = item[-1] if xref != 0 or transform == True: try: return self.get_image_rects(item, transform=transform)[0] except: return inf_rect %} %pythonappend get_image_bbox %{ if not bool(val): return rc for v in val: if v[0] != item[-3]: continue q = Quad(v[1]) bbox = q.rect if transform == 0: rc = bbox break hm = Matrix(util_hor_matrix(q.ll, q.lr)) h = abs(q.ll - q.ul) w = abs(q.ur - q.ul) m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0) m = ~(hm * m0) rc = (bbox, m) break val = rc%} PyObject * get_image_bbox(PyObject *name, int transform=0) { pdf_page *pdf_page = pdf_page_from_fz_page(gctx, (fz_page *) $self); PyObject *rc =NULL; fz_try(gctx) { rc = JM_image_reporter(gctx, pdf_page); } fz_catch(gctx) { Py_RETURN_NONE; } return rc; } //---------------------------------------------------------------- // run() //---------------------------------------------------------------- FITZEXCEPTION(run, !result) PARENTCHECK(run, """Run page through a device.""") PyObject *run(struct DeviceWrapper *dw, PyObject *m) { fz_try(gctx) { fz_run_page(gctx, (fz_page *) $self, dw->device, JM_matrix_from_py(m), NULL); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // Page.extend_textpage //---------------------------------------------------------------- FITZEXCEPTION(extend_textpage, !result) PyObject * extend_textpage(struct TextPage *tpage, int flags=0, PyObject *matrix=NULL) { fz_page *page = (fz_page *) $self; fz_stext_page *tp = (fz_stext_page *) tpage; fz_device *dev = NULL; fz_stext_options options; memset(&options, 0, sizeof options); options.flags = flags; fz_try(gctx) { fz_matrix ctm = JM_matrix_from_py(matrix); dev = fz_new_stext_device(gctx, tp, &options); fz_run_page(gctx, page, dev, ctm, NULL); fz_close_device(gctx, dev); } fz_always(gctx) { fz_drop_device(gctx, dev); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // Page.get_textpage //---------------------------------------------------------------- FITZEXCEPTION(_get_textpage, !result) %pythonappend _get_textpage %{val.thisown = True%} struct TextPage * _get_textpage(PyObject *clip=NULL, int flags=0, PyObject *matrix=NULL) { fz_stext_page *tpage=NULL; fz_page *page = (fz_page *) $self; fz_device *dev = NULL; fz_stext_options options; memset(&options, 0, sizeof options); options.flags = flags; fz_try(gctx) { // Default to page's rect if `clip` not specified, for #2048. fz_rect rect = (clip==Py_None) ? fz_bound_page(gctx, page) : JM_rect_from_py(clip); fz_matrix ctm = JM_matrix_from_py(matrix); tpage = fz_new_stext_page(gctx, rect); dev = fz_new_stext_device(gctx, tpage, &options); fz_run_page(gctx, page, dev, ctm, NULL); fz_close_device(gctx, dev); } fz_always(gctx) { fz_drop_device(gctx, dev); } fz_catch(gctx) { return NULL; } return (struct TextPage *) tpage; } %pythoncode %{ def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage": CheckParent(self) if matrix is None: matrix = Matrix(1, 1) old_rotation = self.rotation if old_rotation != 0: self.set_rotation(0) try: textpage = self._get_textpage(clip, flags=flags, matrix=matrix) finally: if old_rotation != 0: self.set_rotation(old_rotation) textpage.parent = weakref.proxy(self) return textpage %} /* ****************** currently inactive //---------------------------------------------------------------- // Page._get_textpage_ocr //---------------------------------------------------------------- FITZEXCEPTION(_get_textpage_ocr, !result) %pythonappend _get_textpage_ocr %{val.thisown = True%} struct TextPage * _get_textpage_ocr(PyObject *clip=NULL, int flags=0, const char *language=NULL, const char *tessdata=NULL) { fz_stext_page *textpage=NULL; fz_try(gctx) { fz_rect rect = JM_rect_from_py(clip); textpage = JM_new_stext_page_ocr_from_page(gctx, (fz_page *) $self, rect, flags, language, tessdata); } fz_catch(gctx) { return NULL; } return (struct TextPage *) textpage; } ************************* */ //---------------------------------------------------------------- // Page.language //---------------------------------------------------------------- %pythoncode%{@property%} %pythonprepend language %{"""Page language."""%} PyObject *language() { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!pdfpage) Py_RETURN_NONE; pdf_obj *lang = pdf_dict_get_inheritable(gctx, pdfpage->obj, PDF_NAME(Lang)); if (!lang) Py_RETURN_NONE; return Py_BuildValue("s", pdf_to_str_buf(gctx, lang)); } //---------------------------------------------------------------- // Page.set_language //---------------------------------------------------------------- FITZEXCEPTION(set_language, !result) PARENTCHECK(set_language, """Set PDF page default language.""") PyObject *set_language(char *language=NULL) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(pdfpage); fz_text_language lang; char buf[8]; if (!language) { pdf_dict_del(gctx, pdfpage->obj, PDF_NAME(Lang)); } else { lang = fz_text_language_from_string(language); pdf_dict_put_text_string(gctx, pdfpage->obj, PDF_NAME(Lang), fz_string_from_text_language(buf, lang)); } } fz_catch(gctx) { return NULL; } Py_RETURN_TRUE; } //---------------------------------------------------------------- // Page.get_svg_image //---------------------------------------------------------------- FITZEXCEPTION(get_svg_image, !result) PARENTCHECK(get_svg_image, """Make SVG image from page.""") PyObject *get_svg_image(PyObject *matrix = NULL, int text_as_path=1) { fz_rect mediabox = fz_bound_page(gctx, (fz_page *) $self); fz_device *dev = NULL; fz_buffer *res = NULL; PyObject *text = NULL; fz_matrix ctm = JM_matrix_from_py(matrix); fz_output *out = NULL; fz_var(out); fz_var(dev); fz_var(res); fz_rect tbounds = mediabox; int text_option = (text_as_path == 1) ? FZ_SVG_TEXT_AS_PATH : FZ_SVG_TEXT_AS_TEXT; tbounds = fz_transform_rect(tbounds, ctm); fz_try(gctx) { res = fz_new_buffer(gctx, 1024); out = fz_new_output_with_buffer(gctx, res); dev = fz_new_svg_device(gctx, out, tbounds.x1-tbounds.x0, // width tbounds.y1-tbounds.y0, // height text_option, 1); fz_run_page(gctx, (fz_page *) $self, dev, ctm, NULL); fz_close_device(gctx, dev); text = JM_EscapeStrFromBuffer(gctx, res); } fz_always(gctx) { fz_drop_device(gctx, dev); fz_drop_output(gctx, out); fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return text; } //---------------------------------------------------------------- // page set opacity //---------------------------------------------------------------- FITZEXCEPTION(_set_opacity, !result) %pythonprepend _set_opacity %{ if CA >= 1 and ca >= 1 and blendmode == None: return None tCA = int(round(max(CA , 0) * 100)) if tCA >= 100: tCA = 99 tca = int(round(max(ca, 0) * 100)) if tca >= 100: tca = 99 gstate = "fitzca%02i%02i" % (tCA, tca) %} PyObject * _set_opacity(char *gstate=NULL, float CA=1, float ca=1, char *blendmode=NULL) { if (!gstate) Py_RETURN_NONE; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(page); pdf_obj *resources = pdf_dict_get(gctx, page->obj, PDF_NAME(Resources)); if (!resources) { resources = pdf_dict_put_dict(gctx, page->obj, PDF_NAME(Resources), 2); } pdf_obj *extg = pdf_dict_get(gctx, resources, PDF_NAME(ExtGState)); if (!extg) { extg = pdf_dict_put_dict(gctx, resources, PDF_NAME(ExtGState), 2); } int i, n = pdf_dict_len(gctx, extg); for (i = 0; i < n; i++) { pdf_obj *o1 = pdf_dict_get_key(gctx, extg, i); char *name = (char *) pdf_to_name(gctx, o1); if (strcmp(name, gstate) == 0) goto finished; } pdf_obj *opa = pdf_new_dict(gctx, page->doc, 3); pdf_dict_put_real(gctx, opa, PDF_NAME(CA), (double) CA); pdf_dict_put_real(gctx, opa, PDF_NAME(ca), (double) ca); pdf_dict_puts_drop(gctx, extg, gstate, opa); finished:; } fz_always(gctx) { } fz_catch(gctx) { return NULL; } return Py_BuildValue("s", gstate); } //---------------------------------------------------------------- // page add_caret_annot //---------------------------------------------------------------- FITZEXCEPTION(_add_caret_annot, !result) struct Annot * _add_caret_annot(PyObject *point) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; fz_try(gctx) { annot = pdf_create_annot(gctx, page, PDF_ANNOT_CARET); if (point) { fz_point p = JM_point_from_py(point); fz_rect r = pdf_annot_rect(gctx, annot); r = fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); pdf_set_annot_rect(gctx, annot, r); } pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addRedactAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_redact_annot, !result) struct Annot * _add_redact_annot(PyObject *quad, PyObject *text=NULL, PyObject *da_str=NULL, int align=0, PyObject *fill=NULL, PyObject *text_color=NULL) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; float fcol[4] = { 1, 1, 1, 0}; int nfcol = 0, i; fz_try(gctx) { annot = pdf_create_annot(gctx, page, PDF_ANNOT_REDACT); pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_quad q = JM_quad_from_py(quad); fz_rect r = fz_rect_from_quad(q); // TODO calculate de-rotated rect pdf_set_annot_rect(gctx, annot, r); if (EXISTS(fill)) { JM_color_FromSequence(fill, &nfcol, fcol); pdf_obj *arr = pdf_new_array(gctx, page->doc, nfcol); for (i = 0; i < nfcol; i++) { pdf_array_push_real(gctx, arr, fcol[i]); } pdf_dict_put_drop(gctx, annot_obj, PDF_NAME(IC), arr); } if (EXISTS(text)) { const char *otext = PyUnicode_AsUTF8(text); pdf_dict_puts_drop(gctx, annot_obj, "OverlayText", pdf_new_text_string(gctx, otext)); pdf_dict_put_text_string(gctx,annot_obj, PDF_NAME(DA), PyUnicode_AsUTF8(da_str)); pdf_dict_put_int(gctx, annot_obj, PDF_NAME(Q), (int64_t) align); } pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addLineAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_line_annot, !result) struct Annot * _add_line_annot(PyObject *p1, PyObject *p2) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; fz_try(gctx) { ASSERT_PDF(page); annot = pdf_create_annot(gctx, page, PDF_ANNOT_LINE); fz_point a = JM_point_from_py(p1); fz_point b = JM_point_from_py(p2); pdf_set_annot_line(gctx, annot, a, b); pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addTextAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_text_annot, !result) struct Annot * _add_text_annot(PyObject *point, char *text, char *icon=NULL) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; fz_rect r; fz_point p = JM_point_from_py(point); fz_var(annot); fz_try(gctx) { ASSERT_PDF(page); annot = pdf_create_annot(gctx, page, PDF_ANNOT_TEXT); r = pdf_annot_rect(gctx, annot); r = fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); pdf_set_annot_rect(gctx, annot, r); pdf_set_annot_contents(gctx, annot, text); if (icon) { pdf_set_annot_icon_name(gctx, annot, icon); } pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addInkAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_ink_annot, !result) struct Annot * _add_ink_annot(PyObject *list) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; PyObject *p = NULL, *sublist = NULL; pdf_obj *inklist = NULL, *stroke = NULL; fz_matrix ctm, inv_ctm; fz_point point; fz_var(annot); fz_try(gctx) { ASSERT_PDF(page); if (!PySequence_Check(list)) { RAISEPY(gctx, MSG_BAD_ARG_INK_ANNOT, PyExc_ValueError); } pdf_page_transform(gctx, page, NULL, &ctm); inv_ctm = fz_invert_matrix(ctm); annot = pdf_create_annot(gctx, page, PDF_ANNOT_INK); pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); Py_ssize_t i, j, n0 = PySequence_Size(list), n1; inklist = pdf_new_array(gctx, page->doc, n0); for (j = 0; j < n0; j++) { sublist = PySequence_ITEM(list, j); n1 = PySequence_Size(sublist); stroke = pdf_new_array(gctx, page->doc, 2 * n1); for (i = 0; i < n1; i++) { p = PySequence_ITEM(sublist, i); if (!PySequence_Check(p) || PySequence_Size(p) != 2) { RAISEPY(gctx, MSG_BAD_ARG_INK_ANNOT, PyExc_ValueError); } point = fz_transform_point(JM_point_from_py(p), inv_ctm); Py_CLEAR(p); pdf_array_push_real(gctx, stroke, point.x); pdf_array_push_real(gctx, stroke, point.y); } pdf_array_push_drop(gctx, inklist, stroke); stroke = NULL; Py_CLEAR(sublist); } pdf_dict_put_drop(gctx, annot_obj, PDF_NAME(InkList), inklist); inklist = NULL; pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { Py_CLEAR(p); Py_CLEAR(sublist); return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addStampAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_stamp_annot, !result) struct Annot * _add_stamp_annot(PyObject *rect, int stamp=0) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; pdf_obj *stamp_id[] = {PDF_NAME(Approved), PDF_NAME(AsIs), PDF_NAME(Confidential), PDF_NAME(Departmental), PDF_NAME(Experimental), PDF_NAME(Expired), PDF_NAME(Final), PDF_NAME(ForComment), PDF_NAME(ForPublicRelease), PDF_NAME(NotApproved), PDF_NAME(NotForPublicRelease), PDF_NAME(Sold), PDF_NAME(TopSecret), PDF_NAME(Draft)}; int n = nelem(stamp_id); pdf_obj *name = stamp_id[0]; fz_try(gctx) { ASSERT_PDF(page); fz_rect r = JM_rect_from_py(rect); if (fz_is_infinite_rect(r) || fz_is_empty_rect(r)) { RAISEPY(gctx, MSG_BAD_RECT, PyExc_ValueError); } if (INRANGE(stamp, 0, n-1)) { name = stamp_id[stamp]; } annot = pdf_create_annot(gctx, page, PDF_ANNOT_STAMP); pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_set_annot_rect(gctx, annot, r); pdf_dict_put(gctx, annot_obj, PDF_NAME(Name), name); pdf_set_annot_contents(gctx, annot, pdf_dict_get_name(gctx, annot_obj, PDF_NAME(Name))); pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addFileAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_file_annot, !result) struct Annot * _add_file_annot(PyObject *point, PyObject *buffer, char *filename, char *ufilename=NULL, char *desc=NULL, char *icon=NULL) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; char *uf = ufilename, *d = desc; if (!ufilename) uf = filename; if (!desc) d = filename; fz_buffer *filebuf = NULL; fz_rect r; fz_point p = JM_point_from_py(point); fz_var(filebuf); fz_try(gctx) { ASSERT_PDF(page); filebuf = JM_BufferFromBytes(gctx, buffer); if (!filebuf) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_TypeError); } annot = pdf_create_annot(gctx, page, PDF_ANNOT_FILE_ATTACHMENT); pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); r = pdf_annot_rect(gctx, annot); r = fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); pdf_set_annot_rect(gctx, annot, r); int flags = PDF_ANNOT_IS_PRINT; pdf_set_annot_flags(gctx, annot, flags); if (icon) pdf_set_annot_icon_name(gctx, annot, icon); pdf_obj *val = JM_embed_file(gctx, page->doc, filebuf, filename, uf, d, 1); pdf_dict_put_drop(gctx, annot_obj, PDF_NAME(FS), val); pdf_dict_put_text_string(gctx, annot_obj, PDF_NAME(Contents), filename); pdf_update_annot(gctx, annot); pdf_set_annot_rect(gctx, annot, r); pdf_set_annot_flags(gctx, annot, flags); JM_add_annot_id(gctx, annot, "A"); } fz_always(gctx) { fz_drop_buffer(gctx, filebuf); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page: add a text marker annotation //---------------------------------------------------------------- FITZEXCEPTION(_add_text_marker, !result) %pythonprepend _add_text_marker %{ CheckParent(self) if not self.parent.is_pdf: raise ValueError("is no PDF")%} %pythonappend _add_text_marker %{ if not val: return None val.parent = weakref.proxy(self) self._annot_refs[id(val)] = val%} struct Annot * _add_text_marker(PyObject *quads, int annot_type) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; PyObject *item = NULL; int rotation = JM_page_rotation(gctx, pdfpage); fz_quad q; fz_var(annot); fz_var(item); fz_try(gctx) { if (rotation != 0) { pdf_dict_put_int(gctx, pdfpage->obj, PDF_NAME(Rotate), 0); } annot = pdf_create_annot(gctx, pdfpage, annot_type); Py_ssize_t i, len = PySequence_Size(quads); for (i = 0; i < len; i++) { item = PySequence_ITEM(quads, i); q = JM_quad_from_py(item); Py_DECREF(item); pdf_add_annot_quad_point(gctx, annot, q); } pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_always(gctx) { if (rotation != 0) { pdf_dict_put_int(gctx, pdfpage->obj, PDF_NAME(Rotate), rotation); } } fz_catch(gctx) { pdf_drop_annot(gctx, annot); return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page: add circle or rectangle annotation //---------------------------------------------------------------- FITZEXCEPTION(_add_square_or_circle, !result) struct Annot * _add_square_or_circle(PyObject *rect, int annot_type) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; fz_try(gctx) { fz_rect r = JM_rect_from_py(rect); if (fz_is_infinite_rect(r) || fz_is_empty_rect(r)) { RAISEPY(gctx, MSG_BAD_RECT, PyExc_ValueError); } annot = pdf_create_annot(gctx, page, annot_type); pdf_set_annot_rect(gctx, annot, r); pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page: add multiline annotation //---------------------------------------------------------------- FITZEXCEPTION(_add_multiline, !result) struct Annot * _add_multiline(PyObject *points, int annot_type) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *annot = NULL; fz_try(gctx) { Py_ssize_t i, n = PySequence_Size(points); if (n < 2) { RAISEPY(gctx, MSG_BAD_ARG_POINTS, PyExc_ValueError); } annot = pdf_create_annot(gctx, page, annot_type); for (i = 0; i < n; i++) { PyObject *p = PySequence_ITEM(points, i); if (PySequence_Size(p) != 2) { Py_DECREF(p); RAISEPY(gctx, MSG_BAD_ARG_POINTS, PyExc_ValueError); } fz_point point = JM_point_from_py(p); Py_DECREF(p); pdf_add_annot_vertex(gctx, annot, point); } pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page addFreetextAnnot //---------------------------------------------------------------- FITZEXCEPTION(_add_freetext_annot, !result) %pythonappend _add_freetext_annot %{ ap = val._getAP() BT = ap.find(b"BT") ET = ap.find(b"ET") + 2 ap = ap[BT:ET] w = rect[2]-rect[0] h = rect[3]-rect[1] if rotate in (90, -90, 270): w, h = h, w re = b"0 0 %g %g re" % (w, h) ap = re + b"\nW\nn\n" + ap ope = None bwidth = b"" fill_string = ColorCode(fill_color, "f").encode() if fill_string: fill_string += b"\n" ope = b"f" stroke_string = ColorCode(border_color, "c").encode() if stroke_string: stroke_string += b"\n" bwidth = b"1 w\n" ope = b"S" if fill_string and stroke_string: ope = b"B" if ope != None: ap = bwidth + fill_string + stroke_string + re + b"\n" + ope + b"\n" + ap val._setAP(ap) %} struct Annot * _add_freetext_annot(PyObject *rect, char *text, float fontsize=11, char *fontname=NULL, PyObject *text_color=NULL, PyObject *fill_color=NULL, PyObject *border_color=NULL, int align=0, int rotate=0) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); float fcol[4] = {1, 1, 1, 1}; // fill color: white int nfcol = 0; JM_color_FromSequence(fill_color, &nfcol, fcol); float tcol[4] = {0, 0, 0, 0}; // std. text color: black int ntcol = 0; JM_color_FromSequence(text_color, &ntcol, tcol); fz_rect r = JM_rect_from_py(rect); pdf_annot *annot = NULL; fz_try(gctx) { if (fz_is_infinite_rect(r) || fz_is_empty_rect(r)) { RAISEPY(gctx, MSG_BAD_RECT, PyExc_ValueError); } annot = pdf_create_annot(gctx, page, PDF_ANNOT_FREE_TEXT); pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_set_annot_contents(gctx, annot, text); pdf_set_annot_rect(gctx, annot, r); pdf_dict_put_int(gctx, annot_obj, PDF_NAME(Rotate), rotate); pdf_dict_put_int(gctx, annot_obj, PDF_NAME(Q), align); if (nfcol > 0) { pdf_set_annot_color(gctx, annot, nfcol, fcol); } // insert the default appearance string JM_make_annot_DA(gctx, annot, ntcol, tcol, fontname, fontsize); pdf_update_annot(gctx, annot); JM_add_annot_id(gctx, annot, "A"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } %pythoncode %{ @property def rotation_matrix(self) -> Matrix: """Reflects page rotation.""" return Matrix(TOOLS._rotate_matrix(self)) @property def derotation_matrix(self) -> Matrix: """Reflects page de-rotation.""" return Matrix(TOOLS._derotate_matrix(self)) def add_caret_annot(self, point: point_like) -> "struct Annot *": """Add a 'Caret' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_caret_annot(point) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> "struct Annot *": """Add a 'StrikeOut' annotation.""" if quads is None: q = get_highlight_selection(self, start=start, stop=stop, clip=clip) else: q = CheckMarkerArg(quads) return self._add_text_marker(q, PDF_ANNOT_STRIKE_OUT) def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> "struct Annot *": """Add a 'Underline' annotation.""" if quads is None: q = get_highlight_selection(self, start=start, stop=stop, clip=clip) else: q = CheckMarkerArg(quads) return self._add_text_marker(q, PDF_ANNOT_UNDERLINE) def add_squiggly_annot(self, quads=None, start=None, stop=None, clip=None) -> "struct Annot *": """Add a 'Squiggly' annotation.""" if quads is None: q = get_highlight_selection(self, start=start, stop=stop, clip=clip) else: q = CheckMarkerArg(quads) return self._add_text_marker(q, PDF_ANNOT_SQUIGGLY) def add_highlight_annot(self, quads=None, start=None, stop=None, clip=None) -> "struct Annot *": """Add a 'Highlight' annotation.""" if quads is None: q = get_highlight_selection(self, start=start, stop=stop, clip=clip) else: q = CheckMarkerArg(quads) return self._add_text_marker(q, PDF_ANNOT_HIGHLIGHT) def add_rect_annot(self, rect: rect_like) -> "struct Annot *": """Add a 'Square' (rectangle) annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_square_or_circle(rect, PDF_ANNOT_SQUARE) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_circle_annot(self, rect: rect_like) -> "struct Annot *": """Add a 'Circle' (ellipse, oval) annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_square_or_circle(rect, PDF_ANNOT_CIRCLE) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> "struct Annot *": """Add a 'Text' (sticky note) annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_text_annot(point, text, icon=icon) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_line_annot(self, p1: point_like, p2: point_like) -> "struct Annot *": """Add a 'Line' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_line_annot(p1, p2) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_polyline_annot(self, points: list) -> "struct Annot *": """Add a 'PolyLine' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_multiline(points, PDF_ANNOT_POLY_LINE) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_polygon_annot(self, points: list) -> "struct Annot *": """Add a 'Polygon' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_multiline(points, PDF_ANNOT_POLYGON) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_stamp_annot(self, rect: rect_like, stamp: int =0) -> "struct Annot *": """Add a ('rubber') 'Stamp' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_stamp_annot(rect, stamp) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_ink_annot(self, handwriting: list) -> "struct Annot *": """Add a 'Ink' ('handwriting') annotation. The argument must be a list of lists of point_likes. """ old_rotation = annot_preprocess(self) try: annot = self._add_ink_annot(handwriting) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_file_annot(self, point: point_like, buffer: ByteString, filename: str, ufilename: OptStr =None, desc: OptStr =None, icon: OptStr =None) -> "struct Annot *": """Add a 'FileAttachment' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_file_annot(point, buffer, filename, ufilename=ufilename, desc=desc, icon=icon) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_freetext_annot(self, rect: rect_like, text: str, fontsize: float =11, fontname: OptStr =None, border_color: OptSeq =None, text_color: OptSeq =None, fill_color: OptSeq =None, align: int =0, rotate: int =0) -> "struct Annot *": """Add a 'FreeText' annotation.""" old_rotation = annot_preprocess(self) try: annot = self._add_freetext_annot(rect, text, fontsize=fontsize, fontname=fontname, border_color=border_color,text_color=text_color, fill_color=fill_color, align=align, rotate=rotate) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) return annot def add_redact_annot(self, quad, text: OptStr =None, fontname: OptStr =None, fontsize: float =11, align: int =0, fill: OptSeq =None, text_color: OptSeq =None, cross_out: bool =True) -> "struct Annot *": """Add a 'Redact' annotation.""" da_str = None if text: CheckColor(fill) CheckColor(text_color) if not fontname: fontname = "Helv" if not fontsize: fontsize = 11 if not text_color: text_color = (0, 0, 0) if hasattr(text_color, "__float__"): text_color = (text_color, text_color, text_color) if len(text_color) > 3: text_color = text_color[:3] fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" da_str = fmt.format(*text_color, f=fontname, s=fontsize) if fill is None: fill = (1, 1, 1) if fill: if hasattr(fill, "__float__"): fill = (fill, fill, fill) if len(fill) > 3: fill = fill[:3] old_rotation = annot_preprocess(self) try: annot = self._add_redact_annot(quad, text=text, da_str=da_str, align=align, fill=fill) finally: if old_rotation != 0: self.set_rotation(old_rotation) annot_postprocess(self, annot) #------------------------------------------------------------- # change appearance to show a crossed-out rectangle #------------------------------------------------------------- if cross_out: ap_tab = annot._getAP().splitlines()[:-1] # get the 4 commands only _, LL, LR, UR, UL = ap_tab ap_tab.append(LR) ap_tab.append(LL) ap_tab.append(UR) ap_tab.append(LL) ap_tab.append(UL) ap_tab.append(b"S") ap = b"\n".join(ap_tab) annot._setAP(ap, 0) return annot %} //---------------------------------------------------------------- // page load annot by name or xref //---------------------------------------------------------------- FITZEXCEPTION(_load_annot, !result) struct Annot * _load_annot(char *name, int xref) { pdf_annot *annot = NULL; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(page); if (xref == 0) annot = JM_get_annot_by_name(gctx, page, name); else annot = JM_get_annot_by_xref(gctx, page, xref); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page load widget by xref //---------------------------------------------------------------- FITZEXCEPTION(load_widget, !result) %pythonprepend load_widget %{ """Load a widget by its xref.""" CheckParent(self) %} %pythonappend load_widget %{ if not val: return val val.thisown = True val.parent = weakref.proxy(self) self._annot_refs[id(val)] = val widget = Widget() TOOLS._fill_widget(val, widget) val = widget %} struct Annot * load_widget(int xref) { pdf_annot *annot = NULL; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(page); annot = JM_get_widget_by_xref(gctx, page, xref); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // page list Resource/Properties //---------------------------------------------------------------- FITZEXCEPTION(_get_resource_properties, !result) PyObject * _get_resource_properties() { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); PyObject *rc; fz_try(gctx) { ASSERT_PDF(page); rc = JM_get_resource_properties(gctx, page->obj); } fz_catch(gctx) { return NULL; } return rc; } //---------------------------------------------------------------- // page list Resource/Properties //---------------------------------------------------------------- FITZEXCEPTION(_set_resource_property, !result) PyObject * _set_resource_property(char *name, int xref) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(page); JM_set_resource_property(gctx, page->obj, name, xref); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def _get_optional_content(self, oc: OptInt) -> OptStr: if oc == None or oc == 0: return None doc = self.parent check = doc.xref_object(oc, compressed=True) if not ("/Type/OCG" in check or "/Type/OCMD" in check): raise ValueError("bad optional content: 'oc'") props = {} for p, x in self._get_resource_properties(): props[x] = p if oc in props.keys(): return props[oc] i = 0 mc = "MC%i" % i while mc in props.values(): i += 1 mc = "MC%i" % i self._set_resource_property(mc, oc) return mc def get_oc_items(self) -> list: """Get OCGs and OCMDs used in the page's contents. Returns: List of items (name, xref, type), where type is one of "ocg" / "ocmd", and name is the property name. """ rc = [] for pname, xref in self._get_resource_properties(): text = self.parent.xref_object(xref, compressed=True) if "/Type/OCG" in text: octype = "ocg" elif "/Type/OCMD" in text: octype = "ocmd" else: continue rc.append((pname, xref, octype)) return rc %} //---------------------------------------------------------------- // page get list of annot names //---------------------------------------------------------------- PARENTCHECK(annot_names, """List of names of annotations, fields and links.""") PyObject *annot_names() { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) { PyObject *rc = PyList_New(0); return rc; } return JM_get_annot_id_list(gctx, page); } //---------------------------------------------------------------- // page retrieve list of annotation xrefs //---------------------------------------------------------------- PARENTCHECK(annot_xrefs,"""List of xref numbers of annotations, fields and links.""") PyObject *annot_xrefs() { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) { PyObject *rc = PyList_New(0); return rc; } return JM_get_annot_xref_list(gctx, page->obj); } %pythoncode %{ def load_annot(self, ident: typing.Union[str, int]) -> "struct Annot *": """Load an annot by name (/NM key) or xref. Args: ident: identifier, either name (str) or xref (int). """ CheckParent(self) if type(ident) is str: xref = 0 name = ident elif type(ident) is int: xref = ident name = None else: raise ValueError("identifier must be string or integer") val = self._load_annot(name, xref) if not val: return val val.thisown = True val.parent = weakref.proxy(self) self._annot_refs[id(val)] = val return val #--------------------------------------------------------------------- # page addWidget #--------------------------------------------------------------------- def add_widget(self, widget: Widget) -> "struct Annot *": """Add a 'Widget' (form field).""" CheckParent(self) doc = self.parent if not doc.is_pdf: raise ValueError("is no PDF") widget._validate() annot = self._addWidget(widget.field_type, widget.field_name) if not annot: return None annot.thisown = True annot.parent = weakref.proxy(self) # owning page object self._annot_refs[id(annot)] = annot widget.parent = annot.parent widget._annot = annot widget.update() return annot %} FITZEXCEPTION(_addWidget, !result) struct Annot *_addWidget(int field_type, char *field_name) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_document *pdf = page->doc; pdf_annot *annot = NULL; fz_var(annot); fz_try(gctx) { annot = JM_create_widget(gctx, pdf, page, field_type, field_name); if (!annot) { RAISEPY(gctx, "cannot create widget", PyExc_RuntimeError); } JM_add_annot_id(gctx, annot, "W"); } fz_catch(gctx) { return NULL; } return (struct Annot *) annot; } //---------------------------------------------------------------- // Page.get_displaylist //---------------------------------------------------------------- FITZEXCEPTION(get_displaylist, !result) %pythonprepend get_displaylist %{ """Make a DisplayList from the page for Pixmap generation. Include (default) or exclude annotations.""" CheckParent(self) %} %pythonappend get_displaylist %{val.thisown = True%} struct DisplayList *get_displaylist(int annots=1) { fz_display_list *dl = NULL; fz_try(gctx) { if (annots) { dl = fz_new_display_list_from_page(gctx, (fz_page *) $self); } else { dl = fz_new_display_list_from_page_contents(gctx, (fz_page *) $self); } } fz_catch(gctx) { return NULL; } return (struct DisplayList *) dl; } //---------------------------------------------------------------- // Page.get_drawings //---------------------------------------------------------------- %pythoncode %{ def get_drawings(self, extended: bool = False) -> list: """Retrieve vector graphics. The extended version includes clips. Note: For greater comfort, this method converts point-like, rect-like, quad-like tuples of the C version to respective Point / Rect / Quad objects. It also adds default items that are missing in original path types. """ allkeys = ( "closePath", "fill", "color", "width", "lineCap", "lineJoin", "dashes", "stroke_opacity", "fill_opacity", "even_odd", ) val = self.get_cdrawings(extended=extended) for i in range(len(val)): npath = val[i] if not npath["type"].startswith("clip"): npath["rect"] = Rect(npath["rect"]) else: npath["scissor"] = Rect(npath["scissor"]) if npath["type"]!="group": items = npath["items"] newitems = [] for item in items: cmd = item[0] rest = item[1:] if cmd == "re": item = ("re", Rect(rest[0]).normalize(), rest[1]) elif cmd == "qu": item = ("qu", Quad(rest[0])) else: item = tuple([cmd] + [Point(i) for i in rest]) newitems.append(item) npath["items"] = newitems if npath["type"] in ("f", "s"): for k in allkeys: npath[k] = npath.get(k) val[i] = npath return val class Drawpath(object): """Reflects a path dictionary from get_cdrawings().""" def __init__(self, **args): self.__dict__.update(args) class Drawpathlist(object): """List of Path objects representing get_cdrawings() output.""" def __init__(self): self.paths = [] self.path_count = 0 self.group_count = 0 self.clip_count = 0 self.fill_count = 0 self.stroke_count = 0 self.fillstroke_count = 0 def append(self, path): self.paths.append(path) self.path_count += 1 if path.type == "clip": self.clip_count += 1 elif path.type == "group": self.group_count += 1 elif path.type == "f": self.fill_count += 1 elif path.type == "s": self.stroke_count += 1 elif path.type == "fs": self.fillstroke_count += 1 def clip_parents(self, i): """Return list of parent clip paths. Args: i: (int) return parents of this path. Returns: List of the clip parents.""" if i >= self.path_count: raise IndexError("bad path index") while i < 0: i += self.path_count lvl = self.paths[i].level clips = list( # clip paths before identified one reversed( [ p for p in self.paths[:i] if p.type == "clip" and p.level < lvl ] ) ) if clips == []: # none found: empty list return [] nclips = [clips[0]] # init return list for p in clips[1:]: if p.level >= nclips[-1].level: continue # only accept smaller clip levels nclips.append(p) return nclips def group_parents(self, i): """Return list of parent group paths. Args: i: (int) return parents of this path. Returns: List of the group parents.""" if i >= self.path_count: raise IndexError("bad path index") while i < 0: i += self.path_count lvl = self.paths[i].level groups = list( # group paths before identified one reversed( [ p for p in self.paths[:i] if p.type == "group" and p.level < lvl ] ) ) if groups == []: # none found: empty list return [] ngroups = [groups[0]] # init return list for p in groups[1:]: if p.level >= ngroups[-1].level: continue # only accept smaller group levels ngroups.append(p) return ngroups def __getitem__(self, item): return self.paths.__getitem__(item) def __len__(self): return self.paths.__len__() def get_lineart(self) -> object: """Get page drawings paths. Note: For greater comfort, this method converts point-like, rect-like, quad-like tuples of the C version to respective Point / Rect / Quad objects. Also adds default items that are missing in original path types. In contrast to get_drawings(), this output is an object. """ val = self.get_cdrawings(extended=True) paths = self.Drawpathlist() for path in val: npath = self.Drawpath(**path) if npath.type != "clip": npath.rect = Rect(path["rect"]) else: npath.scissor = Rect(path["scissor"]) if npath.type != "group": items = path["items"] newitems = [] for item in items: cmd = item[0] rest = item[1:] if cmd == "re": item = ("re", Rect(rest[0]).normalize(), rest[1]) elif cmd == "qu": item = ("qu", Quad(rest[0])) else: item = tuple([cmd] + [Point(i) for i in rest]) newitems.append(item) npath.items = newitems if npath.type == "f": npath.stroke_opacity = None npath.dashes = None npath.lineJoin = None npath.lineCap = None npath.color = None npath.width = None paths.append(npath) val = None return paths %} FITZEXCEPTION(get_cdrawings, !result) %pythonprepend get_cdrawings %{ """Extract vector graphics ("line art") from the page.""" CheckParent(self) old_rotation = self.rotation if old_rotation != 0: self.set_rotation(0) %} %pythonappend get_cdrawings %{ if old_rotation != 0: self.set_rotation(old_rotation) %} PyObject * get_cdrawings(PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL) { fz_page *page = (fz_page *) $self; fz_device *dev = NULL; PyObject *rc = NULL; int clips = PyObject_IsTrue(extended); fz_var(rc); fz_try(gctx) { fz_rect prect = fz_bound_page(gctx, page); trace_device_ptm = fz_make_matrix(1, 0, 0, -1, 0, prect.y1); if (PyCallable_Check(callback) || method != Py_None) { dev = JM_new_lineart_device(gctx, callback, clips, method); } else { rc = PyList_New(0); dev = JM_new_lineart_device(gctx, rc, clips, method); } fz_run_page(gctx, page, dev, fz_identity, NULL); fz_close_device(gctx, dev); } fz_always(gctx) { fz_drop_device(gctx, dev); } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } if (PyCallable_Check(callback) || method != Py_None) { Py_RETURN_NONE; } return rc; } FITZEXCEPTION(get_bboxlog, !result) %pythonprepend get_bboxlog %{ CheckParent(self) old_rotation = self.rotation if old_rotation != 0: self.set_rotation(0) %} %pythonappend get_bboxlog %{ if old_rotation != 0: self.set_rotation(old_rotation) %} PyObject * get_bboxlog(PyObject *layers=NULL) { fz_page *page = (fz_page *) $self; fz_device *dev = NULL; PyObject *rc = PyList_New(0); int inc_layers = PyObject_IsTrue(layers); fz_try(gctx) { dev = JM_new_bbox_device(gctx, rc, inc_layers); fz_run_page(gctx, page, dev, fz_identity, NULL); fz_close_device(gctx, dev); } fz_always(gctx) { fz_drop_device(gctx, dev); } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } return rc; } FITZEXCEPTION(get_texttrace, !result) %pythonprepend get_texttrace %{ CheckParent(self) old_rotation = self.rotation if old_rotation != 0: self.set_rotation(0) %} %pythonappend get_texttrace %{ if old_rotation != 0: self.set_rotation(old_rotation) %} PyObject * get_texttrace() { fz_page *page = (fz_page *) $self; fz_device *dev = NULL; PyObject *rc = PyList_New(0); fz_try(gctx) { dev = JM_new_texttrace_device(gctx, rc); fz_rect prect = fz_bound_page(gctx, page); trace_device_rot = fz_identity; trace_device_ptm = fz_make_matrix(1, 0, 0, -1, 0, prect.y1); fz_run_page(gctx, page, dev, fz_identity, NULL); fz_close_device(gctx, dev); } fz_always(gctx) { fz_drop_device(gctx, dev); } fz_catch(gctx) { Py_CLEAR(rc); return NULL; } return rc; } //---------------------------------------------------------------- // Page apply redactions //---------------------------------------------------------------- FITZEXCEPTION(_apply_redactions, !result) PyObject *_apply_redactions(int images=PDF_REDACT_IMAGE_PIXELS) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); int success = 0; pdf_redact_options opts = {0}; opts.black_boxes = 0; // no black boxes opts.image_method = images; // how to treat images fz_try(gctx) { ASSERT_PDF(page); success = pdf_redact_page(gctx, page->doc, page, &opts); } fz_catch(gctx) { return NULL; } return JM_BOOL(success); } //---------------------------------------------------------------- // Page._makePixmap //---------------------------------------------------------------- FITZEXCEPTION(_makePixmap, !result) struct Pixmap * _makePixmap(struct Document *doc, PyObject *ctm, struct Colorspace *cs, int alpha=0, int annots=1, PyObject *clip=NULL) { fz_pixmap *pix = NULL; fz_try(gctx) { pix = JM_pixmap_from_page(gctx, (fz_document *) doc, (fz_page *) $self, ctm, (fz_colorspace *) cs, alpha, annots, clip); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pix; } //---------------------------------------------------------------- // Page.set_mediabox //---------------------------------------------------------------- FITZEXCEPTION(set_mediabox, !result) PARENTCHECK(set_mediabox, """Set the MediaBox.""") PyObject *set_mediabox(PyObject *rect) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); fz_try(gctx) { ASSERT_PDF(page); fz_rect mediabox = JM_rect_from_py(rect); if (fz_is_empty_rect(mediabox) || fz_is_infinite_rect(mediabox)) { RAISEPY(gctx, MSG_BAD_RECT, PyExc_ValueError); } pdf_dict_put_rect(gctx, page->obj, PDF_NAME(MediaBox), mediabox); pdf_dict_del(gctx, page->obj, PDF_NAME(CropBox)); pdf_dict_del(gctx, page->obj, PDF_NAME(ArtBox)); pdf_dict_del(gctx, page->obj, PDF_NAME(BleedBox)); pdf_dict_del(gctx, page->obj, PDF_NAME(TrimBox)); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // Page.load_links() //---------------------------------------------------------------- PARENTCHECK(load_links, """Get first Link.""") %pythonappend load_links %{ if val: val.thisown = True val.parent = weakref.proxy(self) # owning page object self._annot_refs[id(val)] = val if self.parent.is_pdf: link_id = [x for x in self.annot_xrefs() if x[1] == PDF_ANNOT_LINK][0] val.xref = link_id[0] val.id = link_id[2] else: val.xref = 0 val.id = "" %} struct Link *load_links() { fz_link *l = NULL; fz_try(gctx) { l = fz_load_links(gctx, (fz_page *) $self); } fz_catch(gctx) { return NULL; } return (struct Link *) l; } %pythoncode %{first_link = property(load_links, doc="First link on page")%} //---------------------------------------------------------------- // Page.first_annot //---------------------------------------------------------------- PARENTCHECK(first_annot, """First annotation.""") %pythonappend first_annot %{ if val: val.thisown = True val.parent = weakref.proxy(self) # owning page object self._annot_refs[id(val)] = val %} %pythoncode %{@property%} struct Annot *first_annot() { pdf_annot *annot = NULL; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (page) { annot = pdf_first_annot(gctx, page); if (annot) pdf_keep_annot(gctx, annot); } return (struct Annot *) annot; } //---------------------------------------------------------------- // first_widget //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(first_widget, """First widget/field.""") %pythonappend first_widget %{ if val: val.thisown = True val.parent = weakref.proxy(self) # owning page object self._annot_refs[id(val)] = val widget = Widget() TOOLS._fill_widget(val, widget) val = widget %} struct Annot *first_widget() { pdf_annot *annot = NULL; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (page) { annot = pdf_first_widget(gctx, page); if (annot) pdf_keep_annot(gctx, annot); } return (struct Annot *) annot; } //---------------------------------------------------------------- // Page.delete_link() - delete link //---------------------------------------------------------------- PARENTCHECK(delete_link, """Delete a Link.""") %pythonappend delete_link %{ if linkdict["xref"] == 0: return try: linkid = linkdict["id"] linkobj = self._annot_refs[linkid] linkobj._erase() except: pass %} void delete_link(PyObject *linkdict) { if (!PyDict_Check(linkdict)) return; // have no dictionary fz_try(gctx) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) goto finished; // have no PDF int xref = (int) PyInt_AsLong(PyDict_GetItem(linkdict, dictkey_xref)); if (xref < 1) goto finished; // invalid xref pdf_obj *annots = pdf_dict_get(gctx, page->obj, PDF_NAME(Annots)); if (!annots) goto finished; // have no annotations int len = pdf_array_len(gctx, annots); if (len == 0) goto finished; int i, oxref = 0; for (i = 0; i < len; i++) { oxref = pdf_to_num(gctx, pdf_array_get(gctx, annots, i)); if (xref == oxref) break; // found xref in annotations } if (xref != oxref) goto finished; // xref not in annotations pdf_array_delete(gctx, annots, i); // delete entry in annotations pdf_delete_object(gctx, page->doc, xref); // delete link obj pdf_dict_put(gctx, page->obj, PDF_NAME(Annots), annots); JM_refresh_links(gctx, page); finished:; } fz_catch(gctx) {;} } //---------------------------------------------------------------- // Page.delete_annot() - delete annotation and return the next one //---------------------------------------------------------------- %pythonprepend delete_annot %{ """Delete annot and return next one.""" CheckParent(self) CheckParent(annot)%} %pythonappend delete_annot %{ if val: val.thisown = True val.parent = weakref.proxy(self) # owning page object val.parent._annot_refs[id(val)] = val %} struct Annot *delete_annot(struct Annot *annot) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_annot *irt_annot = NULL; while (1) { // first loop through all /IRT annots and remove them irt_annot = JM_find_annot_irt(gctx, (pdf_annot *) annot); if (!irt_annot) // no more there break; pdf_delete_annot(gctx, page, irt_annot); } pdf_annot *nextannot = pdf_next_annot(gctx, (pdf_annot *) annot); // store next pdf_delete_annot(gctx, page, (pdf_annot *) annot); if (nextannot) { nextannot = pdf_keep_annot(gctx, nextannot); } return (struct Annot *) nextannot; } //---------------------------------------------------------------- // mediabox: get the /MediaBox (PDF only) //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(mediabox, """The MediaBox.""") %pythonappend mediabox %{val = Rect(JM_TUPLE3(val))%} PyObject *mediabox() { fz_rect rect = fz_infinite_rect; fz_try(gctx) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) { rect = fz_bound_page(gctx, (fz_page *) $self); } else { rect = JM_mediabox(gctx, page->obj); } } fz_catch(gctx) {;} return JM_py_from_rect(rect); } //---------------------------------------------------------------- // cropbox: get the /CropBox (PDF only) //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(cropbox, """The CropBox.""") %pythonappend cropbox %{val = Rect(JM_TUPLE3(val))%} PyObject *cropbox() { fz_rect rect = fz_infinite_rect; fz_try(gctx) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) { rect = fz_bound_page(gctx, (fz_page *) $self); } else { rect = JM_cropbox(gctx, page->obj); } } fz_catch(gctx) {;} return JM_py_from_rect(rect); } PyObject *_other_box(const char *boxtype) { fz_rect rect = fz_infinite_rect; fz_try(gctx) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (page) { pdf_obj *obj = pdf_dict_gets(gctx, page->obj, boxtype); if (pdf_is_array(gctx, obj)) { rect = pdf_to_rect(gctx, obj); } } } fz_catch(gctx) {;} if (fz_is_infinite_rect(rect)) { Py_RETURN_NONE; } return JM_py_from_rect(rect); } //---------------------------------------------------------------- // CropBox position: x0, y0 of /CropBox //---------------------------------------------------------------- %pythoncode %{ @property def cropbox_position(self): return self.cropbox.tl @property def artbox(self): """The ArtBox""" rect = self._other_box("ArtBox") if rect == None: return self.cropbox mb = self.mediabox return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) @property def trimbox(self): """The TrimBox""" rect = self._other_box("TrimBox") if rect == None: return self.cropbox mb = self.mediabox return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) @property def bleedbox(self): """The BleedBox""" rect = self._other_box("BleedBox") if rect == None: return self.cropbox mb = self.mediabox return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) def _set_pagebox(self, boxtype, rect): doc = self.parent if doc == None: raise ValueError("orphaned object: parent is None") if not doc.is_pdf: raise ValueError("is no PDF") valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox") if boxtype not in valid_boxes: raise ValueError("bad boxtype") rect = Rect(rect) mb = self.mediabox rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1): raise ValueError(f"{boxtype} not in MediaBox") doc.xref_set_key(self.xref, boxtype, "[%g %g %g %g]" % tuple(rect)) def set_cropbox(self, rect): """Set the CropBox. Will also change Page.rect.""" return self._set_pagebox("CropBox", rect) def set_artbox(self, rect): """Set the ArtBox.""" return self._set_pagebox("ArtBox", rect) def set_bleedbox(self, rect): """Set the BleedBox.""" return self._set_pagebox("BleedBox", rect) def set_trimbox(self, rect): """Set the TrimBox.""" return self._set_pagebox("TrimBox", rect) %} //---------------------------------------------------------------- // rotation - return page rotation //---------------------------------------------------------------- PARENTCHECK(rotation, """Page rotation.""") %pythoncode %{@property%} int rotation() { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) return 0; return JM_page_rotation(gctx, page); } /*********************************************************************/ // set_rotation() - set page rotation /*********************************************************************/ FITZEXCEPTION(set_rotation, !result) PARENTCHECK(set_rotation, """Set page rotation.""") PyObject *set_rotation(int rotation) { fz_try(gctx) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); ASSERT_PDF(page); int rot = JM_norm_rotation(rotation); pdf_dict_put_int(gctx, page->obj, PDF_NAME(Rotate), (int64_t) rot); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } /*********************************************************************/ // Page._addAnnot_FromString // Add new links provided as an array of string object definitions. /*********************************************************************/ FITZEXCEPTION(_addAnnot_FromString, !result) PARENTCHECK(_addAnnot_FromString, """Add links from list of object sources.""") PyObject *_addAnnot_FromString(PyObject *linklist) { pdf_obj *annots, *annot, *ind_obj; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); PyObject *txtpy = NULL; char *text = NULL; Py_ssize_t lcount = PyTuple_Size(linklist); // link count if (lcount < 1) Py_RETURN_NONE; Py_ssize_t i = -1; fz_var(text); // insert links from the provided sources fz_try(gctx) { ASSERT_PDF(page); if (!PyTuple_Check(linklist)) { RAISEPY(gctx, "bad 'linklist' argument", PyExc_ValueError); } if (!pdf_dict_get(gctx, page->obj, PDF_NAME(Annots))) { pdf_dict_put_array(gctx, page->obj, PDF_NAME(Annots), lcount); } annots = pdf_dict_get(gctx, page->obj, PDF_NAME(Annots)); for (i = 0; i < lcount; i++) { fz_try(gctx) { for (; i < lcount; i++) { text = JM_StrAsChar(PyTuple_GET_ITEM(linklist, i)); if (!text) { PySys_WriteStderr("skipping bad link / annot item %zi.\n", i); continue; } annot = pdf_add_object_drop(gctx, page->doc, JM_pdf_obj_from_str(gctx, page->doc, text)); ind_obj = pdf_new_indirect(gctx, page->doc, pdf_to_num(gctx, annot), 0); pdf_array_push_drop(gctx, annots, ind_obj); pdf_drop_obj(gctx, annot); } } fz_catch(gctx) { PySys_WriteStderr("skipping bad link / annot item %zi.\n", i); } } } fz_catch(gctx) { PyErr_Clear(); return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // Page clean contents stream //---------------------------------------------------------------- FITZEXCEPTION(clean_contents, !result) %pythonprepend clean_contents %{"""Clean page /Contents into one object.""" CheckParent(self) if not sanitize and not self.is_wrapped: self.wrap_contents()%} PyObject *clean_contents(int sanitize=1) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) { Py_RETURN_NONE; } #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 pdf_filter_factory list[2] = { 0 }; pdf_sanitize_filter_options sopts = { 0 }; pdf_filter_options filter = { 1, // recurse: true 0, // instance forms 0, // do not ascii-escape binary data 0, // no_update NULL, // end_page_opaque NULL, // end page list, // filters }; if (sanitize) { list[0].filter = pdf_new_sanitize_filter; list[0].options = &sopts; } #else pdf_filter_options filter = { NULL, // opaque NULL, // image filter NULL, // text filter NULL, // after text NULL, // end page 1, // recurse: true 1, // instance forms 1, // sanitize plus filtering 0 // do not ascii-escape binary data }; filter.sanitize = sanitize; #endif fz_try(gctx) { pdf_filter_page_contents(gctx, page->doc, page, &filter); } fz_catch(gctx) { Py_RETURN_NONE; } Py_RETURN_NONE; } //---------------------------------------------------------------- // Show a PDF page //---------------------------------------------------------------- FITZEXCEPTION(_show_pdf_page, !result) PyObject *_show_pdf_page(struct Page *fz_srcpage, int overlay=1, PyObject *matrix=NULL, int xref=0, int oc=0, PyObject *clip = NULL, struct Graftmap *graftmap = NULL, char *_imgname = NULL) { pdf_obj *xobj1=NULL, *xobj2=NULL, *resources; fz_buffer *res=NULL, *nres=NULL; fz_rect cropbox = JM_rect_from_py(clip); fz_matrix mat = JM_matrix_from_py(matrix); int rc_xref = xref; fz_var(xobj1); fz_var(xobj2); fz_try(gctx) { pdf_page *tpage = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_obj *tpageref = tpage->obj; pdf_document *pdfout = tpage->doc; // target PDF ENSURE_OPERATION(gctx, pdfout); //------------------------------------------------------------- // convert the source page to a Form XObject //------------------------------------------------------------- xobj1 = JM_xobject_from_page(gctx, pdfout, (fz_page *) fz_srcpage, xref, (pdf_graft_map *) graftmap); if (!rc_xref) rc_xref = pdf_to_num(gctx, xobj1); //------------------------------------------------------------- // create referencing XObject (controls display on target page) //------------------------------------------------------------- // fill reference to xobj1 into the /Resources //------------------------------------------------------------- pdf_obj *subres1 = pdf_new_dict(gctx, pdfout, 5); pdf_dict_puts(gctx, subres1, "fullpage", xobj1); pdf_obj *subres = pdf_new_dict(gctx, pdfout, 5); pdf_dict_put_drop(gctx, subres, PDF_NAME(XObject), subres1); res = fz_new_buffer(gctx, 20); fz_append_string(gctx, res, "/fullpage Do"); xobj2 = pdf_new_xobject(gctx, pdfout, cropbox, mat, subres, res); if (oc > 0) { JM_add_oc_object(gctx, pdfout, pdf_resolve_indirect(gctx, xobj2), oc); } pdf_drop_obj(gctx, subres); fz_drop_buffer(gctx, res); //------------------------------------------------------------- // update target page with xobj2: //------------------------------------------------------------- // 1. insert Xobject in Resources //------------------------------------------------------------- resources = pdf_dict_get_inheritable(gctx, tpageref, PDF_NAME(Resources)); subres = pdf_dict_get(gctx, resources, PDF_NAME(XObject)); if (!subres) { subres = pdf_dict_put_dict(gctx, resources, PDF_NAME(XObject), 5); } pdf_dict_puts(gctx, subres, _imgname, xobj2); //------------------------------------------------------------- // 2. make and insert new Contents object //------------------------------------------------------------- nres = fz_new_buffer(gctx, 50); // buffer for Do-command fz_append_string(gctx, nres, " q /"); // Do-command fz_append_string(gctx, nres, _imgname); fz_append_string(gctx, nres, " Do Q "); JM_insert_contents(gctx, pdfout, tpageref, nres, overlay); fz_drop_buffer(gctx, nres); } fz_always(gctx) { pdf_drop_obj(gctx, xobj1); pdf_drop_obj(gctx, xobj2); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", rc_xref); } //---------------------------------------------------------------- // insert an image //---------------------------------------------------------------- FITZEXCEPTION(_insert_image, !result) PyObject * _insert_image(char *filename=NULL, struct Pixmap *pixmap=NULL, PyObject *stream=NULL, PyObject *imask=NULL, PyObject *clip=NULL, int overlay=1, int rotate=0, int keep_proportion=1, int oc=0, int width=0, int height=0, int xref=0, int alpha=-1, const char *_imgname=NULL, PyObject *digests=NULL) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_document *pdf = page->doc; float w = width, h = height; fz_pixmap *pm = NULL; fz_pixmap *pix = NULL; fz_image *mask = NULL, *zimg = NULL, *image = NULL, *freethis = NULL; pdf_obj *resources, *xobject, *ref; fz_buffer *nres = NULL, *imgbuf = NULL, *maskbuf = NULL; fz_compressed_buffer *cbuf1 = NULL; int xres, yres, bpc, img_xref = xref, rc_digest = 0; unsigned char digest[16]; PyObject *md5_py = NULL, *temp; const char *template = "\nq\n%g %g %g %g %g %g cm\n/%s Do\nQ\n"; fz_try(gctx) { if (xref > 0) { ref = pdf_new_indirect(gctx, pdf, xref, 0); w = pdf_to_int(gctx, pdf_dict_geta(gctx, ref, PDF_NAME(Width), PDF_NAME(W))); h = pdf_to_int(gctx, pdf_dict_geta(gctx, ref, PDF_NAME(Height), PDF_NAME(H))); if ((w + h) == 0) { RAISEPY(gctx, MSG_IS_NO_IMAGE, PyExc_ValueError); } goto have_xref; } if (EXISTS(stream)) { imgbuf = JM_BufferFromBytes(gctx, stream); goto have_stream; } if (filename) { imgbuf = fz_read_file(gctx, filename); goto have_stream; } // process pixmap --------------------------------- fz_pixmap *arg_pix = (fz_pixmap *) pixmap; w = arg_pix->w; h = arg_pix->h; fz_md5_pixmap(gctx, arg_pix, digest); md5_py = PyBytes_FromStringAndSize(digest, 16); temp = PyDict_GetItem(digests, md5_py); if (temp) { img_xref = (int) PyLong_AsLong(temp); ref = pdf_new_indirect(gctx, page->doc, img_xref, 0); goto have_xref; } if (arg_pix->alpha == 0) { image = fz_new_image_from_pixmap(gctx, arg_pix, NULL); } else { pm = fz_convert_pixmap(gctx, arg_pix, NULL, NULL, NULL, fz_default_color_params, 1); pm->alpha = 0; pm->colorspace = NULL; mask = fz_new_image_from_pixmap(gctx, pm, NULL); image = fz_new_image_from_pixmap(gctx, arg_pix, mask); } goto have_image; // process stream --------------------------------- have_stream:; fz_md5 state; fz_md5_init(&state); fz_md5_update(&state, imgbuf->data, imgbuf->len); if (imask != Py_None) { maskbuf = JM_BufferFromBytes(gctx, imask); fz_md5_update(&state, maskbuf->data, maskbuf->len); } fz_md5_final(&state, digest); md5_py = PyBytes_FromStringAndSize(digest, 16); temp = PyDict_GetItem(digests, md5_py); if (temp) { img_xref = (int) PyLong_AsLong(temp); ref = pdf_new_indirect(gctx, page->doc, img_xref, 0); w = pdf_to_int(gctx, pdf_dict_geta(gctx, ref, PDF_NAME(Width), PDF_NAME(W))); h = pdf_to_int(gctx, pdf_dict_geta(gctx, ref, PDF_NAME(Height), PDF_NAME(H))); goto have_xref; } image = fz_new_image_from_buffer(gctx, imgbuf); w = image->w; h = image->h; if (imask == Py_None) { goto have_image; } cbuf1 = fz_compressed_image_buffer(gctx, image); if (!cbuf1) { RAISEPY(gctx, "uncompressed image cannot have mask", PyExc_ValueError); } bpc = image->bpc; fz_colorspace *colorspace = image->colorspace; fz_image_resolution(image, &xres, &yres); mask = fz_new_image_from_buffer(gctx, maskbuf); zimg = fz_new_image_from_compressed_buffer(gctx, w, h, bpc, colorspace, xres, yres, 1, 0, NULL, NULL, cbuf1, mask); freethis = image; image = zimg; zimg = NULL; goto have_image; have_image:; ref = pdf_add_image(gctx, pdf, image); if (oc) { JM_add_oc_object(gctx, pdf, ref, oc); } img_xref = pdf_to_num(gctx, ref); DICT_SETITEM_DROP(digests, md5_py, Py_BuildValue("i", img_xref)); rc_digest = 1; have_xref:; resources = pdf_dict_get_inheritable(gctx, page->obj, PDF_NAME(Resources)); if (!resources) { resources = pdf_dict_put_dict(gctx, page->obj, PDF_NAME(Resources), 2); } xobject = pdf_dict_get(gctx, resources, PDF_NAME(XObject)); if (!xobject) { xobject = pdf_dict_put_dict(gctx, resources, PDF_NAME(XObject), 2); } fz_matrix mat = calc_image_matrix(w, h, clip, rotate, keep_proportion); pdf_dict_puts_drop(gctx, xobject, _imgname, ref); nres = fz_new_buffer(gctx, 50); fz_append_printf(gctx, nres, template, mat.a, mat.b, mat.c, mat.d, mat.e, mat.f, _imgname); JM_insert_contents(gctx, pdf, page->obj, nres, overlay); } fz_always(gctx) { if (freethis) { fz_drop_image(gctx, freethis); } else { fz_drop_image(gctx, image); } fz_drop_image(gctx, mask); fz_drop_image(gctx, zimg); fz_drop_pixmap(gctx, pix); fz_drop_pixmap(gctx, pm); fz_drop_buffer(gctx, imgbuf); fz_drop_buffer(gctx, maskbuf); fz_drop_buffer(gctx, nres); } fz_catch(gctx) { return NULL; } if (rc_digest) { return Py_BuildValue("iO", img_xref, digests); } else { return Py_BuildValue("iO", img_xref, Py_None); } } //---------------------------------------------------------------- // Page.refresh() //---------------------------------------------------------------- %pythoncode %{ def refresh(self): doc = self.parent page = doc.reload_page(self) self = page %} //---------------------------------------------------------------- // insert font //---------------------------------------------------------------- %pythoncode %{ def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None, set_simple=False, wmode=0, encoding=0): doc = self.parent if doc is None: raise ValueError("orphaned object: parent is None") idx = 0 if fontname.startswith("/"): fontname = fontname[1:] inv_chars = INVALID_NAME_CHARS.intersection(fontname) if inv_chars != set(): raise ValueError(f"bad fontname chars {inv_chars}") font = CheckFont(self, fontname) if font is not None: # font already in font list of page xref = font[0] # this is the xref if CheckFontInfo(doc, xref): # also in our document font list? return xref # yes: we are done # need to build the doc FontInfo entry - done via get_char_widths doc.get_char_widths(xref) return xref #-------------------------------------------------------------------------- # the font is not present for this page #-------------------------------------------------------------------------- bfname = Base14_fontdict.get(fontname.lower(), None) # BaseFont if Base-14 font serif = 0 CJK_number = -1 CJK_list_n = ["china-t", "china-s", "japan", "korea"] CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"] try: CJK_number = CJK_list_n.index(fontname) serif = 0 except: pass if CJK_number < 0: try: CJK_number = CJK_list_s.index(fontname) serif = 1 except: pass if fontname.lower() in fitz_fontdescriptors.keys(): import pymupdf_fonts fontbuffer = pymupdf_fonts.myfont(fontname) # make a copy del pymupdf_fonts # install the font for the page if fontfile != None: if type(fontfile) is str: fontfile_str = fontfile elif hasattr(fontfile, "absolute"): fontfile_str = str(fontfile) elif hasattr(fontfile, "name"): fontfile_str = fontfile.name else: raise ValueError("bad fontfile") else: fontfile_str = None val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx, wmode, serif, encoding, CJK_number) if not val: # did not work, error return return val xref = val[0] # xref of installed font fontdict = val[1] if CheckFontInfo(doc, xref): # check again: document already has this font return xref # we are done # need to create document font info doc.get_char_widths(xref, fontdict=fontdict) return xref %} FITZEXCEPTION(_insertFont, !result) PyObject *_insertFont(char *fontname, char *bfname, char *fontfile, PyObject *fontbuffer, int set_simple, int idx, int wmode, int serif, int encoding, int ordering) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); pdf_document *pdf; pdf_obj *resources, *fonts, *font_obj; PyObject *value; fz_try(gctx) { ASSERT_PDF(page); pdf = page->doc; value = JM_insert_font(gctx, pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering); // get the objects /Resources, /Resources/Font resources = pdf_dict_get_inheritable(gctx, page->obj, PDF_NAME(Resources)); fonts = pdf_dict_get(gctx, resources, PDF_NAME(Font)); if (!fonts) { // page has no fonts yet fonts = pdf_new_dict(gctx, pdf, 5); pdf_dict_putl_drop(gctx, page->obj, fonts, PDF_NAME(Resources), PDF_NAME(Font), NULL); } // store font in resources and fonts objects will contain named reference to font int xref = 0; JM_INT_ITEM(value, 0, &xref); if (!xref) { RAISEPY(gctx, "cannot insert font", PyExc_RuntimeError); } font_obj = pdf_new_indirect(gctx, pdf, xref, 0); pdf_dict_puts_drop(gctx, fonts, fontname, font_obj); } fz_always(gctx) { ; } fz_catch(gctx) { return NULL; } return value; } //---------------------------------------------------------------- // Get page transformation matrix //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(transformation_matrix, """Page transformation matrix.""") %pythonappend transformation_matrix %{ if self.rotation % 360 == 0: val = Matrix(val) else: val = Matrix(1, 0, 0, -1, 0, self.cropbox.height) %} PyObject *transformation_matrix() { fz_matrix ctm = fz_identity; pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); if (!page) return JM_py_from_matrix(ctm); fz_try(gctx) { pdf_page_transform(gctx, page, NULL, &ctm); } fz_catch(gctx) {;} return JM_py_from_matrix(ctm); } //---------------------------------------------------------------- // Page Get list of contents objects //---------------------------------------------------------------- FITZEXCEPTION(get_contents, !result) PARENTCHECK(get_contents, """Get xrefs of /Contents objects.""") PyObject *get_contents() { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); PyObject *list = NULL; pdf_obj *contents = NULL, *icont = NULL; int i, xref; size_t n = 0; fz_try(gctx) { ASSERT_PDF(page); contents = pdf_dict_get(gctx, page->obj, PDF_NAME(Contents)); if (pdf_is_array(gctx, contents)) { n = pdf_array_len(gctx, contents); list = PyList_New(n); for (i = 0; i < n; i++) { icont = pdf_array_get(gctx, contents, i); xref = pdf_to_num(gctx, icont); PyList_SET_ITEM(list, i, Py_BuildValue("i", xref)); } } else if (contents) { list = PyList_New(1); xref = pdf_to_num(gctx, contents); PyList_SET_ITEM(list, 0, Py_BuildValue("i", xref)); } } fz_catch(gctx) { return NULL; } if (list) { return list; } return PyList_New(0); } //---------------------------------------------------------------- // //---------------------------------------------------------------- %pythoncode %{ def set_contents(self, xref: int)->None: """Set object at 'xref' as the page's /Contents.""" CheckParent(self) doc = self.parent if doc.is_closed: raise ValueError("document closed") if not doc.is_pdf: raise ValueError("is no PDF") if not xref in range(1, doc.xref_length()): raise ValueError("bad xref") if not doc.xref_is_stream(xref): raise ValueError("xref is no stream") doc.xref_set_key(self.xref, "Contents", "%i 0 R" % xref) @property def is_wrapped(self): """Check if /Contents is wrapped with string pair "q" / "Q".""" if getattr(self, "was_wrapped", False): # costly checks only once return True cont = self.read_contents().split() if cont == []: # no contents treated as okay self.was_wrapped = True return True if cont[0] != b"q" or cont[-1] != b"Q": return False # potential "geometry" issue self.was_wrapped = True # cheap check next time return True def wrap_contents(self): if self.is_wrapped: # avoid unnecessary wrapping return TOOLS._insert_contents(self, b"q\n", False) TOOLS._insert_contents(self, b"\nQ", True) self.was_wrapped = True # indicate not needed again def links(self, kinds=None): """ Generator over the links of a page. Args: kinds: (list) link kinds to subselect from. If none, all links are returned. E.g. kinds=[LINK_URI] will only yield URI links. """ all_links = self.get_links() for link in all_links: if kinds is None or link["kind"] in kinds: yield (link) def annots(self, types=None): """ Generator over the annotations of a page. Args: types: (list) annotation types to subselect from. If none, all annotations are returned. E.g. types=[PDF_ANNOT_LINE] will only yield line annotations. """ skip_types = (PDF_ANNOT_LINK, PDF_ANNOT_POPUP, PDF_ANNOT_WIDGET) if not hasattr(types, "__getitem__"): annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types] else: annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types] for xref in annot_xrefs: annot = self.load_annot(xref) annot._yielded=True yield annot def widgets(self, types=None): """ Generator over the widgets of a page. Args: types: (list) field types to subselect from. If none, all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT] will only yield text fields. """ widget_xrefs = [a[0] for a in self.annot_xrefs() if a[1] == PDF_ANNOT_WIDGET] for xref in widget_xrefs: widget = self.load_widget(xref) if types == None or widget.field_type in types: yield (widget) def __str__(self): CheckParent(self) x = self.parent.name if self.parent.stream is not None: x = "<memory, doc# %i>" % (self.parent._graft_id,) if x == "": x = "<new PDF, doc# %i>" % self.parent._graft_id return "page %s of %s" % (self.number, x) def __repr__(self): CheckParent(self) x = self.parent.name if self.parent.stream is not None: x = "<memory, doc# %i>" % (self.parent._graft_id,) if x == "": x = "<new PDF, doc# %i>" % self.parent._graft_id return "page %s of %s" % (self.number, x) def _reset_annot_refs(self): """Invalidate / delete all annots of this page.""" for annot in self._annot_refs.values(): if annot: annot._erase() self._annot_refs.clear() @property def xref(self): """PDF xref number of page.""" CheckParent(self) return self.parent.page_xref(self.number) def _erase(self): self._reset_annot_refs() self._image_infos = None try: self.parent._forget_page(self) except: pass if getattr(self, "thisown", False): self.__swig_destroy__(self) self.parent = None self.number = None def __del__(self): self._erase() def get_fonts(self, full=False): """List of fonts defined in the page object.""" CheckParent(self) return self.parent.get_page_fonts(self.number, full=full) def get_images(self, full=False): """List of images defined in the page object.""" CheckParent(self) ret = self.parent.get_page_images(self.number, full=full) return ret def get_xobjects(self): """List of xobjects defined in the page object.""" CheckParent(self) return self.parent.get_page_xobjects(self.number) def read_contents(self): """All /Contents streams concatenated to one bytes object.""" return TOOLS._get_all_contents(self) @property def mediabox_size(self): return Point(self.mediabox.x1, self.mediabox.y1) %} } }; %clearnodefaultctor; //------------------------------------------------------------------------ // Pixmap //------------------------------------------------------------------------ struct Pixmap { %extend { ~Pixmap() { DEBUGMSG1("Pixmap"); fz_pixmap *this_pix = (fz_pixmap *) $self; fz_drop_pixmap(gctx, this_pix); DEBUGMSG2; } FITZEXCEPTION(Pixmap, !result) %pythonprepend Pixmap %{"""Pixmap(colorspace, irect, alpha) - empty pixmap. Pixmap(colorspace, src) - copy changing colorspace. Pixmap(src, width, height,[clip]) - scaled copy, float dimensions. Pixmap(src, alpha=True) - copy adding / dropping alpha. Pixmap(source, mask) - from a non-alpha and a mask pixmap. Pixmap(file) - from an image file. Pixmap(memory) - from an image in memory (bytes). Pixmap(colorspace, width, height, samples, alpha) - from samples data. Pixmap(PDFdoc, xref) - from an image xref in a PDF document. """%} //---------------------------------------------------------------- // create empty pixmap with colorspace and IRect //---------------------------------------------------------------- Pixmap(struct Colorspace *cs, PyObject *bbox, int alpha = 0) { fz_pixmap *pm = NULL; fz_try(gctx) { pm = fz_new_pixmap_with_bbox(gctx, (fz_colorspace *) cs, JM_irect_from_py(bbox), NULL, alpha); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // copy pixmap, converting colorspace //---------------------------------------------------------------- Pixmap(struct Colorspace *cs, struct Pixmap *spix) { fz_pixmap *pm = NULL; fz_try(gctx) { if (!fz_pixmap_colorspace(gctx, (fz_pixmap *) spix)) { RAISEPY(gctx, "source colorspace must not be None", PyExc_ValueError); } fz_colorspace *cspace = NULL; if (cs) { cspace = (fz_colorspace *) cs; } if (cspace) { pm = fz_convert_pixmap(gctx, (fz_pixmap *) spix, cspace, NULL, NULL, fz_default_color_params, 1); } else { pm = fz_new_pixmap_from_alpha_channel(gctx, (fz_pixmap *) spix); if (!pm) { RAISEPY(gctx, MSG_PIX_NOALPHA, PyExc_RuntimeError); } } } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // add mask to a pixmap w/o alpha channel //---------------------------------------------------------------- Pixmap(struct Pixmap *spix, struct Pixmap *mpix) { fz_pixmap *dst = NULL; fz_pixmap *spm = (fz_pixmap *) spix; fz_pixmap *mpm = (fz_pixmap *) mpix; fz_try(gctx) { if (!spix) { // intercept NULL for spix: make alpha only pix dst = fz_new_pixmap_from_alpha_channel(gctx, mpm); if (!dst) { RAISEPY(gctx, MSG_PIX_NOALPHA, PyExc_RuntimeError); } } else { dst = fz_new_pixmap_from_color_and_mask(gctx, spm, mpm); } } fz_catch(gctx) { return NULL; } return (struct Pixmap *) dst; } //---------------------------------------------------------------- // create pixmap as scaled copy of another one //---------------------------------------------------------------- Pixmap(struct Pixmap *spix, float w, float h, PyObject *clip=NULL) { fz_pixmap *pm = NULL; fz_pixmap *src_pix = (fz_pixmap *) spix; fz_try(gctx) { fz_irect bbox = JM_irect_from_py(clip); if (clip != Py_None && (fz_is_infinite_irect(bbox) || fz_is_empty_irect(bbox))) { RAISEPY(gctx, "bad clip parameter", PyExc_ValueError); } if (!fz_is_infinite_irect(bbox)) { pm = fz_scale_pixmap(gctx, src_pix, src_pix->x, src_pix->y, w, h, &bbox); } else { pm = fz_scale_pixmap(gctx, src_pix, src_pix->x, src_pix->y, w, h, NULL); } } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // copy pixmap & add / drop the alpha channel //---------------------------------------------------------------- Pixmap(struct Pixmap *spix, int alpha=1) { fz_pixmap *pm = NULL, *src_pix = (fz_pixmap *) spix; int n, w, h, i; fz_separations *seps = NULL; fz_try(gctx) { if (!INRANGE(alpha, 0, 1)) { RAISEPY(gctx, "bad alpha value", PyExc_ValueError); } fz_colorspace *cs = fz_pixmap_colorspace(gctx, src_pix); if (!cs && !alpha) { RAISEPY(gctx, "cannot drop alpha for 'NULL' colorspace", PyExc_ValueError); } n = fz_pixmap_colorants(gctx, src_pix); w = fz_pixmap_width(gctx, src_pix); h = fz_pixmap_height(gctx, src_pix); pm = fz_new_pixmap(gctx, cs, w, h, seps, alpha); pm->x = src_pix->x; pm->y = src_pix->y; pm->xres = src_pix->xres; pm->yres = src_pix->yres; // copy samples data ------------------------------------------ unsigned char *sptr = src_pix->samples; unsigned char *tptr = pm->samples; if (src_pix->alpha == pm->alpha) { // identical samples memcpy(tptr, sptr, w * h * (n + alpha)); } else { for (i = 0; i < w * h; i++) { memcpy(tptr, sptr, n); tptr += n; if (pm->alpha) { tptr[0] = 255; tptr++; } sptr += n + src_pix->alpha; } } } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // create pixmap from samples data //---------------------------------------------------------------- Pixmap(struct Colorspace *cs, int w, int h, PyObject *samples, int alpha=0) { int n = fz_colorspace_n(gctx, (fz_colorspace *) cs); int stride = (n + alpha) * w; fz_separations *seps = NULL; fz_buffer *res = NULL; fz_pixmap *pm = NULL; fz_try(gctx) { size_t size = 0; unsigned char *c = NULL; res = JM_BufferFromBytes(gctx, samples); if (!res) { RAISEPY(gctx, "bad samples data", PyExc_ValueError); } size = fz_buffer_storage(gctx, res, &c); if (stride * h != size) { RAISEPY(gctx, "bad samples length", PyExc_ValueError); } pm = fz_new_pixmap(gctx, (fz_colorspace *) cs, w, h, seps, alpha); memcpy(pm->samples, c, size); } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // create pixmap from filename, file object, pathlib.Path or memory //---------------------------------------------------------------- Pixmap(PyObject *imagedata) { fz_buffer *res = NULL; fz_image *img = NULL; fz_pixmap *pm = NULL; PyObject *fname = NULL; PyObject *name = PyUnicode_FromString("name"); fz_try(gctx) { if (PyObject_HasAttrString(imagedata, "resolve")) { fname = PyObject_CallMethod(imagedata, "__str__", NULL); if (fname) { img = fz_new_image_from_file(gctx, JM_StrAsChar(fname)); } } else if (PyObject_HasAttr(imagedata, name)) { fname = PyObject_GetAttr(imagedata, name); if (fname) { img = fz_new_image_from_file(gctx, JM_StrAsChar(fname)); } } else if (PyUnicode_Check(imagedata)) { img = fz_new_image_from_file(gctx, JM_StrAsChar(imagedata)); } else { res = JM_BufferFromBytes(gctx, imagedata); if (!res || !fz_buffer_storage(gctx, res, NULL)) { RAISEPY(gctx, "bad image data", PyExc_ValueError); } img = fz_new_image_from_buffer(gctx, res); } pm = fz_get_pixmap_from_image(gctx, img, NULL, NULL, NULL, NULL); int xres, yres; fz_image_resolution(img, &xres, &yres); pm->xres = xres; pm->yres = yres; } fz_always(gctx) { Py_CLEAR(fname); Py_CLEAR(name); fz_drop_image(gctx, img); fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pm; } //---------------------------------------------------------------- // Create pixmap from PDF image identified by XREF number //---------------------------------------------------------------- Pixmap(struct Document *doc, int xref) { fz_image *img = NULL; fz_pixmap *pix = NULL; pdf_obj *ref = NULL; pdf_obj *type; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) doc); fz_try(gctx) { ASSERT_PDF(pdf); int xreflen = pdf_xref_len(gctx, pdf); if (!INRANGE(xref, 1, xreflen-1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } ref = pdf_new_indirect(gctx, pdf, xref, 0); type = pdf_dict_get(gctx, ref, PDF_NAME(Subtype)); if (!pdf_name_eq(gctx, type, PDF_NAME(Image)) && !pdf_name_eq(gctx, type, PDF_NAME(Alpha)) && !pdf_name_eq(gctx, type, PDF_NAME(Luminosity))) { RAISEPY(gctx, MSG_IS_NO_IMAGE, PyExc_ValueError); } img = pdf_load_image(gctx, pdf, ref); pix = fz_get_pixmap_from_image(gctx, img, NULL, NULL, NULL, NULL); } fz_always(gctx) { fz_drop_image(gctx, img); pdf_drop_obj(gctx, ref); } fz_catch(gctx) { fz_drop_pixmap(gctx, pix); return NULL; } return (struct Pixmap *) pix; } //---------------------------------------------------------------- // warp //---------------------------------------------------------------- FITZEXCEPTION(warp, !result) %pythonprepend warp %{ """Return pixmap from a warped quad.""" EnsureOwnership(self) if not quad.is_convex: raise ValueError("quad must be convex")%} struct Pixmap *warp(PyObject *quad, int width, int height) { fz_point points[4]; fz_quad q = JM_quad_from_py(quad); fz_pixmap *dst = NULL; points[0] = q.ul; points[1] = q.ur; points[2] = q.lr; points[3] = q.ll; fz_try(gctx) { dst = fz_warp_pixmap(gctx, (fz_pixmap *) $self, points, width, height); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) dst; } //---------------------------------------------------------------- // shrink //---------------------------------------------------------------- ENSURE_OWNERSHIP(shrink, """Divide width and height by 2**factor. E.g. factor=1 shrinks to 25% of original size (in place).""") void shrink(int factor) { if (factor < 1) { JM_Warning("ignoring shrink factor < 1"); return; } fz_subsample_pixmap(gctx, (fz_pixmap *) $self, factor); } //---------------------------------------------------------------- // apply gamma correction //---------------------------------------------------------------- ENSURE_OWNERSHIP(gamma_with, """Apply correction with some float. gamma=1 is a no-op.""") void gamma_with(float gamma) { if (!fz_pixmap_colorspace(gctx, (fz_pixmap *) $self)) { JM_Warning("colorspace invalid for function"); return; } fz_gamma_pixmap(gctx, (fz_pixmap *) $self, gamma); } //---------------------------------------------------------------- // tint pixmap with color //---------------------------------------------------------------- %pythonprepend tint_with %{"""Tint colors with modifiers for black and white.""" EnsureOwnership(self) if not self.colorspace or self.colorspace.n > 3: print("warning: colorspace invalid for function") return%} void tint_with(int black, int white) { fz_tint_pixmap(gctx, (fz_pixmap *) $self, black, white); } //----------------------------------------------------------------- // clear all of pixmap samples to 0x00 */ //----------------------------------------------------------------- ENSURE_OWNERSHIP(clear_with, """Fill all color components with same value.""") void clear_with() { fz_clear_pixmap(gctx, (fz_pixmap *) $self); } //----------------------------------------------------------------- // clear total pixmap with value */ //----------------------------------------------------------------- void clear_with(int value) { fz_clear_pixmap_with_value(gctx, (fz_pixmap *) $self, value); } //----------------------------------------------------------------- // clear pixmap rectangle with value //----------------------------------------------------------------- void clear_with(int value, PyObject *bbox) { JM_clear_pixmap_rect_with_value(gctx, (fz_pixmap *) $self, value, JM_irect_from_py(bbox)); } //----------------------------------------------------------------- // copy pixmaps //----------------------------------------------------------------- FITZEXCEPTION(copy, !result) ENSURE_OWNERSHIP(copy, """Copy bbox from another Pixmap.""") PyObject *copy(struct Pixmap *src, PyObject *bbox) { fz_try(gctx) { fz_pixmap *pm = (fz_pixmap *) $self, *src_pix = (fz_pixmap *) src; if (!fz_pixmap_colorspace(gctx, src_pix)) { RAISEPY(gctx, "cannot copy pixmap with NULL colorspace", PyExc_ValueError); } if (pm->alpha != src_pix->alpha) { RAISEPY(gctx, "source and target alpha must be equal", PyExc_ValueError); } fz_copy_pixmap_rect(gctx, pm, src_pix, JM_irect_from_py(bbox), NULL); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //----------------------------------------------------------------- // set alpha values //----------------------------------------------------------------- FITZEXCEPTION(set_alpha, !result) ENSURE_OWNERSHIP(set_alpha, """Set alpha channel to values contained in a byte array. If None, all alphas are 255. Args: alphavalues: (bytes) with length (width * height) or 'None'. premultiply: (bool, True) premultiply colors with alpha values. opaque: (tuple, length colorspace.n) this color receives opacity 0. matte: (tuple, length colorspace.n) preblending background color. """) PyObject *set_alpha(PyObject *alphavalues=NULL, int premultiply=1, PyObject *opaque=NULL, PyObject *matte=NULL) { fz_buffer *res = NULL; fz_pixmap *pix = (fz_pixmap *) $self; unsigned char alpha = 0, m = 0; fz_try(gctx) { if (pix->alpha == 0) { RAISEPY(gctx, MSG_PIX_NOALPHA, PyExc_ValueError); } size_t i, k, j; size_t n = fz_pixmap_colorants(gctx, pix); size_t w = (size_t) fz_pixmap_width(gctx, pix); size_t h = (size_t) fz_pixmap_height(gctx, pix); size_t balen = w * h * (n+1); int colors[4]; // make this color opaque int bgcolor[4]; // preblending background color int zero_out = 0, bground = 0; if (opaque && PySequence_Check(opaque) && PySequence_Size(opaque) == n) { for (i = 0; i < n; i++) { if (JM_INT_ITEM(opaque, i, &colors[i]) == 1) { RAISEPY(gctx, "bad opaque components", PyExc_ValueError); } } zero_out = 1; } if (matte && PySequence_Check(matte) && PySequence_Size(matte) == n) { for (i = 0; i < n; i++) { if (JM_INT_ITEM(matte, i, &bgcolor[i]) == 1) { RAISEPY(gctx, "bad matte components", PyExc_ValueError); } } bground = 1; } unsigned char *data = NULL; size_t data_len = 0; if (alphavalues && PyObject_IsTrue(alphavalues)) { res = JM_BufferFromBytes(gctx, alphavalues); data_len = fz_buffer_storage(gctx, res, &data); if (data_len < w * h) { RAISEPY(gctx, "bad alpha values", PyExc_ValueError); } } i = k = j = 0; int data_fix = 255; while (i < balen) { alpha = data[k]; if (zero_out) { for (j = i; j < i+n; j++) { if (pix->samples[j] != (unsigned char) colors[j - i]) { data_fix = 255; break; } else { data_fix = 0; } } } if (data_len) { if (data_fix == 0) { pix->samples[i+n] = 0; } else { pix->samples[i+n] = alpha; } if (premultiply && !bground) { for (j = i; j < i+n; j++) { pix->samples[j] = fz_mul255(pix->samples[j], alpha); } } else if (bground) { for (j = i; j < i+n; j++) { m = (unsigned char) bgcolor[j - i]; pix->samples[j] = m + fz_mul255((pix->samples[j] - m), alpha); } } } else { pix->samples[i+n] = data_fix; } i += n+1; k += 1; } } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //----------------------------------------------------------------- // Pixmap._tobytes //----------------------------------------------------------------- FITZEXCEPTION(_tobytes, !result) PyObject *_tobytes(int format, int jpg_quality) { fz_output *out = NULL; fz_buffer *res = NULL; PyObject *barray = NULL; fz_pixmap *pm = (fz_pixmap *) $self; fz_try(gctx) { size_t size = fz_pixmap_stride(gctx, pm) * pm->h; res = fz_new_buffer(gctx, size); out = fz_new_output_with_buffer(gctx, res); switch(format) { case(1): fz_write_pixmap_as_png(gctx, out, pm); break; case(2): fz_write_pixmap_as_pnm(gctx, out, pm); break; case(3): fz_write_pixmap_as_pam(gctx, out, pm); break; case(5): // Adobe Photoshop Document fz_write_pixmap_as_psd(gctx, out, pm); break; case(6): // Postscript format fz_write_pixmap_as_ps(gctx, out, pm); break; #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 case(7): // JPEG format #if FZ_VERSION_MINOR < 24 fz_write_pixmap_as_jpeg(gctx, out, pm, jpg_quality); #else fz_write_pixmap_as_jpeg(gctx, out, pm, jpg_quality, 0 /*invert_cmyk*/); #endif break; #endif default: fz_write_pixmap_as_png(gctx, out, pm); break; } barray = JM_BinFromBuffer(gctx, res); } fz_always(gctx) { fz_drop_output(gctx, out); fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return barray; } %pythoncode %{ def tobytes(self, output="png", jpg_quality=95): """Convert to binary image stream of desired type. Can be used as input to GUI packages like tkinter. Args: output: (str) image type, default is PNG. Others are JPG, JPEG, PNM, PGM, PPM, PBM, PAM, PSD, PS. Returns: Bytes object. """ EnsureOwnership(self) valid_formats = {"png": 1, "pnm": 2, "pgm": 2, "ppm": 2, "pbm": 2, "pam": 3, "psd": 5, "ps": 6, "jpg": 7, "jpeg": 7} idx = valid_formats.get(output.lower(), None) if idx==None: raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}") if self.alpha and idx in (2, 6, 7): raise ValueError("'%s' cannot have alpha" % output) if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4): raise ValueError("unsupported colorspace for '%s'" % output) if idx == 7: self.set_dpi(self.xres, self.yres) barray = self._tobytes(idx, jpg_quality) return barray %} //----------------------------------------------------------------- // output as PDF-OCR //----------------------------------------------------------------- FITZEXCEPTION(pdfocr_save, !result) %pythonprepend pdfocr_save %{ """Save pixmap as an OCR-ed PDF page.""" EnsureOwnership(self) if not os.getenv("TESSDATA_PREFIX") and not tessdata: raise RuntimeError("No OCR support: TESSDATA_PREFIX not set") %} ENSURE_OWNERSHIP(pdfocr_save, ) PyObject *pdfocr_save(PyObject *filename, int compress=1, char *language=NULL, char *tessdata=NULL) { fz_pdfocr_options opts; memset(&opts, 0, sizeof opts); opts.compress = compress; if (language) { fz_strlcpy(opts.language, language, sizeof(opts.language)); } if (tessdata) { fz_strlcpy(opts.datadir, tessdata, sizeof(opts.language)); } fz_output *out = NULL; fz_pixmap *pix = (fz_pixmap *) $self; fz_try(gctx) { if (PyUnicode_Check(filename)) { fz_save_pixmap_as_pdfocr(gctx, pix, (char *) PyUnicode_AsUTF8(filename), 0, &opts); } else { out = JM_new_output_fileptr(gctx, filename); fz_write_pixmap_as_pdfocr(gctx, out, pix, &opts); } } fz_always(gctx) { fz_drop_output(gctx, out); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None): """Save pixmap as an OCR-ed PDF page. Args: compress: (bool) compress, default 1 (True). language: (str) language(s) occurring on page, default "eng" (English), multiples like "eng+ger" for English and German. tessdata: (str) folder name of Tesseract's language support. Must be given if environment variable TESSDATA_PREFIX is not set. Notes: On failure, make sure Tesseract is installed and you have set the environment variable "TESSDATA_PREFIX" to the folder containing your Tesseract's language support data. """ if not os.getenv("TESSDATA_PREFIX") and not tessdata: raise RuntimeError("No OCR support: TESSDATA_PREFIX not set") EnsureOwnership(self) from io import BytesIO bio = BytesIO() self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata) return bio.getvalue() %} //----------------------------------------------------------------- // _writeIMG //----------------------------------------------------------------- FITZEXCEPTION(_writeIMG, !result) PyObject *_writeIMG(char *filename, int format, int jpg_quality) { fz_try(gctx) { fz_pixmap *pm = (fz_pixmap *) $self; switch(format) { case(1): fz_save_pixmap_as_png(gctx, pm, filename); break; case(2): fz_save_pixmap_as_pnm(gctx, pm, filename); break; case(3): fz_save_pixmap_as_pam(gctx, pm, filename); break; case(5): // Adobe Photoshop Document fz_save_pixmap_as_psd(gctx, pm, filename); break; case(6): // Postscript fz_save_pixmap_as_ps(gctx, pm, filename, 0); break; #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 case(7): // JPEG fz_save_pixmap_as_jpeg(gctx, pm, filename, jpg_quality); break; #endif default: fz_save_pixmap_as_png(gctx, pm, filename); break; } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def save(self, filename, output=None, jpg_quality=95): """Output as image in format determined by filename extension. Args: output: (str) only use to overrule filename extension. Default is PNG. Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS. """ EnsureOwnership(self) valid_formats = {"png": 1, "pnm": 2, "pgm": 2, "ppm": 2, "pbm": 2, "pam": 3, "psd": 5, "ps": 6, "jpg": 7, "jpeg": 7} if type(filename) is str: pass elif hasattr(filename, "absolute"): filename = str(filename) elif hasattr(filename, "name"): filename = filename.name if output is None: _, ext = os.path.splitext(filename) output = ext[1:] idx = valid_formats.get(output.lower(), None) if idx == None: raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}") if self.alpha and idx in (2, 6, 7): raise ValueError("'%s' cannot have alpha" % output) if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4): raise ValueError("unsupported colorspace for '%s'" % output) if idx == 7: self.set_dpi(self.xres, self.yres) return self._writeIMG(filename, idx, jpg_quality) def pil_save(self, *args, unmultiply=False, **kwargs): """Write to image file using Pillow. Args are passed to Pillow's Image.save method, see their documentation. Use instead of save when other output formats are desired. :arg bool unmultiply: generates Pillow mode "RGBa" instead of "RGBA". Relevant for colorspace RGB with alpha only. """ EnsureOwnership(self) try: from PIL import Image except ImportError: print("Pillow not installed") raise cspace = self.colorspace if cspace is None: mode = "L" elif cspace.n == 1: mode = "L" if self.alpha == 0 else "LA" elif cspace.n == 3: mode = "RGB" if self.alpha == 0 else "RGBA" if mode == "RGBA" and unmultiply: mode = "RGBa" else: mode = "CMYK" img = Image.frombytes(mode, (self.width, self.height), self.samples) if "dpi" not in kwargs.keys(): kwargs["dpi"] = (self.xres, self.yres) img.save(*args, **kwargs) def pil_tobytes(self, *args, unmultiply=False, **kwargs): """Convert to binary image stream using pillow. Args are passed to Pillow's Image.save method, see their documentation. Use instead of 'tobytes' when other output formats are needed. """ EnsureOwnership(self) from io import BytesIO bytes_out = BytesIO() self.pil_save(bytes_out, *args, unmultiply=unmultiply, **kwargs) return bytes_out.getvalue() %} //----------------------------------------------------------------- // invert_irect //----------------------------------------------------------------- %pythonprepend invert_irect %{"""Invert the colors inside a bbox."""%} PyObject *invert_irect(PyObject *bbox = NULL) { fz_pixmap *pm = (fz_pixmap *) $self; if (!fz_pixmap_colorspace(gctx, pm)) { JM_Warning("ignored for stencil pixmap"); return JM_BOOL(0); } fz_irect r = JM_irect_from_py(bbox); if (fz_is_infinite_irect(r)) r = fz_pixmap_bbox(gctx, pm); return JM_BOOL(JM_invert_pixmap_rect(gctx, pm, r)); } //----------------------------------------------------------------- // get one pixel as a list //----------------------------------------------------------------- FITZEXCEPTION(pixel, !result) ENSURE_OWNERSHIP(pixel, """Get color tuple of pixel (x, y). Includes alpha byte if applicable.""") PyObject *pixel(int x, int y) { PyObject *p = NULL; fz_try(gctx) { fz_pixmap *pm = (fz_pixmap *) $self; if (!INRANGE(x, 0, pm->w - 1) || !INRANGE(y, 0, pm->h - 1)) { RAISEPY(gctx, MSG_PIXEL_OUTSIDE, PyExc_ValueError); } int n = pm->n; int stride = fz_pixmap_stride(gctx, pm); int j, i = stride * y + n * x; p = PyTuple_New(n); for (j = 0; j < n; j++) { PyTuple_SET_ITEM(p, j, Py_BuildValue("i", pm->samples[i + j])); } } fz_catch(gctx) { return NULL; } return p; } //----------------------------------------------------------------- // Set one pixel to a given color tuple //----------------------------------------------------------------- FITZEXCEPTION(set_pixel, !result) ENSURE_OWNERSHIP(set_pixel, """Set color of pixel (x, y).""") PyObject *set_pixel(int x, int y, PyObject *color) { fz_try(gctx) { fz_pixmap *pm = (fz_pixmap *) $self; if (!INRANGE(x, 0, pm->w - 1) || !INRANGE(y, 0, pm->h - 1)) { RAISEPY(gctx, MSG_PIXEL_OUTSIDE, PyExc_ValueError); } int n = pm->n; if (!PySequence_Check(color) || PySequence_Size(color) != n) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } int i, j; unsigned char c[5]; for (j = 0; j < n; j++) { if (JM_INT_ITEM(color, j, &i) == 1) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } if (!INRANGE(i, 0, 255)) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } c[j] = (unsigned char) i; } int stride = fz_pixmap_stride(gctx, pm); i = stride * y + n * x; for (j = 0; j < n; j++) { pm->samples[i + j] = c[j]; } } fz_catch(gctx) { PyErr_Clear(); return NULL; } Py_RETURN_NONE; } //----------------------------------------------------------------- // Set Pixmap origin //----------------------------------------------------------------- ENSURE_OWNERSHIP(set_origin, """Set top-left coordinates.""") PyObject *set_origin(int x, int y) { fz_pixmap *pm = (fz_pixmap *) $self; pm->x = x; pm->y = y; Py_RETURN_NONE; } ENSURE_OWNERSHIP(set_dpi, """Set resolution in both dimensions.""") PyObject *set_dpi(int xres, int yres) { fz_pixmap *pm = (fz_pixmap *) $self; pm->xres = xres; pm->yres = yres; Py_RETURN_NONE; } //----------------------------------------------------------------- // Set a rect to a given color tuple //----------------------------------------------------------------- FITZEXCEPTION(set_rect, !result) ENSURE_OWNERSHIP(set_rect, """Set color of all pixels in bbox.""") PyObject *set_rect(PyObject *bbox, PyObject *color) { PyObject *rc = NULL; fz_try(gctx) { fz_pixmap *pm = (fz_pixmap *) $self; Py_ssize_t j, n = (Py_ssize_t) pm->n; if (!PySequence_Check(color) || PySequence_Size(color) != n) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } unsigned char c[5]; int i; for (j = 0; j < n; j++) { if (JM_INT_ITEM(color, j, &i) == 1) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } if (!INRANGE(i, 0, 255)) { RAISEPY(gctx, MSG_BAD_COLOR_SEQ, PyExc_ValueError); } c[j] = (unsigned char) i; } i = JM_fill_pixmap_rect_with_color(gctx, pm, c, JM_irect_from_py(bbox)); rc = JM_BOOL(i); } fz_catch(gctx) { PyErr_Clear(); return NULL; } return rc; } //----------------------------------------------------------------- // check if monochrome //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(is_monochrome, """Check if pixmap is monochrome.""") PyObject *is_monochrome() { return JM_BOOL(fz_is_pixmap_monochrome(gctx, (fz_pixmap *) $self)); } //----------------------------------------------------------------- // check if unicolor (only one color there) //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(is_unicolor, """Check if pixmap has only one color.""") PyObject *is_unicolor() { fz_pixmap *pm = (fz_pixmap *) $self; size_t i, n = pm->n, count = pm->w * pm->h * n; unsigned char *s = pm->samples; for (i = n; i < count; i += n) { if (memcmp(s, s + i, n) != 0) { Py_RETURN_FALSE; } } Py_RETURN_TRUE; } //----------------------------------------------------------------- // count each pixmap color //----------------------------------------------------------------- FITZEXCEPTION(color_count, !result) ENSURE_OWNERSHIP(color_count, """Return count of each color.""") PyObject *color_count(int colors=0, PyObject *clip=NULL) { fz_pixmap *pm = (fz_pixmap *) $self; PyObject *rc = NULL; fz_try(gctx) { rc = JM_color_count(gctx, pm, clip); if (!rc) { RAISEPY(gctx, MSG_COLOR_COUNT_FAILED, PyExc_RuntimeError); } } fz_catch(gctx) { return NULL; } if (!colors) { Py_ssize_t len = PyDict_Size(rc); Py_DECREF(rc); return PyLong_FromSsize_t(len); } return rc; } %pythoncode %{ def color_topusage(self, clip=None): """Return most frequent color and its usage ratio.""" EnsureOwnership(self) allpixels = 0 cnt = 0 if clip != None and self.irect in Rect(clip): clip = self.irect for pixel, count in self.color_count(colors=True,clip=clip).items(): allpixels += count if count > cnt: cnt = count maxpixel = pixel if not allpixels: return (1, bytes([255] * self.n)) return (cnt / allpixels, maxpixel) %} //----------------------------------------------------------------- // MD5 digest of pixmap //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(digest, """MD5 digest of pixmap (bytes).""") PyObject *digest() { unsigned char digest[16]; fz_md5_pixmap(gctx, (fz_pixmap *) $self, digest); return PyBytes_FromStringAndSize(digest, 16); } //----------------------------------------------------------------- // get length of one image row //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(stride, """Length of one image line (width * n).""") PyObject *stride() { return PyLong_FromSize_t((size_t) fz_pixmap_stride(gctx, (fz_pixmap *) $self)); } //----------------------------------------------------------------- // x, y, width, height, xres, yres, n //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(xres, """Resolution in x direction.""") int xres() { fz_pixmap *this_pix = (fz_pixmap *) $self; return this_pix->xres; } %pythoncode %{@property%} ENSURE_OWNERSHIP(yres, """Resolution in y direction.""") int yres() { fz_pixmap *this_pix = (fz_pixmap *) $self; return this_pix->yres; } %pythoncode %{@property%} ENSURE_OWNERSHIP(w, """The width.""") PyObject *w() { return PyLong_FromSize_t((size_t) fz_pixmap_width(gctx, (fz_pixmap *) $self)); } %pythoncode %{@property%} ENSURE_OWNERSHIP(h, """The height.""") PyObject *h() { return PyLong_FromSize_t((size_t) fz_pixmap_height(gctx, (fz_pixmap *) $self)); } %pythoncode %{@property%} ENSURE_OWNERSHIP(x, """x component of Pixmap origin.""") int x() { return fz_pixmap_x(gctx, (fz_pixmap *) $self); } %pythoncode %{@property%} ENSURE_OWNERSHIP(y, """y component of Pixmap origin.""") int y() { return fz_pixmap_y(gctx, (fz_pixmap *) $self); } %pythoncode %{@property%} ENSURE_OWNERSHIP(n, """The size of one pixel.""") int n() { return fz_pixmap_components(gctx, (fz_pixmap *) $self); } //----------------------------------------------------------------- // check alpha channel //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(alpha, """Indicates presence of alpha channel.""") int alpha() { return fz_pixmap_alpha(gctx, (fz_pixmap *) $self); } //----------------------------------------------------------------- // get colorspace of pixmap //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(colorspace, """Pixmap Colorspace.""") struct Colorspace *colorspace() { return (struct Colorspace *) fz_pixmap_colorspace(gctx, (fz_pixmap *) $self); } //----------------------------------------------------------------- // return irect of pixmap //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(irect, """Pixmap bbox - an IRect object.""") %pythonappend irect %{val = IRect(val)%} PyObject *irect() { return JM_py_from_irect(fz_pixmap_bbox(gctx, (fz_pixmap *) $self)); } //----------------------------------------------------------------- // return size of pixmap //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(size, """Pixmap size.""") PyObject *size() { return PyLong_FromSize_t(fz_pixmap_size(gctx, (fz_pixmap *) $self)); } //----------------------------------------------------------------- // samples //----------------------------------------------------------------- %pythoncode %{@property%} ENSURE_OWNERSHIP(samples_mv, """Pixmap samples memoryview.""") PyObject *samples_mv() { fz_pixmap *pm = (fz_pixmap *) $self; Py_ssize_t s = (Py_ssize_t) pm->w; s *= pm->h; s *= pm->n; return PyMemoryView_FromMemory((char *) pm->samples, s, PyBUF_READ); } %pythoncode %{@property%} ENSURE_OWNERSHIP(samples_ptr, """Pixmap samples pointer.""") PyObject *samples_ptr() { fz_pixmap *pm = (fz_pixmap *) $self; return PyLong_FromVoidPtr((void *) pm->samples); } %pythoncode %{ @property def samples(self)->bytes: return bytes(self.samples_mv) width = w height = h def __len__(self): return self.size def __repr__(self): EnsureOwnership(self) if not type(self) is Pixmap: return if self.colorspace: return "Pixmap(%s, %s, %s)" % (self.colorspace.name, self.irect, self.alpha) else: return "Pixmap(%s, %s, %s)" % ('None', self.irect, self.alpha) def __enter__(self): return self def __exit__(self, *args): if getattr(self, "thisown", False): self.__swig_destroy__(self) def __del__(self): if not type(self) is Pixmap: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; /* fz_colorspace */ struct Colorspace { %extend { ~Colorspace() { DEBUGMSG1("Colorspace"); fz_colorspace *this_cs = (fz_colorspace *) $self; fz_drop_colorspace(gctx, this_cs); DEBUGMSG2; } %pythonprepend Colorspace %{"""Supported are GRAY, RGB and CMYK."""%} Colorspace(int type) { fz_colorspace *cs = NULL; switch(type) { case CS_GRAY: cs = fz_device_gray(gctx); break; case CS_CMYK: cs = fz_device_cmyk(gctx); break; case CS_RGB: default: cs = fz_device_rgb(gctx); break; } fz_keep_colorspace(gctx, cs); return (struct Colorspace *) cs; } //----------------------------------------------------------------- // number of bytes to define color of one pixel //----------------------------------------------------------------- %pythoncode %{@property%} %pythonprepend n %{"""Size of one pixel."""%} PyObject *n() { return Py_BuildValue("i", fz_colorspace_n(gctx, (fz_colorspace *) $self)); } //----------------------------------------------------------------- // name of colorspace //----------------------------------------------------------------- PyObject *_name() { return JM_UnicodeFromStr(fz_colorspace_name(gctx, (fz_colorspace *) $self)); } %pythoncode %{ @property def name(self): """Name of the Colorspace.""" if self.n == 1: return csGRAY._name() elif self.n == 3: return csRGB._name() elif self.n == 4: return csCMYK._name() return self._name() def __repr__(self): x = ("", "GRAY", "", "RGB", "CMYK")[self.n] return "Colorspace(CS_%s) - %s" % (x, self.name) %} } }; /* fz_device wrapper */ %rename(Device) DeviceWrapper; struct DeviceWrapper { %extend { FITZEXCEPTION(DeviceWrapper, !result) DeviceWrapper(struct Pixmap *pm, PyObject *clip) { struct DeviceWrapper *dw = NULL; fz_try(gctx) { dw = (struct DeviceWrapper *)calloc(1, sizeof(struct DeviceWrapper)); fz_irect bbox = JM_irect_from_py(clip); if (fz_is_infinite_irect(bbox)) dw->device = fz_new_draw_device(gctx, fz_identity, (fz_pixmap *) pm); else dw->device = fz_new_draw_device_with_bbox(gctx, fz_identity, (fz_pixmap *) pm, &bbox); } fz_catch(gctx) { return NULL; } return dw; } DeviceWrapper(struct DisplayList *dl) { struct DeviceWrapper *dw = NULL; fz_try(gctx) { dw = (struct DeviceWrapper *)calloc(1, sizeof(struct DeviceWrapper)); dw->device = fz_new_list_device(gctx, (fz_display_list *) dl); dw->list = (fz_display_list *) dl; fz_keep_display_list(gctx, (fz_display_list *) dl); } fz_catch(gctx) { return NULL; } return dw; } DeviceWrapper(struct TextPage *tp, int flags = 0) { struct DeviceWrapper *dw = NULL; fz_try(gctx) { dw = (struct DeviceWrapper *)calloc(1, sizeof(struct DeviceWrapper)); fz_stext_options opts = { 0 }; opts.flags = flags; dw->device = fz_new_stext_device(gctx, (fz_stext_page *) tp, &opts); } fz_catch(gctx) { return NULL; } return dw; } ~DeviceWrapper() { fz_display_list *list = $self->list; DEBUGMSG1("Device"); fz_close_device(gctx, $self->device); fz_drop_device(gctx, $self->device); DEBUGMSG2; if(list) { DEBUGMSG1("DisplayList after Device"); fz_drop_display_list(gctx, list); DEBUGMSG2; } } } }; //------------------------------------------------------------------------ // fz_outline //------------------------------------------------------------------------ %nodefaultctor; struct Outline { %immutable; %extend { ~Outline() { DEBUGMSG1("Outline"); fz_outline *this_ol = (fz_outline *) $self; fz_drop_outline(gctx, this_ol); DEBUGMSG2; } %pythoncode %{@property%} PyObject *uri() { fz_outline *ol = (fz_outline *) $self; return JM_UnicodeFromStr(ol->uri); } /* `%newobject foo;` is equivalent to wrapping C fn in python like: ret = _foo() ret.thisown=true return ret. */ %newobject next; %pythoncode %{@property%} struct Outline *next() { fz_outline *ol = (fz_outline *) $self; fz_outline *next_ol = ol->next; if (!next_ol) return NULL; next_ol = fz_keep_outline(gctx, next_ol); return (struct Outline *) next_ol; } %newobject down; %pythoncode %{@property%} struct Outline *down() { fz_outline *ol = (fz_outline *) $self; fz_outline *down_ol = ol->down; if (!down_ol) return NULL; down_ol = fz_keep_outline(gctx, down_ol); return (struct Outline *) down_ol; } %pythoncode %{@property%} PyObject *is_external() { fz_outline *ol = (fz_outline *) $self; if (!ol->uri) Py_RETURN_FALSE; return JM_BOOL(fz_is_external_link(gctx, ol->uri)); } %pythoncode %{@property%} int page() { fz_outline *ol = (fz_outline *) $self; return ol->page.page; } %pythoncode %{@property%} float x() { fz_outline *ol = (fz_outline *) $self; return ol->x; } %pythoncode %{@property%} float y() { fz_outline *ol = (fz_outline *) $self; return ol->y; } %pythoncode %{@property%} PyObject *title() { fz_outline *ol = (fz_outline *) $self; return JM_UnicodeFromStr(ol->title); } %pythoncode %{@property%} PyObject *is_open() { fz_outline *ol = (fz_outline *) $self; return JM_BOOL(ol->is_open); } %pythoncode %{ @property def dest(self): '''outline destination details''' return linkDest(self, None) def __del__(self): if not isinstance(self, Outline): return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; %clearnodefaultctor; //------------------------------------------------------------------------ // Annotation //------------------------------------------------------------------------ %nodefaultctor; struct Annot { %extend { ~Annot() { DEBUGMSG1("Annot"); pdf_annot *this_annot = (pdf_annot *) $self; pdf_drop_annot(gctx, this_annot); DEBUGMSG2; } //---------------------------------------------------------------- // annotation rectangle //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(rect, """annotation rectangle""") %pythonappend rect %{ val = Rect(val) val *= self.parent.derotation_matrix %} PyObject * rect() { fz_rect r = pdf_bound_annot(gctx, (pdf_annot *) $self); return JM_py_from_rect(r); } %pythoncode %{@property%} PARENTCHECK(rect_delta, """annotation delta values to rectangle""") PyObject * rect_delta() { PyObject *rc=NULL; float d; fz_try(gctx) { pdf_obj *annot_obj = pdf_annot_obj(gctx, (pdf_annot *) $self); pdf_obj *arr = pdf_dict_get(gctx, annot_obj, PDF_NAME(RD)); int i, n = pdf_array_len(gctx, arr); if (n != 4) { rc = Py_BuildValue("s", NULL); } else { rc = PyTuple_New(4); for (i = 0; i < n; i++) { d = pdf_to_real(gctx, pdf_array_get(gctx, arr, i)); if (i == 2 || i == 3) d *= -1; PyTuple_SET_ITEM(rc, i, Py_BuildValue("f", d)); } } } fz_catch(gctx) { Py_RETURN_NONE; } return rc; } //---------------------------------------------------------------- // annotation xref number //---------------------------------------------------------------- PARENTCHECK(xref, """annotation xref""") %pythoncode %{@property%} PyObject *xref() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); return Py_BuildValue("i", pdf_to_num(gctx, annot_obj)); } //---------------------------------------------------------------- // annotation get IRT xref number //---------------------------------------------------------------- PARENTCHECK(irt_xref, """annotation IRT xref""") %pythoncode %{@property%} PyObject *irt_xref() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *irt = pdf_dict_get(gctx, annot_obj, PDF_NAME(IRT)); if (!irt) return PyLong_FromLong(0); return PyLong_FromLong((long) pdf_to_num(gctx, irt)); } //---------------------------------------------------------------- // annotation set IRT xref number //---------------------------------------------------------------- FITZEXCEPTION(set_irt_xref, !result) PARENTCHECK(set_irt_xref, """Set annotation IRT xref""") PyObject *set_irt_xref(int xref) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_page *page = pdf_annot_page(gctx, annot); if (!INRANGE(xref, 1, pdf_xref_len(gctx, page->doc) - 1)) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } pdf_obj *irt = pdf_new_indirect(gctx, page->doc, xref, 0); pdf_obj *subt = pdf_dict_get(gctx, irt, PDF_NAME(Subtype)); int irt_subt = pdf_annot_type_from_string(gctx, pdf_to_name(gctx, subt)); if (irt_subt < 0) { pdf_drop_obj(gctx, irt); RAISEPY(gctx, MSG_IS_NO_ANNOT, PyExc_ValueError); } pdf_dict_put_drop(gctx, annot_obj, PDF_NAME(IRT), irt); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation get AP/N Matrix //---------------------------------------------------------------- PARENTCHECK(apn_matrix, """annotation appearance matrix""") %pythonappend apn_matrix %{val = Matrix(val)%} %pythoncode %{@property%} PyObject * apn_matrix() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!ap) return JM_py_from_matrix(fz_identity); fz_matrix mat = pdf_dict_get_matrix(gctx, ap, PDF_NAME(Matrix)); return JM_py_from_matrix(mat); } //---------------------------------------------------------------- // annotation get AP/N BBox //---------------------------------------------------------------- PARENTCHECK(apn_bbox, """annotation appearance bbox""") %pythonappend apn_bbox %{ val = Rect(val) * self.parent.transformation_matrix val *= self.parent.derotation_matrix%} %pythoncode %{@property%} PyObject * apn_bbox() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!ap) return JM_py_from_rect(fz_infinite_rect); fz_rect rect = pdf_dict_get_rect(gctx, ap, PDF_NAME(BBox)); return JM_py_from_rect(rect); } //---------------------------------------------------------------- // annotation set AP/N Matrix //---------------------------------------------------------------- FITZEXCEPTION(set_apn_matrix, !result) PARENTCHECK(set_apn_matrix, """Set annotation appearance matrix.""") PyObject * set_apn_matrix(PyObject *matrix) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_try(gctx) { pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!ap) { RAISEPY(gctx, MSG_BAD_APN, PyExc_RuntimeError); } fz_matrix mat = JM_matrix_from_py(matrix); pdf_dict_put_matrix(gctx, ap, PDF_NAME(Matrix), mat); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation set AP/N BBox //---------------------------------------------------------------- FITZEXCEPTION(set_apn_bbox, !result) %pythonprepend set_apn_bbox %{ """Set annotation appearance bbox.""" CheckParent(self) page = self.parent rot = page.rotation_matrix mat = page.transformation_matrix bbox *= rot * ~mat %} PyObject * set_apn_bbox(PyObject *bbox) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_try(gctx) { pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!ap) { RAISEPY(gctx, MSG_BAD_APN, PyExc_RuntimeError); } fz_rect rect = JM_rect_from_py(bbox); pdf_dict_put_rect(gctx, ap, PDF_NAME(BBox), rect); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation show blend mode (/BM) //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(blendmode, """annotation BlendMode""") PyObject *blendmode() { PyObject *blend_mode = NULL; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *obj, *obj1, *obj2; obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(BM)); if (obj) { blend_mode = JM_UnicodeFromStr(pdf_to_name(gctx, obj)); goto finished; } // loop through the /AP/N/Resources/ExtGState objects obj = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), PDF_NAME(Resources), PDF_NAME(ExtGState), NULL); if (pdf_is_dict(gctx, obj)) { int i, j, m, n = pdf_dict_len(gctx, obj); for (i = 0; i < n; i++) { obj1 = pdf_dict_get_val(gctx, obj, i); if (pdf_is_dict(gctx, obj1)) { m = pdf_dict_len(gctx, obj1); for (j = 0; j < m; j++) { obj2 = pdf_dict_get_key(gctx, obj1, j); if (pdf_objcmp(gctx, obj2, PDF_NAME(BM)) == 0) { blend_mode = JM_UnicodeFromStr(pdf_to_name(gctx, pdf_dict_get_val(gctx, obj1, j))); goto finished; } } } } } finished:; } fz_catch(gctx) { Py_RETURN_NONE; } if (blend_mode) return blend_mode; Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation set blend mode (/BM) //---------------------------------------------------------------- FITZEXCEPTION(set_blendmode, !result) PARENTCHECK(set_blendmode, """Set annotation BlendMode.""") PyObject * set_blendmode(char *blend_mode) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_dict_put_name(gctx, annot_obj, PDF_NAME(BM), blend_mode); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation get optional content //---------------------------------------------------------------- FITZEXCEPTION(get_oc, !result) PARENTCHECK(get_oc, """Get annotation optional content reference.""") PyObject *get_oc() { int oc = 0; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(OC)); if (obj) { oc = pdf_to_num(gctx, obj); } } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", oc); } //---------------------------------------------------------------- // annotation set open //---------------------------------------------------------------- FITZEXCEPTION(set_open, !result) PARENTCHECK(set_open, """Set 'open' status of annotation or its Popup.""") PyObject *set_open(int is_open) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_set_annot_is_open(gctx, annot, is_open); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation inquiry: is open //---------------------------------------------------------------- FITZEXCEPTION(is_open, !result) PARENTCHECK(is_open, """Get 'open' status of annotation or its Popup.""") %pythoncode %{@property%} PyObject * is_open() { int is_open; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; is_open = pdf_annot_is_open(gctx, annot); } fz_catch(gctx) { return NULL; } return JM_BOOL(is_open); } //---------------------------------------------------------------- // annotation inquiry: has Popup //---------------------------------------------------------------- FITZEXCEPTION(has_popup, !result) PARENTCHECK(has_popup, """Check if annotation has a Popup.""") %pythoncode %{@property%} PyObject * has_popup() { int has_popup = 0; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(Popup)); if (obj) has_popup = 1; } fz_catch(gctx) { return NULL; } return JM_BOOL(has_popup); } //---------------------------------------------------------------- // annotation set Popup //---------------------------------------------------------------- FITZEXCEPTION(set_popup, !result) PARENTCHECK(set_popup, """Create annotation 'Popup' or update rectangle.""") PyObject * set_popup(PyObject *rect) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_page *pdfpage = pdf_annot_page(gctx, annot); fz_matrix rot = JM_rotate_page_matrix(gctx, pdfpage); fz_rect r = fz_transform_rect(JM_rect_from_py(rect), rot); pdf_set_annot_popup(gctx, annot, r); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation Popup rectangle //---------------------------------------------------------------- FITZEXCEPTION(popup_rect, !result) PARENTCHECK(popup_rect, """annotation 'Popup' rectangle""") %pythoncode %{@property%} %pythonappend popup_rect %{ val = Rect(val) * self.parent.transformation_matrix val *= self.parent.derotation_matrix%} PyObject * popup_rect() { fz_rect rect = fz_infinite_rect; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(Popup)); if (obj) { rect = pdf_dict_get_rect(gctx, obj, PDF_NAME(Rect)); } } fz_catch(gctx) { return NULL; } return JM_py_from_rect(rect); } //---------------------------------------------------------------- // annotation Popup xref //---------------------------------------------------------------- FITZEXCEPTION(popup_xref, !result) PARENTCHECK(popup_xref, """annotation 'Popup' xref""") %pythoncode %{@property%} PyObject * popup_xref() { int xref = 0; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(Popup)); if (obj) { xref = pdf_to_num(gctx, obj); } } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } //---------------------------------------------------------------- // annotation set optional content //---------------------------------------------------------------- FITZEXCEPTION(set_oc, !result) PARENTCHECK(set_oc, """Set / remove annotation OC xref.""") PyObject * set_oc(int oc=0) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); if (!oc) { pdf_dict_del(gctx, annot_obj, PDF_NAME(OC)); } else { JM_add_oc_object(gctx, pdf_get_bound_document(gctx, annot_obj), annot_obj, oc); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode%{@property%} %pythonprepend language %{"""annotation language"""%} PyObject *language() { pdf_annot *this_annot = (pdf_annot *) $self; fz_text_language lang = pdf_annot_language(gctx, this_annot); char buf[8]; if (lang == FZ_LANG_UNSET) Py_RETURN_NONE; return Py_BuildValue("s", fz_string_from_text_language(buf, lang)); } //---------------------------------------------------------------- // annotation set language (/Lang) //---------------------------------------------------------------- FITZEXCEPTION(set_language, !result) PARENTCHECK(set_language, """Set annotation language.""") PyObject *set_language(char *language=NULL) { pdf_annot *this_annot = (pdf_annot *) $self; fz_try(gctx) { fz_text_language lang; if (!language) lang = FZ_LANG_UNSET; else lang = fz_text_language_from_string(language); pdf_set_annot_language(gctx, this_annot, lang); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation get decompressed appearance stream source //---------------------------------------------------------------- FITZEXCEPTION(_getAP, !result) PyObject * _getAP() { PyObject *r = NULL; fz_buffer *res = NULL; fz_var(res); fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (pdf_is_stream(gctx, ap)) res = pdf_load_stream(gctx, ap); if (res) { r = JM_BinFromBuffer(gctx, res); } } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { Py_RETURN_NONE; } if (!r) Py_RETURN_NONE; return r; } //---------------------------------------------------------------- // annotation update /AP stream //---------------------------------------------------------------- FITZEXCEPTION(_setAP, !result) PyObject * _setAP(PyObject *buffer, int rect=0) { fz_buffer *res = NULL; fz_var(res); fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_page *page = pdf_annot_page(gctx, annot); pdf_obj *apobj = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!apobj) { RAISEPY(gctx, MSG_BAD_APN, PyExc_RuntimeError); } if (!pdf_is_stream(gctx, apobj)) { RAISEPY(gctx, MSG_BAD_APN, PyExc_RuntimeError); } res = JM_BufferFromBytes(gctx, buffer); if (!res) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_ValueError); } JM_update_stream(gctx, page->doc, apobj, res, 1); if (rect) { fz_rect bbox = pdf_dict_get_rect(gctx, annot_obj, PDF_NAME(Rect)); pdf_dict_put_rect(gctx, apobj, PDF_NAME(BBox), bbox); } } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // redaction annotation get values //---------------------------------------------------------------- FITZEXCEPTION(_get_redact_values, !result) %pythonappend _get_redact_values %{ if not val: return val val["rect"] = self.rect text_color, fontname, fontsize = TOOLS._parse_da(self) val["text_color"] = text_color val["fontname"] = fontname val["fontsize"] = fontsize fill = self.colors["fill"] val["fill"] = fill %} PyObject * _get_redact_values() { pdf_annot *annot = (pdf_annot *) $self; if (pdf_annot_type(gctx, annot) != PDF_ANNOT_REDACT) Py_RETURN_NONE; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); PyObject *values = PyDict_New(); pdf_obj *obj = NULL; const char *text = NULL; fz_try(gctx) { obj = pdf_dict_gets(gctx, annot_obj, "RO"); if (obj) { JM_Warning("Ignoring redaction key '/RO'."); int xref = pdf_to_num(gctx, obj); DICT_SETITEM_DROP(values, dictkey_xref, Py_BuildValue("i", xref)); } obj = pdf_dict_gets(gctx, annot_obj, "OverlayText"); if (obj) { text = pdf_to_text_string(gctx, obj); DICT_SETITEM_DROP(values, dictkey_text, JM_UnicodeFromStr(text)); } else { DICT_SETITEM_DROP(values, dictkey_text, Py_BuildValue("s", "")); } obj = pdf_dict_get(gctx, annot_obj, PDF_NAME(Q)); int align = 0; if (obj) { align = pdf_to_int(gctx, obj); } DICT_SETITEM_DROP(values, dictkey_align, Py_BuildValue("i", align)); } fz_catch(gctx) { Py_DECREF(values); return NULL; } return values; } //---------------------------------------------------------------- // annotation get TextPage //---------------------------------------------------------------- %pythonappend get_textpage %{ if val: val.thisown = True %} FITZEXCEPTION(get_textpage, !result) PARENTCHECK(get_textpage, """Make annotation TextPage.""") struct TextPage * get_textpage(PyObject *clip=NULL, int flags = 0) { fz_stext_page *textpage=NULL; fz_stext_options options = { 0 }; options.flags = flags; fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; textpage = pdf_new_stext_page_from_annot(gctx, annot, &options); } fz_catch(gctx) { return NULL; } return (struct TextPage *) textpage; } //---------------------------------------------------------------- // annotation set name //---------------------------------------------------------------- FITZEXCEPTION(set_name, !result) PARENTCHECK(set_name, """Set /Name (icon) of annotation.""") PyObject * set_name(char *name) { fz_try(gctx) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_dict_put_name(gctx, annot_obj, PDF_NAME(Name), name); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation set rectangle //---------------------------------------------------------------- PARENTCHECK(set_rect, """Set annotation rectangle.""") FITZEXCEPTION(set_rect, !result) PyObject * set_rect(PyObject *rect) { pdf_annot *annot = (pdf_annot *) $self; int type = pdf_annot_type(gctx, annot); int err_source = 0; // what raised the error fz_var(err_source); fz_try(gctx) { pdf_page *pdfpage = pdf_annot_page(gctx, annot); fz_matrix rot = JM_rotate_page_matrix(gctx, pdfpage); fz_rect r = fz_transform_rect(JM_rect_from_py(rect), rot); if (fz_is_empty_rect(r) || fz_is_infinite_rect(r)) { RAISEPY(gctx, MSG_BAD_RECT, PyExc_ValueError); } err_source = 1; // indicate that error was from MuPDF pdf_set_annot_rect(gctx, annot, r); } fz_catch(gctx) { if (err_source == 0) { return NULL; } PySys_WriteStderr("cannot set rect: '%s'\n", fz_caught_message(gctx)); Py_RETURN_FALSE; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation set rotation //---------------------------------------------------------------- PARENTCHECK(set_rotation, """Set annotation rotation.""") PyObject * set_rotation(int rotate=0) { pdf_annot *annot = (pdf_annot *) $self; int type = pdf_annot_type(gctx, annot); switch (type) { case PDF_ANNOT_CARET: break; case PDF_ANNOT_CIRCLE: break; case PDF_ANNOT_FREE_TEXT: break; case PDF_ANNOT_FILE_ATTACHMENT: break; case PDF_ANNOT_INK: break; case PDF_ANNOT_LINE: break; case PDF_ANNOT_POLY_LINE: break; case PDF_ANNOT_POLYGON: break; case PDF_ANNOT_SQUARE: break; case PDF_ANNOT_STAMP: break; case PDF_ANNOT_TEXT: break; default: Py_RETURN_NONE; } int rot = rotate; while (rot < 0) rot += 360; while (rot >= 360) rot -= 360; if (type == PDF_ANNOT_FREE_TEXT && rot % 90 != 0) rot = 0; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_dict_put_int(gctx, annot_obj, PDF_NAME(Rotate), rot); Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation get rotation //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(rotation, """annotation rotation""") int rotation() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_obj *rotation = pdf_dict_get(gctx, annot_obj, PDF_NAME(Rotate)); if (!rotation) return -1; return pdf_to_int(gctx, rotation); } //---------------------------------------------------------------- // annotation vertices (for "Line", "Polgon", "Ink", etc. //---------------------------------------------------------------- PARENTCHECK(vertices, """annotation vertex points""") %pythoncode %{@property%} PyObject *vertices() { PyObject *res = NULL, *res1 = NULL; pdf_obj *o, *o1; pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_page *page = pdf_annot_page(gctx, annot); int i, j; fz_point point; // point object to work with fz_matrix page_ctm; // page transformation matrix pdf_page_transform(gctx, page, NULL, &page_ctm); fz_matrix derot = JM_derotate_page_matrix(gctx, page); page_ctm = fz_concat(page_ctm, derot); //---------------------------------------------------------------- // The following objects occur in different annotation types. // So we are sure that (!o) occurs at most once. // Every pair of floats is one point, that needs to be separately // transformed with the page transformation matrix. //---------------------------------------------------------------- o = pdf_dict_get(gctx, annot_obj, PDF_NAME(Vertices)); if (o) goto weiter; o = pdf_dict_get(gctx, annot_obj, PDF_NAME(L)); if (o) goto weiter; o = pdf_dict_get(gctx, annot_obj, PDF_NAME(QuadPoints)); if (o) goto weiter; o = pdf_dict_gets(gctx, annot_obj, "CL"); if (o) goto weiter; o = pdf_dict_get(gctx, annot_obj, PDF_NAME(InkList)); if (o) goto inklist; Py_RETURN_NONE; // handle lists with 1-level depth -------------------------------- weiter:; res = PyList_New(0); // create Python list for (i = 0; i < pdf_array_len(gctx, o); i += 2) { point.x = pdf_to_real(gctx, pdf_array_get(gctx, o, i)); point.y = pdf_to_real(gctx, pdf_array_get(gctx, o, i+1)); point = fz_transform_point(point, page_ctm); LIST_APPEND_DROP(res, Py_BuildValue("ff", point.x, point.y)); } return res; // InkList has 2-level lists -------------------------------------- inklist:; res = PyList_New(0); for (i = 0; i < pdf_array_len(gctx, o); i++) { res1 = PyList_New(0); o1 = pdf_array_get(gctx, o, i); for (j = 0; j < pdf_array_len(gctx, o1); j += 2) { point.x = pdf_to_real(gctx, pdf_array_get(gctx, o1, j)); point.y = pdf_to_real(gctx, pdf_array_get(gctx, o1, j+1)); point = fz_transform_point(point, page_ctm); LIST_APPEND_DROP(res1, Py_BuildValue("ff", point.x, point.y)); } LIST_APPEND_DROP(res, res1); } return res; } //---------------------------------------------------------------- // annotation colors //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(colors, """Color definitions.""") PyObject *colors() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); return JM_annot_colors(gctx, annot_obj); } //---------------------------------------------------------------- // annotation update appearance //---------------------------------------------------------------- PyObject *_update_appearance(float opacity=-1, char *blend_mode=NULL, PyObject *fill_color=NULL, int rotate = -1) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_page *page = pdf_annot_page(gctx, annot); pdf_document *pdf = page->doc; int type = pdf_annot_type(gctx, annot); float fcol[4] = {1,1,1,1}; // std fill color: white int i, nfcol = 0; // number of color components JM_color_FromSequence(fill_color, &nfcol, fcol); fz_try(gctx) { // remove fill color from unsupported annots // or if so requested if ((type != PDF_ANNOT_SQUARE && type != PDF_ANNOT_CIRCLE && type != PDF_ANNOT_LINE && type != PDF_ANNOT_POLY_LINE && type != PDF_ANNOT_POLYGON ) || nfcol == 0 ) { pdf_dict_del(gctx, annot_obj, PDF_NAME(IC)); } else if (nfcol > 0) { pdf_set_annot_interior_color(gctx, annot, nfcol, fcol); } int insert_rot = (rotate >= 0) ? 1 : 0; switch (type) { case PDF_ANNOT_CARET: case PDF_ANNOT_CIRCLE: case PDF_ANNOT_FREE_TEXT: case PDF_ANNOT_FILE_ATTACHMENT: case PDF_ANNOT_INK: case PDF_ANNOT_LINE: case PDF_ANNOT_POLY_LINE: case PDF_ANNOT_POLYGON: case PDF_ANNOT_SQUARE: case PDF_ANNOT_STAMP: case PDF_ANNOT_TEXT: break; default: insert_rot = 0; } if (insert_rot) { pdf_dict_put_int(gctx, annot_obj, PDF_NAME(Rotate), rotate); } pdf_dirty_annot(gctx, annot); pdf_update_annot(gctx, annot); // let MuPDF update pdf->resynth_required = 0; // insert fill color if (type == PDF_ANNOT_FREE_TEXT) { if (nfcol > 0) { pdf_set_annot_color(gctx, annot, nfcol, fcol); } } else if (nfcol > 0) { pdf_obj *col = pdf_new_array(gctx, page->doc, nfcol); for (i = 0; i < nfcol; i++) { pdf_array_push_real(gctx, col, fcol[i]); } pdf_dict_put_drop(gctx,annot_obj, PDF_NAME(IC), col); } } fz_catch(gctx) { PySys_WriteStderr("cannot update annot: '%s'\n", fz_caught_message(gctx)); Py_RETURN_FALSE; } if ((opacity < 0 || opacity >= 1) && !blend_mode) // no opacity, no blend_mode goto normal_exit; fz_try(gctx) { // create or update /ExtGState pdf_obj *ap = pdf_dict_getl(gctx, annot_obj, PDF_NAME(AP), PDF_NAME(N), NULL); if (!ap) { // should never happen RAISEPY(gctx, MSG_BAD_APN, PyExc_RuntimeError); } pdf_obj *resources = pdf_dict_get(gctx, ap, PDF_NAME(Resources)); if (!resources) { // no Resources yet: make one resources = pdf_dict_put_dict(gctx, ap, PDF_NAME(Resources), 2); } pdf_obj *alp0 = pdf_new_dict(gctx, page->doc, 3); if (opacity >= 0 && opacity < 1) { pdf_dict_put_real(gctx, alp0, PDF_NAME(CA), (double) opacity); pdf_dict_put_real(gctx, alp0, PDF_NAME(ca), (double) opacity); pdf_dict_put_real(gctx, annot_obj, PDF_NAME(CA), (double) opacity); } if (blend_mode) { pdf_dict_put_name(gctx, alp0, PDF_NAME(BM), blend_mode); pdf_dict_put_name(gctx, annot_obj, PDF_NAME(BM), blend_mode); } pdf_obj *extg = pdf_dict_get(gctx, resources, PDF_NAME(ExtGState)); if (!extg) { // no ExtGState yet: make one extg = pdf_dict_put_dict(gctx, resources, PDF_NAME(ExtGState), 2); } pdf_dict_put_drop(gctx, extg, PDF_NAME(H), alp0); } fz_catch(gctx) { PySys_WriteStderr("cannot set opacity or blend mode\n"); Py_RETURN_FALSE; } normal_exit:; Py_RETURN_TRUE; } %pythoncode %{ def update(self, blend_mode: OptStr =None, opacity: OptFloat =None, fontsize: float =0, fontname: OptStr =None, text_color: OptSeq =None, border_color: OptSeq =None, fill_color: OptSeq =None, cross_out: bool =True, rotate: int =-1, ): """Update annot appearance. Notes: Depending on the annot type, some parameters make no sense, while others are only available in this method to achieve the desired result. This is especially true for 'FreeText' annots. Args: blend_mode: set the blend mode, all annotations. opacity: set the opacity, all annotations. fontsize: set fontsize, 'FreeText' only. fontname: set the font, 'FreeText' only. border_color: set border color, 'FreeText' only. text_color: set text color, 'FreeText' only. fill_color: set fill color, all annotations. cross_out: draw diagonal lines, 'Redact' only. rotate: set rotation, 'FreeText' and some others. """ CheckParent(self) def color_string(cs, code): """Return valid PDF color operator for a given color sequence. """ cc = ColorCode(cs, code) if not cc: return b"" return (cc + "\n").encode() annot_type = self.type[0] # get the annot type dt = self.border.get("dashes", None) # get the dashes spec bwidth = self.border.get("width", -1) # get border line width stroke = self.colors["stroke"] # get the stroke color if fill_color != None: # change of fill color requested fill = fill_color else: # put in current annot value fill = self.colors["fill"] rect = None # self.rect # prevent MuPDF fiddling with it apnmat = self.apn_matrix # prevent MuPDF fiddling with it if rotate != -1: # sanitize rotation value while rotate < 0: rotate += 360 while rotate >= 360: rotate -= 360 if annot_type == PDF_ANNOT_FREE_TEXT and rotate % 90 != 0: rotate = 0 #------------------------------------------------------------------ # handle opacity and blend mode #------------------------------------------------------------------ if blend_mode is None: blend_mode = self.blendmode if not hasattr(opacity, "__float__"): opacity = self.opacity if 0 <= opacity < 1 or blend_mode is not None: opa_code = "/H gs\n" # then we must reference this 'gs' else: opa_code = "" if annot_type == PDF_ANNOT_FREE_TEXT: CheckColor(border_color) CheckColor(text_color) CheckColor(fill_color) tcol, fname, fsize = TOOLS._parse_da(self) # read and update default appearance as necessary update_default_appearance = False if fsize <= 0: fsize = 12 update_default_appearance = True if text_color is not None: tcol = text_color update_default_appearance = True if fontname is not None: fname = fontname update_default_appearance = True if fontsize > 0: fsize = fontsize update_default_appearance = True if update_default_appearance: da_str = "" if len(tcol) == 3: fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" elif len(tcol) == 1: fmt = "{:g} g /{f:s} {s:g} Tf" elif len(tcol) == 4: fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf" da_str = fmt.format(*tcol, f=fname, s=fsize) TOOLS._update_da(self, da_str) #------------------------------------------------------------------ # now invoke MuPDF to update the annot appearance #------------------------------------------------------------------ val = self._update_appearance( opacity=opacity, blend_mode=blend_mode, fill_color=fill, rotate=rotate, ) if val == False: raise RuntimeError("Error updating annotation.") bfill = color_string(fill, "f") bstroke = color_string(stroke, "c") p_ctm = self.parent.transformation_matrix imat = ~p_ctm # inverse page transf. matrix if dt: dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n" dashes = dashes.encode("utf-8") else: dashes = None if self.line_ends: line_end_le, line_end_ri = self.line_ends else: line_end_le, line_end_ri = 0, 0 # init line end codes # read contents as created by MuPDF ap = self._getAP() ap_tab = ap.splitlines() # split in single lines ap_updated = False # assume we did nothing if annot_type == PDF_ANNOT_REDACT: if cross_out: # create crossed-out rect ap_updated = True ap_tab = ap_tab[:-1] _, LL, LR, UR, UL = ap_tab ap_tab.append(LR) ap_tab.append(LL) ap_tab.append(UR) ap_tab.append(LL) ap_tab.append(UL) ap_tab.append(b"S") if bwidth > 0 or bstroke != b"": ap_updated = True ntab = [b"%g w" % bwidth] if bwidth > 0 else [] for line in ap_tab: if line.endswith(b"w"): continue if line.endswith(b"RG") and bstroke != b"": line = bstroke[:-1] ntab.append(line) ap_tab = ntab ap = b"\n".join(ap_tab) if annot_type == PDF_ANNOT_FREE_TEXT: BT = ap.find(b"BT") ET = ap.find(b"ET") + 2 ap = ap[BT:ET] w, h = self.rect.width, self.rect.height if rotate in (90, 270) or not (apnmat.b == apnmat.c == 0): w, h = h, w re = b"0 0 %g %g re" % (w, h) ap = re + b"\nW\nn\n" + ap ope = None fill_string = color_string(fill, "f") if fill_string: ope = b"f" stroke_string = color_string(border_color, "c") if stroke_string and bwidth > 0: ope = b"S" bwidth = b"%g w\n" % bwidth else: bwidth = stroke_string = b"" if fill_string and stroke_string: ope = b"B" if ope != None: ap = bwidth + fill_string + stroke_string + re + b"\n" + ope + b"\n" + ap if dashes != None: # handle dashes ap = dashes + b"\n" + ap dashes = None ap_updated = True if annot_type in (PDF_ANNOT_POLYGON, PDF_ANNOT_POLY_LINE): ap = b"\n".join(ap_tab[:-1]) + b"\n" ap_updated = True if bfill != b"": if annot_type == PDF_ANNOT_POLYGON: ap = ap + bfill + b"b" # close, fill, and stroke elif annot_type == PDF_ANNOT_POLY_LINE: ap = ap + b"S" # stroke else: if annot_type == PDF_ANNOT_POLYGON: ap = ap + b"s" # close and stroke elif annot_type == PDF_ANNOT_POLY_LINE: ap = ap + b"S" # stroke if dashes is not None: # handle dashes ap = dashes + ap # reset dashing - only applies for LINE annots with line ends given ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1) ap_updated = True if opa_code: ap = opa_code.encode("utf-8") + ap ap_updated = True ap = b"q\n" + ap + b"\nQ\n" #---------------------------------------------------------------------- # the following handles line end symbols for 'Polygon' and 'Polyline' #---------------------------------------------------------------------- if line_end_le + line_end_ri > 0 and annot_type in (PDF_ANNOT_POLYGON, PDF_ANNOT_POLY_LINE): le_funcs = (None, TOOLS._le_square, TOOLS._le_circle, TOOLS._le_diamond, TOOLS._le_openarrow, TOOLS._le_closedarrow, TOOLS._le_butt, TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow, TOOLS._le_slash) le_funcs_range = range(1, len(le_funcs)) d = 2 * max(1, self.border["width"]) rect = self.rect + (-d, -d, d, d) ap_updated = True points = self.vertices if line_end_le in le_funcs_range: p1 = Point(points[0]) * imat p2 = Point(points[1]) * imat left = le_funcs[line_end_le](self, p1, p2, False, fill_color) ap += left.encode() if line_end_ri in le_funcs_range: p1 = Point(points[-2]) * imat p2 = Point(points[-1]) * imat left = le_funcs[line_end_ri](self, p1, p2, True, fill_color) ap += left.encode() if ap_updated: if rect: # rect modified here? self.set_rect(rect) self._setAP(ap, rect=1) else: self._setAP(ap, rect=0) #------------------------------- # handle annotation rotations #------------------------------- if annot_type not in ( # only these types are supported PDF_ANNOT_CARET, PDF_ANNOT_CIRCLE, PDF_ANNOT_FILE_ATTACHMENT, PDF_ANNOT_INK, PDF_ANNOT_LINE, PDF_ANNOT_POLY_LINE, PDF_ANNOT_POLYGON, PDF_ANNOT_SQUARE, PDF_ANNOT_STAMP, PDF_ANNOT_TEXT, ): return rot = self.rotation # get value from annot object if rot == -1: # nothing to change return M = (self.rect.tl + self.rect.br) / 2 # center of annot rect if rot == 0: # undo rotations if abs(apnmat - Matrix(1, 1)) < 1e-5: return # matrix already is a no-op quad = self.rect.morph(M, ~apnmat) # derotate rect self.set_rect(quad.rect) self.set_apn_matrix(Matrix(1, 1)) # appearance matrix = no-op return mat = Matrix(rot) quad = self.rect.morph(M, mat) self.set_rect(quad.rect) self.set_apn_matrix(apnmat * mat) %} //---------------------------------------------------------------- // annotation set colors //---------------------------------------------------------------- %pythoncode %{ def set_colors(self, colors=None, stroke=None, fill=None): """Set 'stroke' and 'fill' colors. Use either a dict or the direct arguments. """ CheckParent(self) doc = self.parent.parent if type(colors) is not dict: colors = {"fill": fill, "stroke": stroke} fill = colors.get("fill") stroke = colors.get("stroke") fill_annots = (PDF_ANNOT_CIRCLE, PDF_ANNOT_SQUARE, PDF_ANNOT_LINE, PDF_ANNOT_POLY_LINE, PDF_ANNOT_POLYGON, PDF_ANNOT_REDACT,) if stroke in ([], ()): doc.xref_set_key(self.xref, "C", "[]") elif stroke is not None: if hasattr(stroke, "__float__"): stroke = [float(stroke)] CheckColor(stroke) if len(stroke) == 1: s = "[%g]" % stroke[0] elif len(stroke) == 3: s = "[%g %g %g]" % tuple(stroke) else: s = "[%g %g %g %g]" % tuple(stroke) doc.xref_set_key(self.xref, "C", s) if fill and self.type[0] not in fill_annots: print("Warning: fill color ignored for annot type '%s'." % self.type[1]) return if fill in ([], ()): doc.xref_set_key(self.xref, "IC", "[]") elif fill is not None: if hasattr(fill, "__float__"): fill = [float(fill)] CheckColor(fill) if len(fill) == 1: s = "[%g]" % fill[0] elif len(fill) == 3: s = "[%g %g %g]" % tuple(fill) else: s = "[%g %g %g %g]" % tuple(fill) doc.xref_set_key(self.xref, "IC", s) %} //---------------------------------------------------------------- // annotation line_ends //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(line_ends, """Line end codes.""") PyObject * line_ends() { pdf_annot *annot = (pdf_annot *) $self; // return nothing for invalid annot types if (!pdf_annot_has_line_ending_styles(gctx, annot)) Py_RETURN_NONE; int lstart = (int) pdf_annot_line_start_style(gctx, annot); int lend = (int) pdf_annot_line_end_style(gctx, annot); return Py_BuildValue("ii", lstart, lend); } //---------------------------------------------------------------- // annotation set line ends //---------------------------------------------------------------- PARENTCHECK(set_line_ends, """Set line end codes.""") void set_line_ends(int start, int end) { pdf_annot *annot = (pdf_annot *) $self; if (pdf_annot_has_line_ending_styles(gctx, annot)) pdf_set_annot_line_ending_styles(gctx, annot, start, end); else JM_Warning("bad annot type for line ends"); } //---------------------------------------------------------------- // annotation type //---------------------------------------------------------------- PARENTCHECK(type, """annotation type""") %pythoncode %{@property%} PyObject *type() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); int type = pdf_annot_type(gctx, annot); const char *c = pdf_string_from_annot_type(gctx, type); pdf_obj *o = pdf_dict_gets(gctx, annot_obj, "IT"); if (!o || !pdf_is_name(gctx, o)) return Py_BuildValue("is", type, c); // no IT entry const char *it = pdf_to_name(gctx, o); return Py_BuildValue("iss", type, c, it); } //---------------------------------------------------------------- // annotation opacity //---------------------------------------------------------------- PARENTCHECK(opacity, """Opacity.""") %pythoncode %{@property%} PyObject *opacity() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); double opy = -1; pdf_obj *ca = pdf_dict_get(gctx, annot_obj, PDF_NAME(CA)); if (pdf_is_number(gctx, ca)) opy = pdf_to_real(gctx, ca); return Py_BuildValue("f", opy); } //---------------------------------------------------------------- // annotation set opacity //---------------------------------------------------------------- PARENTCHECK(set_opacity, """Set opacity.""") void set_opacity(float opacity) { pdf_annot *annot = (pdf_annot *) $self; if (!INRANGE(opacity, 0.0f, 1.0f)) { pdf_set_annot_opacity(gctx, annot, 1); return; } pdf_set_annot_opacity(gctx, annot, opacity); if (opacity < 1.0f) { pdf_page *page = pdf_annot_page(gctx, annot); page->transparency = 1; } } //---------------------------------------------------------------- // annotation get attached file info //---------------------------------------------------------------- %pythoncode %{@property%} FITZEXCEPTION(file_info, !result) PARENTCHECK(file_info, """Attached file information.""") PyObject *file_info() { PyObject *res = PyDict_New(); // create Python dict char *filename = NULL; char *desc = NULL; int length = -1, size = -1; pdf_obj *stream = NULL, *o = NULL, *fs = NULL; pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_try(gctx) { int type = (int) pdf_annot_type(gctx, annot); if (type != PDF_ANNOT_FILE_ATTACHMENT) { RAISEPY(gctx, MSG_BAD_ANNOT_TYPE, PyExc_TypeError); } stream = pdf_dict_getl(gctx, annot_obj, PDF_NAME(FS), PDF_NAME(EF), PDF_NAME(F), NULL); if (!stream) { RAISEPY(gctx, "bad PDF: file entry not found", JM_Exc_FileDataError); } } fz_catch(gctx) { return NULL; } fs = pdf_dict_get(gctx, annot_obj, PDF_NAME(FS)); o = pdf_dict_get(gctx, fs, PDF_NAME(UF)); if (o) { filename = (char *) pdf_to_text_string(gctx, o); } else { o = pdf_dict_get(gctx, fs, PDF_NAME(F)); if (o) filename = (char *) pdf_to_text_string(gctx, o); } o = pdf_dict_get(gctx, fs, PDF_NAME(Desc)); if (o) desc = (char *) pdf_to_text_string(gctx, o); o = pdf_dict_get(gctx, stream, PDF_NAME(Length)); if (o) length = pdf_to_int(gctx, o); o = pdf_dict_getl(gctx, stream, PDF_NAME(Params), PDF_NAME(Size), NULL); if (o) size = pdf_to_int(gctx, o); DICT_SETITEM_DROP(res, dictkey_filename, JM_EscapeStrFromStr(filename)); DICT_SETITEM_DROP(res, dictkey_desc, JM_UnicodeFromStr(desc)); DICT_SETITEM_DROP(res, dictkey_length, Py_BuildValue("i", length)); DICT_SETITEM_DROP(res, dictkey_size, Py_BuildValue("i", size)); return res; } //---------------------------------------------------------------- // annotation get attached file content //---------------------------------------------------------------- FITZEXCEPTION(get_file, !result) PARENTCHECK(get_file, """Retrieve attached file content.""") PyObject * get_file() { PyObject *res = NULL; pdf_obj *stream = NULL; fz_buffer *buf = NULL; pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_var(buf); fz_try(gctx) { int type = (int) pdf_annot_type(gctx, annot); if (type != PDF_ANNOT_FILE_ATTACHMENT) { RAISEPY(gctx, MSG_BAD_ANNOT_TYPE, PyExc_TypeError); } stream = pdf_dict_getl(gctx, annot_obj, PDF_NAME(FS), PDF_NAME(EF), PDF_NAME(F), NULL); if (!stream) { RAISEPY(gctx, "bad PDF: file entry not found", JM_Exc_FileDataError); } buf = pdf_load_stream(gctx, stream); res = JM_BinFromBuffer(gctx, buf); } fz_always(gctx) { fz_drop_buffer(gctx, buf); } fz_catch(gctx) { return NULL; } return res; } //---------------------------------------------------------------- // annotation get attached sound stream //---------------------------------------------------------------- FITZEXCEPTION(get_sound, !result) PARENTCHECK(get_sound, """Retrieve sound stream.""") PyObject * get_sound() { PyObject *res = NULL; PyObject *stream = NULL; fz_buffer *buf = NULL; pdf_obj *obj = NULL; pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_var(buf); fz_try(gctx) { int type = (int) pdf_annot_type(gctx, annot); pdf_obj *sound = pdf_dict_get(gctx, annot_obj, PDF_NAME(Sound)); if (type != PDF_ANNOT_SOUND || !sound) { RAISEPY(gctx, MSG_BAD_ANNOT_TYPE, PyExc_TypeError); } if (pdf_dict_get(gctx, sound, PDF_NAME(F))) { RAISEPY(gctx, "unsupported sound stream", JM_Exc_FileDataError); } res = PyDict_New(); obj = pdf_dict_get(gctx, sound, PDF_NAME(R)); if (obj) { DICT_SETITEMSTR_DROP(res, "rate", Py_BuildValue("f", pdf_to_real(gctx, obj))); } obj = pdf_dict_get(gctx, sound, PDF_NAME(C)); if (obj) { DICT_SETITEMSTR_DROP(res, "channels", Py_BuildValue("i", pdf_to_int(gctx, obj))); } obj = pdf_dict_get(gctx, sound, PDF_NAME(B)); if (obj) { DICT_SETITEMSTR_DROP(res, "bps", Py_BuildValue("i", pdf_to_int(gctx, obj))); } obj = pdf_dict_get(gctx, sound, PDF_NAME(E)); if (obj) { DICT_SETITEMSTR_DROP(res, "encoding", Py_BuildValue("s", pdf_to_name(gctx, obj))); } obj = pdf_dict_gets(gctx, sound, "CO"); if (obj) { DICT_SETITEMSTR_DROP(res, "compression", Py_BuildValue("s", pdf_to_name(gctx, obj))); } buf = pdf_load_stream(gctx, sound); stream = JM_BinFromBuffer(gctx, buf); DICT_SETITEMSTR_DROP(res, "stream", stream); } fz_always(gctx) { fz_drop_buffer(gctx, buf); } fz_catch(gctx) { Py_CLEAR(res); return NULL; } return res; } //---------------------------------------------------------------- // annotation update attached file //---------------------------------------------------------------- FITZEXCEPTION(update_file, !result) %pythonprepend update_file %{"""Update attached file.""" CheckParent(self)%} PyObject * update_file(PyObject *buffer=NULL, char *filename=NULL, char *ufilename=NULL, char *desc=NULL) { pdf_document *pdf = NULL; // to be filled in fz_buffer *res = NULL; // for compressed content pdf_obj *stream = NULL, *fs = NULL; pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); fz_try(gctx) { pdf = pdf_get_bound_document(gctx, annot_obj); // the owning PDF int type = (int) pdf_annot_type(gctx, annot); if (type != PDF_ANNOT_FILE_ATTACHMENT) { RAISEPY(gctx, MSG_BAD_ANNOT_TYPE, PyExc_TypeError); } stream = pdf_dict_getl(gctx, annot_obj, PDF_NAME(FS), PDF_NAME(EF), PDF_NAME(F), NULL); // the object for file content if (!stream) { RAISEPY(gctx, "bad PDF: no /EF object", JM_Exc_FileDataError); } fs = pdf_dict_get(gctx, annot_obj, PDF_NAME(FS)); // file content given res = JM_BufferFromBytes(gctx, buffer); if (buffer && !res) { RAISEPY(gctx, MSG_BAD_BUFFER, PyExc_ValueError); } if (res) { JM_update_stream(gctx, pdf, stream, res, 1); // adjust /DL and /Size parameters int64_t len = (int64_t) fz_buffer_storage(gctx, res, NULL); pdf_obj *l = pdf_new_int(gctx, len); pdf_dict_put(gctx, stream, PDF_NAME(DL), l); pdf_dict_putl(gctx, stream, l, PDF_NAME(Params), PDF_NAME(Size), NULL); } if (filename) { pdf_dict_put_text_string(gctx, stream, PDF_NAME(F), filename); pdf_dict_put_text_string(gctx, fs, PDF_NAME(F), filename); pdf_dict_put_text_string(gctx, stream, PDF_NAME(UF), filename); pdf_dict_put_text_string(gctx, fs, PDF_NAME(UF), filename); pdf_dict_put_text_string(gctx, annot_obj, PDF_NAME(Contents), filename); } if (ufilename) { pdf_dict_put_text_string(gctx, stream, PDF_NAME(UF), ufilename); pdf_dict_put_text_string(gctx, fs, PDF_NAME(UF), ufilename); } if (desc) { pdf_dict_put_text_string(gctx, stream, PDF_NAME(Desc), desc); pdf_dict_put_text_string(gctx, fs, PDF_NAME(Desc), desc); } } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation info //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(info, """Various information details.""") PyObject *info() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); PyObject *res = PyDict_New(); pdf_obj *o; DICT_SETITEM_DROP(res, dictkey_content, JM_UnicodeFromStr(pdf_annot_contents(gctx, annot))); o = pdf_dict_get(gctx, annot_obj, PDF_NAME(Name)); DICT_SETITEM_DROP(res, dictkey_name, JM_UnicodeFromStr(pdf_to_name(gctx, o))); // Title (= author) o = pdf_dict_get(gctx, annot_obj, PDF_NAME(T)); DICT_SETITEM_DROP(res, dictkey_title, JM_UnicodeFromStr(pdf_to_text_string(gctx, o))); // CreationDate o = pdf_dict_gets(gctx, annot_obj, "CreationDate"); DICT_SETITEM_DROP(res, dictkey_creationDate, JM_UnicodeFromStr(pdf_to_text_string(gctx, o))); // ModDate o = pdf_dict_get(gctx, annot_obj, PDF_NAME(M)); DICT_SETITEM_DROP(res, dictkey_modDate, JM_UnicodeFromStr(pdf_to_text_string(gctx, o))); // Subj o = pdf_dict_gets(gctx, annot_obj, "Subj"); DICT_SETITEM_DROP(res, dictkey_subject, Py_BuildValue("s",pdf_to_text_string(gctx, o))); // Identification (PDF key /NM) o = pdf_dict_gets(gctx, annot_obj, "NM"); DICT_SETITEM_DROP(res, dictkey_id, JM_UnicodeFromStr(pdf_to_text_string(gctx, o))); return res; } //---------------------------------------------------------------- // annotation set information //---------------------------------------------------------------- FITZEXCEPTION(set_info, !result) %pythonprepend set_info %{ """Set various properties.""" CheckParent(self) if type(info) is dict: # build the args from the dictionary content = info.get("content", None) title = info.get("title", None) creationDate = info.get("creationDate", None) modDate = info.get("modDate", None) subject = info.get("subject", None) info = None %} PyObject * set_info(PyObject *info=NULL, char *content=NULL, char *title=NULL, char *creationDate=NULL, char *modDate=NULL, char *subject=NULL) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); // use this to indicate a 'markup' annot type int is_markup = pdf_annot_has_author(gctx, annot); fz_try(gctx) { // contents if (content) pdf_set_annot_contents(gctx, annot, content); if (is_markup) { // title (= author) if (title) pdf_set_annot_author(gctx, annot, title); // creation date if (creationDate) pdf_dict_put_text_string(gctx, annot_obj, PDF_NAME(CreationDate), creationDate); // mod date if (modDate) pdf_dict_put_text_string(gctx, annot_obj, PDF_NAME(M), modDate); // subject if (subject) pdf_dict_puts_drop(gctx, annot_obj, "Subj", pdf_new_text_string(gctx, subject)); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // annotation border //---------------------------------------------------------------- %pythoncode %{@property%} %pythonprepend border %{ """Border information.""" CheckParent(self) atype = self.type[0] if atype not in (PDF_ANNOT_CIRCLE, PDF_ANNOT_FREE_TEXT, PDF_ANNOT_INK, PDF_ANNOT_LINE, PDF_ANNOT_POLY_LINE,PDF_ANNOT_POLYGON, PDF_ANNOT_SQUARE): return {} %} PyObject *border() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); return JM_annot_border(gctx, annot_obj); } //---------------------------------------------------------------- // set annotation border //---------------------------------------------------------------- %pythonprepend set_border %{ """Set border properties. Either a dict, or direct arguments width, style, dashes or clouds.""" CheckParent(self) atype, atname = self.type[:2] # annotation type if atype not in (PDF_ANNOT_CIRCLE, PDF_ANNOT_FREE_TEXT, PDF_ANNOT_INK, PDF_ANNOT_LINE, PDF_ANNOT_POLY_LINE,PDF_ANNOT_POLYGON, PDF_ANNOT_SQUARE): print(f"Cannot set border for '{atname}'.") return None if not atype in (PDF_ANNOT_CIRCLE, PDF_ANNOT_FREE_TEXT,PDF_ANNOT_POLYGON, PDF_ANNOT_SQUARE): if clouds > 0: print(f"Cannot set cloudy border for '{atname}'.") clouds = -1 # do not set border effect if type(border) is not dict: border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds} border.setdefault("width", -1) border.setdefault("style", None) border.setdefault("dashes", None) border.setdefault("clouds", -1) if border["width"] == None: border["width"] = -1 if border["clouds"] == None: border["clouds"] = -1 if hasattr(border["dashes"], "__getitem__"): # ensure sequence items are integers border["dashes"] = tuple(border["dashes"]) for item in border["dashes"]: if not isinstance(item, int): border["dashes"] = None break %} PyObject * set_border(PyObject *border=NULL, float width=-1, char *style=NULL, PyObject *dashes=NULL, int clouds=-1) { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_document *pdf = pdf_get_bound_document(gctx, annot_obj); return JM_annot_set_border(gctx, border, pdf, annot_obj); } //---------------------------------------------------------------- // annotation flags //---------------------------------------------------------------- %pythoncode %{@property%} PARENTCHECK(flags, """Flags field.""") int flags() { pdf_annot *annot = (pdf_annot *) $self; return pdf_annot_flags(gctx, annot); } //---------------------------------------------------------------- // annotation clean contents //---------------------------------------------------------------- FITZEXCEPTION(clean_contents, !result) PARENTCHECK(clean_contents, """Clean appearance contents stream.""") PyObject *clean_contents(int sanitize=1) { pdf_annot *annot = (pdf_annot *) $self; pdf_document *pdf = pdf_get_bound_document(gctx, pdf_annot_obj(gctx, annot)); #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 pdf_filter_factory list[2] = { 0 }; pdf_sanitize_filter_options sopts = { 0 }; pdf_filter_options filter = { 1, // recurse: true 0, // instance forms 0, // do not ascii-escape binary data 0, // no_update NULL, // end_page_opaque NULL, // end page list, // filters }; if (sanitize) { list[0].filter = pdf_new_sanitize_filter; list[0].options = &sopts; } #else pdf_filter_options filter = { NULL, // opaque NULL, // image filter NULL, // text filter NULL, // after text NULL, // end page 1, // recurse: true 1, // instance forms 1, // sanitize, 0 // do not ascii-escape binary data }; filter.sanitize = sanitize; #endif fz_try(gctx) { pdf_filter_annot_contents(gctx, pdf, annot, &filter); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // set annotation flags //---------------------------------------------------------------- PARENTCHECK(set_flags, """Set annotation flags.""") void set_flags(int flags) { pdf_annot *annot = (pdf_annot *) $self; pdf_set_annot_flags(gctx, annot, flags); } //---------------------------------------------------------------- // annotation delete responses //---------------------------------------------------------------- FITZEXCEPTION(delete_responses, !result) PARENTCHECK(delete_responses, """Delete 'Popup' and responding annotations.""") PyObject * delete_responses() { pdf_annot *annot = (pdf_annot *) $self; pdf_obj *annot_obj = pdf_annot_obj(gctx, annot); pdf_page *page = pdf_annot_page(gctx, annot); pdf_annot *irt_annot = NULL; fz_try(gctx) { while (1) { irt_annot = JM_find_annot_irt(gctx, annot); if (!irt_annot) break; pdf_delete_annot(gctx, page, irt_annot); } pdf_dict_del(gctx, annot_obj, PDF_NAME(Popup)); pdf_obj *annots = pdf_dict_get(gctx, page->obj, PDF_NAME(Annots)); int i, n = pdf_array_len(gctx, annots), found = 0; for (i = n - 1; i >= 0; i--) { pdf_obj *o = pdf_array_get(gctx, annots, i); pdf_obj *p = pdf_dict_get(gctx, o, PDF_NAME(Parent)); if (!p) continue; if (!pdf_objcmp(gctx, p, annot_obj)) { pdf_array_delete(gctx, annots, i); found = 1; } } if (found > 0) { pdf_dict_put(gctx, page->obj, PDF_NAME(Annots), annots); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // next annotation //---------------------------------------------------------------- PARENTCHECK(next, """Next annotation.""") %pythonappend next %{ if not val: return None val.thisown = True val.parent = self.parent # copy owning page object from previous annot val.parent._annot_refs[id(val)] = val if val.type[0] == PDF_ANNOT_WIDGET: widget = Widget() TOOLS._fill_widget(val, widget) val = widget %} %pythoncode %{@property%} struct Annot *next() { pdf_annot *this_annot = (pdf_annot *) $self; int type = pdf_annot_type(gctx, this_annot); pdf_annot *annot; if (type != PDF_ANNOT_WIDGET) { annot = pdf_next_annot(gctx, this_annot); } else { annot = pdf_next_widget(gctx, this_annot); } if (annot) pdf_keep_annot(gctx, annot); return (struct Annot *) annot; } //---------------------------------------------------------------- // annotation pixmap //---------------------------------------------------------------- FITZEXCEPTION(get_pixmap, !result) %pythonprepend get_pixmap %{"""annotation Pixmap""" CheckParent(self) cspaces = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK} if type(colorspace) is str: colorspace = cspaces.get(colorspace.lower(), None) if dpi: matrix = Matrix(dpi / 72, dpi / 72) %} %pythonappend get_pixmap %{ val.thisown = True if dpi: val.set_dpi(dpi, dpi) %} struct Pixmap * get_pixmap(PyObject *matrix = NULL, PyObject *dpi=NULL, struct Colorspace *colorspace = NULL, int alpha = 0) { fz_matrix ctm = JM_matrix_from_py(matrix); fz_colorspace *cs = (fz_colorspace *) colorspace; fz_pixmap *pix = NULL; if (!cs) { cs = fz_device_rgb(gctx); } fz_try(gctx) { pix = pdf_new_pixmap_from_annot(gctx, (pdf_annot *) $self, ctm, cs, NULL, alpha); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pix; } %pythoncode %{ def _erase(self): self.__swig_destroy__(self) self.parent = None def __str__(self): CheckParent(self) return "'%s' annotation on %s" % (self.type[1], str(self.parent)) def __repr__(self): CheckParent(self) return "'%s' annotation on %s" % (self.type[1], str(self.parent)) def __del__(self): if self.parent is None: return self._erase()%} } }; %clearnodefaultctor; //------------------------------------------------------------------------ // fz_link //------------------------------------------------------------------------ %nodefaultctor; struct Link { %immutable; %extend { ~Link() { DEBUGMSG1("Link"); fz_link *this_link = (fz_link *) $self; fz_drop_link(gctx, this_link); DEBUGMSG2; } PyObject *_border(struct Document *doc, int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) doc); if (!pdf) Py_RETURN_NONE; pdf_obj *link_obj = pdf_new_indirect(gctx, pdf, xref, 0); if (!link_obj) Py_RETURN_NONE; PyObject *b = JM_annot_border(gctx, link_obj); pdf_drop_obj(gctx, link_obj); return b; } PyObject *_setBorder(PyObject *border, struct Document *doc, int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) doc); if (!pdf) Py_RETURN_NONE; pdf_obj *link_obj = pdf_new_indirect(gctx, pdf, xref, 0); if (!link_obj) Py_RETURN_NONE; PyObject *b = JM_annot_set_border(gctx, border, pdf, link_obj); pdf_drop_obj(gctx, link_obj); return b; } FITZEXCEPTION(_colors, !result) PyObject *_colors(struct Document *doc, int xref) { pdf_document *pdf = pdf_specifics(gctx, (fz_document *) doc); if (!pdf) Py_RETURN_NONE; PyObject *b = NULL; pdf_obj *link_obj; fz_try(gctx) { link_obj = pdf_new_indirect(gctx, pdf, xref, 0); if (!link_obj) { RAISEPY(gctx, MSG_BAD_XREF, PyExc_ValueError); } b = JM_annot_colors(gctx, link_obj); } fz_always(gctx) { pdf_drop_obj(gctx, link_obj); } fz_catch(gctx) { return NULL; } return b; } %pythoncode %{ @property def border(self): return self._border(self.parent.parent.this, self.xref) @property def flags(self)->int: CheckParent(self) doc = self.parent.parent if not doc.is_pdf: return 0 f = doc.xref_get_key(self.xref, "F") if f[1] != "null": return int(f[1]) return 0 def set_flags(self, flags): CheckParent(self) doc = self.parent.parent if not doc.is_pdf: raise ValueError("is no PDF") if not type(flags) is int: raise ValueError("bad 'flags' value") doc.xref_set_key(self.xref, "F", str(flags)) return None def set_border(self, border=None, width=0, dashes=None, style=None): if type(border) is not dict: border = {"width": width, "style": style, "dashes": dashes} return self._setBorder(border, self.parent.parent.this, self.xref) @property def colors(self): return self._colors(self.parent.parent.this, self.xref) def set_colors(self, colors=None, stroke=None, fill=None): """Set border colors.""" CheckParent(self) doc = self.parent.parent if type(colors) is not dict: colors = {"fill": fill, "stroke": stroke} fill = colors.get("fill") stroke = colors.get("stroke") if fill is not None: print("warning: links have no fill color") if stroke in ([], ()): doc.xref_set_key(self.xref, "C", "[]") return if hasattr(stroke, "__float__"): stroke = [float(stroke)] CheckColor(stroke) if len(stroke) == 1: s = "[%g]" % stroke[0] elif len(stroke) == 3: s = "[%g %g %g]" % tuple(stroke) else: s = "[%g %g %g %g]" % tuple(stroke) doc.xref_set_key(self.xref, "C", s) %} %pythoncode %{@property%} PARENTCHECK(uri, """Uri string.""") PyObject *uri() { fz_link *this_link = (fz_link *) $self; return JM_UnicodeFromStr(this_link->uri); } %pythoncode %{@property%} PARENTCHECK(is_external, """Flag the link as external.""") PyObject *is_external() { fz_link *this_link = (fz_link *) $self; if (!this_link->uri) Py_RETURN_FALSE; return JM_BOOL(fz_is_external_link(gctx, this_link->uri)); } %pythoncode %{ page = -1 @property def dest(self): """Create link destination details.""" if hasattr(self, "parent") and self.parent is None: raise ValueError("orphaned object: parent is None") if self.parent.parent.is_closed or self.parent.parent.is_encrypted: raise ValueError("document closed or encrypted") doc = self.parent.parent if self.is_external or self.uri.startswith("#"): uri = None else: uri = doc.resolve_link(self.uri) return linkDest(self, uri) %} PARENTCHECK(rect, """Rectangle ('hot area').""") %pythoncode %{@property%} %pythonappend rect %{val = Rect(val)%} PyObject *rect() { fz_link *this_link = (fz_link *) $self; return JM_py_from_rect(this_link->rect); } //---------------------------------------------------------------- // next link //---------------------------------------------------------------- // we need to increase the link refs number // so that it will not be freed when the head is dropped PARENTCHECK(next, """Next link.""") %pythonappend next %{ if val: val.thisown = True val.parent = self.parent # copy owning page from prev link val.parent._annot_refs[id(val)] = val if self.xref > 0: # prev link has an xref link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == PDF_ANNOT_LINK] link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == PDF_ANNOT_LINK] idx = link_xrefs.index(self.xref) val.xref = link_xrefs[idx + 1] val.id = link_ids[idx + 1] else: val.xref = 0 val.id = "" %} %pythoncode %{@property%} struct Link *next() { fz_link *this_link = (fz_link *) $self; fz_link *next_link = this_link->next; if (!next_link) return NULL; next_link = fz_keep_link(gctx, next_link); return (struct Link *) next_link; } %pythoncode %{ def _erase(self): self.__swig_destroy__(self) self.parent = None def __str__(self): CheckParent(self) return "link on " + str(self.parent) def __repr__(self): CheckParent(self) return "link on " + str(self.parent) def __del__(self): self._erase()%} } }; %clearnodefaultctor; //------------------------------------------------------------------------ // fz_display_list //------------------------------------------------------------------------ struct DisplayList { %extend { ~DisplayList() { DEBUGMSG1("DisplayList"); fz_display_list *this_dl = (fz_display_list *) $self; fz_drop_display_list(gctx, this_dl); DEBUGMSG2; } FITZEXCEPTION(DisplayList, !result) DisplayList(PyObject *mediabox) { fz_display_list *dl = NULL; fz_try(gctx) { dl = fz_new_display_list(gctx, JM_rect_from_py(mediabox)); } fz_catch(gctx) { return NULL; } return (struct DisplayList *) dl; } FITZEXCEPTION(run, !result) PyObject *run(struct DeviceWrapper *dw, PyObject *m, PyObject *area) { fz_try(gctx) { fz_run_display_list(gctx, (fz_display_list *) $self, dw->device, JM_matrix_from_py(m), JM_rect_from_py(area), NULL); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------------------------------------- // DisplayList.rect //---------------------------------------------------------------- %pythoncode%{@property%} %pythonappend rect %{val = Rect(val)%} PyObject *rect() { return JM_py_from_rect(fz_bound_display_list(gctx, (fz_display_list *) $self)); } //---------------------------------------------------------------- // DisplayList.get_pixmap //---------------------------------------------------------------- FITZEXCEPTION(get_pixmap, !result) %pythonappend get_pixmap %{val.thisown = True%} struct Pixmap *get_pixmap(PyObject *matrix=NULL, struct Colorspace *colorspace=NULL, int alpha=0, PyObject *clip=NULL) { fz_colorspace *cs = NULL; fz_pixmap *pix = NULL; if (colorspace) cs = (fz_colorspace *) colorspace; else cs = fz_device_rgb(gctx); fz_try(gctx) { pix = JM_pixmap_from_display_list(gctx, (fz_display_list *) $self, matrix, cs, alpha, clip, NULL); } fz_catch(gctx) { return NULL; } return (struct Pixmap *) pix; } //---------------------------------------------------------------- // DisplayList.get_textpage //---------------------------------------------------------------- FITZEXCEPTION(get_textpage, !result) %pythonappend get_textpage %{val.thisown = True%} struct TextPage *get_textpage(int flags = 3) { fz_display_list *this_dl = (fz_display_list *) $self; fz_stext_page *tp = NULL; fz_try(gctx) { fz_stext_options stext_options = { 0 }; stext_options.flags = flags; tp = fz_new_stext_page_from_display_list(gctx, this_dl, &stext_options); } fz_catch(gctx) { return NULL; } return (struct TextPage *) tp; } %pythoncode %{ def __del__(self): if not type(self) is DisplayList: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // fz_stext_page //------------------------------------------------------------------------ struct TextPage { %extend { ~TextPage() { DEBUGMSG1("TextPage"); fz_stext_page *this_tp = (fz_stext_page *) $self; fz_drop_stext_page(gctx, this_tp); DEBUGMSG2; } FITZEXCEPTION(TextPage, !result) %pythonappend TextPage %{self.thisown=True%} TextPage(PyObject *mediabox) { fz_stext_page *tp = NULL; fz_try(gctx) { tp = fz_new_stext_page(gctx, JM_rect_from_py(mediabox)); } fz_catch(gctx) { return NULL; } return (struct TextPage *) tp; } //---------------------------------------------------------------- // method search() //---------------------------------------------------------------- FITZEXCEPTION(search, !result) %pythonprepend search %{"""Locate 'needle' returning rects or quads."""%} %pythonappend search %{ if not val: return val items = len(val) for i in range(items): # change entries to quads or rects q = Quad(val[i]) if quads: val[i] = q else: val[i] = q.rect if quads: return val i = 0 # join overlapping rects on the same line while i < items - 1: v1 = val[i] v2 = val[i + 1] if v1.y1 != v2.y1 or (v1 & v2).is_empty: i += 1 continue # no overlap on same line val[i] = v1 | v2 # join rectangles del val[i + 1] # remove v2 items -= 1 # reduce item count %} PyObject *search(const char *needle, int hit_max=0, int quads=1) { PyObject *liste = NULL; fz_try(gctx) { liste = JM_search_stext_page(gctx, (fz_stext_page *) $self, needle); } fz_catch(gctx) { return NULL; } return liste; } //---------------------------------------------------------------- // Get list of all blocks with block type and bbox as a Python list //---------------------------------------------------------------- FITZEXCEPTION(_getNewBlockList, !result) PyObject * _getNewBlockList(PyObject *page_dict, int raw) { fz_try(gctx) { JM_make_textpage_dict(gctx, (fz_stext_page *) $self, page_dict, raw); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def _textpage_dict(self, raw=False): page_dict = {"width": self.rect.width, "height": self.rect.height} self._getNewBlockList(page_dict, raw) return page_dict %} //---------------------------------------------------------------- // Get image meta information as a Python dictionary //---------------------------------------------------------------- FITZEXCEPTION(extractIMGINFO, !result) %pythonprepend extractIMGINFO %{"""Return a list with image meta information."""%} PyObject * extractIMGINFO(int hashes=0) { fz_stext_block *block; int block_n = -1; fz_stext_page *this_tpage = (fz_stext_page *) $self; PyObject *rc = NULL, *block_dict = NULL; fz_pixmap *pix = NULL; fz_try(gctx) { rc = PyList_New(0); for (block = this_tpage->first_block; block; block = block->next) { block_n++; if (block->type == FZ_STEXT_BLOCK_TEXT) { continue; } unsigned char digest[16]; fz_image *img = block->u.i.image; Py_ssize_t img_size = 0; fz_compressed_buffer *cbuff = fz_compressed_image_buffer(gctx, img); if (cbuff) { img_size = (Py_ssize_t) cbuff->buffer->len; } if (hashes) { pix = fz_get_pixmap_from_image(gctx, img, NULL, NULL, NULL, NULL); if (img_size == 0) { img_size = (Py_ssize_t) pix->w * pix->h * pix->n; } fz_md5_pixmap(gctx, pix, digest); fz_drop_pixmap(gctx, pix); pix = NULL; } fz_colorspace *cs = img->colorspace; block_dict = PyDict_New(); DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); DICT_SETITEM_DROP(block_dict, dictkey_matrix, JM_py_from_matrix(block->u.i.transform)); DICT_SETITEM_DROP(block_dict, dictkey_width, Py_BuildValue("i", img->w)); DICT_SETITEM_DROP(block_dict, dictkey_height, Py_BuildValue("i", img->h)); DICT_SETITEM_DROP(block_dict, dictkey_colorspace, Py_BuildValue("i", fz_colorspace_n(gctx, cs))); DICT_SETITEM_DROP(block_dict, dictkey_cs_name, Py_BuildValue("s", fz_colorspace_name(gctx, cs))); DICT_SETITEM_DROP(block_dict, dictkey_xres, Py_BuildValue("i", img->xres)); DICT_SETITEM_DROP(block_dict, dictkey_yres, Py_BuildValue("i", img->xres)); DICT_SETITEM_DROP(block_dict, dictkey_bpc, Py_BuildValue("i", (int) img->bpc)); DICT_SETITEM_DROP(block_dict, dictkey_size, Py_BuildValue("n", img_size)); if (hashes) { DICT_SETITEMSTR_DROP(block_dict, "digest", PyBytes_FromStringAndSize(digest, 16)); } LIST_APPEND_DROP(rc, block_dict); } } fz_always(gctx) { } fz_catch(gctx) { Py_CLEAR(rc); Py_CLEAR(block_dict); fz_drop_pixmap(gctx, pix); return NULL; } return rc; } //---------------------------------------------------------------- // Get text blocks with their bbox and concatenated lines // as a Python list //---------------------------------------------------------------- FITZEXCEPTION(extractBLOCKS, !result) %pythonprepend extractBLOCKS %{"""Return a list with text block information."""%} PyObject * extractBLOCKS() { fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; int block_n = -1; PyObject *text = NULL, *litem; fz_buffer *res = NULL; fz_var(res); fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect tp_rect = this_tpage->mediabox; PyObject *lines = NULL; fz_try(gctx) { res = fz_new_buffer(gctx, 1024); lines = PyList_New(0); for (block = this_tpage->first_block; block; block = block->next) { block_n++; fz_rect blockrect = fz_empty_rect; if (block->type == FZ_STEXT_BLOCK_TEXT) { fz_clear_buffer(gctx, res); // set text buffer to empty int line_n = -1; int last_char = 0; for (line = block->u.t.first_line; line; line = line->next) { line_n++; fz_rect linerect = fz_empty_rect; for (ch = line->first_char; ch; ch = ch->next) { fz_rect cbbox = JM_char_bbox(gctx, line, ch); if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect)) { continue; } JM_append_rune(gctx, res, ch->c); last_char = ch->c; linerect = fz_union_rect(linerect, cbbox); } if (last_char != 10 && !fz_is_empty_rect(linerect)) { fz_append_byte(gctx, res, 10); } blockrect = fz_union_rect(blockrect, linerect); } text = JM_EscapeStrFromBuffer(gctx, res); } else if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) { fz_image *img = block->u.i.image; fz_colorspace *cs = img->colorspace; text = PyUnicode_FromFormat("<image: %s, width: %d, height: %d, bpc: %d>", fz_colorspace_name(gctx, cs), img->w, img->h, img->bpc); blockrect = fz_union_rect(blockrect, block->bbox); } if (!fz_is_empty_rect(blockrect)) { litem = PyTuple_New(7); PyTuple_SET_ITEM(litem, 0, Py_BuildValue("f", blockrect.x0)); PyTuple_SET_ITEM(litem, 1, Py_BuildValue("f", blockrect.y0)); PyTuple_SET_ITEM(litem, 2, Py_BuildValue("f", blockrect.x1)); PyTuple_SET_ITEM(litem, 3, Py_BuildValue("f", blockrect.y1)); PyTuple_SET_ITEM(litem, 4, Py_BuildValue("O", text)); PyTuple_SET_ITEM(litem, 5, Py_BuildValue("i", block_n)); PyTuple_SET_ITEM(litem, 6, Py_BuildValue("i", block->type)); LIST_APPEND_DROP(lines, litem); } Py_CLEAR(text); } } fz_always(gctx) { fz_drop_buffer(gctx, res); PyErr_Clear(); } fz_catch(gctx) { Py_CLEAR(lines); return NULL; } return lines; } //---------------------------------------------------------------- // Get text words with their bbox //---------------------------------------------------------------- FITZEXCEPTION(extractWORDS, !result) %pythonprepend extractWORDS %{"""Return a list with text word information."""%} PyObject * extractWORDS(PyObject *delimiters=NULL) { fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; fz_buffer *buff = NULL; fz_var(buff); size_t buflen = 0; int block_n = -1, line_n, word_n; fz_rect wbbox = fz_empty_rect; // word bbox fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect tp_rect = this_tpage->mediabox; int word_delimiter = 0; PyObject *lines = NULL; fz_try(gctx) { buff = fz_new_buffer(gctx, 64); lines = PyList_New(0); for (block = this_tpage->first_block; block; block = block->next) { block_n++; if (block->type != FZ_STEXT_BLOCK_TEXT) { continue; } line_n = -1; for (line = block->u.t.first_line; line; line = line->next) { line_n++; word_n = 0; // word counter per line fz_clear_buffer(gctx, buff); // reset word buffer buflen = 0; // reset char counter for (ch = line->first_char; ch; ch = ch->next) { fz_rect cbbox = JM_char_bbox(gctx, line, ch); if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect)) { continue; } word_delimiter = JM_is_word_delimiter(ch->c, delimiters); if (word_delimiter) { if (buflen == 0) continue; // skip spaces at line start if (!fz_is_empty_rect(wbbox)) { // output word word_n = JM_append_word(gctx, lines, buff, &wbbox, block_n, line_n, word_n); } fz_clear_buffer(gctx, buff); buflen = 0; // reset char counter continue; } // append one unicode character to the word JM_append_rune(gctx, buff, ch->c); buflen++; // enlarge word bbox wbbox = fz_union_rect(wbbox, JM_char_bbox(gctx, line, ch)); } if (buflen && !fz_is_empty_rect(wbbox)) { word_n = JM_append_word(gctx, lines, buff, &wbbox, block_n, line_n, word_n); } fz_clear_buffer(gctx, buff); buflen = 0; } } } fz_always(gctx) { fz_drop_buffer(gctx, buff); PyErr_Clear(); } fz_catch(gctx) { return NULL; } return lines; } //---------------------------------------------------------------- // TextPage poolsize //---------------------------------------------------------------- %pythonprepend poolsize %{"""TextPage current poolsize."""%} PyObject *poolsize() { fz_stext_page *tpage = (fz_stext_page *) $self; size_t size = fz_pool_size(gctx, tpage->pool); return PyLong_FromSize_t(size); } //---------------------------------------------------------------- // TextPage rectangle //---------------------------------------------------------------- %pythoncode %{@property%} %pythonprepend rect %{"""TextPage rectangle."""%} %pythonappend rect %{val = Rect(val)%} PyObject *rect() { fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect mediabox = this_tpage->mediabox; return JM_py_from_rect(mediabox); } //---------------------------------------------------------------- // method _extractText() //---------------------------------------------------------------- FITZEXCEPTION(_extractText, !result) %newobject _extractText; PyObject *_extractText(int format) { fz_buffer *res = NULL; fz_output *out = NULL; PyObject *text = NULL; fz_var(res); fz_var(out); fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_try(gctx) { res = fz_new_buffer(gctx, 1024); out = fz_new_output_with_buffer(gctx, res); switch(format) { case(1): fz_print_stext_page_as_html(gctx, out, this_tpage, 0); break; case(3): fz_print_stext_page_as_xml(gctx, out, this_tpage, 0); break; case(4): fz_print_stext_page_as_xhtml(gctx, out, this_tpage, 0); break; default: JM_print_stext_page_as_text(gctx, res, this_tpage); break; } text = JM_EscapeStrFromBuffer(gctx, res); } fz_always(gctx) { fz_drop_buffer(gctx, res); fz_drop_output(gctx, out); } fz_catch(gctx) { return NULL; } return text; } //---------------------------------------------------------------- // method extractTextbox() //---------------------------------------------------------------- FITZEXCEPTION(extractTextbox, !result) PyObject *extractTextbox(PyObject *rect) { fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect area = JM_rect_from_py(rect); PyObject *rc = NULL; fz_try(gctx) { rc = JM_copy_rectangle(gctx, this_tpage, area); } fz_catch(gctx) { return NULL; } return rc; } //---------------------------------------------------------------- // method extractSelection() //---------------------------------------------------------------- PyObject *extractSelection(PyObject *pointa, PyObject *pointb) { fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_point a = JM_point_from_py(pointa); fz_point b = JM_point_from_py(pointb); char *found = fz_copy_selection(gctx, this_tpage, a, b, 0); PyObject *rc = NULL; if (found) { rc = PyUnicode_FromString(found); JM_Free(found); } else { rc = EMPTY_STRING; } return rc; } %pythoncode %{ def extractText(self, sort=False) -> str: """Return simple, bare text on the page.""" if sort is False: return self._extractText(0) blocks = self.extractBLOCKS()[:] blocks.sort(key=lambda b: (b[3], b[0])) return "".join([b[4] for b in blocks]) def extractHTML(self) -> str: """Return page content as a HTML string.""" return self._extractText(1) def extractJSON(self, cb=None, sort=False) -> str: """Return 'extractDICT' converted to JSON format.""" import base64, json val = self._textpage_dict(raw=False) class b64encode(json.JSONEncoder): def default(self, s): if type(s) in (bytes, bytearray): return base64.b64encode(s).decode() if cb is not None: val["width"] = cb.width val["height"] = cb.height if sort is True: blocks = val["blocks"] blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) val["blocks"] = blocks val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1) return val def extractRAWJSON(self, cb=None, sort=False) -> str: """Return 'extractRAWDICT' converted to JSON format.""" import base64, json val = self._textpage_dict(raw=True) class b64encode(json.JSONEncoder): def default(self,s): if type(s) in (bytes, bytearray): return base64.b64encode(s).decode() if cb is not None: val["width"] = cb.width val["height"] = cb.height if sort is True: blocks = val["blocks"] blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) val["blocks"] = blocks val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1) return val def extractXML(self) -> str: """Return page content as a XML string.""" return self._extractText(3) def extractXHTML(self) -> str: """Return page content as a XHTML string.""" return self._extractText(4) def extractDICT(self, cb=None, sort=False) -> dict: """Return page content as a Python dict of images and text spans.""" val = self._textpage_dict(raw=False) if cb is not None: val["width"] = cb.width val["height"] = cb.height if sort is True: blocks = val["blocks"] blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) val["blocks"] = blocks return val def extractRAWDICT(self, cb=None, sort=False) -> dict: """Return page content as a Python dict of images and text characters.""" val = self._textpage_dict(raw=True) if cb is not None: val["width"] = cb.width val["height"] = cb.height if sort is True: blocks = val["blocks"] blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) val["blocks"] = blocks return val def __del__(self): if not type(self) is TextPage: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // Graftmap - only used internally for inter-PDF object copy operations //------------------------------------------------------------------------ struct Graftmap { %extend { ~Graftmap() { DEBUGMSG1("Graftmap"); pdf_graft_map *this_gm = (pdf_graft_map *) $self; pdf_drop_graft_map(gctx, this_gm); DEBUGMSG2; } FITZEXCEPTION(Graftmap, !result) Graftmap(struct Document *doc) { pdf_graft_map *map = NULL; fz_try(gctx) { pdf_document *dst = pdf_specifics(gctx, (fz_document *) doc); ASSERT_PDF(dst); map = pdf_new_graft_map(gctx, dst); } fz_catch(gctx) { return NULL; } return (struct Graftmap *) map; } %pythoncode %{ def __del__(self): if not type(self) is Graftmap: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // TextWriter //------------------------------------------------------------------------ struct TextWriter { %extend { ~TextWriter() { DEBUGMSG1("TextWriter"); fz_text *this_tw = (fz_text *) $self; fz_drop_text(gctx, this_tw); DEBUGMSG2; } FITZEXCEPTION(TextWriter, !result) %pythonprepend TextWriter %{"""Stores text spans for later output on compatible PDF pages."""%} %pythonappend TextWriter %{ self.opacity = opacity self.color = color self.rect = Rect(page_rect) self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height) self.ictm = ~self.ctm self.last_point = Point() self.last_point.__doc__ = "Position following last text insertion." self.text_rect = Rect() self.text_rect.__doc__ = "Accumulated area of text spans." self.used_fonts = set() self.thisown = True %} TextWriter(PyObject *page_rect, float opacity=1, PyObject *color=NULL ) { fz_text *text = NULL; fz_try(gctx) { text = fz_new_text(gctx); } fz_catch(gctx) { return NULL; } return (struct TextWriter *) text; } FITZEXCEPTION(append, !result) %pythonprepend append %{ """Store 'text' at point 'pos' using 'font' and 'fontsize'.""" pos = Point(pos) * self.ictm if font is None: font = Font("helv") if not font.is_writable: raise ValueError("Unsupported font '%s'." % font.name) if right_to_left: text = self.clean_rtl(text) text = "".join(reversed(text)) right_to_left = 0 %} %pythonappend append %{ self.last_point = Point(val[-2:]) * self.ctm self.text_rect = self._bbox * self.ctm val = self.text_rect, self.last_point if font.flags["mono"] == 1: self.used_fonts.add(font) %} PyObject * append(PyObject *pos, char *text, struct Font *font=NULL, float fontsize=11, char *language=NULL, int right_to_left=0, int small_caps=0) { fz_text_language lang = fz_text_language_from_string(language); fz_point p = JM_point_from_py(pos); fz_matrix trm = fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y); int markup_dir = 0, wmode = 0; fz_try(gctx) { if (small_caps == 0) { trm = fz_show_string(gctx, (fz_text *) $self, (fz_font *) font, trm, text, wmode, right_to_left, markup_dir, lang); } else { trm = JM_show_string_cs(gctx, (fz_text *) $self, (fz_font *) font, trm, text, wmode, right_to_left, markup_dir, lang); } } fz_catch(gctx) { return NULL; } return JM_py_from_matrix(trm); } %pythoncode %{ def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False): """Append text in vertical write mode.""" lheight = fontsize * 1.2 for c in text: self.append(pos, c, font=font, fontsize=fontsize, language=language, small_caps=small_caps) pos.y += lheight return self.text_rect, self.last_point def clean_rtl(self, text): """Revert the sequence of Latin text parts. Text with right-to-left writing direction (Arabic, Hebrew) often contains Latin parts, which are written in left-to-right: numbers, names, etc. For output as PDF text we need *everything* in right-to-left. E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic parts remain untouched. Args: text: str Returns: Massaged string. """ if not text: return text # split into words at space boundaries words = text.split(" ") idx = [] for i in range(len(words)): w = words[i] # revert character sequence for Latin only words if not (len(w) < 2 or max([ord(c) for c in w]) > 255): words[i] = "".join(reversed(w)) idx.append(i) # stored index of Latin word # adjacent Latin words must revert their sequence, too idx2 = [] # store indices of adjacent Latin words for i in range(len(idx)): if idx2 == []: # empty yet? idx2.append(idx[i]) # store Latin word number elif idx[i] > idx2[-1] + 1: # large gap to last? if len(idx2) > 1: # at least two consecutives? words[idx2[0] : idx2[-1] + 1] = reversed( words[idx2[0] : idx2[-1] + 1] ) # revert their sequence idx2 = [idx[i]] # re-initialize elif idx[i] == idx2[-1] + 1: # new adjacent Latin word idx2.append(idx[i]) text = " ".join(words) return text %} %pythoncode %{@property%} %pythonappend _bbox%{val = Rect(val)%} PyObject *_bbox() { return JM_py_from_rect(fz_bound_text(gctx, (fz_text *) $self, NULL, fz_identity)); } FITZEXCEPTION(write_text, !result) %pythonprepend write_text%{ """Write the text to a PDF page having the TextWriter's page size. Args: page: a PDF page having same size. color: override text color. opacity: override transparency. overlay: put in foreground or background. morph: tuple(Point, Matrix), apply a matrix with a fixpoint. matrix: Matrix to be used instead of 'morph' argument. render_mode: (int) PDF render mode operator 'Tr'. """ CheckParent(page) if abs(self.rect - page.rect) > 1e-3: raise ValueError("incompatible page rect") if morph != None: if (type(morph) not in (tuple, list) or type(morph[0]) is not Point or type(morph[1]) is not Matrix ): raise ValueError("morph must be (Point, Matrix) or None") if matrix != None and morph != None: raise ValueError("only one of matrix, morph is allowed") if getattr(opacity, "__float__", None) is None or opacity == -1: opacity = self.opacity if color is None: color = self.color %} %pythonappend write_text%{ max_nums = val[0] content = val[1] max_alp, max_font = max_nums old_cont_lines = content.splitlines() optcont = page._get_optional_content(oc) if optcont != None: bdc = "/OC /%s BDC" % optcont emc = "EMC" else: bdc = emc = "" new_cont_lines = ["q"] if bdc: new_cont_lines.append(bdc) cb = page.cropbox_position if page.rotation in (90, 270): delta = page.rect.height - page.rect.width else: delta = 0 mb = page.mediabox if bool(cb) or mb.y0 != 0 or delta != 0: new_cont_lines.append("1 0 0 1 %g %g cm" % (cb.x, cb.y + mb.y0 - delta)) if morph: p = morph[0] * self.ictm delta = Matrix(1, 1).pretranslate(p.x, p.y) matrix = ~delta * morph[1] * delta if morph or matrix: new_cont_lines.append("%g %g %g %g %g %g cm" % JM_TUPLE(matrix)) for line in old_cont_lines: if line.endswith(" cm"): continue if line == "BT": new_cont_lines.append(line) new_cont_lines.append("%i Tr" % render_mode) continue if line.endswith(" gs"): alp = int(line.split()[0][4:]) + max_alp line = "/Alp%i gs" % alp elif line.endswith(" Tf"): temp = line.split() fsize = float(temp[1]) if render_mode != 0: w = fsize * 0.05 else: w = 1 new_cont_lines.append("%g w" % w) font = int(temp[0][2:]) + max_font line = " ".join(["/F%i" % font] + temp[1:]) elif line.endswith(" rg"): new_cont_lines.append(line.replace("rg", "RG")) elif line.endswith(" g"): new_cont_lines.append(line.replace(" g", " G")) elif line.endswith(" k"): new_cont_lines.append(line.replace(" k", " K")) new_cont_lines.append(line) if emc: new_cont_lines.append(emc) new_cont_lines.append("Q\n") content = "\n".join(new_cont_lines).encode("utf-8") TOOLS._insert_contents(page, content, overlay=overlay) val = None for font in self.used_fonts: repair_mono_font(page, font) %} PyObject *write_text(struct Page *page, PyObject *color=NULL, float opacity=-1, int overlay=1, PyObject *morph=NULL, PyObject *matrix=NULL, int render_mode=0, int oc=0) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); pdf_obj *resources = NULL; fz_buffer *contents = NULL; fz_device *dev = NULL; PyObject *result = NULL, *max_nums, *cont_string; float alpha = 1; if (opacity >= 0 && opacity < 1) alpha = opacity; fz_colorspace *colorspace; int ncol = 1; float dev_color[4] = {0, 0, 0, 0}; if (EXISTS(color)) { JM_color_FromSequence(color, &ncol, dev_color); } switch(ncol) { case 3: colorspace = fz_device_rgb(gctx); break; case 4: colorspace = fz_device_cmyk(gctx); break; default: colorspace = fz_device_gray(gctx); break; } fz_var(contents); fz_var(resources); fz_var(dev); fz_try(gctx) { ASSERT_PDF(pdfpage); resources = pdf_new_dict(gctx, pdfpage->doc, 5); contents = fz_new_buffer(gctx, 1024); dev = pdf_new_pdf_device(gctx, pdfpage->doc, fz_identity, resources, contents); fz_fill_text(gctx, dev, (fz_text *) $self, fz_identity, colorspace, dev_color, alpha, fz_default_color_params); fz_close_device(gctx, dev); // copy generated resources into the one of the page max_nums = JM_merge_resources(gctx, pdfpage, resources); cont_string = JM_EscapeStrFromBuffer(gctx, contents); result = Py_BuildValue("OO", max_nums, cont_string); Py_DECREF(cont_string); Py_DECREF(max_nums); } fz_always(gctx) { fz_drop_buffer(gctx, contents); pdf_drop_obj(gctx, resources); fz_drop_device(gctx, dev); } fz_catch(gctx) { return NULL; } return result; } %pythoncode %{ def __del__(self): if not type(self) is TextWriter: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // Font //------------------------------------------------------------------------ struct Font { %extend { ~Font() { DEBUGMSG1("Font"); fz_font *this_font = (fz_font *) $self; fz_drop_font(gctx, this_font); DEBUGMSG2; } FITZEXCEPTION(Font, !result) %pythonprepend Font %{ if fontbuffer: if hasattr(fontbuffer, "getvalue"): fontbuffer = fontbuffer.getvalue() elif isinstance(fontbuffer, bytearray): fontbuffer = bytes(fontbuffer) if not isinstance(fontbuffer, bytes): raise ValueError("bad type: 'fontbuffer'") if isinstance(fontname, str): fname_lower = fontname.lower() if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower: print("Warning: did you mean a fontfile?") if fname_lower in ("cjk", "china-t", "china-ts"): ordering = 0 elif fname_lower.startswith("china-s"): ordering = 1 elif fname_lower.startswith("korea"): ordering = 3 elif fname_lower.startswith("japan"): ordering = 2 elif fname_lower in fitz_fontdescriptors.keys(): import pymupdf_fonts # optional fonts fontbuffer = pymupdf_fonts.myfont(fname_lower) # make a copy fontname = None # ensure using fontbuffer only del pymupdf_fonts # remove package again elif ordering < 0: fontname = Base14_fontdict.get(fontname, fontname) %} %pythonappend Font %{self.thisown = True%} Font(char *fontname=NULL, char *fontfile=NULL, PyObject *fontbuffer=NULL, int script=0, char *language=NULL, int ordering=-1, int is_bold=0, int is_italic=0, int is_serif=0, int embed=1) { fz_font *font = NULL; fz_try(gctx) { fz_text_language lang = fz_text_language_from_string(language); font = JM_get_font(gctx, fontname, fontfile, fontbuffer, script, lang, ordering, is_bold, is_italic, is_serif, embed); } fz_catch(gctx) { return NULL; } return (struct Font *) font; } %pythonprepend glyph_advance %{"""Return the glyph width of a unicode (font size 1)."""%} PyObject *glyph_advance(int chr, char *language=NULL, int script=0, int wmode=0, int small_caps=0) { fz_font *font, *thisfont = (fz_font *) $self; int gid; fz_text_language lang = fz_text_language_from_string(language); if (small_caps) { gid = fz_encode_character_sc(gctx, thisfont, chr); if (gid >= 0) font = thisfont; } else { gid = fz_encode_character_with_fallback(gctx, thisfont, chr, script, lang, &font); } return PyFloat_FromDouble((double) fz_advance_glyph(gctx, font, gid, wmode)); } FITZEXCEPTION(text_length, !result) %pythonprepend text_length %{"""Return length of unicode 'text' under a fontsize."""%} PyObject *text_length(PyObject *text, double fontsize=11, char *language=NULL, int script=0, int wmode=0, int small_caps=0) { fz_font *font=NULL, *thisfont = (fz_font *) $self; fz_text_language lang = fz_text_language_from_string(language); double rc = 0; int gid; fz_try(gctx) { if (!PyUnicode_Check(text) || PyUnicode_READY(text) != 0) { RAISEPY(gctx, MSG_BAD_TEXT, PyExc_TypeError); } Py_ssize_t i, len = PyUnicode_GET_LENGTH(text); int kind = PyUnicode_KIND(text); void *data = PyUnicode_DATA(text); for (i = 0; i < len; i++) { int c = PyUnicode_READ(kind, data, i); if (small_caps) { gid = fz_encode_character_sc(gctx, thisfont, c); if (gid >= 0) font = thisfont; } else { gid = fz_encode_character_with_fallback(gctx,thisfont, c, script, lang, &font); } rc += (double) fz_advance_glyph(gctx, font, gid, wmode); } } fz_catch(gctx) { PyErr_Clear(); return NULL; } rc *= fontsize; return PyFloat_FromDouble(rc); } FITZEXCEPTION(char_lengths, !result) %pythonprepend char_lengths %{"""Return tuple of char lengths of unicode 'text' under a fontsize."""%} PyObject *char_lengths(PyObject *text, double fontsize=11, char *language=NULL, int script=0, int wmode=0, int small_caps=0) { fz_font *font, *thisfont = (fz_font *) $self; fz_text_language lang = fz_text_language_from_string(language); PyObject *rc = NULL; int gid; fz_try(gctx) { if (!PyUnicode_Check(text) || PyUnicode_READY(text) != 0) { RAISEPY(gctx, MSG_BAD_TEXT, PyExc_TypeError); } Py_ssize_t i, len = PyUnicode_GET_LENGTH(text); int kind = PyUnicode_KIND(text); void *data = PyUnicode_DATA(text); rc = PyTuple_New(len); for (i = 0; i < len; i++) { int c = PyUnicode_READ(kind, data, i); if (small_caps) { gid = fz_encode_character_sc(gctx, thisfont, c); if (gid >= 0) font = thisfont; } else { gid = fz_encode_character_with_fallback(gctx,thisfont, c, script, lang, &font); } PyTuple_SET_ITEM(rc, i, PyFloat_FromDouble(fontsize * (double) fz_advance_glyph(gctx, font, gid, wmode))); } } fz_catch(gctx) { PyErr_Clear(); Py_CLEAR(rc); return NULL; } return rc; } %pythonprepend glyph_bbox %{"""Return the glyph bbox of a unicode (font size 1)."""%} %pythonappend glyph_bbox %{val = Rect(val)%} PyObject *glyph_bbox(int chr, char *language=NULL, int script=0, int small_caps=0) { fz_font *font, *thisfont = (fz_font *) $self; int gid; fz_text_language lang = fz_text_language_from_string(language); if (small_caps) { gid = fz_encode_character_sc(gctx, thisfont, chr); if (gid >= 0) font = thisfont; } else { gid = fz_encode_character_with_fallback(gctx, thisfont, chr, script, lang, &font); } return JM_py_from_rect(fz_bound_glyph(gctx, font, gid, fz_identity)); } %pythonprepend has_glyph %{"""Check whether font has a glyph for this unicode."""%} PyObject *has_glyph(int chr, char *language=NULL, int script=0, int fallback=0, int small_caps=0) { fz_font *font, *thisfont = (fz_font *) $self; fz_text_language lang; int gid = 0; if (fallback) { lang = fz_text_language_from_string(language); gid = fz_encode_character_with_fallback(gctx, (fz_font *) $self, chr, script, lang, &font); } else { if (!small_caps) { gid = fz_encode_character(gctx, thisfont, chr); } else { gid = fz_encode_character_sc(gctx, thisfont, chr); } } return Py_BuildValue("i", gid); } %pythoncode %{ def valid_codepoints(self): from array import array gc = self.glyph_count cp = array("l", (0,) * gc) arr = cp.buffer_info() self._valid_unicodes(arr) return array("l", sorted(set(cp))[1:]) %} void _valid_unicodes(PyObject *arr) { fz_font *font = (fz_font *) $self; PyObject *temp = PySequence_ITEM(arr, 0); void *ptr = PyLong_AsVoidPtr(temp); JM_valid_chars(gctx, font, ptr); Py_DECREF(temp); } %pythoncode %{@property%} PyObject *flags() { fz_font_flags_t *f = fz_font_flags((fz_font *) $self); if (!f) Py_RETURN_NONE; return Py_BuildValue( "{s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N" #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 ",s:N,s:N" #endif "}", "mono", JM_BOOL(f->is_mono), "serif", JM_BOOL(f->is_serif), "bold", JM_BOOL(f->is_bold), "italic", JM_BOOL(f->is_italic), "substitute", JM_BOOL(f->ft_substitute), "stretch", JM_BOOL(f->ft_stretch), "fake-bold", JM_BOOL(f->fake_bold), "fake-italic", JM_BOOL(f->fake_italic), "opentype", JM_BOOL(f->has_opentype), "invalid-bbox", JM_BOOL(f->invalid_bbox), "cjk", JM_BOOL(f->cjk), "cjk-lang", (f->cjk ? PyLong_FromUnsignedLong((unsigned long) f->cjk_lang) : Py_BuildValue("s",NULL)) #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 , "embed", JM_BOOL(f->embed), "never-embed", JM_BOOL(f->never_embed) #endif ); } %pythoncode %{@property%} PyObject *is_bold() { fz_font *font = (fz_font *) $self; if (fz_font_is_bold(gctx,font)) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } %pythoncode %{@property%} PyObject *is_serif() { fz_font *font = (fz_font *) $self; if (fz_font_is_serif(gctx,font)) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } %pythoncode %{@property%} PyObject *is_italic() { fz_font *font = (fz_font *) $self; if (fz_font_is_italic(gctx,font)) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } %pythoncode %{@property%} PyObject *is_monospaced() { fz_font *font = (fz_font *) $self; if (fz_font_is_monospaced(gctx,font)) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } /* temporarily disabled * PyObject *is_writable() * { * fz_font *font = (fz_font *) $self; * if (fz_font_t3_procs(gctx, font) || * fz_font_flags(font)->ft_substitute || * !pdf_font_writing_supported(font)) { * Py_RETURN_FALSE; * } * Py_RETURN_TRUE; * } */ %pythoncode %{@property%} PyObject *name() { return JM_UnicodeFromStr(fz_font_name(gctx, (fz_font *) $self)); } %pythoncode %{@property%} int glyph_count() { fz_font *this_font = (fz_font *) $self; return this_font->glyph_count; } %pythoncode %{@property%} PyObject *buffer() { fz_font *this_font = (fz_font *) $self; unsigned char *data = NULL; size_t len = fz_buffer_storage(gctx, this_font->buffer, &data); return JM_BinFromCharSize(data, len); } %pythoncode %{@property%} %pythonappend bbox%{val = Rect(val)%} PyObject *bbox() { fz_font *this_font = (fz_font *) $self; return JM_py_from_rect(fz_font_bbox(gctx, this_font)); } %pythoncode %{@property%} %pythonprepend ascender %{"""Return the glyph ascender value."""%} float ascender() { return fz_font_ascender(gctx, (fz_font *) $self); } %pythoncode %{@property%} %pythonprepend descender %{"""Return the glyph descender value."""%} float descender() { return fz_font_descender(gctx, (fz_font *) $self); } %pythoncode %{ @property def is_writable(self): return True def glyph_name_to_unicode(self, name): """Return the unicode for a glyph name.""" return glyph_name_to_unicode(name) def unicode_to_glyph_name(self, ch): """Return the glyph name for a unicode.""" return unicode_to_glyph_name(ch) def __repr__(self): return "Font('%s')" % self.name def __del__(self): if not type(self) is Font: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // DocumentWriter //------------------------------------------------------------------------ struct DocumentWriter { %extend { ~DocumentWriter() { // need this structure to free any fz_output the writer may have typedef struct { // copied from pdf_write.c fz_document_writer super; pdf_document *pdf; pdf_write_options opts; fz_output *out; fz_rect mediabox; pdf_obj *resources; fz_buffer *contents; } pdf_writer; fz_document_writer *writer_fz = (fz_document_writer *) $self; fz_output *out = NULL; pdf_writer *writer_pdf = (pdf_writer *) writer_fz; if (writer_pdf) { out = writer_pdf->out; if (out) { DEBUGMSG1("Output of DocumentWriter"); fz_drop_output(gctx, out); writer_pdf->out = NULL; DEBUGMSG2; } } DEBUGMSG1("DocumentWriter"); fz_drop_document_writer( gctx, writer_fz); DEBUGMSG2; } FITZEXCEPTION(DocumentWriter, !result) %pythonprepend DocumentWriter %{ if type(path) is str: pass elif hasattr(path, "absolute"): path = str(path) elif hasattr(path, "name"): path = path.name if options==None: options="" %} %pythonappend DocumentWriter %{ %} DocumentWriter( PyObject* path, const char* options=NULL) { fz_output *out = NULL; fz_document_writer* ret=NULL; fz_try(gctx) { if (PyUnicode_Check(path)) { ret = fz_new_pdf_writer( gctx, PyUnicode_AsUTF8(path), options); } else { out = JM_new_output_fileptr(gctx, path); ret = fz_new_pdf_writer_with_output(gctx, out, options); } } fz_catch(gctx) { return NULL; } return (struct DocumentWriter*) ret; } struct DeviceWrapper* begin_page( PyObject* mediabox) { fz_rect mediabox2 = JM_rect_from_py(mediabox); fz_device* device = fz_begin_page( gctx, (fz_document_writer*) $self, mediabox2); struct DeviceWrapper* device_wrapper = (struct DeviceWrapper*) calloc(1, sizeof(struct DeviceWrapper)) ; device_wrapper->device = device; device_wrapper->list = NULL; return device_wrapper; } void end_page() { fz_end_page( gctx, (fz_document_writer*) $self); } void close() { fz_document_writer *writer = (fz_document_writer*) $self; fz_close_document_writer( gctx, writer); } %pythoncode %{ def __del__(self): if not type(self) is DocumentWriter: return if getattr(self, "thisown", False): self.__swig_destroy__(self) def __enter__(self): return self def __exit__(self, *args): self.close() %} } }; //------------------------------------------------------------------------ // Archive //------------------------------------------------------------------------ struct Archive { %extend { ~Archive() { DEBUGMSG1("Archive"); fz_drop_archive( gctx, (fz_archive *) $self); DEBUGMSG2; } FITZEXCEPTION(Archive, !result) %pythonprepend Archive %{ self._subarchives = [] %} %pythonappend Archive %{ self.thisown = True if args != (): self.add(*args) %} //--------------------------------------- // new empty archive //--------------------------------------- Archive(struct Archive *a0=NULL, const char *path=NULL) { fz_archive *arch=NULL; fz_try(gctx) { arch = fz_new_multi_archive(gctx); } fz_catch(gctx) { return NULL; } return (struct Archive *) arch; } Archive(PyObject *a0=NULL, const char *path=NULL) { fz_archive *arch=NULL; fz_try(gctx) { arch = fz_new_multi_archive(gctx); } fz_catch(gctx) { return NULL; } return (struct Archive *) arch; } FITZEXCEPTION(has_entry, !result) PyObject *has_entry(const char *name) { fz_archive *arch = (fz_archive *) $self; int ret = 0; fz_try(gctx) { ret = fz_has_archive_entry(gctx, arch, name); } fz_catch(gctx) { return NULL; } return JM_BOOL(ret); } FITZEXCEPTION(read_entry, !result) PyObject *read_entry(const char *name) { fz_archive *arch = (fz_archive *) $self; PyObject *ret = NULL; fz_buffer *buff = NULL; fz_try(gctx) { buff = fz_read_archive_entry(gctx, arch, name); ret = JM_BinFromBuffer(gctx, buff); } fz_always(gctx) { fz_drop_buffer(gctx, buff); } fz_catch(gctx) { return NULL; } return ret; } //-------------------------------------- // add dir //-------------------------------------- FITZEXCEPTION(_add_dir, !result) PyObject *_add_dir(const char *folder, const char *path=NULL) { fz_archive *arch = (fz_archive *) $self; fz_archive *sub = NULL; fz_try(gctx) { sub = fz_open_directory(gctx, folder); fz_mount_multi_archive(gctx, arch, sub, path); } fz_always(gctx) { fz_drop_archive(gctx, sub); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------- // add archive //---------------------------------- FITZEXCEPTION(_add_arch, !result) PyObject *_add_arch(struct Archive *subarch, const char *path=NULL) { fz_archive *arch = (fz_archive *) $self; fz_archive *sub = (fz_archive *) subarch; fz_try(gctx) { fz_mount_multi_archive(gctx, arch, sub, path); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------- // add ZIP/TAR from file //---------------------------------- FITZEXCEPTION(_add_ziptarfile, !result) PyObject *_add_ziptarfile(const char *filepath, int type, const char *path=NULL) { fz_archive *arch = (fz_archive *) $self; fz_archive *sub = NULL; fz_try(gctx) { if (type==1) { sub = fz_open_zip_archive(gctx, filepath); } else { sub = fz_open_tar_archive(gctx, filepath); } fz_mount_multi_archive(gctx, arch, sub, path); } fz_always(gctx) { fz_drop_archive(gctx, sub); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------- // add ZIP/TAR from memory //---------------------------------- FITZEXCEPTION(_add_ziptarmemory, !result) PyObject *_add_ziptarmemory(PyObject *memory, int type, const char *path=NULL) { fz_archive *arch = (fz_archive *) $self; fz_archive *sub = NULL; fz_stream *stream = NULL; fz_buffer *buff = NULL; fz_try(gctx) { buff = JM_BufferFromBytes(gctx, memory); stream = fz_open_buffer(gctx, buff); if (type==1) { sub = fz_open_zip_archive_with_stream(gctx, stream); } else { sub = fz_open_tar_archive_with_stream(gctx, stream); } fz_mount_multi_archive(gctx, arch, sub, path); } fz_always(gctx) { fz_drop_stream(gctx, stream); fz_drop_buffer(gctx, buff); fz_drop_archive(gctx, sub); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } //---------------------------------- // add "tree" item //---------------------------------- FITZEXCEPTION(_add_treeitem, !result) PyObject *_add_treeitem(PyObject *memory, const char *name, const char *path=NULL) { fz_archive *arch = (fz_archive *) $self; fz_archive *sub = NULL; fz_buffer *buff = NULL; int drop_sub = 0; fz_try(gctx) { buff = JM_BufferFromBytes(gctx, memory); sub = JM_last_tree(gctx, arch, path); if (!sub) { sub = fz_new_tree_archive(gctx, NULL); drop_sub = 1; } fz_tree_archive_add_buffer(gctx, sub, name, buff); if (drop_sub) { fz_mount_multi_archive(gctx, arch, sub, path); } } fz_always(gctx) { fz_drop_buffer(gctx, buff); if (drop_sub) { fz_drop_archive(gctx, sub); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def add(self, content, path=None): """Add a sub-archive. Args: content: content to be added. May be one of Archive, folder name, file name, raw bytes (bytes, bytearray), zipfile, tarfile, or a sequence of any of these types. path: (str) a "virtual" path name, under which the elements of content can be retrieved. Use it to e.g. cope with duplicate element names. """ bin_ok = lambda x: isinstance(x, (bytes, bytearray, io.BytesIO)) entries = [] mount = None fmt = None def make_subarch(): subarch = {"fmt": fmt, "entries": entries, "path": mount} if fmt != "tree" or self._subarchives == []: self._subarchives.append(subarch) else: ltree = self._subarchives[-1] if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]: self._subarchives.append(subarch) else: ltree["entries"].extend(subarch["entries"]) self._subarchives[-1] = ltree return if isinstance(content, zipfile.ZipFile): fmt = "zip" entries = content.namelist() mount = path filename = getattr(content, "filename", None) fp = getattr(content, "fp", None) if filename: self._add_ziptarfile(filename, 1, path) else: self._add_ziptarmemory(fp.getvalue(), 1, path) return make_subarch() if isinstance(content, tarfile.TarFile): fmt = "tar" entries = content.getnames() mount = path filename = getattr(content.fileobj, "name", None) fp = content.fileobj if not isinstance(fp, io.BytesIO) and not filename: fp = fp.fileobj if filename: self._add_ziptarfile(filename, 0, path) else: self._add_ziptarmemory(fp.getvalue(), 0, path) return make_subarch() if isinstance(content, Archive): fmt = "multi" mount = path self._add_arch(content, path) return make_subarch() if bin_ok(content): if not (path and type(path) is str): raise ValueError("need name for binary content") fmt = "tree" mount = None entries = [path] self._add_treeitem(content, path) return make_subarch() if hasattr(content, "name"): content = content.name elif isinstance(content, pathlib.Path): content = str(content) if os.path.isdir(str(content)): a0 = str(content) fmt = "dir" mount = path entries = os.listdir(a0) self._add_dir(a0, path) return make_subarch() if os.path.isfile(str(content)): if not (path and type(path) is str): raise ValueError("need name for binary content") a0 = str(content) _ = open(a0, "rb") ff = _.read() _.close() fmt = "tree" mount = None entries = [path] self._add_treeitem(ff, path) return make_subarch() if type(content) is str or not getattr(content, "__getitem__", None): raise ValueError("bad archive content") #---------------------------------------- # handling sequence types here #---------------------------------------- if len(content) == 2: # covers the tree item plus path data, name = content if bin_ok(data) or os.path.isfile(str(data)): if not type(name) is str: raise ValueError(f"bad item name {name}") mount = path fmt = "tree" if bin_ok(data): self._add_treeitem(data, name, path=mount) else: _ = open(str(data), "rb") ff = _.read() _.close() seld._add_treeitem(ff, name, path=mount) entries = [name] return make_subarch() # deal with sequence of disparate items for item in content: self.add(item, path) __doc__ = """Archive(dirname [, path]) - from folder Archive(file [, path]) - from file name or object Archive(data, name) - from memory item Archive() - empty archive Archive(archive [, path]) - from archive """ @property def entry_list(self): """List of sub archives.""" return self._subarchives def __repr__(self): return f"Archive, sub-archives: {len(self._subarchives)}" def __del__(self): if not type(self) is Archive: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // Xml //------------------------------------------------------------------------ struct Xml { %extend { ~Xml() { DEBUGMSG1("Xml"); fz_drop_xml( gctx, (fz_xml*) $self); DEBUGMSG2; } FITZEXCEPTION(Xml, !result) Xml(fz_xml* xml) { fz_keep_xml( gctx, xml); return (struct Xml*) xml; } Xml(const char *html) { fz_buffer *buff = NULL; fz_xml *ret = NULL; fz_try(gctx) { buff = fz_new_buffer_from_copied_data(gctx, html, strlen(html)+1); ret = fz_parse_xml_from_html5(gctx, buff); } fz_always(gctx) { fz_drop_buffer(gctx, buff); } fz_catch(gctx) { return NULL; } fz_keep_xml(gctx, ret); return (struct Xml*) ret; } %pythoncode %{@property%} FITZEXCEPTION (root, !result) struct Xml* root() { fz_xml* ret = NULL; fz_try(gctx) { ret = fz_xml_root((fz_xml_doc *) $self); } fz_catch(gctx) { return NULL; } return (struct Xml*) ret; } FITZEXCEPTION (bodytag, !result) struct Xml* bodytag() { fz_xml* ret = NULL; fz_try(gctx) { ret = fz_keep_xml( gctx, fz_dom_body( gctx, (fz_xml *) $self)); } fz_catch(gctx) { return NULL; } return (struct Xml*) ret; } FITZEXCEPTION (append_child, !result) PyObject *append_child( struct Xml* child) { fz_try(gctx) { fz_dom_append_child( gctx, (fz_xml *) $self, (fz_xml *) child); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION (create_text_node, !result) struct Xml* create_text_node( const char *text) { fz_xml* ret = NULL; fz_try(gctx) { ret = fz_dom_create_text_node( gctx,(fz_xml *) $self, text); } fz_catch(gctx) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } FITZEXCEPTION (create_element, !result) struct Xml* create_element( const char *tag) { fz_xml* ret = NULL; fz_try(gctx) { ret = fz_dom_create_element( gctx, (fz_xml *)$self, tag); } fz_catch(gctx) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } struct Xml *find(const char *tag, const char *att, const char *match) { fz_xml* ret=NULL; ret = fz_dom_find( gctx, (fz_xml *)$self, tag, att, match); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } struct Xml *find_next( const char *tag, const char *att, const char *match) { fz_xml* ret=NULL; ret = fz_dom_find_next( gctx, (fz_xml *)$self, tag, att, match); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } %pythoncode %{@property%} struct Xml *next() { fz_xml* ret=NULL; ret = fz_dom_next( gctx, (fz_xml *)$self); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } %pythoncode %{@property%} struct Xml *previous() { fz_xml* ret=NULL; ret = fz_dom_previous( gctx, (fz_xml *)$self); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } FITZEXCEPTION (set_attribute, !result) PyObject *set_attribute(const char *key, const char *value) { fz_try(gctx) { if (strlen(key)==0) { RAISEPY(gctx, "key must not be empty", PyExc_ValueError); } fz_dom_add_attribute(gctx, (fz_xml *)$self, key, value); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION (remove_attribute, !result) PyObject *remove_attribute(const char *key) { fz_try(gctx) { if (strlen(key)==0) { RAISEPY(gctx, "key must not be empty", PyExc_ValueError); } fz_xml *elt = (fz_xml *)$self; fz_dom_remove_attribute(gctx, elt, key); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION (get_attribute_value, !result) PyObject *get_attribute_value(const char *key) { const char *ret=NULL; fz_try(gctx) { if (strlen(key)==0) { RAISEPY(gctx, "key must not be empty", PyExc_ValueError); } fz_xml *elt = (fz_xml *)$self; ret=fz_dom_attribute(gctx, elt, key); } fz_catch(gctx) { return NULL; } return Py_BuildValue("s", ret); } FITZEXCEPTION (get_attributes, !result) PyObject *get_attributes() { fz_xml *this = (fz_xml *) $self; if (fz_xml_text(this)) { // text node has none Py_RETURN_NONE; } PyObject *result=PyDict_New(); fz_try(gctx) { int i=0; const char *key=NULL; const char *val=NULL; while (1) { val = fz_dom_get_attribute(gctx, this, i, &key); if (!val || !key) { break; } PyObject *temp = Py_BuildValue("s",val); PyDict_SetItemString(result, key, temp); Py_DECREF(temp); i += 1; } } fz_catch(gctx) { Py_DECREF(result); return NULL; } return result; } FITZEXCEPTION (insert_before, !result) PyObject *insert_before(struct Xml *node) { fz_xml *existing = (fz_xml *) $self; fz_xml *what = (fz_xml *) node; fz_try(gctx) { fz_dom_insert_before(gctx, existing, what); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION (insert_after, !result) PyObject *insert_after(struct Xml *node) { fz_xml *existing = (fz_xml *) $self; fz_xml *what = (fz_xml *) node; fz_try(gctx) { fz_dom_insert_after(gctx, existing, what); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION (clone, !result) struct Xml* clone() { fz_xml* ret = NULL; fz_try(gctx) { ret = fz_dom_clone( gctx, (fz_xml *)$self); } fz_catch(gctx) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } %pythoncode %{@property%} struct Xml *parent() { fz_xml* ret = NULL; ret = fz_dom_parent( gctx, (fz_xml *)$self); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } %pythoncode %{@property%} struct Xml *first_child() { fz_xml* ret = NULL; fz_xml *this = (fz_xml *)$self; if (fz_xml_text(this)) { // a text node has no child return NULL; } ret = fz_dom_first_child( gctx, (fz_xml *)$self); if (!ret) { return NULL; } fz_keep_xml( gctx, ret); return (struct Xml*) ret; } FITZEXCEPTION (remove, !result) PyObject *remove() { fz_try(gctx) { fz_dom_remove( gctx, (fz_xml *)$self); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{@property%} PyObject *text() { return Py_BuildValue("s", fz_xml_text((fz_xml *)$self)); } %pythoncode %{@property%} PyObject *tagname() { return Py_BuildValue("s", fz_xml_tag((fz_xml *)$self)); } %pythoncode %{ def _get_node_tree(self): def show_node(node, items, shift): while node != None: if node.is_text: items.append((shift, f'"{node.text}"')) node = node.next continue items.append((shift, f"({node.tagname}")) for k, v in node.get_attributes().items(): items.append((shift, f"={k} '{v}'")) child = node.first_child if child: items = show_node(child, items, shift + 1) items.append((shift, f"){node.tagname}")) node = node.next return items shift = 0 items = [] items = show_node(self, items, shift) return items def debug(self): """Print a list of the node tree below self.""" items = self._get_node_tree() for item in items: print(" " * item[0] + item[1].replace("\n", "\\n")) @property def is_text(self): """Check if this is a text node.""" return self.text != None @property def last_child(self): """Return last child node.""" child = self.first_child if child==None: return None while True: if child.next == None: return child child = child.next @staticmethod def color_text(color): if type(color) is str: return color if type(color) is int: return f"rgb({sRGB_to_rgb(color)})" if type(color) in (tuple, list): return f"rgb{tuple(color)}" return color def add_number_list(self, start=1, numtype=None): """Add numbered list ("ol" tag)""" child = self.create_element("ol") if start > 1: child.set_attribute("start", str(start)) if numtype != None: child.set_attribute("type", numtype) self.append_child(child) return child def add_description_list(self): """Add description list ("dl" tag)""" child = self.create_element("dl") self.append_child(child) return child def add_image(self, name, width=None, height=None, imgfloat=None, align=None): """Add image node (tag "img").""" child = self.create_element("img") if width != None: child.set_attribute("width", f"{width}") if height != None: child.set_attribute("height", f"{height}") if imgfloat != None: child.set_attribute("style", f"float: {imgfloat}") if align != None: child.set_attribute("align", f"{align}") child.set_attribute("src", f"{name}") self.append_child(child) return child def add_bullet_list(self): """Add bulleted list ("ul" tag)""" child = self.create_element("ul") self.append_child(child) return child def add_list_item(self): """Add item ("li" tag) under a (numbered or bulleted) list.""" if self.tagname not in ("ol", "ul"): raise ValueError("cannot add list item to", self.tagname) child = self.create_element("li") self.append_child(child) return child def add_span(self): child = self.create_element("span") self.append_child(child) return child def add_paragraph(self): """Add "p" tag""" child = self.create_element("p") if self.tagname != "p": self.append_child(child) else: self.parent.append_child(child) return child def add_header(self, level=1): """Add header tag""" if level not in range(1, 7): raise ValueError("Header level must be in [1, 6]") this_tag = self.tagname new_tag = f"h{level}" child = self.create_element(new_tag) prev = self if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"): self.append_child(child) return child self.parent.append_child(child) return child def add_division(self): """Add "div" tag""" child = self.create_element("div") self.append_child(child) return child def add_horizontal_line(self): """Add horizontal line ("hr" tag)""" child = self.create_element("hr") self.append_child(child) return child def add_link(self, href, text=None): """Add a hyperlink ("a" tag)""" child = self.create_element("a") if not isinstance(text, str): text = href child.set_attribute("href", href) child.append_child(self.create_text_node(text)) prev = self.span_bottom() if prev == None: prev = self prev.append_child(child) return self def add_code(self, text=None): """Add a "code" tag""" child = self.create_element("code") if type(text) is str: child.append_child(self.create_text_node(text)) prev = self.span_bottom() if prev == None: prev = self prev.append_child(child) return self add_var = add_code add_samp = add_code add_kbd = add_code def add_superscript(self, text=None): """Add a superscript ("sup" tag)""" child = self.create_element("sup") if type(text) is str: child.append_child(self.create_text_node(text)) prev = self.span_bottom() if prev == None: prev = self prev.append_child(child) return self def add_subscript(self, text=None): """Add a subscript ("sub" tag)""" child = self.create_element("sub") if type(text) is str: child.append_child(self.create_text_node(text)) prev = self.span_bottom() if prev == None: prev = self prev.append_child(child) return self def add_codeblock(self): """Add monospaced lines ("pre" node)""" child = self.create_element("pre") self.append_child(child) return child def span_bottom(self): """Find deepest level in stacked spans.""" parent = self child = self.last_child if child == None: return None while child.is_text: child = child.previous if child == None: break if child == None or child.tagname != "span": return None while True: if child == None: return parent if child.tagname in ("a", "sub","sup","body") or child.is_text: child = child.next continue if child.tagname == "span": parent = child child = child.first_child else: return parent def append_styled_span(self, style): span = self.create_element("span") span.add_style(style) prev = self.span_bottom() if prev == None: prev = self prev.append_child(span) return prev def set_margins(self, val): """Set margin values via CSS style""" text = "margins: %s" % val self.append_styled_span(text) return self def set_font(self, font): """Set font-family name via CSS style""" text = "font-family: %s" % font self.append_styled_span(text) return self def set_color(self, color): """Set text color via CSS style""" text = f"color: %s" % self.color_text(color) self.append_styled_span(text) return self def set_columns(self, cols): """Set number of text columns via CSS style""" text = f"columns: {cols}" self.append_styled_span(text) return self def set_bgcolor(self, color): """Set background color via CSS style""" text = f"background-color: %s" % self.color_text(color) self.add_style(text) # does not work on span level return self def set_opacity(self, opacity): """Set opacity via CSS style""" text = f"opacity: {opacity}" self.append_styled_span(text) return self def set_align(self, align): """Set text alignment via CSS style""" text = "text-align: %s" if isinstance( align, str): t = align elif align == TEXT_ALIGN_LEFT: t = "left" elif align == TEXT_ALIGN_CENTER: t = "center" elif align == TEXT_ALIGN_RIGHT: t = "right" elif align == TEXT_ALIGN_JUSTIFY: t = "justify" else: raise ValueError(f"Unrecognised align={align}") text = text % t self.add_style(text) return self def set_underline(self, val="underline"): text = "text-decoration: %s" % val self.append_styled_span(text) return self def set_pagebreak_before(self): """Insert a page break before this node.""" text = "page-break-before: always" self.add_style(text) return self def set_pagebreak_after(self): """Insert a page break after this node.""" text = "page-break-after: always" self.add_style(text) return self def set_fontsize(self, fontsize): """Set font size name via CSS style""" if type(fontsize) is str: px="" else: px="px" text = f"font-size: {fontsize}{px}" self.append_styled_span(text) return self def set_lineheight(self, lineheight): """Set line height name via CSS style - block-level only.""" text = f"line-height: {lineheight}" self.add_style(text) return self def set_leading(self, leading): """Set inter-line spacing value via CSS style - block-level only.""" text = f"-mupdf-leading: {leading}" self.add_style(text) return self def set_word_spacing(self, spacing): """Set inter-word spacing value via CSS style""" text = f"word-spacing: {spacing}" self.append_styled_span(text) return self def set_letter_spacing(self, spacing): """Set inter-letter spacing value via CSS style""" text = f"letter-spacing: {spacing}" self.append_styled_span(text) return self def set_text_indent(self, indent): """Set text indentation name via CSS style - block-level only.""" text = f"text-indent: {indent}" self.add_style(text) return self def set_bold(self, val=True): """Set bold on / off via CSS style""" if val: val="bold" else: val="normal" text = "font-weight: %s" % val self.append_styled_span(text) return self def set_italic(self, val=True): """Set italic on / off via CSS style""" if val: val="italic" else: val="normal" text = "font-style: %s" % val self.append_styled_span(text) return self def set_properties( self, align=None, bgcolor=None, bold=None, color=None, columns=None, font=None, fontsize=None, indent=None, italic=None, leading=None, letter_spacing=None, lineheight=None, margins=None, pagebreak_after=None, pagebreak_before=None, word_spacing=None, unqid=None, cls=None, ): """Set any or all properties of a node. To be used for existing nodes preferrably. """ root = self.root temp = root.add_division() if align is not None: temp.set_align(align) if bgcolor is not None: temp.set_bgcolor(bgcolor) if bold is not None: temp.set_bold(bold) if color is not None: temp.set_color(color) if columns is not None: temp.set_columns(columns) if font is not None: temp.set_font(font) if fontsize is not None: temp.set_fontsize(fontsize) if indent is not None: temp.set_text_indent(indent) if italic is not None: temp.set_italic(italic) if leading is not None: temp.set_leading(leading) if letter_spacing is not None: temp.set_letter_spacing(letter_spacing) if lineheight is not None: temp.set_lineheight(lineheight) if margins is not None: temp.set_margins(margins) if pagebreak_after is not None: temp.set_pagebreak_after() if pagebreak_before is not None: temp.set_pagebreak_before() if word_spacing is not None: temp.set_word_spacing(word_spacing) if unqid is not None: self.set_id(unqid) if cls is not None: self.add_class(cls) styles = [] top_style = temp.get_attribute_value("style") if top_style is not None: styles.append(top_style) child = temp.first_child while child: styles.append(child.get_attribute_value("style")) child = child.first_child self.set_attribute("style", ";".join(styles)) temp.remove() return self def set_id(self, unique): """Set a unique id.""" # check uniqueness tagname = self.tagname root = self.root if root.find(None, "id", unique): raise ValueError(f"id '{unique}' already exists") self.set_attribute("id", unique) return self def add_text(self, text): """Add text. Line breaks are honored.""" lines = text.splitlines() line_count = len(lines) prev = self.span_bottom() if prev == None: prev = self for i, line in enumerate(lines): prev.append_child(self.create_text_node(line)) if i < line_count - 1: prev.append_child(self.create_element("br")) return self def add_style(self, text): """Set some style via CSS style. Replaces complete style spec.""" style = self.get_attribute_value("style") if style != None and text in style: return self self.remove_attribute("style") if style == None: style = text else: style += ";" + text self.set_attribute("style", style) return self def add_class(self, text): """Set some class via CSS. Replaces complete class spec.""" cls = self.get_attribute_value("class") if cls != None and text in cls: return self self.remove_attribute("class") if cls == None: cls = text else: cls += " " + text self.set_attribute("class", cls) return self def insert_text(self, text): lines = text.splitlines() line_count = len(lines) for i, line in enumerate(lines): self.append_child(self.create_text_node(line)) if i < line_count - 1: self.append_child(self.create_element("br")) return self def __enter__(self): return self def __exit__(self, *args): pass def __del__(self): if not type(self) is Xml: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // Story //------------------------------------------------------------------------ struct Story { %extend { ~Story() { DEBUGMSG1("Story"); fz_story *this_story = (fz_story *) $self; fz_drop_story(gctx, this_story); DEBUGMSG2; } FITZEXCEPTION(Story, !result) %pythonprepend Story %{ if archive != None and isinstance(archive, Archive) == False: archive = Archive(archive) %} Story(const char* html=NULL, const char *user_css=NULL, double em=12, struct Archive *archive=NULL) { fz_story* story = NULL; fz_buffer *buffer = NULL; fz_archive* arch = NULL; fz_var(story); fz_var(buffer); const char *html2=""; if (html) { html2=html; } fz_try(gctx) { buffer = fz_new_buffer_from_copied_data(gctx, html2, strlen(html2)+1); if (archive) { arch = (fz_archive *) archive; } story = fz_new_story(gctx, buffer, user_css, em, arch); } fz_always(gctx) { fz_drop_buffer(gctx, buffer); } fz_catch(gctx) { return NULL; } struct Story* ret = (struct Story *) story; return ret; } FITZEXCEPTION(reset, !result) PyObject* reset() { fz_try(gctx) { fz_reset_story(gctx, (fz_story *)$self); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(place, !result) PyObject* place( PyObject* where) { PyObject* ret = NULL; fz_try(gctx) { fz_rect where2 = JM_rect_from_py(where); fz_rect filled; int more = fz_place_story( gctx, (fz_story*) $self, where2, &filled); ret = PyTuple_New(2); PyTuple_SET_ITEM( ret, 0, Py_BuildValue( "i", more)); PyTuple_SET_ITEM( ret, 1, JM_py_from_rect( filled)); } fz_catch(gctx) { return NULL; } return ret; } FITZEXCEPTION(draw, !result) PyObject* draw( struct DeviceWrapper* device, PyObject* matrix=NULL) { fz_try(gctx) { fz_matrix ctm2 = JM_matrix_from_py( matrix); fz_device *dev = (device) ? device->device : NULL; fz_draw_story( gctx, (fz_story*) $self, dev, ctm2); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(document, !result) struct Xml* document() { fz_xml* dom=NULL; fz_try(gctx) { dom = fz_story_document( gctx, (fz_story*) $self); } fz_catch(gctx) { return NULL; } fz_keep_xml( gctx, dom); return (struct Xml*) dom; } FITZEXCEPTION(element_positions, !result) %pythonprepend element_positions %{ """Trigger a callback function to record where items have been placed. Args: function: a function accepting exactly one argument. args: an optional dictionary for passing additional data. """ if type(args) is dict: for k in args.keys(): if not (type(k) is str and k.isidentifier()): raise ValueError(f"invalid key '{k}'") else: args = {} if not callable(function) or function.__code__.co_argcount != 1: raise ValueError("callback 'function' must be a callable with exactly one argument") %} PyObject* element_positions(PyObject *function, PyObject *args) { PyObject *callarg=NULL; fz_try(gctx) { callarg = Py_BuildValue("OO", function, args); fz_story_positions(gctx, (fz_story *) $self, Story_Callback, callarg); } fz_always(gctx) { Py_CLEAR(callarg); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythoncode %{ def write(self, writer, rectfn, positionfn=None, pagefn=None): dev = None page_num = 0 rect_num = 0 filled = Rect(0, 0, 0, 0) while 1: mediabox, rect, ctm = rectfn(rect_num, filled) rect_num += 1 if mediabox: # new page. page_num += 1 more, filled = self.place( rect) #print(f"write(): positionfn={positionfn}") if positionfn: def positionfn2(position): # We add a `.page_num` member to the # `ElementPosition` instance. position.page_num = page_num #print(f"write(): position={position}") positionfn(position) self.element_positions(positionfn2, {}) if writer: if mediabox: # new page. if dev: if pagefn: pagefn(page_num, medibox, dev, 1) writer.end_page() dev = writer.begin_page( mediabox) if pagefn: pagefn(page_num, mediabox, dev, 0) self.draw( dev, ctm) if not more: if pagefn: pagefn( page_num, mediabox, dev, 1) writer.end_page() else: self.draw(None, ctm) if not more: break @staticmethod def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True): positions = list() content = None # Iterate until stable. while 1: content_prev = content content = contentfn( positions) stable = False if content == content_prev: stable = True content2 = content story = Story(content2, user_css, em, archive) if add_header_ids: story.add_header_ids() positions = list() def positionfn2(position): #print(f"write_stabilized(): stable={stable} positionfn={positionfn} position={position}") positions.append(position) if stable and positionfn: positionfn(position) story.write( writer if stable else None, rectfn, positionfn2, pagefn, ) if stable: break def add_header_ids(self): ''' Look for `<h1..6>` items in `self` and adds unique `id` attributes if not already present. ''' dom = self.body i = 0 x = dom.find(None, None, None) while x: name = x.tagname if len(name) == 2 and name[0]=="h" and name[1] in "123456": attr = x.get_attribute_value("id") if not attr: id_ = f"h_id_{i}" #print(f"name={name}: setting id={id_}") x.set_attribute("id", id_) i += 1 x = x.find_next(None, None, None) def write_with_links(self, rectfn, positionfn=None, pagefn=None): #print("write_with_links()") stream = io.BytesIO() writer = DocumentWriter(stream) positions = [] def positionfn2(position): #print(f"write_with_links(): position={position}") positions.append(position) if positionfn: positionfn(position) self.write(writer, rectfn, positionfn=positionfn2, pagefn=pagefn) writer.close() stream.seek(0) return Story.add_pdf_links(stream, positions) @staticmethod def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True): #print("write_stabilized_with_links()") stream = io.BytesIO() writer = DocumentWriter(stream) positions = [] def positionfn2(position): #print(f"write_stabilized_with_links(): position={position}") positions.append(position) if positionfn: positionfn(position) Story.write_stabilized(writer, contentfn, rectfn, user_css, em, positionfn2, pagefn, archive, add_header_ids) writer.close() stream.seek(0) return Story.add_pdf_links(stream, positions) @staticmethod def add_pdf_links(document_or_stream, positions): """ Adds links to PDF document. Args: document_or_stream: A PDF `Document` or raw PDF content, for example an `io.BytesIO` instance. positions: List of `ElementPosition`'s for `document_or_stream`, typically from Story.element_positions(). We raise an exception if two or more positions have same id. Returns: `document_or_stream` if a `Document` instance, otherwise a new `Document` instance. We raise an exception if an `href` in `positions` refers to an internal position `#<name>` but no item in `postions` has `id = name`. """ if isinstance(document_or_stream, Document): document = document_or_stream else: document = Document("pdf", document_or_stream) # Create dict from id to position, which we will use to find # link destinations. # id_to_position = dict() #print(f"positions: {positions}") for position in positions: #print(f"add_pdf_links(): position: {position}") if (position.open_close & 1) and position.id: #print(f"add_pdf_links(): position with id: {position}") if position.id in id_to_position: #print(f"Ignoring duplicate positions with id={position.id!r}") pass else: id_to_position[ position.id] = position # Insert links for all positions that have an `href` starting # with '#'. # for position_from in positions: if ((position_from.open_close & 1) and position_from.href and position_from.href.startswith("#") ): # This is a `<a href="#...">...</a>` internal link. #print(f"add_pdf_links(): position with href: {position}") target_id = position_from.href[1:] try: position_to = id_to_position[ target_id] except Exception as e: raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") # Make link from `position_from`'s rect to top-left of # `position_to`'s rect. if 0: print(f"add_pdf_links(): making link from:") print(f"add_pdf_links(): {position_from}") print(f"add_pdf_links(): to:") print(f"add_pdf_links(): {position_to}") link = dict() link["kind"] = LINK_GOTO link["from"] = Rect(position_from.rect) x0, y0, x1, y1 = position_to.rect # This appears to work well with viewers which scroll # to make destination point top-left of window. link["to"] = Point(x0, y0) link["page"] = position_to.page_num - 1 document[position_from.page_num - 1].insert_link(link) return document @property def body(self): dom = self.document() return dom.bodytag() def __del__(self): if not type(self) is Story: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } }; //------------------------------------------------------------------------ // Tools - a collection of tools and utilities //------------------------------------------------------------------------ struct Tools { %extend { Tools() { /* It looks like global objects are never destructed when running with SWIG, so we use Memento_startLeaking()/Memento_stopLeaking(). */ Memento_startLeaking(); void* p = malloc( sizeof(struct Tools)); Memento_stopLeaking(); //fprintf(stderr, "Tools constructor p=%p\n", p); return (struct Tools*) p; } ~Tools() { /* This is not called. */ struct Tools* p = (struct Tools*) $self; //fprintf(stderr, "~Tools() p=%p\n", p); free(p); } %pythonprepend gen_id %{"""Return a unique positive integer."""%} PyObject *gen_id() { JM_UNIQUE_ID += 1; if (JM_UNIQUE_ID < 0) JM_UNIQUE_ID = 1; return Py_BuildValue("i", JM_UNIQUE_ID); } FITZEXCEPTION(set_icc, !result) %pythonprepend set_icc %{"""Set ICC color handling on or off."""%} PyObject *set_icc(int on=0) { fz_try(gctx) { if (on) { if (FZ_ENABLE_ICC) fz_enable_icc(gctx); else { RAISEPY(gctx, "MuPDF built w/o ICC support",PyExc_ValueError); } } else if (FZ_ENABLE_ICC) { fz_disable_icc(gctx); } } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } %pythonprepend set_annot_stem %{"""Get / set id prefix for annotations."""%} char *set_annot_stem(char *stem=NULL) { if (!stem) { return JM_annot_id_stem; } size_t len = strlen(stem) + 1; if (len > 50) len = 50; memcpy(&JM_annot_id_stem, stem, len); return JM_annot_id_stem; } %pythonprepend set_small_glyph_heights %{"""Set / unset small glyph heights."""%} PyObject *set_small_glyph_heights(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(small_glyph_heights); } if (PyObject_IsTrue(on)) { small_glyph_heights = 1; } else { small_glyph_heights = 0; } return JM_BOOL(small_glyph_heights); } %pythonprepend set_subset_fontnames %{"""Set / unset returning fontnames with their subset prefix."""%} PyObject *set_subset_fontnames(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(subset_fontnames); } if (PyObject_IsTrue(on)) { subset_fontnames = 1; } else { subset_fontnames = 0; } return JM_BOOL(subset_fontnames); } %pythonprepend set_low_memory %{"""Set / unset MuPDF device caching."""%} PyObject *set_low_memory(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(no_device_caching); } if (PyObject_IsTrue(on)) { no_device_caching = 1; } else { no_device_caching = 0; } return JM_BOOL(no_device_caching); } %pythonprepend unset_quad_corrections %{"""Set ascender / descender corrections on or off."""%} PyObject *unset_quad_corrections(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(skip_quad_corrections); } if (PyObject_IsTrue(on)) { skip_quad_corrections = 1; } else { skip_quad_corrections = 0; } return JM_BOOL(skip_quad_corrections); } %pythonprepend store_shrink %{"""Free 'percent' of current store size."""%} PyObject *store_shrink(int percent) { if (percent >= 100) { fz_empty_store(gctx); return Py_BuildValue("i", 0); } if (percent > 0) fz_shrink_store(gctx, 100 - percent); return Py_BuildValue("i", (int) gctx->store->size); } %pythoncode%{@property%} %pythonprepend store_size %{"""MuPDF current store size."""%} PyObject *store_size() { return Py_BuildValue("i", (int) gctx->store->size); } %pythoncode%{@property%} %pythonprepend store_maxsize %{"""MuPDF store size limit."""%} PyObject *store_maxsize() { return Py_BuildValue("i", (int) gctx->store->max); } %pythonprepend show_aa_level %{"""Show anti-aliasing values."""%} %pythonappend show_aa_level %{ temp = {"graphics": val[0], "text": val[1], "graphics_min_line_width": val[2]} val = temp%} PyObject *show_aa_level() { return Py_BuildValue("iif", fz_graphics_aa_level(gctx), fz_text_aa_level(gctx), fz_graphics_min_line_width(gctx)); } %pythonprepend set_aa_level %{"""Set anti-aliasing level."""%} void set_aa_level(int level) { fz_set_aa_level(gctx, level); } %pythonprepend set_graphics_min_line_width %{"""Set the graphics minimum line width."""%} void set_graphics_min_line_width(float min_line_width) { fz_set_graphics_min_line_width(gctx, min_line_width); } FITZEXCEPTION(image_profile, !result) %pythonprepend image_profile %{"""Metadata of an image binary stream."""%} PyObject *image_profile(PyObject *stream, int keep_image=0) { PyObject *rc = NULL; fz_try(gctx) { rc = JM_image_profile(gctx, stream, keep_image); } fz_catch(gctx) { return NULL; } return rc; } PyObject *_rotate_matrix(struct Page *page) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); if (!pdfpage) return JM_py_from_matrix(fz_identity); return JM_py_from_matrix(JM_rotate_page_matrix(gctx, pdfpage)); } PyObject *_derotate_matrix(struct Page *page) { pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); if (!pdfpage) return JM_py_from_matrix(fz_identity); return JM_py_from_matrix(JM_derotate_page_matrix(gctx, pdfpage)); } %pythoncode%{@property%} %pythonprepend fitz_config %{"""PyMuPDF configuration parameters."""%} PyObject *fitz_config() { return JM_fitz_config(); } %pythonprepend glyph_cache_empty %{"""Empty the glyph cache."""%} void glyph_cache_empty() { fz_purge_glyph_cache(gctx); } FITZEXCEPTION(_fill_widget, !result) %pythonappend _fill_widget %{ widget.rect = Rect(annot.rect) widget.xref = annot.xref widget.parent = annot.parent widget._annot = annot # backpointer to annot object if not widget.script: widget.script = None if not widget.script_stroke: widget.script_stroke = None if not widget.script_format: widget.script_format = None if not widget.script_change: widget.script_change = None if not widget.script_calc: widget.script_calc = None if not widget.script_blur: widget.script_blur = None if not widget.script_focus: widget.script_focus = None %} PyObject *_fill_widget(struct Annot *annot, PyObject *widget) { fz_try(gctx) { JM_get_widget_properties(gctx, (pdf_annot *) annot, widget); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_save_widget, !result) PyObject *_save_widget(struct Annot *annot, PyObject *widget) { fz_try(gctx) { JM_set_widget_properties(gctx, (pdf_annot *) annot, widget); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_reset_widget, !result) PyObject *_reset_widget(struct Annot *annot) { fz_try(gctx) { pdf_annot *this_annot = (pdf_annot *) annot; pdf_obj *this_annot_obj = pdf_annot_obj(gctx, this_annot); pdf_document *pdf = pdf_get_bound_document(gctx, this_annot_obj); pdf_field_reset(gctx, pdf, this_annot_obj); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } // Ensure that widgets with a /AA/C JavaScript are in AcroForm/CO FITZEXCEPTION(_ensure_widget_calc, !result) PyObject *_ensure_widget_calc(struct Annot *annot) { pdf_obj *PDFNAME_CO=NULL; fz_try(gctx) { pdf_obj *annot_obj = pdf_annot_obj(gctx, (pdf_annot *) annot); pdf_document *pdf = pdf_get_bound_document(gctx, annot_obj); PDFNAME_CO = pdf_new_name(gctx, "CO"); // = PDF_NAME(CO) pdf_obj *acro = pdf_dict_getl(gctx, // get AcroForm dict pdf_trailer(gctx, pdf), PDF_NAME(Root), PDF_NAME(AcroForm), NULL); pdf_obj *CO = pdf_dict_get(gctx, acro, PDFNAME_CO); // = AcroForm/CO if (!CO) { CO = pdf_dict_put_array(gctx, acro, PDFNAME_CO, 2); } int i, n = pdf_array_len(gctx, CO); int xref, nxref, found = 0; xref = pdf_to_num(gctx, annot_obj); for (i = 0; i < n; i++) { nxref = pdf_to_num(gctx, pdf_array_get(gctx, CO, i)); if (xref == nxref) { found = 1; break; } } if (!found) { pdf_array_push_drop(gctx, CO, pdf_new_indirect(gctx, pdf, xref, 0)); } } fz_always(gctx) { pdf_drop_obj(gctx, PDFNAME_CO); } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_parse_da, !result) %pythonappend _parse_da %{ if not val: return ((0,), "", 0) font = "Helv" fsize = 12 col = (0, 0, 0) dat = val.split() # split on any whitespace for i, item in enumerate(dat): if item == "Tf": font = dat[i - 2][1:] fsize = float(dat[i - 1]) dat[i] = dat[i-1] = dat[i-2] = "" continue if item == "g": # unicolor text col = [(float(dat[i - 1]))] dat[i] = dat[i-1] = "" continue if item == "rg": # RGB colored text col = [float(f) for f in dat[i - 3:i]] dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = "" continue if item == "k": # CMYK colored text col = [float(f) for f in dat[i - 4:i]] dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = "" continue val = (col, font, fsize) %} PyObject *_parse_da(struct Annot *annot) { char *da_str = NULL; pdf_annot *this_annot = (pdf_annot *) annot; pdf_obj *this_annot_obj = pdf_annot_obj(gctx, this_annot); pdf_document *pdf = pdf_get_bound_document(gctx, this_annot_obj); fz_try(gctx) { pdf_obj *da = pdf_dict_get_inheritable(gctx, this_annot_obj, PDF_NAME(DA)); if (!da) { pdf_obj *trailer = pdf_trailer(gctx, pdf); da = pdf_dict_getl(gctx, trailer, PDF_NAME(Root), PDF_NAME(AcroForm), PDF_NAME(DA), NULL); } da_str = (char *) pdf_to_text_string(gctx, da); } fz_catch(gctx) { return NULL; } return JM_UnicodeFromStr(da_str); } FITZEXCEPTION(_update_da, !result) PyObject *_update_da(struct Annot *annot, char *da_str) { fz_try(gctx) { pdf_annot *this_annot = (pdf_annot *) annot; pdf_obj *this_annot_obj = pdf_annot_obj(gctx, this_annot); pdf_dict_put_text_string(gctx, this_annot_obj, PDF_NAME(DA), da_str); pdf_dict_del(gctx, this_annot_obj, PDF_NAME(DS)); /* not supported */ pdf_dict_del(gctx, this_annot_obj, PDF_NAME(RC)); /* not supported */ } fz_catch(gctx) { return NULL; } Py_RETURN_NONE; } FITZEXCEPTION(_get_all_contents, !result) %pythonprepend _get_all_contents %{"""Concatenate all /Contents objects of a page into a bytes object."""%} PyObject *_get_all_contents(struct Page *fzpage) { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) fzpage); fz_buffer *res = NULL; PyObject *result = NULL; fz_try(gctx) { ASSERT_PDF(page); res = JM_read_contents(gctx, page->obj); result = JM_BinFromBuffer(gctx, res); } fz_always(gctx) { fz_drop_buffer(gctx, res); } fz_catch(gctx) { return NULL; } return result; } FITZEXCEPTION(_insert_contents, !result) %pythonprepend _insert_contents %{"""Add bytes as a new /Contents object for a page, and return its xref."""%} PyObject *_insert_contents(struct Page *page, PyObject *newcont, int overlay=1) { fz_buffer *contbuf = NULL; int xref = 0; pdf_page *pdfpage = pdf_page_from_fz_page(gctx, (fz_page *) page); fz_try(gctx) { ASSERT_PDF(pdfpage); ENSURE_OPERATION(gctx, pdfpage->doc); contbuf = JM_BufferFromBytes(gctx, newcont); xref = JM_insert_contents(gctx, pdfpage->doc, pdfpage->obj, contbuf, overlay); } fz_always(gctx) { fz_drop_buffer(gctx, contbuf); } fz_catch(gctx) { return NULL; } return Py_BuildValue("i", xref); } %pythonprepend mupdf_version %{"""Get version of MuPDF binary build."""%} PyObject *mupdf_version() { return Py_BuildValue("s", FZ_VERSION); } %pythonprepend mupdf_warnings %{"""Get the MuPDF warnings/errors with optional reset (default)."""%} %pythonappend mupdf_warnings %{ val = "\n".join(val) if reset: self.reset_mupdf_warnings()%} PyObject *mupdf_warnings(int reset=1) { Py_INCREF(JM_mupdf_warnings_store); return JM_mupdf_warnings_store; } int _int_from_language(char *language) { return fz_text_language_from_string(language); } %pythonprepend reset_mupdf_warnings %{"""Empty the MuPDF warnings/errors store."""%} void reset_mupdf_warnings() { Py_CLEAR(JM_mupdf_warnings_store); JM_mupdf_warnings_store = PyList_New(0); } %pythonprepend mupdf_display_errors %{"""Set MuPDF error display to True or False."""%} PyObject *mupdf_display_errors(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(JM_mupdf_show_errors); } if (PyObject_IsTrue(on)) { JM_mupdf_show_errors = 1; } else { JM_mupdf_show_errors = 0; } return JM_BOOL(JM_mupdf_show_errors); } %pythonprepend mupdf_display_warnings %{"""Set MuPDF warnings display to True or False."""%} PyObject *mupdf_display_warnings(PyObject *on=NULL) { if (!on || on == Py_None) { return JM_BOOL(JM_mupdf_show_warnings); } if (PyObject_IsTrue(on)) { JM_mupdf_show_warnings = 1; } else { JM_mupdf_show_warnings = 0; } return JM_BOOL(JM_mupdf_show_warnings); } %pythoncode %{ def _le_annot_parms(self, annot, p1, p2, fill_color): """Get common parameters for making annot line end symbols. Returns: m: matrix that maps p1, p2 to points L, P on the x-axis im: its inverse L, P: transformed p1, p2 w: line width scol: stroke color string fcol: fill color store_shrink opacity: opacity string (gs command) """ w = annot.border["width"] # line width sc = annot.colors["stroke"] # stroke color if not sc: # black if missing sc = (0,0,0) scol = " ".join(map(str, sc)) + " RG\n" if fill_color: fc = fill_color else: fc = annot.colors["fill"] # fill color if not fc: fc = (1,1,1) # white if missing fcol = " ".join(map(str, fc)) + " rg\n" # nr = annot.rect np1 = p1 # point coord relative to annot rect np2 = p2 # point coord relative to annot rect m = Matrix(util_hor_matrix(np1, np2)) # matrix makes the line horizontal im = ~m # inverted matrix L = np1 * m # converted start (left) point R = np2 * m # converted end (right) point if 0 <= annot.opacity < 1: opacity = "/H gs\n" else: opacity = "" return m, im, L, R, w, scol, fcol, opacity def _oval_string(self, p1, p2, p3, p4): """Return /AP string defining an oval within a 4-polygon provided as points """ def bezier(p, q, r): f = "%f %f %f %f %f %f c\n" return f % (p.x, p.y, q.x, q.y, r.x, r.y) kappa = 0.55228474983 # magic number ml = p1 + (p4 - p1) * 0.5 # middle points ... mo = p1 + (p2 - p1) * 0.5 # for each ... mr = p2 + (p3 - p2) * 0.5 # polygon ... mu = p4 + (p3 - p4) * 0.5 # side ol1 = ml + (p1 - ml) * kappa # the 8 bezier ol2 = mo + (p1 - mo) * kappa # helper points or1 = mo + (p2 - mo) * kappa or2 = mr + (p2 - mr) * kappa ur1 = mr + (p3 - mr) * kappa ur2 = mu + (p3 - mu) * kappa ul1 = mu + (p4 - mu) * kappa ul2 = ml + (p4 - ml) * kappa # now draw, starting from middle point of left side ap = "%f %f m\n" % (ml.x, ml.y) ap += bezier(ol1, ol2, mo) ap += bezier(or1, or2, mr) ap += bezier(ur1, ur2, mu) ap += bezier(ul1, ul2, ml) return ap def _le_diamond(self, annot, p1, p2, lr, fill_color): """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 # 2*shift*width = length of square edge d = shift * max(1, w) M = R - (d/2., 0) if lr else L + (d/2., 0) r = Rect(M, M) + (-d, -d, d, d) # the square # the square makes line longer by (2*shift - 1)*width p = (r.tl + (r.bl - r.tl) * 0.5) * im ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) p = (r.tl + (r.tr - r.tl) * 0.5) * im ap += "%f %f l\n" % (p.x, p.y) p = (r.tr + (r.br - r.tr) * 0.5) * im ap += "%f %f l\n" % (p.x, p.y) p = (r.br + (r.bl - r.br) * 0.5) * im ap += "%f %f l\n" % (p.x, p.y) ap += "%g w\n" % w ap += scol + fcol + "b\nQ\n" return ap def _le_square(self, annot, p1, p2, lr, fill_color): """Make stream commands for square line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 # 2*shift*width = length of square edge d = shift * max(1, w) M = R - (d/2., 0) if lr else L + (d/2., 0) r = Rect(M, M) + (-d, -d, d, d) # the square # the square makes line longer by (2*shift - 1)*width p = r.tl * im ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) p = r.tr * im ap += "%f %f l\n" % (p.x, p.y) p = r.br * im ap += "%f %f l\n" % (p.x, p.y) p = r.bl * im ap += "%f %f l\n" % (p.x, p.y) ap += "%g w\n" % w ap += scol + fcol + "b\nQ\n" return ap def _le_circle(self, annot, p1, p2, lr, fill_color): """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 # 2*shift*width = length of square edge d = shift * max(1, w) M = R - (d/2., 0) if lr else L + (d/2., 0) r = Rect(M, M) + (-d, -d, d, d) # the square ap = "q\n" + opacity + self._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im) ap += "%g w\n" % w ap += scol + fcol + "b\nQ\n" return ap def _le_butt(self, annot, p1, p2, lr, fill_color): """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 3 d = shift * max(1, w) M = R if lr else L top = (M + (0, -d/2.)) * im bot = (M + (0, d/2.)) * im ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) ap += "%f %f l\n" % (bot.x, bot.y) ap += "%g w\n" % w ap += scol + "s\nQ\n" return ap def _le_slash(self, annot, p1, p2, lr, fill_color): """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) rw = 1.1547 * max(1, w) * 1.0 # makes rect diagonal a 30 deg inclination M = R if lr else L r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w) top = r.tl * im bot = r.br * im ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) ap += "%f %f l\n" % (bot.x, bot.y) ap += "%g w\n" % w ap += scol + "s\nQ\n" return ap def _le_openarrow(self, annot, p1, p2, lr, fill_color): """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 d = shift * max(1, w) p2 = R + (d/2., 0) if lr else L - (d/2., 0) p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d) p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d) p1 *= im p2 *= im p3 *= im ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) ap += "%f %f l\n" % (p2.x, p2.y) ap += "%f %f l\n" % (p3.x, p3.y) ap += "%g w\n" % w ap += scol + "S\nQ\n" return ap def _le_closedarrow(self, annot, p1, p2, lr, fill_color): """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 d = shift * max(1, w) p2 = R + (d/2., 0) if lr else L - (d/2., 0) p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d) p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d) p1 *= im p2 *= im p3 *= im ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) ap += "%f %f l\n" % (p2.x, p2.y) ap += "%f %f l\n" % (p3.x, p3.y) ap += "%g w\n" % w ap += scol + fcol + "b\nQ\n" return ap def _le_ropenarrow(self, annot, p1, p2, lr, fill_color): """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 d = shift * max(1, w) p2 = R - (d/3., 0) if lr else L + (d/3., 0) p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d) p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d) p1 *= im p2 *= im p3 *= im ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) ap += "%f %f l\n" % (p2.x, p2.y) ap += "%f %f l\n" % (p3.x, p3.y) ap += "%g w\n" % w ap += scol + fcol + "S\nQ\n" return ap def _le_rclosedarrow(self, annot, p1, p2, lr, fill_color): """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point. """ m, im, L, R, w, scol, fcol, opacity = self._le_annot_parms(annot, p1, p2, fill_color) shift = 2.5 d = shift * max(1, w) p2 = R - (2*d, 0) if lr else L + (2*d, 0) p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d) p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d) p1 *= im p2 *= im p3 *= im ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) ap += "%f %f l\n" % (p2.x, p2.y) ap += "%f %f l\n" % (p3.x, p3.y) ap += "%g w\n" % w ap += scol + fcol + "b\nQ\n" return ap def __del__(self): if not type(self) is Tools: return if getattr(self, "thisown", False): self.__swig_destroy__(self) %} } };
