diff src/extra.i @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children a6bc019ac0b2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/extra.i	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,4285 @@
+%module fitz_extra
+
+%pythoncode %{
+# pylint: disable=all
+%}
+
+%begin
+%{
+#define SWIG_PYTHON_INTERPRETER_NO_DEBUG
+
+/* This seems to be necessary on some Windows machines with Py_LIMITED_API,
+otherwise compilation can fail because free() and malloc() are not declared. */
+#include <stdlib.h>
+%}
+
+%init
+%{
+    /* Initialise some globals that require Python functions.
+
+    Each `dictkey_*` global is set to an interned Python string so that it
+    can be used repeatedly as a dictionary key.
+
+    [Prior to 2023-08-18 we initialised these global variables inline,
+    but this causes a SEGV on Windows with Python-3.10 for `dictkey_c`
+    (actually any string of length 1 failed).]
+
+    Notes for maintainers:
+    - The results of PyUnicode_InternFromString() are not checked here.
+    - `dictkey_desc` and `dictkey_descender` are both set to "descender".
+    - `dictkey_matrix` is set to the key text "transform".
+    - `dictkey_char_bidi` is declared alongside the other keys but is not
+      assigned in this block, so it keeps its initial NULL value unless it is
+      set elsewhere. NOTE(review): confirm whether that is intended. */
+    
+    dictkey_align = PyUnicode_InternFromString("align");
+    dictkey_ascender = PyUnicode_InternFromString("ascender");
+    dictkey_bidi = PyUnicode_InternFromString("bidi");
+    dictkey_bbox = PyUnicode_InternFromString("bbox");
+    dictkey_blocks = PyUnicode_InternFromString("blocks");
+    dictkey_bpc = PyUnicode_InternFromString("bpc");
+    dictkey_c = PyUnicode_InternFromString("c");
+    dictkey_chars = PyUnicode_InternFromString("chars");
+    dictkey_color = PyUnicode_InternFromString("color");
+    dictkey_colorspace = PyUnicode_InternFromString("colorspace");
+    dictkey_content = PyUnicode_InternFromString("content");
+    dictkey_creationDate = PyUnicode_InternFromString("creationDate");
+    dictkey_cs_name = PyUnicode_InternFromString("cs-name");
+    dictkey_da = PyUnicode_InternFromString("da");
+    dictkey_dashes = PyUnicode_InternFromString("dashes");
+    dictkey_desc = PyUnicode_InternFromString("descender");
+    dictkey_descender = PyUnicode_InternFromString("descender");
+    dictkey_dir = PyUnicode_InternFromString("dir");
+    dictkey_effect = PyUnicode_InternFromString("effect");
+    dictkey_ext = PyUnicode_InternFromString("ext");
+    dictkey_filename = PyUnicode_InternFromString("filename");
+    dictkey_fill = PyUnicode_InternFromString("fill");
+    dictkey_flags = PyUnicode_InternFromString("flags");
+    dictkey_char_flags = PyUnicode_InternFromString("char_flags");  /* Only used with mupdf >= 1.25.2. */
+    dictkey_font = PyUnicode_InternFromString("font");
+    dictkey_glyph = PyUnicode_InternFromString("glyph");
+    dictkey_height = PyUnicode_InternFromString("height");
+    dictkey_id = PyUnicode_InternFromString("id");
+    dictkey_image = PyUnicode_InternFromString("image");
+    dictkey_items = PyUnicode_InternFromString("items");
+    dictkey_length = PyUnicode_InternFromString("length");
+    dictkey_lines = PyUnicode_InternFromString("lines");
+    dictkey_matrix = PyUnicode_InternFromString("transform");
+    dictkey_modDate = PyUnicode_InternFromString("modDate");
+    dictkey_name = PyUnicode_InternFromString("name");
+    dictkey_number = PyUnicode_InternFromString("number");
+    dictkey_origin = PyUnicode_InternFromString("origin");
+    dictkey_rect = PyUnicode_InternFromString("rect");
+    dictkey_size = PyUnicode_InternFromString("size");
+    dictkey_smask = PyUnicode_InternFromString("smask");
+    dictkey_spans = PyUnicode_InternFromString("spans");
+    dictkey_stroke = PyUnicode_InternFromString("stroke");
+    dictkey_style = PyUnicode_InternFromString("style");
+    dictkey_subject = PyUnicode_InternFromString("subject");
+    dictkey_text = PyUnicode_InternFromString("text");
+    dictkey_title = PyUnicode_InternFromString("title");
+    dictkey_type = PyUnicode_InternFromString("type");
+    dictkey_ufilename = PyUnicode_InternFromString("ufilename");
+    dictkey_width = PyUnicode_InternFromString("width");
+    dictkey_wmode = PyUnicode_InternFromString("wmode");
+    dictkey_xref = PyUnicode_InternFromString("xref");
+    dictkey_xres = PyUnicode_InternFromString("xres");
+    dictkey_yres = PyUnicode_InternFromString("yres");
+%}
+
+%include std_string.i
+
+%include exception.i
+%exception {
+    /* Run the wrapped call and report any C++ exception that escapes it
+    through SWIG_exception() as SWIG_RuntimeError. */
+    try {
+        $action
+    }
+    /* this might not be ok on windows.
+    catch (Swig::DirectorException &e) {
+        SWIG_fail;
+    }*/
+    catch (const std::exception& e) {
+        SWIG_exception(SWIG_RuntimeError, e.what());
+    }
+    catch (...) {
+        SWIG_exception(SWIG_RuntimeError, "Unknown exception");
+    }
+}
+
+%{
+#include "mupdf/classes2.h"
+#include "mupdf/exceptions.h"
+#include "mupdf/internal.h"
+
+#include <algorithm>
+#include <float.h>
+
+
+/* Packs a (major, minor, patch) triple into a single integer that can be
+compared with the usual relational operators. Every argument is parenthesised
+so that expressions (not just literals) can be passed safely. */
+#define MAKE_MUPDF_VERSION_INT(major, minor, patch) \
+        (((major) << 16) + ((minor) << 8) + ((patch) << 0))
+
+/* The MuPDF version we are being built against, in packed form. */
+#define MUPDF_VERSION_INT MAKE_MUPDF_VERSION_INT(FZ_VERSION_MAJOR, FZ_VERSION_MINOR, FZ_VERSION_PATCH)
+
+/* True if the MuPDF version is at least major.minor.patch. The expansion is
+fully parenthesised so that it composes correctly with `!`, `&&` and `||`,
+e.g. in `#if !MUPDF_VERSION_GE(1, 25, 2)`. */
+#define MUPDF_VERSION_GE(major, minor, patch) \
+        (MUPDF_VERSION_INT >= MAKE_MUPDF_VERSION_INT(major, minor, patch))
+
+/* Define a wrapper for PDF_NAME that returns a mupdf::PdfObj instead of a
+pdf_obj*. This avoids implicit construction of a mupdf::PdfObj, which is
+deliberately prohibited (with `explicit` on constructors) by recent MuPDF. */
+#define PDF_NAME2(X) mupdf::PdfObj(PDF_NAME(X))
+
+/* Returns equivalent of `repr(x)` as UTF-8 encoded text.
+
+Characters that cannot be encoded are written as backslash escapes. If
+`repr()` or the encoding step fails, the pending Python error is cleared and
+a placeholder string is returned, so this helper never dereferences a NULL
+result. */
+static std::string repr(PyObject* x)
+{
+    PyObject* repr_obj = PyObject_Repr(x);
+    if (!repr_obj)
+    {
+        PyErr_Clear();
+        return "<repr() failed>";
+    }
+    PyObject* repr_bytes = PyUnicode_AsEncodedString(repr_obj, "utf-8", "backslashreplace");
+    Py_DECREF(repr_obj);
+    if (!repr_bytes)
+    {
+        PyErr_Clear();
+        return "<repr() failed>";
+    }
+    #ifdef Py_LIMITED_API
+        const char* repr_bytes_s = PyBytes_AsString(repr_bytes);
+    #else
+        const char* repr_bytes_s = PyBytes_AS_STRING(repr_bytes);
+    #endif
+    // Copy the text before releasing the bytes object that owns it.
+    std::string ret = repr_bytes_s ? repr_bytes_s : "";
+    Py_DECREF(repr_bytes);
+    return ret;
+}
+
+#ifdef Py_LIMITED_API
+    /* Replacements for two names that this file uses but which are defined
+    here only when building with Py_LIMITED_API. */
+
+    /* Returns item `i` of sequence `o` via PySequence_GetItem(). */
+    static PyObject* PySequence_ITEM(PyObject* o, Py_ssize_t i)
+    {
+        return PySequence_GetItem(o, i);
+    }
+
+    /* Returns the UTF-8 text of `o`, or nullptr if the conversion fails.
+
+    The returned pointer refers to a bytes object held in a function-local
+    static, so it is only valid until the next call of this function. */
+    static const char* PyUnicode_AsUTF8(PyObject* o)
+    {
+        static PyObject* string = nullptr;
+        Py_XDECREF(string);
+        string = PyUnicode_AsUTF8String(o);
+        if (!string)
+        {
+            // Conversion failed; do not hand NULL to PyBytes_AsString().
+            return nullptr;
+        }
+        return PyBytes_AsString(string);
+    }
+#endif
+
+
+/* Message texts used for errors reported by this module.
+These are also in pymupdf/__init__.py. */
+const char MSG_BAD_ANNOT_TYPE[] = "bad annot type";
+const char MSG_BAD_APN[] = "bad or missing annot AP/N";
+const char MSG_BAD_ARG_INK_ANNOT[] = "arg must be seq of seq of float pairs";
+const char MSG_BAD_ARG_POINTS[] = "bad seq of points";
+const char MSG_BAD_BUFFER[] = "bad type: 'buffer'";
+const char MSG_BAD_COLOR_SEQ[] = "bad color sequence";
+const char MSG_BAD_DOCUMENT[] = "cannot open broken document";
+const char MSG_BAD_FILETYPE[] = "bad filetype";
+const char MSG_BAD_LOCATION[] = "bad location";
+const char MSG_BAD_OC_CONFIG[] = "bad config number";
+const char MSG_BAD_OC_LAYER[] = "bad layer number";
+const char MSG_BAD_OC_REF[] = "bad 'oc' reference";
+const char MSG_BAD_PAGEID[] = "bad page id";
+const char MSG_BAD_PAGENO[] = "bad page number(s)";
+const char MSG_BAD_PDFROOT[] = "PDF has no root";
+const char MSG_BAD_RECT[] = "rect is infinite or empty";
+const char MSG_BAD_TEXT[] = "bad type: 'text'";
+const char MSG_BAD_XREF[] = "bad xref";
+const char MSG_COLOR_COUNT_FAILED[] = "color count failed";
+const char MSG_FILE_OR_BUFFER[] = "need font file or buffer";
+const char MSG_FONT_FAILED[] = "cannot create font";
+const char MSG_IS_NO_ANNOT[] = "is no annotation";
+const char MSG_IS_NO_IMAGE[] = "is no image";
+const char MSG_IS_NO_PDF[] = "is no PDF";
+const char MSG_IS_NO_DICT[] = "object is no PDF dict";
+const char MSG_PIX_NOALPHA[] = "source pixmap has no alpha";
+const char MSG_PIXEL_OUTSIDE[] = "pixel(s) outside image";
+
+/* Converts a C truth value into a Python bool object via PyBool_FromLong(). */
+#define JM_BOOL(x) PyBool_FromLong((long) (x))
+
+/* Forward declaration; the definition is not in this part of the file. */
+static PyObject *JM_UnicodeFromStr(const char *c);
+
+
+#ifdef _WIN32
+
+/* These functions are not provided on Windows. */
+
+/* Formats `fmt` with the arguments in `ap` into a newly malloc()ed string.
+
+On success stores the string in `*str` (to be released with free()) and
+returns its length, not counting the terminating NUL. On any failure
+(formatting error or out of memory) stores nullptr in `*str` and returns -1.
+`ap` itself is not consumed; copies of it are used for both passes. */
+int vasprintf(char** str, const char* fmt, va_list ap)
+{
+    *str = nullptr;
+
+    // First pass: only measure the required length.
+    va_list ap2;
+    va_copy(ap2, ap);
+    int len = vsnprintf(nullptr, 0, fmt, ap2);
+    va_end(ap2);
+    if (len < 0)
+    {
+        // vsnprintf() reported a formatting error; do not size a buffer from it.
+        return -1;
+    }
+
+    char* buffer = (char*) malloc((size_t) len + 1);
+    if (!buffer)
+    {
+        return -1;
+    }
+
+    // Second pass: write the text into the buffer.
+    va_copy(ap2, ap);
+    int len2 = vsnprintf(buffer, (size_t) len + 1, fmt, ap2);
+    va_end(ap2);
+    if (len2 != len)
+    {
+        // The two passes disagree; treat this as a failure in all build modes.
+        free(buffer);
+        return -1;
+    }
+    *str = buffer;
+    return len;
+}
+
+/* Variadic front end for vasprintf(): formats `fmt` and its arguments into
+a newly allocated string stored in `*str` and returns vasprintf()'s result. */
+int asprintf(char** str, const char* fmt, ...)
+{
+    va_list varargs;
+    va_start(varargs, fmt);
+    const int written = vasprintf(str, fmt, varargs);
+    va_end(varargs);
+    return written;
+}
+#endif
+
+
+/* Formats a printf-style message and passes it to Python's
+`pymupdf.message()`.
+
+This is best-effort diagnostics output: if the `pymupdf` module or its
+`message` attribute cannot be obtained, or if the text cannot be formatted or
+converted to a Python string, the pending Python error (if any) is cleared
+and the message is silently dropped rather than crashing. The lookup is
+retried on the next call. */
+static void messagev(const char* format, va_list va)
+{
+    static PyObject* pymupdf_module = nullptr;
+    static PyObject* message_fn = nullptr;
+    if (!message_fn)
+    {
+        if (!pymupdf_module)
+        {
+            pymupdf_module = PyImport_ImportModule("pymupdf");
+        }
+        if (pymupdf_module)
+        {
+            message_fn = PyObject_GetAttrString(pymupdf_module, "message");
+        }
+        if (!message_fn)
+        {
+            PyErr_Clear();
+            return;
+        }
+    }
+
+    char* text = nullptr;
+    if (vasprintf(&text, format, va) < 0 || !text)
+    {
+        // Formatting failed; `text` must not be used or freed.
+        return;
+    }
+    PyObject* text_py = PyUnicode_FromString(text);
+    free(text);
+    if (!text_py)
+    {
+        PyErr_Clear();
+        return;
+    }
+
+    PyObject* args = PyTuple_Pack(1, text_py);
+    // Only call when the argument tuple exists; a NULL tuple would otherwise
+    // mean "call with no arguments".
+    PyObject* ret = args ? PyObject_CallObject(message_fn, args) : nullptr;
+    Py_XDECREF(ret);
+    Py_XDECREF(args);
+    Py_XDECREF(text_py);
+}
+
+/* printf-style wrapper: collects the variadic arguments and forwards them,
+together with `format`, to messagev(). */
+static void messagef(const char* format, ...)
+{
+    va_list varargs;
+    va_start(varargs, format);
+    messagev(format, varargs);
+    va_end(varargs);
+}
+
+/* Decodes the NUL-terminated text `c` with the raw-unicode-escape codec
+(error mode "replace") and returns the resulting Python string. A null `c`,
+or a failed decode, yields an empty Python string; in the failure case the
+Python error is cleared. */
+PyObject* JM_EscapeStrFromStr(const char* c)
+{
+    if (c)
+    {
+        PyObject* decoded = PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace");
+        if (decoded)
+        {
+            return decoded;
+        }
+        PyObject* fallback = PyUnicode_FromString("");
+        PyErr_Clear();
+        return fallback;
+    }
+    return PyUnicode_FromString("");
+}
+
+/* Decodes the bytes stored in `buff` with the raw-unicode-escape codec
+(error mode "replace") and returns the resulting Python string. A null
+buffer, or a failed decode, yields an empty Python string; in the failure
+case the Python error is cleared. */
+PyObject* JM_EscapeStrFromBuffer(fz_buffer* buff)
+{
+    if (buff)
+    {
+        unsigned char* data = nullptr;
+        const size_t size = mupdf::ll_fz_buffer_storage(buff, &data);
+        PyObject* decoded = PyUnicode_DecodeRawUnicodeEscape((const char*) data, (Py_ssize_t) size, "replace");
+        if (decoded)
+        {
+            return decoded;
+        }
+        PyObject* fallback = PyUnicode_FromString("");
+        PyErr_Clear();
+        return fallback;
+    }
+    return PyUnicode_FromString("");
+}
+
+//----------------------------------------------------------------------------
+// Deep-copies a source page to the target.
+// Modified version of function of pdfmerge.c: we also copy annotations, but
+// we skip some subtypes. In addition we rotate output.
+//
+// doc_des      target PDF receiving the new page
+// doc_src      source PDF
+// page_from    page number in doc_src to copy
+// page_to      position passed to pdf_insert_page() for the new page
+// rotate       value written to /Rotate of the new page; -1 leaves the
+//              copied value untouched
+// links        not used in this function
+// copy_annots  if non-zero, annotations are copied (with exceptions below)
+// graft_map    graft map used for all object copies into doc_des
+//----------------------------------------------------------------------------
+static void page_merge(
+        mupdf::PdfDocument& doc_des,
+        mupdf::PdfDocument& doc_src,
+        int page_from,
+        int page_to,
+        int rotate,
+        int links,
+        int copy_annots,
+        mupdf::PdfGraftMap& graft_map
+        )
+{
+    // list of object types (per page) we want to copy
+
+    /* The list is kept as raw pdf_obj* and mupdf::PdfObj wrappers are created
+    as used (see the loop below), so nothing here needs cleanup at exit.
+
+    Background (earlier Fixme, kept for reference): on linux, static wrapper
+    objects get destructed /after/
+    mupdf/platform/c++/implementation/internal.cpp:s_thread_state, which causes
+    problems - s_thread_state::m_ctx will have been freed. We have a hack
+    that sets s_thread_state::m_ctx when destructed, so it mostly works when
+    s_thread_state.get_context() is called after destruction, but this causes
+    memento leaks and is clearly incorrect. Using pdf_obj* here is a general
+    solution to the problem of ordering of cleanup of globals.
+    */
+    static pdf_obj* known_page_objs[] = {
+            PDF_NAME(Contents),
+            PDF_NAME(Resources),
+            PDF_NAME(MediaBox),
+            PDF_NAME(CropBox),
+            PDF_NAME(BleedBox),
+            PDF_NAME(TrimBox),
+            PDF_NAME(ArtBox),
+            PDF_NAME(Rotate),
+            PDF_NAME(UserUnit)
+            };
+    int known_page_objs_num = sizeof(known_page_objs) / sizeof(known_page_objs[0]);
+    mupdf::PdfObj   page_ref = mupdf::pdf_lookup_page_obj(doc_src, page_from);
+
+    // make new page dict in dest doc
+    mupdf::PdfObj   page_dict = mupdf::pdf_new_dict(doc_des, 4);
+    mupdf::pdf_dict_put(page_dict, PDF_NAME2(Type), PDF_NAME2(Page));
+
+    // Copy each known key that the source page has (directly or inherited),
+    // grafting the value into the destination document.
+    for (int i = 0; i < known_page_objs_num; ++i)
+    {
+        mupdf::PdfObj   known_page_obj(known_page_objs[i]);
+        mupdf::PdfObj   obj = mupdf::pdf_dict_get_inheritable(page_ref, known_page_obj);
+        if (obj.m_internal)
+        {
+            mupdf::pdf_dict_put(
+                    page_dict,
+                    known_page_obj,
+                    mupdf::pdf_graft_mapped_object(graft_map, obj)
+                    );
+        }
+    }
+
+    // Copy annotations, but skip Link, Popup, IRT, Widget types
+    // If selected, remove dict keys P (parent) and Popup
+    if (copy_annots)
+    {
+        mupdf::PdfObj old_annots = mupdf::pdf_dict_get(page_ref, PDF_NAME2(Annots));
+        int n = mupdf::pdf_array_len(old_annots);
+        if (n > 0)
+        {
+            mupdf::PdfObj new_annots = mupdf::pdf_dict_put_array(page_dict, PDF_NAME2(Annots), n);
+            for (int i = 0; i < n; i++)
+            {
+                mupdf::PdfObj o = mupdf::pdf_array_get(old_annots, i);
+                if (!o.m_internal || !mupdf::pdf_is_dict(o)) // skip non-dict items
+                {
+                    continue;   // skip invalid/null/non-dict items
+                }
+                if (mupdf::pdf_dict_get(o, PDF_NAME2(IRT)).m_internal) continue;
+                mupdf::PdfObj subtype = mupdf::pdf_dict_get(o, PDF_NAME2(Subtype));
+                if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Link))) continue;
+                if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Popup))) continue;
+                if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Widget))) continue;
+                // NOTE(review): `o` comes from the source page's /Annots, so
+                // these deletions are applied to the source annotation
+                // object before it is grafted - confirm this is intended.
+                mupdf::pdf_dict_del(o, PDF_NAME2(Popup));
+                mupdf::pdf_dict_del(o, PDF_NAME2(P));
+                mupdf::PdfObj copy_o = mupdf::pdf_graft_mapped_object(graft_map, o);
+                // Store an indirect reference to the grafted copy in the new
+                // page's /Annots array.
+                mupdf::PdfObj annot = mupdf::pdf_new_indirect(
+                        doc_des,
+                        mupdf::pdf_to_num(copy_o),
+                        0
+                        );
+                mupdf::pdf_array_push(new_annots, annot);
+            }
+        }
+    }
+    // rotate the page
+    if (rotate != -1)
+    {
+        mupdf::pdf_dict_put_int(page_dict, PDF_NAME2(Rotate), rotate);
+    }
+    // Now add the page dictionary to dest PDF
+    mupdf::PdfObj ref = mupdf::pdf_add_object(doc_des, page_dict);
+
+    // Insert new page at specified location
+    mupdf::pdf_insert_page(doc_des, page_to, ref);
+}
+
+//-----------------------------------------------------------------------------
+// Copies the source pages spage..epage (inclusive) of doc_src into doc_des,
+// placing the first copied page at position apage and each following one
+// directly behind it.
+// The source pages are walked upwards when spage < epage, otherwise
+// downwards, which reverses their order in the target.
+// If show_progress > 0, a message is emitted after every show_progress pages.
+//-----------------------------------------------------------------------------
+static void JM_merge_range(
+        mupdf::PdfDocument& doc_des,
+        mupdf::PdfDocument& doc_src,
+        int spage,
+        int epage,
+        int apage,
+        int rotate,
+        int links,
+        int annots,
+        int show_progress,
+        mupdf::PdfGraftMap& graft_map
+        )
+{
+    const int total = mupdf::ll_fz_absi(epage - spage) + 1;  // total pages to copy
+    const int step = (spage < epage) ? 1 : -1;              // direction through the source
+
+    int src_page = spage;
+    int dest_page = apage;
+    for (int done = 1; done <= total; ++done)
+    {
+        page_merge(doc_des, doc_src, src_page, dest_page, rotate, links, annots, graft_map);
+        if (show_progress > 0 && done % show_progress == 0)
+        {
+            messagef("Inserted %i of %i pages.", done, total);
+        }
+        src_page += step;
+        ++dest_page;
+    }
+}
+
+/* Reports whether `pdf` is in a valid journalling state: either it has no
+journal at all, or pdf_undoredo_step(pdf, 0) yields a non-null result. */
+static bool JM_have_operation(mupdf::PdfDocument& pdf)
+{
+    // No journal: nothing to validate.
+    if (!pdf.m_internal->journal)
+    {
+        return true;
+    }
+    // Journal present: require that step 0 can be queried.
+    if (!mupdf::pdf_undoredo_step(pdf, 0))
+    {
+        return false;
+    }
+    return true;
+}
+
+/* Throws std::runtime_error unless JM_have_operation() accepts `pdf`. */
+static void JM_ensure_operation(mupdf::PdfDocument& pdf)
+{
+    if (JM_have_operation(pdf))
+    {
+        return;
+    }
+    throw std::runtime_error("No journalling operation started");
+}
+
+
+/* Inserts a page range of PDF `src` into PDF `doc`.
+
+from_page, to_page  source range; from_page is raised to at least 0 and
+                    both are limited to the last source page; a negative
+                    to_page means the last source page
+start_at            insertion position in `doc`; negative or too large
+                    values mean "behind the last page"
+rotate, links, annots, show_progress, graft_map
+                    passed through to JM_merge_range()
+final               not used in this function
+
+Throws std::runtime_error if either document is not a PDF or if the target
+is not in a valid journalling state. */
+static void FzDocument_insert_pdf(
+        mupdf::FzDocument& doc,
+        mupdf::FzDocument& src,
+        int from_page,
+        int to_page,
+        int start_at,
+        int rotate,
+        int links,
+        int annots,
+        int show_progress,
+        int final,
+        mupdf::PdfGraftMap& graft_map
+        )
+{
+    mupdf::PdfDocument pdfout = mupdf::pdf_specifics(doc);
+    mupdf::PdfDocument pdfsrc = mupdf::pdf_specifics(src);
+    const int outCount = mupdf::fz_count_pages(doc);
+    const int srcCount = mupdf::fz_count_pages(src);
+    const int last_src = srcCount - 1;
+
+    // normalized first source page: at least 0, at most the last page
+    const int first = std::min(std::max(from_page, 0), last_src);
+
+    // normalized last source page: negative selects the last page
+    const int last = (to_page < 0) ? last_src : std::min(to_page, last_src);
+
+    // normalized insertion point: negative selects "behind last page"
+    const int target = (start_at < 0) ? outCount : std::min(start_at, outCount);
+
+    if (!pdfout.m_internal || !pdfsrc.m_internal)
+    {
+        throw std::runtime_error("source or target not a PDF");
+    }
+    JM_ensure_operation(pdfout);
+    JM_merge_range(pdfout, pdfsrc, first, last, target, rotate, links, annots, show_progress, graft_map);
+}
+
+/* Returns the xref number of page `pno` of `this_doc`.
+
+Negative page numbers count from the end (-1 is the last page) and wrap
+around as often as needed. Throws std::runtime_error if the document is not
+a PDF, if it has no pages, or if `pno` is beyond the last page. */
+static int page_xref(mupdf::FzDocument& this_doc, int pno)
+{
+    mupdf::PdfDocument pdf = mupdf::pdf_specifics(this_doc);
+    if (!pdf.m_internal)
+    {
+        // Checked at run time, so release builds cannot continue with a
+        // null document.
+        throw std::runtime_error(MSG_IS_NO_PDF);
+    }
+    const int page_count = mupdf::fz_count_pages(this_doc);
+    if (page_count <= 0)
+    {
+        // No valid page number exists; also avoids wrapping a negative
+        // number forever.
+        throw std::runtime_error(MSG_BAD_PAGENO);
+    }
+    int n = pno;
+    if (n < 0)
+    {
+        // Map into [0, page_count): same result as repeatedly adding
+        // page_count until the value is non-negative.
+        n %= page_count;
+        if (n < 0)
+        {
+            n += page_count;
+        }
+    }
+    if (n >= page_count)
+    {
+        throw std::runtime_error(MSG_BAD_PAGENO);//, PyExc_ValueError);
+    }
+    return mupdf::pdf_to_num(mupdf::pdf_lookup_page_obj(pdf, n));
+}
+
+/* Creates an empty page of the given size and inserts it into `pdf` at
+position `pno`; the default of -1 is passed on unchanged to
+pdf_insert_page(). Throws std::runtime_error if `pdf` is not a PDF, if
+`pno` is below -1, or if no journalling operation is active. */
+static void _newPage(mupdf::PdfDocument& pdf, int pno=-1, float width=595, float height=842)
+{
+    if (!pdf.m_internal)
+    {
+        throw std::runtime_error(MSG_IS_NO_PDF);
+    }
+    if (pno < -1)
+    {
+        throw std::runtime_error(MSG_BAD_PAGENO);  // Should somehow be Python ValueError
+    }
+    JM_ensure_operation(pdf);
+
+    // The new page gets an empty /Resources dict and an empty /Contents buffer.
+    mupdf::FzRect page_box(0, 0, width, height);
+    mupdf::PdfObj empty_resources = mupdf::pdf_add_new_dict(pdf, 1);
+    mupdf::FzBuffer empty_contents;
+    mupdf::PdfObj new_page = mupdf::pdf_add_page(pdf, page_box, 0, empty_resources, empty_contents);
+    mupdf::pdf_insert_page(pdf, pno, new_page);
+}
+
+/* Overload for a generic document: obtains its PDF-specific form and
+delegates to the mupdf::PdfDocument overload. */
+static void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842)
+{
+    mupdf::PdfDocument as_pdf = mupdf::pdf_specifics(self);
+    _newPage(as_pdf, pno, width, height);
+}
+
+
+//------------------------------------------------------------------------
+// Collects the /NM entries of all items in the page's /Annots array and
+// returns them as a list of strings. Items without /NM are skipped; a page
+// without /Annots yields an empty list.
+//------------------------------------------------------------------------
+static std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page)
+{
+    std::vector< std::string> ids;
+    mupdf::PdfObj annot_array = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots));
+    if (annot_array.m_internal)
+    {
+        const int count = mupdf::pdf_array_len(annot_array);
+        for (int idx = 0; idx < count; ++idx)
+        {
+            mupdf::PdfObj entry = mupdf::pdf_array_get(annot_array, idx);
+            mupdf::PdfObj nm = mupdf::pdf_dict_gets(entry, "NM");
+            if (!nm.m_internal)
+            {
+                continue;
+            }
+            ids.push_back(mupdf::pdf_to_text_string(nm));
+        }
+    }
+    return ids;
+}
+
+
+//------------------------------------------------------------------------
+// Add a unique /NM key to an annotation or widget.
+// The name has the form "fitz-<stem><number>", where <number> is the
+// smallest non-negative integer for which the name is not yet used by any
+// annotation of the same page. A null 'stem' is treated as an empty string.
+//------------------------------------------------------------------------
+static void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem)
+{
+    mupdf::PdfPage page = mupdf::pdf_annot_page(annot);
+    mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot);
+    std::vector< std::string> names = JM_get_annot_id_list(page);
+
+    // Build candidates with std::string so that no allocation result has to
+    // be checked or freed manually.
+    const std::string prefix = std::string("fitz-") + (stem ? stem : "");
+    std::string stem_id;
+    for (int i = 0; ; ++i)
+    {
+        stem_id = prefix + std::to_string(i);
+        if (std::find(names.begin(), names.end(), stem_id) == names.end())
+        {
+            break;
+        }
+    }
+    mupdf::PdfObj name = mupdf::pdf_new_string(stem_id.c_str(), stem_id.size());
+    mupdf::pdf_dict_puts(annot_obj, "NM", name);
+    // Reset the document's resynth_required flag after the change above.
+    page.m_internal->doc->resynth_required = 0;
+}
+
+//----------------------------------------------------------------
+// page add_caret_annot
+// Creates a caret annotation on 'page', moves its rectangle so that the
+// top-left corner is at 'point' while keeping the size it was created
+// with, updates it and assigns it a unique /NM id with stem "A".
+//----------------------------------------------------------------
+static mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& page, mupdf::FzPoint& point)
+{
+    mupdf::PdfAnnot caret = mupdf::pdf_create_annot(page, ::PDF_ANNOT_CARET);
+    mupdf::FzRect initial = mupdf::pdf_annot_rect(caret);
+    mupdf::FzRect moved = mupdf::fz_make_rect(
+            point.x,
+            point.y,
+            point.x + initial.x1 - initial.x0,
+            point.y + initial.y1 - initial.y0
+            );
+    mupdf::pdf_set_annot_rect(caret, moved);
+    mupdf::pdf_update_annot(caret);
+    JM_add_annot_id(caret, "A");
+    return caret;
+}
+
+/* Overload for a generic page: converts it to a PDF page and delegates to
+the mupdf::PdfPage overload. */
+static mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& page, mupdf::FzPoint& point)
+{
+    mupdf::PdfPage  as_pdf_page = mupdf::pdf_page_from_fz_page(page);
+    return _add_caret_annot(as_pdf_page, point);
+}
+
+/* Returns the text of the annotation's /DA entry.
+
+The entry is looked up on the annotation (including inherited values); if it
+is missing there, the document's Root/AcroForm/DA is used instead. Returns
+nullptr if a std::exception is thrown during the lookup.
+NOTE(review): the returned pointer presumably refers to storage owned by the
+PDF object - confirm its lifetime before keeping it. */
+static const char* Tools_parse_da(mupdf::PdfAnnot& this_annot)
+{
+    mupdf::PdfObj annot_dict = mupdf::pdf_annot_obj(this_annot);
+    mupdf::PdfDocument pdf = mupdf::pdf_get_bound_document(annot_dict);
+    try
+    {
+        mupdf::PdfObj da = mupdf::pdf_dict_get_inheritable(annot_dict, PDF_NAME2(DA));
+        if (!da.m_internal)
+        {
+            // Fall back to the form-wide default appearance.
+            mupdf::PdfObj trailer = mupdf::pdf_trailer(pdf);
+            da = mupdf::pdf_dict_getl(
+                    &trailer,
+                    PDF_NAME(Root),
+                    PDF_NAME(AcroForm),
+                    PDF_NAME(DA),
+                    nullptr
+                    );
+        }
+        return mupdf::pdf_to_text_string(da);
+    }
+    catch (std::exception&)
+    {
+        return nullptr;
+    }
+}
+
+//----------------------------------------------------------------------------
+// Returns the content of an fz_buffer as a Python bytes object.
+// A null buffer gives an empty bytes object.
+//----------------------------------------------------------------------------
+static PyObject* JM_BinFromBuffer(fz_buffer* buffer)
+{
+    if (buffer)
+    {
+        unsigned char* data = nullptr;
+        const size_t size = mupdf::ll_fz_buffer_storage(buffer, &data);
+        return PyBytes_FromStringAndSize((const char*) data, size);
+    }
+    return PyBytes_FromStringAndSize("", 0);
+}
+// Same for the C++ wrapper class: forwards its underlying fz_buffer.
+static PyObject* JM_BinFromBuffer(mupdf::FzBuffer& buffer)
+{
+    fz_buffer* raw = buffer.m_internal;
+    return JM_BinFromBuffer(raw);
+}
+
+/* Returns the annotation's normal appearance stream (/AP /N) as Python
+bytes, or empty bytes if that entry is not a stream. */
+static PyObject* Annot_getAP(mupdf::PdfAnnot& annot)
+{
+    mupdf::PdfObj annot_dict = mupdf::pdf_annot_obj(annot);
+    mupdf::PdfObj normal_ap = mupdf::pdf_dict_getl(
+            &annot_dict,
+            PDF_NAME(AP),
+            PDF_NAME(N),
+            nullptr
+            );
+    if (!mupdf::pdf_is_stream(normal_ap))
+    {
+        return PyBytes_FromStringAndSize("", 0);
+    }
+    mupdf::FzBuffer stream_data = mupdf::pdf_load_stream(normal_ap);
+    return JM_BinFromBuffer(stream_data);
+}
+
+/* Stores `da_str` as the annotation's /DA text string and removes the /DS
+and /RC entries from the annotation. */
+void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str)
+{
+    mupdf::PdfObj annot_dict = mupdf::pdf_annot_obj(this_annot);
+    mupdf::pdf_dict_put_text_string(annot_dict, PDF_NAME2(DA), da_str);
+    // Neither of the following keys is supported, so drop them.
+    mupdf::pdf_dict_del(annot_dict, PDF_NAME2(DS));
+    mupdf::pdf_dict_del(annot_dict, PDF_NAME2(RC));
+}
+
+/* Reads item `idx` of sequence `obj` as a double into `*result`.
+Returns 0 on success and 1 if the item cannot be fetched or converted; a
+conversion error is cleared before returning. */
+static int
+jm_float_item(PyObject* obj, Py_ssize_t idx, double* result)
+{
+    PyObject* element = PySequence_ITEM(obj, idx);
+    if (element == nullptr)
+    {
+        return 1;
+    }
+    const double value = PyFloat_AsDouble(element);
+    Py_DECREF(element);
+    *result = value;
+    if (!PyErr_Occurred())
+    {
+        return 0;
+    }
+    PyErr_Clear();
+    return 1;
+}
+
+
+/* Converts a Python sequence of two numbers into a point.
+
+Each coordinate is limited to the range FZ_MIN_INF_RECT..FZ_MAX_INF_RECT.
+If `p` is null, not a sequence of length 2, or an item cannot be converted
+to a float, the point (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) is returned. */
+static mupdf::FzPoint JM_point_from_py(PyObject* p)
+{
+    fz_point invalid = fz_make_point(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT);
+    if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2)
+    {
+        return invalid;
+    }
+    double coords[2];
+    for (int k = 0; k < 2; ++k)
+    {
+        if (jm_float_item(p, k, &coords[k]) == 1)
+        {
+            return invalid;
+        }
+    }
+    for (int k = 0; k < 2; ++k)
+    {
+        if (coords[k] < FZ_MIN_INF_RECT) coords[k] = FZ_MIN_INF_RECT;
+        if (coords[k] > FZ_MAX_INF_RECT) coords[k] = FZ_MAX_INF_RECT;
+    }
+    return fz_make_point(coords[0], coords[1]);
+}
+
+/* Appends `item` to `list` and releases the caller's reference to `item`.
+
+The reference is released in every case, including when the arguments are
+rejected, so callers can always pass a freshly created object without
+leaking it. Returns -2 if `list` is null or not a list or `item` is null,
+otherwise the result of PyList_Append(). */
+static int s_list_append_drop(PyObject* list, PyObject* item)
+{
+    if (!list || !PyList_Check(list) || !item)
+    {
+        Py_XDECREF(item);
+        return -2;
+    }
+    int rc = PyList_Append(list, item);
+    Py_DECREF(item);
+    return rc;
+}
+
+/* Appends `item` to `list` and releases the caller's reference to `item`.
+
+The reference is released in every case, including when the arguments are
+rejected, so callers can always pass a freshly created object without
+leaking it. Returns -2 if `list` is null or not a list or `item` is null,
+otherwise the result of PyList_Append(). */
+static int LIST_APPEND_DROP(PyObject *list, PyObject *item)
+{
+    if (!list || !PyList_Check(list) || !item)
+    {
+        Py_XDECREF(item);
+        return -2;
+    }
+    int rc = PyList_Append(list, item);
+    Py_DECREF(item);
+    return rc;
+}
+
+/* Appends `item` to `list` without touching the caller's reference to
+`item`. Returns -2 if `list` is null or not a list or `item` is null,
+otherwise the result of PyList_Append(). */
+static int LIST_APPEND(PyObject *list, PyObject *item)
+{
+    const bool usable = list && PyList_Check(list) && item;
+    if (usable)
+    {
+        return PyList_Append(list, item);
+    }
+    return -2;
+}
+
+/* Stores `value` under `key` in `dict` and releases the caller's reference
+to `value`.
+
+The reference is released in every case, including when the arguments are
+rejected (for example because `key` is null), so callers can always pass a
+freshly created object without leaking it. Returns -2 if `dict` is null or
+not a dict, or `key` or `value` is null; otherwise the result of
+PyDict_SetItem(). */
+static int DICT_SETITEM_DROP(PyObject *dict, PyObject *key, PyObject *value)
+{
+    if (!dict || !PyDict_Check(dict) || !key || !value)
+    {
+        Py_XDECREF(value);
+        return -2;
+    }
+    int rc = PyDict_SetItem(dict, key, value);
+    Py_DECREF(value);
+    return rc;
+}
+
+/* Stores `value` under the C string `key` in `dict` and releases the
+caller's reference to `value`.
+
+The reference is released in every case, including when the arguments are
+rejected, so callers can always pass a freshly created object without
+leaking it. Returns -2 if `dict` is null or not a dict, or `key` or `value`
+is null; otherwise the result of PyDict_SetItemString(). */
+static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value)
+{
+    if (!dict || !PyDict_Check(dict) || !key || !value)
+    {
+        Py_XDECREF(value);
+        return -2;
+    }
+    int rc = PyDict_SetItemString(dict, key, value);
+    Py_DECREF(value);
+    return rc;
+}
+
+
+//-----------------------------------------------------------------------------
+// Functions converting between PySequences and pymupdf geometry objects
+//-----------------------------------------------------------------------------
+/* Reads item `idx` of sequence `obj` as an int into `*result`.
+
+Python ints are converted via PyLong_AsLong() and Python floats via
+PyFloat_AsDouble(), each followed by a cast to int. Returns 0 on success and
+1 if the item cannot be fetched, has another type, or the conversion raised
+an error (which is then cleared). */
+static int
+jm_init_item(PyObject* obj, Py_ssize_t idx, int* result)
+{
+    PyObject* element = PySequence_ITEM(obj, idx);
+    if (!element)
+    {
+        return 1;
+    }
+    const bool is_int = PyLong_Check(element);
+    const bool is_float = !is_int && PyFloat_Check(element);
+    if (is_int)
+    {
+        *result = (int) PyLong_AsLong(element);
+    }
+    else if (is_float)
+    {
+        *result = (int) PyFloat_AsDouble(element);
+    }
+    Py_DECREF(element);
+    if (!is_int && !is_float)
+    {
+        // Unsupported item type.
+        return 1;
+    }
+    if (PyErr_Occurred())
+    {
+        PyErr_Clear();
+        return 1;
+    }
+    return 0;
+}
+
+// TODO: ------------------------------------------------------------------
+// This is a temporary solution and should be replaced by a C++ extension:
+// There is no way in Python to specify an array of fz_point - as is required
+// for function pdf_set_annot_callout_line().
+//
+// Converts the first 'count' items of the Python tuple 'callout' to points
+// and sets them as the callout line of 'annot'. At most 3 points fit into
+// the local array; a larger 'count' throws std::runtime_error instead of
+// writing beyond it.
+static void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count)
+{
+    const int max_points = 3;
+    if (count > max_points)
+    {
+        throw std::runtime_error(MSG_BAD_ARG_POINTS);
+    }
+    fz_point points[max_points];
+    mupdf::FzPoint p;
+    for (int i = 0; i < count; i++)
+    {
+        p = JM_point_from_py(PyTuple_GetItem(callout, (Py_ssize_t) i));
+        points[i] = fz_make_point(p.x, p.y);
+    }
+    mupdf::pdf_set_annot_callout_line(annot, points, count);
+}
+
+
+//----------------------------------------------------------------------------
+// Return list of outline xref numbers. Recursive function. Arguments:
+// 'obj' first OL item
+// 'xrefs' empty Python list
+//
+// Each visited item's object number is appended to 'xrefs'. For every item
+// the function first recurses into its /First entry (if that is a dict) and
+// then continues with its /Next entry. The walk of one level stops when an
+// item's number is already in the list or the item has a /Type key.
+// The returned pointer is the 'xrefs' list that was passed in.
+//----------------------------------------------------------------------------
+static PyObject* JM_outline_xrefs(mupdf::PdfObj obj, PyObject* xrefs)
+{
+    if (!obj.m_internal)
+    {
+        return xrefs;
+    }
+    PyObject* newxref = nullptr;
+    mupdf::PdfObj thisobj = obj;
+    while (thisobj.m_internal)
+    {
+        int nxr = mupdf::pdf_to_num(thisobj);
+        // NOTE(review): the result of PyLong_FromLong() is not checked.
+        newxref = PyLong_FromLong((long) nxr);
+        // Any non-zero result of PySequence_Contains() ends the walk here.
+        if (PySequence_Contains(xrefs, newxref)
+                or mupdf::pdf_dict_get(thisobj, PDF_NAME2(Type)).m_internal
+                )
+        {
+            // circular ref or top of chain: terminate
+            Py_DECREF(newxref);
+            break;
+        }
+        // Appends and releases our reference to newxref.
+        s_list_append_drop(xrefs, newxref);
+        mupdf::PdfObj first = mupdf::pdf_dict_get(thisobj, PDF_NAME2(First));  // try go down
+        if (mupdf::pdf_is_dict(first))
+        {
+            xrefs = JM_outline_xrefs(first, xrefs);
+        }
+        thisobj = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Next));  // try go next
+        // Note the order: 'thisobj' already refers to the /Next entry here,
+        // so /Parent is looked up on that entry, not on the item just
+        // processed.
+        // NOTE(review): presumably a missing /Next therefore yields a null
+        // parent, which ends this loop and lets the recursive caller
+        // continue with its own /Next - confirm.
+        mupdf::PdfObj parent = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Parent));  // get parent
+        if (!mupdf::pdf_is_dict(thisobj))
+        {
+            thisobj = parent;
+        }
+    }
+    return xrefs;
+}
+
+
+/* Python string objects used as dictionary keys throughout this module.
+They start out as NULL and are assigned interned strings in the %init
+section of this file.
+NOTE(review): `dictkey_char_bidi` has no assignment in that section - confirm
+whether it is set elsewhere before it is used as a key. */
+PyObject* dictkey_align = NULL;
+PyObject* dictkey_ascender = NULL;
+PyObject* dictkey_bidi = NULL;
+PyObject* dictkey_bbox = NULL;
+PyObject* dictkey_blocks = NULL;
+PyObject* dictkey_bpc = NULL;
+PyObject* dictkey_c = NULL;
+PyObject* dictkey_chars = NULL;
+PyObject* dictkey_color = NULL;
+PyObject* dictkey_colorspace = NULL;
+PyObject* dictkey_content = NULL;
+PyObject* dictkey_creationDate = NULL;
+PyObject* dictkey_cs_name = NULL;
+PyObject* dictkey_da = NULL;
+PyObject* dictkey_dashes = NULL;
+PyObject* dictkey_desc = NULL;
+PyObject* dictkey_descender = NULL;
+PyObject* dictkey_dir = NULL;
+PyObject* dictkey_effect = NULL;
+PyObject* dictkey_ext = NULL;
+PyObject* dictkey_filename = NULL;
+PyObject* dictkey_fill = NULL;
+PyObject* dictkey_flags = NULL;
+PyObject* dictkey_char_bidi = NULL;
+PyObject* dictkey_char_flags = NULL;
+PyObject* dictkey_font = NULL;
+PyObject* dictkey_glyph = NULL;
+PyObject* dictkey_height = NULL;
+PyObject* dictkey_id = NULL;
+PyObject* dictkey_image = NULL;
+PyObject* dictkey_items = NULL;
+PyObject* dictkey_length = NULL;
+PyObject* dictkey_lines = NULL;
+PyObject* dictkey_matrix = NULL;
+PyObject* dictkey_modDate = NULL;
+PyObject* dictkey_name = NULL;
+PyObject* dictkey_number = NULL;
+PyObject* dictkey_origin = NULL;
+PyObject* dictkey_rect = NULL;
+PyObject* dictkey_size = NULL;
+PyObject* dictkey_smask = NULL;
+PyObject* dictkey_spans = NULL;
+PyObject* dictkey_stroke = NULL;
+PyObject* dictkey_style = NULL;
+PyObject* dictkey_subject = NULL;
+PyObject* dictkey_text = NULL;
+PyObject* dictkey_title = NULL;
+PyObject* dictkey_type = NULL;
+PyObject* dictkey_ufilename = NULL;
+PyObject* dictkey_width = NULL;
+PyObject* dictkey_wmode = NULL;
+PyObject* dictkey_xref = NULL;
+PyObject* dictkey_xres = NULL;
+PyObject* dictkey_yres = NULL;
+
+/* Stores `value` under `key` in `dict` and releases the caller's reference
+to `value`.
+
+The reference is released in every case, including when the arguments are
+rejected (for example because `key` is null), so callers can always pass a
+freshly created object without leaking it. Returns -2 if `dict` is null or
+not a dict, or `key` or `value` is null; otherwise the result of
+PyDict_SetItem(). */
+static int dict_setitem_drop(PyObject* dict, PyObject* key, PyObject* value)
+{
+    if (!dict || !PyDict_Check(dict) || !key || !value)
+    {
+        Py_XDECREF(value);
+        return -2;
+    }
+    int rc = PyDict_SetItem(dict, key, value);
+    Py_DECREF(value);
+    return rc;
+}
+
+/* Stores `value` under the C string `key` in `dict` and releases the
+caller's reference to `value`.
+
+The reference is released in every case, including when the arguments are
+rejected, so callers can always pass a freshly created object without
+leaking it. Returns -2 if `dict` is null or not a dict, or `key` or `value`
+is null; otherwise the result of PyDict_SetItemString(). */
+static int dict_setitemstr_drop(PyObject* dict, const char* key, PyObject* value)
+{
+    if (!dict || !PyDict_Check(dict) || !key || !value)
+    {
+        Py_XDECREF(value);
+        return -2;
+    }
+    int rc = PyDict_SetItemString(dict, key, value);
+    Py_DECREF(value);
+    return rc;
+}
+
+
+// Add outline details to the entries of a table of contents.
+//
+// `items` is the TOC as a sequence of entries; each entry must carry a dict
+// at index 3. For every entry that dict receives the keys dictkey_xref and
+// "zoom" and, depending on the outline item, "italic", "bold", "collapse"
+// and dictkey_color. Nothing is changed if the PDF has no /Root, /Outlines
+// or /First entry, or if no outline xrefs are found.
+//
+// Any std::exception is swallowed (best effort): the function returns
+// normally and entries processed so far keep their updates.
+static void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items)
+{
+    PyObject* item=nullptr;      // owned reference to items[i] while in the loop
+    PyObject* itemdict=nullptr;  // owned reference to items[i][3] while in the loop
+    PyObject* xrefs=nullptr;
+
+    PyObject* bold = PyUnicode_FromString("bold");
+    PyObject* italic = PyUnicode_FromString("italic");
+    PyObject* collapse = PyUnicode_FromString("collapse");
+    PyObject* zoom = PyUnicode_FromString("zoom");
+
+    try
+    {
+        /* Need to define these things early because later code uses
+        `goto`; otherwise we get compiler warnings 'jump bypasses variable
+        initialization' */
+        int xref = 0;
+        mupdf::PdfObj   root;
+        mupdf::PdfObj   olroot;
+        mupdf::PdfObj   first;
+        Py_ssize_t  n;
+        Py_ssize_t  m;
+
+        root = mupdf::pdf_dict_get(mupdf::pdf_trailer(pdf), PDF_NAME2(Root));
+        if (!root.m_internal) goto end;
+
+        olroot = mupdf::pdf_dict_get(root, PDF_NAME2(Outlines));
+        if (!olroot.m_internal) goto end;
+
+        first = mupdf::pdf_dict_get(olroot, PDF_NAME2(First));
+        if (!first.m_internal) goto end;
+
+        xrefs = PyList_New(0);  // pre-allocate an empty list
+        xrefs = JM_outline_xrefs(first, xrefs);
+        n = PySequence_Size(xrefs);
+        m = PySequence_Size(items);
+        if (!n) goto end;
+
+        if (n != m)
+        {
+            throw std::runtime_error("internal error finding outline xrefs");
+        }
+
+        // update all TOC item dictionaries
+        for (int i = 0; i < n; i++)
+        {
+            jm_init_item(xrefs, i, &xref);
+            item = PySequence_ITEM(items, i);
+            // PySequence_ITEM() does not check its argument, so guard NULL.
+            itemdict = item ? PySequence_ITEM(item, 3) : nullptr;
+            if (!itemdict || !PyDict_Check(itemdict))
+            {
+                throw std::runtime_error("need non-simple TOC format");
+            }
+            // PySequence_ITEM() returns a new reference; the *_drop helper
+            // releases it after the dict has taken its own.
+            dict_setitem_drop(itemdict, dictkey_xref, PySequence_ITEM(xrefs, i));
+            mupdf::PdfObj bm = mupdf::pdf_load_object(pdf, xref);
+            int flags = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(F)));
+            if (flags == 1)
+            {
+                PyDict_SetItem(itemdict, italic, Py_True);
+            }
+            else if (flags == 2)
+            {
+                PyDict_SetItem(itemdict, bold, Py_True);
+            }
+            else if (flags == 3)
+            {
+                PyDict_SetItem(itemdict, italic, Py_True);
+                PyDict_SetItem(itemdict, bold, Py_True);
+            }
+            int count = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(Count)));
+            if (count < 0)
+            {
+                PyDict_SetItem(itemdict, collapse, Py_True);
+            }
+            else if (count > 0)
+            {
+                PyDict_SetItem(itemdict, collapse, Py_False);
+            }
+            mupdf::PdfObj col = mupdf::pdf_dict_get(bm, PDF_NAME2(C));
+            if (mupdf::pdf_is_array(col) && mupdf::pdf_array_len(col) == 3)
+            {
+                PyObject* color = PyTuple_New(3);
+                PyTuple_SET_ITEM(color, 0, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 0))));
+                PyTuple_SET_ITEM(color, 1, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 1))));
+                PyTuple_SET_ITEM(color, 2, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 2))));
+                dict_setitem_drop(itemdict, dictkey_color, color);
+            }
+            float z=0;
+            mupdf::PdfObj obj = mupdf::pdf_dict_get(bm, PDF_NAME2(Dest));
+            if (!obj.m_internal || !mupdf::pdf_is_array(obj))
+            {
+                obj = mupdf::pdf_dict_getl(&bm, PDF_NAME(A), PDF_NAME(D), nullptr);
+            }
+            if (mupdf::pdf_is_array(obj) && mupdf::pdf_array_len(obj) == 5)
+            {
+                z = mupdf::pdf_to_real(mupdf::pdf_array_get(obj, 4));
+            }
+            dict_setitem_drop(itemdict, zoom, Py_BuildValue("f", z));
+            // PyList_SetItem() steals our references; forget them so the
+            // cleanup below never touches a pointer we no longer own.
+            PyList_SetItem(item, 3, itemdict);
+            itemdict = nullptr;
+            PyList_SetItem(items, i, item);
+            item = nullptr;
+        }
+        end:;
+    }
+    catch (std::exception&)
+    {
+    }
+    // Non-NULL only if an exception interrupted an iteration.
+    Py_CLEAR(itemdict);
+    Py_CLEAR(item);
+    Py_CLEAR(xrefs);
+    Py_CLEAR(bold);
+    Py_CLEAR(italic);
+    Py_CLEAR(collapse);
+    Py_CLEAR(zoom);
+}
+
+// Overload for a generic document: converts it with
+// pdf_document_from_fz_document() and delegates to the PdfDocument overload.
+static void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items)
+{
+    mupdf::PdfDocument pdf_doc = mupdf::pdf_document_from_fz_document(document);
+    Document_extend_toc_items(pdf_doc, items);
+}
+
+//-----------------------------------------------------------------------------
+// Python tuple of four floats (x0, y0, x1, y1) from an fz_rect
+//-----------------------------------------------------------------------------
+static PyObject* JM_py_from_rect(fz_rect r)
+{
+    PyObject* coords = Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1);
+    return coords;
+}
+// Overload for the C++ wrapper class: forwards the wrapped fz_rect.
+static PyObject* JM_py_from_rect(mupdf::FzRect r)
+{
+    const fz_rect* raw = r.internal();
+    return JM_py_from_rect(*raw);
+}
+
+//-----------------------------------------------------------------------------
+// Python tuple of two floats (x, y) from an fz_point
+//-----------------------------------------------------------------------------
+static PyObject* JM_py_from_point(fz_point p)
+{
+    PyObject* xy = Py_BuildValue("ff", p.x, p.y);
+    return xy;
+}
+
+//-----------------------------------------------------------------------------
+// Python tuple of four (x, y) pairs from an fz_quad, in the order
+// upper-left, upper-right, lower-left, lower-right.
+//-----------------------------------------------------------------------------
+static PyObject *
+JM_py_from_quad(fz_quad q)
+{
+    const fz_point& ul = q.ul;
+    const fz_point& ur = q.ur;
+    const fz_point& ll = q.ll;
+    const fz_point& lr = q.lr;
+    return Py_BuildValue(
+            "((f,f),(f,f),(f,f),(f,f))",
+            ul.x, ul.y,
+            ur.x, ur.y,
+            ll.x, ll.y,
+            lr.x, lr.y
+            );
+}
+
+//----------------------------------------------------------------
+// annotation rectangle, as returned by pdf_bound_annot()
+//----------------------------------------------------------------
+static mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot)
+{
+    return mupdf::pdf_bound_annot(annot);
+}
+
+// Annotation rectangle as a Python tuple; uses the low-level
+// ll_pdf_bound_annot() on the wrapped pdf_annot.
+static PyObject* Annot_rect3(mupdf::PdfAnnot& annot)
+{
+    const fz_rect bounds = mupdf::ll_pdf_bound_annot(annot.m_internal);
+    PyObject* result = JM_py_from_rect(bounds);
+    return result;
+}
+
+//-----------------------------------------------------------------------------
+// fz_rect from a Python sequence of four numbers.
+// Falls back to the infinite rectangle if `r` is NULL, is not a sequence of
+// length 4, or an item cannot be read by jm_float_item(). Coordinates are
+// clamped to [FZ_MIN_INF_RECT, FZ_MAX_INF_RECT].
+//-----------------------------------------------------------------------------
+static fz_rect JM_rect_from_py(PyObject* r)
+{
+    // the fallback value, built on demand
+    auto infinite_rect = []() -> fz_rect
+    {
+        return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal();
+    };
+
+    const bool usable = r && PySequence_Check(r) && PySequence_Size(r) == 4;
+    if (!usable)
+    {
+        return infinite_rect();
+    }
+    double coord[4];
+    for (int idx = 0; idx < 4; ++idx)
+    {
+        if (jm_float_item(r, idx, &coord[idx]) == 1)
+        {
+            return infinite_rect();
+        }
+        if (coord[idx] < FZ_MIN_INF_RECT) coord[idx] = FZ_MIN_INF_RECT;
+        if (coord[idx] > FZ_MAX_INF_RECT) coord[idx] = FZ_MAX_INF_RECT;
+    }
+    const float x0 = (float) coord[0];
+    const float y0 = (float) coord[1];
+    const float x1 = (float) coord[2];
+    const float y1 = (float) coord[3];
+    return mupdf::ll_fz_make_rect(x0, y0, x1, y1);
+}
+
+//-----------------------------------------------------------------------------
+// fz_matrix from a Python sequence of six numbers.
+// Returns fz_identity if `m` is NULL or not a sequence of length 6, and a
+// default-constructed FzMatrix value if an item cannot be read by
+// jm_float_item().
+//-----------------------------------------------------------------------------
+static fz_matrix JM_matrix_from_py(PyObject* m)
+{
+    const bool usable = m && PySequence_Check(m) && PySequence_Size(m) == 6;
+    if (!usable)
+    {
+        return fz_identity;
+    }
+
+    double coef[6];
+    for (int idx = 0; idx < 6; ++idx)
+    {
+        if (jm_float_item(m, idx, &coef[idx]) == 1)
+        {
+            return *mupdf::FzMatrix().internal();
+        }
+    }
+    const float a = (float) coef[0];
+    const float b = (float) coef[1];
+    const float c = (float) coef[2];
+    const float d = (float) coef[3];
+    const float e = (float) coef[4];
+    const float f = (float) coef[5];
+    return mupdf::ll_fz_make_matrix(a, b, c, d, e, f);
+}
+
+// Transform a rectangle by a matrix, both given as Python sequences, and
+// return the result as a Python tuple. Unusable arguments fall back to the
+// defaults of JM_rect_from_py() / JM_matrix_from_py().
+PyObject* util_transform_rect(PyObject* rect, PyObject* matrix)
+{
+    const fz_rect source = JM_rect_from_py(rect);
+    const fz_matrix transform = JM_matrix_from_py(matrix);
+    const fz_rect transformed = mupdf::ll_fz_transform_rect(source, transform);
+    return JM_py_from_rect(transformed);
+}
+
+//----------------------------------------------------------------------------
+// Normalize a /Rotate value to one of 0, 90, 180, 270.
+// The angle is reduced into [0, 360); anything that is not a multiple of 90
+// yields 0.
+//----------------------------------------------------------------------------
+static int JM_norm_rotation(int rotate)
+{
+    int angle = rotate % 360;  // now within (-360, 360)
+    if (angle < 0)
+    {
+        angle += 360;
+    }
+    return (angle % 90 == 0) ? angle : 0;
+}
+
+
+//----------------------------------------------------------------------------
+// A PDF page's (inheritable) /Rotate value, normalized to one of
+// (0, 90, 180, 270)
+//----------------------------------------------------------------------------
+static int JM_page_rotation(mupdf::PdfPage& page)
+{
+    mupdf::PdfObj rotate_obj = mupdf::pdf_dict_get_inheritable(page.obj(), PDF_NAME2(Rotate));
+    const int raw_angle = mupdf::pdf_to_int(rotate_obj);
+    return JM_norm_rotation(raw_angle);
+}
+
+
+//----------------------------------------------------------------------------
+// A PDF page's (inheritable) MediaBox.
+// An empty or infinite box is replaced by (0, 0, 612, 792); the corners are
+// then ordered so that x0 <= x1 and y0 <= y1. If the result is less than 1
+// wide or high, the unit rectangle is returned instead.
+//----------------------------------------------------------------------------
+static mupdf::FzRect JM_mediabox(mupdf::PdfObj& page_obj)
+{
+    mupdf::FzRect raw = mupdf::pdf_to_rect(
+            mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(MediaBox))
+            );
+    const bool unusable = mupdf::fz_is_empty_rect(raw) || mupdf::fz_is_infinite_rect(raw);
+    if (unusable)
+    {
+        raw.x0 = 0;
+        raw.y0 = 0;
+        raw.x1 = 612;
+        raw.y1 = 792;
+    }
+
+    mupdf::FzRect normalized;
+    normalized.x0 = mupdf::fz_min(raw.x0, raw.x1);
+    normalized.y0 = mupdf::fz_min(raw.y0, raw.y1);
+    normalized.x1 = mupdf::fz_max(raw.x0, raw.x1);
+    normalized.y1 = mupdf::fz_max(raw.y0, raw.y1);
+
+    const bool too_narrow = normalized.x1 - normalized.x0 < 1;
+    const bool too_flat = normalized.y1 - normalized.y0 < 1;
+    if (too_narrow || too_flat)
+    {
+        normalized = *mupdf::FzRect(mupdf::FzRect::Fixed_UNIT).internal(); //fz_unit_rect;
+    }
+    return normalized;
+}
+
+
+//----------------------------------------------------------------------------
+// A PDF page's (inheritable) CropBox.
+// Falls back to the MediaBox if the CropBox is empty or infinite. The y
+// coordinates are then re-expressed relative to the MediaBox's y1.
+//----------------------------------------------------------------------------
+mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj)
+{
+    mupdf::FzRect media = JM_mediabox(page_obj);
+    mupdf::FzRect crop = mupdf::pdf_to_rect(
+                mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(CropBox))
+                );
+    const bool unusable = mupdf::fz_is_infinite_rect(crop) || mupdf::fz_is_empty_rect(crop);
+    if (unusable)
+    {
+        crop = media;
+    }
+    // compute both values before overwriting either coordinate
+    const float flipped_y0 = media.y1 - crop.y1;
+    const float flipped_y1 = media.y1 - crop.y0;
+    crop.y0 = flipped_y0;
+    crop.y1 = flipped_y1;
+    return crop;
+}
+
+
+//----------------------------------------------------------------------------
+// Width and height of the UNROTATED page, taken from its CropBox and
+// returned as a point (x = width, y = height)
+//----------------------------------------------------------------------------
+static mupdf::FzPoint JM_cropbox_size(mupdf::PdfObj& page_obj)
+{
+    mupdf::FzRect box = JM_cropbox(page_obj);
+    float width;
+    if (box.x0 < box.x1)
+    {
+        width = box.x1 - box.x0;
+    }
+    else
+    {
+        width = box.x0 - box.x1;
+    }
+    float height;
+    if (box.y0 < box.y1)
+    {
+        height = box.y1 - box.y0;
+    }
+    else
+    {
+        height = box.y0 - box.y1;
+    }
+    mupdf::FzPoint dims;
+    dims = fz_make_point(width, height);
+    return dims;
+}
+
+
+//----------------------------------------------------------------------------
+// Matrix for the page's rotation.
+// Returns a default-constructed matrix when `page` wraps no PDF page or its
+// rotation is 0; otherwise a matrix built from the rotation (90, 180, 270)
+// and the unrotated CropBox size.
+//----------------------------------------------------------------------------
+static mupdf::FzMatrix JM_rotate_page_matrix(mupdf::PdfPage& page)
+{
+    if (!page.m_internal)
+    {
+        return *mupdf::FzMatrix().internal();  // no valid pdf page given
+    }
+    const int rotation = JM_page_rotation(page);
+    if (rotation == 0)
+    {
+        return *mupdf::FzMatrix().internal();  // no rotation
+    }
+
+    auto page_obj = page.obj();
+    mupdf::FzPoint size = JM_cropbox_size(page_obj);
+    const float width = size.x;
+    const float height = size.y;
+
+    mupdf::FzMatrix result;
+    switch (rotation)
+    {
+        case 90:
+            result = mupdf::fz_make_matrix(0, 1, -1, 0, height, 0);
+            break;
+        case 180:
+            result = mupdf::fz_make_matrix(-1, 0, 0, -1, width, height);
+            break;
+        default:  // 270 is the only remaining normalized value
+            result = mupdf::fz_make_matrix(0, -1, 1, 0, 0, width);
+            break;
+    }
+    return result;
+}
+
+
+// Inverse of the matrix returned by JM_rotate_page_matrix().
+static mupdf::FzMatrix JM_derotate_page_matrix(mupdf::PdfPage& page)
+{
+    mupdf::FzMatrix rotation = JM_rotate_page_matrix(page);
+    return mupdf::fz_invert_matrix(rotation);
+}
+
+//-----------------------------------------------------------------------------
+// Python tuple of six floats (a, b, c, d, e, f) from a matrix
+//-----------------------------------------------------------------------------
+static PyObject* JM_py_from_matrix(mupdf::FzMatrix m)
+{
+    PyObject* coefficients = Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f);
+    return coefficients;
+}
+
+// Derotation matrix of a PDF page; a default-constructed matrix if
+// `pdfpage` wraps no page.
+static mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage)
+{
+    if (pdfpage.m_internal)
+    {
+        return JM_derotate_page_matrix(pdfpage);
+    }
+    return mupdf::FzMatrix();
+}
+
+// Overload for a generic page: converts it with pdf_page_from_fz_page() and
+// delegates to the PdfPage overload.
+static mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& page)
+{
+    mupdf::PdfPage as_pdf = mupdf::pdf_page_from_fz_page(page);
+    return Page_derotate_matrix(as_pdf);
+}
+
+
+// Collect information about the entries of a page's /Annots array.
+//
+// Returns a new Python list with one (xref, type, id) tuple per entry that
+// has a /Subtype naming a known annotation type; `id` is the entry's /NM
+// value converted by pdf_to_text_string(). If MuPDF raises an error, the
+// list built so far is returned.
+static PyObject *lll_JM_get_annot_xref_list(pdf_obj *page_obj)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    PyObject *names = PyList_New(0);
+    pdf_obj *id, *subtype, *annots, *annot_obj;
+    int xref, type, i, n;
+    fz_try(ctx) {
+        annots = pdf_dict_get(ctx, page_obj, PDF_NAME(Annots));
+        n = pdf_array_len(ctx, annots);
+        for (i = 0; i < n; i++) {
+            annot_obj = pdf_array_get(ctx, annots, i);
+            xref = pdf_to_num(ctx, annot_obj);
+            subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype));
+            if (!subtype) {
+                continue;  // subtype is required
+            }
+            type = pdf_annot_type_from_string(ctx, pdf_to_name(ctx, subtype));
+            if (type == PDF_ANNOT_UNKNOWN) {
+                continue;  // only accept valid annot types
+            }
+            id = pdf_dict_gets(ctx, annot_obj, "NM");
+            LIST_APPEND_DROP(names, Py_BuildValue("iis", xref, type, pdf_to_text_string(ctx, id)));
+        }
+    }
+    fz_catch(ctx) {
+        // best effort: hand back whatever was collected before the error
+        return names;
+    }
+    return names;
+}
+//------------------------------------------------------------------------
+// return the xrefs and /NM ids of a page's annots, links and fields
+//
+// Returns a new Python list: empty if `page_obj` wraps no object, otherwise
+// the list built by lll_JM_get_annot_xref_list().
+//------------------------------------------------------------------------
+static PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj)
+{
+    if (!page_obj.m_internal)
+    {
+        // Only allocate here: the helper creates its own list, so an
+        // unconditional allocation would be leaked on the normal path.
+        return PyList_New(0);
+    }
+    return lll_JM_get_annot_xref_list(page_obj.m_internal);
+}
+
+// Print the PDF object `what` into a new buffer and return it.
+// `compress` and `ascii` are passed through to pdf_print_obj(). The output
+// is closed and the buffer terminated before it is returned.
+static mupdf::FzBuffer JM_object_to_buffer(const mupdf::PdfObj& what, int compress, int ascii)
+{
+    mupdf::FzBuffer buffer = mupdf::fz_new_buffer(512);
+    mupdf::FzOutput sink(buffer);
+    mupdf::pdf_print_obj(sink, what, compress, ascii);
+    sink.fz_close_output();
+    mupdf::fz_terminate_buffer(buffer);
+    return buffer;
+}
+
+static PyObject* JM_EscapeStrFromBuffer(mupdf::FzBuffer& buff)
+{
+    if (!buff.m_internal)
+    {
+        return PyUnicode_FromString("");
+    }
+    unsigned char* s = nullptr;
+    size_t len = mupdf::fz_buffer_storage(buff, &s);
+    PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace");
+    if (!val)
+    {
+        val = PyUnicode_FromString("");
+        PyErr_Clear();
+    }
+    return val;
+}
+
+// Source of a PDF object as a Python string.
+//
+// `xref` selects the object: a number in [1, xref length) loads that
+// object, -1 selects the trailer. `compressed` and `ascii` are passed on to
+// JM_object_to_buffer().
+// Throws std::runtime_error if `pdf` wraps no document (MSG_IS_NO_PDF) or
+// `xref` is out of range (MSG_BAD_XREF).
+static PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0)
+{
+    if (!pdf.m_internal)
+    {
+        throw std::runtime_error(MSG_IS_NO_PDF);
+    }
+    const int xref_count = mupdf::pdf_xref_len(pdf);
+    const bool is_object_number = (xref >= 1 && xref < xref_count);
+    if (!is_object_number && xref != -1)
+    {
+        throw std::runtime_error(MSG_BAD_XREF);
+    }
+    mupdf::PdfObj target = (xref > 0) ? mupdf::pdf_load_object(pdf, xref) : mupdf::pdf_trailer(pdf);
+    mupdf::FzBuffer printed = JM_object_to_buffer(mupdf::pdf_resolve_indirect(target), compressed, ascii);
+    return JM_EscapeStrFromBuffer(printed);
+}
+
+// Overload for a generic document: converts it with
+// pdf_document_from_fz_document() and delegates to the PdfDocument overload.
+static PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0)
+{
+    mupdf::PdfDocument as_pdf = mupdf::pdf_document_from_fz_document(document);
+    PyObject* source = xref_object(as_pdf, xref, compressed, ascii);
+    return source;
+}
+
+
+//-------------------------------------
+// fz_output for Python file objects
+//-------------------------------------
+
+// Python bool telling whether the link's URI is external according to
+// fz_is_external_link(); False if the link has no URI.
+static PyObject* Link_is_external(mupdf::FzLink& this_link)
+{
+    const char* target = this_link.m_internal->uri;
+    long external = 0;
+    if (target)
+    {
+        external = mupdf::fz_is_external_link(target) ? 1 : 0;
+    }
+    return PyBool_FromLong(external);
+}
+
+// The link following `this_link`, as returned by its next() method.
+static mupdf::FzLink Link_next(mupdf::FzLink& this_link)
+{
+    mupdf::FzLink successor = this_link.next();
+    return successor;
+}
+
+
+//-----------------------------------------------------------------------------
+// create PDF object from given string
+//
+// `src` is read through an in-memory stream and parsed with
+// pdf_parse_stm_obj() for document `doc`. The lexer buffer and the stream
+// are released on every path. A MuPDF error is passed to
+// mupdf::internal_throw_exception() -- presumably rethrown as a C++
+// exception; callers in this file catch std::exception.
+//-----------------------------------------------------------------------------
+static pdf_obj *lll_JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, const char *src)
+{
+    pdf_obj *result = NULL;
+    pdf_lexbuf lexbuf;
+    // the stream reads directly from `src`, which must stay valid until the
+    // stream is dropped below
+    fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src));
+
+    pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL);
+
+    fz_try(ctx) {
+        result = pdf_parse_stm_obj(ctx, doc, stream, &lexbuf);
+    }
+
+    // cleanup that runs whether or not parsing failed
+    fz_always(ctx) {
+        pdf_lexbuf_fin(ctx, &lexbuf);
+        fz_drop_stream(ctx, stream);
+    }
+
+    fz_catch(ctx) {
+        mupdf::internal_throw_exception(ctx);
+    }
+
+    return result;
+
+}
+
+/*********************************************************************/
+// Page._addAnnot_FromString
+// Add new links provided as an array of string object definitions.
+//
+// Each string of `linklist` is parsed into a PDF object, added to the
+// page's document, and an indirect reference to it is appended to the
+// page's /Annots array (created first if missing). Items that are not
+// strings or cannot be processed are skipped with a message.
+//
+// Returns None; also None without doing anything if `linklist` has fewer
+// than one item. Returns nullptr if `page` wraps no PDF page or setting up
+// /Annots fails.
+/*********************************************************************/
+PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist)
+{
+    PyObject* txtpy = nullptr;
+    int lcount = (int) PySequence_Size(linklist); // link count
+    //printf("Page_addAnnot_FromString(): lcount=%i\n", lcount);
+    if (lcount < 1)
+    {
+        Py_RETURN_NONE;
+    }
+    try
+    {
+        // insert links from the provided sources
+        if (!page.m_internal)
+        {
+            throw std::runtime_error(MSG_IS_NO_PDF);
+        }
+        if (!mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)).m_internal)
+        {
+            mupdf::pdf_dict_put_array(page.obj(), PDF_NAME2(Annots), lcount);
+        }
+        mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots));
+        mupdf::PdfDocument doc = page.doc();
+        //printf("lcount=%i\n", lcount);
+        fz_context* ctx = mupdf::internal_context_get();
+        for (int i = 0; i < lcount; i++)
+        {
+            // PyUnicode_AsUTF8() returns a pointer into storage owned by
+            // `txtpy`, so the reference is held until the text has been
+            // parsed. PySequence_ITEM() does not check for failure itself.
+            txtpy = PySequence_ITEM(linklist, (Py_ssize_t) i);
+            const char* text = txtpy ? PyUnicode_AsUTF8(txtpy) : nullptr;
+            if (!text)
+            {
+                // The item is skipped on purpose, so do not leave the
+                // Python error behind for the caller.
+                PyErr_Clear();
+                Py_CLEAR(txtpy);
+                messagef("skipping bad link / annot item %i.", i);
+                continue;
+            }
+            try
+            {
+                pdf_obj* obj = lll_JM_pdf_obj_from_str(ctx, doc.m_internal, text);
+                pdf_obj* annot = pdf_add_object_drop(
+                        ctx,
+                        doc.m_internal,
+                        obj
+                        );
+                pdf_obj* ind_obj = pdf_new_indirect(ctx, doc.m_internal, pdf_to_num(ctx, annot), 0);
+                pdf_array_push_drop(ctx, annots.m_internal, ind_obj);
+                pdf_drop_obj(ctx, annot);
+            }
+            catch (std::exception&)
+            {
+                messagef("skipping bad link / annot item %i.", i);
+            }
+            Py_CLEAR(txtpy);
+        }
+    }
+    catch (std::exception&)
+    {
+        Py_CLEAR(txtpy);
+        // NOTE(review): this returns nullptr with no Python error set;
+        // confirm that the generated wrapper copes with that.
+        PyErr_Clear();
+        return nullptr;
+    }
+    Py_RETURN_NONE;
+}
+
+// Overload for a generic page: converts it with pdf_page_from_fz_page() and
+// delegates to the PdfPage overload.
+PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist)
+{
+    mupdf::PdfPage as_pdf = mupdf::pdf_page_from_fz_page(page);
+    PyObject* outcome = Page_addAnnot_FromString(as_pdf, linklist);
+    return outcome;
+}
+
+// Page count of the document passed as an untyped pointer; `document` must
+// point to a mupdf::FzDocument.
+static int page_count_fz2(void* document)
+{
+    mupdf::FzDocument& doc = *((mupdf::FzDocument*) document);
+    return mupdf::fz_count_pages(doc);
+}
+
+// Page count of a document, as reported by fz_count_pages().
+static int page_count_fz(mupdf::FzDocument& document)
+{
+    const int pages = mupdf::fz_count_pages(document);
+    return pages;
+}
+
+// Page count of a PDF document, obtained through its super() document.
+static int page_count_pdf(mupdf::PdfDocument& pdf)
+{
+    mupdf::FzDocument base = pdf.super();
+    const int pages = page_count_fz(base);
+    return pages;
+}
+
+// Page count of a document, as reported by fz_count_pages().
+static int page_count(mupdf::FzDocument& document)
+{
+    const int pages = mupdf::fz_count_pages(document);
+    return pages;
+}
+
+// Page count of a PDF document, obtained through its super() document.
+static int page_count(mupdf::PdfDocument& pdf)
+{
+    mupdf::FzDocument base = pdf.super();
+    const int pages = page_count(base);
+    return pages;
+}
+
+// Annotation xref list of page `pno`, see JM_get_annot_xref_list().
+//
+// A negative `pno` counts from the end of the document.
+// Throws std::runtime_error with MSG_BAD_PAGENO if the document has no
+// pages or `pno` is beyond the last page, and with MSG_IS_NO_PDF if `pdf`
+// wraps no document.
+static PyObject* page_annot_xrefs(mupdf::FzDocument& document, mupdf::PdfDocument& pdf, int pno)
+{
+    const int npages = mupdf::fz_count_pages(document);
+    if (npages <= 0)
+    {
+        // No page number can be valid; also keeps the wrap-around below
+        // well defined (repeatedly adding 0 would never terminate).
+        throw std::runtime_error(MSG_BAD_PAGENO);
+    }
+    int n = pno;
+    if (n < 0)
+    {
+        // smallest non-negative value congruent to pno modulo npages
+        n %= npages;
+        if (n < 0)
+        {
+            n += npages;
+        }
+    }
+    if (n >= npages)
+    {
+        throw std::runtime_error(MSG_BAD_PAGENO);
+    }
+    if (!pdf.m_internal)
+    {
+        throw std::runtime_error(MSG_IS_NO_PDF);
+    }
+    return JM_get_annot_xref_list(mupdf::pdf_lookup_page_obj(pdf, n));
+}
+
+// Overload for a generic document: obtains the PDF document with
+// pdf_specifics() and delegates to the three-argument overload.
+static PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno)
+{
+    mupdf::PdfDocument as_pdf = mupdf::pdf_specifics(document);
+    PyObject* xref_list = page_annot_xrefs(document, as_pdf, pno);
+    return xref_list;
+}
+
+// Overload for a PDF document: obtains the generic document with super()
+// and delegates to the three-argument overload.
+static PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno)
+{
+    mupdf::FzDocument base = pdf.super();
+    PyObject* xref_list = page_annot_xrefs(base, pdf, pno);
+    return xref_list;
+}
+
+// Whether the outline item's URI is external according to
+// ll_fz_is_external_link(); false if it has no URI.
+static bool Outline_is_external(mupdf::FzOutline* outline)
+{
+    const char* target = outline->m_internal->uri;
+    if (target)
+    {
+        return mupdf::ll_fz_is_external_link(target);
+    }
+    return false;
+}
+
+// Global-namespace forwarder to mupdf::ll_fz_absi().
+int ll_fz_absi(int i)
+{
+    const int result = mupdf::ll_fz_absi(i);
+    return result;
+}
+
+// Font / character property flags; each value is a distinct bit so several
+// can be combined in one int.
+enum
+{
+    TEXT_FONT_SUPERSCRIPT = 1 << 0,
+    TEXT_FONT_ITALIC      = 1 << 1,
+    TEXT_FONT_SERIFED     = 1 << 2,
+    TEXT_FONT_MONOSPACED  = 1 << 3,
+    TEXT_FONT_BOLD        = 1 << 4,
+};
+
+// Global switches, all off (0) by default.
+// g_skip_quad_corrections: when set, JM_font_ascender() / JM_font_descender()
+// return fixed values instead of asking the font.
+int g_skip_quad_corrections = 0;
+// g_subset_fontnames: when set, JM_font_name() returns the font name unchanged.
+int g_subset_fontnames = 0;
+int g_small_glyph_heights = 0;
+
+// Setter for g_skip_quad_corrections.
+void set_skip_quad_corrections(int on) { g_skip_quad_corrections = on; }
+
+// Setter for g_subset_fontnames.
+void set_subset_fontnames(int on) { g_subset_fontnames = on; }
+
+// Setter for g_small_glyph_heights.
+void set_small_glyph_heights(int on) { g_small_glyph_heights = on; }
+
+// State of the device used for tracing line art and text.
+//
+// `super` must remain the first member: fz_device pointers are cast to this
+// type (see jm_lineart_drop_device()).
+struct jm_lineart_device
+{
+    fz_device super;
+
+    PyObject* out = {};        // output object; jm_trace_text_span() appends span dicts
+    PyObject* method = {};
+    PyObject* pathdict = {};
+    PyObject* scissors = {};
+    float pathfactor = {};
+    fz_matrix ctm = {};
+    fz_matrix ptm = {};
+    fz_matrix rot = {};
+    fz_point lastpoint = {};
+    fz_point firstpoint = {};
+    int havemove = 0;
+    fz_rect pathrect = {};
+    int clips = {};
+    int linecount = {};
+    float linewidth = {};      // if > 0, reported as "linewidth" of text spans
+    int path_type = {};
+    long depth = {};
+    size_t seqno = {};         // incremented by jm_increase_seqno()
+    // Released with ll_fz_free() when the device is dropped, so it must
+    // never hold an indeterminate value.
+    char* layer_name = {};
+};
+
+
+// Release what a jm_lineart_device holds: the Python references in `out`
+// (only if it is a list), `method` and `scissors`, and the memory of
+// `layer_name`. Presumably installed as the device's drop callback -- the
+// registration is not part of this section.
+static void jm_lineart_drop_device(fz_context *ctx, fz_device *dev_)
+{
+    jm_lineart_device *self = (jm_lineart_device *) dev_;
+    if (PyList_Check(self->out))
+    {
+        Py_CLEAR(self->out);
+    }
+    Py_CLEAR(self->method);
+    Py_CLEAR(self->scissors);
+    mupdf::ll_fz_free(self->layer_name);
+    self->layer_name = nullptr;  // guard against a second free
+}
+
+// Second name for the same device structure, used by the text tracing code.
+using jm_tracedraw_device = jm_lineart_device;
+
+// Own versions of ascender / descender are needed:
+// Ascender of `font`: the fixed value 0.8 while g_skip_quad_corrections is
+// set, otherwise whatever ll_fz_font_ascender() reports.
+static float JM_font_ascender(fz_font* font)
+{
+    return g_skip_quad_corrections ? 0.8f : mupdf::ll_fz_font_ascender(font);
+}
+
+// Descender of `font`: the fixed value -0.2 while g_skip_quad_corrections
+// is set, otherwise whatever ll_fz_font_descender() reports.
+static float JM_font_descender(fz_font* font)
+{
+    return g_skip_quad_corrections ? -0.2f : mupdf::ll_fz_font_descender(font);
+}
+
+
+//----------------------------------------------------------------
+// Return true if character is considered to be a word delimiter
+//
+// Code points <= 32, 160 and 0x202a..0x202e always count as delimiters.
+// `delimiters` may add more: if it is a non-empty sequence, `c` is also a
+// delimiter when it equals one of its items. Python errors raised while
+// checking the extra delimiters are cleared.
+// Returns 1 for a delimiter, else 0.
+//----------------------------------------------------------------
+static int 
+JM_is_word_delimiter(int c, PyObject *delimiters)
+{
+    if (c <= 32 || c == 160) return 1;  // a standard delimiter
+    if (0x202a <= c && c <= 0x202e)
+    {
+        return 1; // change between writing directions
+    }
+
+    // extra delimiters must be a non-empty sequence
+    if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) {  
+        return 0;
+    }
+
+    // convert to tuple for easier looping
+    PyObject *delims = PySequence_Tuple(delimiters);
+    if (!delims) {
+        PyErr_Clear();
+        return 0;
+    }
+
+    // Make 1-char PyObject from character given as integer
+    PyObject *cchar = Py_BuildValue("C", c);  // single character PyObject
+    if (!cchar) {  // cannot be compared against anything
+        Py_DECREF(delims);
+        PyErr_Clear();
+        return 0;
+    }
+
+    int found = 0;
+    Py_ssize_t i, len = PyTuple_Size(delims);
+    for (i = 0; i < len; i++) {
+        int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i));
+        if (rc == 0) {  // equal to a delimiter character
+            found = 1;
+            break;
+        }
+    }
+
+    // single exit, so that both temporaries are released on every path
+    Py_DECREF(cchar);
+    Py_DECREF(delims);
+    PyErr_Clear();
+    return found;
+}
+
+// Return 1 if code point `c` lies in the range 0x590..0x900 (inclusive),
+// which this file treats as right-to-left characters; else 0.
+static int 
+JM_is_rtl_char(int c)
+{
+    return (c >= 0x590 && c <= 0x900) ? 1 : 0;
+}
+
+// Name of `font`. If the name has a '+' at index 6 (a six-character prefix
+// followed by '+'), the part after the '+' is returned -- unless
+// g_subset_fontnames is set, in which case the name is returned unchanged.
+static const char* JM_font_name(fz_font* font)
+{
+    const char* full_name = mupdf::ll_fz_font_name(font);
+    const char* plus = strchr(full_name, '+');
+    const bool has_prefix = plus && (plus - full_name == 6);
+    if (has_prefix && !g_subset_fontnames)
+    {
+        return plus + 1;
+    }
+    return full_name;
+}
+
+// Return 1 if `ch` counts as superscript within `line`, else 0.
+// Only lines with wmode 0 and direction (1, 0) are examined; there a
+// character qualifies if its origin's y is smaller than that of the line's
+// first character by more than 10% of the character's size.
+static int detect_super_script(fz_stext_line *line, fz_stext_char *ch)
+{
+    const bool examined = (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0);
+    if (!examined)
+    {
+        return 0;
+    }
+    return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
+}
+
+// Combine the TEXT_FONT_* flags that apply to a character.
+// The superscript flag is only evaluated when both `line` and `ch` are
+// given; italic, serif, monospaced and bold come from `font`.
+static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch)
+{
+    int result = 0;
+    if (line && ch)
+    {
+        result += detect_super_script(line, ch) * TEXT_FONT_SUPERSCRIPT;
+    }
+    const int italic = mupdf::ll_fz_font_is_italic(font);
+    const int serif = mupdf::ll_fz_font_is_serif(font);
+    const int mono = mupdf::ll_fz_font_is_monospaced(font);
+    const int bold = mupdf::ll_fz_font_is_bold(font);
+    result += italic * TEXT_FONT_ITALIC;
+    result += serif * TEXT_FONT_SERIFED;
+    result += mono * TEXT_FONT_MONOSPACED;
+    result += bold * TEXT_FONT_BOLD;
+    return result;
+}
+
+// Describe one text span as a Python dict and append it to dev->out.
+//
+// The dict holds, among others: "dir" (writing direction), the font name,
+// wmode, font flags, "bidi_lvl", "bidi_dir", ascender / descender, the
+// color converted to RGB, the font size, "opacity", "linewidth",
+// "spacewidth", `type`, the span bbox, "layer", "seqno" and a tuple with
+// one (ucs, gid, (origin), (bbox)) entry per character.
+//
+// `ctm` is concatenated with the span's own matrix; `colorspace` may be
+// NULL, in which case the color is reported as (0, 0, 0).
+static void jm_trace_text_span(
+        jm_tracedraw_device* dev,
+        fz_text_span* span,
+        int type,
+        fz_matrix ctm,
+        fz_colorspace* colorspace,
+        const float* color,
+        float alpha,
+        size_t seqno
+        )
+{
+    //printf("extra.jm_trace_text_span(): seqno=%zi\n", seqno);
+    //fz_matrix join = mupdf::ll_fz_concat(span->trm, ctm);
+    //double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d));
+    fz_matrix mat = mupdf::ll_fz_concat(span->trm, ctm); // text transformation matrix
+    fz_point dir = mupdf::ll_fz_transform_vector(mupdf::ll_fz_make_point(1, 0), mat); // writing direction
+    double fsize = sqrt(dir.x * dir.x + dir.y * dir.y); // font size
+
+    dir = mupdf::ll_fz_normalize_vector(dir);
+
+    // compute effective ascender / descender
+    double asc = (double) JM_font_ascender(span->font);
+    double dsc = (double) JM_font_descender(span->font);
+    if (asc < 1e-3) {  // probably Tesseract font
+        dsc = -0.1;
+        asc = 0.9;
+    }
+
+    // split the font size into the parts above and below the baseline
+    double ascsize = asc * fsize / (asc - dsc);
+    double dscsize = dsc * fsize / (asc - dsc);
+    int fflags = 0; // font flags
+    int mono = mupdf::ll_fz_font_is_monospaced(span->font);
+    fflags += mono * TEXT_FONT_MONOSPACED;
+    fflags += mupdf::ll_fz_font_is_italic(span->font) * TEXT_FONT_ITALIC;
+    fflags += mupdf::ll_fz_font_is_serif(span->font) * TEXT_FONT_SERIFED;
+    fflags += mupdf::ll_fz_font_is_bold(span->font) * TEXT_FONT_BOLD;
+
+    // walk through characters of span
+    fz_matrix rot = mupdf::ll_fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0);
+    if (dir.x == -1)
+    {
+        // left-right flip
+        rot.d = 1;
+    }
+    PyObject* chars = PyTuple_New(span->len);
+    double space_adv = 0;
+    double last_adv = 0;
+    // Zero-initialised so that a span without characters reports a
+    // well-defined bbox instead of reading an indeterminate value.
+    fz_rect span_bbox = {};
+
+    for (int i = 0; i < span->len; i++)
+    {
+        double adv = 0;
+        if (span->items[i].gid >= 0)
+        {
+            adv = (double) mupdf::ll_fz_advance_glyph(span->font, span->items[i].gid, span->wmode);
+        }
+        adv *= fsize;
+        last_adv = adv;
+        if (span->items[i].ucs == 32)
+        {
+            space_adv = adv;
+        }
+        fz_point char_orig;
+        char_orig = fz_make_point(span->items[i].x, span->items[i].y);
+        char_orig = fz_transform_point(char_orig, ctm);
+        // m1: apply `rot` around the character origin
+        fz_matrix m1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y);
+        m1 = mupdf::ll_fz_concat(m1, rot);
+        m1 = mupdf::ll_fz_concat(m1, mupdf::ll_fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y));
+        float x0 = char_orig.x;
+        float x1 = x0 + adv;
+        float y0;
+        float y1;
+        if (
+                (mat.d > 0 && (dir.x == 1 || dir.x == -1))
+                ||
+                (mat.b !=0 && mat.b == -mat.c)
+                )   // up-down flip
+        {
+            // up-down flip
+            y0 = char_orig.y + dscsize;
+            y1 = char_orig.y + ascsize;
+        }
+        else
+        {
+            y0 = char_orig.y - ascsize;
+            y1 = char_orig.y - dscsize;
+        }
+        fz_rect char_bbox = mupdf::ll_fz_make_rect(x0, y0, x1, y1);
+        char_bbox = mupdf::ll_fz_transform_rect(char_bbox, m1);
+        PyTuple_SET_ITEM(
+                chars,
+                (Py_ssize_t) i,
+                Py_BuildValue(
+                    "ii(ff)(ffff)",
+                    span->items[i].ucs,
+                    span->items[i].gid,
+                    char_orig.x,
+                    char_orig.y,
+                    char_bbox.x0,
+                    char_bbox.y0,
+                    char_bbox.x1,
+                    char_bbox.y1
+                    )
+                );
+        if (i > 0)
+        {
+            span_bbox = mupdf::ll_fz_union_rect(span_bbox, char_bbox);
+        }
+        else
+        {
+            span_bbox = char_bbox;
+        }
+    }
+    if (!space_adv)
+    {
+        // the span contained no space character: derive a space width
+        if (!(fflags & TEXT_FONT_MONOSPACED))
+        {
+            fz_font* out_font = nullptr;
+            space_adv = mupdf::ll_fz_advance_glyph(
+                    span->font,
+                    mupdf::ll_fz_encode_character_with_fallback(span->font, 32, 0, 0, &out_font),
+                    span->wmode
+                    );
+            space_adv *= fsize;
+            if (!space_adv)
+            {
+                space_adv = last_adv;
+            }
+        }
+        else
+        {
+            space_adv = last_adv; // for mono any char width suffices
+        }
+    }
+    // make the span dictionary
+    PyObject* span_dict = PyDict_New();
+    dict_setitemstr_drop(span_dict, "dir", JM_py_from_point(dir));
+    dict_setitem_drop(span_dict, dictkey_font, JM_EscapeStrFromStr(JM_font_name(span->font)));
+    dict_setitem_drop(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode));
+    dict_setitem_drop(span_dict, dictkey_flags, PyLong_FromLong((long) fflags));
+    dict_setitemstr_drop(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level));
+    dict_setitemstr_drop(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir));
+    dict_setitem_drop(span_dict, dictkey_ascender, PyFloat_FromDouble(asc));
+    dict_setitem_drop(span_dict, dictkey_descender, PyFloat_FromDouble(dsc));
+    dict_setitem_drop(span_dict, dictkey_colorspace, PyLong_FromLong(3));
+    float rgb[3];
+    if (colorspace)
+    {
+        mupdf::ll_fz_convert_color(
+                colorspace,
+                color,
+                mupdf::ll_fz_device_rgb(),
+                rgb,
+                nullptr,
+                fz_default_color_params
+                );
+    }
+    else
+    {
+        rgb[0] = rgb[1] = rgb[2] = 0;
+    }
+    double linewidth;
+    if (dev->linewidth > 0)  // width of character border
+    {
+        linewidth = (double) dev->linewidth;
+    }
+    else
+    {
+        linewidth = fsize * 0.05;  // default: 5% of font size
+    }
+    if (0) std::cout
+            << " dev->linewidth=" << dev->linewidth
+            << " fsize=" << fsize
+            << " linewidth=" << linewidth
+            << "\n";
+    dict_setitem_drop(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]));
+    dict_setitem_drop(span_dict, dictkey_size, PyFloat_FromDouble(fsize));
+    dict_setitemstr_drop(span_dict, "opacity", PyFloat_FromDouble((double) alpha));
+    dict_setitemstr_drop(span_dict, "linewidth", PyFloat_FromDouble((double) linewidth));
+    dict_setitemstr_drop(span_dict, "spacewidth", PyFloat_FromDouble(space_adv));
+    dict_setitem_drop(span_dict, dictkey_type, PyLong_FromLong((long) type));
+    dict_setitem_drop(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox));
+    dict_setitemstr_drop(span_dict, "layer", JM_UnicodeFromStr(dev->layer_name));
+    dict_setitemstr_drop(span_dict, "seqno", PyLong_FromSize_t(seqno));
+    dict_setitem_drop(span_dict, dictkey_chars, chars);
+    //std::cout << "span_dict=" << repr(span_dict) << "\n";
+    s_list_append_drop(dev->out, span_dict);
+}
+
+// Advance the sequence counter stored on the device by one.
+// `ctx` is not used.
+static inline void jm_increase_seqno(fz_context* ctx, fz_device* dev_)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    ++tracer->seqno;
+}
+
+// fill_path callback installed by JM_new_texttrace_device(): none of the path
+// arguments are looked at, the call only advances the sequence counter.
+static void jm_fill_path(
+        fz_context* ctx, fz_device* dev, const fz_path*, int even_odd, fz_matrix,
+        fz_colorspace*, const float* color, float alpha, fz_color_params)
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// fill_shade callback installed by JM_new_texttrace_device(): the shading is
+// not inspected, the call only advances the sequence counter.
+static void jm_fill_shade(
+        fz_context* ctx, fz_device* dev, fz_shade* shd, fz_matrix ctm,
+        float alpha, fz_color_params color_params)
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// fill_image callback installed by JM_new_texttrace_device(): the image is
+// not inspected, the call only advances the sequence counter.
+static void jm_fill_image(
+        fz_context* ctx, fz_device* dev, fz_image* img, fz_matrix ctm,
+        float alpha, fz_color_params color_params)
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// fill_image_mask callback installed by JM_new_texttrace_device(): the mask is
+// not inspected, the call only advances the sequence counter.
+static void jm_fill_image_mask(
+        fz_context* ctx, fz_device* dev, fz_image* img, fz_matrix ctm,
+        fz_colorspace* cs, const float* color, float alpha,
+        fz_color_params color_params)
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// stroke_path callback installed by JM_new_texttrace_device().
+// Stores the line width of the stroke state on the device and advances the
+// sequence counter; the path geometry and colour arguments are not used.
+static void jm_dev_linewidth(
+        fz_context* ctx, fz_device* dev_, const fz_path* path,
+        const fz_stroke_state* stroke, fz_matrix ctm, fz_colorspace* colorspace,
+        const float* color, float alpha, fz_color_params color_params)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    if (0)
+    {
+        // Debug output, compiled in but never executed.
+        std::cout << "jm_dev_linewidth(): changing dev->linewidth from " << tracer->linewidth
+                << " to stroke->linewidth=" << stroke->linewidth
+                << "\n";
+    }
+    tracer->linewidth = stroke->linewidth;
+    jm_increase_seqno(ctx, dev_);
+}
+
+// Hand every span of `text` to jm_trace_text_span(), following the span list
+// from text->head via ->next. All other arguments are forwarded unchanged.
+static void jm_trace_text(
+        jm_tracedraw_device* dev, const fz_text* text, int type, fz_matrix ctm,
+        fz_colorspace* colorspace, const float* color, float alpha, size_t seqno)
+{
+    fz_text_span* current = text->head;
+    while (current)
+    {
+        jm_trace_text_span(dev, current, type, ctm, colorspace, color, alpha, seqno);
+        current = current->next;
+    }
+}
+
+/*---------------------------------------------------------
+There are 3 text trace types:
+0 - fill text (PDF Tr 0)
+1 - stroke text (PDF Tr 1)
+3 - ignore text (PDF Tr 3)
+---------------------------------------------------------*/
+// fill_text callback: trace all spans of `text` with trace type 0, tagged with
+// the device's current sequence number, then advance that number by one.
+static void
+jm_tracedraw_fill_text(
+        fz_context* ctx, fz_device* dev_, const fz_text* text, fz_matrix ctm,
+        fz_colorspace* colorspace, const float* color, float alpha,
+        fz_color_params color_params)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    jm_trace_text(tracer, text, 0, ctm, colorspace, color, alpha, tracer->seqno);
+    ++tracer->seqno;
+}
+
+// stroke_text callback: trace all spans of `text` with trace type 1, tagged
+// with the device's current sequence number, then advance that number by one.
+// The stroke state itself is not used.
+static void
+jm_tracedraw_stroke_text(
+        fz_context* ctx, fz_device* dev_, const fz_text* text,
+        const fz_stroke_state* stroke, fz_matrix ctm, fz_colorspace* colorspace,
+        const float* color, float alpha, fz_color_params color_params)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    jm_trace_text(tracer, text, 1, ctm, colorspace, color, alpha, tracer->seqno);
+    ++tracer->seqno;
+}
+
+
+// ignore_text callback: trace all spans of `text` with trace type 3. No
+// colorspace or colour is available here, so null pointers and an alpha of 1
+// are passed on. Afterwards the sequence number is advanced by one.
+static void
+jm_tracedraw_ignore_text(
+        fz_context* ctx, fz_device* dev_, const fz_text* text, fz_matrix ctm)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    jm_trace_text(tracer, text, 3, ctm, nullptr, nullptr, 1, tracer->seqno);
+    ++tracer->seqno;
+}
+
+// begin_layer callback: remember the name of the layer that becomes active.
+//
+// The new name is duplicated *before* the previous one is released. Should
+// the duplication fail (mupdf::ll_fz_strdup() is expected to report failure
+// by throwing), dev->layer_name then still refers to valid memory rather than
+// to a block that has already been freed and that a later
+// jm_lineart_end_layer() would free a second time.
+// A null `name` is not passed to strdup; it simply clears the stored name.
+static void
+jm_lineart_begin_layer(fz_context *ctx, fz_device *dev_, const char *name)
+{
+    jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_;
+    char* copy = name ? mupdf::ll_fz_strdup(name) : nullptr;
+    mupdf::ll_fz_free(dev->layer_name);
+    dev->layer_name = copy;
+}
+
+// end_layer callback: release the stored layer name and reset it to null.
+static void
+jm_lineart_end_layer(fz_context *ctx, fz_device *dev_)
+{
+    jm_tracedraw_device* tracer = (jm_tracedraw_device*) dev_;
+    mupdf::ll_fz_free(tracer->layer_name);
+    tracer->layer_name = nullptr;
+}
+
+
+// Create a device that traces text: text callbacks record spans into `out`,
+// while path / shade / image callbacks only keep the sequence counter (and,
+// for strokes, the line width) up to date. The device keeps its own
+// reference to `out`; `out` may be null.
+mupdf::FzDevice JM_new_texttrace_device(PyObject* out)
+{
+    mupdf::FzDevice device(sizeof(jm_tracedraw_device));
+    jm_tracedraw_device* trace = (jm_tracedraw_device*) device.m_internal;
+    auto& hooks = trace->super;
+
+    // --- callbacks implemented by this device ---
+    hooks.drop_device = jm_lineart_drop_device;
+    hooks.fill_path = jm_fill_path;
+    hooks.stroke_path = jm_dev_linewidth;
+    hooks.fill_text = jm_tracedraw_fill_text;
+    hooks.stroke_text = jm_tracedraw_stroke_text;
+    hooks.ignore_text = jm_tracedraw_ignore_text;
+    hooks.fill_shade = jm_fill_shade;
+    hooks.fill_image = jm_fill_image;
+    hooks.fill_image_mask = jm_fill_image_mask;
+    hooks.begin_layer = jm_lineart_begin_layer;
+    hooks.end_layer = jm_lineart_end_layer;
+
+    // --- callbacks explicitly left unset ---
+    hooks.close_device = nullptr;
+    hooks.clip_path = nullptr;
+    hooks.clip_stroke_path = nullptr;
+    hooks.clip_text = nullptr;
+    hooks.clip_stroke_text = nullptr;
+    hooks.clip_image_mask = nullptr;
+    hooks.pop_clip = nullptr;
+    hooks.begin_mask = nullptr;
+    hooks.end_mask = nullptr;
+    hooks.begin_group = nullptr;
+    hooks.end_group = nullptr;
+    hooks.begin_tile = nullptr;
+    hooks.end_tile = nullptr;
+    hooks.begin_structure = nullptr;
+    hooks.end_structure = nullptr;
+    hooks.begin_metatext = nullptr;
+    hooks.end_metatext = nullptr;
+    hooks.render_flags = nullptr;
+    hooks.set_default_colorspaces = nullptr;
+
+    // --- device state ---
+    Py_XINCREF(out);  // null-tolerant: `out` may be absent
+    trace->out = out;
+    trace->seqno = 0;
+    return device;
+}
+
+
+// Return the quad to report for character `ch` of `line`.
+//
+// ch->quad is returned untouched when g_skip_quad_corrections is set, when the
+// line has a non-zero wmode, or when the font's (ascender - descender) span is
+// at least 1 and g_small_glyph_heights is 0. In all other cases the quad is
+// rebuilt: its vertical extent is taken from adjusted ascender / descender
+// values scaled by ch->size, and a negative left edge or a (near) zero width
+// is repaired. The rebuild happens in a coordinate system in which ch->origin
+// is (0,0) and the line direction has been rotated away.
+static fz_quad
+JM_char_quad(fz_stext_line *line, fz_stext_char *ch)
+{
+    if (g_skip_quad_corrections) {  // no special handling
+        return ch->quad;
+    }
+    if (line->wmode) {  // never touch vertical write mode
+        return ch->quad;
+    }
+    fz_font *font = ch->font;
+    float asc = JM_font_ascender(font);
+    float dsc = JM_font_descender(font);
+    float c, s, fsize = ch->size;
+    // NOTE(review): FLT_EPSILON presumably absorbs rounding when asc - dsc is
+    // nominally exactly 1 - confirm.
+    float asc_dsc = asc - dsc + FLT_EPSILON;
+    if (asc_dsc >= 1 && g_small_glyph_heights == 0) {  // no problem
+       return ch->quad;
+    }
+    if (asc < 1e-3) {  // probably Tesseract glyphless font
+        // substitute fixed values whose difference is exactly 1
+        dsc = -0.1f;
+        asc = 0.9f;
+        asc_dsc = 1.0f;
+    }
+
+    // Normalize ascender / descender by their span ...
+    if (g_small_glyph_heights || asc_dsc < 1) {
+        dsc = dsc / asc_dsc;
+        asc = asc / asc_dsc;
+    }
+    // ... then scale both so that (asc - dsc) equals the font size.
+    asc_dsc = asc - dsc;
+    asc = asc * fsize / asc_dsc;
+    dsc = dsc * fsize / asc_dsc;
+
+    /* ------------------------------
+    Re-compute quad with the adjusted ascender / descender values:
+    Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
+    re-rotate and move back to ch->origin location.
+    ------------------------------ */
+    fz_matrix trm1, trm2, xlate1, xlate2;
+    fz_quad quad;
+    c = line->dir.x;  // cosine
+    s = line->dir.y;  // sine
+    trm1 = mupdf::ll_fz_make_matrix(c, -s, s, c, 0, 0);  // derotate
+    trm2 = mupdf::ll_fz_make_matrix(c, s, -s, c, 0, 0);  // rotate
+    if (c == -1) {  // left-right flip
+        // keep the y direction unchanged in both matrices
+        trm1.d = 1;
+        trm2.d = 1;
+    }
+    xlate1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y);
+    xlate2 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y);
+
+    quad = mupdf::ll_fz_transform_quad(ch->quad, xlate1);  // move origin to (0,0)
+    quad = mupdf::ll_fz_transform_quad(quad, trm1);  // de-rotate corners
+
+    // adjust vertical coordinates
+    if (c == 1 && quad.ul.y > 0) {  // up-down flip
+        quad.ul.y = asc;
+        quad.ur.y = asc;
+        quad.ll.y = dsc;
+        quad.lr.y = dsc;
+    } else {
+        // regular case: upper edge at -asc, lower edge at -dsc
+        quad.ul.y = -asc;
+        quad.ur.y = -asc;
+        quad.ll.y = -dsc;
+        quad.lr.y = -dsc;
+    }
+
+    // adjust horizontal coordinates that are too crazy:
+    // (1) left x must be >= 0
+    // (2) if bbox width is 0, lookup char advance in font.
+    if (quad.ll.x < 0) {
+        quad.ll.x = 0;
+        quad.ul.x = 0;
+    }
+    float cwidth = quad.lr.x - quad.ll.x;
+    if (cwidth < FLT_EPSILON) {
+        int glyph = mupdf::ll_fz_encode_character( font, ch->c);
+        if (glyph) {  // glyph 0 leaves the width as it is
+            float fwidth = mupdf::ll_fz_advance_glyph( font, glyph, line->wmode);
+            quad.lr.x = quad.ll.x + fwidth * fsize;
+            quad.ur.x = quad.lr.x;
+        }
+    }
+
+    quad = mupdf::ll_fz_transform_quad(quad, trm2);  // rotate back
+    quad = mupdf::ll_fz_transform_quad(quad, xlate2);  // translate back
+    return quad;
+}
+
+
+// Bounding rectangle of a character, derived from JM_char_quad().
+// For lines with a non-zero wmode, a rectangle lower than ch->size gets its
+// y0 moved so that its height becomes ch->size; otherwise the rectangle of
+// the quad is returned as is.
+static fz_rect JM_char_bbox(fz_stext_line* line, fz_stext_char* ch)
+{
+    fz_rect box = mupdf::ll_fz_rect_from_quad(JM_char_quad( line, ch));
+    const bool vertical = line->wmode != 0;
+    if (vertical && box.y1 < box.y0 + ch->size) {
+        box.y0 = box.y1 - ch->size;
+    }
+    return box;
+}
+
+// Overload for the C++ wrapper classes: unwraps the underlying
+// fz_stext_line / fz_stext_char pointers and forwards to the pointer overload.
+fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch)
+{
+    fz_stext_line* raw_line = line.m_internal;
+    fz_stext_char* raw_char = ch.m_internal;
+    return JM_char_bbox(raw_line, raw_char);
+}
+
+// Return 1 if rectangles `a` and `b` overlap, else 0.
+// Rectangles that merely touch along an edge count as not overlapping.
+static int JM_rects_overlap(const fz_rect a, const fz_rect b)
+{
+    // Kept as a negated "is separated" test so that comparison results for
+    // every input stay exactly those of the individual >= / <= checks.
+    const bool separated =
+            a.x0 >= b.x1
+            || a.y0 >= b.y1
+            || a.x1 <= b.x0
+            || a.y1 <= b.y0;
+    return separated ? 0 : 1;
+}
+
+// Forward declaration only - the definition is not at this point of the file.
+// Called by JM_print_stext_page_as_text() below with a buffer and a character
+// code; presumably appends that character to `buff` (verify at definition).
+void JM_append_rune(fz_buffer *buff, int ch);
+
+//-----------------------------------------------------------------------------
+// Plain text output, adapted from fz_print_stext_page_as_text.
+// Visits every text block of `page` and appends to `res` each character whose
+// bbox overlaps the page's mediabox (every character if the mediabox is
+// infinite). After each line that contributed at least one character, "\n" is
+// appended unless the last contributed character already was a line feed (10).
+// NOTE(review): the earlier comment here said lines of a block are joined by
+// a space; the code appends "\n" - confirm which is intended.
+//-----------------------------------------------------------------------------
+void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page)
+{
+    const fz_rect clip = page.m_internal->mediabox;
+    const bool unclipped = mupdf::ll_fz_is_infinite_rect(clip) != 0;
+
+    for (auto block: page)
+    {
+        if (block.m_internal->type != FZ_STEXT_BLOCK_TEXT)
+        {
+            continue;  // only text blocks carry characters
+        }
+        for (auto line: block)
+        {
+            int emitted = 0;  // code of the last character written for this line
+            for (auto ch: line)
+            {
+                const fz_rect chbbox = JM_char_bbox( line, ch);
+                if (!unclipped && !JM_rects_overlap(clip, chbbox))
+                {
+                    continue;  // character lies outside the mediabox
+                }
+                emitted = ch.m_internal->c;
+                JM_append_rune(res.m_internal, emitted);
+            }
+            if (emitted > 0 && emitted != 10)
+            {
+                mupdf::ll_fz_append_string(res.m_internal, "\n");
+            }
+        }
+    }
+}
+
+
+
+// path_type is one of:
+// (stored in jm_lineart_device::path_type by the fill / stroke / clip path
+// callbacks before the path is walked; trace_lineto() checks it against
+// FILL_PATH)
+#define FILL_PATH 1
+#define STROKE_PATH 2
+#define CLIP_PATH 3
+#define CLIP_STROKE_PATH 4
+
+// Push the scissor rectangle for a new clip level onto dev->scissors and
+// return it. With an empty stack the scissor is dev->pathrect; otherwise it
+// is dev->pathrect intersected with the most recent scissor, so each scissor
+// is a sub-rectangle of the one below it. The list is created on first use.
+static fz_rect compute_scissor(jm_lineart_device *dev)
+{
+    if (!dev->scissors) {
+        dev->scissors = PyList_New(0);
+    }
+    fz_rect result = dev->pathrect;
+    const Py_ssize_t level = PyList_Size(dev->scissors);
+    if (level > 0) {
+        PyObject *top = PyList_GET_ITEM(dev->scissors, level - 1);  // borrowed
+        result = fz_intersect_rect(JM_rect_from_py(top), dev->pathrect);
+    }
+    LIST_APPEND_DROP(dev->scissors, JM_py_from_rect(result));
+    return result;
+}
+
+
+/*
+--------------------------------------------------------------------------
+Try to collapse the last 4 "l" (line) items of the current path into one
+"qu" (quad) item.
+The 4 lines are consecutive, so each line ends where the next one starts.
+They form a quad if, in addition, the last line ends at the start point of
+the first one. In that case item -4 is replaced by ("qu", quad), the
+following 3 items are deleted, the line counter is reset and 1 is returned.
+Otherwise nothing is changed and 0 is returned.
+--------------------------------------------------------------------------
+*/
+static int
+jm_checkquad(jm_lineart_device* dev)
+{
+    PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+    const Py_ssize_t count = PyList_Size(items);
+    const Py_ssize_t first = count - 4;  // index of the oldest of the 4 lines
+    float xy[8];  // start points of the 4 lines as x0, y0, x1, y1, ...
+    mupdf::FzPoint start, end;
+    for (Py_ssize_t k = 0; k < 4; ++k) {
+        PyObject *segment = PyList_GET_ITEM(items, first + k);
+        start = JM_point_from_py(PyTuple_GET_ITEM(segment, 1));
+        xy[2 * k] = start.x;
+        xy[2 * k + 1] = start.y;
+        end = JM_point_from_py(PyTuple_GET_ITEM(segment, 2));
+    }
+    // `end` now holds the end point of the newest line.
+    const bool closed = (end.x == xy[0]) && (end.y == xy[1]);
+    if (!closed) {
+        return 0;  // open polyline, not a polygon
+    }
+
+    dev->linecount = 0;
+    // fz_make_quad() expects ul, ur, ll, lr. In walking order the start
+    // points are ul (0,1), ll (2,3), lr (4,5), ur (6,7).
+    fz_quad q = fz_make_quad(xy[0], xy[1], xy[6], xy[7], xy[2], xy[3], xy[4], xy[5]);
+    PyObject *entry = PyTuple_New(2);
+    PyTuple_SET_ITEM(entry, 0, PyUnicode_FromString("qu"));
+    PyTuple_SET_ITEM(entry, 1, JM_py_from_quad(q));
+    PyList_SetItem(items, first, entry);  // takes over the reference to entry
+    PyList_SetSlice(items, first + 1, count, NULL);  // drop the other 3 lines
+    return 1;
+}
+
+
+/*
+--------------------------------------------------------------------------
+Try to collapse the last 3 "l" (line) items of the current path into one
+"re" (rectangle) item. The line counter is reset in any case.
+
+Only the first and the third line are inspected: first line = (ll, lr),
+third line = (ur, ul). They are accepted as a rectangle if both are
+horizontal and their end points are vertically aligned (ll.x == ul.x and
+ur.x == lr.x).
+
+On success item -3 is replaced by ("re", rect, orientation), the following
+2 items are deleted and 1 is returned; orientation is +1 if the third line
+has the smaller y than the first one, else -1.
+Otherwise the path is left alone and 0 is returned.
+--------------------------------------------------------------------------
+*/
+static int
+jm_checkrect(jm_lineart_device* dev)
+{
+    dev->linecount = 0;
+    PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+    const Py_ssize_t count = PyList_Size(items);
+    PyObject *first = PyList_GET_ITEM(items, count - 3);
+    PyObject *third = PyList_GET_ITEM(items, count - 1);  // middle line not needed
+
+    mupdf::FzPoint ll, lr, ur, ul;
+    ll = JM_point_from_py(PyTuple_GET_ITEM(first, 1));
+    lr = JM_point_from_py(PyTuple_GET_ITEM(first, 2));
+    ur = JM_point_from_py(PyTuple_GET_ITEM(third, 1));
+    ul = JM_point_from_py(PyTuple_GET_ITEM(third, 2));
+
+    const bool axis_aligned =
+            ll.y == lr.y
+            && ll.x == ul.x
+            && ur.y == ul.y
+            && ur.x == lr.x;
+    if (!axis_aligned) {
+        return 0;  // not a rectangle
+    }
+
+    mupdf::FzRect r;
+    long orientation;
+    if (ul.y < lr.y) {
+        r = fz_make_rect(ul.x, ul.y, lr.x, lr.y);
+        orientation = 1;
+    } else {
+        r = fz_make_rect(ll.x, ll.y, ur.x, ur.y);
+        orientation = -1;
+    }
+    PyObject *entry = PyTuple_New(3);
+    PyTuple_SET_ITEM(entry, 0, PyUnicode_FromString("re"));
+    PyTuple_SET_ITEM(entry, 1, JM_py_from_rect(r));
+    PyTuple_SET_ITEM(entry, 2, PyLong_FromLong(orientation));
+    PyList_SetItem(items, count - 3, entry);  // takes over the reference to entry
+    PyList_SetSlice(items, count - 2, count, NULL);  // drop the other 2 lines
+    return 1;
+}
+
+// Convert `color`, given in `colorspace`, to device RGB and return it as a new
+// Python tuple of 3 floats. Without a colorspace an empty tuple is returned.
+static PyObject *
+jm_lineart_color(fz_colorspace *colorspace, const float *color)
+{
+    if (!colorspace) {
+        return PyTuple_New(0);
+    }
+    float rgb[3];
+    mupdf::ll_fz_convert_color(
+            colorspace,
+            color,
+            mupdf::ll_fz_device_rgb(),
+            rgb,
+            NULL,
+            fz_default_color_params
+            );
+    return Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]);
+}
+
+// Path walker "moveto": start a new sub-path at (x, y), transformed by
+// dev->ctm. The point becomes both first and last point. While the path
+// rectangle is still infinite it is initialized to this single point.
+static void
+trace_moveto(fz_context *ctx, void *dev_, float x, float y)
+{
+    jm_lineart_device* dev = (jm_lineart_device*) dev_;
+    const fz_point start = mupdf::ll_fz_transform_point(fz_make_point(x, y), dev->ctm);
+    dev->lastpoint = start;
+    if (mupdf::ll_fz_is_infinite_rect(dev->pathrect))
+    {
+        dev->pathrect = mupdf::ll_fz_make_rect(start.x, start.y, start.x, start.y);
+    }
+    dev->firstpoint = start;
+    dev->havemove = 1;
+    dev->linecount = 0;  // a move interrupts any run of consecutive lines
+}
+
+// Path walker "lineto": append ("l", lastpoint, p) to the path items, where p
+// is (x, y) transformed by dev->ctm, and grow the path rectangle to include
+// p. Once 4 consecutive lines have been seen on a path that is not a fill
+// path, jm_checkquad() is given the chance to replace them by a quad.
+static void
+trace_lineto(fz_context *ctx, void *dev_, float x, float y)
+{
+    jm_lineart_device* dev = (jm_lineart_device*) dev_;
+    const fz_point target = fz_transform_point(fz_make_point(x, y), dev->ctm);
+    dev->pathrect = fz_include_point_in_rect(dev->pathrect, target);
+
+    PyObject *segment = PyTuple_New(3);
+    PyTuple_SET_ITEM(segment, 0, PyUnicode_FromString("l"));
+    PyTuple_SET_ITEM(segment, 1, JM_py_from_point(dev->lastpoint));
+    PyTuple_SET_ITEM(segment, 2, JM_py_from_point(target));
+    dev->lastpoint = target;
+
+    PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+    LIST_APPEND_DROP(items, segment);
+
+    ++dev->linecount;  // number of consecutive lines so far
+    const bool four_lines = dev->linecount == 4;
+    if (four_lines && dev->path_type != FILL_PATH) {
+        jm_checkquad(dev);
+    }
+}
+
+// Path walker "curveto": append ("c", lastpoint, p1, p2, p3) to the path
+// items, where p1..p3 are the three given points transformed by dev->ctm.
+// All three points are included in the path rectangle and p3 becomes the new
+// last point. A curve interrupts any run of consecutive lines.
+static void
+trace_curveto(fz_context *ctx, void *dev_, float x1, float y1, float x2, float y2, float x3, float y3)
+{
+    jm_lineart_device* dev = (jm_lineart_device*) dev_;
+    dev->linecount = 0;
+
+    const fz_point pts[3] = {
+        fz_transform_point(fz_make_point(x1, y1), dev->ctm),
+        fz_transform_point(fz_make_point(x2, y2), dev->ctm),
+        fz_transform_point(fz_make_point(x3, y3), dev->ctm),
+    };
+    for (int k = 0; k < 3; ++k) {
+        dev->pathrect = fz_include_point_in_rect(dev->pathrect, pts[k]);
+    }
+
+    PyObject *curve = PyTuple_New(5);
+    PyTuple_SET_ITEM(curve, 0, PyUnicode_FromString("c"));
+    PyTuple_SET_ITEM(curve, 1, JM_py_from_point(dev->lastpoint));
+    PyTuple_SET_ITEM(curve, 2, JM_py_from_point(pts[0]));
+    PyTuple_SET_ITEM(curve, 3, JM_py_from_point(pts[1]));
+    PyTuple_SET_ITEM(curve, 4, JM_py_from_point(pts[2]));
+    dev->lastpoint = pts[2];
+
+    PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+    LIST_APPEND_DROP(items, curve);
+}
+
+// Path walker "closepath".
+// * After exactly 3 consecutive lines, jm_checkrect() may turn them into a
+//   rectangle item; if it does, nothing else is done.
+// * Otherwise, if a moveto is pending (havemove), the sub-path is closed by an
+//   explicit "l" item from the last back to the first point (only if they
+//   differ), havemove is cleared and "closePath" is stored as false.
+// * Without a pending moveto, "closePath" is stored as true.
+static void
+trace_close(fz_context *ctx, void *dev_)
+{
+    jm_lineart_device* dev = (jm_lineart_device*) dev_;
+    if (dev->linecount == 3 && jm_checkrect(dev)) {
+        return;
+    }
+    dev->linecount = 0;  // closing interrupts any run of consecutive lines
+
+    if (!dev->havemove) {
+        DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(1));
+        return;
+    }
+
+    const bool gap =
+            dev->firstpoint.x != dev->lastpoint.x
+            || dev->firstpoint.y != dev->lastpoint.y;
+    if (gap) {
+        PyObject *closing = PyTuple_New(3);
+        PyTuple_SET_ITEM(closing, 0, PyUnicode_FromString("l"));
+        PyTuple_SET_ITEM(closing, 1, JM_py_from_point(dev->lastpoint));
+        PyTuple_SET_ITEM(closing, 2, JM_py_from_point(dev->firstpoint));
+        dev->lastpoint = dev->firstpoint;
+        PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+        LIST_APPEND_DROP(items, closing);
+    }
+    dev->havemove = 0;
+    DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0));
+}
+
+// Callback table handed to fz_walk_path() by jm_lineart_path(). Only the first
+// four members are given; any further members of fz_path_walker are left
+// zero-initialized by the aggregate initialization.
+static const fz_path_walker trace_path_walker =
+    {
+        trace_moveto,   // start of a sub-path
+        trace_lineto,   // straight line segment
+        trace_curveto,  // curve with two control points and an end point
+        trace_close     // close the current sub-path
+    };
+
+/*
+---------------------------------------------------------------------
+Build dev->pathdict for `path`:
+* reset path rectangle, line counter, first and last point
+* replace any existing path dictionary by a fresh one whose "items" entry
+  is an empty list
+* let fz_walk_path() fill that list through trace_path_walker
+* if the list is missing or still empty afterwards, drop the dictionary
+  again, leaving dev->pathdict null
+---------------------------------------------------------------------
+*/
+static void
+jm_lineart_path(jm_lineart_device *dev, const fz_path *path)
+{
+    dev->pathrect = fz_infinite_rect;
+    dev->linecount = 0;
+    dev->lastpoint = fz_make_point(0, 0);
+    dev->firstpoint = fz_make_point(0, 0);
+
+    Py_CLEAR(dev->pathdict);  // does nothing if there is no dictionary
+    dev->pathdict = PyDict_New();
+    DICT_SETITEM_DROP(dev->pathdict, dictkey_items, PyList_New(0));
+
+    mupdf::ll_fz_walk_path(path, &trace_path_walker, dev);
+
+    PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items);  // borrowed
+    const bool nothing_traced = !items || !PyList_Size(items);
+    if (nothing_traced)
+    {
+        Py_CLEAR(dev->pathdict);
+    }
+}
+
+//---------------------------------------------------------------------------
+// Helper: return the UTF-8 text stored under the "type" key of `pathdict`,
+// or nullptr if the key is missing or its value cannot be read as text.
+// Never leaves a Python exception pending.
+//---------------------------------------------------------------------------
+static const char *
+jm_path_type_utf8(PyObject *pathdict)
+{
+    PyObject *value = PyDict_GetItem(pathdict, dictkey_type);  // borrowed, no error set
+    if (!value) {
+        return nullptr;
+    }
+    const char *text = PyUnicode_AsUTF8(value);
+    if (!text) {
+        PyErr_Clear();
+    }
+    return text;
+}
+
+//---------------------------------------------------------------------------
+// Helper: try to merge dev->pathdict into the last entry of the list dev->out.
+// This happens only if the current path is a stroke ("s"), the last entry is
+// a fill ("f") and both have equal "items". On success the last entry's type
+// becomes "fs" and 1 is returned; otherwise 0 is returned and the caller is
+// expected to append. Never leaves a Python exception pending.
+//---------------------------------------------------------------------------
+static int
+jm_merge_stroke_into_fill(jm_lineart_device *dev)
+{
+    const Py_ssize_t len = PyList_Size(dev->out);  // len of output list so far
+    if (len < 1) {  // nothing to merge into (or size query failed)
+        if (len < 0) {
+            PyErr_Clear();
+        }
+        return 0;
+    }
+    const char *thistype = jm_path_type_utf8(dev->pathdict);
+    if (!thistype || strcmp(thistype, "s") != 0) {  // only strokes are merged
+        return 0;
+    }
+    PyObject *prev = PyList_GET_ITEM(dev->out, len - 1);  // borrowed
+    const char *prevtype = jm_path_type_utf8(prev);
+    if (!prevtype || strcmp(prevtype, "f") != 0) {  // only into a fill
+        return 0;
+    }
+    // last check: there must be the same list of items for "f" and "s".
+    PyObject *previtems = PyDict_GetItem(prev, dictkey_items);
+    PyObject *thisitems = PyDict_GetItem(dev->pathdict, dictkey_items);
+    if (!previtems || !thisitems) {
+        return 0;
+    }
+    const int differ = PyObject_RichCompareBool(previtems, thisitems, Py_NE);
+    if (differ != 0) {  // 1: items differ, -1: comparison raised
+        if (differ < 0) {
+            PyErr_Clear();
+        }
+        return 0;
+    }
+    if (PyDict_Merge(prev, dev->pathdict, 0) != 0) {  // merge, do not override
+        messagef("could not merge stroke and fill path");
+        PyErr_Clear();
+        return 0;
+    }
+    DICT_SETITEM_DROP(prev, dictkey_type, PyUnicode_FromString("fs"));
+    return 1;
+}
+
+//---------------------------------------------------------------------------
+// Deliver the current path dictionary (dev->pathdict) to the output and
+// release the device's reference to it.
+// (1) If dev->out is callable, or dev->method is not None, the dictionary is
+//     passed to that function / method and no merging takes place.
+// (2) Otherwise dev->out is treated as a list. A stroke path whose items
+//     equal those of the last list entry, which must be a fill path, is
+//     merged into that entry, whose type then becomes "fs".
+// (3) In every other case the dictionary is appended to the list.
+// Python errors raised on the way are reported via messagef() where a message
+// exists and are always cleared, so that no exception is left pending when
+// this function returns.
+//---------------------------------------------------------------------------
+static void
+jm_append_merge(jm_lineart_device *dev)
+{
+    if (PyCallable_Check(dev->out) || dev->method != Py_None) {  // function or method
+        PyObject *resp = NULL;
+        if (dev->method == Py_None) {
+            resp = PyObject_CallFunctionObjArgs(dev->out, dev->pathdict, NULL);
+        } else {
+            resp = PyObject_CallMethodObjArgs(dev->out, dev->method, dev->pathdict, NULL);
+        }
+        if (resp) {
+            Py_DECREF(resp);
+        } else {
+            messagef("calling cdrawings callback function/method failed!");
+            PyErr_Clear();
+        }
+        Py_CLEAR(dev->pathdict);
+        return;
+    }
+
+    if (!jm_merge_stroke_into_fill(dev)) {
+        if (PyList_Append(dev->out, dev->pathdict) != 0) {
+            messagef("could not append path to output list");
+            PyErr_Clear();
+        }
+    }
+    Py_CLEAR(dev->pathdict);
+}
+
+// fill_path callback of the line-art device.
+// Walks `path` with `ctm` applied. If that yields any items, the path
+// dictionary is completed with type "f", the even-odd flag, fill opacity,
+// fill colour, path rectangle, sequence number and layer name (plus the clip
+// level when clips are being tracked), handed to jm_append_merge(), and the
+// sequence counter is advanced. An empty path changes nothing, not even the
+// sequence counter.
+static void
+jm_lineart_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
+                int even_odd, fz_matrix ctm, fz_colorspace *colorspace,
+                const float *color, float alpha, fz_color_params color_params)
+{
+    jm_lineart_device *dev = (jm_lineart_device *) dev_;
+    dev->ctm = ctm;
+    dev->path_type = FILL_PATH;
+    jm_lineart_path(dev, path);
+    PyObject *entry = dev->pathdict;  // null if the path had no items
+    if (!entry) {
+        return;
+    }
+    DICT_SETITEM_DROP(entry, dictkey_type, PyUnicode_FromString("f"));
+    DICT_SETITEMSTR_DROP(entry, "even_odd", JM_BOOL(even_odd));
+    DICT_SETITEMSTR_DROP(entry, "fill_opacity", Py_BuildValue("f", alpha));
+    DICT_SETITEMSTR_DROP(entry, "fill", jm_lineart_color(colorspace, color));
+    DICT_SETITEM_DROP(entry, dictkey_rect, JM_py_from_rect(dev->pathrect));
+    DICT_SETITEMSTR_DROP(entry, "seqno", PyLong_FromSize_t(dev->seqno));
+    DICT_SETITEMSTR_DROP(entry, "layer", JM_UnicodeFromStr(dev->layer_name));
+    if (dev->clips) {
+        DICT_SETITEMSTR_DROP(entry, "level", PyLong_FromLong(dev->depth));
+    }
+    jm_append_merge(dev);
+    ++dev->seqno;
+}
+
+// stroke_path callback of the line-art device.
+// Derives dev->pathfactor from `ctm` (|a| if |a| == |d| and a != 0, else |b|
+// if |b| == |c| and b != 0, else 1), walks `path`, and - if any items
+// resulted - completes the path dictionary with type "s", stroke opacity,
+// colour, scaled width, line caps, line join, "closePath" (false unless
+// already set), the dash description, path rectangle, layer and sequence
+// number (plus the clip level when clips are being tracked). The dictionary
+// then goes to jm_append_merge(), which may merge it with a preceding fill of
+// the same path, and the sequence counter is advanced.
+static void
+jm_lineart_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
+                const fz_stroke_state *stroke, fz_matrix ctm,
+                fz_colorspace *colorspace, const float *color, float alpha,
+                fz_color_params color_params)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (ctm.a != 0 && fz_abs(ctm.a) == fz_abs(ctm.d)) {
+        dev->pathfactor = fz_abs(ctm.a);
+    } else if (ctm.b != 0 && fz_abs(ctm.b) == fz_abs(ctm.c)) {
+        dev->pathfactor = fz_abs(ctm.b);
+    } else {
+        dev->pathfactor = 1;
+    }
+    dev->ctm = ctm;
+    dev->path_type = STROKE_PATH;
+
+    jm_lineart_path(dev, path);
+    PyObject *entry = dev->pathdict;  // null if the path had no items
+    if (!entry) {
+        return;
+    }
+    DICT_SETITEM_DROP(entry, dictkey_type, PyUnicode_FromString("s"));
+    DICT_SETITEMSTR_DROP(entry, "stroke_opacity", Py_BuildValue("f", alpha));
+    DICT_SETITEMSTR_DROP(entry, "color", jm_lineart_color(colorspace, color));
+    DICT_SETITEM_DROP(entry, dictkey_width, Py_BuildValue("f", dev->pathfactor * stroke->linewidth));
+    DICT_SETITEMSTR_DROP(entry, "lineCap", Py_BuildValue("iii", stroke->start_cap, stroke->dash_cap, stroke->end_cap));
+    DICT_SETITEMSTR_DROP(entry, "lineJoin", Py_BuildValue("f", dev->pathfactor * stroke->linejoin));
+    if (!PyDict_GetItemString(entry, "closePath")) {
+        DICT_SETITEMSTR_DROP(entry, "closePath", JM_BOOL(0));
+    }
+
+    // "dashes": "[ d1 d2 ... ] phase" with scaled values, or "[] 0" if undashed
+    PyObject *dashes;
+    if (stroke->dash_len) {
+        mupdf::FzBuffer buff(256);
+        mupdf::fz_append_string(buff, "[ ");
+        for (int k = 0; k < stroke->dash_len; ++k) {
+            fz_append_printf(ctx, buff.m_internal, "%g ", dev->pathfactor * stroke->dash_list[k]);
+        }
+        fz_append_printf(ctx, buff.m_internal, "] %g", dev->pathfactor * stroke->dash_phase);
+        dashes = JM_EscapeStrFromBuffer(buff);
+    } else {
+        dashes = PyUnicode_FromString("[] 0");
+    }
+    DICT_SETITEMSTR_DROP(entry, "dashes", dashes);
+
+    DICT_SETITEM_DROP(entry, dictkey_rect, JM_py_from_rect(dev->pathrect));
+    DICT_SETITEMSTR_DROP(entry, "layer", JM_UnicodeFromStr(dev->layer_name));
+    DICT_SETITEMSTR_DROP(entry, "seqno", PyLong_FromSize_t(dev->seqno));
+    if (dev->clips) {
+        DICT_SETITEMSTR_DROP(entry, "level", PyLong_FromLong(dev->depth));
+    }
+    jm_append_merge(dev);
+    ++dev->seqno;
+}
+
+// clip_path callback of the line-art device; does nothing unless clips are
+// being tracked. Walks `path` and, if any items resulted, emits a "clip"
+// dictionary carrying the even-odd flag, "closePath" (false unless already
+// set), the scissor computed for the new clip level, the current level and
+// the layer name. The clip depth is then increased. The `scissor` argument
+// is not used.
+static void
+jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (!dev->clips) {
+        return;
+    }
+    dev->ctm = ctm;
+    dev->path_type = CLIP_PATH;
+    jm_lineart_path(dev, path);
+    PyObject *entry = dev->pathdict;  // null if the path had no items
+    if (!entry) {
+        return;
+    }
+    DICT_SETITEM_DROP(entry, dictkey_type, PyUnicode_FromString("clip"));
+    DICT_SETITEMSTR_DROP(entry, "even_odd", JM_BOOL(even_odd));
+    if (!PyDict_GetItemString(entry, "closePath")) {
+        DICT_SETITEMSTR_DROP(entry, "closePath", JM_BOOL(0));
+    }
+    DICT_SETITEMSTR_DROP(entry, "scissor", JM_py_from_rect(compute_scissor(dev)));
+    DICT_SETITEMSTR_DROP(entry, "level", PyLong_FromLong(dev->depth));
+    DICT_SETITEMSTR_DROP(entry, "layer", JM_UnicodeFromStr(dev->layer_name));
+    jm_append_merge(dev);
+    dev->depth++;
+}
+
+// clip_stroke_path callback of the line-art device; does nothing unless clips
+// are being tracked. Same output as jm_lineart_clip_path(), except that
+// "even_odd" is stored as None. The stroke state and the `scissor` argument
+// are not used.
+static void
+jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (!dev->clips) {
+        return;
+    }
+    dev->ctm = ctm;
+    dev->path_type = CLIP_STROKE_PATH;
+    jm_lineart_path(dev, path);
+    PyObject *entry = dev->pathdict;  // null if the path had no items
+    if (!entry) {
+        return;
+    }
+    DICT_SETITEM_DROP(entry, dictkey_type, PyUnicode_FromString("clip"));
+    DICT_SETITEMSTR_DROP(entry, "even_odd", Py_BuildValue("s", NULL));  // yields None
+    if (!PyDict_GetItemString(entry, "closePath")) {
+        DICT_SETITEMSTR_DROP(entry, "closePath", JM_BOOL(0));
+    }
+    DICT_SETITEMSTR_DROP(entry, "scissor", JM_py_from_rect(compute_scissor(dev)));
+    DICT_SETITEMSTR_DROP(entry, "level", PyLong_FromLong(dev->depth));
+    DICT_SETITEMSTR_DROP(entry, "layer", JM_UnicodeFromStr(dev->layer_name));
+    jm_append_merge(dev);
+    dev->depth++;
+}
+
+
+// clip_stroke_text callback: when clips are being tracked, push a scissor for
+// the new clip level and increase the clip depth. No output item is produced.
+static void
+jm_lineart_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (dev->clips) {
+        compute_scissor(dev);
+        dev->depth++;
+    }
+}
+
+// clip_text callback: when clips are being tracked, push a scissor for the
+// new clip level and increase the clip depth. No output item is produced.
+static void
+jm_lineart_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (dev->clips) {
+        compute_scissor(dev);
+        dev->depth++;
+    }
+}
+
+// clip_image_mask callback: when clips are being tracked, push a scissor for
+// the new clip level and increase the clip depth. No output item is produced.
+static void
+jm_lineart_clip_image_mask(fz_context *ctx, fz_device *dev_, fz_image *image, fz_matrix ctm, fz_rect scissor)
+{
+    jm_lineart_device *dev = (jm_lineart_device *)dev_;
+    if (dev->clips) {
+        compute_scissor(dev);
+        dev->depth++;
+    }
+}
+ 
+// Device callback ending a clip: removes the last entry of the device's
+// scissors list and decreases the nesting level. Nothing happens when clip
+// collection is off, when there is no scissors list, or when it is empty.
+static void
+jm_lineart_pop_clip(fz_context *ctx, fz_device *dev_)
+{
+    jm_lineart_device *device = reinterpret_cast<jm_lineart_device *>(dev_);
+    if (!device->clips || !device->scissors)
+    {
+        return;
+    }
+    const Py_ssize_t count = PyList_Size(device->scissors);
+    if (count >= 1)
+    {
+        // assigning NULL to the slice [count-1, count) deletes the last item
+        PyList_SetSlice(device->scissors, count - 1, count, NULL);
+        device->depth--;
+    }
+}
+
+
+// Device callback starting a group. With clip collection enabled, a
+// dictionary of type "group" describing the group is stored in
+// dev->pathdict and handed to jm_append_merge(); afterwards the nesting
+// level is increased.
+static void
+jm_lineart_begin_group(fz_context *ctx, fz_device *dev_, fz_rect bbox, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha)
+{
+    jm_lineart_device *dev = reinterpret_cast<jm_lineart_device *>(dev_);
+    if (!dev->clips)
+    {
+        return;
+    }
+
+    // The objects below are new references; the "N" format hands them over
+    // to the dictionary without incrementing their reference counts.
+    PyObject *rect = JM_py_from_rect(bbox);
+    PyObject *is_isolated = JM_BOOL(isolated);
+    PyObject *is_knockout = JM_BOOL(knockout);
+    PyObject *layer = JM_UnicodeFromStr(dev->layer_name);
+
+    dev->pathdict = Py_BuildValue(
+            "{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
+            "type", "group",
+            "rect", rect,
+            "isolated", is_isolated,
+            "knockout", is_knockout,
+            "blendmode", fz_blendmode_name(blendmode),
+            "opacity", alpha,
+            "level", dev->depth,
+            "layer", layer
+            );
+    jm_append_merge(dev);
+    dev->depth++;
+}
+
+// Device callback ending a group: decreases the nesting level when clip
+// collection is enabled.
+static void
+jm_lineart_end_group(fz_context *ctx, fz_device *dev_)
+{
+    jm_lineart_device *device = reinterpret_cast<jm_lineart_device *>(dev_);
+    if (device->clips)
+    {
+        device->depth--;
+    }
+}
+
+// Device callback for filled text: the arguments are not inspected, the
+// call is only forwarded to jm_increase_seqno().
+static void jm_lineart_fill_text(
+        fz_context *ctx,
+        fz_device *dev,
+        const fz_text *text,
+        fz_matrix ctm,
+        fz_colorspace *colorspace,
+        const float *color,
+        float alpha,
+        fz_color_params color_params
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// Device callback for stroked text: the arguments are not inspected, the
+// call is only forwarded to jm_increase_seqno().
+static void jm_lineart_stroke_text(
+        fz_context *ctx,
+        fz_device *dev,
+        const fz_text *text,
+        const fz_stroke_state *stroke,
+        fz_matrix ctm,
+        fz_colorspace *colorspace,
+        const float *color,
+        float alpha,
+        fz_color_params color_params
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// Device callback for a shading: the arguments are not inspected, the call
+// is only forwarded to jm_increase_seqno().
+static void jm_lineart_fill_shade(
+        fz_context *ctx,
+        fz_device *dev,
+        fz_shade *shd,
+        fz_matrix ctm,
+        float alpha,
+        fz_color_params color_params
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// Device callback for an image: the arguments are not inspected, the call
+// is only forwarded to jm_increase_seqno().
+static void jm_lineart_fill_image(
+        fz_context *ctx,
+        fz_device *dev,
+        fz_image *img,
+        fz_matrix ctm,
+        float alpha,
+        fz_color_params color_params
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// Device callback for an image mask: the arguments are not inspected, the
+// call is only forwarded to jm_increase_seqno().
+static void jm_lineart_fill_image_mask(
+        fz_context *ctx,
+        fz_device *dev,
+        fz_image *img,
+        fz_matrix ctm,
+        fz_colorspace *colorspace,
+        const float *color,
+        float alpha,
+        fz_color_params color_params
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+// Device callback for ignored text: the arguments are not inspected, the
+// call is only forwarded to jm_increase_seqno().
+static void jm_lineart_ignore_text(
+        fz_context *ctx,
+        fz_device *dev,
+        const fz_text *text,
+        fz_matrix ctm
+        )
+{
+    jm_increase_seqno(ctx, dev);
+}
+
+
+//-------------------------------------------------------------------
+// LINEART device for Python method Page.get_cdrawings()
+//-------------------------------------------------------------------
+/* Create a line-art device and return it wrapped in a mupdf::FzDevice.
+
+out:    target object stored in the device; a reference is taken here only
+        if it is a list.
+clips:  stored in the device; the clip/group callbacks do nothing when 0.
+method: stored in the device; a reference is always taken.
+
+Callbacks for which this device has no handler are explicitly set to null. */
+mupdf::FzDevice JM_new_lineart_device(PyObject *out, int clips, PyObject *method)
+{
+    jm_lineart_device* dev = reinterpret_cast<jm_lineart_device*>(
+            mupdf::ll_fz_new_device_of_size(sizeof(jm_lineart_device)));
+    fz_device& base = dev->super;
+
+    // device life cycle
+    base.close_device = nullptr;
+    base.drop_device = jm_lineart_drop_device;
+
+    // paths
+    base.fill_path = jm_lineart_fill_path;
+    base.stroke_path = jm_lineart_stroke_path;
+    base.clip_path = jm_lineart_clip_path;
+    base.clip_stroke_path = jm_lineart_clip_stroke_path;
+
+    // text
+    base.fill_text = jm_lineart_fill_text;
+    base.stroke_text = jm_lineart_stroke_text;
+    base.clip_text = jm_lineart_clip_text;
+    base.clip_stroke_text = jm_lineart_clip_stroke_text;
+    base.ignore_text = jm_lineart_ignore_text;
+
+    // shadings and images
+    base.fill_shade = jm_lineart_fill_shade;
+    base.fill_image = jm_lineart_fill_image;
+    base.fill_image_mask = jm_lineart_fill_image_mask;
+    base.clip_image_mask = jm_lineart_clip_image_mask;
+
+    base.pop_clip = jm_lineart_pop_clip;
+
+    // masks, groups, tiles
+    base.begin_mask = nullptr;
+    base.end_mask = nullptr;
+    base.begin_group = jm_lineart_begin_group;
+    base.end_group = jm_lineart_end_group;
+    base.begin_tile = nullptr;
+    base.end_tile = nullptr;
+
+    // layers
+    base.begin_layer = jm_lineart_begin_layer;
+    base.end_layer = jm_lineart_end_layer;
+
+    // structure and metatext
+    base.begin_structure = nullptr;
+    base.end_structure = nullptr;
+    base.begin_metatext = nullptr;
+    base.end_metatext = nullptr;
+
+    base.render_flags = nullptr;
+    base.set_default_colorspaces = nullptr;
+
+    // references held by the device
+    if (PyList_Check(out))
+    {
+        Py_INCREF(out);
+    }
+    Py_INCREF(method);
+
+    // device state
+    dev->out = out;
+    dev->method = method;
+    dev->clips = clips;
+    dev->seqno = 0;
+    dev->depth = 0;
+    dev->pathdict = nullptr;
+
+    return mupdf::FzDevice(&dev->super);
+}
+
+/* Run `page` through a line-art device.
+
+extended: truth value stored as the device's `clips` setting; an omitted
+          (NULL) argument counts as false.
+callback, method: if `callback` is callable or `method` is not None, the
+          device is created with `callback` as its target and None is
+          returned. Otherwise the device collects into a new list, which is
+          returned. Omitted (NULL) arguments are treated as None.
+
+Returns NULL with a Python exception set if evaluating `extended` fails or
+the result list cannot be created. C++ exceptions raised while running the
+page propagate to the caller; the result list is released first. */
+PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL)
+{
+    // The declared defaults are NULL, which the Python C API calls below
+    // and the device constructor must never see.
+    if (!callback) callback = Py_None;
+    if (!method) method = Py_None;
+
+    int clips = 0;
+    if (extended)
+    {
+        clips = PyObject_IsTrue(extended);
+        if (clips < 0)
+        {
+            return NULL;  // truth test raised; exception is already set
+        }
+    }
+
+    const bool use_callback = PyCallable_Check(callback) || method != Py_None;
+    PyObject *rc = NULL;
+    if (!use_callback)
+    {
+        rc = PyList_New(0);
+        if (!rc)
+        {
+            return NULL;
+        }
+    }
+
+    try
+    {
+        mupdf::FzDevice dev = JM_new_lineart_device(use_callback ? callback : rc, clips, method);
+        mupdf::FzRect prect = mupdf::fz_bound_page(page);
+        // matrix flipping the y axis about the page's bottom edge
+        ((jm_lineart_device*) dev.m_internal)->ptm = mupdf::ll_fz_make_matrix(1, 0, 0, -1, 0, prect.y1);
+
+        mupdf::FzCookie cookie;
+        mupdf::FzMatrix identity;
+        mupdf::fz_run_page( page, dev, *identity.internal(), cookie);
+        mupdf::fz_close_device( dev);
+    }
+    catch (...)
+    {
+        // do not leak our reference to the result list
+        Py_XDECREF(rc);
+        throw;
+    }
+
+    if (use_callback)
+    {
+        Py_RETURN_NONE;
+    }
+    return rc;
+}
+
+
+//---------------------------------------------------------------------------
+// APPEND non-ascii runes in unicode escape format to fz_buffer
+//---------------------------------------------------------------------------
+/* Append code point `ch` to `buff`.
+
+- backslash (92) is written as the escape "\u005c" so that it cannot start
+  an accidental "\u" / "\U" sequence,
+- 32..127 and line feed (10) are written as a single byte,
+- surrogates (0xd800..0xdfff) and values that are not code points at all
+  (negative or above 0x10ffff) are written as "\ufffd",
+- everything else is written as "\uXXXX" (up to 0xffff) or "\UXXXXXXXX". */
+void JM_append_rune(fz_buffer *buff, int ch)
+{
+    char text[32];
+    if (ch == 92)  // prevent accidental "\u", "\U" sequences
+    {
+        mupdf::ll_fz_append_string(buff, "\\u005c");
+    }
+    else if ((ch >= 32 && ch <= 127) || ch == 10)
+    {
+        mupdf::ll_fz_append_byte(buff, ch);
+    }
+    else if (ch < 0 || ch > 0x10ffff || (ch >= 0xd800 && ch <= 0xdfff))
+    {
+        // Orphaned surrogates and out-of-range values. A negative value
+        // would otherwise be formatted by "%04x" as 8 hex digits, giving a
+        // malformed "\u" escape.
+        mupdf::ll_fz_append_string(buff, "\\ufffd");
+    }
+    else if (ch <= 0xffff)
+    {
+        // 4 hex digits
+        snprintf(text, sizeof(text), "\\u%04x", ch);
+        mupdf::ll_fz_append_string(buff, text);
+    }
+    else
+    {
+        // 8 hex digits
+        snprintf(text, sizeof(text), "\\U%08x", ch);
+        mupdf::ll_fz_append_string(buff, text);
+    }
+}
+
+
+/* Build the list of span dictionaries for one text line and store it in
+`line_dict` under the "spans" key.
+
+line_dict: dictionary of the line; receives the span list.
+line:      the text line whose characters are processed.
+raw:       if non-zero, each span gets a "chars" list with one dictionary
+           per character; otherwise each span gets a "text" string.
+buff:      scratch buffer for span text; cleared on entry and after each
+           span in non-raw mode.
+tp_rect:   characters whose bbox does not overlap this rectangle are
+           skipped, unless the rectangle is infinite.
+
+A new span is started whenever size, flags, char_flags (MuPDF >= 1.25.2),
+color value, font name or bidi differ from the previous character.
+
+Returns the union of the bboxes of the spans that were added to the list. */
+mupdf::FzRect JM_make_spanlist(
+        PyObject *line_dict,
+        mupdf::FzStextLine& line,
+        int raw,
+        mupdf::FzBuffer& buff,
+        mupdf::FzRect& tp_rect
+        )
+{
+    PyObject *span = NULL, *char_list = NULL, *char_dict;
+    PyObject *span_list = PyList_New(0);
+    mupdf::fz_clear_buffer(buff);
+    fz_rect span_rect = fz_empty_rect;
+    fz_rect line_rect = fz_empty_rect;
+    fz_point span_origin = {0, 0};
+    struct char_style
+    {
+        float size = -1;    // -1 marks "no span started yet"
+        unsigned flags = 0;
+
+        #if MUPDF_VERSION_GE(1, 25, 2)
+        /* From mupdf:include/mupdf/fitz/structured-text.h:fz_stext_char::flags, which
+        uses anonymous enum values:
+        FZ_STEXT_STRIKEOUT = 1,
+        FZ_STEXT_UNDERLINE = 2,
+        FZ_STEXT_SYNTHETIC = 4,
+        FZ_STEXT_FILLED = 16,
+        FZ_STEXT_STROKED = 32,
+        FZ_STEXT_CLIPPED = 64
+        */
+        unsigned char_flags = 0;
+        #endif
+
+        const char *font = "";
+        unsigned argb = 0;
+        float asc = 0;
+        float desc = 0;
+        // NOTE(review): `bidi` is never assigned from the character in this
+        // function, so it always keeps its initial value 0 - confirm
+        // whether that is intended.
+        uint16_t bidi = 0;
+    };
+    char_style old_style;
+    char_style style;
+
+    for (mupdf::FzStextChar ch: line)
+    {
+        fz_rect r = JM_char_bbox(line, ch);
+        if (!JM_rects_overlap(*tp_rect.internal(), r) && !fz_is_infinite_rect(tp_rect))
+        {
+            continue;
+        }
+        /* Info from:
+        detect_super_script()
+        fz_font_is_italic()
+        fz_font_is_serif()
+        fz_font_is_monospaced()
+        fz_font_is_bold()
+        */
+        int flags = JM_char_font_flags( ch.m_internal->font, line.m_internal, ch.m_internal);
+        fz_point origin = ch.m_internal->origin;
+        style.size = ch.m_internal->size;
+        style.flags = flags;
+        #if MUPDF_VERSION_GE(1, 25, 2)
+        /* FZ_STEXT_SYNTHETIC is per-char, not per-span. */
+        style.char_flags = ch.m_internal->flags & ~FZ_STEXT_SYNTHETIC;
+        #endif
+        style.font = JM_font_name(ch.m_internal->font);
+        #if MUPDF_VERSION_GE(1, 25, 0)
+            style.argb = ch.m_internal->argb;
+        #else
+            style.argb = ch.m_internal->color;
+        #endif
+        style.asc = JM_font_ascender(ch.m_internal->font);
+        style.desc = JM_font_descender(ch.m_internal->font);
+
+        // Style change (or very first character): close the current span,
+        // if any, and open a new one. Ascender / descender are not part of
+        // the comparison.
+        if (0
+                || style.size != old_style.size
+                || style.flags != old_style.flags
+                #if MUPDF_VERSION_GE(1, 25, 2)
+                || style.char_flags != old_style.char_flags
+                #endif
+                || style.argb != old_style.argb
+                || strcmp(style.font, old_style.font) != 0
+                || style.bidi != old_style.bidi
+                )
+        {
+            if (old_style.size >= 0)
+            {
+                // not first one, output previous
+                if (raw)
+                {
+                    // put character list in the span
+                    DICT_SETITEM_DROP(span, dictkey_chars, char_list);
+                    char_list = NULL;
+                }
+                else
+                {
+                    // put text string in the span
+                    DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff));
+                    mupdf::fz_clear_buffer(buff);
+                }
+
+                DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin));
+                DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect));
+                line_rect = mupdf::ll_fz_union_rect(line_rect, span_rect);
+                LIST_APPEND_DROP(span_list, span);
+                span = NULL;
+            }
+
+            span = PyDict_New();
+            float asc = style.asc, desc = style.desc;
+            if (style.asc < 1e-3)
+            {
+                // ascender (practically) zero: report fixed substitute values
+                asc = 0.9f;
+                desc = -0.1f;
+            }
+
+            DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size));
+            DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("I", style.flags));
+            DICT_SETITEM_DROP(span, dictkey_bidi, Py_BuildValue("I", style.bidi));
+            #if MUPDF_VERSION_GE(1, 25, 2)
+            DICT_SETITEM_DROP(span, dictkey_char_flags, Py_BuildValue("I", style.char_flags));
+            #endif
+            DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font));
+            // lower 24 bits are reported as "color", the top byte as "alpha"
+            DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("I", style.argb & 0xffffff));
+            #if MUPDF_VERSION_GE(1, 25, 0)
+            DICT_SETITEMSTR_DROP(span, "alpha", Py_BuildValue("I", style.argb >> 24));
+            #endif
+            DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc));
+            DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc));
+
+            old_style = style;
+            span_rect = r;
+            span_origin = origin;
+
+        }
+        span_rect = mupdf::ll_fz_union_rect(span_rect, r);
+
+        if (raw)
+        {
+            // make and append a char dict
+            char_dict = PyDict_New();
+            DICT_SETITEM_DROP(char_dict, dictkey_origin, JM_py_from_point(ch.m_internal->origin));
+
+            DICT_SETITEM_DROP(char_dict, dictkey_bbox, JM_py_from_rect(r));
+
+            DICT_SETITEM_DROP(char_dict, dictkey_c, Py_BuildValue("C", ch.m_internal->c));
+            DICT_SETITEMSTR_DROP(char_dict, "synthetic", Py_BuildValue("O", (ch.m_internal->flags & FZ_STEXT_SYNTHETIC) ? Py_True : Py_False));
+            if (!char_list)
+            {
+                char_list = PyList_New(0);
+            }
+            LIST_APPEND_DROP(char_list, char_dict);
+        }
+        else
+        {
+            // add character byte to buffer
+            JM_append_rune(buff.m_internal, ch.m_internal->c);
+        }
+    }
+    // all characters processed, now flush remaining span
+    if (span)
+    {
+        if (raw)
+        {
+            DICT_SETITEM_DROP(span, dictkey_chars, char_list);
+            char_list = NULL;
+        }
+        else
+        {
+            DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff));
+            mupdf::fz_clear_buffer(buff);
+        }
+        DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin));
+        DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect));
+
+        // the last span is only kept if its bbox is not empty
+        if (!fz_is_empty_rect(span_rect))
+        {
+            LIST_APPEND_DROP(span_list, span);
+            line_rect = fz_union_rect(line_rect, span_rect);
+        }
+        else
+        {
+            Py_DECREF(span);
+        }
+        span = NULL;
+    }
+    // The span list is stored in either case, whether or not the line
+    // rectangle is empty.
+    DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list);
+    return line_rect;
+}
+
+//-----------------------------------------------------------------------------
+// Functions for wordlist output
+//-----------------------------------------------------------------------------
+/* Append one word entry to the list `lines`.
+
+The entry is the tuple (x0, y0, x1, y1, text, block_n, line_n, word_n),
+with the coordinates taken from `wbbox` and the text from `buff`.
+Afterwards `wbbox` is reset to the empty rectangle.
+
+Returns word_n + 1, the number to use for the next word. */
+int JM_append_word(
+        PyObject* lines,
+        fz_buffer* buff,
+        fz_rect* wbbox,
+        int block_n,
+        int line_n,
+        int word_n
+        )
+{
+    PyObject* word = JM_EscapeStrFromBuffer(buff);
+    // The "O" format makes the tuple take its own reference to `word`,
+    // which is why our reference is released further down.
+    PyObject* entry = Py_BuildValue(
+            "ffffOiii",
+            wbbox->x0, wbbox->y0, wbbox->x1, wbbox->y1,
+            word,
+            block_n, line_n, word_n
+            );
+    LIST_APPEND_DROP(lines, entry);
+    Py_DECREF(word);
+
+    *wbbox = fz_empty_rect;
+    const int next_word_n = word_n + 1;
+    return next_word_n;
+}
+
+/* Return a list of word entries for all text blocks of a text page.
+
+Each entry is created by JM_append_word() and carries the word's bbox, its
+text, and block / line / word numbers. Block numbers count all blocks of
+the page (including non-text ones, which contribute no words); line and
+word numbers start at 0 within their block / line.
+
+A word ends at a delimiter character (as decided by JM_is_word_delimiter()
+with `delimiters`), at a change between RTL and non-RTL characters, and at
+the end of the line. Characters whose bbox does not overlap the page's
+mediabox are skipped, unless the mediabox is infinite. */
+PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters)
+{
+    int block_n = -1;
+    fz_rect wbbox = fz_empty_rect;  // word bbox
+    fz_rect tp_rect = this_tpage.m_internal->mediabox;
+    // loop-invariant: is the page rectangle a real limit?
+    const bool clip_chars = !fz_is_infinite_rect(tp_rect);
+
+    PyObject *lines = NULL;
+    mupdf::FzBuffer buff = mupdf::fz_new_buffer(64);
+    lines = PyList_New(0);
+    for (mupdf::FzStextBlock block: this_tpage)
+    {
+        block_n++;
+        if (block.m_internal->type != FZ_STEXT_BLOCK_TEXT)
+        {
+            continue;
+        }
+        int line_n = -1;
+        for (mupdf::FzStextLine line: block)
+        {
+            line_n++;
+            int word_n = 0;                 // word counter per line
+            mupdf::fz_clear_buffer(buff);   // reset word buffer
+            size_t buflen = 0;              // reset char counter
+            int last_char_rtl = 0;          // was last character RTL?
+            for (mupdf::FzStextChar ch: line)
+            {
+                // computed once per character and reused for the word bbox
+                mupdf::FzRect cbbox = JM_char_bbox(line, ch);
+                if (clip_chars && !JM_rects_overlap(tp_rect, *cbbox.internal()))
+                {
+                    continue;
+                }
+
+                int word_delimiter = JM_is_word_delimiter(ch.m_internal->c, delimiters);
+                int this_char_rtl = JM_is_rtl_char(ch.m_internal->c);
+                if (word_delimiter || this_char_rtl != last_char_rtl)
+                {
+                    if (buflen == 0 && word_delimiter)
+                    {
+                        continue;  // skip delimiters at line start
+                    }
+                    if (!fz_is_empty_rect(wbbox))
+                    {
+                        word_n = JM_append_word(
+                                lines,
+                                buff.m_internal,
+                                &wbbox,
+                                block_n,
+                                line_n,
+                                word_n
+                                );
+                    }
+                    mupdf::fz_clear_buffer(buff);
+                    buflen = 0;  // reset char counter
+                    if (word_delimiter) continue;
+                }
+                // append one unicode character to the word
+                JM_append_rune(buff.m_internal, ch.m_internal->c);
+                last_char_rtl = this_char_rtl;
+                buflen++;
+                // enlarge word bbox
+                wbbox = fz_union_rect(wbbox, *cbbox.internal());
+            }
+            // flush the word that is still pending at the end of the line
+            if (buflen && !fz_is_empty_rect(wbbox))
+            {
+                word_n = JM_append_word(
+                        lines,
+                        buff.m_internal,
+                        &wbbox,
+                        block_n,
+                        line_n,
+                        word_n
+                        );
+            }
+            mupdf::fz_clear_buffer(buff);
+            buflen = 0;
+        }
+    }
+    return lines;
+}
+
+
+
+struct ScopedPyObject
+/* PyObject* wrapper, destructor calls Py_CLEAR() unless `release()` has been
+called.
+
+Constructing or assigning from a raw PyObject* takes over the caller's
+reference. Copying a ScopedPyObject acquires an additional reference, so
+that every instance owns exactly the reference its destructor releases. */
+{
+    ScopedPyObject(PyObject* rhs=nullptr)
+    :
+    m_pyobject(rhs)
+    {}
+
+    /* Copy: share the object and take our own reference to it. Without
+    this, the implicit copy would make two owners release one reference. */
+    ScopedPyObject(const ScopedPyObject& rhs)
+    :
+    m_pyobject(rhs.m_pyobject)
+    {
+        Py_XINCREF(m_pyobject);
+    }
+
+    /* Returns a reference to the stored pointer; ownership is unchanged. */
+    PyObject*& get()
+    {
+        return m_pyobject;
+    }
+
+    /* Release the current object (if any) and take over `rhs`. */
+    ScopedPyObject& operator= (PyObject* rhs)
+    {
+        Py_CLEAR(m_pyobject);
+        m_pyobject = rhs;
+        return *this;
+    }
+
+    /* Copy assignment: release the current object and share `rhs`'s. */
+    ScopedPyObject& operator= (const ScopedPyObject& rhs)
+    {
+        if (this != &rhs)
+        {
+            // take the new reference first, in case both wrap one object
+            PyObject* incoming = rhs.m_pyobject;
+            Py_XINCREF(incoming);
+            Py_CLEAR(m_pyobject);
+            m_pyobject = incoming;
+        }
+        return *this;
+    }
+
+    /* Give up ownership: returns the pointer and leaves the wrapper empty,
+    so that the destructor does nothing. */
+    PyObject* release()
+    {
+        PyObject* ret = m_pyobject;
+        m_pyobject = nullptr;
+        return ret;
+    }
+    ~ScopedPyObject()
+    {
+        Py_CLEAR(m_pyobject);
+    }
+
+    PyObject*   m_pyobject = nullptr;
+};
+
+
+/* Return a list with one tuple per block of the text page:
+
+    (x0, y0, x1, y1, text, block_n, block_type)
+
+For text blocks, `text` is the block's text with each line terminated by
+a line feed, and the rectangle is the union of the bboxes of the accepted
+characters. Characters not overlapping the page's mediabox are skipped
+unless the mediabox is infinite. For image blocks overlapping the mediabox
+(or any image block if the mediabox is infinite), `text` is a short
+description of the image and the rectangle is the block bbox.
+
+Blocks that end up with an empty rectangle are omitted, as are blocks that
+are neither text nor image blocks. */
+PyObject* extractBLOCKS(mupdf::FzStextPage& self)
+{
+    fz_stext_page *this_tpage = self.m_internal;
+    fz_rect tp_rect = this_tpage->mediabox;
+    mupdf::FzBuffer res(1024);
+    ScopedPyObject lines( PyList_New(0));
+    int block_n = -1;
+    for (fz_stext_block* block = this_tpage->first_block; block; block = block->next)
+    {
+        ScopedPyObject text;
+        block_n++;
+        fz_rect blockrect = fz_empty_rect;
+        if (block->type == FZ_STEXT_BLOCK_TEXT)
+        {
+            mupdf::fz_clear_buffer(res);  // set text buffer to empty
+            int line_n = -1;
+            int last_char = 0;
+            (void) line_n;  /* Not actually used, but keeping in the code for now. */
+            for (fz_stext_line* line = block->u.t.first_line; line; line = line->next)
+            {
+                line_n++;
+                fz_rect linerect = fz_empty_rect;
+                for (fz_stext_char* ch = line->first_char; ch; ch = ch->next)
+                {
+                    fz_rect cbbox = JM_char_bbox(line, ch);
+                    if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect))
+                    {
+                        continue;
+                    }
+                    JM_append_rune(res.m_internal, ch->c);
+                    last_char = ch->c;
+                    linerect = fz_union_rect(linerect, cbbox);
+                }
+                // terminate the line unless it already ends with a line feed
+                if (last_char != 10 && !fz_is_empty_rect(linerect))
+                {
+                    mupdf::fz_append_byte(res, 10);
+                }
+                blockrect = fz_union_rect(blockrect, linerect);
+            }
+            text = JM_EscapeStrFromBuffer(res);
+        }
+        else if (block->type == FZ_STEXT_BLOCK_IMAGE
+                && (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)))
+        {
+            // Only image blocks carry valid data in the `u.i` union member,
+            // so other non-text block types must not be read through it.
+            fz_image *img = block->u.i.image;
+            fz_colorspace *cs = img->colorspace;
+            text = PyUnicode_FromFormat(
+                    "<image: %s, width: %d, height: %d, bpc: %d>",
+                    mupdf::ll_fz_colorspace_name(cs),
+                    img->w,
+                    img->h,
+                    img->bpc
+                    );
+            blockrect = fz_union_rect(blockrect, block->bbox);
+        }
+        if (!fz_is_empty_rect(blockrect))
+        {
+            ScopedPyObject litem = PyTuple_New(7);
+            PyTuple_SET_ITEM(litem.get(), 0, Py_BuildValue("f", blockrect.x0));
+            PyTuple_SET_ITEM(litem.get(), 1, Py_BuildValue("f", blockrect.y0));
+            PyTuple_SET_ITEM(litem.get(), 2, Py_BuildValue("f", blockrect.x1));
+            PyTuple_SET_ITEM(litem.get(), 3, Py_BuildValue("f", blockrect.y1));
+            // "O" creates a new reference; `text` keeps and later releases its own
+            PyTuple_SET_ITEM(litem.get(), 4, Py_BuildValue("O", text.get()));
+            PyTuple_SET_ITEM(litem.get(), 5, Py_BuildValue("i", block_n));
+            PyTuple_SET_ITEM(litem.get(), 6, Py_BuildValue("i", block->type));
+            LIST_APPEND(lines.get(), litem.get());
+        }
+    }
+    return lines.release();
+}
+
+#define EMPTY_STRING PyUnicode_FromString("")
+
+/* Convert a C string to a Python str.
+
+A NULL pointer yields the empty string. If the conversion fails, the Python
+error is cleared and the empty string is returned instead. */
+static PyObject *JM_UnicodeFromStr(const char *c)
+{
+    if (c)
+    {
+        PyObject *text = Py_BuildValue("s", c);
+        if (text)
+        {
+            return text;
+        }
+        // conversion failed: fall back to "" and drop the pending error
+        text = EMPTY_STRING;
+        PyErr_Clear();
+        return text;
+    }
+    return EMPTY_STRING;
+}
+
+/* Return the link's `uri` member as a Python str (converted by
+JM_UnicodeFromStr()). */
+PyObject* link_uri(mupdf::FzLink& link)
+{
+    const char *uri = link.m_internal->uri;
+    return JM_UnicodeFromStr( uri);
+}
+
+/* Create a structured text page for `self`.
+
+clip:   rectangle of the text page; Py_None selects the page's bounding
+        rectangle (see #2048).
+flags:  stored in the `flags` member of the stext options; all other
+        options are zero.
+matrix: transformation used when running the page.
+
+Returns the new text page, which the caller owns. On a MuPDF error the
+partially built text page is dropped and the error is re-raised through
+mupdf::internal_throw_exception(). */
+fz_stext_page* page_get_textpage(
+        mupdf::FzPage& self,
+        PyObject* clip,
+        int flags,
+        PyObject* matrix
+        )
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    fz_stext_page *tpage=NULL;
+    fz_page *page = self.m_internal;
+    fz_device *dev = NULL;
+    fz_stext_options options;
+    memset(&options, 0, sizeof options);
+    options.flags = flags;
+    // Both are assigned inside fz_try() and used in fz_always() /
+    // fz_catch(), so they must be protected with fz_var().
+    fz_var(tpage);
+    fz_var(dev);
+    fz_try(ctx) {
+        // Default to page's rect if `clip` not specified, for #2048.
+        fz_rect rect = (clip==Py_None) ? fz_bound_page(ctx, page) : JM_rect_from_py(clip);
+        fz_matrix ctm = JM_matrix_from_py(matrix);
+        tpage = fz_new_stext_page(ctx, rect);
+        dev = fz_new_stext_device(ctx, tpage, &options);
+        fz_run_page(ctx, page, dev, ctm, NULL);
+        fz_close_device(ctx, dev);
+    }
+    fz_always(ctx) {
+        fz_drop_device(ctx, dev);
+    }
+    fz_catch(ctx) {
+        // do not leak the text page when running the page failed
+        fz_drop_stext_page(ctx, tpage);
+        tpage = NULL;
+        mupdf::internal_throw_exception(ctx);
+    }
+    return tpage;
+}
+
+// return extension for pymupdf image type
+/* Map an FZ_IMAGE_* type to the extension string used for it; any other
+value maps to "n/a". */
+const char *JM_image_extension(int type)
+{
+    const char *extension = "n/a";
+    switch (type)
+    {
+        case FZ_IMAGE_RAW:   extension = "raw";   break;
+        case FZ_IMAGE_FLATE: extension = "flate"; break;
+        case FZ_IMAGE_LZW:   extension = "lzw";   break;
+        case FZ_IMAGE_RLD:   extension = "rld";   break;
+        case FZ_IMAGE_BMP:   extension = "bmp";   break;
+        case FZ_IMAGE_GIF:   extension = "gif";   break;
+        case FZ_IMAGE_JBIG2: extension = "jb2";   break;
+        case FZ_IMAGE_JPEG:  extension = "jpeg";  break;
+        case FZ_IMAGE_JPX:   extension = "jpx";   break;
+        case FZ_IMAGE_JXR:   extension = "jxr";   break;
+        case FZ_IMAGE_PNG:   extension = "png";   break;
+        case FZ_IMAGE_PNM:   extension = "pnm";   break;
+        case FZ_IMAGE_TIFF:  extension = "tiff";  break;
+        default:             break;
+    }
+    return extension;
+}
+
+/* Fill `block_dict` with the description of an image block: width, height,
+extension, number of colorspace components, x / y resolution, bits per
+component, transformation matrix, size of the image data, the image data
+itself and the mask image (PNG data, or None if there is no mask).
+
+The stored compressed data is used as is, except that
+- images without compressed data, or of a type below FZ_IMAGE_BMP or of
+  type JBIG2, are converted to PNG,
+- JPEGs with 4 colorspace components are re-encoded as JPEG.
+
+MuPDF errors are swallowed; the dictionary is filled in any case, with
+empty image data if none could be produced. */
+void JM_make_image_block(fz_stext_block *block, PyObject *block_dict)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    fz_image *image = block->u.i.image;
+    fz_buffer *buf = NULL, *freebuf = NULL, *mask_buf = NULL;
+    fz_compressed_buffer *buffer = fz_compressed_image_buffer(ctx, image);
+    fz_var(buf);
+    fz_var(freebuf);
+    fz_var(mask_buf);
+    int n = fz_colorspace_n(ctx, image->colorspace);
+    int w = image->w;
+    int h = image->h;
+    const char *ext = "";
+    int type = FZ_IMAGE_UNKNOWN;
+    if (buffer) {
+        type = buffer->params.type;
+        ext = JM_image_extension(type);
+    }
+    if (type < FZ_IMAGE_BMP || type == FZ_IMAGE_JBIG2)
+        type = FZ_IMAGE_UNKNOWN;
+    PyObject *bytes = NULL;
+    fz_var(bytes);
+    PyObject *mask_bytes = NULL;
+    fz_var(mask_bytes);
+    fz_try(ctx) {
+        if (!buffer || type == FZ_IMAGE_UNKNOWN)
+        {
+            buf = freebuf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params);
+            ext = "png";
+        }
+        else if (n == 4 && strcmp(ext, "jpeg") == 0) // JPEG CMYK needs another step
+        {
+            buf = freebuf = fz_new_buffer_from_image_as_jpeg(ctx, image, fz_default_color_params, 95, 1);
+        }
+        else
+        {
+            // borrowed from the image: must not be dropped here
+            buf = buffer->buffer;
+        }
+        bytes = JM_BinFromBuffer(buf);
+        if (image->mask) {
+            mask_buf = fz_new_buffer_from_image_as_png(ctx, image->mask, fz_default_color_params);
+            mask_bytes = JM_BinFromBuffer(mask_buf);
+        } else {
+            mask_bytes = Py_BuildValue("s", NULL);
+        }
+    }
+    fz_always(ctx) {
+        // After an error, fall back to empty image data and no mask.
+        if (!bytes)
+            bytes = PyBytes_FromString("");
+        if (!mask_bytes)
+            mask_bytes = Py_BuildValue("s", NULL);
+        DICT_SETITEM_DROP(block_dict, dictkey_width,
+                        Py_BuildValue("i", w));
+        DICT_SETITEM_DROP(block_dict, dictkey_height,
+                        Py_BuildValue("i", h));
+        DICT_SETITEM_DROP(block_dict, dictkey_ext,
+                        Py_BuildValue("s", ext));
+        DICT_SETITEM_DROP(block_dict, dictkey_colorspace,
+                        Py_BuildValue("i", n));
+        DICT_SETITEM_DROP(block_dict, dictkey_xres,
+                        Py_BuildValue("i", image->xres));
+        DICT_SETITEM_DROP(block_dict, dictkey_yres,
+                        Py_BuildValue("i", image->yres));
+        DICT_SETITEM_DROP(block_dict, dictkey_bpc,
+                        Py_BuildValue("i", (int) image->bpc));
+        DICT_SETITEM_DROP(block_dict, dictkey_matrix,
+                        JM_py_from_matrix(block->u.i.transform));
+        DICT_SETITEM_DROP(block_dict, dictkey_size,
+                        Py_BuildValue("n", PyBytes_Size(bytes)));
+        DICT_SETITEM_DROP(block_dict, dictkey_image, bytes);
+        DICT_SETITEMSTR_DROP(block_dict, "mask", mask_bytes);
+        fz_drop_buffer(ctx, mask_buf);
+        fz_drop_buffer(ctx, freebuf);
+    }
+    fz_catch(ctx) {;}
+    return;
+}
+
+/* Fill `block_dict` for a text block: "lines" receives one dictionary per
+line (spans, wmode, dir, bbox) and "bbox" the union of the line rectangles
+returned by JM_make_spanlist().
+
+Lines whose bbox does not intersect `tp_rect` are left out, unless
+`tp_rect` is infinite. `raw` and `buff` are passed on to
+JM_make_spanlist(). */
+static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect)
+{
+    PyObject *lines_out = PyList_New(0);
+    fz_rect block_bbox = fz_empty_rect;
+
+    fz_stext_line *line = block->u.t.first_line;
+    for (; line; line = line->next)
+    {
+        const bool outside = fz_is_empty_rect(fz_intersect_rect(tp_rect, line->bbox))
+                && !fz_is_infinite_rect(tp_rect);
+        if (outside)
+        {
+            continue;
+        }
+
+        PyObject *entry = PyDict_New();
+        // wrap the raw MuPDF objects for the C++ interface of JM_make_spanlist()
+        mupdf::FzStextLine line_wrap(line);
+        mupdf::FzBuffer buffer_wrap( mupdf::ll_fz_keep_buffer( buff));
+        mupdf::FzRect clip_wrap( tp_rect);
+        mupdf::FzRect spans_bbox = JM_make_spanlist(
+                entry,
+                line_wrap,
+                raw,
+                buffer_wrap,
+                clip_wrap
+                );
+        fz_rect line_bbox = *spans_bbox.internal();
+        block_bbox = fz_union_rect(block_bbox, line_bbox);
+
+        DICT_SETITEM_DROP(entry, dictkey_wmode, Py_BuildValue("i", line->wmode));
+        DICT_SETITEM_DROP(entry, dictkey_dir, JM_py_from_point(line->dir));
+        DICT_SETITEM_DROP(entry, dictkey_bbox, JM_py_from_rect(line_bbox));
+        LIST_APPEND_DROP(lines_out, entry);
+    }
+
+    DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block_bbox));
+    DICT_SETITEM_DROP(block_dict, dictkey_lines, lines_out);
+}
+
+/* Store the list of block dictionaries of text page `tp` in `page_dict`
+under the "blocks" key.
+
+Every block dictionary has "number" (position of the block on the page,
+counting all blocks) and "type", plus the entries added by
+JM_make_image_block() or JM_make_text_block(). `raw` is passed on to
+JM_make_text_block().
+
+Left out are, unless the mediabox is infinite: image blocks not fully
+contained in the mediabox, and blocks not intersecting it. Blocks that are
+neither text nor image blocks are always left out. */
+void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    fz_stext_block *block;
+    fz_buffer *text_buffer = fz_new_buffer(ctx, 128);
+    PyObject *block_dict, *block_list = PyList_New(0);
+    fz_rect tp_rect = tp->mediabox;
+    int block_n = -1;
+    for (block = tp->first_block; block; block = block->next) {
+        block_n++;
+        // Only text and image blocks have valid `u.t` / `u.i` union data;
+        // any other block type must not be read through either of them.
+        if (block->type != FZ_STEXT_BLOCK_TEXT &&
+            block->type != FZ_STEXT_BLOCK_IMAGE) {
+            continue;
+        }
+        if (!fz_contains_rect(tp_rect, block->bbox) &&
+            !fz_is_infinite_rect(tp_rect) &&
+            block->type == FZ_STEXT_BLOCK_IMAGE) {
+            continue;
+        }
+        if (!fz_is_infinite_rect(tp_rect) &&
+            fz_is_empty_rect(fz_intersect_rect(tp_rect, block->bbox))) {
+            continue;
+        }
+
+        block_dict = PyDict_New();
+        DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n));
+        DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type));
+        if (block->type == FZ_STEXT_BLOCK_IMAGE) {
+            DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
+            JM_make_image_block(block, block_dict);
+        } else {
+            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect);
+        }
+
+        LIST_APPEND_DROP(block_list, block_dict);
+    }
+    DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list);
+    fz_drop_buffer(ctx, text_buffer);
+}
+
+//-----------------------------------------------------------------
+// get one pixel as a tuple
+//-----------------------------------------------------------------
+/* Return the `pm->n` sample values of the pixel at (x, y) as a tuple of
+ints.
+
+Throws std::range_error if (x, y) lies outside the pixmap. Returns NULL
+with a Python exception set if the tuple cannot be created. */
+PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    if (0
+            || x < 0
+            || x >= pm->w
+            || y < 0
+            || y >= pm->h
+            )
+    {
+        throw std::range_error( MSG_PIXEL_OUTSIDE);
+    }
+    int n = pm->n;
+    int stride = fz_pixmap_stride(ctx, pm);
+    // Compute the offset in size_t: as an int, stride * y overflows once
+    // the sample data is larger than INT_MAX bytes.
+    size_t offset = (size_t) stride * (size_t) y + (size_t) n * (size_t) x;
+    PyObject *p = PyTuple_New(n);
+    if (!p)
+    {
+        return NULL;
+    }
+    for (int j = 0; j < n; j++)
+    {
+        PyTuple_SET_ITEM(p, j, Py_BuildValue("i", pm->samples[offset + j]));
+    }
+    return p;
+}
+
+/* Return the value of mupdf::fz_pixmap_components() for `pixmap`. */
+int pixmap_n(mupdf::FzPixmap& pixmap)
+{
+    const int components = mupdf::fz_pixmap_components( pixmap);
+    return components;
+}
+
+/* Read item `idx` of sequence `obj` as a C int.
+
+Python ints and floats are accepted (floats are cast to int). Returns 0 and
+stores the value in `*result` on success. Returns 1 if the item cannot be
+fetched, has another type, or its conversion raised a Python error (which
+is cleared here). */
+static int
+JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result)
+{
+    PyObject *item = PySequence_ITEM(obj, idx);
+    if (item == NULL)
+    {
+        return 1;
+    }
+
+    const bool is_int = PyLong_Check(item);
+    const bool is_float = !is_int && PyFloat_Check(item);
+    if (is_int)
+    {
+        *result = (int) PyLong_AsLong(item);
+    }
+    else if (is_float)
+    {
+        *result = (int) PyFloat_AsDouble(item);
+    }
+    Py_DECREF(item);
+
+    if (!is_int && !is_float)
+    {
+        return 1;
+    }
+    // a failed conversion leaves a Python error behind
+    if (PyErr_Occurred())
+    {
+        PyErr_Clear();
+        return 1;
+    }
+    return 0;
+}
+
+/* Set the pixel at (x, y) to `color`, a sequence of exactly `pm->n`
+numbers, each in range 0..255.
+
+Throws std::range_error if (x, y) lies outside the pixmap or if `color`
+does not meet these requirements. Returns None. */
+PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    if (0
+            || x < 0
+            || x >= pm->w
+            || y < 0
+            || y >= pm->h
+            )
+    {
+        throw std::range_error( MSG_PIXEL_OUTSIDE);
+    }
+    int n = pm->n;
+    if (!PySequence_Check(color) || PySequence_Size(color) != n) {
+        throw std::range_error(MSG_BAD_COLOR_SEQ);
+    }
+    int value, j;
+    // Pass 1: validate all components before touching the pixmap. No
+    // fixed-size staging array is used, because `n` is not limited to a
+    // small constant.
+    for (j = 0; j < n; j++) {
+        if (JM_INT_ITEM(color, j, &value) == 1 || value < 0 || value >= 256) {
+            throw std::range_error(MSG_BAD_COLOR_SEQ);
+        }
+    }
+    int stride = fz_pixmap_stride(ctx, pm);
+    // offset in size_t: an int would overflow for sample data > INT_MAX bytes
+    size_t offset = (size_t) stride * (size_t) y + (size_t) n * (size_t) x;
+    // Pass 2: convert again and store. The checks are repeated because a
+    // sequence may return different items on a second access.
+    for (j = 0; j < n; j++) {
+        if (JM_INT_ITEM(color, j, &value) == 1 || value < 0 || value >= 256) {
+            throw std::range_error(MSG_BAD_COLOR_SEQ);
+        }
+        pm->samples[offset + j] = (unsigned char) value;
+    }
+    Py_RETURN_NONE;
+}
+//-------------------------------------------
+// make a buffer from an stext_page's text
+//-------------------------------------------
+/* Return a new buffer with the text of all text blocks of `page`.
+
+Characters whose bbox does not overlap the page's mediabox are skipped,
+unless the mediabox is infinite. A line feed is appended after every line
+and another one after every text block. The caller owns the buffer.
+
+On a MuPDF error the buffer is dropped and the error is re-raised through
+mupdf::internal_throw_exception(). */
+fz_buffer *
+JM_new_buffer_from_stext_page(fz_stext_page *page)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    fz_stext_block *block;
+    fz_stext_line *line;
+    fz_stext_char *ch;
+    fz_rect rect = page->mediabox;
+    fz_buffer *buf = NULL;
+
+    // assigned inside fz_try() and used in fz_catch()
+    fz_var(buf);
+    fz_try(ctx)
+    {
+        buf = fz_new_buffer(ctx, 256);
+        for (block = page->first_block; block; block = block->next) {
+            if (block->type == FZ_STEXT_BLOCK_TEXT) {
+                for (line = block->u.t.first_line; line; line = line->next) {
+                    for (ch = line->first_char; ch; ch = ch->next) {
+                        if (!JM_rects_overlap(rect, JM_char_bbox(line, ch)) &&
+                            !fz_is_infinite_rect(rect)) {
+                            continue;
+                        }
+                        fz_append_rune(ctx, buf, ch->c);
+                    }
+                    fz_append_byte(ctx, buf, '\n');
+                }
+                fz_append_byte(ctx, buf, '\n');
+            }
+        }
+    }
+    fz_catch(ctx) {
+        fz_drop_buffer(ctx, buf);
+        mupdf::internal_throw_exception(ctx);
+    }
+    return buf;
+}
+
+// Map a code point to the canonical form used when comparing search text:
+// NBSP (U+00A0), U+2028, U+2029, CR, LF and TAB all become a plain space,
+// ASCII 'A'..'Z' become lower case, everything else is returned unchanged.
+static inline int canon(int c)
+{
+    /* TODO: proper unicode case folding */
+    /* TODO: character equivalence (e.g. 'a' matching a-umlaut) */
+    switch (c) {
+    case 0xA0:
+    case 0x2028:
+    case 0x2029:
+    case '\r':
+    case '\n':
+    case '\t':
+        return ' ';
+    default:
+        break;
+    }
+    const bool is_ascii_upper = (c >= 'A' && c <= 'Z');
+    return is_ascii_upper ? c + ('a' - 'A') : c;
+}
+
+// Decode one rune from `s` with fz_chartorune, store its canon() form in
+// *c and pass fz_chartorune's return value through (callers add it to
+// their read pointer).
+static inline int chartocanon(int *c, const char *s)
+{
+    const int consumed = fz_chartorune(c, s);
+    const int folded = canon(*c);
+    *c = folded;
+    return consumed;
+}
+
+// Test whether needle `n` matches at the very start of haystack `h`.
+//
+// Both strings are compared rune by rune after canon() folding; a run of
+// spaces in either string is consumed as a whole, so it matches a run of
+// any length in the other.
+//
+// Returns a pointer into `h` just past the last matched rune (equal to `h`
+// itself for an empty needle), or NULL when the needle does not match.
+static const char *match_string(const char *h, const char *n)
+{
+    int hc, nc;
+    const char *e = h;
+    h += chartocanon(&hc, h);
+    n += chartocanon(&nc, n);
+    // Stop once the needle is exhausted. Without the `nc != 0` test a
+    // haystack and needle that end together (hc == nc == 0) kept the loop
+    // running and both pointers were read past their NUL terminators.
+    while (nc != 0 && hc == nc)
+    {
+        e = h;
+        if (hc == ' ')
+            do
+                h += chartocanon(&hc, h);
+            while (hc == ' ');
+        else
+            h += chartocanon(&hc, h);
+        if (nc == ' ')
+            do
+                n += chartocanon(&nc, n);
+            while (nc == ' ');
+        else
+            n += chartocanon(&nc, n);
+    }
+    return nc == 0 ? e : NULL;
+}
+
+
+// Find the first position in `s` at which `needle` matches.
+//
+// The candidate position advances one byte at a time and match_string()
+// is tried at each. On success the start of the match is returned and
+// *endp receives match_string()'s result; otherwise NULL is returned and
+// *endp is set to NULL.
+static const char *find_string(const char *s, const char *needle, const char **endp)
+{
+    for (const char *pos = s; *pos != '\0'; ++pos)
+    {
+        const char *match_end = match_string(pos, needle);
+        if (match_end != NULL)
+        {
+            *endp = match_end;
+            return pos;
+        }
+    }
+    *endp = NULL;
+    return NULL;
+}
+
+// Accumulator handed to on_highlight_char() while search hits are
+// collected.
+struct highlight
+{
+    // Number of entries currently stored in `quads`.
+    Py_ssize_t len;
+    // Python list that receives the hit quads.
+    PyObject *quads;
+    // Merge tolerance along the line direction, as a fraction of the
+    // character size.
+    float hfuzz;
+    // Merge tolerance for the second distance measure (vdist), as a
+    // fraction of the character size.
+    float vfuzz;
+};
+
+
+// Read obj[idx] and convert it to a C double.
+//
+// Returns 0 with the value stored in *result on success, 1 on failure.
+// Any Python exception pending at the end of the call is cleared, so a
+// caller may treat 1 simply as "not a number". Note that *result is
+// overwritten even when the conversion fails.
+// `obj` is passed straight to PySequence_ITEM; callers in this file check
+// PySequence_Check() first.
+static int
+JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, double *result)
+{
+    PyObject *temp = PySequence_ITEM(obj, idx);
+    if (!temp) {
+        // Item access failed: drop the exception, just as a failed
+        // conversion below does, instead of leaving it pending while
+        // reporting an ordinary status code.
+        PyErr_Clear();
+        return 1;
+    }
+    *result = PyFloat_AsDouble(temp);
+    Py_DECREF(temp);
+    if (PyErr_Occurred()) {
+        PyErr_Clear();
+        return 1;
+    }
+    return 0;
+}
+
+
+//-----------------------------------------------------------------------------
+// fz_quad from PySequence. Four floats are treated as rect.
+// Else must be four pairs of floats.
+//-----------------------------------------------------------------------------
+// Anything else (NULL, not a sequence, wrong length, a malformed point)
+// yields the infinite quad. Point coordinates are limited to the range
+// FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT; the points are taken in the order
+// ul, ur, ll, lr.
+static fz_quad
+JM_quad_from_py(PyObject *r)
+{
+    const fz_quad infinite = fz_make_quad(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT,
+                                          FZ_MAX_INF_RECT, FZ_MIN_INF_RECT,
+                                          FZ_MIN_INF_RECT, FZ_MAX_INF_RECT,
+                                          FZ_MAX_INF_RECT, FZ_MAX_INF_RECT);
+
+    if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4)
+        return infinite;
+
+    // A numeric first item means the sequence is a rect.
+    double probe;
+    if (JM_FLOAT_ITEM(r, 0, &probe) == 0)
+        return fz_quad_from_rect(JM_rect_from_py(r));
+
+    fz_point corner[4];
+    for (Py_ssize_t k = 0; k < 4; k++) {
+        double xv = 0.0;
+        double yv = 0.0;
+        PyObject *pt = PySequence_ITEM(r, k);  // next point item
+        const bool ok = pt
+                && PySequence_Check(pt)
+                && PySequence_Size(pt) == 2
+                && JM_FLOAT_ITEM(pt, 0, &xv) == 0
+                && JM_FLOAT_ITEM(pt, 1, &yv) == 0;
+        Py_XDECREF(pt);
+        if (!ok)
+            return infinite;  // invalid: cancel the rest
+
+        if (xv < FZ_MIN_INF_RECT) xv = FZ_MIN_INF_RECT;
+        else if (xv > FZ_MAX_INF_RECT) xv = FZ_MAX_INF_RECT;
+        if (yv < FZ_MIN_INF_RECT) yv = FZ_MIN_INF_RECT;
+        else if (yv > FZ_MAX_INF_RECT) yv = FZ_MAX_INF_RECT;
+        corner[k] = fz_make_point((float) xv, (float) yv);
+    }
+
+    fz_quad result = infinite;
+    result.ul = corner[0];
+    result.ur = corner[1];
+    result.ll = corner[2];
+    result.lr = corner[3];
+    return result;
+}
+
+// Absolute length of the vector a->b projected onto `dir`
+// (the dot product of the two).
+static float hdist(fz_point *dir, fz_point *a, fz_point *b)
+{
+    const float delta_x = b->x - a->x;
+    const float delta_y = b->y - a->y;
+    const float along = delta_x * dir->x + delta_y * dir->y;
+    return fz_abs(along);
+}
+
+// Absolute distance between `a` and `b` measured perpendicular to `dir`
+// (the counterpart of hdist, which measures along `dir`).
+//
+// The perpendicular component is the 2D cross product of the vector a->b
+// with `dir`. The former `dx * dir->y + dy * dir->x` gives the same value
+// for axis-aligned directions but is wrong for any other direction: for a
+// diagonal `dir` it reported the distance *along* the line.
+// NOTE(review): assumes `dir` is a unit vector, as hdist does -- confirm.
+static float vdist(fz_point *dir, fz_point *a, fz_point *b)
+{
+    float dx = b->x - a->x;
+    float dy = b->y - a->y;
+    return fz_abs(dx * dir->y - dy * dir->x);
+}
+
+// Record character `ch` of `line` as part of a search hit.
+//
+// `arg` points to a struct highlight. If the character's left edge lies
+// within the fuzz tolerances (scaled by ch->size) of the right edge of the
+// last stored quad, that quad is stretched to the character's right edge;
+// otherwise the character's quad is appended as a new entry.
+// `ctx` is not used.
+static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
+{
+    struct highlight *state = (struct highlight *) arg;
+    fz_quad glyph = JM_char_quad(line, ch);
+
+    if (state->len > 0) {
+        const Py_ssize_t last = state->len - 1;
+        const float max_v = ch->size * state->vfuzz;
+        const float max_h = ch->size * state->hfuzz;
+
+        // Fetch the most recent quad back from the Python list.
+        PyObject *stored = PySequence_ITEM(state->quads, last);
+        fz_quad tail = JM_quad_from_py(stored);
+        Py_DECREF(stored);
+
+        const bool lower_touches =
+                hdist(&line->dir, &tail.lr, &glyph.ll) < max_h
+                && vdist(&line->dir, &tail.lr, &glyph.ll) < max_v;
+        const bool upper_touches =
+                hdist(&line->dir, &tail.ur, &glyph.ul) < max_h
+                && vdist(&line->dir, &tail.ur, &glyph.ul) < max_v;
+
+        if (lower_touches && upper_touches) {
+            tail.ur = glyph.ur;
+            tail.lr = glyph.lr;
+            // PyList_SetItem takes over the new reference.
+            PyList_SetItem(state->quads, last, JM_py_from_quad(tail));
+            return;
+        }
+    }
+    LIST_APPEND_DROP(state->quads, JM_py_from_quad(glyph));
+    state->len += 1;
+}
+
+
+// Search the text of `page` for `needle`.
+//
+// The page text is flattened with JM_new_buffer_from_stext_page() and
+// scanned with find_string(); the characters of the page are then walked
+// in lock-step with that buffer and every character lying inside a match
+// is passed to on_highlight_char(), which collects the hit quads.
+//
+// Returns a new Python list of quads (possibly empty), None when `needle`
+// is the empty string, or NULL with a Python exception set when the list
+// cannot be allocated. MuPDF errors are passed to
+// mupdf::internal_throw_exception() after the list has been released.
+PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    struct highlight hits;
+    fz_stext_block *block;
+    fz_stext_line *line;
+    fz_stext_char *ch;
+    const char *haystack, *begin, *end;
+    fz_rect rect = page->mediabox;
+    int c, inside;
+
+    if (strlen(needle) == 0) Py_RETURN_NONE;
+
+    // Build the haystack before entering fz_try: the helper reports
+    // failure through mupdf::internal_throw_exception(), and that must not
+    // unwind through an fz_try frame that is still active. Nothing needs
+    // cleaning up yet if it fails here.
+    fz_buffer *buffer = JM_new_buffer_from_stext_page( page);
+
+    PyObject *quads = PyList_New(0);
+    if (!quads) {
+        fz_drop_buffer(ctx, buffer);
+        return NULL;  // Python error already set by PyList_New
+    }
+    hits.len = 0;
+    hits.quads = quads;
+    hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
+    hits.vfuzz = 0.1f;
+
+    fz_try(ctx) {
+        haystack = fz_string_from_buffer(ctx, buffer);
+        begin = find_string(haystack, needle, &end);
+        if (!begin) goto no_more_matches;
+
+        inside = 0;
+        for (block = page->first_block; block; block = block->next) {
+            if (block->type != FZ_STEXT_BLOCK_TEXT) {
+                continue;
+            }
+            for (line = block->u.t.first_line; line; line = line->next) {
+                for (ch = line->first_char; ch; ch = ch->next) {
+                    // Characters outside the clip are not in the buffer,
+                    // so `haystack` must not advance for them.
+                    if (!fz_is_infinite_rect(rect) &&
+                        !JM_rects_overlap(rect, JM_char_bbox(line, ch))) {
+                            goto next_char;
+                        }
+try_new_match:
+                    if (!inside) {
+                        if (haystack >= begin) inside = 1;
+                    }
+                    if (inside) {
+                        if (haystack < end) {
+                            on_highlight_char(ctx, &hits, line, ch);
+                        } else {
+                            // Current match is finished: look for the next
+                            // one and re-test this same character.
+                            inside = 0;
+                            begin = find_string(haystack, needle, &end);
+                            if (!begin) goto no_more_matches;
+                            else goto try_new_match;
+                        }
+                    }
+                    haystack += fz_chartorune(&c, haystack);
+next_char:;
+                }
+                assert(*haystack == '\n');  // line terminator in buffer
+                ++haystack;
+            }
+            assert(*haystack == '\n');  // block terminator in buffer
+            ++haystack;
+        }
+no_more_matches:;
+    }
+    fz_always(ctx) {
+        fz_drop_buffer(ctx, buffer);
+    }
+    fz_catch(ctx) {
+        // Release the partially filled result before the error leaves
+        // this function; otherwise the list would leak.
+        Py_CLEAR(quads);
+        mupdf::internal_throw_exception(ctx);
+    }
+
+    return quads;
+}
+
+// Copy the samples of `src` into `pm`; both must have the same width and
+// height.
+//
+// pm->n == src->n      rows are copied unchanged.
+// pm->n == src->n + 1  `pm` has alpha and `src` has not: the src->n colour
+//                      bytes of each pixel are copied and the extra (last)
+//                      byte of the destination pixel is set to 255.
+// src->n == pm->n + 1  `src` has alpha and `pm` has not: the first pm->n
+//                      bytes of each source pixel are copied.
+//
+// `n` is only checked by assertions (n <= pm->n and n <= src->n).
+void pixmap_copy( fz_pixmap* pm, const fz_pixmap* src, int n)
+{
+    assert(pm->w == src->w);
+    assert(pm->h == src->h);
+    assert(n <= pm->n);
+    assert(n <= src->n);
+
+    if (pm->n == src->n)
+    {
+        // identical samples
+        // Sizes are computed in size_t so w * h * n cannot overflow `int`.
+        const size_t row_bytes = (size_t) pm->w * (size_t) pm->n;
+        if ((size_t) pm->stride == row_bytes && (size_t) src->stride == row_bytes)
+        {
+            // Tightly packed on both sides: one bulk copy.
+            memcpy(pm->samples, src->samples, row_bytes * (size_t) pm->h);
+        }
+        else
+        {
+            // Rows are padded (or the strides differ): copy row by row so
+            // that every row lands at its own stride offset.
+            for (int y=0; y<pm->h; ++y)
+            {
+                memcpy(
+                        pm->samples + (ptrdiff_t) pm->stride * y,
+                        src->samples + (ptrdiff_t) src->stride * y,
+                        row_bytes
+                        );
+            }
+        }
+        return;
+    }
+
+    int nn;
+    int do_alpha;
+    if (pm->n > src->n)
+    {
+        assert(pm->n == src->n + 1);
+        nn = src->n;
+        assert(!src->alpha);
+        assert(pm->alpha);
+        do_alpha = 1;
+    }
+    else
+    {
+        assert(src->n == pm->n + 1);
+        nn = pm->n;
+        assert(src->alpha);
+        assert(!pm->alpha);
+        do_alpha = 0;
+    }
+    for (int y=0; y<pm->h; ++y)
+    {
+        unsigned char* d = pm->samples + (ptrdiff_t) pm->stride * y;
+        const unsigned char* s = src->samples + (ptrdiff_t) src->stride * y;
+        for (int x=0; x<pm->w; ++x)
+        {
+            memcpy(d, s, nn);
+            if (do_alpha)
+            {
+                d[pm->n-1] = 255;
+            }
+            d += pm->n;
+            s += src->n;
+        }
+    }
+}
+
+
+// Count how often each distinct pixel value occurs in `pm`.
+//
+// Only the part of the pixmap inside `clip` (converted with
+// JM_rect_from_py, rounded, and intersected with the pixmap's bbox) is
+// examined.
+//
+// Returns a new dict mapping each pixel -- a bytes object of pm->n bytes --
+// to its count; the dict is empty when the intersection is empty. Returns
+// NULL with a Python exception set if the dict cannot be created.
+PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip)
+{
+    fz_context* ctx = mupdf::internal_context_get();
+    PyObject* rc = PyDict_New();
+    if (!rc)
+    {
+        return NULL;
+    }
+    fz_irect irect = fz_pixmap_bbox(ctx, pm);
+    irect = fz_intersect_irect(irect, fz_round_rect(JM_rect_from_py(clip)));
+    if (fz_is_empty_irect(irect))
+    {
+        return rc;
+    }
+    size_t stride = pm->stride;
+    size_t width = irect.x1 - irect.x0;
+    size_t height = irect.y1 - irect.y0;
+    size_t n = (size_t) pm->n;
+    size_t substride = width * n;
+    const unsigned char* s = pm->samples + stride * (irect.y0 - pm->y) + n * (irect.x0 - pm->x);
+    // Pixel value of the current run. It points into the sample data
+    // instead of being copied into a fixed-size array, so any pm->n is
+    // safe (the former 10-byte cache was protected only by an assert).
+    const char* oldpix = (const char*) s;
+    long cnt = 0;
+    for (size_t i = 0; i < height; i++)
+    {
+        for (size_t j = 0; j < substride; j += n)
+        {
+            const char* newpix = (const char*) s + j;
+            if (memcmp(oldpix, newpix, n))
+            {
+                /* Pixel differs from previous pixel, so update results with
+                last run of pixels. We get a PyObject representation of pixel
+                so we can look up in Python dict <rc>. */
+                PyObject* pixel = PyBytes_FromStringAndSize(oldpix, n);
+                PyObject* c = PyDict_GetItem(rc, pixel);
+                if (c) cnt += PyLong_AsLong(c);
+                DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt));
+                Py_DECREF(pixel);
+                /* Start next run of identical pixels. */
+                cnt = 1;
+                oldpix = newpix;
+            }
+            else
+            {
+                cnt += 1;
+            }
+        }
+        s += stride;
+    }
+    /* Update results with last pixel. */
+    PyObject* pixel = PyBytes_FromStringAndSize(oldpix, n);
+    PyObject* c = PyDict_GetItem(rc, pixel);
+    if (c) cnt += PyLong_AsLong(c);
+    DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt));
+    Py_DECREF(pixel);
+    PyErr_Clear();
+    return rc;
+}
+
+%}
+
+/* Declarations for functions defined above. They follow the closing %} of
+the verbatim code block, i.e. this is the part of the interface file that
+SWIG parses to generate the wrappers of this module. */
+
+void page_merge(
+        mupdf::PdfDocument& doc_des,  // presumably the destination document -- confirm in definition
+        mupdf::PdfDocument& doc_src,  // presumably the source document -- confirm in definition
+        int page_from,
+        int page_to,
+        int rotate,
+        int links,
+        int copy_annots,
+        mupdf::PdfGraftMap& graft_map  // passed by reference
+        );
+/* NOTE(review): the page-range parameters (spage, epage, apage) are defined by the implementation above. */
+void JM_merge_range(
+        mupdf::PdfDocument& doc_des,
+        mupdf::PdfDocument& doc_src,
+        int spage,
+        int epage,
+        int apage,
+        int rotate,
+        int links,
+        int annots,
+        int show_progress,
+        mupdf::PdfGraftMap& graft_map
+        );
+/* Takes generic FzDocument references, unlike the two PdfDocument-based declarations above. */
+void FzDocument_insert_pdf(
+        mupdf::FzDocument& doc,
+        mupdf::FzDocument& src,
+        int from_page,
+        int to_page,
+        int start_at,
+        int rotate,
+        int links,
+        int annots,
+        int show_progress,
+        int final,
+        mupdf::PdfGraftMap& graft_map
+        );
+
+int page_xref(mupdf::FzDocument& this_doc, int pno);
+void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842);  // defaults 595 x 842: presumably A4 in points -- confirm
+void _newPage(mupdf::PdfDocument& self, int pno=-1, float width=595, float height=842);  // PdfDocument overload, same defaults
+void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem);
+void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count);
+std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page);
+mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& self, mupdf::FzPoint& point);
+mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& self, mupdf::FzPoint& point);  // FzPage overload
+const char* Tools_parse_da(mupdf::PdfAnnot& this_annot);
+PyObject* Annot_getAP(mupdf::PdfAnnot& annot);
+void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str);
+mupdf::FzPoint JM_point_from_py(PyObject* p);
+mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot);
+PyObject* util_transform_rect(PyObject* rect, PyObject* matrix);
+PyObject* Annot_rect3(mupdf::PdfAnnot& annot);
+mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage);
+mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& pdfpage);  // FzPage overload
+PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj);
+PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0);
+PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0);  // FzDocument overload, same defaults
+/* Link helpers. */
+PyObject* Link_is_external(mupdf::FzLink& this_link);
+PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist);
+PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist);  // FzPage overload
+mupdf::FzLink Link_next(mupdf::FzLink& this_link);
+/* Page counting; page_count is overloaded for FzDocument and PdfDocument. */
+static int page_count_fz2(void* document);  // takes an untyped pointer
+int page_count_fz(mupdf::FzDocument& document);
+int page_count_pdf(mupdf::PdfDocument& pdf);
+int page_count(mupdf::FzDocument& document);
+int page_count(mupdf::PdfDocument& pdf);
+/* Annotation xrefs, outline and TOC helpers; each document function has a PdfDocument and an FzDocument overload. */
+PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno);
+PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno);
+bool Outline_is_external(mupdf::FzOutline* outline);
+void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items);
+void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items);
+
+int ll_fz_absi(int i);
+
+mupdf::FzDevice JM_new_texttrace_device(PyObject* out);
+
+fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch);
+/* Text-page helpers. */
+static fz_quad JM_char_quad( fz_stext_line *line, fz_stext_char *ch);  // declared static, raw MuPDF pointers
+void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page);
+/* Setters taking a single int flag `on`; presumably global switches -- confirm in the definitions above. */
+void set_skip_quad_corrections(int on);
+void set_subset_fontnames(int on);
+void set_small_glyph_heights(int on);
+
+mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj);
+PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL);  // last three arguments are optional and default to NULL
+
+mupdf::FzRect JM_make_spanlist(
+        PyObject *line_dict,
+        mupdf::FzStextLine& line,
+        int raw,
+        mupdf::FzBuffer& buff,
+        mupdf::FzRect& tp_rect
+        );
+/* Text extraction entry points; both return a Python object. */
+PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters);
+PyObject* extractBLOCKS(mupdf::FzStextPage& self);
+
+PyObject* link_uri(mupdf::FzLink& link);
+/* Returns a raw fz_stext_page pointer. NOTE(review): ownership of the result is not visible here -- check the definition. */
+fz_stext_page* page_get_textpage(
+        mupdf::FzPage& self,
+        PyObject* clip,
+        int flags,
+        PyObject* matrix
+        );
+
+void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw);
+PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y);
+int pixmap_n(mupdf::FzPixmap& pixmap);
+
+PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle);  // returns a list of hit quads, or None for an empty needle
+/* Writes the pm->n values of <color> (each 0..255) to pixel (x, y) and returns None; throws std::range_error for a position outside the pixmap or a bad colour sequence. */
+PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color);
+
+/* Copies from <src> to <pm>, which must have the same width and height.
+pm->n - src->n must be -1, 0 or +1. If -1, <src> must have alpha and <pm>
+must not have alpha, and we copy the non-alpha bytes. If +1, <src> must not
+have alpha and <pm> must have alpha, and we set <pm>'s alpha bytes all to
+255. <n> is only checked by assertions (n <= pm->n and n <= src->n). */
+void pixmap_copy(fz_pixmap* pm, const fz_pixmap* src, int n);
+/* Returns a dict mapping each pixel value (bytes of length pm->n) found inside <clip> to its number of occurrences. */
+PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip);