Mercurial > hgrepos > Python2 > PyMuPDF

diff tests/test_general.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:37:51 +0200
children: a6bc019ac0b2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_general.py	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,2053 @@
+# encoding utf-8
+"""
+* Confirm sample doc has no links and no annots.
+* Confirm proper release of file handles via Document.close()
+* Confirm properly raising exceptions in document creation
+"""
+import io
+import os
+
+import fnmatch
+import json
+import pymupdf
+import pathlib
+import pickle
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import textwrap
+import time
+import util
+
+import gentle_compare
+
+scriptdir = os.path.abspath(os.path.dirname(__file__))
+filename = os.path.join(scriptdir, "resources", "001003ED.pdf")
+
+
+def test_haslinks():
+    doc = pymupdf.open(filename)
+    assert doc.has_links() == False
+
+
+def test_hasannots():
+    doc = pymupdf.open(filename)
+    assert doc.has_annots() == False
+
+
+def test_haswidgets():
+    doc = pymupdf.open(filename)
+    assert doc.is_form_pdf == False
+
+
+def test_isrepaired():
+    doc = pymupdf.open(filename)
+    assert doc.is_repaired == False
+    pymupdf.TOOLS.mupdf_warnings()
+
+
+def test_isdirty():
+    doc = pymupdf.open(filename)
+    assert doc.is_dirty == False
+
+
+def test_cansaveincrementally():
+    doc = pymupdf.open(filename)
+    assert doc.can_save_incrementally() == True
+
+
+def test_iswrapped():
+    doc = pymupdf.open(filename)
+    page = doc[0]
+    assert page.is_wrapped
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple >= (1, 26, 0):
+        assert wt == 'bogus font ascent/descent values (0 / 0)'
+    else:
+        assert not wt
+
+
+def test_wrapcontents():
+    doc = pymupdf.open(filename)
+    page = doc[0]
+    page.wrap_contents()
+    xref = page.get_contents()[0]
+    cont = page.read_contents()
+    doc.update_stream(xref, cont)
+    page.set_contents(xref)
+    assert len(page.get_contents()) == 1
+    page.clean_contents()
+    rebased = hasattr(pymupdf, 'mupdf')
+    if rebased:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        if pymupdf.mupdf_version_tuple >= (1, 26, 0):
+            assert wt == 'bogus font ascent/descent values (0 / 0)\nPDF stream Length incorrect'
+        else:
+            assert wt == 'PDF stream Length incorrect'
+
+
+def test_page_clean_contents():
+    """Assert that page contents cleaning actually is invoked."""
+    doc = pymupdf.open()
+    page = doc.new_page()
+
+    # draw two rectangles - will lead to two /Contents objects
+    page.draw_rect((10, 10, 20, 20))
+    page.draw_rect((20, 20, 30, 30))
+    assert len(page.get_contents()) == 2
+    assert page.read_contents().startswith(b"q") == False
+
+    # clean / consolidate into one /Contents object
+    page.clean_contents()
+    assert len(page.get_contents()) == 1
+    assert page.read_contents().startswith(b"q") == True
+
+
+def test_annot_clean_contents():
+    """Assert that annot contents cleaning actually is invoked."""
+    doc = pymupdf.open()
+    page = doc.new_page()
+    annot = page.add_highlight_annot((10, 10, 20, 20))
+
+    # the annotation appearance will not start with command b"q"
+
+
+    # invoke appearance stream cleaning and reformatting
+    annot.clean_contents()
+
+    # appearance stream should now indeed start with command b"q"
+    assert annot._getAP().startswith(b"q") == True
+
+
+def test_config():
+    assert pymupdf.TOOLS.fitz_config["py-memory"] in (True, False)
+
+
+def test_glyphnames():
+    name = "INFINITY"
+    infinity = pymupdf.glyph_name_to_unicode(name)
+    assert pymupdf.unicode_to_glyph_name(infinity) == name
+
+
+def test_rgbcodes():
+    sRGB = 0xFFFFFF
+    assert pymupdf.sRGB_to_pdf(sRGB) == (1, 1, 1)
+    assert pymupdf.sRGB_to_rgb(sRGB) == (255, 255, 255)
+
+
+def test_pdfstring():
+    pymupdf.get_pdf_now()
+    pymupdf.get_pdf_str("Beijing, chinesisch 北京")
+    pymupdf.get_text_length("Beijing, chinesisch 北京", fontname="china-s")
+    pymupdf.get_pdf_str("Latin characters êßöäü")
+
+
+def test_open_exceptions():
+    path  = os.path.normpath(f'{__file__}/../../tests/resources/001003ED.pdf')
+    doc = pymupdf.open(path, filetype="xps")
+    assert 'PDF' in doc.metadata["format"]
+
+    doc = pymupdf.open(path, filetype="xxx")
+    assert 'PDF' in doc.metadata["format"]
+
+    try:
+        pymupdf.open("x.y")
+    except Exception as e:
+        assert repr(e).startswith("FileNotFoundError")
+    else:
+        assert 0
+
+    try:
+        pymupdf.open(stream=b"", filetype="pdf")
+    except RuntimeError as e:
+        assert repr(e).startswith("EmptyFileError"), f'{repr(e)=}'
+    else:
+        print(f'{doc.metadata["format"]=}')
+        assert 0
+
+
+def test_bug1945():
+    pdf = pymupdf.open(f'{scriptdir}/resources/bug1945.pdf')
+    buffer_ = io.BytesIO()
+    pdf.save(buffer_, clean=True)
+
+
+def test_bug1971():
+    for _ in range(2):
+        doc = pymupdf.Document(f'{scriptdir}/resources/bug1971.pdf')
+        page = next(doc.pages())
+        page.get_drawings()
+        doc.close()
+        assert doc.is_closed
+
+def test_default_font():
+    f = pymupdf.Font()
+    assert str(f) == "Font('Noto Serif Regular')"
+    assert repr(f) == "Font('Noto Serif Regular')"
+
+def test_add_ink_annot():
+    import math
+    document = pymupdf.Document()
+    page = document.new_page()
+    line1 = []
+    line2 = []
+    for a in range( 0, 360*2, 15):
+        x = a
+        c = 300 + 200 * math.cos( a * math.pi/180)
+        s = 300 + 100 * math.sin( a * math.pi/180)
+        line1.append( (x, c))
+        line2.append( (x, s))
+    page.add_ink_annot( [line1, line2])
+    page.insert_text((100, 72), 'Hello world')
+    page.add_text_annot((200,200), "Some Text")
+    page.get_bboxlog()
+    path = f'{scriptdir}/resources/test_add_ink_annot.pdf'
+    document.save( path)
+    print( f'Have saved to: path={path!r}')
+
+def test_techwriter_append():
+    print(pymupdf.__doc__)
+    doc = pymupdf.open()
+    page = doc.new_page()
+    tw = pymupdf.TextWriter(page.rect)
+    text = "Red rectangle = TextWriter.text_rect, blue circle = .last_point"
+    r = tw.append((100, 100), text)
+    print(f'r={r!r}')
+    tw.write_text(page)
+    page.draw_rect(tw.text_rect, color=pymupdf.pdfcolor["red"])
+    page.draw_circle(tw.last_point, 2, color=pymupdf.pdfcolor["blue"])
+    path = f"{scriptdir}/resources/test_techwriter_append.pdf"
+    doc.ez_save(path)
+    print( f'Have saved to: {path}')
+
+def test_opacity():
+    doc = pymupdf.open()
+    page = doc.new_page()
+
+    annot1 = page.add_circle_annot((50, 50, 100, 100))
+    annot1.set_colors(fill=(1, 0, 0), stroke=(1, 0, 0))
+    annot1.set_opacity(2 / 3)
+    annot1.update(blend_mode="Multiply")
+
+    annot2 = page.add_circle_annot((75, 75, 125, 125))
+    annot2.set_colors(fill=(0, 0, 1), stroke=(0, 0, 1))
+    annot2.set_opacity(1 / 3)
+    annot2.update(blend_mode="Multiply")
+    outfile = f'{scriptdir}/resources/opacity.pdf'
+    doc.save(outfile, expand=True, pretty=True)
+    print("saved", outfile)
+
+def test_get_text_dict():
+    import json
+    doc=pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
+    page=doc[0]
+    blocks=page.get_text("dict")["blocks"]
+    # Check no opaque types in `blocks`.
+    json.dumps( blocks, indent=4)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple >= (1, 26, 0):
+        assert wt == 'bogus font ascent/descent values (0 / 0)'
+    else:
+        assert not wt
+
+def test_font():
+    font = pymupdf.Font()
+    print(repr(font))
+    bbox = font.glyph_bbox( 65)
+    print( f'bbox={bbox!r}')
+
+def test_insert_font():
+    doc=pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
+    page = doc[0]
+    i = page.insert_font()
+    print( f'page.insert_font() => {i}')
+
+def test_2173():
+    from pymupdf import IRect, Pixmap, CS_RGB, Colorspace
+    for i in range( 100):
+        #print( f'i={i!r}')
+        image = Pixmap(Colorspace(CS_RGB), IRect(0, 0, 13, 37))
+    print( 'test_2173() finished')
+
+def test_texttrace():
+    import time
+    document = pymupdf.Document( f'{scriptdir}/resources/joined.pdf')
+    t = time.time()
+    for page in document:
+        tt = page.get_texttrace()
+    t = time.time() - t
+    print( f'test_texttrace(): t={t!r}')
+
+    # Repeat, this time writing data to file.
+    import json
+    path = f'{scriptdir}/resources/test_texttrace.txt'
+    print( f'test_texttrace(): Writing to: {path}')
+    with open( path, 'w') as f:
+        for i, page in enumerate(document):
+            tt = page.get_texttrace()
+            print( f'page {i} json:\n{json.dumps(tt, indent="    ")}', file=f)
+
+
+def test_2533():
+    """Assert correct char bbox in page.get_texttrace().
+
+    Search for a unique char on page and confirm that page.get_texttrace()
+    returns the same bbox as the search method.
+    """
+    if hasattr(pymupdf, 'mupdf') and not pymupdf.g_use_extra:
+        print('Not running test_2533() because rebased with use_extra=0 known to fail')
+        return
+    pymupdf.TOOLS.set_small_glyph_heights(True)
+    try:
+        doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2533.pdf"))
+        page = doc[0]
+        NEEDLE = "民"
+        ord_NEEDLE = ord(NEEDLE)
+        for span in page.get_texttrace():
+            for char in span["chars"]:
+                if char[0] == ord_NEEDLE:
+                    bbox = pymupdf.Rect(char[3])
+                    break
+        bbox2 = page.search_for(NEEDLE)[0]
+        assert bbox2 == bbox, f'{bbox=} {bbox2=} {bbox2-bbox=}.'
+    finally:
+        pymupdf.TOOLS.set_small_glyph_heights(False)
+
+
+def test_2645():
+    """Assert same font size calculation in corner cases.
+    """
+    folder = os.path.join(scriptdir, "resources")
+    files = ("test_2645_1.pdf", "test_2645_2.pdf", "test_2645_3.pdf")
+    for f in files:
+        doc = pymupdf.open(os.path.join(folder, f))
+        page = doc[0]
+        fontsize0 = page.get_texttrace()[0]["size"]
+        fontsize1 = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT)["blocks"][0]["lines"][
+            0
+        ]["spans"][0]["size"]
+        assert abs(fontsize0 - fontsize1) < 1e-5
+
+
+def test_2506():
+    """Ensure expected font size across text writing angles."""
+    doc = pymupdf.open()
+    page = doc.new_page()
+    point = pymupdf.Point(100, 300)  # insertion point
+    fontsize = 11  # fontsize
+    text = "Hello"  # text
+    angles = (0, 30, 60, 90, 120)  # some angles
+
+    # write text with different angles
+    for angle in angles:
+        page.insert_text(
+            point, text, fontsize=fontsize, morph=(point, pymupdf.Matrix(angle))
+        )
+
+    # ensure correct fontsize for get_texttrace() - forgiving rounding problems
+    for span in page.get_texttrace():
+        print(span["dir"])
+        assert round(span["size"]) == fontsize
+
+    # ensure correct fontsize for get_text() - forgiving rounding problems
+    for block in page.get_text("dict")["blocks"]:
+        for line in block["lines"]:
+            print(line["dir"])
+            for span in line["spans"]:
+                print(span["size"])
+                assert round(span["size"]) == fontsize
+
+
+def test_2108():
+    doc = pymupdf.open(f'{scriptdir}/resources/test_2108.pdf')
+    page = doc[0]
+    areas = page.search_for("{sig}")
+    rect = areas[0]
+    page.add_redact_annot(rect)
+    page.apply_redactions()
+    text = page.get_text()
+
+    text_expected = b'Frau\nClaire Dunphy\nTeststra\xc3\x9fe 5\n12345 Stadt\nVertragsnummer:  12345\nSehr geehrte Frau Dunphy,\nText\nMit freundlichen Gr\xc3\xbc\xc3\x9fen\nTestfirma\nVertrag:\n  12345\nAnsprechpartner:\nJay Pritchet\nTelefon:\n123456\nE-Mail:\ntest@test.de\nDatum:\n07.12.2022\n'.decode('utf8')
+
+    if 1:
+        # Verbose info.
+        print(f'test_2108(): text is:\n{text}')
+        print(f'')
+        print(f'test_2108(): repr(text) is:\n{text!r}')
+        print(f'')
+        print(f'test_2108(): repr(text.encode("utf8")) is:\n{text.encode("utf8")!r}')
+        print(f'')
+        print(f'test_2108(): text_expected is:\n{text_expected}')
+        print(f'')
+        print(f'test_2108(): repr(text_expected) is:\n{text_expected!r}')
+        print(f'')
+        print(f'test_2108(): repr(text_expected.encode("utf8")) is:\n{text_expected.encode("utf8")!r}')
+
+        ok1 = (text == text_expected)
+        ok2 = (text.encode("utf8") == text_expected.encode("utf8"))
+        ok3 = (repr(text.encode("utf8")) == repr(text_expected.encode("utf8")))
+
+        print(f'')
+        print(f'ok1={ok1}')
+        print(f'ok2={ok2}')
+        print(f'ok3={ok3}')
+
+        print(f'')
+
+    print(f'{pymupdf.mupdf_version_tuple=}')
+    if pymupdf.mupdf_version_tuple >= (1, 21, 2):
+        print('Asserting text==text_expected')
+        assert text == text_expected
+    else:
+        print('Asserting text!=text_expected')
+        assert text != text_expected
+
+
+def test_2238():
+    filepath = f'{scriptdir}/resources/test2238.pdf'
+    doc = pymupdf.open(filepath)
+    rebased = hasattr(pymupdf, 'mupdf')
+    if rebased:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        wt_expected = ''
+        if pymupdf.mupdf_version_tuple >= (1, 26):
+            wt_expected += 'garbage bytes before version marker\n'
+            wt_expected += 'syntax error: expected \'obj\' keyword (6 0 ?)\n'
+        else:
+            wt_expected += 'format error: cannot recognize version marker\n'
+        wt_expected += 'trying to repair broken xref\n'
+        wt_expected += 'repairing PDF document'
+        assert wt == wt_expected, f'{wt=}'
+    first_page = doc.load_page(0).get_text('text', clip=pymupdf.INFINITE_RECT())
+    last_page = doc.load_page(-1).get_text('text', clip=pymupdf.INFINITE_RECT())
+
+    print(f'first_page={first_page!r}')
+    print(f'last_page={last_page!r}')
+    assert first_page == 'Hello World\n'
+    assert last_page == 'Hello World\n'
+
+    first_page = doc.load_page(0).get_text('text')
+    last_page = doc.load_page(-1).get_text('text')
+
+    print(f'first_page={first_page!r}')
+    print(f'last_page={last_page!r}')
+    assert first_page == 'Hello World\n'
+    assert last_page == 'Hello World\n'
+
+
+def test_2093():
+    if platform.python_implementation() == 'GraalVM':
+        print(f'test_2093(): Not running because slow on GraalVM.')
+        return
+    
+    doc = pymupdf.open(f'{scriptdir}/resources/test2093.pdf')
+
+    def average_color(page):
+        pixmap = page.get_pixmap()
+        p_average = [0] * pixmap.n
+        for y in range(pixmap.height):
+            for x in range(pixmap.width):
+                p = pixmap.pixel(x, y)
+                for i in range(pixmap.n):
+                    p_average[i] += p[i]
+        for i in range(pixmap.n):
+            p_average[i] /= (pixmap.height * pixmap.width)
+        return p_average
+
+    page = doc.load_page(0)
+    pixel_average_before = average_color(page)
+
+    rx=135.123
+    ry=123.56878
+    rw=69.8409
+    rh=9.46397
+
+    x0 = rx
+    y0 = ry
+    x1 = rx + rw
+    y1 = ry + rh
+
+    rect = pymupdf.Rect(x0, y0, x1, y1)
+
+    font = pymupdf.Font("Helvetica")
+    fill_color=(0,0,0)
+    page.add_redact_annot(
+        quad=rect,
+        #text="null",
+        fontname=font.name,
+        fontsize=12,
+        align=pymupdf.TEXT_ALIGN_CENTER,
+        fill=fill_color,
+        text_color=(1,1,1),
+    )
+
+    page.apply_redactions()
+    pixel_average_after = average_color(page)
+
+    print(f'pixel_average_before={pixel_average_before!r}')
+    print(f'pixel_average_after={pixel_average_after!r}')
+
+    # Before this bug was fixed (MuPDF-1.22):
+    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
+    #   pixel_average_after=[138.68844553555772, 123.05687162237561, 100.74275056194105]
+    # After fix:
+    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
+    #   pixel_average_after=[130.8889209934799, 115.25722751837269, 92.94327384463327]
+    #
+    for i in range(len(pixel_average_before)):
+        diff = pixel_average_before[i] - pixel_average_after[i]
+        assert abs(diff) < 0.1
+
+    out = f'{scriptdir}/resources/test2093-out.pdf'
+    doc.save(out)
+    print(f'Have written to: {out}')
+
+
+def test_2182():
+    print(f'test_2182() started')
+    doc = pymupdf.open(f'{scriptdir}/resources/test2182.pdf')
+    page = doc[0]
+    for annot in page.annots():
+        print(annot)
+    print(f'test_2182() finished')
+
+
+def test_2246():
+    """
+    Test / confirm identical text positions generated by
+    * page.insert_text()
+    versus
+    * TextWriter.write_text()
+
+    ... under varying situations as follows:
+
+    1. MediaBox does not start at (0, 0)
+    2. CropBox origin is different from that of MediaBox
+    3. Check for all 4 possible page rotations
+
+    The test writes the same text at the same positions using `page.insert_text()`,
+    respectively `TextWriter.write_text()`.
+    Then extracts the text spans and confirms that they all occupy the same bbox.
+    This ensures coincidence of text positions of page.of insert_text()
+    (which is assumed correct) and TextWriter.write_text().
+    """
+    def bbox_count(rot):
+        """Make a page and insert identical text via different methods.
+
+        Desired page rotation is a parameter. MediaBox and CropBox are chosen
+        to be "awkward": MediaBox does not start at (0,0) and CropBox is a
+        true subset of MediaBox.
+        """
+        # bboxes of spans on page: same text positions are represented by ONE bbox
+        bboxes = set()
+        doc = pymupdf.open()
+        # prepare a page with desired MediaBox / CropBox peculiarities
+        mediabox = pymupdf.paper_rect("letter")
+        page = doc.new_page(width=mediabox.width, height=mediabox.height)
+        xref = page.xref
+        newmbox = list(map(float, doc.xref_get_key(xref, "MediaBox")[1][1:-1].split()))
+        newmbox = pymupdf.Rect(newmbox)
+        mbox = newmbox + (10, 20, 10, 20)
+        cbox = mbox + (10, 10, -10, -10)
+        doc.xref_set_key(xref, "MediaBox", "[%g %g %g %g]" % tuple(mbox))
+        doc.xref_set_key(xref, "CrobBox", "[%g %g %g %g]" % tuple(cbox))
+        # set page to desired rotation
+        page.set_rotation(rot)
+        page.insert_text((50, 50), "Text inserted at (50,50)")
+        tw = pymupdf.TextWriter(page.rect)
+        tw.append((50, 50), "Text inserted at (50,50)")
+        tw.write_text(page)
+        blocks = page.get_text("dict")["blocks"]
+        for b in blocks:
+            for l in b["lines"]:
+                for s in l["spans"]:
+                    # store bbox rounded to 3 decimal places
+                    bboxes.add(pymupdf.Rect(pymupdf.JM_TUPLE3(s["bbox"])))
+        return len(bboxes)  # should be 1!
+
+    # the following tests must all pass
+    assert bbox_count(0) == 1
+    assert bbox_count(90) == 1
+    assert bbox_count(180) == 1
+    assert bbox_count(270) == 1
+
+
+def test_2430():
+    """Confirm that multiple font property checks will not destroy Py_None."""
+    font = pymupdf.Font("helv")
+    for i in range(1000):
+        _ = font.flags
+
+def test_2692():
+    document = pymupdf.Document(f'{scriptdir}/resources/2.pdf')
+    for page in document:
+        pix = page.get_pixmap(clip=pymupdf.Rect(0,0,10,10))
+        dl = page.get_displaylist(annots=True)
+        pix = dl.get_pixmap(
+                matrix=pymupdf.Identity,
+                colorspace=pymupdf.csRGB,
+                alpha=False,
+                clip=pymupdf.Rect(0,0,10,10),
+                )
+        pix = dl.get_pixmap(
+                matrix=pymupdf.Identity,
+                #colorspace=pymupdf.csRGB,
+                alpha=False,
+                clip=pymupdf.Rect(0,0,10,10),
+                )
+    
+
+def test_2596():
+    """Confirm correctly abandoning cache when reloading a page."""
+    if platform.python_implementation() == 'GraalVM':
+        print(f'test_2596(): not running on Graal.')
+        return
+    doc = pymupdf.Document(f"{scriptdir}/resources/test_2596.pdf")
+    page = doc[0]
+    pix0 = page.get_pixmap()  # render the page
+    _ = doc.tobytes(garbage=3)  # save with garbage collection
+
+    # Note this will invalidate cache content for this page.
+    # Reloading the page now empties the cache, so rendering
+    # will deliver the same pixmap
+    page = doc.reload_page(page)
+    pix1 = page.get_pixmap()
+    assert pix1.samples == pix0.samples
+    rebased = hasattr(pymupdf, 'mupdf')
+    if pymupdf.mupdf_version_tuple < (1, 26, 6):
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert wt == 'too many indirections (possible indirection cycle involving 24 0 R)'
+
+
+def test_2730():
+    """Ensure identical output across text extractions."""
+    doc = pymupdf.open(f"{scriptdir}/resources/test_2730.pdf")
+    page = doc[0]
+    s1 = set(page.get_text())  # plain text extraction
+    s2 = set(page.get_text(sort=True))  # uses "blocks" extraction
+    s3 = set(page.get_textbox(page.rect))
+    assert s1 == s2
+    assert s1 == s3
+
+
+def test_2553():
+    """Ensure identical output across text extractions."""
+    verbose = 0
+    doc = pymupdf.open(f"{scriptdir}/resources/test_2553.pdf")
+    page = doc[0]
+
+    # extract plain text, build set of all characters
+    list1 = page.get_text()
+    set1 = set(list1)
+
+    # extract text blocks, build set of all characters
+    list2 = page.get_text(sort=True)  # internally uses "blocks"
+    set2 = set(list2)
+    
+    # extract textbox content, build set of all characters
+    list3 = page.get_textbox(page.rect)
+    set3 = set(list3)
+    
+    def show(l):
+        ret = f'len={len(l)}\n'
+        for c in l:
+            cc = ord(c)
+            if (cc >= 32 and cc < 127) or c == '\n':
+                ret += c
+            else:
+                ret += f' [0x{hex(cc)}]'
+        return ret
+    
+    if verbose:
+        print(f'list1:\n{show(list1)}')
+        print(f'list2:\n{show(list2)}')
+        print(f'list3:\n{show(list3)}')
+    
+    # all sets must be equal
+    assert set1 == set2
+    assert set1 == set3
+
+    # With mupdf later than 1.23.4, this special page contains no invalid
+    # Unicodes.
+    #
+    print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
+    assert chr(0xFFFD) not in set1
+
+def test_2553_2():
+    doc = pymupdf.open(f"{scriptdir}/resources/test_2553-2.pdf")
+    page = doc[0]
+
+    # extract plain text, ensure that there are no 0xFFFD characters
+    text = page.get_text()
+    assert chr(0xfffd) not in text
+
+def test_2635():
+    """Rendering a page before and after cleaning it should yield the same pixmap."""
+    doc = pymupdf.open(f"{scriptdir}/resources/test_2635.pdf")
+    page = doc[0]
+    pix1 = page.get_pixmap()  # pixmap before cleaning
+
+    page.clean_contents()  # clean page
+    pix2 = page.get_pixmap()  # pixmap after cleaning
+    assert pix1.samples == pix2.samples  # assert equality
+
+
+def test_resolve_names():
+    """Test PDF name resolution."""
+    # guard against wrong PyMuPDF architecture version
+    if not hasattr(pymupdf.Document, "resolve_names"):
+        print("PyMuPDF version does not support resolving PDF names")
+        return
+    pickle_in = open(f"{scriptdir}/resources/cython.pickle", "rb")
+    old_names = pickle.load(pickle_in)
+    doc = pymupdf.open(f"{scriptdir}/resources/cython.pdf")
+    new_names = doc.resolve_names()
+    assert new_names == old_names
+
+def test_2777():
+    document = pymupdf.Document()
+    page = document.new_page()
+    print(page.mediabox.width)
+
+def test_2710():
+    doc = pymupdf.open(f'{scriptdir}/resources/test_2710.pdf')
+    page = doc.load_page(0)
+    
+    print(f'test_2710(): {page.cropbox=}')
+    print(f'test_2710(): {page.mediabox=}')
+    print(f'test_2710(): {page.rect=}')
+    
+    def numbers_approx_eq(a, b):
+        return abs(a-b) < 0.001
+    def points_approx_eq(a, b):
+        return numbers_approx_eq(a.x, b.x) and numbers_approx_eq(a.y, b.y)
+    def rects_approx_eq(a, b):
+        return points_approx_eq(a.bottom_left, b.bottom_left) and points_approx_eq(a.top_right, b.top_right)
+    def assert_rects_approx_eq(a, b):
+        assert rects_approx_eq(a, b), f'Not nearly identical: {a=} {b=}'
+                
+    blocks = page.get_text('blocks')
+    print(f'test_2710(): {blocks=}')
+    assert len(blocks) == 2
+    block = blocks[1]
+    rect = pymupdf.Rect(block[:4])
+    text = block[4]
+    print(f'test_2710(): {rect=}')
+    print(f'test_2710(): {text=}')
+    assert text == 'Text at left page border\n'
+    
+    assert_rects_approx_eq(page.cropbox, pymupdf.Rect(30.0, 30.0, 565.3200073242188, 811.9199829101562))
+    assert_rects_approx_eq(page.mediabox, pymupdf.Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562))
+    print(f'test_2710(): {pymupdf.mupdf_version_tuple=}')
+    # 2023-11-05: Currently broken in mupdf master.
+    print(f'test_2710(): Not Checking page.rect and rect.')
+    rebased = hasattr(pymupdf, 'mupdf')
+    if rebased:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert wt == (
+                "syntax error: cannot find ExtGState resource 'GS7'\n"
+                "syntax error: cannot find ExtGState resource 'GS8'\n"
+                "encountered syntax errors; page may not be correct"
+                )
+
+
+def test_2736():
+    """Check handling of CropBox changes vis-a-vis a MediaBox with
+       negative coordinates."""
+    doc = pymupdf.open()
+    page = doc.new_page()
+
+    # fake a MediaBox for demo purposes
+    doc.xref_set_key(page.xref, "MediaBox", "[-30 -20 595 842]")
+
+    assert page.cropbox == pymupdf.Rect(-30, 0, 595, 862)
+    assert page.rect == pymupdf.Rect(0, 0, 625, 862)
+
+    # change the CropBox: shift by (10, 10) in both dimensions. Please note:
+    # To achieve this, 10 must be subtracted from 862! yo must never be negative!
+    page.set_cropbox(pymupdf.Rect(-20, 0, 595, 852))
+
+    # get CropBox from the page definition
+    assert doc.xref_get_key(page.xref, "CropBox")[1] == "[-20 -10 595 842]"
+    assert page.rect == pymupdf.Rect(0, 0, 615, 852)
+
+    error = False
+    text = ""
+    try:  # check error detection
+        page.set_cropbox((-35, -10, 595, 842))
+    except Exception as e:
+        text = str(e)
+        error = True
+    assert error == True
+    assert text == "CropBox not in MediaBox"
+
+
+def test_subset_fonts():
+    """Confirm subset_fonts is working."""
+    if not hasattr(pymupdf, "mupdf"):
+        print("Not testing 'test_subset_fonts' in classic.")
+        return
+    text = "Just some arbitrary text."
+    arch = pymupdf.Archive()
+    css = pymupdf.css_for_pymupdf_font("ubuntu", archive=arch)
+    css += "* {font-family: ubuntu;}"
+    doc = pymupdf.open()
+    page = doc.new_page()
+    page.insert_htmlbox(page.rect, text, css=css, archive=arch)
+    doc.subset_fonts(verbose=True)
+    found = False
+    for xref in range(1, doc.xref_length()):
+        if "+Ubuntu#20Regular" in doc.xref_object(xref):
+            found = True
+            break
+    assert found is True
+
+
+def test_2957_1():
+    """Text following a redaction must not change coordinates."""
+    # test file with redactions
+    doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_1.pdf"))
+    page = doc[0]
+    # search for string that must not move by redactions
+    rects0 = page.search_for("6e9f73dfb4384a2b8af6ebba")
+    # sort rectangles vertically
+    rects0 = sorted(rects0, key=lambda r: r.y1)
+    assert len(rects0) == 2  # must be 2 redactions
+    page.apply_redactions()
+
+    # reload page to finalize updates
+    page = doc.reload_page(page)
+
+    # the two string must retain their positions (except rounding errors)
+    rects1 = page.search_for("6e9f73dfb4384a2b8af6ebba")
+    rects1 = sorted(rects1, key=lambda r: r.y1)
+
+    assert page.first_annot is None  # make sure annotations have disappeared
+    for i in range(2):
+        r0 = rects0[i].irect  # take rounded rects
+        r1 = rects1[i].irect
+        assert r0 == r1
+
+
+def test_2957_2():
+    """Redacted text must not change positions of remaining text."""
+    doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_2.pdf"))
+    page = doc[0]
+    words0 = page.get_text("words")  # all words before redacting
+    page.apply_redactions()  # remove/redact the word "longer"
+    words1 = page.get_text("words")  # extract words again
+    assert len(words1) == len(words0) - 1  # must be one word less
+    assert words0[3][4] == "longer"  # just confirm test file is correct one
+    del words0[3]  # remove the redacted word from first list
+    for i in range(len(words1)):  # compare words
+        w1 = words1[i]  # word after redaction
+        bbox1 = pymupdf.Rect(w1[:4]).irect  # its IRect coordinates
+        w0 = words0[i]  # word before redaction
+        bbox0 = pymupdf.Rect(w0[:4]).irect  # its IRect coordinates
+        assert bbox0 == bbox1  # must be same coordinates
+
+
+def test_707560():
+    """https://bugs.ghostscript.com/show_bug.cgi?id=707560
+    Ensure that redactions also remove characters with an empty width bbox.
+    """
+    # Make text that will contain characters with an empty bbox.
+
+    greetings = (
+        "Hello, World!",  # english
+        "Hallo, Welt!",  # german
+        "سلام دنیا!",  # persian
+        "வணக்கம், உலகம்!",  # tamil
+        "สวัสดีชาวโลก!",  # thai
+        "Привіт Світ!",  # ucranian
+        "שלום עולם!",  # hebrew
+        "ওহে বিশ্ব!",  # bengali
+        "你好世界！",  # chinese
+        "こんにちは世界！",  # japanese
+        "안녕하세요, 월드!",  # korean
+        "नमस्कार, विश्व !",  # sanskrit
+        "हैलो वर्ल्ड!",  # hindi
+    )
+    text = " ... ".join([g for g in greetings])
+    where = (50, 50, 400, 500)
+    story = pymupdf.Story(text)
+    bio = io.BytesIO()
+    writer = pymupdf.DocumentWriter(bio)
+    more = True
+    while more:
+        dev = writer.begin_page(pymupdf.paper_rect("a4"))
+        more, _ = story.place(where)
+        story.draw(dev)
+        writer.end_page()
+    writer.close()
+    doc = pymupdf.open("pdf", bio)
+    page = doc[0]
+    text = page.get_text()
+    assert text, "Unexpected: test page has no text."
+    page.add_redact_annot(page.rect)
+    page.apply_redactions()
+    assert not page.get_text(), "Unexpected: text not fully redacted."
+
+
+def test_3070():
+    with pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_3070.pdf')) as pdf:
+      links = pdf[0].get_links()
+      links[0]['uri'] = "https://www.ddg.gg"
+      pdf[0].update_link(links[0])
+      pdf.save(os.path.abspath(f'{__file__}/../../tests/test_3070_out.pdf'))
+
+def test_bboxlog_2885():
+    doc = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_2885.pdf'))
+    page=doc[0]
+    
+    bbl = page.get_bboxlog()
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt == 'invalid marked content and clip nesting'
+    
+    bbl = page.get_bboxlog(layers=True)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt == 'invalid marked content and clip nesting'
+
+def test_3081():
+    '''
+    Check Document.close() closes file handles, even if a Page instance exists.
+    '''
+    path1 = os.path.abspath(f'{__file__}/../../tests/resources/1.pdf')
+    path2 = os.path.abspath(f'{__file__}/../../tests/test_3081-2.pdf')
+    
+    rebased = hasattr(pymupdf, 'mupdf')
+    
+    import shutil
+    import sys
+    import traceback
+    shutil.copy2(path1, path2)
+    
+    # Find next two available fds.
+    next_fd_1 = os.open(path2, os.O_RDONLY)
+    next_fd_2 = os.open(path2, os.O_RDONLY)
+    os.close(next_fd_1)
+    os.close(next_fd_2)
+
+    def next_fd():
+        fd = os.open(path2, os.O_RDONLY)
+        os.close(fd)
+        return fd
+    
+    fd1 = next_fd()
+    document = pymupdf.open(path2)
+    page = document[0]
+    fd2 = next_fd()
+    document.close()
+    if rebased:
+        assert document.this is None
+        assert page.this is None
+    try:
+        document.page_count()
+    except Exception as e:
+        print(f'Received expected exception: {e}')
+        #traceback.print_exc(file=sys.stdout)
+        assert str(e) == 'document closed'
+    else:
+        assert 0, 'Did not receive expected exception.'
+    fd3 = next_fd()
+    try:
+        page.bound()
+    except Exception as e:
+        print(f'Received expected exception: {e}')
+        #traceback.print_exc(file=sys.stdout)
+        if rebased:
+            assert str(e) == 'page is None'
+        else:
+            assert str(e) == 'orphaned object: parent is None'
+    else:
+        assert 0, 'Did not receive expected exception.'
+    page = None
+    fd4 = next_fd()
+    print(f'{next_fd_1=} {next_fd_2=}')
+    print(f'{fd1=} {fd2=} {fd3=} {fd4=}')
+    print(f'{document=}')
+    assert fd1 == next_fd_1
+    assert fd2 == next_fd_2 # Checks document only uses one fd.
+    assert fd3 == next_fd_1 # Checks no leaked fds after document close.
+    assert fd4 == next_fd_1 # Checks no leaked fds after failed page access.
+
+def test_xml():
+    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
+    with pymupdf.open(path) as document:
+        document.get_xml_metadata()
+
+def test_3112_set_xml_metadata():
+    document = pymupdf.Document()
+    document.set_xml_metadata('hello world')
+
+def test_archive_3126():
+    if not hasattr(pymupdf, 'mupdf'):
+        print(f'Not running because known to fail with classic.')
+        return
+    p = os.path.abspath(f'{__file__}/../../tests/resources')
+    p = pathlib.Path(p)
+    archive = pymupdf.Archive(p)
+    
+def test_3140():
+    if not hasattr(pymupdf, 'mupdf'):
+        print(f'Not running test_3140 on classic, because Page.insert_htmlbox() not available.')
+        return
+    css2 = ''
+    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
+    oldfile = os.path.abspath(f'{__file__}/../../tests/test_3140_old.pdf')
+    newfile = os.path.abspath(f'{__file__}/../../tests/test_3140_new.pdf')
+    import shutil
+    shutil.copy2(path, oldfile)
+    def next_fd():
+        fd = os.open(path, os.O_RDONLY)
+        os.close(fd)
+        return fd
+    fd1 = next_fd()
+    with pymupdf.open(oldfile) as doc:  # open document
+        page = doc[0]
+        rect = pymupdf.Rect(130, 400, 430, 600)
+        CELLS = pymupdf.make_table(rect, cols=3, rows=5)
+        shape = page.new_shape()  # create Shape
+        for i in range(5):
+            for j in range(3):
+                qtext = "<b>" + "Ques #" + str(i*3+j+1) + ": " + "</b>" # codespell:ignore
+                atext = "<b>" + "Ans:" + "</b>" # codespell:ignore
+                qtext = qtext + '<br>' + atext
+                shape.draw_rect(CELLS[i][j])  # draw rectangle
+                page.insert_htmlbox(CELLS[i][j], qtext, css=css2, scale_low=0)
+        shape.finish(width=2.5, color=pymupdf.pdfcolor["blue"], )
+        shape.commit()  # write all stuff to the page
+        doc.subset_fonts()
+        doc.ez_save(newfile)
+    fd2 = next_fd()
+    assert fd2 == fd1, f'{fd1=} {fd2=}'
+    os.remove(oldfile)
+
+def test_cli():
+    if not hasattr(pymupdf, 'mupdf'):
+        print('test_cli(): Not running on classic because of fitz_old.')
+        return
+    import subprocess
+    subprocess.run(f'pymupdf -h', shell=1, check=1)
+
+
+def check_lines(expected_regexes, actual):
+    '''
+    Checks lines in <actual> match regexes in <expected_regexes>.
+    '''
+    print(f'check_lines():', flush=1)
+    print(f'{expected_regexes=}', flush=1)
+    print(f'{actual=}', flush=1)
+    def str_to_list(s):
+        if isinstance(s, str):
+            return s.split('\n') if s else list()
+        return s
+    expected_regexes = str_to_list(expected_regexes)
+    actual = str_to_list(actual)
+    if expected_regexes and expected_regexes[-1]:
+        expected_regexes.append('') # Always expect a trailing empty line.
+    # Remove `None` regexes and make all regexes match entire lines.
+    expected_regexes = [f'^{i}$' for i in expected_regexes if i is not None]
+    print(f'{expected_regexes=}', flush=1)
+    for expected_regex_line, actual_line in zip(expected_regexes, actual):
+        print(f'    {expected_regex_line=}', flush=1)
+        print(f'            {actual_line=}', flush=1)
+        assert re.match(expected_regex_line, actual_line)
+    assert len(expected_regexes) == len(actual), \
+            f'expected/actual lines mismatch: {len(expected_regexes)=} {len(actual)=}.'
+
+def test_cli_out():
+    '''
+    Check redirection of messages and log diagnostics with environment
+    variables PYMUPDF_LOG and PYMUPDF_MESSAGE.
+    '''
+    if not hasattr(pymupdf, 'mupdf'):
+        print('test_cli(): Not running on classic because of fitz_old.')
+        return
+    import platform
+    import re
+    import subprocess
+    log_prefix = None
+    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
+        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''
+    
+    def check(
+            expect_out,
+            expect_err,
+            message=None,
+            log=None,
+            verbose=0,
+            ):
+        '''
+        Sets PYMUPDF_MESSAGE to `message` and PYMUPDF_LOG to `log`, runs
+        `pymupdf internal`, and checks lines stdout and stderr match regexes in
+        `expect_out` and `expect_err`. Note that we enclose regexes in `^...$`.
+        '''
+        env = dict()
+        if log:
+            env['PYMUPDF_LOG'] = log
+        if message:
+            env['PYMUPDF_MESSAGE'] = message
+        env = os.environ | env
+        print(f'Running with {env=}: pymupdf internal', flush=1)
+        cp = subprocess.run(f'pymupdf internal', shell=1, check=1, capture_output=1, env=env, text=True)
+        
+        if verbose:
+            #print(f'{cp.stdout=}.', flush=1)
+            #print(f'{cp.stderr=}.', flush=1)
+            sys.stdout.write(f'stdout:\n{textwrap.indent(cp.stdout, "    ")}')
+            sys.stdout.write(f'stderr:\n{textwrap.indent(cp.stderr, "    ")}')
+        check_lines(expect_out, cp.stdout)
+        check_lines(expect_err, cp.stderr)
+    
+    #
+    print(f'Checking default, all output to stdout.')
+    check(
+            [
+                log_prefix,
+                'This is from PyMuPDF message[(][)][.]',
+                '.+This is from PyMuPDF log[(][)].',
+            ],
+            '',
+            )
+    
+    #
+    if platform.system() != 'Windows':
+        print(f'Checking redirection of everything to /dev/null.')
+        check('', '', 'path:/dev/null', 'path:/dev/null')
+    
+    #
+    print(f'Checking redirection to files.')
+    path_out = os.path.abspath(f'{__file__}/../../tests/test_cli_out.out')
+    path_err = os.path.abspath(f'{__file__}/../../tests/test_cli_out.err')
+    check('', '', f'path:{path_out}', f'path:{path_err}')
+    def read(path):
+        with open(path) as f:
+            return f.read()
+    out = read(path_out)
+    err = read(path_err)
+    check_lines(['This is from PyMuPDF message[(][)][.]'], out)
+    check_lines([log_prefix, '.+This is from PyMuPDF log[(][)][.]'], err)
+    
+    #
+    print(f'Checking redirection to fds.')
+    check(
+            [
+                'This is from PyMuPDF message[(][)][.]',
+            ],
+            [
+                log_prefix,
+                '.+This is from PyMuPDF log[(][)].',
+            ],
+            'fd:1',
+            'fd:2',
+            )
+
+
+def test_use_python_logging():
+    '''
+    Checks pymupdf.use_python_logging().
+    '''
+    log_prefix = None
+    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
+        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''
+    
+    if os.path.basename(__file__).startswith(f'test_fitz_'):
+        # Do nothing, because command `pymupdf` outputs diagnostics containing
+        # `pymupdf` which are not renamed to `fitz`, which breaks our checking.
+        print(f'Not testing with fitz alias.')
+        return
+    
+    def check(
+            code,
+            regexes_stdout,
+            regexes_stderr,
+            env = None,
+            ):
+        code = textwrap.dedent(code)
+        path = os.path.abspath(f'{__file__}/../../tests/resources_test_logging.py')
+        with open(path, 'w') as f:
+            f.write(code)
+        command = f'{sys.executable} {path}'
+        if env:
+            print(f'{env=}.')
+            env = os.environ | env
+        print(f'Running: {command}', flush=1)
+        try:
+            cp = subprocess.run(command, shell=1, check=1, capture_output=1, text=True, env=env)
+        except Exception as e:
+            print(f'Command failed: {command}.', flush=1)
+            print(f'Stdout\n{textwrap.indent(e.stdout, "    ")}', flush=1)
+            print(f'Stderr\n{textwrap.indent(e.stderr, "    ")}', flush=1)
+            raise
+        check_lines(regexes_stdout, cp.stdout)
+        check_lines(regexes_stderr, cp.stderr)
+    
+    print(f'## Basic use of `logging` sends output to stderr instead of default stdout.')
+    check(
+            '''
+            import pymupdf
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            pymupdf.set_messages(pylogging=1)
+            pymupdf.set_log(pylogging=1)
+            pymupdf.message('this is pymupdf.message() 2')
+            pymupdf.log('this is pymupdf.log() 2')
+            ''',
+            [
+                log_prefix,
+                'this is pymupdf.message[(][)]',
+                '.+this is pymupdf.log[(][)]',
+            ],
+            [
+                'this is pymupdf.message[(][)] 2',
+                '.+this is pymupdf.log[(][)] 2',
+            ],
+            )
+    
+    print(f'## Calling logging.basicConfig() makes logging output contain <LEVEL>:<name> prefixes.')
+    check(
+            '''
+            import pymupdf
+            
+            import logging
+            logging.basicConfig()
+            pymupdf.set_messages(pylogging=1)
+            pymupdf.set_log(pylogging=1)
+            
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            ''',
+            [
+                log_prefix,
+            ],
+            [
+                'WARNING:pymupdf:this is pymupdf.message[(][)]',
+                'WARNING:pymupdf:.+this is pymupdf.log[(][)]',
+            ],
+            )
+    
+    print(f'## Setting PYMUPDF_USE_PYTHON_LOGGING=1 makes PyMuPDF use logging on startup.')
+    check(
+            '''
+            import pymupdf
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            ''',
+            '',
+            [
+                log_prefix,
+                'this is pymupdf.message[(][)]',
+                '.+this is pymupdf.log[(][)]',
+            ],
+            env = dict(
+                    PYMUPDF_MESSAGE='logging:',
+                    PYMUPDF_LOG='logging:',
+                    ),
+            )
+    
+    print(f'## Pass explicit logger to pymupdf.use_python_logging() with logging.basicConfig().')
+    check(
+            '''
+            import pymupdf
+            
+            import logging
+            logging.basicConfig()
+            
+            logger = logging.getLogger('foo')
+            pymupdf.set_messages(pylogging_logger=logger, pylogging_level=logging.WARNING)
+            pymupdf.set_log(pylogging_logger=logger, pylogging_level=logging.ERROR)
+            
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            ''',
+            [
+                log_prefix,
+            ],
+            [
+                'WARNING:foo:this is pymupdf.message[(][)]',
+                'ERROR:foo:.+this is pymupdf.log[(][)]',
+            ],
+            )
+    
+    print(f'## Check pymupdf.set_messages() pylogging_level args.')
+    check(
+            '''
+            import pymupdf
+            
+            import logging
+            logging.basicConfig(level=logging.DEBUG)
+            logger = logging.getLogger('pymupdf')
+            
+            pymupdf.set_messages(pylogging_level=logging.CRITICAL)
+            pymupdf.set_log(pylogging_level=logging.INFO)
+            
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            ''',
+            [
+                log_prefix,
+            ],
+            [
+                'CRITICAL:pymupdf:this is pymupdf.message[(][)]',
+                'INFO:pymupdf:.+this is pymupdf.log[(][)]',
+            ],
+            )
+    
+    print(f'## Check messages() with sys.stdout=None.')
+    check(
+            '''
+            import sys
+            sys.stdout = None
+            import pymupdf
+            
+            pymupdf.message('this is pymupdf.message()')
+            pymupdf.log('this is pymupdf.log()')
+            ''',
+            [],
+            [],
+            )
+    
+
+def relpath(path, start=None):
+    '''
+    A 'safe' alternative to os.path.relpath(). Avoids an exception on Windows
+    if the drive needs to change - in this case we use os.path.abspath().
+    '''
+    try:
+        return os.path.relpath(path, start)
+    except ValueError:
+        # os.path.relpath() fails if trying to change drives.
+        assert platform.system() == 'Windows'
+        return os.path.abspath(path)
+
+
+def test_open():
+
+    if not hasattr(pymupdf, 'mupdf'):
+        print('test_open(): not running on classic.')
+        return
+    
+    import re
+    import textwrap
+    import traceback
+    
+    resources = relpath(os.path.abspath(f'{__file__}/../../tests/resources'))
+    
+    # We convert all strings to use `/` instead of os.sep, which avoids
+    # problems with regex's on windows.
+    resources = resources.replace(os.sep, '/')
+    
+    def check(filename=None, stream=None, filetype=None, exception=None):
+        '''
+        Checks we receive expected exception if specified.
+        '''
+        if isinstance(filename, str):
+            filename = filename.replace(os.sep, '/')
+        if exception:
+            etype, eregex = exception
+            if isinstance(eregex, (tuple, list)):
+                # Treat as sequence of regexes to look for.
+                eregex = '.*'.join(eregex)
+            try:
+                pymupdf.open(filename=filename, stream=stream, filetype=filetype)
+            except etype as e:
+                text = traceback.format_exc(limit=0)
+                text = text.replace(os.sep, '/')
+                text = textwrap.indent(text, '    ', lambda line: 1)
+                assert re.search(eregex, text, re.DOTALL), \
+                        f'Incorrect exception text, expected {eregex=}, received:\n{text}'
+                print(f'Received expected exception for {filename=} {stream=} {filetype=}:\n{text}')
+            except Exception as e:
+                assert 0, \
+                        f'Incorrect exception, expected {etype}, received {type(e)=}.'
+            else:
+                assert 0, f'Did not received exception, expected {etype=}. {filename=} {stream=} {filetype=} {exception=}'
+        else:
+            document = pymupdf.open(filename=filename, stream=stream, filetype=filetype)
+            return document
+    
+    check(f'{resources}/1.pdf')
+    
+    check(f'{resources}/Bezier.epub')
+    
+    path = 1234
+    etype = TypeError
+    eregex = re.escape(f'bad filename: type(filename)=<class \'int\'> filename={path}.')
+    check(path, exception=(etype, eregex))
+    
+    path = 'test_open-this-file-will-not-exist'
+    etype = pymupdf.FileNotFoundError
+    eregex = f'no such file: \'{path}\''
+    check(path, exception=(etype, eregex))
+    
+    path = resources
+    etype = pymupdf.FileDataError
+    eregex = re.escape(f'\'{path}\' is no file')
+    check(path, exception=(etype, eregex))
+    
+    path = relpath(os.path.abspath(f'{resources}/../test_open_empty'))
+    path = path.replace(os.sep, '/')
+    with open(path, 'w') as f:
+        pass
+    etype = pymupdf.EmptyFileError
+    eregex = re.escape(f'Cannot open empty file: filename={path!r}.')
+    check(path, exception=(etype, eregex))
+    
+    path = f'{resources}/1.pdf'
+    filetype = 'xps'
+    etype = pymupdf.FileDataError
+    # 2023-12-12: On OpenBSD, for some reason the SWIG catch code only catches
+    # the exception as FzErrorBase.
+    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorFormat'
+    eregex = (
+            # With a sysinstall with separate MuPDF install, we get
+            # `mupdf.FzErrorFormat` instead of `pymupdf.mupdf.FzErrorFormat`. So
+            # we just search for the former.
+            re.escape(f'mupdf.{etype2}: code=7: cannot recognize zip archive'),
+            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r} as type {filetype!r}.'),
+            )
+    check(path, filetype=filetype, exception=None)
+    
+    path = f'{resources}/chinese-tables.pickle'
+    etype = pymupdf.FileDataError
+    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorUnsupported'
+    etext = (
+            re.escape(f'mupdf.{etype2}: code=6: cannot find document handler for file: {path}'),
+            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r}.'),
+            )
+    check(path, exception=(etype, etext))
+    
+    stream = 123
+    etype = TypeError
+    etext = re.escape('bad stream: type(stream)=<class \'int\'>.')
+    check(stream=stream, exception=(etype, etext))
+    
+    check(stream=b'', exception=(pymupdf.EmptyFileError, re.escape('Cannot open empty stream.')))
+
+
+def test_open2():
+    '''
+    Checks behaviour of fz_open_document() and fz_open_document_with_stream()
+    with different filenames/magic values.
+    '''
+    if platform.system() == 'Windows':
+        print(f'test_open2(): not running on Windows because `git ls-files` known fail on Github Windows runners.')
+        return
+    
+    root = os.path.normpath(f'{__file__}/../..')
+    root = relpath(root)
+    
+    # Find tests/resources/test_open2.* input files/streams. We calculate
+    # paths relative to the PyMuPDF checkout directory <root>, to allow use
+    # of tests/resources/test_open2_expected.json regardless of the actual
+    # checkout directory.
+    print()
+    sys.path.append(root)
+    try:
+        import pipcl
+    finally:
+        del sys.path[0]
+    paths = pipcl.git_items(f'{root}/tests/resources')
+    paths = fnmatch.filter(paths, f'test_open2.*')
+    paths = [f'tests/resources/{i}' for i in paths]
+    
+    # Get list of extensions of input files.
+    extensions = set()
+    extensions.add('.txt')
+    extensions.add('')
+    for path in paths:
+        _, ext = os.path.splitext(path)
+        extensions.add(ext)
+    extensions = sorted(list(extensions))
+    
+    def get_result(e, document):
+        '''
+        Return fz_lookup_metadata(document, 'format') or [ERROR].
+        '''
+        if e:
+            return f'[error]'
+        else:
+            try:
+                return pymupdf.mupdf.fz_lookup_metadata2(document, 'format')
+            except Exception:
+                return ''
+    
+    def dict_set_path(dict_, *items):
+        for item in items[:-2]:
+            dict_ = dict_.setdefault(item, dict())
+        dict_[items[-2]] = items[-1]
+
+    results = dict()
+    
+    # Prevent warnings while we are running.
+    _g_out_message = pymupdf._g_out_message
+    pymupdf._g_out_message = None
+    try:
+        results = dict()
+        
+        for path in paths:
+            print(path)
+            for ext in extensions:
+                path2 = f'{root}/foo{ext}'
+                path3 = shutil.copy2(f'{root}/{path}', path2)
+                assert(path3 == path2)
+                
+                # Test fz_open_document().
+                e = None
+                document = None
+                try:
+                    document = pymupdf.mupdf.fz_open_document(path2)
+                except Exception as ee:
+                    e = ee
+                wt = pymupdf.TOOLS.mupdf_warnings()
+                text = get_result(e, document)
+                print(f'    fz_open_document({path2}) => {text}')
+                dict_set_path(results, path, ext, 'file', text)
+
+                # Test fz_open_document_with_stream().
+                e = None
+                document = None
+                with open(f'{root}/{path}', 'rb') as f:
+                    data = f.read()
+                stream = pymupdf.mupdf.fz_open_memory(pymupdf.mupdf.python_buffer_data(data), len(data))
+                try:
+                    document = pymupdf.mupdf.fz_open_document_with_stream(ext, stream)
+                except Exception as ee:
+                    e = ee
+                wt = pymupdf.TOOLS.mupdf_warnings()
+                text = get_result(e, document)
+                print(f'    fz_open_document_with_stream(magic={ext!r}) => {text}')
+                dict_set_path(results, path, ext, 'stream', text)
+                
+    finally:
+        pymupdf._g_out_message = _g_out_message
+    
+    # Create html table.
+    path_html = os.path.normpath(f'{__file__}/../../tests/test_open2.html')
+    with open(path_html, 'w') as f:
+        f.write(f'<html>\n')
+        f.write(f'<body>\n')
+        f.write(f'<p>{time.strftime("%F-%T")}\n')
+        f.write(f'<table border="1" style="border-collapse:collapse" cellpadding="4">\n')
+        f.write(f'<tr><td></td><th colspan="{len(extensions)}">Extension/magic')
+        f.write(f'<tr><th style="border-bottom: 4px solid black; border-right: 4px solid black;">Data file</th>')
+        for ext in extensions:
+            f.write(f'<th style="border-bottom: 4px solid black;">{ext}</th>')
+        f.write('\n')
+        for path in sorted(results.keys()):
+            _, ext = os.path.splitext(path)
+            f.write(f'<tr><th style="border-right: 4px solid black;">{os.path.basename(path)}</th>')
+            for ext2 in sorted(results[path].keys()):
+                text_file = results[path][ext2]['file']
+                text_stream = results[path][ext2]['stream']
+                b1, b2 = ('<b>', '</b>') if ext2==ext else ('', '')
+                if text_file == text_stream:
+                    if text_file == '[error]':
+                        f.write(f'<td><div style="color: #808080;">{b1}{text_file}{b2}</div></td>')
+                    else:
+                        f.write(f'<td>{b1}{text_file}{b2}</td>')
+                else:
+                    f.write(f'<td>file: {b1}{text_file}{b2}<br>')
+                    f.write(f'stream: {b1}{text_stream}{b2}</td>')
+            f.write('</tr>\n')
+        f.write(f'</table>\n')
+        f.write(f'/<body>\n')
+        f.write(f'</html>\n')
+    print(f'Have created: {path_html}')
+    
+    path_out = os.path.normpath(f'{__file__}/../../tests/test_open2.json')
+    with open(path_out, 'w') as f:
+        json.dump(results, f, indent=4, sort_keys=1)
+        
+    if pymupdf.mupdf_version_tuple >= (1, 26):
+        with open(os.path.normpath(f'{__file__}/../../tests/resources/test_open2_expected.json')) as f:
+            results_expected = json.load(f)
+        if results != results_expected:
+            print(f'results != results_expected:')
+            def show(r, name):
+                text = json.dumps(r, indent=4, sort_keys=1)
+                print(f'{name}:')
+                print(textwrap.indent(text, '    '))
+            show(results_expected, 'results_expected')
+            show(results, 'results')
+            assert 0
+    
+
+def test_533():
+    if not hasattr(pymupdf, 'mupdf'):
+        print('test_533(): Not running on classic.')
+        return
+    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
+    doc = pymupdf.open(path)
+    print()
+    for p in doc:
+        print(f'test_533(): for p in doc: {p=}.')
+    for p in list(doc)[:]:
+        print(f'test_533(): for p in list(doc)[:]: {p=}.')
+    for p in doc[:]:
+        print(f'test_533(): for p in doc[:]: {p=}.')
+
+def test_3354():
+    document = pymupdf.open(filename)
+    v = dict(foo='bar')
+    document.metadata = v
+    assert document.metadata == v
+
+def test_scientific_numbers():
+    '''
+    This is #3381.
+    '''
+    doc = pymupdf.open()
+    page = doc.new_page(width=595, height=842)
+    point = pymupdf.Point(1e-11, -1e-10)
+    page.insert_text(point, "Test")
+    contents = page.read_contents()
+    print(f'{contents=}')
+    assert b" 1e-" not in contents
+
+def test_3615():
+    print('')
+    print(f'{pymupdf.pymupdf_version=}', flush=1)
+    print(f'{pymupdf.VersionBind=}', flush=1)
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3615.epub')
+    doc = pymupdf.open(path)
+    print(doc.pagemode)
+    print(doc.pagelayout)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt
+
+def test_3654():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3654.docx')
+    content = ""
+    with pymupdf.open(path) as document:
+        for page in document:
+            content += page.get_text() + '\n\n'
+    content = content.strip()
+
+def test_3727():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3727.pdf')
+    doc = pymupdf.open(path)
+    for page in doc:
+        page.get_pixmap(matrix = pymupdf.Matrix(2,2))
+
+def test_3569():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3569.pdf')
+    document = pymupdf.open(path)
+    page = document[0]
+    svg = page.get_svg_image(text_as_path=False)
+    print(f'{svg=}')
+    if pymupdf.mupdf_version_tuple >= (1, 27):
+        assert svg == (
+                '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" version="1.1" width="3024" height="2160" viewBox="0 0 3024 2160">\n'
+                '<defs>\n'
+                '<clipPath id="clip_1">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M25432 10909H29692V15642H25432V10909"/>\n'
+                '</clipPath>\n'
+                '<clipPath id="clip_2">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M28526 38017 31807 40376V40379L31312 41314V42889H28202L25092 42888V42887L28524 38017H28526"/>\n'
+                '</clipPath>\n'
+                '</defs>\n'
+                '<g clip-path="url(#clip_1)">\n'
+                '<g inkscape:groupmode="layer" inkscape:label="CED - Text">\n'
+                '<text xml:space="preserve" transform="matrix(.06 0 0 .06 3024 2160)" font-size="174.644" font-family="ArialMT"><tspan y="-28538" x="-14909 -14841.063 -14773.127 -14676.024 -14578.922 -14520.766 -14423.663">**L1-13</tspan></text>\n'
+                '</g>\n'
+                '</g>\n'
+                '<g clip-path="url(#clip_2)">\n'
+                '<g inkscape:groupmode="layer" inkscape:label="Level 03|S-COLS">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z" fill="#7f7f7f"/>\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" stroke-linecap="butt" stroke-miterlimit="10" stroke-linejoin="miter" fill="none" stroke="#7f7f7f" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z"/>\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" stroke-width="9" stroke-linecap="round" stroke-linejoin="round" fill="none" stroke="#7f7f7f" d="M30530 41483H31130V42083H30530V41483"/>\n'
+                '</g>\n'
+                '</g>\n'
+                '</svg>\n'
+                )
+    else:
+        assert svg == (
+                '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" version="1.1" width="3024" height="2160" viewBox="0 0 3024 2160">\n'
+                '<defs>\n'
+                '<clipPath id="clip_1">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M25432 10909H29692V15642H25432V10909"/>\n'
+                '</clipPath>\n'
+                '<clipPath id="clip_2">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M28526 38017 31807 40376V40379L31312 41314V42889H28202L25092 42888V42887L28524 38017H28526"/>\n'
+                '</clipPath>\n'
+                '</defs>\n'
+                '<g clip-path="url(#clip_1)">\n'
+                '<g inkscape:groupmode="layer" inkscape:label="CED - Text">\n'
+                '<text xml:space="preserve" transform="matrix(.06 0 0 .06 3024 2160)" font-size="174.644" font-family="ArialMT"><tspan y="-28538" x="-14909 -14841.063 -14773.127 -14676.024 -14578.922 -14520.766 -14423.663">**L1-13</tspan></text>\n'
+                '</g>\n'
+                '</g>\n'
+                '<g clip-path="url(#clip_2)">\n'
+                '<g inkscape:groupmode="layer" inkscape:label="Level 03|S-COLS">\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z" fill="#7f7f7f"/>\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" stroke-width="0" stroke-linecap="butt" stroke-miterlimit="10" stroke-linejoin="miter" fill="none" stroke="#7f7f7f" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z"/>\n'
+                '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" stroke-width="9" stroke-linecap="round" stroke-linejoin="round" fill="none" stroke="#7f7f7f" d="M30530 41483H31130V42083H30530V41483"/>\n'
+                '</g>\n'
+                '</g>\n'
+                '</svg>\n'
+                )
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt == 'unknown cid collection: PDFAUTOCAD-Indentity0\nnon-embedded font using identity encoding: ArialMT (mapping via )\ninvalid marked content and clip nesting'
+
+def test_3450():
+    # This issue is a slow-down, so we just show time taken - it's not safe
+    # to fail if test takes too long because that can give spurious failures
+    # depending on hardware etc.
+    #
+    # On a mac-mini, PyMuPDF-1.24.8 takes 60s, PyMuPDF-1.24.9 takes 4s.
+    #
+    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
+        print(f'test_3450(): not running on valgrind because very slow.', flush=1)
+        return
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3450.pdf')
+    pdf = pymupdf.open(path)
+    page = pdf[0]
+    t = time.time()
+    pix = page.get_pixmap(alpha=False, dpi=150)
+    t = time.time() - t
+    print(f'test_3450(): {t=}')
+
+def test_3859():
+    print(f'{pymupdf.mupdf.PDF_NULL=}.')
+    print(f'{pymupdf.mupdf.PDF_TRUE=}.')
+    print(f'{pymupdf.mupdf.PDF_FALSE=}.')
+    for name in ('NULL', 'TRUE', 'FALSE'):
+        name2 = f'PDF_{name}'
+        v = getattr(pymupdf.mupdf, name2)
+        print(f'{name=} {name2=} {v=} {type(v)=}')
+        assert type(v)==pymupdf.mupdf.PdfObj, f'`v` is not a pymupdf.mupdf.PdfObj.'
+
+def test_3905():
+    data = b'A,B,C,D\r\n1,2,1,2\r\n2,2,1,2\r\n'
+    try:
+        document = pymupdf.open(stream=data, filetype='pdf')
+    except pymupdf.FileDataError as e:
+        print(f'test_3905(): e: {e}')
+    else:
+        assert 0
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple >= (1, 26):
+        assert wt == 'format error: cannot find version marker\ntrying to repair broken xref\nrepairing PDF document'
+    else:
+        assert wt == 'format error: cannot recognize version marker\ntrying to repair broken xref\nrepairing PDF document'
+
+def test_3624():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3624.pdf')
+    path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3624_expected.png')
+    path_png = os.path.normpath(f'{__file__}/../../tests/test_3624.png')
+    with pymupdf.open(path) as document:
+        page = document[0]
+        pixmap = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))
+        print(f'Saving to {path_png=}.')
+        pixmap.save(path_png)
+        rms = gentle_compare.pixmaps_rms(path_png_expected, path_png)
+        print(f'{rms=}')
+        # We get small differences in sysinstall tests, where some thirdparty
+        # libraries can differ.
+        if rms > 1:
+            pixmap_diff = gentle_compare.pixmaps_diff(path_png_expected, path_png)
+            path_png_diff = os.path.normpath(f'{__file__}/../../tests/test_3624_diff.png')
+            pixmap_diff.save(path_png_diff)
+            assert 0, f'{rms=}'
+
+
+def test_4043():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4043.pdf')
+    doc = pymupdf.open(path)
+    doc.fullcopy_page(1)
+
+
+def test_4018():
+    document = pymupdf.open()
+    for page in document.pages(-1, -1):
+        pass
+
+def test_4034():
+    # tests/resources/test_4034.pdf is first two pages of input file in
+    # https://github.com/pymupdf/PyMuPDF/issues/4034.
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4034.pdf')
+    path_clean = os.path.normpath(f'{__file__}/../../tests/test_4034_out.pdf')
+    with pymupdf.open(path) as document:
+        pixmap1 = document[0].get_pixmap()
+        document.save(path_clean, clean=1)
+    with pymupdf.open(path_clean) as document:
+        page = document[0]
+        pixmap2 = document[0].get_pixmap()
+    rms = gentle_compare.pixmaps_rms(pixmap1, pixmap2)
+    print(f'test_4034(): Comparison of original/cleaned page 0 pixmaps: {rms=}.')
+    if pymupdf.mupdf_version_tuple < (1, 25, 2):
+        assert 30 < rms < 50
+    else:
+        assert rms == 0
+
+def test_4309():
+    document = pymupdf.open()
+    page = document.new_page()
+    document.delete_page()
+
+def test_4263():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4263.pdf')
+    path_out = f'{path}.linerarized.pdf'
+    command = f'pymupdf clean -linear {path} {path_out}'
+    print(f'Running: {command}')
+    cp = subprocess.run(command, shell=1, check=0)
+    if pymupdf.mupdf_version_tuple < (1, 26):
+        assert cp.returncode == 0
+    else:
+        # Support for linerarisation dropped in MuPDF-1.26.
+        assert cp.returncode
+
+def test_4224():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4224.pdf')
+    with pymupdf.open(path) as document:
+        for page in document.pages():
+            pixmap = page.get_pixmap(dpi=150)
+            path_pixmap = f'{path}.{page.number}.png'
+            pixmap.save(path_pixmap)
+            print(f'Have created: {path_pixmap}')
+    if pymupdf.mupdf_version_tuple < (1, 25, 5):
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert wt == 'format error: negative code in 1d faxd\npadding truncated image'
+
+def test_4319():
+    # Have not seen this test reproduce issue #4319, but keeping it anyway.
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4319.pdf')
+    doc = pymupdf.open()
+    page = doc.new_page()
+    page.insert_text((10, 100), "some text")
+    doc.save(path)
+    doc.close()
+    doc = pymupdf.open(path)
+    page = doc[0]
+    pc = doc.page_count
+    doc.close()
+    os.remove(path)
+    print(f"removed {doc.name=}")
+
+def test_3886():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3886.pdf')
+    path_clean0 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean0.pdf')
+    path_clean1 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean1.pdf')
+    
+    with pymupdf.open(path) as document:
+        pixmap = document[0].get_pixmap()
+        document.save(path_clean0, clean=0)
+    
+    with pymupdf.open(path) as document:
+        document.save(path_clean1, clean=1)
+    
+    with pymupdf.open(path_clean0) as document:
+        pixmap_clean0 = document[0].get_pixmap()
+    
+    with pymupdf.open(path_clean1) as document:
+        pixmap_clean1 = document[0].get_pixmap()
+    
+    rms_0 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean0)
+    rms_1 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean1)
+    print(f'test_3886(): {rms_0=} {rms_1=}')
+
+def test_4415():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4415.pdf')
+    path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out.png')
+    path_out_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out_expected.png')
+    with pymupdf.open(path) as document:
+        page = document[0]
+        rot = page.rotation
+        orig = pymupdf.Point(100, 100)  # apparent insertion point
+        text = 'Text at Top-Left'
+        mrot = page.derotation_matrix  # matrix annihilating page rotation
+        page.insert_text(orig * mrot, text, fontsize=60, rotate=rot)
+        pixmap = page.get_pixmap()
+        pixmap.save(path_out)
+        rms = gentle_compare.pixmaps_rms(path_out_expected, path_out)
+        assert rms == 0, f'{rms=}'
+
+def test_4466():
+    path = os.path.normpath(f'{__file__}/../../tests/test_4466.pdf')
+    with pymupdf.Document(path) as document:
+        for page in document:
+            print(f'{page=}', flush=1)
+            pixmap = page.get_pixmap(clip=(0, 0, 10, 10))
+            print(f'{pixmap.n=} {pixmap.size=} {pixmap.stride=} {pixmap.width=} {pixmap.height=} {pixmap.x=} {pixmap.y=}', flush=1)
+            pixmap.is_unicolor  # Used to crash.
+
+
+def test_4479():
+    # This passes with pymupdf-1.24.14, fails with pymupdf==1.25.*, passes with
+    # pymupdf-1.26.0.
+    print()
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4479.pdf')
+    with pymupdf.open(path) as document:
+        
+        def show(items):
+            for item in items:
+                print(f'    {repr(item)}')
+        
+        items = document.layer_ui_configs()
+        show(items)
+        assert items == [
+                {'depth': 0, 'locked': 0, 'number': 0, 'on': 1, 'text': 'layer_0', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 1, 'on': 1, 'text': 'layer_1', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 2, 'on': 0, 'text': 'layer_2', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 3, 'on': 1, 'text': 'layer_3', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 4, 'on': 1, 'text': 'layer_4', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 5, 'on': 1, 'text': 'layer_5', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 6, 'on': 1, 'text': 'layer_6', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 7, 'on': 1, 'text': 'layer_7', 'type': 'checkbox'},
+                ]
+        
+        document.set_layer_ui_config(0, pymupdf.PDF_OC_OFF)
+        items = document.layer_ui_configs()
+        show(items)
+        assert items == [
+                {'depth': 0, 'locked': 0, 'number': 0, 'on': 0, 'text': 'layer_0', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 1, 'on': 1, 'text': 'layer_1', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 2, 'on': 0, 'text': 'layer_2', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 3, 'on': 1, 'text': 'layer_3', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 4, 'on': 1, 'text': 'layer_4', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 5, 'on': 1, 'text': 'layer_5', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 6, 'on': 1, 'text': 'layer_6', 'type': 'checkbox'},
+                {'depth': 0, 'locked': 0, 'number': 7, 'on': 1, 'text': 'layer_7', 'type': 'checkbox'},
+                ]
+        
+
+def test_4533():
+    print()
+    path = util.download(
+            'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf',
+            'test_4533.pdf',
+            size=16864501,
+            )
+    # This bug is a segv so we run the test in a child process.
+    command = f'{sys.executable} -c "import pymupdf; document = pymupdf.open({path!r}); print(len(document))"'
+    print(f'Running: {command}')
+    cp = subprocess.run(command, shell=1, check=0)
+    e = cp.returncode
+    print(f'{e=}')
+    if pymupdf.mupdf_version_tuple >= (1, 26, 6):
+        assert e == 0
+    else:
+        assert e != 0
+
+
+def test_4564():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4564.pdf')
+    print()
+    with pymupdf.open(path) as document:
+        for key in sorted(document.metadata.keys()):
+            value = document.metadata[key]
+            print(f'{key}: {value!r}')
+    if pymupdf.mupdf_version_tuple >= (1, 27):
+        assert document.metadata['producer'] == 'Adobe PSL 1.3e for Canon\x00'
+    else:
+        assert document.metadata['producer'] == 'Adobe PSL 1.3e for Canon\udcc0\udc80'
+
+
+def test_4496():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4496.hwpx')
+    with pymupdf.open(path) as document:
+        print(document.page_count)
+
+
+def test_gitinfo():
+    # This doesn't really test very much, but can be useful to see the current
+    # values.
+    print('')
+    print(f'test_4496():')
+    print(f'{pymupdf.mupdf_location=}')
+    print(f'{pymupdf.mupdf_version=}')
+    print(f'{pymupdf.pymupdf_git_branch=}')
+    print(f'{pymupdf.pymupdf_git_sha=}')
+    print(f'{pymupdf.pymupdf_version=}')
+    print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, "    ")}')
+    
+
+def test_4392():
+    print()
+    path = os.path.normpath(f'{__file__}/../../tests/test_4392.py')
+    with open(path, 'w') as f:
+        f.write('import pymupdf\n')
+    
+    command = f'pytest {path}'
+    print(f'Running: {command}', flush=1)
+    e1 = subprocess.run(command, shell=1, check=0).returncode
+    print(f'{e1=}')
+    
+    command = f'pytest -Werror {path}'
+    print(f'Running: {command}', flush=1)
+    e2 = subprocess.run(command, shell=1, check=0).returncode
+    print(f'{e2=}')
+    
+    command = f'{sys.executable} -Werror -c "import pymupdf"'
+    print(f'Running: {command}', flush=1)
+    e3 = subprocess.run(command, shell=1, check=0).returncode
+    print(f'{e3=}')
+    
+    print(f'{e1=} {e2=} {e3=}')
+    
+    print(f'{pymupdf.swig_version=}')
+    print(f'{pymupdf.swig_version_tuple=}')
+    
+    assert e1 == 5
+    if pymupdf.swig_version_tuple >= (4, 4):
+        assert e2 == 5
+        assert e3 == 0
+    else:
+        # We get SEGV's etc with older swig.
+        if platform.system() == 'Windows':
+            assert (e2, e3) == (0xc0000005, 0xc0000005)
+        else:
+            # On plain linux we get (139, 139). On manylinux we get (-11,
+            # -11). On MacOS we get (-11, -11).
+            assert (e2, e3) == (139, 139) or (e2, e3) == (-11, -11)
+
+
+def test_4639():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4639.pdf')
+    with pymupdf.open(path) as document:
+        page = document[-1]
+        page.get_bboxlog(layers=True)
+
+
+def test_4590():
+
+    # Create test PDF.
+    path = os.path.normpath(f'{__file__}/../../tests/test_4590.pdf')
+    with pymupdf.open() as document:
+        page = document.new_page()
+        
+        # Add some text
+        text = 'This PDF contains a file attachment annotation.'
+        page.insert_text((72, 72), text, fontsize=12)
+
+        # Create a sample file.
+        path_sample = os.path.normpath(f'{__file__}/../../tests/test_4590_annotation_sample.txt')
+        with open(path_sample, 'w') as f:
+            f.write('This is a sample attachment file.')
+
+        # Read file as bytes
+        with open(path_sample, 'rb') as f:
+            sample = f.read()
+
+        # Define annotation position (rect or point)
+        annot_pos = pymupdf.Rect(72, 100, 92, 120)  # PushPin icon rectangle
+
+        # Add the file attachment annotation
+        page.add_file_annot(
+                point = annot_pos,
+                buffer_ = sample,
+                filename = 'sample.txt',
+                ufilename = 'sample.txt',
+                desc = 'A test attachment file.',
+                icon = 'PushPin',
+                )
+
+        # Save the PDF
+        document.save(path)
+    
+    # Check pymupdf.Document.scrub() works.
+    with pymupdf.open(path) as document:
+        document.scrub()
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:37:51 +0200
parents
children	a6bc019ac0b2