diff mupdf-source/scripts/mutool.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/scripts/mutool.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,384 @@
+#!/usr/bin/env python3
+
+'''
+Intended to behaves exactly like mutool, but uses the mupdf python => C++ =>
+mupdf.so wrappers.
+
+The code is intended to be similar to the mutool C code, to simplify
+comparison.
+'''
+
+import getopt
+import os
+import sys
+import textwrap
+
+if os.environ.get('MUPDF_PYTHON') in ('swig', None):
+    # PYTHONPATH should have been set up to point to a build/shared-*/
+    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
+    import mupdf
+elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
+    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
+    import mupdf_cppyy
+    del sys.path[0]
+    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
+else:
+    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
+
+def usage():
+    print( textwrap.dedent('''
+            usage: mutool.py <command> [options]
+            \tclean\t-- rewrite pdf file
+            \tconvert\t-- convert document
+            \ttrace\t-- trace device calls
+            \tdraw\t-- convert document
+            '''))
+
+
+# Things for clean
+#
+def clean_usage():
+    print(textwrap.dedent(
+            f'''
+            usage: mutool clean [options] input.pdf [output.pdf] [pages]
+            \t-p -\tpassword
+            \t-g\tgarbage collect unused objects
+            \t-gg\tin addition to -g compact xref table
+            \t-ggg\tin addition to -gg merge duplicate objects
+            \t-gggg\tin addition to -ggg check streams for duplication
+            \t-l\tlinearize PDF
+            \t-D\tsave file without encryption
+            \t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)
+            \t-O -\towner password (only if encrypting)
+            \t-U -\tuser password (only if encrypting)
+            \t-P -\tpermission flags (only if encrypting)
+            \t-a\tascii hex encode binary streams
+            \t-d\tdecompress streams
+            \t-z\tdeflate uncompressed streams
+            \t-f\tcompress font streams
+            \t-i\tcompress image streams
+            \t-c\tclean content streams
+            \t-s\tsanitize content streams
+            \t-A\tcreate appearance streams for annotations
+            \t-AA\trecreate appearance streams for annotations
+            \tpages\tcomma separated list of page numbers and ranges
+            '''
+            ))
+    sys.exit(1)
+
+def clean(argv):
+    outfile = 'out.pdf'
+    password = ''
+    opts = mupdf.PdfCleanOptions()
+    opts.write.do_garbage += 1
+    errors = 0
+    items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
+    for option, value in items:
+         if 0:   pass   # lgtm [py/unreachable-statement]
+         elif option == '-p': password = value
+         elif option == '-d': opts.write.do_decompress += 1
+         elif option == '-z': opts.write.do_compress += 1
+         elif option == '-f': opts.write.do_compress_fonts += 1
+         elif option == '-i': opts.write.do_compress_images += 1
+         elif option == '-a': opts.write.do_ascii += 1
+         elif option == '-g': opts.write.do_garbage += 1
+         elif option == '-l': opts.write.do_linear += 1
+         elif option == '-c': opts.write.do_clean += 1
+         elif option == '-s': opts.write.do_sanitize += 1
+         elif option == '-A': opts.write.do_appearance += 1
+         elif option == '-D': opts.write.do_encrypt = PDF_ENCRYPT_NONE
+         elif option == '-E': opts.write.do_encrypt = encrypt_method_from_string(value)
+         elif option == '-P': opts.write.permissions = int(value)
+         elif option == '-O': opts.write.opwd_utf8 = value[:128]
+         elif option == '-U': opts.write.upwd_utf8 = value[:128]
+         else:
+            clean_usage()
+
+    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
+        opts.write.do_pretty = 1
+
+    if not argv:
+        clean_usage()
+
+    infile = argv.pop(0)
+
+    if argv and '.pdf' in argv[0].lower():
+        outfile = argv.pop(0)
+
+    try:
+        mupdf.pdf_clean_file(infile, outfile, password, opts, argv)
+    except Exception as e:
+        print( f'mupdf.pdf_clean_file() failed: {e}')
+        errors += 1
+        if 0:
+            # Enable for debugging.
+            import traceback
+            traceback.print_exc()
+    return errors != 0;
+
+
+
+# Things for draw.
+#
+
+import mutool_draw
+
+draw = mutool_draw.draw
+
+
+
+# Things for convert.
+#
+
+
+def convert_usage():
+    print( textwrap.dedent(
+            f'''
+            mutool convert version {mupdf.FZ_VERSION}
+            Usage: mutool convert [options] file [pages]
+            \t-p -\tpassword
+
+            \t-A -\tnumber of bits of antialiasing (0 to 8)
+            \t-W -\tpage width for EPUB layout
+            \t-H -\tpage height for EPUB layout
+            \t-S -\tfont size for EPUB layout
+            \t-U -\tfile name of user stylesheet for EPUB layout
+            \t-X\tdisable document styles for EPUB layout
+
+            \t-o -\toutput file name (%d for page number)
+            \t-F -\toutput format (default inferred from output file name)
+            \t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.
+            \t\t\tprint-raster: pcl, pclm, ps, pwg.
+            \t\t\tvector: pdf, svg.
+            \t\t\ttext: html, xhtml, text, stext.
+            \t-O -\tcomma separated list of options for output format
+
+            \tpages\tcomma separated list of page ranges (N=last page)
+            '''
+        ))
+    print( mupdf.fz_draw_options_usage)
+    print( mupdf.fz_pcl_write_options_usage)
+    print( mupdf.fz_pclm_write_options_usage)
+    print( mupdf.fz_pwg_write_options_usage)
+    print( mupdf.fz_stext_options_usage)
+    print( mupdf.fz_pdf_write_options_usage)
+    print( mupdf.fz_svg_write_options_usage)
+    sys.exit(1)
+
+
+def convert_runpage( doc, number, out):
+    page = mupdf.FzPage( doc, number - 1)
+    mediabox = page.fz_bound_page()
+    dev = out.fz_begin_page(mediabox)
+    page.fz_run_page( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
+    out.fz_end_page()
+
+def convert_runrange( doc, count, range_, out):
+    start = None
+    end = None
+    while 1:
+        range_, start, end = mupdf.fz_parse_page_range( range_, count)
+        if range_ is None:
+            break
+        step = +1 if end > start else -1
+        for i in range( start, end, step):
+            convert_runpage( doc, i, out)
+
+def convert( argv):
+    # input options
+    password = ''
+    alphabits = 8
+    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
+    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
+    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
+    layout_css = None
+    layout_use_doc_css = 1
+
+    # output options
+    output = None
+    format_ = None
+    options = ''
+
+    items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
+    for option, value in items:
+        if 0: pass  # lgtm [py/unreachable-statement]
+        elif option == '-p':    password = value
+        elif option == '-A':    alphabits = int(value)
+        elif option == '-W':    layout_w = float( value)
+        elif option == '-H':    layout_h = float( value)
+        elif option == '-S':    layout_em = float( value)
+        elif option == '-U':    layout_css = value
+        elif option == '-X':    layout_use_doc_css = 0
+        elif option == '-o':    output = value
+        elif option == '-F':    format_ = value
+        elif option == '-O':    options = value
+        else:   assert 0
+
+    if not argv or (not format_ and not output):
+        convert_usage()
+
+    mupdf.fz_set_aa_level( alphabits)
+    if layout_css:
+        buf = mupdf.FzBuffer( layout_css)
+        mupdf.fz_set_user_css( buf.string_from_buffer())
+
+    mupdf.fz_set_use_document_css(layout_use_doc_css)
+
+    if format_:
+        out = mupdf.FzDocumentWriter( output, format_, options)
+    else:
+        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)
+
+    i = 0
+    while 1:
+        if i >= len( argv):
+            break
+        arg = argv[i]
+        doc = mupdf.FzDocument( arg)
+        if doc.fz_needs_password():
+            if not doc.fz_authenticate_password( password):
+                raise Exception( f'cannot authenticate password: {arg}')
+        doc.fz_layout_document( layout_w, layout_h, layout_em)
+        count = doc.fz_count_pages()
+
+        range_ = '1-N'
+        if i + 1 < len(argv) and mupdf.fz_is_page_range(ctx, argv[i+1]):
+            i += 1
+            range_ = argv[i]
+        convert_runrange( doc, count, range_, out)
+        i += 1
+
+    out.fz_close_document_writer()
+
+
+
+# Things for trace.
+#
+
+def trace_usage():
+    print( textwrap.dedent('''
+            Usage: mutool trace [options] file [pages]
+            \t-p -\tpassword
+
+            \t-W -\tpage width for EPUB layout
+            \t-H -\tpage height for EPUB layout
+            \t-S -\tfont size for EPUB layout
+            \t-U -\tfile name of user stylesheet for EPUB layout
+            \t-X\tdisable document styles for EPUB layout
+
+            \t-d\tuse display list
+
+            \tpages\tcomma separated list of page numbers and ranges
+            '''))
+    sys.exit( 1)
+
+def trace_runpage( use_display_list, doc, number):
+    page = mupdf.FzPage( doc, number-1)
+    mediabox = page.fz_bound_page()
+    print( f'<page number="{number}" mediabox="{mediabox.x0} {mediabox.y0} {mediabox.x1} {mediabox.y1}">')
+    output = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
+    dev = mupdf.FzDevice( output)
+    if use_display_list:
+        list_ = mupdf.FzDisplayList( page)
+        list_.fz_run_display_list( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzRect(mupdf.fz_infinite_rect), mupdf.FzCookie())
+    else:
+        page.fz_run_page( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
+    output.fz_close_output()
+    print( '</page>')
+
+def trace_runrange( use_display_list, doc, count, range_):
+    start = None
+    end = None
+    while 1:
+        range_, start, end = mupdf.fz_parse_page_range( range_, count)
+        print(f'range_={range_!r} start={start} end={end}')
+        if range_ is None:
+            break
+        step = +1 if end > start else -1
+        for i in range( start, end, step):
+            trace_runpage( use_display_list, doc, i)
+
+def trace( argv):
+
+    password = ''
+    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
+    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
+    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
+    layout_css = None
+    layout_use_doc_css = 1
+
+    use_display_list = 0
+
+    argv_i = 0
+    while 1:
+        arg = argv[ argv_i]
+        if arg == '-p':
+            password = next( opt)
+        elif arg == '-W':
+            argv_i += 1
+            layout_w = float( argv[argv_i])
+        elif arg == '-H':
+            argv_i += 1
+            layout_h = float( argv[argv_i])
+        elif arg == '-S':
+            argv_i += 1
+            layout_em = float( argv[argv_i])
+        elif arg == '-U':
+            argv_i += 1
+            layout_css = argv[argv_i]
+        elif arg == '-X':
+            layout_use_doc_css = 0
+        elif arg == '-d':
+            use_display_list = 1
+        else:
+            break
+        argv_i += 1
+
+    if argv_i == len( argv):
+        trace_usage()
+
+    if layout_css:
+        buffer_ = mupdf.FzBuffer( layout_css)
+        mupdf.fz_set_user_css( buffer_.string_from_buffer())
+
+    mupdf.fz_set_use_document_css( layout_use_doc_css)
+
+    for argv_i in range( argv_i, len( argv)):
+        arg = argv[ argv_i]
+        doc = mupdf.FzDocument( arg)
+        if doc.fz_needs_password():
+            doc.fz_authenticate_password( password)
+        doc.fz_layout_document( layout_w, layout_h, layout_em)
+        print( f'<document filename="{arg}">')
+        count = doc.fz_count_pages()
+        if argv_i + 1 < len( argv) and mupdf.fz_is_page_range( argv[ argv_i+1]):
+            argv_i += 1
+            trace_runrange( use_display_list, doc, count, argv[ argv_i])
+        else:
+            trace_runrange( use_display_list, doc, count, '1-N')
+        print( '</document>')
+
+
+
+def main( argv):
+    arg1 = argv[1]
+    fn = getattr( sys.modules[__name__], arg1, None)
+    if not fn:
+        print( f'cannot find {arg1}')
+        usage()
+        sys.exit(1)
+
+    return fn( argv[2:])
+
+
+if __name__ == '__main__':
+    try:
+        e = main( sys.argv)
+        sys.exit(e)
+    except Exception as e:
+        if 0:   # Enable when debugging.
+            sys.stdout.flush()
+            sys.stderr.flush()
+            print(f'Exception: {e}')
+            sys.stdout.flush()
+        raise