view mupdf-source/scripts/mutool.py @ 40:aa33339d6b8a upstream

ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:31:38 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

#!/usr/bin/env python3

'''
Intended to behave exactly like mutool, but uses the mupdf python => C++ =>
mupdf.so wrappers.

The code is intended to be similar to the mutool C code, to simplify
comparison.
'''

import getopt
import os
import sys
import textwrap

# Select which Python binding provides the `mupdf` name, based on the
# MUPDF_PYTHON environment variable: 'swig' (also used when the variable is
# unset) or 'cppyy'. Any other value is rejected with an exception.
if os.environ.get('MUPDF_PYTHON') in ('swig', None):
    # PYTHONPATH should have been set up to point to a build/shared-*/
    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
    # mupdf_cppyy is imported from platform/python under the parent of this
    # script's directory. The extra sys.path entry is only needed for this
    # one import, so it is removed again straight afterwards.
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    del sys.path[0]
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')

def usage():
    '''
    Prints the list of commands understood by this script to stdout.
    '''
    commands = (
            ('clean', 'rewrite pdf file'),
            ('convert', 'convert document'),
            ('trace', 'trace device calls'),
            ('draw', 'convert document'),
            )
    # The leading and trailing empty items give the text a blank first line
    # and a final newline; print() then appends one more newline.
    lines = ['', 'usage: mutool.py <command> [options]']
    lines.extend( f'\t{name}\t-- {summary}' for name, summary in commands)
    lines.append( '')
    print( '\n'.join( lines))


# Things for clean
#
def clean_usage():
    '''
    Prints help for the `clean` command to stdout, then ends the process
    with exit status 1 (via sys.exit(), so this function never returns).
    '''
    # (option, description) pairs, in the order they are shown.
    rows = (
            ('-p -', 'password'),
            ('-g', 'garbage collect unused objects'),
            ('-gg', 'in addition to -g compact xref table'),
            ('-ggg', 'in addition to -gg merge duplicate objects'),
            ('-gggg', 'in addition to -ggg check streams for duplication'),
            ('-l', 'linearize PDF'),
            ('-D', 'save file without encryption'),
            ('-E -', 'save file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)'),
            ('-O -', 'owner password (only if encrypting)'),
            ('-U -', 'user password (only if encrypting)'),
            ('-P -', 'permission flags (only if encrypting)'),
            ('-a', 'ascii hex encode binary streams'),
            ('-d', 'decompress streams'),
            ('-z', 'deflate uncompressed streams'),
            ('-f', 'compress font streams'),
            ('-i', 'compress image streams'),
            ('-c', 'clean content streams'),
            ('-s', 'sanitize content streams'),
            ('-A', 'create appearance streams for annotations'),
            ('-AA', 'recreate appearance streams for annotations'),
            ('pages', 'comma separated list of page numbers and ranges'),
            )
    heading = 'usage: mutool clean [options] input.pdf [output.pdf] [pages]'
    table = ''.join( f'\t{flag}\t{meaning}\n' for flag, meaning in rows)
    # Blank line first, then the heading and the table; print() adds the
    # closing blank line.
    print( f'\n{heading}\n{table}')
    sys.exit(1)

def _encrypt_method_from_string( name):
    '''
    Maps an encryption method name, as given to `clean -E`, to the matching
    mupdf.PDF_ENCRYPT_* value. Counterpart of encrypt_method_from_string() in
    the C mutool.

    Returns mupdf.PDF_ENCRYPT_UNKNOWN for any name that is not recognised.
    '''
    if name == 'rc4-40':    return mupdf.PDF_ENCRYPT_RC4_40
    if name == 'rc4-128':   return mupdf.PDF_ENCRYPT_RC4_128
    if name == 'aes-128':   return mupdf.PDF_ENCRYPT_AES_128
    if name == 'aes-256':   return mupdf.PDF_ENCRYPT_AES_256
    return mupdf.PDF_ENCRYPT_UNKNOWN


def clean(argv):
    '''
    Implements the `clean` command: rewrites a PDF file using
    mupdf.pdf_clean_file().

    argv:
        Command line arguments after the command name: options, then the
        input file, then an optional output file (recognised by containing
        '.pdf', case-insensitively), then optional page specifications.

    Returns True if mupdf.pdf_clean_file() raised an exception (the error is
    printed), otherwise False. Shows the usage text and exits with status 1
    if an option is not recognised or no input file is given.
    '''
    outfile = 'out.pdf'
    password = ''
    opts = mupdf.PdfCleanOptions()
    # NOTE(review): this raises the garbage collection level by one before
    # any -g option has been seen - confirm that this default is intended.
    opts.write.do_garbage += 1
    errors = 0
    try:
        items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
    except getopt.GetoptError as e:
        # getopt raises for unknown options and for missing option values;
        # report the problem and show the usage text instead of a traceback.
        print( e, file=sys.stderr)
        clean_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-d':    opts.write.do_decompress += 1
        elif option == '-z':    opts.write.do_compress += 1
        elif option == '-f':    opts.write.do_compress_fonts += 1
        elif option == '-i':    opts.write.do_compress_images += 1
        elif option == '-a':    opts.write.do_ascii += 1
        elif option == '-g':    opts.write.do_garbage += 1
        elif option == '-l':    opts.write.do_linear += 1
        elif option == '-c':    opts.write.do_clean += 1
        elif option == '-s':    opts.write.do_sanitize += 1
        elif option == '-A':    opts.write.do_appearance += 1
        elif option == '-D':    opts.write.do_encrypt = mupdf.PDF_ENCRYPT_NONE
        elif option == '-E':    opts.write.do_encrypt = _encrypt_method_from_string( value)
        elif option == '-P':    opts.write.permissions = int( value)
        elif option == '-O':    opts.write.opwd_utf8 = value[:128]
        elif option == '-U':    opts.write.upwd_utf8 = value[:128]
        else:
            clean_usage()

    # Uncompressed or ascii-encoded output is pretty-printed, unless the
    # streams are going to be compressed anyway.
    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
        opts.write.do_pretty = 1

    if not argv:
        clean_usage()

    infile = argv.pop(0)

    # A following argument that looks like a pdf file name is the output
    # file; whatever remains is passed on as the page selection.
    if argv and '.pdf' in argv[0].lower():
        outfile = argv.pop(0)

    try:
        mupdf.pdf_clean_file(infile, outfile, password, opts, argv)
    except Exception as e:
        print( f'mupdf.pdf_clean_file() failed: {e}')
        errors += 1
        if 0:
            # Enable for debugging.
            import traceback
            traceback.print_exc()
    return errors != 0



# Things for draw.
#

# The `draw` command is implemented in the separate mutool_draw module.
# Binding it to a module-level name here lets main() dispatch the `draw`
# command to it in the same way as the commands defined in this file.
import mutool_draw

draw = mutool_draw.draw



# Things for convert.
#


def convert_usage():
    '''
    Prints help for the `convert` command to stdout, followed by the option
    help texts provided by mupdf for its output formats, then ends the
    process with exit status 1 (via sys.exit(), so this never returns).
    '''
    help_text = textwrap.dedent(
            f'''
            mutool convert version {mupdf.FZ_VERSION}
            Usage: mutool convert [options] file [pages]
            \t-p -\tpassword

            \t-A -\tnumber of bits of antialiasing (0 to 8)
            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout

            \t-o -\toutput file name (%d for page number)
            \t-F -\toutput format (default inferred from output file name)
            \t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.
            \t\t\tprint-raster: pcl, pclm, ps, pwg.
            \t\t\tvector: pdf, svg.
            \t\t\ttext: html, xhtml, text, stext.
            \t-O -\tcomma separated list of options for output format

            \tpages\tcomma separated list of page ranges (N=last page)
            '''
            )
    print( help_text)
    # Each attribute is looked up only when it is about to be printed, so
    # the texts appear one after another in this order.
    for name in (
            'fz_draw_options_usage',
            'fz_pcl_write_options_usage',
            'fz_pclm_write_options_usage',
            'fz_pwg_write_options_usage',
            'fz_stext_options_usage',
            'fz_pdf_write_options_usage',
            'fz_svg_write_options_usage',
            ):
        print( getattr( mupdf, name))
    sys.exit(1)


def convert_runpage( doc, number, out):
    '''
    Sends one page of `doc` to the document writer `out`.

    `number` is one greater than the index used to load the page. The page
    is run, untransformed, on the device that `out` returns for a new page
    with the same bounds as the source page.
    '''
    src_page = mupdf.FzPage( doc, number - 1)
    bounds = src_page.fz_bound_page()
    device = out.fz_begin_page( bounds)
    src_page.fz_run_page(
            device,
            mupdf.FzMatrix( mupdf.fz_identity),
            mupdf.FzCookie(),
            )
    out.fz_end_page()

def convert_runrange( doc, count, range_, out):
    '''
    Runs convert_runpage() for every page selected by `range_`.

    doc:
        Document to take the pages from.
    count:
        Number of pages in `doc`; passed to mupdf.fz_parse_page_range().
    range_:
        Comma separated list of page ranges, for example '1-N'.
    out:
        Document writer, passed through to convert_runpage().

    Both ends of each range are included, as in the runrange() loop of the
    C mutool. A range whose start is not before its end is walked
    backwards.
    '''
    while 1:
        # Each call parses one range and returns the rest of the text; the
        # loop stops once None is returned in place of the remaining text.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end > start else -1
        # range() excludes its stop value, so stop one step beyond `end`.
        # This also makes a single page range (start == end) yield that
        # page instead of nothing.
        for i in range( start, end + step, step):
            convert_runpage( doc, i, out)

def convert( argv):
    '''
    Implements the `convert` command: writes the selected pages of one or
    more documents to a single output document.

    argv:
        Command line arguments after the command name: options, then one or
        more input files, each optionally followed by a page range.

    Shows the usage text and exits with status 1 if an option is not
    recognised, if no input file is given, or if neither an output file
    (-o) nor an output format (-F) is given.

    Raises Exception if a document needs a password and the one given with
    -p is not accepted.
    '''
    # input options
    password = ''
    alphabits = 8
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    # output options
    output = None
    format_ = None
    options = ''

    try:
        items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
    except getopt.GetoptError as e:
        # getopt raises for unknown options and for missing option values;
        # report the problem and show the usage text instead of a traceback.
        print( e, file=sys.stderr)
        convert_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-A':    alphabits = int(value)
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-o':    output = value
        elif option == '-F':    format_ = value
        elif option == '-O':    options = value
        else:
            # Not reachable while the option string above and this chain
            # agree; kept as a safety net that does not depend on `assert`.
            convert_usage()

    if not argv or (not format_ and not output):
        convert_usage()

    mupdf.fz_set_aa_level( alphabits)
    if layout_css:
        # The user stylesheet is read from the named file.
        # NOTE(review): the method name carries the fz_ prefix like every
        # other wrapper method used in this file - confirm against the
        # mupdf binding in use.
        buf = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buf.fz_string_from_buffer())

    mupdf.fz_set_use_document_css(layout_use_doc_css)

    if format_:
        out = mupdf.FzDocumentWriter( output, format_, options)
    else:
        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)

    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        count = doc.fz_count_pages()

        # An argument that follows a file name and parses as a page range
        # applies to that file; otherwise all of its pages are converted.
        range_ = '1-N'
        if i + 1 < len(argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        convert_runrange( doc, count, range_, out)
        i += 1

    out.fz_close_document_writer()



# Things for trace.
#

def trace_usage():
    '''
    Prints help for the `trace` command to stdout, then ends the process
    with exit status 1 (via sys.exit(), so this function never returns).
    '''
    # Groups of (option, description) pairs; the groups are separated by a
    # blank line in the output.
    groups = (
            (
                ('-p -', 'password'),
            ),
            (
                ('-W -', 'page width for EPUB layout'),
                ('-H -', 'page height for EPUB layout'),
                ('-S -', 'font size for EPUB layout'),
                ('-U -', 'file name of user stylesheet for EPUB layout'),
                ('-X', 'disable document styles for EPUB layout'),
            ),
            (
                ('-d', 'use display list'),
            ),
            (
                ('pages', 'comma separated list of page numbers and ranges'),
            ),
            )
    sections = [
            '\n'.join( f'\t{flag}\t{meaning}' for flag, meaning in group)
            for group in groups
            ]
    heading = 'Usage: mutool trace [options] file [pages]'
    # Blank line first and a final newline last; print() then adds the
    # closing blank line.
    print( '\n' + heading + '\n' + '\n\n'.join( sections) + '\n')
    sys.exit( 1)

def trace_runpage( use_display_list, doc, number):
    '''
    Prints a <page> element for one page of `doc`.

    `number` is one greater than the index used to load the page. The
    opening tag carries the page number and the page bounds. The page is
    then run on a device created from an output bound to stdout, either
    directly or, if `use_display_list` is true, through a display list
    created from the page.
    '''
    pg = mupdf.FzPage( doc, number-1)
    box = pg.fz_bound_page()
    print( f'<page number="{number}" mediabox="{box.x0} {box.y0} {box.x1} {box.y1}">')
    stdout_ = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
    device = mupdf.FzDevice( stdout_)
    if not use_display_list:
        pg.fz_run_page(
                device,
                mupdf.FzMatrix( mupdf.fz_identity),
                mupdf.FzCookie(),
                )
    else:
        dlist = mupdf.FzDisplayList( pg)
        dlist.fz_run_display_list(
                device,
                mupdf.FzMatrix( mupdf.fz_identity),
                mupdf.FzRect( mupdf.fz_infinite_rect),
                mupdf.FzCookie(),
                )
    stdout_.fz_close_output()
    print( '</page>')

def trace_runrange( use_display_list, doc, count, range_):
    '''
    Runs trace_runpage() for every page selected by `range_`.

    use_display_list:
        Passed through to trace_runpage().
    doc:
        Document to take the pages from.
    count:
        Number of pages in `doc`; passed to mupdf.fz_parse_page_range().
    range_:
        Comma separated list of page numbers and ranges, for example '1-N'.

    Both ends of each range are included, as in the runrange() loop of the
    C mutool. A range whose start is not before its end is walked
    backwards. Nothing other than the output of trace_runpage() is printed.
    '''
    while 1:
        # Each call parses one range and returns the rest of the text; the
        # loop stops once None is returned in place of the remaining text.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end > start else -1
        # range() excludes its stop value, so stop one step beyond `end`.
        # This also makes a single page range (start == end) yield that
        # page instead of nothing.
        for i in range( start, end + step, step):
            trace_runpage( use_display_list, doc, i)

def trace( argv):
    '''
    Implements the `trace` command: prints a <document> element for each
    input file, containing one <page> element per selected page.

    argv:
        Command line arguments after the command name: options, then one or
        more input files, each optionally followed by a page range.

    Shows the usage text and exits with status 1 if an option is not
    recognised or if no input file is given.

    Raises Exception if a document needs a password and the one given with
    -p is not accepted.
    '''
    password = ''
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    use_display_list = 0

    # Options are parsed with getopt, as for the other commands. Parsing
    # stops at the first argument that is not an option.
    try:
        items, argv = getopt.getopt( argv, 'p:W:H:S:U:Xd')
    except getopt.GetoptError as e:
        # getopt raises for unknown options and for missing option values;
        # report the problem and show the usage text instead of a traceback.
        print( e, file=sys.stderr)
        trace_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-d':    use_display_list = 1
        else:
            trace_usage()

    if not argv:
        trace_usage()

    if layout_css:
        # The user stylesheet is read from the named file.
        # NOTE(review): the method name carries the fz_ prefix like every
        # other wrapper method used in this file - confirm against the
        # mupdf binding in use.
        buffer_ = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buffer_.fz_string_from_buffer())

    mupdf.fz_set_use_document_css( layout_use_doc_css)

    # A while loop is used because an iteration may consume two arguments:
    # the file name and the page range that follows it.
    argv_i = 0
    while argv_i < len( argv):
        arg = argv[ argv_i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        print( f'<document filename="{arg}">')
        count = doc.fz_count_pages()
        range_ = '1-N'
        if argv_i + 1 < len( argv) and mupdf.fz_is_page_range( argv[ argv_i+1]):
            argv_i += 1
            range_ = argv[ argv_i]
        trace_runrange( use_display_list, doc, count, range_)
        print( '</document>')
        argv_i += 1



def main( argv):
    '''
    Runs the command named by argv[1], passing it the remaining arguments.

    argv:
        Complete command line, including the program name in argv[0].

    Returns whatever the command function returns. If no command is given,
    or the command is not one of those listed by usage(), the usage text is
    printed and the process exits with status 1.
    '''
    if len( argv) < 2:
        usage()
        sys.exit(1)

    # Only the documented commands can be run; looking the name up among
    # all module attributes would also accept helpers and imported modules.
    commands = {
            'clean': clean,
            'convert': convert,
            'draw': draw,
            'trace': trace,
            }
    arg1 = argv[1]
    fn = commands.get( arg1)
    if not fn:
        print( f'cannot find {arg1}')
        usage()
        sys.exit(1)

    return fn( argv[2:])


if __name__ == '__main__':
    # The value returned by main() becomes the exit status. sys.exit()
    # raises SystemExit, which the handler below does not catch.
    try:
        sys.exit( main( sys.argv))
    except Exception as exc:
        if 0:   # Enable when debugging.
            sys.stdout.flush()
            sys.stderr.flush()
            print(f'Exception: {exc}')
            sys.stdout.flush()
        raise