Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/scripts/mutool.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
#!/usr/bin/env python3

'''
Intended to behave exactly like mutool, but uses the mupdf python => C++ =>
mupdf.so wrappers.

The code is intended to be similar to the mutool C code, to simplify
comparison.
'''

import getopt
import os
import sys
import textwrap

if os.environ.get('MUPDF_PYTHON') in ('swig', None):
    # PYTHONPATH should have been set up to point to a build/shared-*/
    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    del sys.path[0]
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')

def usage():
    '''
    Prints the list of available commands. Does not exit.
    '''
    print( textwrap.dedent('''
            usage: mutool.py <command> [options]
            \tclean\t-- rewrite pdf file
            \tconvert\t-- convert document
            \ttrace\t-- trace device calls
            \tdraw\t-- convert document
            '''))


# Things for clean
#
def clean_usage():
    '''
    Prints help for the clean command, then exits with status 1.
    '''
    print(textwrap.dedent(
            '''
            usage: mutool clean [options] input.pdf [output.pdf] [pages]
            \t-p -\tpassword
            \t-g\tgarbage collect unused objects
            \t-gg\tin addition to -g compact xref table
            \t-ggg\tin addition to -gg merge duplicate objects
            \t-gggg\tin addition to -ggg check streams for duplication
            \t-l\tlinearize PDF
            \t-D\tsave file without encryption
            \t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)
            \t-O -\towner password (only if encrypting)
            \t-U -\tuser password (only if encrypting)
            \t-P -\tpermission flags (only if encrypting)
            \t-a\tascii hex encode binary streams
            \t-d\tdecompress streams
            \t-z\tdeflate uncompressed streams
            \t-f\tcompress font streams
            \t-i\tcompress image streams
            \t-c\tclean content streams
            \t-s\tsanitize content streams
            \t-A\tcreate appearance streams for annotations
            \t-AA\trecreate appearance streams for annotations
            \tpages\tcomma separated list of page numbers and ranges
            '''
            ))
    sys.exit(1)

def encrypt_method_from_string(name):
    '''
    Returns the mupdf.PDF_ENCRYPT_* value for an encryption method name as
    accepted by -E (rc4-40, rc4-128, aes-128 or aes-256). Any other name
    gives mupdf.PDF_ENCRYPT_UNKNOWN.
    '''
    # Attribute names are resolved lazily so that an unsupported name does
    # not need every constant to be looked up.
    methods = {
            'rc4-40': 'PDF_ENCRYPT_RC4_40',
            'rc4-128': 'PDF_ENCRYPT_RC4_128',
            'aes-128': 'PDF_ENCRYPT_AES_128',
            'aes-256': 'PDF_ENCRYPT_AES_256',
            }
    return getattr(mupdf, methods.get(name, 'PDF_ENCRYPT_UNKNOWN'))

def clean(argv):
    '''
    Implements the clean command: rewrites a pdf file.

    argv:
        Arguments following the command name: options, the input file,
        optionally an output file (recognised by containing '.pdf',
        default 'out.pdf'), then any remaining items which are passed on
        to mupdf.pdf_clean_file().

    Returns true if mupdf.pdf_clean_file() raised an exception, otherwise
    false. Bad or missing arguments result in clean_usage() being called,
    which exits.
    '''
    outfile = 'out.pdf'
    password = ''
    opts = mupdf.PdfCleanOptions()
    # NOTE(review): this makes do_garbage non-zero even without -g; kept as
    # found - confirm against the C implementation.
    opts.write.do_garbage += 1
    errors = 0
    try:
        items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
    except getopt.GetoptError as e:
        # Unknown option or missing option argument.
        print( f'mutool clean: {e}')
        clean_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-d':    opts.write.do_decompress += 1
        elif option == '-z':    opts.write.do_compress += 1
        elif option == '-f':    opts.write.do_compress_fonts += 1
        elif option == '-i':    opts.write.do_compress_images += 1
        elif option == '-a':    opts.write.do_ascii += 1
        elif option == '-g':    opts.write.do_garbage += 1
        elif option == '-l':    opts.write.do_linear += 1
        elif option == '-c':    opts.write.do_clean += 1
        elif option == '-s':    opts.write.do_sanitize += 1
        elif option == '-A':    opts.write.do_appearance += 1
        elif option == '-D':    opts.write.do_encrypt = mupdf.PDF_ENCRYPT_NONE
        elif option == '-E':    opts.write.do_encrypt = encrypt_method_from_string(value)
        elif option == '-P':    opts.write.permissions = int(value)
        elif option == '-O':    opts.write.opwd_utf8 = value[:128]
        elif option == '-U':    opts.write.upwd_utf8 = value[:128]
        else:
            clean_usage()

    # Uncompressed or ascii output is pretty-printed unless it will be
    # recompressed.
    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
        opts.write.do_pretty = 1

    if not argv:
        clean_usage()

    infile = argv.pop(0)

    if argv and '.pdf' in argv[0].lower():
        outfile = argv.pop(0)

    try:
        mupdf.pdf_clean_file(infile, outfile, password, opts, argv)
    except Exception as e:
        print( f'mupdf.pdf_clean_file() failed: {e}')
        errors += 1
        if 0:
            # Enable for debugging.
            import traceback
            traceback.print_exc()
    return errors != 0



# Things for draw.
#

import mutool_draw

draw = mutool_draw.draw



# Things for convert.
#


def convert_usage():
    '''
    Prints help for the convert command, including the option descriptions
    provided by mupdf for each writer, then exits with status 1.
    '''
    print( textwrap.dedent(
            f'''
            mutool convert version {mupdf.FZ_VERSION}
            Usage: mutool convert [options] file [pages]
            \t-p -\tpassword

            \t-A -\tnumber of bits of antialiasing (0 to 8)
            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout

            \t-o -\toutput file name (%d for page number)
            \t-F -\toutput format (default inferred from output file name)
            \t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.
            \t\t\tprint-raster: pcl, pclm, ps, pwg.
            \t\t\tvector: pdf, svg.
            \t\t\ttext: html, xhtml, text, stext.
            \t-O -\tcomma separated list of options for output format

            \tpages\tcomma separated list of page ranges (N=last page)
            '''
            ))
    print( mupdf.fz_draw_options_usage)
    print( mupdf.fz_pcl_write_options_usage)
    print( mupdf.fz_pclm_write_options_usage)
    print( mupdf.fz_pwg_write_options_usage)
    print( mupdf.fz_stext_options_usage)
    print( mupdf.fz_pdf_write_options_usage)
    print( mupdf.fz_svg_write_options_usage)
    sys.exit(1)


def convert_runpage( doc, number, out):
    '''
    Runs one page of <doc> through document writer <out>.

    number:
        Page number as used in page ranges; the page is loaded with index
        number - 1.
    '''
    page = mupdf.FzPage( doc, number - 1)
    mediabox = page.fz_bound_page()
    dev = out.fz_begin_page(mediabox)
    page.fz_run_page( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    out.fz_end_page()

def convert_pagenumbers( start, end):
    '''
    Returns the page numbers from <start> to <end> with both ends included,
    counting downwards if <end> is less than <start>.
    '''
    step = +1 if end > start else -1
    # The C mutool code loops with <= / >=, so <end> itself must be produced.
    return range( start, end + step, step)

def convert_runrange( doc, count, range_, out):
    '''
    Converts every page of <doc> that is selected by the page range string
    <range_>.

    count:
        Number of pages in <doc>, passed to mupdf.fz_parse_page_range().
    '''
    while 1:
        # Each call consumes one comma-separated item; the returned string
        # is None once nothing is left.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        for i in convert_pagenumbers( start, end):
            convert_runpage( doc, i, out)

def convert( argv):
    '''
    Implements the convert command.

    argv:
        Arguments following the command name: options, then input files,
        each optionally followed by a page range.

    Calls convert_usage() (which exits) for bad options or if there is no
    input file or neither -o nor -F was given. Raises Exception if a
    document's password cannot be authenticated.
    '''
    # input options
    password = ''
    alphabits = 8
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    # output options
    output = None
    format_ = None
    options = ''

    try:
        items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
    except getopt.GetoptError as e:
        # Unknown option or missing option argument.
        print( f'mutool convert: {e}')
        convert_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-A':    alphabits = int(value)
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-o':    output = value
        elif option == '-F':    format_ = value
        elif option == '-O':    options = value
        else:
            convert_usage()

    if not argv or (not format_ and not output):
        convert_usage()

    mupdf.fz_set_aa_level( alphabits)
    if layout_css:
        buf = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buf.string_from_buffer())

    mupdf.fz_set_use_document_css(layout_use_doc_css)

    if format_:
        out = mupdf.FzDocumentWriter( output, format_, options)
    else:
        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)

    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        count = doc.fz_count_pages()

        range_ = '1-N'
        # A page range directly after a file name applies to that file. The
        # wrapper takes only the string, as in the equivalent call in trace().
        if i + 1 < len(argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        convert_runrange( doc, count, range_, out)
        i += 1

    out.fz_close_document_writer()



# Things for trace.
#

def trace_usage():
    '''
    Prints help for the trace command, then exits with status 1.
    '''
    print( textwrap.dedent('''
            Usage: mutool trace [options] file [pages]
            \t-p -\tpassword

            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout

            \t-d\tuse display list

            \tpages\tcomma separated list of page numbers and ranges
            '''))
    sys.exit( 1)

def trace_runpage( use_display_list, doc, number):
    '''
    Prints a <page> element for one page of <doc> to stdout, running the
    page through a device that is constructed from an FzOutput on stdout.

    use_display_list:
        If true the page is first turned into an FzDisplayList and the list
        is run, otherwise the page is run directly.
    number:
        Page number as used in page ranges; the page is loaded with index
        number - 1.
    '''
    page = mupdf.FzPage( doc, number-1)
    mediabox = page.fz_bound_page()
    print( f'<page number="{number}" mediabox="{mediabox.x0} {mediabox.y0} {mediabox.x1} {mediabox.y1}">')
    output = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
    dev = mupdf.FzDevice( output)
    if use_display_list:
        list_ = mupdf.FzDisplayList( page)
        list_.fz_run_display_list( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzRect(mupdf.fz_infinite_rect), mupdf.FzCookie())
    else:
        page.fz_run_page( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    output.fz_close_output()
    print( '</page>')

def trace_pagenumbers( start, end):
    '''
    Returns the page numbers from <start> to <end> with both ends included,
    counting downwards if <end> is less than <start>.
    '''
    step = +1 if end > start else -1
    # The C mutool code loops with <= / >=, so <end> itself must be produced.
    return range( start, end + step, step)

def trace_runrange( use_display_list, doc, count, range_):
    '''
    Traces every page of <doc> that is selected by the page range string
    <range_>.

    count:
        Number of pages in <doc>, passed to mupdf.fz_parse_page_range().
    '''
    while 1:
        # Each call consumes one comma-separated item; the returned string
        # is None once nothing is left.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        for i in trace_pagenumbers( start, end):
            trace_runpage( use_display_list, doc, i)

def trace( argv):
    '''
    Implements the trace command.

    argv:
        Arguments following the command name: options, then input files,
        each optionally followed by a page range.

    Calls trace_usage() (which exits) for bad options or if no input file
    is given. Raises Exception if a document's password cannot be
    authenticated.
    '''
    password = ''
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    use_display_list = 0

    # Parsed with getopt like the clean and convert commands; parsing stops
    # at the first non-option argument.
    try:
        items, argv = getopt.getopt( argv, 'p:W:H:S:U:Xd')
    except getopt.GetoptError as e:
        # Unknown option or missing option argument.
        print( f'mutool trace: {e}')
        trace_usage()
    for option, value in items:
        if option == '-p':      password = value
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-d':    use_display_list = 1
        else:
            trace_usage()

    if not argv:
        trace_usage()

    if layout_css:
        buffer_ = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buffer_.string_from_buffer())

    mupdf.fz_set_use_document_css( layout_use_doc_css)

    # A while loop is needed because a page range argument has to be
    # skipped; changing the variable of a for loop would not do that.
    i = 0
    while i < len( argv):
        arg = argv[ i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            # Same check as in convert().
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        print( f'<document filename="{arg}">')
        count = doc.fz_count_pages()
        range_ = '1-N'
        if i + 1 < len( argv) and mupdf.fz_is_page_range( argv[ i+1]):
            i += 1
            range_ = argv[ i]
        trace_runrange( use_display_list, doc, count, range_)
        print( '</document>')
        i += 1



def main( argv):
    '''
    Runs the command named by argv[1] with the remaining arguments and
    returns whatever the command function returns.

    argv:
        Complete argument list including the program name, e.g. sys.argv.

    Prints the usage text and exits with status 1 if no command is given
    or the command is not one of those listed by usage().
    '''
    commands = ('clean', 'convert', 'trace', 'draw')
    if len( argv) < 2:
        usage()
        sys.exit(1)
    arg1 = argv[1]
    # Only real commands are looked up, so that other module attributes
    # (imported modules, helper functions) cannot be invoked by name.
    fn = getattr( sys.modules[__name__], arg1, None) if arg1 in commands else None
    if not fn:
        print( f'cannot find {arg1}')
        usage()
        sys.exit(1)

    return fn( argv[2:])


if __name__ == '__main__':
    try:
        e = main( sys.argv)
        sys.exit(e)
    except Exception as e:
        if 0: # Enable when debugging.
            sys.stdout.flush()
            sys.stderr.flush()
            print(f'Exception: {e}')
            sys.stdout.flush()
        raise
