Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/scripts/mutool.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 ''' | |
| 4 Intended to behave exactly like mutool, but uses the mupdf python => C++ => | |
| 5 mupdf.so wrappers. | |
| 6 | |
| 7 The code is intended to be similar to the mutool C code, to simplify | |
| 8 comparison. | |
| 9 ''' | |
| 10 | |
| 11 import getopt | |
| 12 import os | |
| 13 import sys | |
| 14 import textwrap | |
| 15 | |
# Bind the global name `mupdf` to whichever Python wrapper is requested by the
# $MUPDF_PYTHON environment variable ('swig', 'cppyy', or unset for swig).
if os.environ.get('MUPDF_PYTHON') == 'cppyy':
    # mupdf_cppyy is imported from platform/python relative to this script;
    # that directory is on sys.path only for the duration of the import.
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    del sys.path[0]
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
elif os.environ.get('MUPDF_PYTHON') in (None, 'swig'):
    # PYTHONPATH should have been set up to point to a build/shared-*/
    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
else:
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
| 27 | |
def usage():
    '''
    Prints the list of supported sub-commands to stdout.
    '''
    text = '''
            usage: mutool.py <command> [options]
            \tclean\t-- rewrite pdf file
            \tconvert\t-- convert document
            \ttrace\t-- trace device calls
            \tdraw\t-- convert document
            '''
    # dedent() strips the common indentation used to lay out the literal.
    print( textwrap.dedent( text))
| 36 | |
| 37 | |
| 38 # Things for clean | |
| 39 # | |
def clean_usage():
    '''
    Prints help for the `clean` sub-command to stdout, then terminates the
    process via sys.exit(1). Never returns.
    '''
    text = '''
            usage: mutool clean [options] input.pdf [output.pdf] [pages]
            \t-p -\tpassword
            \t-g\tgarbage collect unused objects
            \t-gg\tin addition to -g compact xref table
            \t-ggg\tin addition to -gg merge duplicate objects
            \t-gggg\tin addition to -ggg check streams for duplication
            \t-l\tlinearize PDF
            \t-D\tsave file without encryption
            \t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)
            \t-O -\towner password (only if encrypting)
            \t-U -\tuser password (only if encrypting)
            \t-P -\tpermission flags (only if encrypting)
            \t-a\tascii hex encode binary streams
            \t-d\tdecompress streams
            \t-z\tdeflate uncompressed streams
            \t-f\tcompress font streams
            \t-i\tcompress image streams
            \t-c\tclean content streams
            \t-s\tsanitize content streams
            \t-A\tcreate appearance streams for annotations
            \t-AA\trecreate appearance streams for annotations
            \tpages\tcomma separated list of page numbers and ranges
            '''
    print( textwrap.dedent( text))
    sys.exit(1)
| 68 | |
def _encrypt_method_from_string( name):
    '''
    Maps an encryption method name, as accepted by `clean -E`, to the matching
    mupdf.PDF_ENCRYPT_* constant. Unrecognised names give
    mupdf.PDF_ENCRYPT_UNKNOWN.
    '''
    methods = {
            'rc4-40': mupdf.PDF_ENCRYPT_RC4_40,
            'rc4-128': mupdf.PDF_ENCRYPT_RC4_128,
            'aes-128': mupdf.PDF_ENCRYPT_AES_128,
            'aes-256': mupdf.PDF_ENCRYPT_AES_256,
            }
    return methods.get( name, mupdf.PDF_ENCRYPT_UNKNOWN)


def clean(argv):
    '''
    Implements the `clean` sub-command: parses options, then calls
    mupdf.pdf_clean_file().

    argv:
        Arguments following the `clean` command: options, then
        input.pdf [output.pdf] [pages].

    Returns True if mupdf.pdf_clean_file() raised an exception, otherwise
    False. Bad options or a missing input file lead to clean_usage(), which
    exits the process.
    '''
    outfile = 'out.pdf'
    password = ''
    opts = mupdf.PdfCleanOptions()
    opts.write.do_garbage += 1
    errors = 0
    try:
        items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
    except getopt.GetoptError as e:
        # Unknown option or missing option argument: report it and show help
        # instead of failing with a traceback.
        print( f'{e}', file=sys.stderr)
        clean_usage()
    for option, value in items:
        if 0:   pass    # lgtm [py/unreachable-statement]
        elif option == '-p':    password = value
        elif option == '-d':    opts.write.do_decompress += 1
        elif option == '-z':    opts.write.do_compress += 1
        elif option == '-f':    opts.write.do_compress_fonts += 1
        elif option == '-i':    opts.write.do_compress_images += 1
        elif option == '-a':    opts.write.do_ascii += 1
        elif option == '-g':    opts.write.do_garbage += 1
        elif option == '-l':    opts.write.do_linear += 1
        elif option == '-c':    opts.write.do_clean += 1
        elif option == '-s':    opts.write.do_sanitize += 1
        elif option == '-A':    opts.write.do_appearance += 1
        # The encryption constants live in the mupdf module; bare names here
        # would raise NameError.
        elif option == '-D':    opts.write.do_encrypt = mupdf.PDF_ENCRYPT_NONE
        elif option == '-E':    opts.write.do_encrypt = _encrypt_method_from_string( value)
        elif option == '-P':    opts.write.permissions = int(value)
        # Passwords are truncated to 128 characters before being stored.
        elif option == '-O':    opts.write.opwd_utf8 = value[:128]
        elif option == '-U':    opts.write.upwd_utf8 = value[:128]
        else:
            clean_usage()

    # Pretty-print when the output is being made human-readable (ascii or
    # decompressed) and is not being recompressed.
    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
        opts.write.do_pretty = 1

    if not argv:
        clean_usage()

    infile = argv.pop(0)

    # An optional second positional argument containing '.pdf' names the
    # output file; anything remaining is passed on as the page list.
    if argv and '.pdf' in argv[0].lower():
        outfile = argv.pop(0)

    try:
        mupdf.pdf_clean_file( infile, outfile, password, opts, argv)
    except Exception as e:
        print( f'mupdf.pdf_clean_file() failed: {e}')
        errors += 1
        if 0:
            # Enable for debugging.
            import traceback
            traceback.print_exc()
    return errors != 0
| 118 | |
| 119 | |
| 120 | |
| 121 # Things for draw. | |
| 122 # | |
| 123 | |
# The `draw` sub-command is implemented in the separate mutool_draw module.
import mutool_draw

# Expose it as a module-level name so that main() can look it up by command
# name like the other sub-commands.
draw = mutool_draw.draw
| 127 | |
| 128 | |
| 129 | |
| 130 # Things for convert. | |
| 131 # | |
| 132 | |
| 133 | |
def convert_usage():
    '''
    Prints help for the `convert` sub-command, followed by the per-format
    option help strings exported by mupdf, then terminates the process via
    sys.exit(1). Never returns.
    '''
    text = f'''
            mutool convert version {mupdf.FZ_VERSION}
            Usage: mutool convert [options] file [pages]
            \t-p -\tpassword

            \t-A -\tnumber of bits of antialiasing (0 to 8)
            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout

            \t-o -\toutput file name (%d for page number)
            \t-F -\toutput format (default inferred from output file name)
            \t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.
            \t\t\tprint-raster: pcl, pclm, ps, pwg.
            \t\t\tvector: pdf, svg.
            \t\t\ttext: html, xhtml, text, stext.
            \t-O -\tcomma separated list of options for output format

            \tpages\tcomma separated list of page ranges (N=last page)
            '''
    print( textwrap.dedent( text))
    # Each attribute is looked up immediately before it is printed, in the
    # same order as the individual print statements it replaces.
    for name in (
            'fz_draw_options_usage',
            'fz_pcl_write_options_usage',
            'fz_pclm_write_options_usage',
            'fz_pwg_write_options_usage',
            'fz_stext_options_usage',
            'fz_pdf_write_options_usage',
            'fz_svg_write_options_usage',
            ):
        print( getattr( mupdf, name))
    sys.exit(1)
| 167 | |
| 168 | |
def convert_runpage( doc, number, out):
    '''
    Renders one page of <doc> into document writer <out>.

    doc:
        Document passed to mupdf.FzPage().
    number:
        Page number; the page is loaded with index `number - 1`.
    out:
        Document writer providing fz_begin_page() / fz_end_page().
    '''
    page = mupdf.FzPage( doc, number - 1)
    # The writer hands back the device to draw on for a page of this size.
    dev = out.fz_begin_page( page.fz_bound_page())
    identity = mupdf.FzMatrix( mupdf.fz_identity)
    cookie = mupdf.FzCookie()
    page.fz_run_page( dev, identity, cookie)
    out.fz_end_page()
| 175 | |
def convert_runrange( doc, count, range_, out):
    '''
    Runs convert_runpage() for every page selected by the page-range string
    <range_>.

    doc:
        Document to read pages from.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Comma separated page ranges, e.g. '1-N'.
    out:
        Document writer passed through to convert_runpage().

    mupdf.fz_parse_page_range() is called repeatedly; each call returns the
    remaining text plus the start and end of the next sub-range, and None as
    the remaining text once the string is exhausted.
    '''
    while 1:
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        # Both <start> and <end> are inclusive page numbers, and a range may
        # run backwards. Extend the stop value by one step so that the last
        # page (and a single-page range, where start == end) is not dropped.
        step = +1 if end >= start else -1
        for i in range( start, end + step, step):
            convert_runpage( doc, i, out)
| 186 | |
def convert( argv):
    '''
    Implements the `convert` sub-command: writes the selected pages of one or
    more input documents to a single document writer.

    argv:
        Arguments following the `convert` command: options, then one or more
        `file [pages]` groups.

    Returns None. Calls convert_usage() (which exits) on bad options, if no
    input file is given, or if neither -o nor -F is given. Raises Exception
    if a document needs a password and authentication fails.
    '''
    # input options
    password = ''
    alphabits = 8
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    # output options
    output = None
    format_ = None
    options = ''

    try:
        items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
    except getopt.GetoptError as e:
        # Unknown option or missing option argument: report it and show help
        # instead of failing with a traceback.
        print( f'{e}', file=sys.stderr)
        convert_usage()
    for option, value in items:
        if 0:   pass    # lgtm [py/unreachable-statement]
        elif option == '-p':    password = value
        elif option == '-A':    alphabits = int(value)
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-o':    output = value
        elif option == '-F':    format_ = value
        elif option == '-O':    options = value
        else:
            # getopt only returns options named in the spec above, so this is
            # an internal error; raise explicitly so it survives `python -O`.
            raise AssertionError( f'unexpected option: {option}')

    if not argv or (not format_ and not output):
        convert_usage()

    mupdf.fz_set_aa_level( alphabits)
    if layout_css:
        # NOTE(review): FzBuffer( layout_css) is presumably expected to load
        # the stylesheet file named by -U - confirm against the bindings.
        buf = mupdf.FzBuffer( layout_css)
        # Use the fz_-prefixed free function, consistent with the other mupdf
        # calls in this file.
        mupdf.fz_set_user_css( mupdf.fz_string_from_buffer( buf))

    mupdf.fz_set_use_document_css(layout_use_doc_css)

    if format_:
        out = mupdf.FzDocumentWriter( output, format_, options)
    else:
        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)

    # Positional arguments are `file [pages]` groups, so the index may advance
    # by one or two per document.
    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        count = doc.fz_count_pages()

        range_ = '1-N'
        # The Python wrappers take no context argument.
        if i + 1 < len(argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        convert_runrange( doc, count, range_, out)
        i += 1

    out.fz_close_document_writer()
| 252 | |
| 253 | |
| 254 | |
| 255 # Things for trace. | |
| 256 # | |
| 257 | |
def trace_usage():
    '''
    Prints help for the `trace` sub-command to stdout, then terminates the
    process via sys.exit(1). Never returns.
    '''
    text = '''
            Usage: mutool trace [options] file [pages]
            \t-p -\tpassword

            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout

            \t-d\tuse display list

            \tpages\tcomma separated list of page numbers and ranges
            '''
    print( textwrap.dedent( text))
    sys.exit( 1)
| 274 | |
def trace_runpage( use_display_list, doc, number):
    '''
    Writes a <page>...</page> element for one page to stdout, running the page
    through a device created on a stdout FzOutput.

    use_display_list:
        If true, the page is first turned into a mupdf.FzDisplayList and that
        list is run; otherwise the page is run directly.
    doc:
        Document passed to mupdf.FzPage().
    number:
        Page number; the page is loaded with index `number-1`.
    '''
    page = mupdf.FzPage( doc, number-1)
    mediabox = page.fz_bound_page()
    print( f'<page number="{number}" mediabox="{mediabox.x0} {mediabox.y0} {mediabox.x1} {mediabox.y1}">')
    output = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
    dev = mupdf.FzDevice( output)
    if not use_display_list:
        page.fz_run_page( dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    else:
        list_ = mupdf.FzDisplayList( page)
        list_.fz_run_display_list(
                dev,
                mupdf.FzMatrix(mupdf.fz_identity),
                mupdf.FzRect(mupdf.fz_infinite_rect),
                mupdf.FzCookie(),
                )
    output.fz_close_output()
    print( '</page>')
| 288 | |
def trace_runrange( use_display_list, doc, count, range_):
    '''
    Runs trace_runpage() for every page selected by the page-range string
    <range_>.

    use_display_list:
        Passed through to trace_runpage().
    doc:
        Document to read pages from.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Comma separated page numbers and ranges, e.g. '1-N'.

    mupdf.fz_parse_page_range() is called repeatedly; each call returns the
    remaining text plus the start and end of the next sub-range, and None as
    the remaining text once the string is exhausted.
    '''
    while 1:
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        # Both <start> and <end> are inclusive page numbers, and a range may
        # run backwards. Extend the stop value by one step so that the last
        # page (and a single-page range, where start == end) is not dropped.
        step = +1 if end >= start else -1
        for i in range( start, end + step, step):
            trace_runpage( use_display_list, doc, i)
| 300 | |
def trace( argv):
    '''
    Implements the `trace` sub-command: prints a <document> element for each
    input file, containing one <page> element per selected page.

    argv:
        Arguments following the `trace` command: options, then one or more
        `file [pages]` groups.

    Returns None. Calls trace_usage() (which exits) on bad options or if no
    input file is given.
    '''
    password = ''
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    use_display_list = 0

    # Parse options with getopt, like the other sub-commands; parsing stops at
    # the first non-option argument.
    try:
        items, argv = getopt.getopt( argv, 'p:W:H:S:U:Xd')
    except getopt.GetoptError as e:
        print( f'{e}', file=sys.stderr)
        trace_usage()
    for option, value in items:
        if 0:   pass    # lgtm [py/unreachable-statement]
        elif option == '-p':    password = value
        elif option == '-W':    layout_w = float( value)
        elif option == '-H':    layout_h = float( value)
        elif option == '-S':    layout_em = float( value)
        elif option == '-U':    layout_css = value
        elif option == '-X':    layout_use_doc_css = 0
        elif option == '-d':    use_display_list = 1
        else:
            # getopt only returns options named in the spec above.
            raise AssertionError( f'unexpected option: {option}')

    if not argv:
        trace_usage()

    if layout_css:
        # NOTE(review): FzBuffer( layout_css) is presumably expected to load
        # the stylesheet file named by -U - confirm against the bindings.
        buffer_ = mupdf.FzBuffer( layout_css)
        # Use the fz_-prefixed free function, consistent with the other mupdf
        # calls in this file.
        mupdf.fz_set_user_css( mupdf.fz_string_from_buffer( buffer_))

    mupdf.fz_set_use_document_css( layout_use_doc_css)

    # Positional arguments are `file [pages]` groups. A while loop is needed
    # because the index must skip the page-range argument when one follows the
    # file name.
    argv_i = 0
    while argv_i < len( argv):
        arg = argv[ argv_i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                # Carry on regardless, but do not fail silently.
                print( f'warning: cannot authenticate password: {arg}', file=sys.stderr)
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        print( f'<document filename="{arg}">')
        count = doc.fz_count_pages()
        range_ = '1-N'
        if argv_i + 1 < len( argv) and mupdf.fz_is_page_range( argv[ argv_i+1]):
            argv_i += 1
            range_ = argv[ argv_i]
        trace_runrange( use_display_list, doc, count, range_)
        print( '</document>')
        argv_i += 1
| 360 | |
| 361 | |
| 362 | |
def main( argv):
    '''
    Dispatches to the sub-command named by argv[1].

    argv:
        Full command line, e.g. sys.argv; argv[1] is the sub-command name and
        argv[2:] is passed to the sub-command function.

    Returns whatever the sub-command function returns. If the command is
    missing or not recognised, prints usage and exits with status 1.
    '''
    if len( argv) < 2:
        # No sub-command given; show help rather than raising IndexError.
        usage()
        sys.exit(1)

    arg1 = argv[1]
    fn = None
    # Only the documented sub-commands may be dispatched to; looking up an
    # arbitrary name would also match imported modules and helper functions.
    if arg1 in ('clean', 'convert', 'trace', 'draw'):
        fn = globals().get( arg1)
    if not fn:
        print( f'cannot find {arg1}')
        usage()
        sys.exit(1)

    return fn( argv[2:])
| 372 | |
| 373 | |
if __name__ == '__main__':
    # Run the selected sub-command and use its return value as the exit
    # status. SystemExit is not an Exception subclass, so it passes straight
    # through the handler below.
    try:
        sys.exit( main( sys.argv))
    except Exception as e:
        if 0:   # Enable when debugging.
            # Flush first so the message is not interleaved with pending output.
            sys.stdout.flush()
            sys.stderr.flush()
            print(f'Exception: {e}')
            sys.stdout.flush()
        raise
