comparison mupdf-source/scripts/mutool.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #!/usr/bin/env python3
2
3 '''
4 Intended to behave exactly like mutool, but uses the mupdf python => C++ =>
5 mupdf.so wrappers.
6
7 The code is intended to be similar to the mutool C code, to simplify
8 comparison.
9 '''
10
11 import getopt
12 import os
13 import sys
14 import textwrap
15
# Choose which Python binding provides the `mupdf` name, based on
# $MUPDF_PYTHON. An unset variable selects the SWIG binding.
_mupdf_python = os.environ.get('MUPDF_PYTHON')
if _mupdf_python == 'cppyy':
    # The cppyy wrapper is imported from platform/python, located relative to
    # this script; the extra sys.path entry is removed again after the import.
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    sys.path.pop(0)
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
elif _mupdf_python is None or _mupdf_python == 'swig':
    # PYTHONPATH should have been set up to point to a build/shared-*/
    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
else:
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
# Do not leave the temporary behind as a module attribute.
del _mupdf_python
27
def usage():
    '''
    Prints the top-level command summary to stdout.

    The text starts and ends with an empty line. Does not exit; callers decide
    what to do next.
    '''
    summary = (
            '',
            'usage: mutool.py <command> [options]',
            '\tclean\t-- rewrite pdf file',
            '\tconvert\t-- convert document',
            '\ttrace\t-- trace device calls',
            '\tdraw\t-- convert document',
            '',
            )
    print('\n'.join(summary))
36
37
38 # Things for clean
39 #
def clean_usage():
    '''
    Prints the help text for the 'clean' command to stdout, then terminates
    the process via sys.exit(1). Never returns.
    '''
    help_lines = [
            '',
            'usage: mutool clean [options] input.pdf [output.pdf] [pages]',
            '\t-p -\tpassword',
            '\t-g\tgarbage collect unused objects',
            '\t-gg\tin addition to -g compact xref table',
            '\t-ggg\tin addition to -gg merge duplicate objects',
            '\t-gggg\tin addition to -ggg check streams for duplication',
            '\t-l\tlinearize PDF',
            '\t-D\tsave file without encryption',
            '\t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)',
            '\t-O -\towner password (only if encrypting)',
            '\t-U -\tuser password (only if encrypting)',
            '\t-P -\tpermission flags (only if encrypting)',
            '\t-a\tascii hex encode binary streams',
            '\t-d\tdecompress streams',
            '\t-z\tdeflate uncompressed streams',
            '\t-f\tcompress font streams',
            '\t-i\tcompress image streams',
            '\t-c\tclean content streams',
            '\t-s\tsanitize content streams',
            '\t-A\tcreate appearance streams for annotations',
            '\t-AA\trecreate appearance streams for annotations',
            '\tpages\tcomma separated list of page numbers and ranges',
            '',
            ]
    print('\n'.join(help_lines))
    sys.exit(1)
68
def _encrypt_method_from_string(name):
    '''
    Maps an encryption method name, as accepted by 'clean -E', to the
    corresponding mupdf.PDF_ENCRYPT_* constant.

    Returns mupdf.PDF_ENCRYPT_UNKNOWN if <name> is not one of 'rc4-40',
    'rc4-128', 'aes-128' or 'aes-256'.
    '''
    methods = {
            'rc4-40': mupdf.PDF_ENCRYPT_RC4_40,
            'rc4-128': mupdf.PDF_ENCRYPT_RC4_128,
            'aes-128': mupdf.PDF_ENCRYPT_AES_128,
            'aes-256': mupdf.PDF_ENCRYPT_AES_256,
            }
    return methods.get(name, mupdf.PDF_ENCRYPT_UNKNOWN)


def clean(argv):
    '''
    Implements the 'clean' command: rewrites a PDF file.

    argv:
        Command-line arguments after the command name: options, then the
        input file, then optionally an output file and page specifications.

    Returns True if mupdf.pdf_clean_file() raised an exception (which is
    reported on stdout), otherwise False. Calls clean_usage(), which exits the
    process, if the options are invalid or no input file is given.
    '''
    outfile = 'out.pdf'
    password = ''
    opts = mupdf.PdfCleanOptions()
    # NOTE(review): the garbage level starts at 1 before any -g is seen, so
    # each -g raises it further - confirm this is intended.
    opts.write.do_garbage += 1
    errors = 0
    try:
        items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
    except getopt.GetoptError:
        # Unknown option or missing option value: show help and exit.
        clean_usage()
    for option, value in items:
        if option == '-p':   password = value
        elif option == '-d': opts.write.do_decompress += 1
        elif option == '-z': opts.write.do_compress += 1
        elif option == '-f': opts.write.do_compress_fonts += 1
        elif option == '-i': opts.write.do_compress_images += 1
        elif option == '-a': opts.write.do_ascii += 1
        elif option == '-g': opts.write.do_garbage += 1
        elif option == '-l': opts.write.do_linear += 1
        elif option == '-c': opts.write.do_clean += 1
        elif option == '-s': opts.write.do_sanitize += 1
        elif option == '-A': opts.write.do_appearance += 1
        # The encryption constants live in the mupdf module; a bare
        # PDF_ENCRYPT_NONE would raise NameError.
        elif option == '-D': opts.write.do_encrypt = mupdf.PDF_ENCRYPT_NONE
        elif option == '-E': opts.write.do_encrypt = _encrypt_method_from_string(value)
        elif option == '-P': opts.write.permissions = int(value)
        # Passwords are truncated to 128 characters.
        elif option == '-O': opts.write.opwd_utf8 = value[:128]
        elif option == '-U': opts.write.upwd_utf8 = value[:128]
        else:
            clean_usage()

    # Pretty-print when streams are being made readable and are not being
    # recompressed.
    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
        opts.write.do_pretty = 1

    if not argv:
        clean_usage()

    infile = argv.pop(0)

    # The next argument is taken as the output file only if it contains
    # '.pdf'; otherwise it is left in argv as a page specification.
    if argv and '.pdf' in argv[0].lower():
        outfile = argv.pop(0)

    try:
        mupdf.pdf_clean_file(infile, outfile, password, opts, argv)
    except Exception as e:
        print( f'mupdf.pdf_clean_file() failed: {e}')
        errors += 1
        if 0:
            # Enable for debugging.
            import traceback
            traceback.print_exc()
    return errors != 0
118
119
120
121 # Things for draw.
122 #
123
124 import mutool_draw
125
126 draw = mutool_draw.draw
127
128
129
130 # Things for convert.
131 #
132
133
def convert_usage():
    '''
    Prints the help text for the 'convert' command to stdout, followed by the
    per-format option descriptions exported by the mupdf module, then
    terminates the process via sys.exit(1). Never returns.
    '''
    help_lines = [
            '',
            f'mutool convert version {mupdf.FZ_VERSION}',
            'Usage: mutool convert [options] file [pages]',
            '\t-p -\tpassword',
            '',
            '\t-A -\tnumber of bits of antialiasing (0 to 8)',
            '\t-W -\tpage width for EPUB layout',
            '\t-H -\tpage height for EPUB layout',
            '\t-S -\tfont size for EPUB layout',
            '\t-U -\tfile name of user stylesheet for EPUB layout',
            '\t-X\tdisable document styles for EPUB layout',
            '',
            '\t-o -\toutput file name (%d for page number)',
            '\t-F -\toutput format (default inferred from output file name)',
            '\t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.',
            '\t\t\tprint-raster: pcl, pclm, ps, pwg.',
            '\t\t\tvector: pdf, svg.',
            '\t\t\ttext: html, xhtml, text, stext.',
            '\t-O -\tcomma separated list of options for output format',
            '',
            '\tpages\tcomma separated list of page ranges (N=last page)',
            '',
            ]
    print( '\n'.join( help_lines))

    # Each attribute is looked up immediately before it is printed, in this
    # order.
    option_texts = (
            'fz_draw_options_usage',
            'fz_pcl_write_options_usage',
            'fz_pclm_write_options_usage',
            'fz_pwg_write_options_usage',
            'fz_stext_options_usage',
            'fz_pdf_write_options_usage',
            'fz_svg_write_options_usage',
            )
    for attribute in option_texts:
        print( getattr( mupdf, attribute))
    sys.exit(1)
167
168
def convert_runpage( doc, number, out):
    '''
    Runs one page of <doc> through the document writer <out>.

    number:
        1-based page number; the page is loaded with index number-1.
    '''
    page = mupdf.FzPage( doc, number - 1)
    # The writer's page is opened with the bounds of the source page.
    device = out.fz_begin_page( page.fz_bound_page())
    identity = mupdf.FzMatrix( mupdf.fz_identity)
    page.fz_run_page( device, identity, mupdf.FzCookie())
    out.fz_end_page()
175
def convert_runrange( doc, count, range_, out):
    '''
    Runs every page selected by the page-range string <range_> through the
    document writer <out>.

    doc:
        Document whose pages are converted.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Page range specification, e.g. '1-N'.
    out:
        Document writer, passed on to convert_runpage().

    Each parsed sub-range is processed from <start> to <end> inclusive,
    counting down when <start> is greater than <end>.
    '''
    while 1:
        # Each call consumes one sub-range and returns the remaining text,
        # which is None once nothing is left to parse.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end >= start else -1
        # Python's range() excludes its stop value, so extend it by one step
        # to include <end>. This also makes a single-page range (start == end)
        # yield that page instead of nothing.
        for i in range( start, end + step, step):
            convert_runpage( doc, i, out)
186
def convert( argv):
    '''
    Implements the 'convert' command: writes the selected pages of one or more
    input documents to a single document writer.

    argv:
        Command-line arguments after the command name: options, then input
        files, each optionally followed by a page range.

    Calls convert_usage(), which exits the process, if the options are
    invalid, if no input file is given, or if neither -o nor -F is given.
    Raises Exception if a document needs a password and the -p value does not
    authenticate.
    '''
    # input options
    password = ''
    alphabits = 8
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    # output options
    output = None
    format_ = None
    options = ''

    try:
        items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
    except getopt.GetoptError:
        # Unknown option or missing option value: show help and exit.
        convert_usage()
    for option, value in items:
        if option == '-p':   password = value
        elif option == '-A': alphabits = int(value)
        elif option == '-W': layout_w = float( value)
        elif option == '-H': layout_h = float( value)
        elif option == '-S': layout_em = float( value)
        elif option == '-U': layout_css = value
        elif option == '-X': layout_use_doc_css = 0
        elif option == '-o': output = value
        elif option == '-F': format_ = value
        elif option == '-O': options = value
        else: assert 0, f'unhandled option: {option}'

    if not argv or (not format_ and not output):
        convert_usage()

    mupdf.fz_set_aa_level( alphabits)
    if layout_css:
        buf = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buf.string_from_buffer())

    mupdf.fz_set_use_document_css(layout_use_doc_css)

    if format_:
        out = mupdf.FzDocumentWriter( output, format_, options)
    else:
        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)

    # An index loop is used because a file name may be followed by a page
    # range, in which case two arguments are consumed in one iteration.
    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        count = doc.fz_count_pages()

        range_ = '1-N'
        # The wrapper takes only the string, as in trace(); there is no
        # context argument on the Python side.
        if i + 1 < len(argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        convert_runrange( doc, count, range_, out)
        i += 1

    out.fz_close_document_writer()
252
253
254
255 # Things for trace.
256 #
257
def trace_usage():
    '''
    Prints the help text for the 'trace' command to stdout, then terminates
    the process via sys.exit(1). Never returns.
    '''
    help_lines = [
            '',
            'Usage: mutool trace [options] file [pages]',
            '\t-p -\tpassword',
            '',
            '\t-W -\tpage width for EPUB layout',
            '\t-H -\tpage height for EPUB layout',
            '\t-S -\tfont size for EPUB layout',
            '\t-U -\tfile name of user stylesheet for EPUB layout',
            '\t-X\tdisable document styles for EPUB layout',
            '',
            '\t-d\tuse display list',
            '',
            '\tpages\tcomma separated list of page numbers and ranges',
            '',
            ]
    print( '\n'.join( help_lines))
    sys.exit( 1)
274
def trace_runpage( use_display_list, doc, number):
    '''
    Prints the trace of one page of <doc>, wrapped in <page ...> and </page>
    lines.

    use_display_list:
        If true, a display list is created from the page and the list is run;
        otherwise the page is run directly.
    number:
        1-based page number; the page is loaded with index number-1.
    '''
    page = mupdf.FzPage( doc, number-1)
    mediabox = page.fz_bound_page()
    print( f'<page number="{number}" mediabox="{mediabox.x0} {mediabox.y0} {mediabox.x1} {mediabox.y1}">')
    out = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
    device = mupdf.FzDevice( out)
    if not use_display_list:
        page.fz_run_page( device, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    else:
        display_list = mupdf.FzDisplayList( page)
        display_list.fz_run_display_list(
                device,
                mupdf.FzMatrix(mupdf.fz_identity),
                mupdf.FzRect(mupdf.fz_infinite_rect),
                mupdf.FzCookie(),
                )
    out.fz_close_output()
    print( '</page>')
288
def trace_runrange( use_display_list, doc, count, range_):
    '''
    Traces every page selected by the page-range string <range_>.

    use_display_list:
        Passed on to trace_runpage().
    doc:
        Document whose pages are traced.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Page range specification, e.g. '1-N'.

    Each parsed sub-range is processed from <start> to <end> inclusive,
    counting down when <start> is greater than <end>. Nothing other than the
    per-page output of trace_runpage() is printed.
    '''
    while 1:
        # Each call consumes one sub-range and returns the remaining text,
        # which is None once nothing is left to parse.
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end >= start else -1
        # Python's range() excludes its stop value, so extend it by one step
        # to include <end>. This also makes a single-page range (start == end)
        # yield that page instead of nothing.
        for i in range( start, end + step, step):
            trace_runpage( use_display_list, doc, i)
300
def trace( argv):
    '''
    Implements the 'trace' command: prints the device calls for the selected
    pages of one or more documents, each wrapped in <document ...> and
    </document> lines.

    argv:
        Command-line arguments after the command name: options, then input
        files, each optionally followed by a page range.

    Calls trace_usage(), which exits the process, if the options are invalid
    or no input file is given. Raises Exception if a document needs a password
    and the -p value does not authenticate.
    '''
    password = ''
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    use_display_list = 0

    # Options are parsed with getopt, like clean() and convert(); parsing
    # stops at the first non-option argument.
    try:
        items, argv = getopt.getopt( argv, 'p:W:H:S:U:Xd')
    except getopt.GetoptError:
        # Unknown option or missing option value: show help and exit.
        trace_usage()
    for option, value in items:
        if option == '-p':   password = value
        elif option == '-W': layout_w = float( value)
        elif option == '-H': layout_h = float( value)
        elif option == '-S': layout_em = float( value)
        elif option == '-U': layout_css = value
        elif option == '-X': layout_use_doc_css = 0
        elif option == '-d': use_display_list = 1
        else: assert 0, f'unhandled option: {option}'

    if not argv:
        trace_usage()

    if layout_css:
        buffer_ = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buffer_.string_from_buffer())

    mupdf.fz_set_use_document_css( layout_use_doc_css)

    # An index loop is used because a file name may be followed by a page
    # range, in which case two arguments are consumed in one iteration. (A
    # 'for ... in range()' loop would visit the page range again as if it were
    # a file name.)
    argv_i = 0
    while argv_i < len( argv):
        arg = argv[ argv_i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        print( f'<document filename="{arg}">')
        count = doc.fz_count_pages()
        range_ = '1-N'
        if argv_i + 1 < len( argv) and mupdf.fz_is_page_range( argv[ argv_i+1]):
            argv_i += 1
            range_ = argv[ argv_i]
        trace_runrange( use_display_list, doc, count, range_)
        print( '</document>')
        argv_i += 1
360
361
362
def main( argv):
    '''
    Dispatches to the function implementing the command named by argv[1].

    argv:
        Full command line, including the program name in argv[0].

    Returns whatever the command function returns when called with argv[2:].
    Prints the usage text and exits with status 1 if no command is given or
    the command is not one of 'clean', 'convert', 'draw' or 'trace'.
    '''
    if len( argv) < 2:
        # No command at all; indexing argv[1] would raise IndexError.
        usage()
        sys.exit(1)

    arg1 = argv[1]
    fn = None
    # Only the documented commands are dispatched, so that other module
    # attributes (imported modules, helper functions) cannot be invoked from
    # the command line.
    if arg1 in ('clean', 'convert', 'draw', 'trace'):
        fn = getattr( sys.modules[__name__], arg1, None)
    if not fn:
        print( f'cannot find {arg1}')
        usage()
        sys.exit(1)

    return fn( argv[2:])
372
373
# Script entry point: the value returned by main() becomes the process exit
# status. Exceptions are re-raised unchanged.
if __name__ == '__main__':
    try:
        sys.exit( main( sys.argv))
    except Exception as e:
        if 0:   # Enable when debugging.
            sys.stdout.flush()
            sys.stderr.flush()
            print(f'Exception: {e}')
            sys.stdout.flush()
        raise