Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/__main__.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF; MuPDF is normally downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 # ----------------------------------------------------------------------------- | |
| 2 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 3 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 4 # Part of "PyMuPDF", Python bindings for "MuPDF" (http://mupdf.com), a | |
| 5 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 6 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 7 # ----------------------------------------------------------------------------- | |
| 8 import argparse | |
| 9 import bisect | |
| 10 import os | |
| 11 import sys | |
| 12 import statistics | |
| 13 from typing import Dict, List, Set, Tuple | |
| 14 | |
| 15 import fitz | |
| 16 from fitz.fitz import ( | |
| 17 TEXT_INHIBIT_SPACES, | |
| 18 TEXT_PRESERVE_LIGATURES, | |
| 19 TEXT_PRESERVE_WHITESPACE, | |
| 20 ) | |
| 21 | |
def mycenter(x):
    """Return *x* with one blank on each side, centered in 75 dashes.

    Used to build the banner lines printed above each output section.
    """
    return (" %s " % x).center(75, "-")
| 23 | |
| 24 | |
def recoverpix(doc, item):
    """Return the image belonging to one entry of a page image list.

    Args:
        doc: the document containing the image.
        item: sequence whose first entry is the xref of the image and whose
            second entry is the xref of its /SMask (0 if there is none).
    Returns:
        The result of ``doc.extract_image`` if there is no /SMask, otherwise
        a pixmap. A pixmap whose colorspace has 4 components is converted
        to RGB before it is returned.
    """
    xref, smask = item[0], item[1]
    if smask == 0:  # no /SMask: the image can be extracted directly
        return doc.extract_image(xref)

    def to_rgb_if_needed(pixmap):
        # only pixmaps with a 4-component colorspace are converted
        if pixmap.colorspace.n == 4:
            return fitz.Pixmap(fitz.csRGB, pixmap)
        return pixmap

    # the alpha channel has to be rebuilt from the /SMask
    base = fitz.Pixmap(doc, xref)
    mask = fitz.Pixmap(doc, smask)

    # Sanity check:
    # - both pixmaps must cover the same rectangle
    # - neither pixmap may have an alpha channel
    # - the mask must consist of 1 byte per pixel
    usable = base.irect == mask.irect and base.alpha == mask.alpha == 0 and mask.n == 1
    if not usable:
        print("Warning: unsupported /SMask %i for %i:" % (smask, xref))
        print(mask)
        mask = None
        return to_rgb_if_needed(base)  # hand back the image without a mask

    merged = fitz.Pixmap(base)  # work on a copy of the base image
    merged.set_alpha(mask.samples)  # the mask samples become the alpha values
    base = mask = None  # release the temporary pixmaps

    return to_rgb_if_needed(merged)
| 59 | |
| 60 | |
def open_file(filename, password, show=False, pdf=True):
    """Open a document and authenticate it if it needs a password.

    Args:
        filename: path of the document to open.
        password: password to try if the document needs one (may be None).
        show: if True, print which access level the password granted.
        pdf: if True, terminate unless the document is a PDF.
    Returns:
        The opened (and, where required, authenticated) document.
    Exits:
        Via ``sys.exit`` if a PDF is required but something else was given,
        if a required password is missing, or if authentication fails.
    """
    doc = fitz.open(filename)
    if not doc.is_pdf and pdf is True:
        sys.exit("this command supports PDF files only")
    if not doc.needs_pass:
        return doc
    if not password:
        sys.exit("'%s' requires a password" % doc.name)

    rc = doc.authenticate(password)
    if not rc:
        sys.exit("authentication unsuccessful")
    if show is True:
        # The conditional expression must be parenthesized: "%" binds tighter
        # than "if ... else", so without parentheses only the bare word
        # "user" was printed for user-level authentication.
        print("authenticated as %s" % ("owner" if rc > 2 else "user"))
    return doc
| 78 | |
| 79 | |
def print_dict(item):
    """Print a dictionary, one ``key: value`` line per entry.

    Keys are right-justified to the length of the longest key plus one, so
    that the colons line up. An empty dictionary prints nothing.

    Args:
        item: (dict) mapping with string keys.
    """
    if not item:  # max() of an empty sequence would raise ValueError
        return
    width = max(len(key) for key in item) + 1
    for key, value in item.items():
        print("%s: %s" % (key.rjust(width), value))
| 87 | |
| 88 | |
def print_xref(doc, xref):
    """Print an object given by XREF number.

    Simulate the PDF source in "pretty" format.
    For a stream also print its size, taken from the token following
    "/Length" in the object definition. The size is shown as "unknown" if
    there is no such token or if the length is an indirect reference.

    Args:
        doc: the document; must provide ``xref_object`` and ``xref_is_stream``.
        xref: (int) number of the object to print.
    """
    print("%i 0 obj" % xref)
    xref_str = doc.xref_object(xref)
    print(xref_str)
    if doc.xref_is_stream(xref):
        tokens = xref_str.split()
        try:
            idx = tokens.index("/Length") + 1  # ValueError if not present
            size = tokens[idx]  # IndexError if "/Length" is the last token
        except (ValueError, IndexError):
            size = "unknown"
        else:
            # An indirect length reads "/Length <num> <gen> R". After
            # splitting on whitespace these are separate tokens, so look for
            # the "R" two tokens after the number instead of testing the
            # number token itself for a "0 R" suffix (which can never match).
            if tokens[idx + 2 : idx + 3] == ["R"]:
                size = "unknown"
        print("stream\n...%s bytes" % size)
        print("endstream")
    print("endobj")
| 110 | |
| 111 | |
def get_list(rlist, limit, what="page"):
    """Transform a page / xref specification into a list of integers.

    The specification is a comma-separated list of items. Each item is
    either a single number or a range "a-b"; a range with a > b counts
    downwards. The letter "N" stands for ``limit - 1`` and blanks are
    ignored. Every number must satisfy ``1 <= number < limit``.

    Args
    ----
    rlist: (str) the specification
    limit: maximum number, i.e. number of pages, number of objects
    what: a string to be used in error messages
    Returns
    -------
    A list of integers representing the specification.
    Exits
    -----
    Via ``sys.exit`` with a message naming the 1-based position of the
    first invalid item.
    """
    last = str(limit - 1)
    spec = rlist.replace("N", last).replace(" ", "")
    out_list = []
    for n, item in enumerate(spec.split(","), start=1):
        if item.isdecimal():  # a single integer
            number = int(item)
            if not 1 <= number < limit:
                sys.exit("bad %s specification at item %i" % (what, n))
            out_list.append(number)
            continue

        # this must be a range now
        try:
            first, second = item.split("-")  # ValueError if not 2 parts
            i1 = int(first)  # ValueError on non-integers
            i2 = int(second)
        except ValueError:
            sys.exit("bad %s range specification at item %i" % (what, n))

        if not (1 <= i1 < limit and 1 <= i2 < limit):
            sys.exit("bad %s range specification at item %i" % (what, n))

        # ascending, descending and single-element ranges in one expression
        step = 1 if i1 <= i2 else -1
        out_list.extend(range(i1, i2 + step, step))

    return out_list
| 157 | |
| 158 | |
def show(args):
    """Print summary information about a PDF, plus any details requested.

    Always prints one summary line (name, page count, object count, file
    size, format, encryption) and, if present, form field and embedded file
    counts. Depending on ``args`` the catalog, metadata, selected objects,
    selected pages and the trailer follow.
    """
    doc = open_file(args.input, args.password, True)

    # file size in KB, switching to MB above 1000 KB
    size = os.path.getsize(args.input) / 1024
    if size > 1000:
        size, flag = size / 1024, "MB"
    else:
        flag = "KB"
    size = round(size, 1)

    meta = doc.metadata
    summary = (
        args.input,
        doc.page_count,
        doc.xref_length() - 1,
        size,
        flag,
        meta["format"],
        meta["encryption"],
    )
    print("'%s', pages: %i, objects: %i, %g %s, %s, encryption: %s" % summary)

    field_count = doc.is_form_pdf
    if field_count > 0:
        sigflags = doc.get_sigflags()
        unsigned = "not " if sigflags != 3 else ""
        print(
            "document contains %i root form fields and is %ssigned"
            % (field_count, unsigned)
        )

    embedded_count = doc.embfile_count()
    if embedded_count > 0:
        print("document contains %i embedded files" % embedded_count)
    print()

    if args.catalog:
        print(mycenter("PDF catalog"))
        print_xref(doc, doc.pdf_catalog())
        print()

    if args.metadata:
        print(mycenter("PDF metadata"))
        print_dict(doc.metadata)
        print()

    if args.xrefs:
        print(mycenter("object information"))
        for xref in get_list(args.xrefs, doc.xref_length(), what="xref"):
            print_xref(doc, xref)
            print()

    if args.pages:
        print(mycenter("page information"))
        for pno in get_list(args.pages, doc.page_count + 1):
            page_xref = doc.page_xref(pno - 1)  # page numbers are 1-based here
            print("Page %i:" % pno)
            print_xref(doc, page_xref)
            print()

    if args.trailer:
        print(mycenter("PDF trailer"))
        print(doc.pdf_trailer())
        print()

    doc.close()
| 220 | |
| 221 | |
def clean(args):
    """Save an optimized copy of a PDF, or a sub-PDF of selected pages.

    Without ``args.pages`` the input document itself is saved to
    ``args.output`` with the requested options. With ``args.pages`` a new
    document is built from the selected pages (in the order given) and
    saved with the same options.
    """
    doc = open_file(args.input, args.password, pdf=True)
    # The position of the method name in this tuple is passed on as the
    # numeric encryption code.
    # NOTE(review): presumably matches the library's PDF_ENCRYPT_* values;
    # confirm.
    encrypt = ("keep", "none", "rc4-40", "rc4-128", "aes-128", "aes-256").index(
        args.encryption
    )
    # options shared by both save paths
    save_options = dict(
        garbage=args.garbage,
        deflate=args.compress,
        pretty=args.pretty,
        clean=args.sanitize,
        ascii=args.ascii,
        linear=args.linear,
        encryption=encrypt,
        owner_pw=args.owner,
        user_pw=args.user,
        permissions=args.permission,
    )

    if not args.pages:  # simple cleaning
        doc.save(args.output, **save_options)
        doc.close()  # previously left open on this path
        return

    # create sub document from page numbers
    pages = get_list(args.pages, doc.page_count + 1)
    outdoc = fitz.open()
    for pno in pages:
        n = pno - 1  # page numbers are 1-based on the command line
        outdoc.insert_pdf(doc, from_page=n, to_page=n)
    outdoc.save(args.output, **save_options)
    doc.close()
    outdoc.close()
    return
| 267 | |
| 268 | |
def doc_join(args):
    """Join pages from several PDF documents into one output PDF.

    Each entry of ``args.input`` has the form
    ``filename[,password[,pages]]``; everything after the second comma is
    taken as the page specification. Without a page specification all
    pages of that input are copied.
    """
    target = fitz.open()  # the PDF being assembled
    for spec in args.input:  # one input PDF per entry
        parts = spec.split(",")
        password = parts[1] if len(parts) > 1 else None
        src = open_file(parts[0], password, pdf=True)

        page_spec = ",".join(parts[2:])
        if page_spec:  # explicit selection of pages
            page_list = get_list(page_spec, src.page_count + 1)
        else:  # no selection: all pages in document order
            page_list = range(1, src.page_count + 1)

        for pno in page_list:
            # copy the pages one by one so the requested order is kept
            target.insert_pdf(src, from_page=pno - 1, to_page=pno - 1)
        src.close()

    target.save(args.output, garbage=4, deflate=True)
    target.close()
| 288 | |
| 289 | |
def embedded_copy(args):
    """Copy embedded files between PDFs.

    Copies the entries named in ``args.name`` (or all entries if no names
    are given) from ``args.source`` into ``args.input``. The result is
    saved to ``args.output`` if that is given and differs from the input,
    otherwise incrementally to the input file.

    Exits via ``sys.exit`` if an incremental save is needed but not
    possible, if a requested name is missing in the source, if there is
    nothing to copy, or if a name already exists in the receiving PDF.
    """
    doc = open_file(args.input, args.password, pdf=True)
    if not doc.can_save_incrementally() and (
        not args.output or args.output == args.input
    ):
        sys.exit("cannot save PDF incrementally")
    src = open_file(args.source, args.pwdsource)
    names = set(args.name) if args.name else set()
    src_names = set(src.embfile_names())
    if names:
        if not names <= src_names:
            sys.exit("not all names are contained in source")
    else:
        names = src_names
    if not names:
        sys.exit("nothing to copy")
    intersect = names & set(doc.embfile_names())  # any equal name already in target?
    if intersect:
        sys.exit("following names already exist in receiving PDF: %s" % str(intersect))

    for item in names:
        info = src.embfile_info(item)
        buff = src.embfile_get(item)
        doc.embfile_add(
            item,
            buff,
            filename=info["filename"],
            ufilename=info["ufilename"],
            desc=info["desc"],
        )
        print("copied entry '%s' from '%s'" % (item, src.name))
    src.close()
    if args.output and args.output != args.input:
        doc.save(args.output, garbage=3)
    else:
        # use the same method name as embedded_del(); the camelCase
        # spelling "saveIncr" is a deprecated alias
        doc.save_incr()
    doc.close()
| 328 | |
| 329 | |
def embedded_del(args):
    """Remove the embedded file entry ``args.name`` from a PDF.

    The change is saved incrementally to the input file unless
    ``args.output`` names a different file.
    """
    doc = open_file(args.input, args.password, pdf=True)
    in_place = not args.output or args.output == args.input
    if not doc.can_save_incrementally() and in_place:
        sys.exit("cannot save PDF incrementally")

    try:
        doc.embfile_del(args.name)
    except ValueError:
        sys.exit("no such embedded file '%s'" % args.name)

    if in_place:
        doc.save_incr()
    else:
        doc.save(args.output, garbage=1)
    doc.close()
| 347 | |
| 348 | |
def embedded_get(args):
    """Retrieve contents of an embedded file.

    Writes the content of entry ``args.name`` to ``args.output`` or, if no
    output is given, to the filename stored with the entry.

    Exits via ``sys.exit`` if there is no entry with that name.
    """
    doc = open_file(args.input, args.password, pdf=True)
    try:
        stream = doc.embfile_get(args.name)
        d = doc.embfile_info(args.name)
    except ValueError:
        doc.close()
        sys.exit("no such embedded file '%s'" % args.name)
    filename = args.output if args.output else d["filename"]
    # context manager: the file is closed even if the write fails
    with open(filename, "wb") as output:
        output.write(stream)
    print("saved entry '%s' as '%s'" % (args.name, filename))
    doc.close()
| 363 | |
| 364 | |
def embedded_add(args):
    """Insert a new embedded file.

    Embeds the file ``args.path`` under the entry name ``args.name``. The
    path is also stored as filename and ufilename, and as description if
    ``args.desc`` is not given. The result is saved to ``args.output`` if
    that is given and differs from the input, otherwise incrementally.

    Exits via ``sys.exit`` if an incremental save is needed but not
    possible, if an entry of that name already exists, or if ``args.path``
    is not an existing file.
    """
    doc = open_file(args.input, args.password, pdf=True)
    if not doc.can_save_incrementally() and (
        args.output is None or args.output == args.input
    ):
        sys.exit("cannot save PDF incrementally")

    # Check for an existing entry without modifying the document. The
    # previous "try: embfile_del(); sys.exit() / except: pass" construct
    # deleted an existing entry and then swallowed its own SystemExit, so
    # duplicates silently replaced the old entry instead of being rejected.
    if args.name in doc.embfile_names():
        sys.exit("entry '%s' already exists" % args.name)

    if not os.path.isfile(args.path):  # also false if the path does not exist
        sys.exit("no such file '%s'" % args.path)
    with open(args.path, "rb") as infile:
        stream = infile.read()
    filename = args.path
    ufilename = filename
    desc = args.desc if args.desc else filename
    doc.embfile_add(
        args.name, stream, filename=filename, ufilename=ufilename, desc=desc
    )
    if not args.output or args.output == args.input:
        # same method name as in embedded_del(); "saveIncr" is a deprecated alias
        doc.save_incr()
    else:
        doc.save(args.output, garbage=3)
    doc.close()
| 396 | |
| 397 | |
def embedded_upd(args):
    """Update contents or metadata of an embedded file.

    New content is read from ``args.path`` if that names an existing file;
    otherwise the content is left unchanged. ``args.filename``,
    ``args.ufilename`` and ``args.desc`` replace the stored values when
    given; ufilename falls back to ``args.filename``. The result is saved
    to ``args.output`` if that is given and differs from the input,
    otherwise incrementally.

    Exits via ``sys.exit`` if an incremental save is needed but not
    possible, or if there is no entry named ``args.name``.
    """
    doc = open_file(args.input, args.password, pdf=True)
    if not doc.can_save_incrementally() and (
        args.output is None or args.output == args.input
    ):
        sys.exit("cannot save PDF incrementally")

    try:
        doc.embfile_info(args.name)
    except ValueError:  # same exception embedded_get() / embedded_del() expect
        sys.exit("no such embedded file '%s'" % args.name)

    if args.path is not None and os.path.isfile(args.path):
        with open(args.path, "rb") as infile:
            stream = infile.read()
    else:
        stream = None  # no (usable) path: keep the current content

    # empty values are passed as None, i.e. "do not change"
    filename = args.filename if args.filename else None
    ufilename = args.ufilename or args.filename or None
    desc = args.desc if args.desc else None

    doc.embfile_upd(
        args.name, stream, filename=filename, ufilename=ufilename, desc=desc
    )
    if args.output is None or args.output == args.input:
        # same method name as in embedded_del(); "saveIncr" is a deprecated alias
        doc.save_incr()
    else:
        doc.save(args.output, garbage=3)
    doc.close()
| 445 | |
| 446 | |
def embedded_list(args):
    """List embedded files.

    With ``args.name`` the details of that single entry are printed.
    Otherwise all entry names are listed; with ``args.detail`` the details
    of every entry are printed instead of just its name.

    Exits via ``sys.exit`` if ``args.name`` is given but does not exist.
    """
    doc = open_file(args.input, args.password, pdf=True)
    names = doc.embfile_names()

    if args.name is not None:
        if args.name not in names:
            sys.exit("no such embedded file '%s'" % args.name)
        print()
        print(
            "printing 1 of %i embedded file%s:"
            % (len(names), "s" if len(names) > 1 else "")
        )
        print()
        print_dict(doc.embfile_info(args.name))
        print()
        doc.close()
        return

    if not names:
        print("'%s' contains no embedded files" % doc.name)
        doc.close()
        return

    if len(names) > 1:
        msg = "'%s' contains the following %i embedded files" % (doc.name, len(names))
    else:
        msg = "'%s' contains the following embedded file" % doc.name
    print(msg)
    print()
    for name in names:
        if not args.detail:
            print(name)
            continue
        # a single lookup per entry; the result of a second call was unused
        print_dict(doc.embfile_info(name))
        print()
    doc.close()
| 481 | |
| 482 | |
def extract_objects(args):
    """Extract images and / or fonts from a PDF.

    Walks the selected pages (all pages if ``args.pages`` is not given)
    and writes every font and / or image found to the output directory,
    each xref only once. Fonts are named ``<fontname>-<xref>.<ext>``,
    images ``img-<xref>.<ext>``.

    Exits via ``sys.exit`` if neither fonts nor images are requested or if
    the given output directory does not exist.
    """
    if not args.fonts and not args.images:
        sys.exit("neither fonts nor images requested")
    doc = open_file(args.input, args.password, pdf=True)

    if args.pages:
        pages = get_list(args.pages, doc.page_count + 1)
    else:
        pages = range(1, doc.page_count + 1)

    if not args.output:
        out_dir = os.path.abspath(os.curdir)
    else:
        out_dir = args.output
        if not os.path.isdir(out_dir):  # also false if it does not exist
            sys.exit("output directory %s does not exist" % out_dir)

    font_xrefs = set()  # fonts already looked at
    image_xrefs = set()  # images already looked at
    fonts_saved = 0  # files actually written
    images_saved = 0

    for pno in pages:
        if args.fonts:
            for item in doc.get_page_fonts(pno - 1):
                xref = item[0]
                if xref in font_xrefs:
                    continue
                font_xrefs.add(xref)
                fontname, ext, _, buffer = doc.extract_font(xref)
                if ext == "n/a" or not buffer:  # nothing extractable
                    continue
                outname = os.path.join(
                    out_dir, f"{fontname.replace(' ', '-')}-{xref}.{ext}"
                )
                with open(outname, "wb") as outfile:
                    outfile.write(buffer)
                fonts_saved += 1
                buffer = None  # release the font data
        if args.images:
            for item in doc.get_page_images(pno - 1):
                xref = item[0]
                if xref in image_xrefs:
                    continue
                image_xrefs.add(xref)
                pix = recoverpix(doc, item)
                if isinstance(pix, dict):  # extracted image: write its raw data
                    outname = os.path.join(out_dir, "img-%i.%s" % (xref, pix["ext"]))
                    with open(outname, "wb") as outfile:
                        outfile.write(pix["image"])
                else:  # a pixmap: save as PNG, converting to RGB if needed
                    outname = os.path.join(out_dir, "img-%i.png" % xref)
                    pix2 = (
                        pix
                        if pix.colorspace.n < 4
                        else fitz.Pixmap(fitz.csRGB, pix)
                    )
                    pix2.save(outname)
                images_saved += 1

    # Report the number of files written. Counting the "seen" sets would
    # also include fonts that were skipped as not extractable.
    if args.fonts:
        print("saved %i fonts to '%s'" % (fonts_saved, out_dir))
    if args.images:
        print("saved %i images to '%s'" % (images_saved, out_dir))
    doc.close()
| 549 | |
| 550 | |
def page_simple(page, textout, GRID, fontsize, noformfeed, skip_empty, flags):
    """Write the plain text of one page, followed by an end-of-page byte.

    The end-of-page byte is a line feed if *noformfeed* is true, otherwise
    a form feed (byte 12). For a page without text only the end-of-page
    byte is written, and nothing at all if *skip_empty* is true.
    *GRID* and *fontsize* are accepted for a uniform signature but unused.
    """
    page_end = bytes([12]) if not noformfeed else b"\n"
    content = page.get_text("text", flags=flags)
    if content:
        textout.write(content.encode("utf8", errors="surrogatepass"))
        textout.write(page_end)
    elif not skip_empty:
        textout.write(page_end)  # empty page: end-of-page marker only
| 561 | |
| 562 | |
def page_blocksort(page, textout, GRID, fontsize, noformfeed, skip_empty, flags):
    """Write the text blocks of one page, ordered by position.

    Blocks are ordered by their fourth entry, then by their first entry
    (NOTE(review): presumably bottom and left coordinate of the block bbox;
    confirm), and the fifth entry of each block is written as text. The
    page ends with a line feed if *noformfeed* is true, otherwise with a
    form feed (byte 12). For a page without blocks only that byte is
    written, and nothing at all if *skip_empty* is true.
    *GRID* and *fontsize* are accepted for a uniform signature but unused.
    """
    page_end = bytes([12]) if not noformfeed else b"\n"
    blocks = page.get_text("blocks", flags=flags)
    if not blocks:
        if not skip_empty:
            textout.write(page_end)  # empty page: end-of-page marker only
        return

    def position(block):
        return (block[3], block[0])

    blocks.sort(key=position)
    for block in blocks:
        textout.write(block[4].encode("utf8", errors="surrogatepass"))
    textout.write(page_end)
| 575 | |
| 576 | |
def page_layout(page, textout, GRID, fontsize, noformfeed, skip_empty, flags):
    """Write the text of one page to *textout*, approximating its layout.

    Characters are extracted one by one ("rawdict"), grouped into output
    lines by their rounded y-origin and positioned horizontally by
    inserting blanks. Vertical distances between lines are approximated by
    empty output lines.

    Args:
        page: the page to extract from; must provide ``get_text`` and ``rect``.
        textout: binary output stream, receives UTF-8 encoded text.
        GRID: rows whose y-coordinates are closer together than this value
            are merged into one output line.
        fontsize: spans with a font size less than or equal to this value
            are ignored.
        noformfeed: if true, end the page with a line feed instead of a
            form feed (byte 12).
        skip_empty: if true, write nothing for a page without characters.
        flags: passed through to ``page.get_text``.
    """
    eop = b"\n" if noformfeed else bytes([12])

    # --------------------------------------------------------------------
    def find_line_index(values: List[int], value: int) -> int:
        """Find the right row coordinate.

        Args:
            values: (list) y-coordinates of rows, sorted ascending.
            value: (int) lookup for this value (y-origin of char).
        Returns:
            The largest entry of values that is not greater than value,
            i.e. the y-coordinate of the appropriate line for value.
        Raises:
            RuntimeError: if every entry of values is greater than value.
        """
        i = bisect.bisect_right(values, value)
        if i:
            return values[i - 1]
        raise RuntimeError("Line for %g not found in %s" % (value, values))

    # --------------------------------------------------------------------
    def curate_rows(rows: Set[int], GRID) -> List:
        """Return the sorted row coordinates, dropping near-duplicates.

        A coordinate is only kept if it is at least GRID larger than the
        previously kept one. Expects a non-empty set (rows[0] is accessed).
        """
        rows = list(rows)
        rows.sort()  # sort ascending
        nrows = [rows[0]]
        for h in rows[1:]:
            if h >= nrows[-1] + GRID:  # only keep significant differences
                nrows.append(h)
        return nrows  # curated list of line bottom coordinates

    def process_blocks(blocks: List[Dict], page: fitz.Page):
        """Collect the characters of all horizontal lines on the page.

        Returns:
            chars: list of (character, x-origin, rounded y-origin, width).
            rows: set of all rounded y-origins encountered.
            left: smallest x-origin of a non-space character.
            right: largest right bbox coordinate of any character.
            rowheight: smallest line bbox height encountered.
        """
        rows = set()
        page_width = page.rect.width
        page_height = page.rect.height
        rowheight = page_height
        left = page_width
        right = 0
        chars = []
        for block in blocks:
            for line in block["lines"]:
                if line["dir"] != (1, 0):  # ignore non-horizontal text
                    continue
                x0, y0, x1, y1 = line["bbox"]
                if y1 < 0 or y0 > page.rect.height:  # ignore if outside CropBox
                    continue
                # upd row height
                height = y1 - y0

                if rowheight > height:
                    rowheight = height
                for span in line["spans"]:
                    if span["size"] <= fontsize:  # skip text that is too small
                        continue
                    for c in span["chars"]:
                        x0, _, x1, _ = c["bbox"]
                        cwidth = x1 - x0
                        ox, oy = c["origin"]
                        oy = int(round(oy))
                        rows.add(oy)
                        ch = c["c"]
                        if left > ox and ch != " ":
                            left = ox  # update left coordinate
                        if right < x1:
                            right = x1  # update right coordinate
                        # handle ligatures: a zero-width char on the same
                        # baseline as the previous char is merged into it
                        if cwidth == 0 and chars != []:  # potential ligature
                            old_ch, old_ox, old_oy, old_cwidth = chars[-1]
                            if old_oy == oy:  # ligature
                                if old_ch != chr(0xFB00):  # previous "ff" char lig?
                                    lig = joinligature(old_ch + ch)  # no
                                # convert to one of the 3-char ligatures:
                                elif ch == "i":
                                    lig = chr(0xFB03)  # "ffi"
                                elif ch == "l":
                                    lig = chr(0xFB04)  # "ffl"
                                else:  # something wrong, leave old char in place
                                    lig = old_ch
                                chars[-1] = (lig, old_ox, old_oy, old_cwidth)
                                continue
                        chars.append((ch, ox, oy, cwidth))  # all chars on page
        return chars, rows, left, right, rowheight

    def joinligature(lig: str) -> str:
        """Return ligature character for a given pair / triple of characters.

        Args:
            lig: (str) 2/3 characters, e.g. "ff"
        Returns:
            Ligature, e.g. "ff" -> chr(0xFB00). Strings without a known
            ligature are returned unchanged.
        """

        if lig == "ff":
            return chr(0xFB00)
        elif lig == "fi":
            return chr(0xFB01)
        elif lig == "fl":
            return chr(0xFB02)
        elif lig == "ffi":
            return chr(0xFB03)
        elif lig == "ffl":
            return chr(0xFB04)
        elif lig == "ft":
            return chr(0xFB05)
        elif lig == "st":
            return chr(0xFB06)
        return lig

    # --------------------------------------------------------------------
    def make_textline(left, slot, minslot, lchars):
        """Produce the text of one output line.

        Args:
            left: (float) left most coordinate used on page
            slot: (float) width corresponding to one output character
                position (see the computation of 'slot' below).
            minslot: (float) min width for the characters in this line.
            lchars: (list[tuple]) characters of this line, sorted by x.
        Returns:
            text: (str) text string for this line, trailing blanks removed
        Raises:
            RuntimeError: if minslot is not greater than fitz.EPSILON.
        """
        text = ""  # we output this
        old_char = ""
        old_x1 = 0  # end coordinate of last char
        old_ox = 0  # x-origin of last char
        if minslot <= fitz.EPSILON:
            raise RuntimeError("program error: minslot too small = %g" % minslot)

        for c in lchars:  # loop over characters
            char, ox, _, cwidth = c
            ox = ox - left  # its (relative) start coordinate
            x1 = ox + cwidth  # ending coordinate

            # eliminate overprint effect
            if old_char == char and ox - old_ox <= cwidth * 0.2:
                continue

            # omit spaces overlapping previous char
            # NOTE(review): divides by cwidth; a zero-width space that reaches
            # this point would raise ZeroDivisionError - confirm whether
            # that can occur.
            if char == " " and (old_x1 - ox) / cwidth > 0.8:
                continue

            old_char = char
            # close enough to previous?
            if ox < old_x1 + minslot:  # assume char adjacent to previous
                text += char  # append to output
                old_x1 = x1  # new end coord
                old_ox = ox  # new origin.x
                continue

            # else next char starts after some gap:
            # fill in right number of spaces, so char is positioned
            # in the right slot of the line
            if char == " ":  # rest relevant for non-space only
                continue
            delta = int(ox / slot) - len(text)
            if ox > old_x1 and delta > 1:
                text += " " * delta
            # now append char
            text += char
            old_x1 = x1  # new end coordinate
            old_ox = ox  # new origin
        return text.rstrip()

    # extract page text by single characters ("rawdict")
    blocks = page.get_text("rawdict", flags=flags)["blocks"]
    chars, rows, left, right, rowheight = process_blocks(blocks, page)

    if chars == []:
        if not skip_empty:
            textout.write(eop)  # write formfeed
        return
    # compute list of line coordinates - ignoring small (GRID) differences
    rows = curate_rows(rows, GRID)

    # sort all chars by x-coordinates, so every line will receive char info,
    # sorted from left to right.
    chars.sort(key=lambda c: c[1])

    # populate the lines with their char info
    lines = {}  # key: y-coordinate of the line, value: char list
    for c in chars:
        _, _, oy, _ = c
        y = find_line_index(rows, oy)  # y-coord of the right line
        lchars = lines.get(y, [])  # read line chars so far
        lchars.append(c)  # append this char
        lines[y] = lchars  # write back to line

    # ensure line coordinates are ascending
    keys = list(lines.keys())
    keys.sort()

    # -------------------------------------------------------------------------
    # Compute "char resolution" for the page: the char width corresponding to
    # 1 text char position on output - call it 'slot'.
    # For each line, compute median of its char widths. The minimum across all
    # lines is 'slot'.
    # The minimum char width of each line is used to determine if spaces must
    # be inserted in between two characters.
    # Lines with fewer than 2 characters do not contribute to 'slot' and get
    # a minimum width of 1.
    # -------------------------------------------------------------------------
    slot = right - left
    minslots = {}
    for k in keys:
        lchars = lines[k]
        ccount = len(lchars)
        if ccount < 2:
            minslots[k] = 1
            continue
        widths = [c[3] for c in lchars]
        widths.sort()
        this_slot = statistics.median(widths)  # take median value
        if this_slot < slot:
            slot = this_slot
        minslots[k] = widths[0]

    # compute line advance in text output
    # NOTE(review): rowheight appears in both numerator and denominator, so
    # the result is (rows[-1] - rows[0]) / len(rows) * 1.2 whenever rowheight
    # is non-zero; a rowheight of 0 raises ZeroDivisionError here.
    rowheight = rowheight * (rows[-1] - rows[0]) / (rowheight * len(rows)) * 1.2
    rowpos = rows[0]  # first line positioned here
    textout.write(b"\n")
    for k in keys:  # walk through the lines
        while rowpos < k:  # honor distance between lines
            textout.write(b"\n")
            rowpos += rowheight
        text = make_textline(left, slot, minslots[k], lines[k])
        textout.write((text + "\n").encode("utf8", errors="surrogatepass"))
        rowpos = k + rowheight

    textout.write(eop)  # write formfeed
| 800 | |
| 801 | |
def gettext(args):
    """Extract the text of selected pages into a binary output file.

    The output file is ``args.output`` or, if that is None, the document
    name with its extension replaced by ".txt". ``args.mode`` selects the
    page function ("simple", "blocks" or "layout"); the remaining options
    are passed on to it.
    """
    doc = open_file(args.input, args.password, pdf=False)
    pagel = get_list(args.pages, doc.page_count + 1)
    output = args.output
    if output is None:
        filename, _ = os.path.splitext(doc.name)
        output = filename + ".txt"

    # Start with ligatures and whitespace preserved, then adjust. Explicit
    # clear / set operations replace the former XOR toggles.
    # NOTE(review): equivalent only if the three flags are distinct bits;
    # confirm against the library constants.
    flags = TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    if args.convert_white:
        flags &= ~TEXT_PRESERVE_WHITESPACE
    if args.noligatures:
        flags &= ~TEXT_PRESERVE_LIGATURES
    if args.extra_spaces:
        flags |= TEXT_INHIBIT_SPACES

    func = {
        "simple": page_simple,
        "blocks": page_blocksort,
        "layout": page_layout,
    }
    # context manager: the output file is closed even if a page fails
    with open(output, "wb") as textout:
        for pno in pagel:
            page = doc[pno - 1]  # page numbers are 1-based on the command line
            func[args.mode](
                page,
                textout,
                args.grid,
                args.fontsize,
                args.noformfeed,
                args.skip_empty,
                flags=flags,
            )
    doc.close()
| 835 | |
| 836 | |
def _build_parser():
    """Create the argument parser of the 'fitz' command line interface.

    One sub-parser is registered per subcommand. Each sub-parser stores its
    handler function in the ``func`` attribute of the parsed namespace via
    ``set_defaults(func=...)``.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog="fitz",
        description=mycenter("Basic PyMuPDF Functions"),
    )
    subps = parser.add_subparsers(
        title="Subcommands", help="Enter 'command -h' for subcommand specific help"
    )

    # -------------------------------------------------------------------------
    # 'show' command
    # -------------------------------------------------------------------------
    ps_show = subps.add_parser("show", description=mycenter("display PDF information"))
    ps_show.add_argument("input", type=str, help="PDF filename")
    ps_show.add_argument("-password", help="password")
    ps_show.add_argument("-catalog", action="store_true", help="show PDF catalog")
    ps_show.add_argument("-trailer", action="store_true", help="show PDF trailer")
    ps_show.add_argument("-metadata", action="store_true", help="show PDF metadata")
    ps_show.add_argument(
        "-xrefs", type=str, help="show selected objects, format: 1,5-7,N"
    )
    ps_show.add_argument(
        "-pages", type=str, help="show selected pages, format: 1,5-7,50-N"
    )
    ps_show.set_defaults(func=show)

    # -------------------------------------------------------------------------
    # 'clean' command
    # -------------------------------------------------------------------------
    ps_clean = subps.add_parser(
        "clean", description=mycenter("optimize PDF, or create sub-PDF if pages given")
    )
    ps_clean.add_argument("input", type=str, help="PDF filename")
    ps_clean.add_argument("output", type=str, help="output PDF filename")
    ps_clean.add_argument("-password", help="password")

    ps_clean.add_argument(
        "-encryption",
        help="encryption method",
        choices=("keep", "none", "rc4-40", "rc4-128", "aes-128", "aes-256"),
        default="none",
    )

    ps_clean.add_argument("-owner", type=str, help="owner password")
    ps_clean.add_argument("-user", type=str, help="user password")

    ps_clean.add_argument(
        "-garbage",
        type=int,
        help="garbage collection level",
        choices=range(5),
        default=0,
    )

    ps_clean.add_argument(
        "-compress",
        action="store_true",
        default=False,
        help="compress (deflate) output",
    )

    ps_clean.add_argument(
        "-ascii", action="store_true", default=False, help="ASCII encode binary data"
    )

    ps_clean.add_argument(
        "-linear",
        action="store_true",
        default=False,
        help="format for fast web display",
    )

    ps_clean.add_argument(
        "-permission", type=int, default=-1, help="integer with permission levels"
    )

    ps_clean.add_argument(
        "-sanitize",
        action="store_true",
        default=False,
        help="sanitize / clean contents",
    )
    ps_clean.add_argument(
        "-pretty", action="store_true", default=False, help="prettify PDF structure"
    )
    ps_clean.add_argument("-pages", help="output selected pages, format: 1,5-7,50-N")
    ps_clean.set_defaults(func=clean)

    # -------------------------------------------------------------------------
    # 'join' command
    # -------------------------------------------------------------------------
    ps_join = subps.add_parser(
        "join",
        description=mycenter("join PDF documents"),
        epilog="specify each input as 'filename[,password[,pages]]'",
    )
    ps_join.add_argument("input", nargs="*", help="input filenames")
    ps_join.add_argument("-output", required=True, help="output filename")
    ps_join.set_defaults(func=doc_join)

    # -------------------------------------------------------------------------
    # 'extract' command
    # -------------------------------------------------------------------------
    ps_extract = subps.add_parser(
        "extract", description=mycenter("extract images and fonts to disk")
    )
    ps_extract.add_argument("input", type=str, help="PDF filename")
    ps_extract.add_argument("-images", action="store_true", help="extract images")
    ps_extract.add_argument("-fonts", action="store_true", help="extract fonts")
    ps_extract.add_argument(
        "-output", help="folder to receive output, defaults to current"
    )
    ps_extract.add_argument("-password", help="password")
    ps_extract.add_argument(
        "-pages", type=str, help="consider these pages only, format: 1,5-7,50-N"
    )
    ps_extract.set_defaults(func=extract_objects)

    # -------------------------------------------------------------------------
    # 'embed-info' command
    # -------------------------------------------------------------------------
    # Uses its own variable instead of rebinding 'ps_show' from the 'show'
    # command above.
    ps_embed_info = subps.add_parser(
        "embed-info", description=mycenter("list embedded files")
    )
    ps_embed_info.add_argument("input", help="PDF filename")
    ps_embed_info.add_argument("-name", help="if given, report only this one")
    ps_embed_info.add_argument(
        "-detail", action="store_true", help="detail information"
    )
    ps_embed_info.add_argument("-password", help="password")
    ps_embed_info.set_defaults(func=embedded_list)

    # -------------------------------------------------------------------------
    # 'embed-add' command
    # -------------------------------------------------------------------------
    ps_embed_add = subps.add_parser(
        "embed-add", description=mycenter("add embedded file")
    )
    ps_embed_add.add_argument("input", help="PDF filename")
    ps_embed_add.add_argument("-password", help="password")
    ps_embed_add.add_argument(
        "-output", help="output PDF filename, incremental save if none"
    )
    ps_embed_add.add_argument("-name", required=True, help="name of new entry")
    ps_embed_add.add_argument("-path", required=True, help="path to data for new entry")
    ps_embed_add.add_argument("-desc", help="description of new entry")
    ps_embed_add.set_defaults(func=embedded_add)

    # -------------------------------------------------------------------------
    # 'embed-del' command
    # -------------------------------------------------------------------------
    ps_embed_del = subps.add_parser(
        "embed-del", description=mycenter("delete embedded file")
    )
    ps_embed_del.add_argument("input", help="PDF filename")
    ps_embed_del.add_argument("-password", help="password")
    ps_embed_del.add_argument(
        "-output", help="output PDF filename, incremental save if none"
    )
    ps_embed_del.add_argument("-name", required=True, help="name of entry to delete")
    ps_embed_del.set_defaults(func=embedded_del)

    # -------------------------------------------------------------------------
    # 'embed-upd' command
    # -------------------------------------------------------------------------
    ps_embed_upd = subps.add_parser(
        "embed-upd",
        description=mycenter("update embedded file"),
        epilog="except '-name' all parameters are optional",
    )
    ps_embed_upd.add_argument("input", help="PDF filename")
    ps_embed_upd.add_argument("-name", required=True, help="name of entry")
    ps_embed_upd.add_argument("-password", help="password")
    ps_embed_upd.add_argument(
        "-output", help="Output PDF filename, incremental save if none"
    )
    ps_embed_upd.add_argument("-path", help="path to new data for entry")
    ps_embed_upd.add_argument("-filename", help="new filename to store in entry")
    ps_embed_upd.add_argument(
        "-ufilename", help="new unicode filename to store in entry"
    )
    ps_embed_upd.add_argument("-desc", help="new description to store in entry")
    ps_embed_upd.set_defaults(func=embedded_upd)

    # -------------------------------------------------------------------------
    # 'embed-extract' command
    # -------------------------------------------------------------------------
    ps_embed_extract = subps.add_parser(
        "embed-extract", description=mycenter("extract embedded file to disk")
    )
    ps_embed_extract.add_argument("input", type=str, help="PDF filename")
    ps_embed_extract.add_argument("-name", required=True, help="name of entry")
    ps_embed_extract.add_argument("-password", help="password")
    ps_embed_extract.add_argument(
        "-output", help="output filename, default is stored name"
    )
    ps_embed_extract.set_defaults(func=embedded_get)

    # -------------------------------------------------------------------------
    # 'embed-copy' command
    # -------------------------------------------------------------------------
    ps_embed_copy = subps.add_parser(
        "embed-copy", description=mycenter("copy embedded files between PDFs")
    )
    ps_embed_copy.add_argument("input", type=str, help="PDF to receive embedded files")
    ps_embed_copy.add_argument("-password", help="password of input")
    ps_embed_copy.add_argument(
        "-output", help="output PDF, incremental save to 'input' if omitted"
    )
    ps_embed_copy.add_argument(
        "-source", required=True, help="copy embedded files from here"
    )
    ps_embed_copy.add_argument("-pwdsource", help="password of 'source' PDF")
    ps_embed_copy.add_argument(
        "-name", nargs="*", help="restrict copy to these entries"
    )
    ps_embed_copy.set_defaults(func=embedded_copy)

    # -------------------------------------------------------------------------
    # 'gettext' command
    # -------------------------------------------------------------------------
    ps_gettext = subps.add_parser(
        "gettext", description=mycenter("extract text in various formatting modes")
    )
    ps_gettext.add_argument("input", type=str, help="input document filename")
    ps_gettext.add_argument("-password", help="password for input document")
    ps_gettext.add_argument(
        "-mode",
        type=str,
        help="mode: simple, block sort, or layout (default)",
        choices=("simple", "blocks", "layout"),
        default="layout",
    )
    ps_gettext.add_argument(
        "-pages",
        type=str,
        help="select pages, format: 1,5-7,50-N",
        default="1-N",
    )
    ps_gettext.add_argument(
        "-noligatures",
        action="store_true",
        help="expand ligature characters (default False)",
        default=False,
    )
    ps_gettext.add_argument(
        "-convert-white",
        action="store_true",
        help="convert whitespace characters to white (default False)",
        default=False,
    )
    ps_gettext.add_argument(
        "-extra-spaces",
        action="store_true",
        help="fill gaps with spaces (default False)",
        default=False,
    )
    ps_gettext.add_argument(
        "-noformfeed",
        action="store_true",
        help="write linefeeds, no formfeeds (default False)",
        default=False,
    )
    ps_gettext.add_argument(
        "-skip-empty",
        action="store_true",
        help="suppress pages with no text (default False)",
        default=False,
    )
    ps_gettext.add_argument(
        "-output",
        help="store text in this file (default inputfilename.txt)",
    )
    ps_gettext.add_argument(
        "-grid",
        type=float,
        help="merge lines if closer than this (default 2)",
        default=2,
    )
    ps_gettext.add_argument(
        "-fontsize",
        type=float,
        help="only include text with a larger fontsize (default 3)",
        default=3,
    )
    ps_gettext.set_defaults(func=gettext)

    return parser


def main(argv=None):
    """Parse the command line and run the selected subcommand.

    Args:
        argv: optional list of argument strings. With the default ``None``,
            argparse reads the arguments from ``sys.argv[1:]``, which is what
            a plain ``main()`` call has always done.
    """
    parser = _build_parser()

    # -------------------------------------------------------------------------
    # start program
    # -------------------------------------------------------------------------
    args = parser.parse_args(argv)  # create parameter arguments class
    if not hasattr(args, "func"):  # no subcommand given: 'func' was never set
        parser.print_help()  # so print top level help
    else:
        args.func(args)  # execute requested command
| 1133 | |
| 1134 | |
# Run the command line interface only when this file is executed as a script
# (e.g. via "python -m ..."), not when it is imported.
if __name__ == "__main__":
    main()
