Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/utils.py @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | 1d09e1dec1d9 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 # ------------------------------------------------------------------------ | |
| 2 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 3 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 4 # | |
| 5 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a | |
| 6 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 7 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 8 # ------------------------------------------------------------------------ | |
| 9 import io | |
| 10 import json | |
| 11 import math | |
| 12 import os | |
| 13 import random | |
| 14 import string | |
| 15 import tempfile | |
| 16 import typing | |
| 17 import warnings | |
| 18 | |
| 19 from fitz_old import * | |
| 20 | |
# Value of the TESSDATA_PREFIX environment variable at import time
# (None when the variable is not set).
TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX")

# Placeholder strings used as annotations for duck-typed geometry arguments.
point_like = "point_like"
rect_like = "rect_like"
matrix_like = "matrix_like"
quad_like = "quad_like"

# ByteString is gone from typing in 3.14, and collections.abc.Buffer is
# available from 3.12 only, so fall back to an explicit union of the
# builtin binary types when typing no longer offers the name.
ByteString = getattr(typing, "ByteString", None)
if ByteString is None:
    ByteString = bytes | bytearray | memoryview

# Short aliases used in the annotations of this module.
AnyType = typing.Any
OptInt = typing.Optional[int]
OptFloat = typing.Optional[float]
OptStr = typing.Optional[str]
OptDict = typing.Optional[dict]
OptBytes = typing.Optional[ByteString]
OptSeq = typing.Optional[typing.Sequence]

"""
This is a collection of functions to extend PyMupdf.
"""
| 45 | |
| 46 | |
def write_text(page: Page, **kwargs) -> None:
    """Write the text of one or more TextWriter objects to a page.

    A single TextWriter with no rotation and no target rectangle is written
    to the page directly. In every other case all writers are written to a
    page of a temporary document, which is then shown on 'page' via
    page.show_pdf_page().

    Args:
        page: the target page; must be exactly of type Page.
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one TextWriter or a sequence of TextWriter objects.
        opacity: passed on unchanged to TextWriter.write_text().
        color: passed on unchanged to TextWriter.write_text().
        overlay: put in foreground or background (default True).
        keep_proportion: maintain aspect ratio of rectangle sides (default True).
        rotate: rotation angle, converted with int() (default 0).
        oc: the xref of an optional content object (default 0).
    Raises:
        ValueError: wrong page type, unknown keyword, or no writer given.
    """
    if type(page) is not Page:
        raise ValueError("bad page parameter")

    allowed = {
        "rect",
        "writers",
        "opacity",
        "color",
        "overlay",
        "keep_proportion",
        "rotate",
        "oc",
    }
    unknown = {key for key in kwargs if key not in allowed}
    if unknown:
        raise ValueError("bad keywords: " + str(unknown))

    rect = kwargs.get("rect")
    writers = kwargs.get("writers")
    opacity = kwargs.get("opacity")
    color = kwargs.get("color")
    overlay = bool(kwargs.get("overlay", True))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    rotate = int(kwargs.get("rotate", 0))
    oc = int(kwargs.get("oc", 0))

    if not writers:
        raise ValueError("need at least one TextWriter")

    if type(writers) is TextWriter:
        if rotate == 0 and rect is None:
            # simple case: nothing to transform, write straight to the page
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        # treat the single writer like a one-element sequence
        writers = (writers,)

    # collect all writers on a temporary page of the same size ...
    clip = writers[0].text_rect
    textdoc = Document()
    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
    for writer in writers:
        clip |= writer.text_rect  # grow to the union of all text rectangles
        writer.write_text(tpage, opacity=opacity, color=color)

    if rect is None:
        rect = clip

    # ... and show that page on the target page
    page.show_pdf_page(
        rect,
        textdoc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )

    # drop the references to the temporary objects
    textdoc = None
    tpage = None
| 115 | |
| 116 | |
def show_pdf_page(*args, **kwargs) -> int:
    """Show page number 'pno' of PDF 'src' in rectangle 'rect'.

    Positional args:
        page: the target page.
        rect: (rect-like) where to place the source image
        src: (document) source PDF
        pno: (int, optional) source page number. May also be given as a
            keyword; default 0. Negative values count from the end.
    Keyword args:
        overlay: (bool) put in foreground
        keep_proportion: (bool) do not change width-height-ratio
        rotate: (float) rotation angle in degrees
        oc: (int) passed on to the page's internal _show_pdf_page()
        clip: (rect-like) part of source page rectangle
    Returns:
        xref of inserted object (for reuse)
    Raises:
        ValueError: wrong number of positional parameters, one of the
            documents is no PDF, 'rect' or the clipped source rectangle is
            empty or infinite, the source document has no pages, the source
            page is empty, or source and target are the same document.
    """
    if len(args) not in (3, 4):
        raise ValueError("bad number of positional parameters")
    pno = None
    if len(args) == 3:
        page, rect, src = args
    else:
        page, rect, src, pno = args
    if pno is None:
        pno = int(kwargs.get("pno", 0))
    overlay = bool(kwargs.get("overlay", True))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    rotate = float(kwargs.get("rotate", 0))
    oc = int(kwargs.get("oc", 0))
    clip = kwargs.get("clip")

    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate
            the source center to the origin, (2) rotate, (3) scale,
            (4) translate to the target center.
        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees
        Returns:
            Transformation matrix.
        """
        # calc center point of source rect
        smp = (sr.tl + sr.br) / 2.0
        # calc center point of target rect
        tmp = (tr.tl + tr.br) / 2.0

        # m moves to (0, 0), then rotates
        m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= Matrix(fw, fh)  # concat scale matrix
        m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return JM_TUPLE(m)

    CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not src.is_pdf:
        raise ValueError("is no PDF")

    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    if pno < 0:  # support negative page numbers
        page_count = src.page_count
        if page_count < 1:
            # repeatedly adding a page count of 0 would never terminate
            raise ValueError("source document has no pages")
        pno %= page_count  # same result as adding page_count until >= 0
    src_page = src[pno]  # load source page
    if src_page.get_contents() == []:
        raise ValueError("nothing to show - source page empty")

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # names already used by the page's xobjects, images and fonts
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = src._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of src[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref
| 244 | |
| 245 | |
def replace_image(page: Page, xref: int, *, filename=None, pixmap=None, stream=None):
    """Replace the image referred to by xref.

    The object definition stored under xref is overwritten with that of a
    newly inserted image. The page's appearance instructions stay as they
    are, so the new image is displayed with the same bbox, rotation etc.
    Supplying a small fully transparent image gives the effect of a deleted
    image. A typical use is replacing large images by a smaller version,
    e.g. with a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.
    Raises:
        ValueError: xref is not an image, or not exactly one image source
            was given.
    """
    owner = page.parent  # the owning document
    if not owner.xref_is_image(xref):
        raise ValueError("xref not an image")
    given = bool(filename) + bool(stream) + bool(pixmap)
    if given != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")

    # insert new image anywhere in page
    inserted_xref = page.insert_image(
        page.rect,
        filename=filename,
        stream=stream,
        pixmap=pixmap,
    )
    owner.xref_copy(inserted_xref, xref)  # copy over new to old

    # new image insertion has created a new /Contents source,
    # which we will set to spaces now
    contents_xrefs = page.get_contents()
    owner.update_stream(contents_xrefs[-1], b" ")
| 275 | |
| 276 | |
def delete_image(page: Page, xref: int):
    """Delete the image referred to by xref.

    Actually replaces by a small transparent Pixmap using method
    Page.replace_image.

    Args:
        xref: xref of the image to delete.
    """
    # Use the star-imported names like the rest of this module does; the
    # visible imports do not bind a 'fitz_old' name for qualified access.
    # make a small 100% transparent pixmap (of just any dimension)
    pix = Pixmap(csGRAY, (0, 0, 1, 1), 1)
    pix.clear_with()  # clear all samples bytes to 0x00
    page.replace_image(xref, pixmap=pix)
| 289 | |
| 290 | |
def insert_image(page, rect, **kwargs):
    """Insert an image for display in a rectangle.

    Args:
        rect: (rect_like) position of image on the page.
        alpha: (int, optional) set to 0 if image has no transparency.
        filename: (str, Path, file object) image filename.
        height: (int) passed on to the internal insertion method.
        keep_proportion: (bool) keep width / height ratio (default).
        mask: (bytes, optional) image consisting of alpha values to use.
        oc: (int) xref of OCG or OCMD to declare as Optional Content.
        overlay: (bool) put in foreground (default) or background.
        pixmap: (Pixmap) use this as image.
        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
        stream: (bytes) use this as image.
        width: (int) passed on to the internal insertion method.
        xref: (int) use this as image.

    'page' and 'rect' are positional, all other parameters are keywords.

    If 'xref' is given, that image is used. Other input options are ignored.
    Else, exactly one of pixmap, stream or filename must be given.

    'alpha=0' for non-transparent images improves performance significantly.
    Affects stream and filename only.

    Optimum transparent insertions are possible by using filename / stream in
    conjunction with a 'mask' image of alpha values.

    Returns:
        xref (int) of inserted image. Re-use as argument for multiple insertions.
    Raises:
        ValueError: not a PDF, unknown keyword, or an invalid argument.
        FileNotFoundError: 'filename' does not exist.
    """
    CheckParent(page)
    doc = page.parent
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    valid_keys = {
        "alpha",
        "filename",
        "height",
        "keep_proportion",
        "mask",
        "oc",
        "overlay",
        "pixmap",
        "rotate",
        "stream",
        "width",
        "xref",
    }
    s = set(kwargs.keys()).difference(valid_keys)
    if s:
        raise ValueError(f"bad key argument(s): {s}.")

    filename = kwargs.get("filename")
    pixmap = kwargs.get("pixmap")
    stream = kwargs.get("stream")
    mask = kwargs.get("mask")
    rotate = int(kwargs.get("rotate", 0))
    width = int(kwargs.get("width", 0))
    height = int(kwargs.get("height", 0))
    alpha = int(kwargs.get("alpha", -1))
    oc = int(kwargs.get("oc", 0))
    xref = int(kwargs.get("xref", 0))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    overlay = bool(kwargs.get("overlay", True))

    if xref == 0:
        sources = bool(filename) + bool(stream) + bool(pixmap)
        if sources != 1:
            raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")

    # turn path-like or file-like 'filename' values into a plain name
    if filename and type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
        else:
            raise ValueError("bad filename")

    binary_types = (bytes, bytearray, io.BytesIO)
    if filename and not os.path.exists(filename):
        raise FileNotFoundError("No such file: '%s'" % filename)
    if stream and type(stream) not in binary_types:
        raise ValueError("stream must be bytes-like / BytesIO")
    if pixmap and type(pixmap) is not Pixmap:
        raise ValueError("pixmap must be a Pixmap")
    if mask:
        if not (stream or filename):
            raise ValueError("mask requires stream or filename")
        if type(mask) not in binary_types:
            raise ValueError("mask must be bytes-like / BytesIO")

    rotate %= 360  # bring into range 0 <= rotate < 360
    if rotate not in (0, 90, 180, 270):
        raise ValueError("bad rotate value")

    r = Rect(rect)
    if r.is_empty or r.is_infinite:
        raise ValueError("rect must be finite and not empty")
    clip = r * ~page.transformation_matrix

    # Create a unique image reference name: collect the names already used
    # by the page's images, xobjects and fonts, then count up.
    used_names = [i[7] for i in doc.get_page_images(page.number)]
    used_names += [i[1] for i in doc.get_page_xobjects(page.number)]
    used_names += [i[4] for i in doc.get_page_fonts(page.number)]
    counter = 0
    _imgname = "fzImg" + str(counter)  # 'fitz image', first name candidate
    while _imgname in used_names:
        counter += 1
        _imgname = "fzImg" + str(counter)  # try new name

    digests = doc.InsertedImages
    xref, digests = page._insert_image(
        filename=filename,
        pixmap=pixmap,
        stream=stream,
        imask=mask,
        clip=clip,
        overlay=overlay,
        oc=oc,
        xref=xref,
        rotate=rotate,
        keep_proportion=keep_proportion,
        width=width,
        height=height,
        alpha=alpha,
        _imgname=_imgname,
        digests=digests,
    )
    if digests is not None:
        # store the mapping returned by the internal insertion method
        doc.InsertedImages = digests

    return xref
| 424 | |
| 425 | |
def search_for(*args, **kwargs) -> list:
    """Search for a string on a page.

    Positional args:
        page: the page to search.
        text: string to be searched for
    Keyword args:
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created TextPage. If given, no new TextPage is
            made, so 'clip' and 'flags' are not used.
    Returns:
        a list of rectangles or quads, each containing one occurrence.
    Raises:
        ValueError: wrong number of positional parameters, or 'textpage'
            does not belong to this page.
    """
    if len(args) != 2:
        raise ValueError("bad number of positional parameters")
    page, text = args
    quads = kwargs.get("quads", 0)
    clip = kwargs.get("clip")
    textpage = kwargs.get("textpage")
    if clip is not None:  # identity test: never calls the argument's __ne__
        clip = Rect(clip)
    flags = kwargs.get(
        "flags",
        TEXT_DEHYPHENATE
        | TEXT_PRESERVE_WHITESPACE
        | TEXT_PRESERVE_LIGATURES
        | TEXT_MEDIABOX_CLIP,
    )

    CheckParent(page)
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)  # create TextPage
    elif tp.parent != page:
        raise ValueError("not a textpage of this page")
    rlist = tp.search(text, quads=quads)
    if textpage is None:
        del tp  # drop the TextPage that was created only for this call
    return rlist
| 464 | |
| 465 | |
def search_page_for(
    doc: Document,
    pno: int,
    text: str,
    quads: bool = False,
    clip: rect_like = None,
    flags: int = TEXT_DEHYPHENATE
    | TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP,
    textpage: TextPage = None,
) -> list:
    """Search for a string on the page with number 'pno'.

    Notes:
        Convenience function: loads the page via doc[pno] and delegates to
        its search_for() method.
    Args:
        pno: page number
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: reuse a prepared textpage
    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """
    target_page = doc[pno]
    search_options = {
        "quads": quads,
        "clip": clip,
        "flags": flags,
        "textpage": textpage,
    }
    return target_page.search_for(text, **search_options)
| 498 | |
| 499 | |
def get_text_blocks(
    page: Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
) -> list:
    """Return the text blocks on a page.

    Notes:
        Lines in a block are concatenated with line breaks.
    Args:
        clip: passed on when a new TextPage has to be made.
        flags: (int) control the amount of data parsed into the textpage.
        textpage: reuse this TextPage instead of making a new one.
        sort: if True, sort the blocks by items 3 and 0 of each block.
    Returns:
        A list of the blocks. Each item contains the containing rectangle
        coordinates, text lines, block type and running block number.
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    CheckParent(page)
    if flags is None:
        flags = (
            TEXT_PRESERVE_WHITESPACE
            | TEXT_PRESERVE_IMAGES
            | TEXT_PRESERVE_LIGATURES
            | TEXT_MEDIABOX_CLIP
        )

    own_textpage = textpage is None  # True if the TextPage is made here
    if own_textpage:
        tp = page.get_textpage(clip=clip, flags=flags)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    blocks = tp.extractBLOCKS()
    if own_textpage:
        del tp

    if sort is True:

        def position(block):
            return (block[3], block[0])

        blocks.sort(key=position)
    return blocks
| 537 | |
| 538 | |
def get_text_words(
    page: Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
    delimiters=None,
) -> list:
    """Return the text words as a list with the bbox for each word.

    Args:
        clip: passed on when a new TextPage has to be made.
        flags: (int) control the amount of data parsed into the textpage.
        textpage: reuse this TextPage instead of making a new one.
        sort: if True, sort the words by items 3 and 0 of each word tuple.
        delimiters: (str,list) characters to use as word delimiters

    Returns:
        Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    CheckParent(page)
    if flags is None:
        flags = TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES | TEXT_MEDIABOX_CLIP

    own_textpage = textpage is None  # True if the TextPage is made here
    if own_textpage:
        tp = page.get_textpage(clip=clip, flags=flags)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    words = tp.extractWORDS(delimiters)
    if own_textpage:
        del tp

    if sort is True:

        def position(word):
            return (word[3], word[0])

        words.sort(key=position)

    return words
| 573 | |
| 574 | |
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage: TextPage = None,
) -> str:
    """Return the result of TextPage.extractTextbox() for a rectangle.

    Args:
        rect: the rectangle handed to extractTextbox().
        textpage: reuse this TextPage. If None, a TextPage is made with
            page.get_textpage() and dropped again afterwards.
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    own_textpage = textpage is None  # True if the TextPage is made here
    if own_textpage:
        tp = page.get_textpage()
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    boxtext = tp.extractTextbox(rect)
    if own_textpage:
        del tp
    return boxtext
| 589 | |
| 590 | |
def get_text_selection(
    page: Page,
    p1: point_like,
    p2: point_like,
    clip: rect_like = None,
    textpage: TextPage = None,
):
    """Return the result of TextPage.extractSelection() for two points.

    Args:
        p1, p2: the two points handed to extractSelection().
        clip: passed on when a new TextPage has to be made.
        textpage: reuse this TextPage. If None, a TextPage is made with
            flags=TEXT_DEHYPHENATE and dropped again afterwards.
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    CheckParent(page)
    own_textpage = textpage is None  # True if the TextPage is made here
    if own_textpage:
        tp = page.get_textpage(clip=clip, flags=TEXT_DEHYPHENATE)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    selection = tp.extractSelection(p1, p2)
    if own_textpage:
        del tp
    return selection
| 608 | |
| 609 | |
def get_textpage_ocr(
    page: Page,
    flags: int = 0,
    language: str = "eng",
    dpi: int = 72,
    full: bool = False,
    tessdata: str = None,
) -> TextPage:
    """Create a Textpage from combined results of normal and OCR text parsing.

    Args:
        flags: (int) control content becoming part of the result.
        language: (str) specify expected language(s). Default is "eng" (English).
        dpi: (int) resolution in dpi, default 72. Only used when the full
            page is OCRed.
        full: (bool) whether to OCR the full page image, or only its images (default)
        tessdata: (str) passed on to Pixmap.pdfocr_tobytes(). Either this
            argument or the environment variable TESSDATA_PREFIX must be set.
    Returns:
        A TextPage. If OCR of one of the page's images raises RuntimeError,
        the result of a full page OCR is returned instead.
    Raises:
        RuntimeError: neither TESSDATA_PREFIX nor 'tessdata' is set.
    """
    # imported locally: the module's own import block does not import weakref
    import weakref

    CheckParent(page)
    if not os.getenv("TESSDATA_PREFIX") and not tessdata:
        raise RuntimeError("No OCR support: TESSDATA_PREFIX not set")

    def full_ocr(page, dpi, language, flags):
        """OCR a rendering of the whole page and return its TextPage."""
        zoom = dpi / 72
        mat = Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)
        ocr_pdf = Document(
            "pdf",
            pix.pdfocr_tobytes(compress=False, language=language, tessdata=tessdata),
        )
        ocr_page = ocr_pdf.load_page(0)
        # scale the OCR page back to the width of 'page'
        unzoom = page.rect.width / ocr_page.rect.width
        ctm = Matrix(unzoom, unzoom) * page.derotation_matrix
        tpage = ocr_page.get_textpage(flags=flags, matrix=ctm)
        ocr_pdf.close()
        pix = None
        # make the TextPage pass the "textpage of this page" checks
        tpage.parent = weakref.proxy(page)
        return tpage

    # if OCR for the full page, OCR its pixmap @ desired dpi
    if full is True:
        return full_ocr(page, dpi, language, flags)

    # For partial OCR, make a normal textpage, then extend it with text that
    # is OCRed from each image.
    # Because of this, we need the images flag bit set ON.
    tpage = page.get_textpage(flags=flags)
    for block in page.get_text("dict", flags=TEXT_PRESERVE_IMAGES)["blocks"]:
        if block["type"] != 1:  # only look at images
            continue
        bbox = Rect(block["bbox"])
        if bbox.width <= 3 or bbox.height <= 3:  # ignore tiny stuff
            continue
        try:
            pix = Pixmap(block["image"])  # get image pixmap
            if pix.n - pix.alpha != 3:  # we need to convert this to RGB!
                pix = Pixmap(csRGB, pix)
            if pix.alpha:  # must remove alpha channel
                pix = Pixmap(pix, 0)
            imgdoc = Document(
                "pdf", pix.pdfocr_tobytes(language=language, tessdata=tessdata)
            )  # pdf with OCRed page
            imgpage = imgdoc.load_page(0)  # read image as a page
            pix = None
            # compute matrix to transform coordinates back to that of 'page'
            imgrect = imgpage.rect  # page size of image PDF
            shrink = Matrix(1 / imgrect.width, 1 / imgrect.height)
            mat = shrink * block["transform"]
            imgpage.extend_textpage(tpage, flags=0, matrix=mat)
            imgdoc.close()
        except RuntimeError:
            tpage = None
            print("Falling back to full page OCR")
            return full_ocr(page, dpi, language, flags)

    return tpage
| 684 | |
| 685 | |
def get_image_info(page: Page, hashes: bool = False, xrefs: bool = False) -> list:
    """Extract image information only from a TextPage.

    Notes:
        A result computed with hashes is stored as page._image_info and
        reused by later calls as long as it is not empty.
    Args:
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
            Only effective for PDF documents.
    Returns:
        The list delivered by TextPage.extractIMGINFO(). With 'xrefs', each
        item additionally has key "xref", which is 0 if no image of the page
        has the same digest.
    """
    doc = page.parent
    if not doc.is_pdf:
        xrefs = False  # xrefs only make sense for PDFs
    elif xrefs:
        hashes = True  # digests are needed to match images with xrefs

    imginfo = getattr(page, "_image_info", None)
    if imginfo and not xrefs:
        return imginfo  # the cached result is all we need

    if not imginfo:
        tp = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
        imginfo = tp.extractIMGINFO(hashes=hashes)
        del tp
        if hashes:
            page._image_info = imginfo  # remember for later calls

    if not xrefs or not doc.is_pdf:
        return imginfo

    # map the digest of every image in the page's image list to its xref
    digests = {}
    for entry in page.get_images():
        img_xref = entry[0]
        pix = Pixmap(doc, img_xref)
        digests[pix.digest] = img_xref
        del pix

    # add the xref (or 0) to every info item
    for pos, info in enumerate(imginfo):
        info["xref"] = digests.get(info["digest"], 0)
        imginfo[pos] = info
    return imginfo
| 722 | |
| 723 | |
def get_image_rects(page: Page, name, transform=False) -> list:
    """Return list of image positions on a page.

    Notes:
        Positions are found by comparing the digest of the image with the
        digests in the output of page.get_image_info(hashes=True).
    Args:
        name: (str, list, int) image identification. May be reference name, an
            item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.
    Returns:
        A list of Rect objects or tuples of (Rect, Matrix) for all image
        locations on the page.
    Raises:
        ValueError: a reference name matches no or more than one image.
    """
    kind = type(name)
    if kind in (list, tuple):
        xref = name[0]  # item of the page's image list
    elif kind is int:
        xref = name
    else:
        # reference name: must identify exactly one image of the page
        matches = [img for img in page.get_images() if img[7] == name]
        if matches == []:
            raise ValueError("bad image name")
        if len(matches) != 1:
            raise ValueError("multiple image names found")
        xref = matches[0][0]

    pix = Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
    digest = pix.digest
    del pix

    hits = [im for im in page.get_image_info(hashes=True) if im["digest"] == digest]
    if transform:
        return [(Rect(im["bbox"]), Matrix(im["transform"])) for im in hits]
    return [Rect(im["bbox"]) for im in hits]
| 759 | |
| 760 | |
def get_text(
    page: Page,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
    delimiters=None,
):
    """Extract text from a page or an annotation.

    This is a unifying wrapper for various methods of the TextPage class.

    Args:
        option: (str) text, words, blocks, html, dict, json, rawdict,
            rawjson, xhtml or xml. Case is ignored.
        clip: (rect-like) restrict output to this area. Replaced by the
            page's cropbox for options html, xml and xhtml.
        flags: bit switches to e.g. exclude images or decompose ligatures.
            If None, a default depending on 'option' is used.
        textpage: reuse this TextPage and make no new one. If specified,
            'flags' and 'clip' are ignored.
        sort: passed on to the extraction methods that accept it.
        delimiters: passed on to get_text_words() for option "words".

    Returns:
        the output of methods get_text_words / get_text_blocks or TextPage
        methods extractText, extractHTML, extractDICT, extractJSON,
        extractRAWDICT, extractRAWJSON, extractXHTML or extractXML
        respectively.
        Default and misspelling choice is "text".
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    # NOTE(review): 'fitz' is not bound by the imports visible in this
    # module - confirm that the star import provides it.
    formats = {
        "text": fitz.TEXTFLAGS_TEXT,
        "html": fitz.TEXTFLAGS_HTML,
        "json": fitz.TEXTFLAGS_DICT,
        "rawjson": fitz.TEXTFLAGS_RAWDICT,
        "xml": fitz.TEXTFLAGS_XML,
        "xhtml": fitz.TEXTFLAGS_XHTML,
        "dict": fitz.TEXTFLAGS_DICT,
        "rawdict": fitz.TEXTFLAGS_RAWDICT,
        "words": fitz.TEXTFLAGS_WORDS,
        "blocks": fitz.TEXTFLAGS_BLOCKS,
    }
    option = option.lower()
    if option not in formats:
        option = "text"
    if flags is None:
        flags = formats[option]

    if option == "words":
        return get_text_words(
            page,
            clip=clip,
            flags=flags,
            textpage=textpage,
            sort=sort,
            delimiters=delimiters,
        )
    if option == "blocks":
        return get_text_blocks(
            page, clip=clip, flags=flags, textpage=textpage, sort=sort
        )

    CheckParent(page)
    if option in ("html", "xml", "xhtml"):  # no clipping for MuPDF functions
        clip = page.cropbox

    # 'cb' is only handed to the dict / json extractions below; it is the
    # cropbox of a real Page when no clip is in effect, else None.
    cb = None
    if clip is not None:  # identity test: never calls the argument's __ne__
        clip = Rect(clip)
    elif type(page) is Page:
        cb = page.cropbox

    # TextPage with or without images
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)
    elif tp.parent != page:
        raise ValueError("not a textpage of this page")

    if option == "json":
        t = tp.extractJSON(cb=cb, sort=sort)
    elif option == "rawjson":
        t = tp.extractRAWJSON(cb=cb, sort=sort)
    elif option == "dict":
        t = tp.extractDICT(cb=cb, sort=sort)
    elif option == "rawdict":
        t = tp.extractRAWDICT(cb=cb, sort=sort)
    elif option == "html":
        t = tp.extractHTML()
    elif option == "xml":
        t = tp.extractXML()
    elif option == "xhtml":
        t = tp.extractXHTML()
    else:
        t = tp.extractText(sort=sort)

    if textpage is None:
        del tp  # drop the TextPage that was created only for this call
    return t
| 855 | |
| 856 | |
def get_page_text(
    doc: Document,
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().
    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: passed on to page.get_text().
        flags: passed on to page.get_text().
        textpage: passed on to page.get_text().
        sort: passed on to page.get_text().
    Returns:
        output from page.get_text().
    """
    # 'textpage' used to be accepted but silently dropped; forward it so
    # that a prepared TextPage is actually reused.
    return doc[pno].get_text(
        option,
        clip=clip,
        flags=flags,
        textpage=textpage,
        sort=sort,
    )
| 877 | |
| 878 | |
def get_pixmap(
    page: Page,
    *,
    matrix: matrix_like = Identity,
    dpi=None,
    colorspace: Colorspace = csRGB,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> Pixmap:
    """Render the page to a pixmap.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given (and non-zero), 'matrix' is
            replaced by a uniform zoom of dpi / 72 and the resolution is
            stored in the resulting pixmap.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default
            csRGB. Any other string also selects csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations
    Raises:
        ValueError: if the colorspace does not have 1, 3 or 4 components.
    """
    CheckParent(page)

    use_dpi = bool(dpi)
    if use_dpi:
        scale = dpi / 72
        matrix = Matrix(scale, scale)

    if type(colorspace) is str:
        # translate a colorspace name; unknown names fall back to RGB
        by_name = {"GRAY": csGRAY, "CMYK": csCMYK}
        colorspace = by_name.get(colorspace.upper(), csRGB)
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    # the display list is only a temporary and is dropped right after rendering
    pix = page.get_displaylist(annots=annots).get_pixmap(
        matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip
    )
    if use_dpi:
        pix.set_dpi(dpi, dpi)
    return pix
| 920 | |
| 921 | |
def get_page_pixmap(
    doc: Document,
    pno: int,
    *,
    matrix: matrix_like = Identity,
    dpi=None,
    colorspace: Colorspace = csRGB,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> Pixmap:
    """Render a document page, addressed by page number, to a pixmap.

    Notes:
        Convenience function: loads page 'pno' and calls its get_pixmap()
        method with all keyword arguments passed through unchanged.
    Args:
        pno: (int) page number
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch, passed on to page.get_pixmap().
        colorspace: (str,Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) include alpha channel
        annots: (bool) also render annotations
    """
    render_options = {
        "matrix": matrix,
        "dpi": dpi,
        "colorspace": colorspace,
        "clip": clip,
        "alpha": alpha,
        "annots": annots,
    }
    target_page = doc[pno]
    return target_page.get_pixmap(**render_options)
| 953 | |
| 954 | |
def getLinkDict(ln) -> dict:
    """Convert a link-like object into a link dictionary.

    Args:
        ln: an object offering a 'dest' attribute (with 'kind', 'flags',
            'lt', 'rb', ...) and optionally a 'rect' attribute. get_links()
            passes page links, get_toc() passes outline items.
    Returns:
        A dict that always contains "kind" and "xref" (0 here), "from" when
        ln.rect could be read, plus kind-specific keys: "uri" (LINK_URI),
        "page"/"to"/"zoom" (LINK_GOTO), "file"/"page"/"to"/"zoom"
        (LINK_GOTOR), "file" (LINK_LAUNCH), "name" (LINK_NAMED), or just
        "page" for any other kind.
    """
    # Read the destination once and reuse it, instead of re-evaluating
    # 'ln.dest' for every single field.
    dest = ln.dest
    kind = dest.kind
    nl = {"kind": kind, "xref": 0}

    try:
        nl["from"] = ln.rect
    except Exception:
        # Best effort: presumably not every object passed in provides a
        # usable 'rect' - "from" is simply left out then. Unlike a bare
        # 'except', this lets KeyboardInterrupt / SystemExit propagate.
        pass

    flags = dest.flags

    # target point: a coordinate is only taken over if flagged as valid
    pnt = Point(0, 0)
    if flags & LINK_FLAG_L_VALID:
        pnt.x = dest.lt.x
    if flags & LINK_FLAG_T_VALID:
        pnt.y = dest.lt.y

    def zoom_value():
        """Return the zoom factor if the destination carries one, else 0.0."""
        if flags & LINK_FLAG_R_IS_ZOOM:
            return dest.rb.x
        return 0.0

    if kind == LINK_URI:
        nl["uri"] = dest.uri

    elif kind == LINK_GOTO:
        nl["page"] = dest.page
        nl["to"] = pnt
        nl["zoom"] = zoom_value()

    elif kind == LINK_GOTOR:
        nl["file"] = dest.fileSpec.replace("\\", "/")
        nl["page"] = dest.page
        if dest.page < 0:
            # negative page number: 'dest.dest' is used as the target
            nl["to"] = dest.dest
        else:
            nl["to"] = pnt
            nl["zoom"] = zoom_value()

    elif kind == LINK_LAUNCH:
        nl["file"] = dest.fileSpec.replace("\\", "/")

    elif kind == LINK_NAMED:
        nl["name"] = dest.named

    else:
        nl["page"] = dest.page

    return nl
| 1000 | |
| 1001 | |
def get_links(page: Page) -> list:
    """Return a list with one link dictionary per link on the page.

    Notes:
        See the PyMuPDF documentation for details. For PDF pages the
        entries are additionally given their "xref" and "id" values, but
        only if the number of link annotations reported by
        page.annot_xrefs() equals the number of links found.
    """
    CheckParent(page)

    # walk the chain of links starting at the page's first link
    links = []
    current = page.first_link
    while current:
        links.append(getLinkDict(current))
        current = current.next

    if not links or not page.parent.is_pdf:
        return links

    link_annots = [item for item in page.annot_xrefs() if item[1] == PDF_ANNOT_LINK]
    if len(link_annots) != len(links):
        return links  # counts differ: leave the entries unchanged

    for entry, item in zip(links, link_annots):
        entry["xref"] = item[0]
        entry["id"] = item[2]
    return links
| 1023 | |
| 1024 | |
def get_toc(
    doc: Document,
    simple: bool = True,
) -> list:
    """Create a table of contents.

    Args:
        simple: a bool to control output. Each returned entry is a list of
            outline level, title and page number; if 'simple' is false, a
            link destination dictionary is appended as fourth item. For
            details see PyMuPDF's documentation.
    Returns:
        A list of entries, empty if the document has no outline.
    Raises:
        ValueError: if the document is closed.
    """
    toc = []

    def walk(item, lvl):
        """Append all items of the chain starting at 'item' to toc, descending into children."""
        while item:
            title = item.title or " "  # an empty title is stored as one space

            # 1-based page number, or -1 for external items and items without uri
            page = -1
            if not item.is_external and item.uri:
                if item.page == -1:
                    page = doc.resolve_link(item.uri)[0] + 1
                else:
                    page = item.page + 1

            row = [lvl, title, page]
            if not simple:
                row.append(getLinkDict(item))
            toc.append(row)

            if item.down:
                walk(item.down, lvl + 1)
            item = item.next

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    first_item = doc.outline
    if not first_item:
        return []
    walk(first_item, 1)
    if doc.is_pdf and simple is False:
        doc._extend_toc_items(toc)
    return toc
| 1079 | |
| 1080 | |
def del_toc_item(
    doc: Document,
    idx: int,
) -> None:
    """Delete the TOC / bookmark item at position 'idx' of the outline xref list."""
    outline_xrefs = doc.get_outline_xrefs()
    doc._remove_toc_item(outline_xrefs[idx])
| 1088 | |
| 1089 | |
def set_toc_item(
    doc: Document,
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx: (int) desired index of the TOC list, as created by get_toc.
        dest_dict: (dict) destination dictionary as created by get_toc(False).
            Overrules all other parameters except 'title'. If it is not a
            dict, the remaining parameters are used to make a destination
            dictionary. For LINK_GOTO its "page" entry is used directly as
            the page index. The passed-in dictionary is not modified.
        kind: (int) kind of link (LINK_GOTO, etc.). If None, then only the
            title will be updated. If LINK_NONE, the TOC item will be deleted.
        pno: (int) page number (1-based like in get_toc). Required if LINK_GOTO.
        uri: (str) the URL, required if LINK_URI.
        title: (str) the new title. No change if None.
        to: (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename: (str) destination filename, required for LINK_GOTOR and
            LINK_LAUNCH.
        zoom: (float) a zoom factor for the target location (LINK_GOTO).
    Raises:
        ValueError: for a bad page number, a bad color value, or if no valid
            action string could be built from the destination.
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        # Work on a shallow copy so the caller's dictionary (and the Point
        # stored in it) stay untouched. Otherwise reusing the same dict for
        # a second call would flip the y-coordinate again.
        dest_dict = dict(dest_dict)
        if dest_dict["kind"] == LINK_GOTO:
            pno = dest_dict["page"]
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            to = Point(dest_dict.get("to", Point(72, 36)))
            to.y = page_height - to.y  # y is measured from the other page edge in the PDF
            dest_dict["to"] = to
        action = getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")
        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold  # bit 0: italic, bit 1: bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],  # strip the leading "/A"
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = Point(72, page_height - 36)
        else:
            to = Point(to)  # copy, so the caller's point is not modified
            to.y = page_height - to.y

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = getDestStr(page_xref, ddict)
    # an empty string also fails this test
    if not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)
| 1185 | |
| 1186 | |
def get_area(*args) -> float:
    """Calculate the area of a rectangle.

    Args:
        args[0]: the rectangle; its 'width' and 'height' are used.
        args[1]: (optional) unit of the result, one of 'px' (default),
            'in', 'cm', or 'mm'.
    Returns:
        width * height, scaled by the square of the unit's conversion factor.
    """
    rect = args[0]
    unit = args[1] if len(args) > 1 else "px"
    # per unit: (numerator, denominator) of the linear conversion factor
    conversions = {
        "px": (1, 1),
        "in": (1.0, 72.0),
        "cm": (2.54, 72.0),
        "mm": (25.4, 72.0),
    }
    numerator, denominator = conversions[unit]
    factor = (numerator / denominator) ** 2
    return factor * rect.width * rect.height
| 1197 | |
| 1198 | |
def set_metadata(doc: Document, m: dict) -> None:
    """Update the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata. An empty dictionary removes the
            reference to an existing /Info object. Values that are empty,
            "none" or "null" are written as PDF null. The keys "format" and
            "encryption" are accepted but not written.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, if
            'm' is not a dict, or if it contains unknown keys.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(m) is not dict:
        raise ValueError("bad metadata")

    # metadata key -> PDF /Info key (None: key is accepted but never written)
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    unknown_keys = set(m.keys()).difference(set(keymap.keys()))
    if unknown_keys:
        raise ValueError("bad dict key(s): %s" % unknown_keys)

    # locate the current /Info object via the PDF trailer (xref -1)
    key_type, key_value = doc.xref_get_key(-1, "Info")
    if key_type == "xref":
        info_xref = int(key_value.replace("0 R", ""))
    else:
        info_xref = 0

    if m == {}:
        if info_xref != 0:  # remove existing metadata
            doc.xref_set_key(-1, "Info", "null")
        return  # otherwise nothing to do

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)

    writable = [(keymap[k], v) for k, v in m.items() if keymap[k] is not None]
    for pdf_key, val in writable:
        if not val or val in ("none", "null"):
            pdf_val = "null"
        else:
            pdf_val = get_pdf_str(val)
        doc.xref_set_key(info_xref, pdf_key, pdf_val)
    doc.init_doc()
    return
| 1256 | |
| 1257 | |
def getDestStr(xref: int, ddict: dict) -> str:
    """Calculate the PDF action string.

    Notes:
        Supports Link annotations and outline items (bookmarks).
    Args:
        xref: xref number of the target page, used for LINK_GOTO.
        ddict: destination dictionary with a "kind" entry, or a plain
            number, which is taken as the vertical target coordinate of a
            GoTo action on page 'xref'.
    Returns:
        A string starting with "/A", or "" if 'ddict' is empty / zero /
        None, its kind is LINK_NONE (or missing), or the kind is not
        supported.
    """
    if not ddict:
        return ""
    str_goto = "/A<</S/GoTo/D[%i 0 R/XYZ %g %g %g]>>"
    str_gotor1 = "/A<</S/GoToR/D[%s /XYZ %g %g %g]/F<</F%s/UF%s/Type/Filespec>>>>"
    str_gotor2 = "/A<</S/GoToR/D%s/F<</F%s/UF%s/Type/Filespec>>>>"
    str_launch = "/A<</S/Launch/F<</F%s/UF%s/Type/Filespec>>>>"
    str_uri = "/A<</S/URI/URI%s>>"

    if type(ddict) in (int, float):  # a number: only the vertical coordinate
        return str_goto % (xref, 0, ddict, 0)

    d_kind = ddict.get("kind", LINK_NONE)

    if d_kind == LINK_NONE:
        return ""

    if d_kind == LINK_GOTO:
        d_zoom = ddict.get("zoom", 0)
        d_left, d_top = ddict.get("to", Point(0, 0))
        return str_goto % (xref, d_left, d_top, d_zoom)

    if d_kind == LINK_URI:
        return str_uri % (get_pdf_str(ddict["uri"]),)

    if d_kind == LINK_LAUNCH:
        fspec = get_pdf_str(ddict["file"])
        return str_launch % (fspec, fspec)

    if d_kind == LINK_GOTOR:
        is_named = ddict["page"] < 0  # negative page: "to" is used as the destination itself
        fspec = get_pdf_str(ddict["file"])
        if is_named:
            return str_gotor2 % (get_pdf_str(ddict["to"]), fspec, fspec)
        # Same defaults as for LINK_GOTO: a missing "zoom" or "to" no longer
        # raises, and "to" may be any object unpacking to two coordinates.
        d_zoom = ddict.get("zoom", 0)
        d_left, d_top = ddict.get("to", Point(0, 0))
        return str_gotor1 % (ddict["page"], d_left, d_top, d_zoom, fspec, fspec)

    return ""
| 1314 | |
| 1315 | |
def set_toc(
    doc: Document,
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally a 4th item: a number (top margin on the page) or a
            destination dictionary. None or '()' remove the TOC. The
            entries, including destination dictionaries, are not modified.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.
    Returns:
        the number of inserted items, or the number of removed items respectively.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or if
            'toc' fails the validity checks.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    if type(t0) not in (list, tuple):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    # NOTE(review): the page number of the last row is not range-checked
    # here; it is only clamped when the items are built further down.
    for i in range(toclen - 1):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        if t2[0] > t1[0] + 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    doc._delToC()  # delete the old outlines; their xref numbers are not reused

    # entry zero is the outline root xref number, followed by one new xref
    # per TOC item
    xref = [doc._getOLRootNumber()]
    for _ in range(toclen):
        xref.append(doc.get_new_xref())

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as dictionaries - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = get_pdf_str(o[1])  # title
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number, clamped
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                if type(o[3]) is dict:
                    # Copy the dict: "to" is overwritten below and this must
                    # not leak into the caller's toc (reusing the same toc
                    # would otherwise transform the point a second time).
                    dest_dict = dict(o[3])
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    point = +dest_dict["to"]  # unary plus: work on a copy of the point
                    point.y = page_height - point.y
                    dest_dict["to"] = point

        item_no = i + 1  # index of this item in olitems / xref
        parent_no = lvltab[lvl - 1]
        d = {
            "first": -1,
            "count": 0,
            "last": -1,
            "prev": -1,
            "next": -1,
            "dest": getDestStr(page_xref, dest_dict),
            "top": dest_dict["to"],
            "title": title,
            "parent": parent_no,
            "xref": xref[item_no],
            "color": dest_dict.get("color"),
            "flags": dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0),
        }
        lvltab[lvl] = item_no
        parent = olitems[parent_no]  # the parent entry

        if dest_dict.get("collapse") or (collapse and lvl > collapse):
            parent["count"] -= 1  # suppress expansion: make /Count negative
        else:
            parent["count"] += 1  # positive /Count

        if parent["first"] == -1:  # first child of this parent
            parent["first"] = item_no
            parent["last"] = item_no
        else:  # append to the parent's chain of children
            d["prev"] = parent["last"]
            olitems[parent["last"]]["next"] = item_no
            parent["last"] = item_no
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # ------------------------------------------------------------------------------
    for i, ol in enumerate(olitems):
        # The root entry only has "count", "first", "last" and "xref"; all
        # other keys are therefore treated as optional.
        parts = ["<<"]
        if ol["count"] != 0:
            parts.append("/Count %i" % ol["count"])
        parts.append(ol.get("dest", ""))
        for key, pdf_key in (
            ("first", "/First"),
            ("last", "/Last"),
            ("next", "/Next"),
            ("parent", "/Parent"),
            ("prev", "/Prev"),
        ):
            target = ol.get(key, -1)
            if target > -1:
                parts.append("%s %i 0 R" % (pdf_key, xref[target]))
        if "title" in ol:
            parts.append("/Title" + ol["title"])

        if ol.get("color") and len(ol["color"]) == 3:
            parts.append("/C[ %g %g %g]" % tuple(ol["color"]))
        if ol.get("flags", 0) > 0:
            parts.append("/F %i" % ol["flags"])

        if i == 0:  # special: this is the outline root
            parts.append("/Type/Outlines")  # so add the /Type entry
        parts.append(">>")
        doc.update_object(xref[i], "".join(parts))  # insert the PDF object

    doc.init_doc()
    return toclen
| 1491 | |
| 1492 | |
def do_links(
    doc1: Document,
    doc2: Document,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
) -> None:
    """Insert links contained in copied page range into destination PDF.

    Parameter values **must** equal those of method insert_pdf(), which must
    have been previously executed.

    Args:
        doc1: the destination PDF, which receives the link annotations.
        doc2: the source PDF, whose page links are read.
        from_page: first source page; negative values mean 0, too large
            values mean the last page.
        to_page: last source page; out-of-range values mean the last page.
        start_at: destination page number of the first copied page.
    Raises:
        ValueError: if 'start_at' is negative.
    """

    def cre_annot(lnk, xref_dst, pno_src, ctm):
        """Return the annotation object string for a link dict ('' if the kind is not handled)."""
        rect = "%g %g %g %g" % tuple(lnk["from"] * ctm)  # rect in PDF coordinates
        kind = lnk["kind"]

        if kind == LINK_GOTO:
            skel = annot_skel["goto1"]
            idx = pno_src.index(lnk["page"])
            p = lnk["to"] * ctm  # target point in PDF coordinates
            return skel % (xref_dst[idx], p.x, p.y, lnk["zoom"], rect)

        if kind == LINK_GOTOR:
            if lnk["page"] >= 0:
                skel = annot_skel["gotor1"]
                pnt = lnk.get("to", Point(0, 0))  # destination point
                if type(pnt) is not Point:
                    pnt = Point(0, 0)
                fields = (
                    lnk["page"],
                    pnt.x,
                    pnt.y,
                    lnk["zoom"],
                    lnk["file"],
                    lnk["file"],
                    rect,
                )
                return skel % fields
            skel = annot_skel["gotor2"]
            to = get_pdf_str(lnk["to"])[1:-1]  # drop first and last character
            return skel % (to, lnk["file"], rect)

        if kind == LINK_LAUNCH:
            return annot_skel["launch"] % (lnk["file"], lnk["file"], rect)

        if kind == LINK_URI:
            return annot_skel["uri"] % (lnk["uri"], rect)

        return ""

    # validate & normalize parameters
    if from_page < 0:
        fp = 0
    elif from_page >= doc2.page_count:
        fp = doc2.page_count - 1
    else:
        fp = from_page

    if 0 <= to_page < doc2.page_count:
        tp = to_page
    else:
        tp = doc2.page_count - 1

    if start_at < 0:
        raise ValueError("'start_at' must be >= 0")

    step = 1 if fp <= tp else -1  # page range could be reversed

    # lists of source / destination page numbers
    pno_src = list(range(fp, tp + step, step))
    pno_dst = list(range(start_at, start_at + len(pno_src)))

    # destination page xrefs; the source xref value is not needed below,
    # but the lookup is kept so that any error it raises still surfaces here
    xref_dst = []
    for p_src, p_dst in zip(pno_src, pno_dst):
        doc2.page_xref(p_src)
        xref_dst.append(doc1.page_xref(p_dst))

    # create the links for each copied page in destination PDF
    for p_src, p_dst in zip(pno_src, pno_dst):
        page_src = doc2[p_src]  # load source page
        links = page_src.get_links()  # get all its links
        if len(links) == 0:  # no links there
            continue
        ctm = ~page_src.transformation_matrix  # inverse of the page transformation matrix
        page_dst = doc1[p_dst]  # load destination page

        # skip GOTO links whose target is not among the copied pages
        eligible = (
            l for l in links if not (l["kind"] == LINK_GOTO and l["page"] not in pno_src)
        )
        annot_texts = (cre_annot(l, xref_dst, pno_src, ctm) for l in eligible)
        link_tab = tuple(text for text in annot_texts if text)
        if link_tab:
            page_dst._addAnnot_FromString(link_tab)

    return
| 1611 | |
| 1612 | |
def getLinkText(page: Page, lnk: dict) -> str:
    """Build the annotation object string for a link dictionary.

    Args:
        page: the page owning the link; provides the transformation matrix,
            the parent document and the existing annotation xrefs.
        lnk: link dictionary with at least "kind" and "from", plus the keys
            required by its kind.
    Returns:
        The object definition including a /NM entry, or "" if the link kind
        is not handled here.
    """
    ictm = ~page.transformation_matrix
    rect = "%g %g %g %g" % tuple(lnk["from"] * ictm)
    kind = lnk["kind"]

    annot = ""
    if kind == LINK_GOTO:
        if lnk["page"] >= 0:
            skel = annot_skel["goto1"]
            target_xref = page.parent.page_xref(lnk["page"])
            target = lnk.get("to", Point(0, 0)) * ictm  # destination point
            annot = skel % (target_xref, target.x, target.y, lnk.get("zoom", 0), rect)
        else:
            skel = annot_skel["goto2"]
            annot = skel % (get_pdf_str(lnk["to"]), rect)

    elif kind == LINK_GOTOR:
        if lnk["page"] >= 0:
            skel = annot_skel["gotor1"]
            pnt = lnk.get("to", Point(0, 0))  # destination point
            if type(pnt) is not Point:
                pnt = Point(0, 0)
            fields = (
                lnk["page"],
                pnt.x,
                pnt.y,
                lnk.get("zoom", 0),
                lnk["file"],
                lnk["file"],
                rect,
            )
            annot = skel % fields
        else:
            skel = annot_skel["gotor2"]
            annot = skel % (get_pdf_str(lnk["to"]), lnk["file"], rect)

    elif kind == LINK_LAUNCH:
        annot = annot_skel["launch"] % (lnk["file"], lnk["file"], rect)

    elif kind == LINK_URI:
        annot = annot_skel["uri"] % (lnk["uri"], rect)

    elif kind == LINK_NAMED:
        annot = annot_skel["named"] % (lnk["name"], rect)

    if not annot:
        return annot

    # existing link annotations of the page: xref -> id
    link_names = {
        item[0]: item[2] for item in page.annot_xrefs() if item[1] == PDF_ANNOT_LINK
    }

    old_name = lnk.get("id", "")  # id value in the argument

    if old_name and link_names.get(lnk["xref"]) == old_name:
        name = old_name  # no new name if this is an update only
    else:
        # pick the first "<stem>-L<i>" that is not yet used on the page
        used_names = set(link_names.values())
        stem = TOOLS.set_annot_stem() + "-L%i"
        i = 0
        name = stem % i
        while name in used_names:
            i += 1
            name = stem % i

    # add /NM key to object definition
    # NOTE(review): str.replace changes every "/Link" occurrence, so a uri,
    # file or name value containing "/Link" would be altered too - confirm
    # whether that can happen with the skeletons in annot_skel.
    return annot.replace("/Link", "/Link/NM(%s)" % name)
| 1688 | |
| 1689 | |
def delete_widget(page: Page, widget: Widget) -> Widget:
    """Delete a widget from the page and return its successor.

    The widget's annotation is deleted from the page and detached from its
    parent; afterwards all instance attributes of 'widget' are removed.

    Raises:
        ValueError: if 'widget' has no '_annot' attribute (or it is None).
    """
    CheckParent(page)
    if getattr(widget, "_annot", None) is None:
        raise ValueError("bad type: widget")
    successor = widget.next  # must be read before the widget is emptied
    page.delete_annot(widget._annot)
    widget._annot.__del__()
    widget._annot.parent = None
    attributes = widget.__dict__
    for attr_name in list(attributes.keys()):
        del attributes[attr_name]
    return successor
| 1704 | |
| 1705 | |
def update_link(page: Page, lnk: dict) -> None:
    """Update a link on the current page.

    The object with xref lnk["xref"] is replaced by the definition that
    getLinkText() builds from 'lnk'.

    Raises:
        ValueError: if getLinkText() returns no definition for this link kind.
    """
    CheckParent(page)
    definition = getLinkText(page, lnk)
    if not definition:
        raise ValueError("link kind not supported")

    pdf = page.parent
    pdf.update_object(lnk["xref"], definition, page=page)
    return
| 1715 | |
| 1716 | |
def insert_link(page: Page, lnk: dict, mark: bool = True) -> None:
    """Insert a new link for the current page.

    The definition built by getLinkText() from 'lnk' is added to the page as
    a new annotation. The 'mark' argument is not used.

    Raises:
        ValueError: if getLinkText() returns no definition for this link kind.
    """
    CheckParent(page)
    definition = getLinkText(page, lnk)
    if not definition:
        raise ValueError("link kind not supported")
    new_annots = (definition,)
    page._addAnnot_FromString(new_annots)
    return
| 1725 | |
| 1726 | |
def insert_textbox(
    page: Page,
    rect: rect_like,
    buffer: typing.Union[str, list],
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Notes:
        Creates a new Shape for the page, calls the shape's same-named
        method and commits the shape only if that call returned a
        non-negative value.
    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background
    Returns:
        unused or deficit rectangle area (float)
    """
    # everything except 'overlay' is handed over to the shape's method
    text_options = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        expandtabs=expandtabs,
        render_mode=render_mode,
        border_width=border_width,
        align=align,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    unused = shape.insert_textbox(rect, buffer, **text_options)
    if unused >= 0:
        shape.commit(overlay)
    return unused
| 1795 | |
| 1796 | |
def insert_text(
    page: Page,
    point: point_like,
    text: typing.Union[str, list],
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> int:
    """Insert text starting at a given point.

    Notes:
        Creates a Shape object, uses its same-named method and commits it.
        The shape is committed only when that method returns a non-negative
        value.
    Parameters:
        point: (point-like) position handed to the shape method.
        text: text to be inserted (string or list).
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the shape
        method.
    Returns:
        The value returned by the shape's insert_text() (per the PyMuPDF
        documentation, the number of inserted lines).
    """
    img = page.new_shape()
    rc = img.insert_text(
        point,
        text,
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        border_width=border_width,
        render_mode=render_mode,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    # Nothing is written to the page when the shape reports a negative result.
    if rc >= 0:
        img.commit(overlay)
    return rc
| 1841 | |
| 1842 | |
def new_page(
    doc: Document,
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> Page:
    """Create a new page in the document and return it.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).
    Returns:
        The page found at index ``pno`` after the insertion.
    """
    page_size = {"width": width, "height": height}
    doc._newPage(pno, **page_size)
    created = doc[pno]
    return created
| 1860 | |
| 1861 | |
def insert_page(
    doc: Document,
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Create a new PDF page and optionally put some text on it.

    Notes:
        Combines Document.new_page() and Page.insert_text().
        For parameter details see these methods. The text, if any, is
        inserted at position (50, 72).
    Returns:
        0 when ``text`` is empty or None, otherwise the result of
        Page.insert_text().
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if text:
        start = (50, 72)
        return page.insert_text(
            start,
            text,
            fontsize=fontsize,
            fontname=fontname,
            fontfile=fontfile,
            color=color,
        )
    # The page is still created when there is no text to insert.
    return 0
| 1891 | |
| 1892 | |
def draw_line(
    page: Page,
    p1: point_like,
    p2: point_like,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a line from point p1 to point p2.

    Notes:
        Obtains a new shape from the page, draws the line, applies the
        styling with the shape's finish() (path is never closed) and
        commits the shape.
    Args:
        p1: (point-like) start of the line.
        p2: (point-like) end of the line.
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to finish().
    Returns:
        The value returned by the shape's draw_line().
    """
    img = page.new_shape()
    p = img.draw_line(Point(p1), Point(p2))
    img.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return p
| 1926 | |
| 1927 | |
def draw_squiggle(
    page: Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a squiggly line between the points p1 and p2.

    Args:
        p1: (point-like) start of the line.
        p2: (point-like) end of the line.
        breadth: passed to the shape's draw_squiggle().
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish(); the path is never closed.
    Returns:
        The value returned by the shape's draw_squiggle().
    """
    style = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 1962 | |
| 1963 | |
def draw_zigzag(
    page: Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a zigzag line between the points p1 and p2.

    Args:
        p1: (point-like) start of the line.
        p2: (point-like) end of the line.
        breadth: passed to the shape's draw_zigzag().
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish(); the path is never closed.
    Returns:
        The value returned by the shape's draw_zigzag().
    """
    style = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 1998 | |
| 1999 | |
def draw_rect(
    page: Page,
    rect: rect_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
    radius=None,
) -> Point:
    """Draw a rectangle. See the Shape class method for details.

    Args:
        rect: (rect-like) converted with Rect() before drawing.
        radius: passed to the shape's draw_rect().
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish().
    Returns:
        The value returned by the shape's draw_rect().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_rect(Rect(rect), radius=radius)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2034 | |
| 2035 | |
def draw_quad(
    page: Page,
    quad: quad_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a quadrilateral.

    Args:
        quad: (quad-like) converted with Quad() before drawing.
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish().
    Returns:
        The value returned by the shape's draw_quad().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_quad(Quad(quad))
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2069 | |
| 2070 | |
def draw_polyline(
    page: Page,
    points: list,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    closePath: bool = False,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw several connected line segments.

    Args:
        points: list handed unchanged to the shape's draw_polyline().
        overlay: passed to the shape's commit().
        All remaining parameters, including closePath, are passed through
        unchanged to the shape's finish().
    Returns:
        The value returned by the shape's draw_polyline().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_polyline(points)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2106 | |
| 2107 | |
def draw_circle(
    page: Page,
    center: point_like,
    radius: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    morph: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle from its center and radius.

    Args:
        center: (point-like) converted with Point() before drawing.
        radius: passed to the shape's draw_circle().
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish().
    Returns:
        The value returned by the shape's draw_circle().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_circle(Point(center), radius)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2141 | |
| 2142 | |
def draw_oval(
    page: Page,
    rect: typing.Union[rect_like, quad_like],
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    morph: OptSeq = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw an oval inside its containing rectangle or quad.

    Args:
        rect: (rect-like or quad-like) handed unchanged to the shape's
            draw_oval().
        overlay: passed to the shape's commit().
        All remaining parameters are passed through unchanged to the
        shape's finish().
    Returns:
        The value returned by the shape's draw_oval().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_oval(rect)
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2176 | |
| 2177 | |
def draw_curve(
    page: Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a special Bezier curve from p1 to p3.

    The control points are generated on the lines p1 to p2 and p2 to p3.

    Args:
        p1, p2, p3: (point-like) each converted with Point() before drawing.
        overlay: passed to the shape's commit().
        All remaining parameters, including closePath, are passed through
        unchanged to the shape's finish().
    Returns:
        The value returned by the shape's draw_curve().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_curve(Point(p1), Point(p2), Point(p3))
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2215 | |
| 2216 | |
def draw_bezier(
    page: Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

    Args:
        p1, p2, p3, p4: (point-like) each converted with Point() before
            drawing.
        morph: passed to the shape's finish(); annotated as an optional
            sequence like in the sibling drawing functions.
        overlay: passed to the shape's commit().
        All remaining parameters, including closePath, are passed through
        unchanged to the shape's finish().
    Returns:
        The value returned by the shape's draw_bezier().
    """
    img = page.new_shape()
    Q = img.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q
| 2255 | |
| 2256 | |
def draw_sector(
    page: Page,
    center: point_like,
    point: point_like,
    beta: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    fullSector: bool = True,
    morph: OptSeq = None,
    width: float = 1,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle sector from the circle center, one arc end point and the arc angle.

    Parameters:
        center -- center of circle
        point -- arc end point
        beta -- angle of arc (degrees)
        fullSector -- connect arc ends with center
        overlay -- passed to the shape's commit()
        All remaining parameters, including closePath, are passed through
        unchanged to the shape's finish().
    Returns:
        The value returned by the shape's draw_sector().
    """
    style = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape = page.new_shape()
    result = shape.draw_sector(
        Point(center),
        Point(point),
        beta,
        fullSector=fullSector,
    )
    shape.finish(**style)
    shape.commit(overlay)
    return result
| 2302 | |
| 2303 | |
| 2304 # ---------------------------------------------------------------------- | |
| 2305 # Name: wx.lib.colourdb.py | |
| 2306 # Purpose: Adds a bunch of colour names and RGB values to the | |
| 2307 # colour database so they can be found by name | |
| 2308 # | |
| 2309 # Author: Robin Dunn | |
| 2310 # | |
| 2311 # Created: 13-March-2001 | |
| 2312 # Copyright: (c) 2001-2017 by Total Control Software | |
| 2313 # Licence: wxWindows license | |
| 2314 # Tags: phoenix-port, unittest, documented | |
| 2315 # ---------------------------------------------------------------------- | |
| 2316 | |
| 2317 | |
def getColorList() -> list:
    """
    Return only the colour names known to this module.

    The names are the first element of every entry of getColorInfoList(),
    in the same order.
    :rtype: list of strings
    """
    names = []
    for entry in getColorInfoList():
        names.append(entry[0])
    return names
| 2325 | |
| 2326 | |
| 2327 def getColorInfoList() -> list: | |
| 2328 """ | |
| 2329 Returns the list of colour name/value tuples used by this module. | |
| 2330 :rtype: list of tuples | |
| 2331 """ | |
| 2332 | |
| 2333 return [ | |
| 2334 ("ALICEBLUE", 240, 248, 255), | |
| 2335 ("ANTIQUEWHITE", 250, 235, 215), | |
| 2336 ("ANTIQUEWHITE1", 255, 239, 219), | |
| 2337 ("ANTIQUEWHITE2", 238, 223, 204), | |
| 2338 ("ANTIQUEWHITE3", 205, 192, 176), | |
| 2339 ("ANTIQUEWHITE4", 139, 131, 120), | |
| 2340 ("AQUAMARINE", 127, 255, 212), | |
| 2341 ("AQUAMARINE1", 127, 255, 212), | |
| 2342 ("AQUAMARINE2", 118, 238, 198), | |
| 2343 ("AQUAMARINE3", 102, 205, 170), | |
| 2344 ("AQUAMARINE4", 69, 139, 116), | |
| 2345 ("AZURE", 240, 255, 255), | |
| 2346 ("AZURE1", 240, 255, 255), | |
| 2347 ("AZURE2", 224, 238, 238), | |
| 2348 ("AZURE3", 193, 205, 205), | |
| 2349 ("AZURE4", 131, 139, 139), | |
| 2350 ("BEIGE", 245, 245, 220), | |
| 2351 ("BISQUE", 255, 228, 196), | |
| 2352 ("BISQUE1", 255, 228, 196), | |
| 2353 ("BISQUE2", 238, 213, 183), | |
| 2354 ("BISQUE3", 205, 183, 158), | |
| 2355 ("BISQUE4", 139, 125, 107), | |
| 2356 ("BLACK", 0, 0, 0), | |
| 2357 ("BLANCHEDALMOND", 255, 235, 205), | |
| 2358 ("BLUE", 0, 0, 255), | |
| 2359 ("BLUE1", 0, 0, 255), | |
| 2360 ("BLUE2", 0, 0, 238), | |
| 2361 ("BLUE3", 0, 0, 205), | |
| 2362 ("BLUE4", 0, 0, 139), | |
| 2363 ("BLUEVIOLET", 138, 43, 226), | |
| 2364 ("BROWN", 165, 42, 42), | |
| 2365 ("BROWN1", 255, 64, 64), | |
| 2366 ("BROWN2", 238, 59, 59), | |
| 2367 ("BROWN3", 205, 51, 51), | |
| 2368 ("BROWN4", 139, 35, 35), | |
| 2369 ("BURLYWOOD", 222, 184, 135), | |
| 2370 ("BURLYWOOD1", 255, 211, 155), | |
| 2371 ("BURLYWOOD2", 238, 197, 145), | |
| 2372 ("BURLYWOOD3", 205, 170, 125), | |
| 2373 ("BURLYWOOD4", 139, 115, 85), | |
| 2374 ("CADETBLUE", 95, 158, 160), | |
| 2375 ("CADETBLUE1", 152, 245, 255), | |
| 2376 ("CADETBLUE2", 142, 229, 238), | |
| 2377 ("CADETBLUE3", 122, 197, 205), | |
| 2378 ("CADETBLUE4", 83, 134, 139), | |
| 2379 ("CHARTREUSE", 127, 255, 0), | |
| 2380 ("CHARTREUSE1", 127, 255, 0), | |
| 2381 ("CHARTREUSE2", 118, 238, 0), | |
| 2382 ("CHARTREUSE3", 102, 205, 0), | |
| 2383 ("CHARTREUSE4", 69, 139, 0), | |
| 2384 ("CHOCOLATE", 210, 105, 30), | |
| 2385 ("CHOCOLATE1", 255, 127, 36), | |
| 2386 ("CHOCOLATE2", 238, 118, 33), | |
| 2387 ("CHOCOLATE3", 205, 102, 29), | |
| 2388 ("CHOCOLATE4", 139, 69, 19), | |
| 2389 ("COFFEE", 156, 79, 0), | |
| 2390 ("CORAL", 255, 127, 80), | |
| 2391 ("CORAL1", 255, 114, 86), | |
| 2392 ("CORAL2", 238, 106, 80), | |
| 2393 ("CORAL3", 205, 91, 69), | |
| 2394 ("CORAL4", 139, 62, 47), | |
| 2395 ("CORNFLOWERBLUE", 100, 149, 237), | |
| 2396 ("CORNSILK", 255, 248, 220), | |
| 2397 ("CORNSILK1", 255, 248, 220), | |
| 2398 ("CORNSILK2", 238, 232, 205), | |
| 2399 ("CORNSILK3", 205, 200, 177), | |
| 2400 ("CORNSILK4", 139, 136, 120), | |
| 2401 ("CYAN", 0, 255, 255), | |
| 2402 ("CYAN1", 0, 255, 255), | |
| 2403 ("CYAN2", 0, 238, 238), | |
| 2404 ("CYAN3", 0, 205, 205), | |
| 2405 ("CYAN4", 0, 139, 139), | |
| 2406 ("DARKBLUE", 0, 0, 139), | |
| 2407 ("DARKCYAN", 0, 139, 139), | |
| 2408 ("DARKGOLDENROD", 184, 134, 11), | |
| 2409 ("DARKGOLDENROD1", 255, 185, 15), | |
| 2410 ("DARKGOLDENROD2", 238, 173, 14), | |
| 2411 ("DARKGOLDENROD3", 205, 149, 12), | |
| 2412 ("DARKGOLDENROD4", 139, 101, 8), | |
| 2413 ("DARKGREEN", 0, 100, 0), | |
| 2414 ("DARKGRAY", 169, 169, 169), | |
| 2415 ("DARKKHAKI", 189, 183, 107), | |
| 2416 ("DARKMAGENTA", 139, 0, 139), | |
| 2417 ("DARKOLIVEGREEN", 85, 107, 47), | |
| 2418 ("DARKOLIVEGREEN1", 202, 255, 112), | |
| 2419 ("DARKOLIVEGREEN2", 188, 238, 104), | |
| 2420 ("DARKOLIVEGREEN3", 162, 205, 90), | |
| 2421 ("DARKOLIVEGREEN4", 110, 139, 61), | |
| 2422 ("DARKORANGE", 255, 140, 0), | |
| 2423 ("DARKORANGE1", 255, 127, 0), | |
| 2424 ("DARKORANGE2", 238, 118, 0), | |
| 2425 ("DARKORANGE3", 205, 102, 0), | |
| 2426 ("DARKORANGE4", 139, 69, 0), | |
| 2427 ("DARKORCHID", 153, 50, 204), | |
| 2428 ("DARKORCHID1", 191, 62, 255), | |
| 2429 ("DARKORCHID2", 178, 58, 238), | |
| 2430 ("DARKORCHID3", 154, 50, 205), | |
| 2431 ("DARKORCHID4", 104, 34, 139), | |
| 2432 ("DARKRED", 139, 0, 0), | |
| 2433 ("DARKSALMON", 233, 150, 122), | |
| 2434 ("DARKSEAGREEN", 143, 188, 143), | |
| 2435 ("DARKSEAGREEN1", 193, 255, 193), | |
| 2436 ("DARKSEAGREEN2", 180, 238, 180), | |
| 2437 ("DARKSEAGREEN3", 155, 205, 155), | |
| 2438 ("DARKSEAGREEN4", 105, 139, 105), | |
| 2439 ("DARKSLATEBLUE", 72, 61, 139), | |
| 2440 ("DARKSLATEGRAY", 47, 79, 79), | |
| 2441 ("DARKTURQUOISE", 0, 206, 209), | |
| 2442 ("DARKVIOLET", 148, 0, 211), | |
| 2443 ("DEEPPINK", 255, 20, 147), | |
| 2444 ("DEEPPINK1", 255, 20, 147), | |
| 2445 ("DEEPPINK2", 238, 18, 137), | |
| 2446 ("DEEPPINK3", 205, 16, 118), | |
| 2447 ("DEEPPINK4", 139, 10, 80), | |
| 2448 ("DEEPSKYBLUE", 0, 191, 255), | |
| 2449 ("DEEPSKYBLUE1", 0, 191, 255), | |
| 2450 ("DEEPSKYBLUE2", 0, 178, 238), | |
| 2451 ("DEEPSKYBLUE3", 0, 154, 205), | |
| 2452 ("DEEPSKYBLUE4", 0, 104, 139), | |
| 2453 ("DIMGRAY", 105, 105, 105), | |
| 2454 ("DODGERBLUE", 30, 144, 255), | |
| 2455 ("DODGERBLUE1", 30, 144, 255), | |
| 2456 ("DODGERBLUE2", 28, 134, 238), | |
| 2457 ("DODGERBLUE3", 24, 116, 205), | |
| 2458 ("DODGERBLUE4", 16, 78, 139), | |
| 2459 ("FIREBRICK", 178, 34, 34), | |
| 2460 ("FIREBRICK1", 255, 48, 48), | |
| 2461 ("FIREBRICK2", 238, 44, 44), | |
| 2462 ("FIREBRICK3", 205, 38, 38), | |
| 2463 ("FIREBRICK4", 139, 26, 26), | |
| 2464 ("FLORALWHITE", 255, 250, 240), | |
| 2465 ("FORESTGREEN", 34, 139, 34), | |
| 2466 ("GAINSBORO", 220, 220, 220), | |
| 2467 ("GHOSTWHITE", 248, 248, 255), | |
| 2468 ("GOLD", 255, 215, 0), | |
| 2469 ("GOLD1", 255, 215, 0), | |
| 2470 ("GOLD2", 238, 201, 0), | |
| 2471 ("GOLD3", 205, 173, 0), | |
| 2472 ("GOLD4", 139, 117, 0), | |
| 2473 ("GOLDENROD", 218, 165, 32), | |
| 2474 ("GOLDENROD1", 255, 193, 37), | |
| 2475 ("GOLDENROD2", 238, 180, 34), | |
| 2476 ("GOLDENROD3", 205, 155, 29), | |
| 2477 ("GOLDENROD4", 139, 105, 20), | |
| 2478 ("GREEN YELLOW", 173, 255, 47), | |
| 2479 ("GREEN", 0, 255, 0), | |
| 2480 ("GREEN1", 0, 255, 0), | |
| 2481 ("GREEN2", 0, 238, 0), | |
| 2482 ("GREEN3", 0, 205, 0), | |
| 2483 ("GREEN4", 0, 139, 0), | |
| 2484 ("GREENYELLOW", 173, 255, 47), | |
| 2485 ("GRAY", 190, 190, 190), | |
| 2486 ("GRAY0", 0, 0, 0), | |
| 2487 ("GRAY1", 3, 3, 3), | |
| 2488 ("GRAY10", 26, 26, 26), | |
| 2489 ("GRAY100", 255, 255, 255), | |
| 2490 ("GRAY11", 28, 28, 28), | |
| 2491 ("GRAY12", 31, 31, 31), | |
| 2492 ("GRAY13", 33, 33, 33), | |
| 2493 ("GRAY14", 36, 36, 36), | |
| 2494 ("GRAY15", 38, 38, 38), | |
| 2495 ("GRAY16", 41, 41, 41), | |
| 2496 ("GRAY17", 43, 43, 43), | |
| 2497 ("GRAY18", 46, 46, 46), | |
| 2498 ("GRAY19", 48, 48, 48), | |
| 2499 ("GRAY2", 5, 5, 5), | |
| 2500 ("GRAY20", 51, 51, 51), | |
| 2501 ("GRAY21", 54, 54, 54), | |
| 2502 ("GRAY22", 56, 56, 56), | |
| 2503 ("GRAY23", 59, 59, 59), | |
| 2504 ("GRAY24", 61, 61, 61), | |
| 2505 ("GRAY25", 64, 64, 64), | |
| 2506 ("GRAY26", 66, 66, 66), | |
| 2507 ("GRAY27", 69, 69, 69), | |
| 2508 ("GRAY28", 71, 71, 71), | |
| 2509 ("GRAY29", 74, 74, 74), | |
| 2510 ("GRAY3", 8, 8, 8), | |
| 2511 ("GRAY30", 77, 77, 77), | |
| 2512 ("GRAY31", 79, 79, 79), | |
| 2513 ("GRAY32", 82, 82, 82), | |
| 2514 ("GRAY33", 84, 84, 84), | |
| 2515 ("GRAY34", 87, 87, 87), | |
| 2516 ("GRAY35", 89, 89, 89), | |
| 2517 ("GRAY36", 92, 92, 92), | |
| 2518 ("GRAY37", 94, 94, 94), | |
| 2519 ("GRAY38", 97, 97, 97), | |
| 2520 ("GRAY39", 99, 99, 99), | |
| 2521 ("GRAY4", 10, 10, 10), | |
| 2522 ("GRAY40", 102, 102, 102), | |
| 2523 ("GRAY41", 105, 105, 105), | |
| 2524 ("GRAY42", 107, 107, 107), | |
| 2525 ("GRAY43", 110, 110, 110), | |
| 2526 ("GRAY44", 112, 112, 112), | |
| 2527 ("GRAY45", 115, 115, 115), | |
| 2528 ("GRAY46", 117, 117, 117), | |
| 2529 ("GRAY47", 120, 120, 120), | |
| 2530 ("GRAY48", 122, 122, 122), | |
| 2531 ("GRAY49", 125, 125, 125), | |
| 2532 ("GRAY5", 13, 13, 13), | |
| 2533 ("GRAY50", 127, 127, 127), | |
| 2534 ("GRAY51", 130, 130, 130), | |
| 2535 ("GRAY52", 133, 133, 133), | |
| 2536 ("GRAY53", 135, 135, 135), | |
| 2537 ("GRAY54", 138, 138, 138), | |
| 2538 ("GRAY55", 140, 140, 140), | |
| 2539 ("GRAY56", 143, 143, 143), | |
| 2540 ("GRAY57", 145, 145, 145), | |
| 2541 ("GRAY58", 148, 148, 148), | |
| 2542 ("GRAY59", 150, 150, 150), | |
| 2543 ("GRAY6", 15, 15, 15), | |
| 2544 ("GRAY60", 153, 153, 153), | |
| 2545 ("GRAY61", 156, 156, 156), | |
| 2546 ("GRAY62", 158, 158, 158), | |
| 2547 ("GRAY63", 161, 161, 161), | |
| 2548 ("GRAY64", 163, 163, 163), | |
| 2549 ("GRAY65", 166, 166, 166), | |
| 2550 ("GRAY66", 168, 168, 168), | |
| 2551 ("GRAY67", 171, 171, 171), | |
| 2552 ("GRAY68", 173, 173, 173), | |
| 2553 ("GRAY69", 176, 176, 176), | |
| 2554 ("GRAY7", 18, 18, 18), | |
| 2555 ("GRAY70", 179, 179, 179), | |
| 2556 ("GRAY71", 181, 181, 181), | |
| 2557 ("GRAY72", 184, 184, 184), | |
| 2558 ("GRAY73", 186, 186, 186), | |
| 2559 ("GRAY74", 189, 189, 189), | |
| 2560 ("GRAY75", 191, 191, 191), | |
| 2561 ("GRAY76", 194, 194, 194), | |
| 2562 ("GRAY77", 196, 196, 196), | |
| 2563 ("GRAY78", 199, 199, 199), | |
| 2564 ("GRAY79", 201, 201, 201), | |
| 2565 ("GRAY8", 20, 20, 20), | |
| 2566 ("GRAY80", 204, 204, 204), | |
| 2567 ("GRAY81", 207, 207, 207), | |
| 2568 ("GRAY82", 209, 209, 209), | |
| 2569 ("GRAY83", 212, 212, 212), | |
| 2570 ("GRAY84", 214, 214, 214), | |
| 2571 ("GRAY85", 217, 217, 217), | |
| 2572 ("GRAY86", 219, 219, 219), | |
| 2573 ("GRAY87", 222, 222, 222), | |
| 2574 ("GRAY88", 224, 224, 224), | |
| 2575 ("GRAY89", 227, 227, 227), | |
| 2576 ("GRAY9", 23, 23, 23), | |
| 2577 ("GRAY90", 229, 229, 229), | |
| 2578 ("GRAY91", 232, 232, 232), | |
| 2579 ("GRAY92", 235, 235, 235), | |
| 2580 ("GRAY93", 237, 237, 237), | |
| 2581 ("GRAY94", 240, 240, 240), | |
| 2582 ("GRAY95", 242, 242, 242), | |
| 2583 ("GRAY96", 245, 245, 245), | |
| 2584 ("GRAY97", 247, 247, 247), | |
| 2585 ("GRAY98", 250, 250, 250), | |
| 2586 ("GRAY99", 252, 252, 252), | |
| 2587 ("HONEYDEW", 240, 255, 240), | |
| 2588 ("HONEYDEW1", 240, 255, 240), | |
| 2589 ("HONEYDEW2", 224, 238, 224), | |
| 2590 ("HONEYDEW3", 193, 205, 193), | |
| 2591 ("HONEYDEW4", 131, 139, 131), | |
| 2592 ("HOTPINK", 255, 105, 180), | |
| 2593 ("HOTPINK1", 255, 110, 180), | |
| 2594 ("HOTPINK2", 238, 106, 167), | |
| 2595 ("HOTPINK3", 205, 96, 144), | |
| 2596 ("HOTPINK4", 139, 58, 98), | |
| 2597 ("INDIANRED", 205, 92, 92), | |
| 2598 ("INDIANRED1", 255, 106, 106), | |
| 2599 ("INDIANRED2", 238, 99, 99), | |
| 2600 ("INDIANRED3", 205, 85, 85), | |
| 2601 ("INDIANRED4", 139, 58, 58), | |
| 2602 ("IVORY", 255, 255, 240), | |
| 2603 ("IVORY1", 255, 255, 240), | |
| 2604 ("IVORY2", 238, 238, 224), | |
| 2605 ("IVORY3", 205, 205, 193), | |
| 2606 ("IVORY4", 139, 139, 131), | |
| 2607 ("KHAKI", 240, 230, 140), | |
| 2608 ("KHAKI1", 255, 246, 143), | |
| 2609 ("KHAKI2", 238, 230, 133), | |
| 2610 ("KHAKI3", 205, 198, 115), | |
| 2611 ("KHAKI4", 139, 134, 78), | |
| 2612 ("LAVENDER", 230, 230, 250), | |
| 2613 ("LAVENDERBLUSH", 255, 240, 245), | |
| 2614 ("LAVENDERBLUSH1", 255, 240, 245), | |
| 2615 ("LAVENDERBLUSH2", 238, 224, 229), | |
| 2616 ("LAVENDERBLUSH3", 205, 193, 197), | |
| 2617 ("LAVENDERBLUSH4", 139, 131, 134), | |
| 2618 ("LAWNGREEN", 124, 252, 0), | |
| 2619 ("LEMONCHIFFON", 255, 250, 205), | |
| 2620 ("LEMONCHIFFON1", 255, 250, 205), | |
| 2621 ("LEMONCHIFFON2", 238, 233, 191), | |
| 2622 ("LEMONCHIFFON3", 205, 201, 165), | |
| 2623 ("LEMONCHIFFON4", 139, 137, 112), | |
| 2624 ("LIGHTBLUE", 173, 216, 230), | |
| 2625 ("LIGHTBLUE1", 191, 239, 255), | |
| 2626 ("LIGHTBLUE2", 178, 223, 238), | |
| 2627 ("LIGHTBLUE3", 154, 192, 205), | |
| 2628 ("LIGHTBLUE4", 104, 131, 139), | |
| 2629 ("LIGHTCORAL", 240, 128, 128), | |
| 2630 ("LIGHTCYAN", 224, 255, 255), | |
| 2631 ("LIGHTCYAN1", 224, 255, 255), | |
| 2632 ("LIGHTCYAN2", 209, 238, 238), | |
| 2633 ("LIGHTCYAN3", 180, 205, 205), | |
| 2634 ("LIGHTCYAN4", 122, 139, 139), | |
| 2635 ("LIGHTGOLDENROD", 238, 221, 130), | |
| 2636 ("LIGHTGOLDENROD1", 255, 236, 139), | |
| 2637 ("LIGHTGOLDENROD2", 238, 220, 130), | |
| 2638 ("LIGHTGOLDENROD3", 205, 190, 112), | |
| 2639 ("LIGHTGOLDENROD4", 139, 129, 76), | |
| 2640 ("LIGHTGOLDENRODYELLOW", 250, 250, 210), | |
| 2641 ("LIGHTGREEN", 144, 238, 144), | |
| 2642 ("LIGHTGRAY", 211, 211, 211), | |
| 2643 ("LIGHTPINK", 255, 182, 193), | |
| 2644 ("LIGHTPINK1", 255, 174, 185), | |
| 2645 ("LIGHTPINK2", 238, 162, 173), | |
| 2646 ("LIGHTPINK3", 205, 140, 149), | |
| 2647 ("LIGHTPINK4", 139, 95, 101), | |
| 2648 ("LIGHTSALMON", 255, 160, 122), | |
| 2649 ("LIGHTSALMON1", 255, 160, 122), | |
| 2650 ("LIGHTSALMON2", 238, 149, 114), | |
| 2651 ("LIGHTSALMON3", 205, 129, 98), | |
| 2652 ("LIGHTSALMON4", 139, 87, 66), | |
| 2653 ("LIGHTSEAGREEN", 32, 178, 170), | |
| 2654 ("LIGHTSKYBLUE", 135, 206, 250), | |
| 2655 ("LIGHTSKYBLUE1", 176, 226, 255), | |
| 2656 ("LIGHTSKYBLUE2", 164, 211, 238), | |
| 2657 ("LIGHTSKYBLUE3", 141, 182, 205), | |
| 2658 ("LIGHTSKYBLUE4", 96, 123, 139), | |
| 2659 ("LIGHTSLATEBLUE", 132, 112, 255), | |
| 2660 ("LIGHTSLATEGRAY", 119, 136, 153), | |
| 2661 ("LIGHTSTEELBLUE", 176, 196, 222), | |
| 2662 ("LIGHTSTEELBLUE1", 202, 225, 255), | |
| 2663 ("LIGHTSTEELBLUE2", 188, 210, 238), | |
| 2664 ("LIGHTSTEELBLUE3", 162, 181, 205), | |
| 2665 ("LIGHTSTEELBLUE4", 110, 123, 139), | |
| 2666 ("LIGHTYELLOW", 255, 255, 224), | |
| 2667 ("LIGHTYELLOW1", 255, 255, 224), | |
| 2668 ("LIGHTYELLOW2", 238, 238, 209), | |
| 2669 ("LIGHTYELLOW3", 205, 205, 180), | |
| 2670 ("LIGHTYELLOW4", 139, 139, 122), | |
| 2671 ("LIMEGREEN", 50, 205, 50), | |
| 2672 ("LINEN", 250, 240, 230), | |
| 2673 ("MAGENTA", 255, 0, 255), | |
| 2674 ("MAGENTA1", 255, 0, 255), | |
| 2675 ("MAGENTA2", 238, 0, 238), | |
| 2676 ("MAGENTA3", 205, 0, 205), | |
| 2677 ("MAGENTA4", 139, 0, 139), | |
| 2678 ("MAROON", 176, 48, 96), | |
| 2679 ("MAROON1", 255, 52, 179), | |
| 2680 ("MAROON2", 238, 48, 167), | |
| 2681 ("MAROON3", 205, 41, 144), | |
| 2682 ("MAROON4", 139, 28, 98), | |
| 2683 ("MEDIUMAQUAMARINE", 102, 205, 170), | |
| 2684 ("MEDIUMBLUE", 0, 0, 205), | |
| 2685 ("MEDIUMORCHID", 186, 85, 211), | |
| 2686 ("MEDIUMORCHID1", 224, 102, 255), | |
| 2687 ("MEDIUMORCHID2", 209, 95, 238), | |
| 2688 ("MEDIUMORCHID3", 180, 82, 205), | |
| 2689 ("MEDIUMORCHID4", 122, 55, 139), | |
| 2690 ("MEDIUMPURPLE", 147, 112, 219), | |
| 2691 ("MEDIUMPURPLE1", 171, 130, 255), | |
| 2692 ("MEDIUMPURPLE2", 159, 121, 238), | |
| 2693 ("MEDIUMPURPLE3", 137, 104, 205), | |
| 2694 ("MEDIUMPURPLE4", 93, 71, 139), | |
| 2695 ("MEDIUMSEAGREEN", 60, 179, 113), | |
| 2696 ("MEDIUMSLATEBLUE", 123, 104, 238), | |
| 2697 ("MEDIUMSPRINGGREEN", 0, 250, 154), | |
| 2698 ("MEDIUMTURQUOISE", 72, 209, 204), | |
| 2699 ("MEDIUMVIOLETRED", 199, 21, 133), | |
| 2700 ("MIDNIGHTBLUE", 25, 25, 112), | |
| 2701 ("MINTCREAM", 245, 255, 250), | |
| 2702 ("MISTYROSE", 255, 228, 225), | |
| 2703 ("MISTYROSE1", 255, 228, 225), | |
| 2704 ("MISTYROSE2", 238, 213, 210), | |
| 2705 ("MISTYROSE3", 205, 183, 181), | |
| 2706 ("MISTYROSE4", 139, 125, 123), | |
| 2707 ("MOCCASIN", 255, 228, 181), | |
| 2708 ("MUPDFBLUE", 37, 114, 172), | |
| 2709 ("NAVAJOWHITE", 255, 222, 173), | |
| 2710 ("NAVAJOWHITE1", 255, 222, 173), | |
| 2711 ("NAVAJOWHITE2", 238, 207, 161), | |
| 2712 ("NAVAJOWHITE3", 205, 179, 139), | |
| 2713 ("NAVAJOWHITE4", 139, 121, 94), | |
| 2714 ("NAVY", 0, 0, 128), | |
| 2715 ("NAVYBLUE", 0, 0, 128), | |
| 2716 ("OLDLACE", 253, 245, 230), | |
| 2717 ("OLIVEDRAB", 107, 142, 35), | |
| 2718 ("OLIVEDRAB1", 192, 255, 62), | |
| 2719 ("OLIVEDRAB2", 179, 238, 58), | |
| 2720 ("OLIVEDRAB3", 154, 205, 50), | |
| 2721 ("OLIVEDRAB4", 105, 139, 34), | |
| 2722 ("ORANGE", 255, 165, 0), | |
| 2723 ("ORANGE1", 255, 165, 0), | |
| 2724 ("ORANGE2", 238, 154, 0), | |
| 2725 ("ORANGE3", 205, 133, 0), | |
| 2726 ("ORANGE4", 139, 90, 0), | |
| 2727 ("ORANGERED", 255, 69, 0), | |
| 2728 ("ORANGERED1", 255, 69, 0), | |
| 2729 ("ORANGERED2", 238, 64, 0), | |
| 2730 ("ORANGERED3", 205, 55, 0), | |
| 2731 ("ORANGERED4", 139, 37, 0), | |
| 2732 ("ORCHID", 218, 112, 214), | |
| 2733 ("ORCHID1", 255, 131, 250), | |
| 2734 ("ORCHID2", 238, 122, 233), | |
| 2735 ("ORCHID3", 205, 105, 201), | |
| 2736 ("ORCHID4", 139, 71, 137), | |
| 2737 ("PALEGOLDENROD", 238, 232, 170), | |
| 2738 ("PALEGREEN", 152, 251, 152), | |
| 2739 ("PALEGREEN1", 154, 255, 154), | |
| 2740 ("PALEGREEN2", 144, 238, 144), | |
| 2741 ("PALEGREEN3", 124, 205, 124), | |
| 2742 ("PALEGREEN4", 84, 139, 84), | |
| 2743 ("PALETURQUOISE", 175, 238, 238), | |
| 2744 ("PALETURQUOISE1", 187, 255, 255), | |
| 2745 ("PALETURQUOISE2", 174, 238, 238), | |
| 2746 ("PALETURQUOISE3", 150, 205, 205), | |
| 2747 ("PALETURQUOISE4", 102, 139, 139), | |
| 2748 ("PALEVIOLETRED", 219, 112, 147), | |
| 2749 ("PALEVIOLETRED1", 255, 130, 171), | |
| 2750 ("PALEVIOLETRED2", 238, 121, 159), | |
| 2751 ("PALEVIOLETRED3", 205, 104, 137), | |
| 2752 ("PALEVIOLETRED4", 139, 71, 93), | |
| 2753 ("PAPAYAWHIP", 255, 239, 213), | |
| 2754 ("PEACHPUFF", 255, 218, 185), | |
| 2755 ("PEACHPUFF1", 255, 218, 185), | |
| 2756 ("PEACHPUFF2", 238, 203, 173), | |
| 2757 ("PEACHPUFF3", 205, 175, 149), | |
| 2758 ("PEACHPUFF4", 139, 119, 101), | |
| 2759 ("PERU", 205, 133, 63), | |
| 2760 ("PINK", 255, 192, 203), | |
| 2761 ("PINK1", 255, 181, 197), | |
| 2762 ("PINK2", 238, 169, 184), | |
| 2763 ("PINK3", 205, 145, 158), | |
| 2764 ("PINK4", 139, 99, 108), | |
| 2765 ("PLUM", 221, 160, 221), | |
| 2766 ("PLUM1", 255, 187, 255), | |
| 2767 ("PLUM2", 238, 174, 238), | |
| 2768 ("PLUM3", 205, 150, 205), | |
| 2769 ("PLUM4", 139, 102, 139), | |
| 2770 ("POWDERBLUE", 176, 224, 230), | |
| 2771 ("PURPLE", 160, 32, 240), | |
| 2772 ("PURPLE1", 155, 48, 255), | |
| 2773 ("PURPLE2", 145, 44, 238), | |
| 2774 ("PURPLE3", 125, 38, 205), | |
| 2775 ("PURPLE4", 85, 26, 139), | |
| 2776 ("PY_COLOR", 240, 255, 210), | |
| 2777 ("RED", 255, 0, 0), | |
| 2778 ("RED1", 255, 0, 0), | |
| 2779 ("RED2", 238, 0, 0), | |
| 2780 ("RED3", 205, 0, 0), | |
| 2781 ("RED4", 139, 0, 0), | |
| 2782 ("ROSYBROWN", 188, 143, 143), | |
| 2783 ("ROSYBROWN1", 255, 193, 193), | |
| 2784 ("ROSYBROWN2", 238, 180, 180), | |
| 2785 ("ROSYBROWN3", 205, 155, 155), | |
| 2786 ("ROSYBROWN4", 139, 105, 105), | |
| 2787 ("ROYALBLUE", 65, 105, 225), | |
| 2788 ("ROYALBLUE1", 72, 118, 255), | |
| 2789 ("ROYALBLUE2", 67, 110, 238), | |
| 2790 ("ROYALBLUE3", 58, 95, 205), | |
| 2791 ("ROYALBLUE4", 39, 64, 139), | |
| 2792 ("SADDLEBROWN", 139, 69, 19), | |
| 2793 ("SALMON", 250, 128, 114), | |
| 2794 ("SALMON1", 255, 140, 105), | |
| 2795 ("SALMON2", 238, 130, 98), | |
| 2796 ("SALMON3", 205, 112, 84), | |
| 2797 ("SALMON4", 139, 76, 57), | |
| 2798 ("SANDYBROWN", 244, 164, 96), | |
| 2799 ("SEAGREEN", 46, 139, 87), | |
| 2800 ("SEAGREEN1", 84, 255, 159), | |
| 2801 ("SEAGREEN2", 78, 238, 148), | |
| 2802 ("SEAGREEN3", 67, 205, 128), | |
| 2803 ("SEAGREEN4", 46, 139, 87), | |
| 2804 ("SEASHELL", 255, 245, 238), | |
| 2805 ("SEASHELL1", 255, 245, 238), | |
| 2806 ("SEASHELL2", 238, 229, 222), | |
| 2807 ("SEASHELL3", 205, 197, 191), | |
| 2808 ("SEASHELL4", 139, 134, 130), | |
| 2809 ("SIENNA", 160, 82, 45), | |
| 2810 ("SIENNA1", 255, 130, 71), | |
| 2811 ("SIENNA2", 238, 121, 66), | |
| 2812 ("SIENNA3", 205, 104, 57), | |
| 2813 ("SIENNA4", 139, 71, 38), | |
| 2814 ("SKYBLUE", 135, 206, 235), | |
| 2815 ("SKYBLUE1", 135, 206, 255), | |
| 2816 ("SKYBLUE2", 126, 192, 238), | |
| 2817 ("SKYBLUE3", 108, 166, 205), | |
| 2818 ("SKYBLUE4", 74, 112, 139), | |
| 2819 ("SLATEBLUE", 106, 90, 205), | |
| 2820 ("SLATEBLUE1", 131, 111, 255), | |
| 2821 ("SLATEBLUE2", 122, 103, 238), | |
| 2822 ("SLATEBLUE3", 105, 89, 205), | |
| 2823 ("SLATEBLUE4", 71, 60, 139), | |
| 2824 ("SLATEGRAY", 112, 128, 144), | |
| 2825 ("SNOW", 255, 250, 250), | |
| 2826 ("SNOW1", 255, 250, 250), | |
| 2827 ("SNOW2", 238, 233, 233), | |
| 2828 ("SNOW3", 205, 201, 201), | |
| 2829 ("SNOW4", 139, 137, 137), | |
| 2830 ("SPRINGGREEN", 0, 255, 127), | |
| 2831 ("SPRINGGREEN1", 0, 255, 127), | |
| 2832 ("SPRINGGREEN2", 0, 238, 118), | |
| 2833 ("SPRINGGREEN3", 0, 205, 102), | |
| 2834 ("SPRINGGREEN4", 0, 139, 69), | |
| 2835 ("STEELBLUE", 70, 130, 180), | |
| 2836 ("STEELBLUE1", 99, 184, 255), | |
| 2837 ("STEELBLUE2", 92, 172, 238), | |
| 2838 ("STEELBLUE3", 79, 148, 205), | |
| 2839 ("STEELBLUE4", 54, 100, 139), | |
| 2840 ("TAN", 210, 180, 140), | |
| 2841 ("TAN1", 255, 165, 79), | |
| 2842 ("TAN2", 238, 154, 73), | |
| 2843 ("TAN3", 205, 133, 63), | |
| 2844 ("TAN4", 139, 90, 43), | |
| 2845 ("THISTLE", 216, 191, 216), | |
| 2846 ("THISTLE1", 255, 225, 255), | |
| 2847 ("THISTLE2", 238, 210, 238), | |
| 2848 ("THISTLE3", 205, 181, 205), | |
| 2849 ("THISTLE4", 139, 123, 139), | |
| 2850 ("TOMATO", 255, 99, 71), | |
| 2851 ("TOMATO1", 255, 99, 71), | |
| 2852 ("TOMATO2", 238, 92, 66), | |
| 2853 ("TOMATO3", 205, 79, 57), | |
| 2854 ("TOMATO4", 139, 54, 38), | |
| 2855 ("TURQUOISE", 64, 224, 208), | |
| 2856 ("TURQUOISE1", 0, 245, 255), | |
| 2857 ("TURQUOISE2", 0, 229, 238), | |
| 2858 ("TURQUOISE3", 0, 197, 205), | |
| 2859 ("TURQUOISE4", 0, 134, 139), | |
| 2860 ("VIOLET", 238, 130, 238), | |
| 2861 ("VIOLETRED", 208, 32, 144), | |
| 2862 ("VIOLETRED1", 255, 62, 150), | |
| 2863 ("VIOLETRED2", 238, 58, 140), | |
| 2864 ("VIOLETRED3", 205, 50, 120), | |
| 2865 ("VIOLETRED4", 139, 34, 82), | |
| 2866 ("WHEAT", 245, 222, 179), | |
| 2867 ("WHEAT1", 255, 231, 186), | |
| 2868 ("WHEAT2", 238, 216, 174), | |
| 2869 ("WHEAT3", 205, 186, 150), | |
| 2870 ("WHEAT4", 139, 126, 102), | |
| 2871 ("WHITE", 255, 255, 255), | |
| 2872 ("WHITESMOKE", 245, 245, 245), | |
| 2873 ("YELLOW", 255, 255, 0), | |
| 2874 ("YELLOW1", 255, 255, 0), | |
| 2875 ("YELLOW2", 238, 238, 0), | |
| 2876 ("YELLOW3", 205, 205, 0), | |
| 2877 ("YELLOW4", 139, 139, 0), | |
| 2878 ("YELLOWGREEN", 154, 205, 50), | |
| 2879 ] | |
| 2880 | |
| 2881 | |
def getColorInfoDict() -> dict:
    """Return the color table as a dictionary keyed by lower-case name.

    Every entry of getColorInfoList() contributes one item: the key is the
    entry's first element in lower case, the value is the rest of the
    entry (``entry[1:]``).
    """
    return {entry[0].lower(): entry[1:] for entry in getColorInfoList()}
| 2887 | |
| 2888 | |
def getColor(name: str) -> tuple:
    """Retrieve RGB color in PDF format by name.

    Args:
        name: color name. It is upper-cased before being searched in
            getColorList().

    Returns:
        a triple of floats in range 0 to 1. In case of name-not-found (or
        any other lookup failure), "white" is returned.
    """
    try:
        c = getColorInfoList()[getColorList().index(name.upper())]
        return (c[1] / 255.0, c[2] / 255.0, c[3] / 255.0)
    except Exception:
        # Best-effort fallback to white. Only 'Exception' is caught, so that
        # KeyboardInterrupt / SystemExit are no longer swallowed here.
        return (1, 1, 1)
| 2900 | |
| 2901 | |
def getColorHSV(name: str) -> tuple:
    """Retrieve the hue, saturation, value triple of a color name.

    Args:
        name: color name. It is upper-cased before being searched in
            getColorList().

    Returns:
        a triple (degree, percent, percent). Hue and saturation are rounded
        to integers, value is rounded to one decimal.
        If not found (-1, -1, -1) is returned.
    """
    try:
        x = getColorInfoList()[getColorList().index(name.upper())]
    except Exception:
        # Best-effort "not found" answer. Only 'Exception' is caught, so that
        # KeyboardInterrupt / SystemExit are no longer swallowed here.
        return (-1, -1, -1)

    # scale the 0..255 components to 0..1
    r = x[1] / 255.0
    g = x[2] / 255.0
    b = x[3] / 255.0
    cmax = max(r, g, b)
    V = round(cmax * 100, 1)
    cmin = min(r, g, b)
    delta = cmax - cmin
    if delta == 0:  # gray: hue is reported as 0
        hue = 0
    elif cmax == r:
        hue = 60.0 * (((g - b) / delta) % 6)
    elif cmax == g:
        hue = 60.0 * (((b - r) / delta) + 2)
    else:
        hue = 60.0 * (((r - g) / delta) + 4)

    H = int(round(hue))

    if cmax == 0:  # black: avoid dividing by zero
        sat = 0
    else:
        sat = delta / cmax
    S = int(round(sat * 100))

    return (H, S, V)
| 2938 | |
| 2939 | |
| 2940 def _get_font_properties(doc: Document, xref: int) -> tuple: | |
| 2941 fontname, ext, stype, buffer = doc.extract_font(xref) | |
| 2942 asc = 0.8 | |
| 2943 dsc = -0.2 | |
| 2944 if ext == "": | |
| 2945 return fontname, ext, stype, asc, dsc | |
| 2946 | |
| 2947 if buffer: | |
| 2948 try: | |
| 2949 font = Font(fontbuffer=buffer) | |
| 2950 asc = font.ascender | |
| 2951 dsc = font.descender | |
| 2952 bbox = font.bbox | |
| 2953 if asc - dsc < 1: | |
| 2954 if bbox.y0 < dsc: | |
| 2955 dsc = bbox.y0 | |
| 2956 asc = 1 - dsc | |
| 2957 except: | |
| 2958 asc *= 1.2 | |
| 2959 dsc *= 1.2 | |
| 2960 return fontname, ext, stype, asc, dsc | |
| 2961 if ext != "n/a": | |
| 2962 try: | |
| 2963 font = Font(fontname) | |
| 2964 asc = font.ascender | |
| 2965 dsc = font.descender | |
| 2966 except: | |
| 2967 asc *= 1.2 | |
| 2968 dsc *= 1.2 | |
| 2969 else: | |
| 2970 asc *= 1.2 | |
| 2971 dsc *= 1.2 | |
| 2972 return fontname, ext, stype, asc, dsc | |
| 2973 | |
| 2974 | |
def get_char_widths(
    doc: Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
) -> list:
    """Get list of glyph information of a font.

    Notes:
        The font is identified by its XREF number. CheckFontInfo() tells
        whether the document already has a record for it; if not, a new
        [xref, fontdict] record is appended to doc.FontInfos.
        The returned glyph list consists of (glyph, width) items where
        glyph is an integer controlling the char appearance, and width is
        a float controlling the char's spacing: width * fontsize is the
        actual space.
        For 'simple' fonts, glyph == ord(char) will usually be true.
        Exceptions are 'Symbol' and 'ZapfDingbats', whose tables are taken
        from symbol_glyphs / zapf_glyphs. For CJK fonts None is returned.

    Raises:
        ValueError: if the font extension is the empty string.
    """
    # font names per CJK ordering number (position in tuple == ordering)
    cjk_families = (
        ("Fangti", "Ming"),
        ("Heiti", "Song"),
        ("Gothic", "Mincho"),
        ("Dotum", "Batang"),
    )

    record = CheckFontInfo(doc, xref)
    if record is not None:  # font already known to the document
        fontdict = record[1]
        glyphs = fontdict["glyphs"]
        simple = fontdict["simple"]
        ordering = fontdict["ordering"]
    else:  # not recorded yet: create the record
        if fontdict is not None:
            name = fontdict["name"]
            ext = fontdict["ext"]
            stype = fontdict["type"]
            # both values are recomputed below; the lookups still require
            # the keys to be present
            ordering = fontdict["ordering"]
            simple = fontdict["simple"]
        else:
            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
            fontdict = {
                "name": name,
                "type": stype,
                "ext": ext,
                "ascender": asc,
                "descender": dsc,
            }

        if ext == "":
            raise ValueError("xref is not a font")

        # 'simple' fonts are recognized by their type
        simple = stype in ("Type1", "MMType1", "TrueType")

        # CJK fonts get their ordering number, all others -1
        for ordering, family in enumerate(cjk_families):
            if name in family:
                break
        else:
            ordering = -1

        fontdict["simple"] = simple

        if name == "ZapfDingbats":
            glyphs = zapf_glyphs
        elif name == "Symbol":
            glyphs = symbol_glyphs
        else:
            glyphs = None

        fontdict["glyphs"] = glyphs
        fontdict["ordering"] = ordering
        record = [xref, fontdict]
        doc.FontInfos.append(record)

    known = len(glyphs) if glyphs is not None else 0
    wanted = max(256, limit)

    if wanted <= known:  # the table at hand is large enough
        return glyphs

    # non-CJK fonts: fetch the widths; CJK fonts use char codes and width = 1
    glyphs = (
        doc._get_char_widths(
            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], wanted, idx
        )
        if ordering < 0
        else None
    )

    fontdict["glyphs"] = glyphs
    record[1] = fontdict
    UpdateFontInfo(doc, record)

    return glyphs
| 3071 | |
| 3072 | |
| 3073 class Shape(object): | |
| 3074 """Create a new shape.""" | |
| 3075 | |
@staticmethod
def horizontal_angle(C, P):
    """Return the angle to the horizontal for the connection from C to P.

    The arcus sine of the unit vector's absolute y value gives an angle
    between 0 and pi/2; the signs of the unit vector's x and y components
    then decide which of the four candidate angles is returned.
    """
    direction = Point(P - C).unit  # unit vector 'C' -> 'P'
    angle = math.asin(abs(direction.y))  # absolute angle from horizontal
    if direction.x < 0:  # left half: measure against the negative x-axis
        mirrored = math.pi - angle
        return -mirrored if direction.y <= 0 else mirrored
    # right half: only the sign depends on y
    return angle if direction.y >= 0 else -angle
| 3095 | |
def __init__(self, page: Page):
    """Set up an empty shape for a PDF page.

    Args:
        page: the page to draw on; it is passed through CheckParent().

    Raises:
        ValueError: if the page's parent document reports is_pdf as false.
    """
    CheckParent(page)
    document = page.parent
    self.page = page
    self.doc = document
    if not document.is_pdf:
        raise ValueError("is no PDF")

    media_size = page.mediabox_size
    self.height = media_size.y
    self.width = media_size.x
    crop_origin = page.cropbox_position
    self.x = crop_origin.x
    self.y = crop_origin.y

    self.pctm = page.transformation_matrix  # page transf. matrix
    self.ipctm = ~self.pctm  # inverted transf. matrix

    # buffers filled by the draw / text methods
    self.draw_cont = self.text_cont = self.totalcont = ""
    # nothing drawn yet: no current point, no covered rectangle
    self.lastPoint = self.rect = None
| 3115 | |
def updateRect(self, x):
    """Extend self.rect so that it also covers 'x'.

    Args:
        x: an object of length 2 is treated as a point, anything else is
            handed to Rect(). The first call creates self.rect, later
            calls widen it coordinate by coordinate.
    """
    is_point = len(x) == 2
    if self.rect is None:
        self.rect = Rect(x, x) if is_point else Rect(x)
        return

    if is_point:
        pt = Point(x)
        left, top, right, bottom = pt.x, pt.y, pt.x, pt.y
    else:
        box = Rect(x)
        left, top, right, bottom = box.x0, box.y0, box.x1, box.y1

    covered = self.rect
    covered.x0 = min(covered.x0, left)
    covered.y0 = min(covered.y0, top)
    covered.x1 = max(covered.x1, right)
    covered.y1 = max(covered.y1, bottom)
| 3136 | |
def draw_line(self, p1: point_like, p2: point_like) -> Point:
    """Draw a line between two points.

    A "move" operator is only written if p1 differs from the current
    point. Both end points extend the shape rectangle.

    Returns:
        The end point of the line, which is also the new current point.
    """
    start, end = Point(p1), Point(p2)
    if not (self.lastPoint == start):
        self.draw_cont += "%g %g m\n" % JM_TUPLE(start * self.ipctm)
        self.lastPoint = start
    self.updateRect(start)

    self.draw_cont += "%g %g l\n" % JM_TUPLE(end * self.ipctm)
    self.updateRect(end)
    self.lastPoint = end
    return end
| 3150 | |
def draw_polyline(self, points: list) -> Point:
    """Draw several connected line segments.

    The first point only produces a "move" operator if it differs from the
    current point; every further point produces a "line" operator. All
    points extend the shape rectangle.

    Returns:
        The last point as a Point; it becomes the new current point.

    Raises:
        IndexError: if 'points' is empty.
    """
    for position, raw in enumerate(points):
        vertex = Point(raw)
        if position > 0:
            self.draw_cont += "%g %g l\n" % JM_TUPLE(vertex * self.ipctm)
        elif not (self.lastPoint == vertex):
            self.draw_cont += "%g %g m\n" % JM_TUPLE(vertex * self.ipctm)
            self.lastPoint = vertex
        self.updateRect(raw)

    self.lastPoint = Point(points[-1])
    return self.lastPoint
| 3164 | |
def draw_bezier(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
) -> Point:
    """Draw a standard cubic Bezier curve.

    Args:
        p1: start point; a "move" operator is written if it differs from
            the current point.
        p2, p3: the two control points.
        p4: end point.

    Returns:
        The end point, which is also the new current point.
    """
    start, ctrl1, ctrl2, end = (Point(p) for p in (p1, p2, p3, p4))
    if not (self.lastPoint == start):
        self.draw_cont += "%g %g m\n" % JM_TUPLE(start * self.ipctm)

    # the "c" operator takes control point 1, control point 2, end point
    coords = []
    for pt in (ctrl1, ctrl2, end):
        coords.extend(pt * self.ipctm)
    self.draw_cont += "%g %g %g %g %g %g c\n" % JM_TUPLE(coords)

    for pt in (start, ctrl1, ctrl2, end):
        self.updateRect(pt)
    self.lastPoint = end
    return end
| 3188 | |
def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
    """Draw an ellipse inside a tetrapod.

    Args:
        tetra: an object of length 4. If its first item has a __float__
            attribute it is read as a rectangle, otherwise as a quad.

    Returns:
        The mid point of the left edge, where the curve starts and ends.

    Raises:
        ValueError: if 'tetra' does not have length 4.
    """
    if len(tetra) != 4:
        raise ValueError("invalid arg length")
    q = Rect(tetra).quad if hasattr(tetra[0], "__float__") else Quad(tetra)

    def halfway(a, b):
        return a + (b - a) * 0.5

    mid_top = halfway(q.ul, q.ur)
    mid_right = halfway(q.ur, q.lr)
    mid_bottom = halfway(q.ll, q.lr)
    mid_left = halfway(q.ul, q.ll)

    if not (self.lastPoint == mid_left):
        self.draw_cont += "%g %g m\n" % JM_TUPLE(mid_left * self.ipctm)
        self.lastPoint = mid_left

    # four curves from edge mid point to edge mid point, each one using
    # the corner in between as its helper point
    for begin, corner, finish in (
        (mid_left, q.ll, mid_bottom),
        (mid_bottom, q.lr, mid_right),
        (mid_right, q.ur, mid_top),
        (mid_top, q.ul, mid_left),
    ):
        self.draw_curve(begin, corner, finish)

    self.updateRect(q.rect)
    self.lastPoint = mid_left
    return mid_left
| 3212 | |
def draw_circle(self, center: point_like, radius: float) -> Point:
    """Draw a circle given its center and radius.

    The circle is a 360 degree sector (without the connecting lines to the
    center) that starts at the point 'radius' units left of the center.

    Raises:
        ValueError: if radius is not greater than EPSILON.
    """
    if not radius > EPSILON:
        raise ValueError("radius must be positive")
    middle = Point(center)
    leftmost = middle - (radius, 0)
    return self.draw_sector(middle, leftmost, 360, fullSector=False)
| 3220 | |
def draw_curve(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
) -> Point:
    """Draw a curve between points using one control point.

    Args:
        p1: start point.
        p2: helper point; both Bezier control points lie on the
            connections from p1 and from p3 towards it.
        p3: end point.

    Returns:
        The result of draw_bezier() for the derived cubic curve.
    """
    kappa = 0.55228474983
    start, helper, end = Point(p1), Point(p2), Point(p3)
    ctrl_near_start = start + (helper - start) * kappa
    ctrl_near_end = end + (helper - end) * kappa
    return self.draw_bezier(start, ctrl_near_start, ctrl_near_end, end)
| 3235 | |
def draw_sector(
    self,
    center: point_like,
    point: point_like,
    beta: float,
    fullSector: bool = True,
) -> Point:
    """Draw a circle sector.

    The arc starts at 'point' and runs around 'center'. It is written as a
    chain of cubic Bezier segments: one per full 90 degrees, plus one for
    the rest if more than 1e-3 radians remain.

    Args:
        center: center of the circle.
        point: start point of the arc; its distance to 'center' is the
            radius.
        beta: angle of the sector in degrees. Its sign is inverted before
            use and values beyond 360 degrees are reduced.
        fullSector: if true, additionally write lines from 'point' to
            'center' and from there to the arc's end point.

    Returns:
        The end point of the arc, which becomes the new current point.

    Raises:
        ValueError: if the radius is not greater than EPSILON.

    Notes:
        This method does not call updateRect().
    """
    center = Point(center)
    point = Point(point)
    # operator templates: move, curve, line
    l3 = "%g %g m\n"
    l4 = "%g %g %g %g %g %g c\n"
    l5 = "%g %g l\n"
    betar = math.radians(-beta)  # radians, with inverted sign
    # the helper angles carry the sign of betar (w360 the opposite one)
    w360 = math.radians(math.copysign(360, betar)) * (-1)
    w90 = math.radians(math.copysign(90, betar))
    w45 = w90 / 2
    while abs(betar) > 2 * math.pi:
        betar += w360  # bring angle below 360 degrees
    if not (self.lastPoint == point):
        self.draw_cont += l3 % JM_TUPLE(point * self.ipctm)
        self.lastPoint = point
    # NOTE(review): if no arc segment is drawn below (e.g. beta == 0), Q
    # keeps this value and is used as end point - confirm this is intended
    Q = Point(0, 0)  # just make sure it exists
    C = center
    P = point
    S = P - C  # vector 'center' -> 'point'
    rad = abs(S)  # circle radius

    if not rad > EPSILON:
        raise ValueError("radius must be positive")

    alfa = self.horizontal_angle(center, point)
    while abs(betar) > abs(w90):  # draw 90 degree arcs
        q1 = C.x + math.cos(alfa + w90) * rad
        q2 = C.y + math.sin(alfa + w90) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
        r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
        R = Point(r1, r2)  # crossing point of tangents
        kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
        kappa = kappah * abs(P - Q)
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4 % JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        )

        betar -= w90  # reduce parm angle by 90 deg
        alfa += w90  # advance start angle by 90 deg
        P = Q  # advance to arc end point
    # draw (remaining) arc
    if abs(betar) > 1e-3:  # significant degrees left?
        beta2 = betar / 2
        q1 = C.x + math.cos(alfa + betar) * rad
        q2 = C.y + math.sin(alfa + betar) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
        r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
        R = Point(r1, r2)  # crossing point of tangents
        # kappa height is 4/3 of segment height
        kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
        kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4 % JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        )
    if fullSector:
        # add the two straight edges: start point -> center -> arc end
        self.draw_cont += l3 % JM_TUPLE(point * self.ipctm)
        self.draw_cont += l5 % JM_TUPLE(center * self.ipctm)
        self.draw_cont += l5 % JM_TUPLE(Q * self.ipctm)
    self.lastPoint = Q
    return self.lastPoint
| 3309 | |
def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
    """Draw a rectangle.

    Args:
        rect: the rectangle to draw.
        radius: if not None, the rectangle will have rounded corners.
            This is the radius of the curvature, given as a fraction of
            the rectangle width or height. Valid are values 0 < v <= 0.5.
            A single number is applied to the shorter side. For a
            sequence of two values, the first one is applied to the width
            and the second one to the height. A value of (0.5, 0.5) will
            draw an ellipse.

    Returns:
        The new current point: the top-left corner for a standard
        rectangle, otherwise the end point of the last corner curve.

    Raises:
        ValueError: if 'radius' is neither None, a valid number nor a
            sequence of two valid numbers.
    """
    r = Rect(rect)
    # identity check: '== None' would invoke the __eq__ of arbitrary
    # radius objects (e.g. arrays), which need not return a plain bool
    if radius is None:  # standard rectangle
        self.draw_cont += "%g %g %g %g re\n" % JM_TUPLE(
            list(r.bl * self.ipctm) + [r.width, r.height]
        )
        self.updateRect(r)
        self.lastPoint = r.tl
        return self.lastPoint
    # rounded corners requested. This requires 1 or 2 values, each
    # with 0 < value <= 0.5
    if hasattr(radius, "__float__"):
        if radius <= 0 or radius > 0.5:
            raise ValueError(f"bad radius value {radius}.")
        d = min(r.width, r.height) * radius
        px = (d, 0)
        py = (0, d)
    elif hasattr(radius, "__len__") and len(radius) == 2:
        rx, ry = radius
        # validate before the values are used
        if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
            raise ValueError(f"bad radius value {radius}.")
        px = (rx * r.width, 0)
        py = (0, ry * r.height)
    else:
        raise ValueError(f"bad radius value {radius}.")

    # walk around the rectangle: each straight edge is followed by the
    # curve around the next corner
    lp = self.draw_line(r.tl + py, r.bl - py)
    lp = self.draw_curve(lp, r.bl, r.bl + px)

    lp = self.draw_line(lp, r.br - px)
    lp = self.draw_curve(lp, r.br, r.br - py)

    lp = self.draw_line(lp, r.tr + py)
    lp = self.draw_curve(lp, r.tr, r.tr - px)

    lp = self.draw_line(lp, r.tl + px)
    self.lastPoint = self.draw_curve(lp, r.tl, r.tl + py)

    self.updateRect(r)
    return self.lastPoint
| 3360 | |
def draw_quad(self, quad: quad_like) -> Point:
    """Draw a Quad.

    The outline is a closed polyline through the corners in the order
    upper-left, lower-left, lower-right, upper-right, upper-left.
    """
    corners = Quad(quad)
    outline = [corners.ul, corners.ll, corners.lr, corners.ur, corners.ul]
    return self.draw_polyline(outline)
| 3365 | |
def draw_zigzag(
    self,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
) -> Point:
    """Draw a zig-zagged line from p1 to p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested step size; it is revised so that the distance
            is covered by a multiple of four steps.

    Returns:
        p2 as a Point.

    Raises:
        ValueError: if fewer than four steps fit between the points.
    """
    start = Point(p1)
    end = Point(p2)
    distance = abs(end - start)  # distance of points
    steps = 4 * int(round(distance / (4 * breadth), 0))  # always full phases
    if steps < 4:
        raise ValueError("points too close")
    unit = distance / steps  # revised breadth
    # the edges are computed on the x-axis and then mapped back
    back = ~Matrix(util_hor_matrix(start, end))

    # only odd steps are edges: 1, 5, 9, ... lie "above" the connection
    # (y = -1), steps 3, 7, 11, ... lie "below" it (y = 1)
    edges = [
        (Point(i, -1 if i % 4 == 1 else 1) * unit) * back
        for i in range(1, steps, 2)
    ]
    self.draw_polyline([start] + edges + [end])  # add start and end points
    return end
| 3394 | |
def draw_squiggle(
    self,
    p1: point_like,
    p2: point_like,
    breadth=2,
) -> Point:
    """Draw a squiggly line from p1 to p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested step size; it is revised so that the distance
            is covered by a multiple of four steps.

    Returns:
        p2 as a Point.

    Raises:
        ValueError: if fewer than four steps fit between the points.
    """
    start = Point(p1)
    end = Point(p2)
    distance = abs(end - start)  # distance of points
    steps = 4 * int(round(distance / (4 * breadth), 0))  # always full phases
    if steps < 4:
        raise ValueError("points too close")
    unit = distance / steps  # revised breadth
    # the points are computed on the x-axis and then mapped back
    back = ~Matrix(util_hor_matrix(start, end))
    k = 2.4142135623765633  # y of draw_curve helper point

    # y value per step phase: "above" the connection, "below" it,
    # everything else lies on the connection line
    phase_y = {1: -k, 3: k}
    inner = [
        (Point(i, phase_y.get(i % 4, 0)) * unit) * back for i in range(1, steps)
    ]

    path = [start] + inner + [end]
    # every second point is the helper point of a curve between its neighbors
    for first in range(0, len(path) - 2, 2):
        self.draw_curve(path[first], path[first + 1], path[first + 2])
    return end
| 3431 | |
| 3432 # ============================================================================== | |
| 3433 # Shape.insert_text | |
| 3434 # ============================================================================== | |
| 3435 def insert_text( | |
| 3436 self, | |
| 3437 point: point_like, | |
| 3438 buffer: typing.Union[str, list], | |
| 3439 fontsize: float = 11, | |
| 3440 lineheight: OptFloat = None, | |
| 3441 fontname: str = "helv", | |
| 3442 fontfile: OptStr = None, | |
| 3443 set_simple: bool = 0, | |
| 3444 encoding: int = 0, | |
| 3445 color: OptSeq = None, | |
| 3446 fill: OptSeq = None, | |
| 3447 render_mode: int = 0, | |
| 3448 border_width: float = 0.05, | |
| 3449 rotate: int = 0, | |
| 3450 morph: OptSeq = None, | |
| 3451 stroke_opacity: float = 1, | |
| 3452 fill_opacity: float = 1, | |
| 3453 oc: int = 0, | |
| 3454 ) -> int: | |
| 3455 # ensure 'text' is a list of strings, worth dealing with | |
| 3456 if not bool(buffer): | |
| 3457 return 0 | |
| 3458 | |
| 3459 if type(buffer) not in (list, tuple): | |
| 3460 text = buffer.splitlines() | |
| 3461 else: | |
| 3462 text = buffer | |
| 3463 | |
| 3464 if not len(text) > 0: | |
| 3465 return 0 | |
| 3466 | |
| 3467 point = Point(point) | |
| 3468 try: | |
| 3469 maxcode = max([ord(c) for c in " ".join(text)]) | |
| 3470 except: | |
| 3471 return 0 | |
| 3472 | |
| 3473 # ensure valid 'fontname' | |
| 3474 fname = fontname | |
| 3475 if fname.startswith("/"): | |
| 3476 fname = fname[1:] | |
| 3477 | |
| 3478 xref = self.page.insert_font( | |
| 3479 fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple | |
| 3480 ) | |
| 3481 fontinfo = CheckFontInfo(self.doc, xref) | |
| 3482 | |
| 3483 fontdict = fontinfo[1] | |
| 3484 ordering = fontdict["ordering"] | |
| 3485 simple = fontdict["simple"] | |
| 3486 bfname = fontdict["name"] | |
| 3487 ascender = fontdict["ascender"] | |
| 3488 descender = fontdict["descender"] | |
| 3489 if lineheight: | |
| 3490 lheight = fontsize * lineheight | |
| 3491 elif ascender - descender <= 1: | |
| 3492 lheight = fontsize * 1.2 | |
| 3493 else: | |
| 3494 lheight = fontsize * (ascender - descender) | |
| 3495 | |
| 3496 if maxcode > 255: | |
| 3497 glyphs = self.doc.get_char_widths(xref, maxcode + 1) | |
| 3498 else: | |
| 3499 glyphs = fontdict["glyphs"] | |
| 3500 | |
| 3501 tab = [] | |
| 3502 for t in text: | |
| 3503 if simple and bfname not in ("Symbol", "ZapfDingbats"): | |
| 3504 g = None | |
| 3505 else: | |
| 3506 g = glyphs | |
| 3507 tab.append(getTJstr(t, g, simple, ordering)) | |
| 3508 text = tab | |
| 3509 | |
| 3510 color_str = ColorCode(color, "c") | |
| 3511 fill_str = ColorCode(fill, "f") | |
| 3512 if not fill and render_mode == 0: # ensure fill color when 0 Tr | |
| 3513 fill = color | |
| 3514 fill_str = ColorCode(color, "f") | |
| 3515 | |
| 3516 morphing = CheckMorph(morph) | |
| 3517 rot = rotate | |
| 3518 if rot % 90 != 0: | |
| 3519 raise ValueError("bad rotate value") | |
| 3520 | |
| 3521 while rot < 0: | |
| 3522 rot += 360 | |
| 3523 rot = rot % 360 # text rotate = 0, 90, 270, 180 | |
| 3524 | |
| 3525 templ1 = "\nq\n%s%sBT\n%s1 0 0 1 %g %g Tm\n/%s %g Tf " | |
| 3526 templ2 = "TJ\n0 -%g TD\n" | |
| 3527 cmp90 = "0 1 -1 0 0 0 cm\n" # rotates 90 deg counter-clockwise | |
| 3528 cmm90 = "0 -1 1 0 0 0 cm\n" # rotates 90 deg clockwise | |
| 3529 cm180 = "-1 0 0 -1 0 0 cm\n" # rotates by 180 deg. | |
| 3530 height = self.height | |
| 3531 width = self.width | |
| 3532 | |
| 3533 # setting up for standard rotation directions | |
| 3534 # case rotate = 0 | |
| 3535 if morphing: | |
| 3536 m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y) | |
| 3537 mat = ~m1 * morph[1] * m1 | |
| 3538 cm = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat) | |
| 3539 else: | |
| 3540 cm = "" | |
| 3541 top = height - point.y - self.y # start of 1st char | |
| 3542 left = point.x + self.x # start of 1. char | |
| 3543 space = top # space available | |
| 3544 headroom = point.y + self.y # distance to page border | |
| 3545 if rot == 90: | |
| 3546 left = height - point.y - self.y | |
| 3547 top = -point.x - self.x | |
| 3548 cm += cmp90 | |
| 3549 space = width - abs(top) | |
| 3550 headroom = point.x + self.x | |
| 3551 | |
| 3552 elif rot == 270: | |
| 3553 left = -height + point.y + self.y | |
| 3554 top = point.x + self.x | |
| 3555 cm += cmm90 | |
| 3556 space = abs(top) | |
| 3557 headroom = width - point.x - self.x | |
| 3558 | |
| 3559 elif rot == 180: | |
| 3560 left = -point.x - self.x | |
| 3561 top = -height + point.y + self.y | |
| 3562 cm += cm180 | |
| 3563 space = abs(point.y + self.y) | |
| 3564 headroom = height - point.y - self.y | |
| 3565 | |
| 3566 optcont = self.page._get_optional_content(oc) | |
| 3567 if optcont != None: | |
| 3568 bdc = "/OC /%s BDC\n" % optcont | |
| 3569 emc = "EMC\n" | |
| 3570 else: | |
| 3571 bdc = emc = "" | |
| 3572 | |
| 3573 alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity) | |
| 3574 if alpha == None: | |
| 3575 alpha = "" | |
| 3576 else: | |
| 3577 alpha = "/%s gs\n" % alpha | |
| 3578 nres = templ1 % (bdc, alpha, cm, left, top, fname, fontsize) | |
| 3579 | |
| 3580 if render_mode > 0: | |
| 3581 nres += "%i Tr " % render_mode | |
| 3582 nres += "%g w " % (border_width * fontsize) | |
| 3583 | |
| 3584 if color is not None: | |
| 3585 nres += color_str | |
| 3586 if fill is not None: | |
| 3587 nres += fill_str | |
| 3588 | |
| 3589 # ========================================================================= | |
| 3590 # start text insertion | |
| 3591 # ========================================================================= | |
| 3592 nres += text[0] | |
| 3593 nlines = 1 # set output line counter | |
| 3594 if len(text) > 1: | |
| 3595 nres += templ2 % lheight # line 1 | |
| 3596 else: | |
| 3597 nres += templ2[:2] | |
| 3598 for i in range(1, len(text)): | |
| 3599 if space < lheight: | |
| 3600 break # no space left on page | |
| 3601 if i > 1: | |
| 3602 nres += "\nT* " | |
| 3603 nres += text[i] + templ2[:2] | |
| 3604 space -= lheight | |
| 3605 nlines += 1 | |
| 3606 | |
| 3607 nres += "\nET\n%sQ\n" % emc | |
| 3608 | |
| 3609 # ===================================================================== | |
| 3610 # end of text insertion | |
| 3611 # ===================================================================== | |
| 3612 # update the /Contents object | |
| 3613 self.text_cont += nres | |
| 3614 return nlines | |
| 3615 | |
| 3616 # ========================================================================= | |
| 3617 # Shape.insert_textbox | |
| 3618 # ========================================================================= | |
def insert_textbox(
    self,
    rect: rect_like,
    buffer: typing.Union[str, list],
    fontname: OptStr = "helv",
    fontfile: OptStr = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    border_width: float = 0.05,
    align: int = 0,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    The text is wrapped at spaces so that every output line fits the
    available line width; words longer than one line are split character
    by character. If the resulting text is higher than the box, nothing is
    written and the (negative) deficit is returned.

    Args:
        rect -- the textbox to fill
        buffer -- text to be inserted: a string, or a list / tuple of
            strings which are joined with line breaks
        fontname -- a Base-14 font, font name or '/name'
        fontfile -- name of a font file
        fontsize -- font size
        lineheight -- overwrite the font property (factor of fontsize)
        set_simple -- passed on to page.insert_font()
        encoding -- passed on to page.insert_font()
        color -- RGB stroke color triple
        fill -- RGB fill color triple; defaults to 'color' if render_mode
            is 0
        expandtabs -- handles tabulators with string function
        border_width -- thickness of glyph borders as a fraction of
            fontsize; only written if render_mode > 0
        align -- 0 = left, 1 = center, 2 = right, 3 = justified
        render_mode -- text rendering control
        rotate -- a multiple of 90 degrees
        morph -- morph box with a matrix and a fixpoint
        stroke_opacity -- passed to page._set_opacity() as 'CA'
        fill_opacity -- passed to page._set_opacity() as 'ca'
        oc -- passed to page._get_optional_content()
    Returns:
        Non-negative: height of the box part left unused, text was stored.
        Negative: height that is missing, nothing was stored.
        For empty text the full extent of the box in line progress
        direction is returned (rect height for rotations 0 / 180, else
        rect width).
    Raises:
        ValueError: if rect is empty or infinite, or if rotate is no
            multiple of 90.
    """
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("text box must be finite and not empty")

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    # optional content: wrap the output in a marked content sequence
    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    # determine opacity / transparency
    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    alpha = "" if alpha is None else "/%s gs\n" % alpha

    if rotate % 90 != 0:
        raise ValueError("rotate must be multiple of 90")

    # Python's modulo is non-negative for a positive divisor, so this also
    # normalizes negative rotations to one of 0, 90, 180, 270
    rot = rotate % 360

    # is buffer worth of dealing with?
    if not buffer:
        return rect.height if rot in (0, 180) else rect.width

    # make one string from buffer; it is split into lines further down
    if isinstance(buffer, (list, tuple)):
        t0 = "\n".join(buffer)
    else:
        t0 = buffer

    # a buffer like [""] is truthy but contains no text: treat it like an
    # empty buffer instead of failing in max() below
    if not t0:
        return rect.height if rot in (0, 180) else rect.width

    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height

    fname = fontname
    if fname.startswith("/"):
        fname = fname[1:]

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontinfo = CheckFontInfo(self.doc, xref)

    fontdict = fontinfo[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight_factor = lineheight
    elif ascender - descender <= 1:
        lheight_factor = 1.2
    else:
        lheight_factor = ascender - descender
    lheight = fontsize * lheight_factor

    maxcode = max(ord(c) for c in t0)
    # replace invalid char codes for simple fonts
    if simple and maxcode > 255:
        t0 = "".join([c if ord(c) < 256 else "?" for c in t0])

    t0 = t0.splitlines()

    glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs

    # ----------------------------------------------------------------------
    # calculate pixel length of a string
    # ----------------------------------------------------------------------
    def pixlen(x):
        """Calculate pixel length of x."""
        if ordering < 0:
            return sum([glyphs[ord(c)][1] for c in x]) * fontsize
        else:
            return len(x) * fontsize

    # ---------------------------------------------------------------------

    if ordering < 0:
        blen = glyphs[32][1] * fontsize  # pixel size of space character
    else:
        blen = fontsize

    text = ""  # output buffer

    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        cm = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat)
    else:
        cm = ""

    # ---------------------------------------------------------------------
    # adjust for text orientation / rotation
    # ---------------------------------------------------------------------
    progr = 1  # direction of line progress
    c_pnt = Point(0, fontsize * ascender)  # used for line progress
    if rot == 0:  # normal orientation
        point = rect.tl + c_pnt  # line 1 is 'lheight' below top
        maxwidth = rect.width  # pixels available in one line
        maxheight = rect.height  # available text height

    elif rot == 90:  # rotate counter clockwise
        c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
        point = rect.bl + c_pnt  # line 1 'lheight' away from left
        maxwidth = rect.height  # pixels available in one line
        maxheight = rect.width  # available text height
        cm += cmp90

    elif rot == 180:  # text upside down
        # progress upwards in y direction
        c_pnt = -Point(0, fontsize * ascender)
        point = rect.br + c_pnt  # line 1 'lheight' above bottom
        maxwidth = rect.width  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.height  # available text height
        cm += cm180

    else:  # rotate clockwise (270 or -90)
        # progress from right to left
        c_pnt = -Point(fontsize * ascender, 0)
        point = rect.tr + c_pnt  # line 1 'lheight' left of right
        maxwidth = rect.height  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.width  # available text height
        cm += cmm90

    # =====================================================================
    # line loop
    # =====================================================================
    just_tab = []  # 'justify' indicators per line

    for i, line in enumerate(t0):
        line_t = line.expandtabs(expandtabs).split(" ")  # split into words
        lbuff = ""  # init line buffer
        rest = maxwidth  # available line pixels
        # =================================================================
        # word loop
        # =================================================================
        for word in line_t:
            pl_w = pixlen(word)  # pixel len of word
            if rest >= pl_w:  # does it fit on the line?
                lbuff += word + " "  # yes, append word
                rest -= pl_w + blen  # update available line space
                continue  # next word

            # word doesn't fit - output line (if not empty)
            if lbuff:
                lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                text += lbuff  # append to total text
                just_tab.append(True)  # can align-justify

            lbuff = ""  # re-init line buffer
            rest = maxwidth  # re-init avail. space

            if pl_w <= maxwidth:  # word shorter than 1 line?
                lbuff = word + " "  # start the line with it
                rest = maxwidth - pl_w - blen  # update free space
                continue

            # long word: split across multiple lines - char by char ...
            if len(just_tab) > 0:
                just_tab[-1] = False  # cannot align-justify
            for c in word:
                if pixlen(lbuff) <= maxwidth - pixlen(c):
                    lbuff += c
                else:  # line full
                    lbuff += "\n"  # close line
                    text += lbuff  # append to text
                    just_tab.append(False)  # cannot align-justify
                    lbuff = c  # start new line with this char

            lbuff += " "  # finish long word
            rest = maxwidth - pixlen(lbuff)  # long word stored

        if lbuff:  # unprocessed line content?
            text += lbuff.rstrip()  # append to text
            just_tab.append(False)  # cannot align-justify

        if i < len(t0) - 1:  # not the last line?
            text += "\n"  # insert line break

    # compute used part of the textbox
    if text.endswith("\n"):
        text = text[:-1]
    lb_count = text.count("\n") + 1  # number of lines written

    # text height = line count * line height plus one descender value
    text_height = lheight * lb_count - descender * fontsize

    more = text_height - maxheight  # difference to height limit
    if more > EPSILON:  # landed too much outside rect
        return (-1) * more  # return deficit, don't output

    more = abs(more)
    if more < EPSILON:
        more = 0  # don't bother with epsilons
    nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
    templ = "1 0 0 1 %g %g Tm /%s %g Tf "
    # center, right, justify: output each line with its own specifics
    text_t = text.splitlines()  # split text in lines again
    if just_tab:
        just_tab[-1] = False  # never justify last line
    for i, t in enumerate(text_t):
        pl = maxwidth - pixlen(t)  # length of empty line part
        pnt = point + c_pnt * (i * lheight_factor)  # text start of line
        if align == 1:  # center: right shift by half width
            if rot in (0, 180):
                pnt = pnt + Point(pl / 2, 0) * progr
            else:
                pnt = pnt - Point(0, pl / 2) * progr
        elif align == 2:  # right: right shift by full width
            if rot in (0, 180):
                pnt = pnt + Point(pl, 0) * progr
            else:
                pnt = pnt - Point(0, pl) * progr
        elif align == 3:  # justify
            spaces = t.count(" ")  # number of spaces in line
            if spaces > 0 and just_tab[i]:  # if any, and we may justify
                spacing = pl / spaces  # make every space this much larger
            else:
                spacing = 0  # keep normal space length

        # convert the line start to PDF coordinates of the rotated system
        top = height - pnt.y - self.y
        left = pnt.x + self.x
        if rot == 90:
            left = height - pnt.y - self.y
            top = -pnt.x - self.x
        elif rot == 270:
            left = -height + pnt.y + self.y
            top = pnt.x + self.x
        elif rot == 180:
            left = -pnt.x - self.x
            top = -height + pnt.y + self.y

        nres += templ % (left, top, fname, fontsize)

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += "%g w " % (border_width * fontsize)

        if align == 3:
            nres += "%g Tw " % spacing

        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str
        nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)

    nres += "ET\n%sQ\n" % emc

    self.text_cont += nres
    self.updateRect(rect)
    return more
| 3937 | |
def finish(
    self,
    width: float = 1,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    dashes: OptStr = None,
    even_odd: bool = False,
    morph: OptSeq = None,
    closePath: bool = True,
    fill_opacity: float = 1,
    stroke_opacity: float = 1,
    oc: int = 0,
) -> None:
    """Finish the current drawing segment.

    Notes:
        Apply colors, opacity, dashes, line style and width, or
        morphing. Also whether to close the path
        by connecting last to first point.
        The finished segment is wrapped in 'q' / 'Q' and appended to the
        accumulated contents; the draw buffer is emptied afterwards.
        Nothing happens if the draw buffer is empty.

    Args:
        width: line width. 0 disables the stroke color.
        color: stroke color. None sets the width to 0.
        fill: fill color, None for no filling.
        lineCap: written with the 'J' operator if not 0.
        lineJoin: written with the 'j' operator if not 0.
        dashes: written with the 'd' operator unless None, "" or "[] 0".
        even_odd: select the starred paint operators ('B*', 'f*') when
            filling.
        morph: morph the segment with a fixpoint and a matrix.
        closePath: append the 'h' operator.
        fill_opacity: passed to page._set_opacity() as 'ca'.
        stroke_opacity: passed to page._set_opacity() as 'CA'.
        oc: passed to page._get_optional_content().
    """
    if self.draw_cont == "":  # treat empty contents as no-op
        return

    if width == 0:  # border color makes no sense then
        color = None
    elif color is None:  # vice versa
        width = 0
    color_str = ColorCode(color, "c")  # ensure proper color string
    fill_str = ColorCode(fill, "f")  # ensure proper fill string

    # graphics state settings are put in front of the path commands
    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
        emc = "EMC\n"
    else:
        emc = ""

    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is not None:
        self.draw_cont = "/%s gs\n" % alpha + self.draw_cont

    if width != 1 and width != 0:
        self.draw_cont += "%g w\n" % width

    if lineCap != 0:
        self.draw_cont = "%i J\n" % lineCap + self.draw_cont
    if lineJoin != 0:
        self.draw_cont = "%i j\n" % lineJoin + self.draw_cont

    if dashes not in (None, "", "[] 0"):
        self.draw_cont = "%s d\n" % dashes + self.draw_cont

    if closePath:
        self.draw_cont += "h\n"
        self.lastPoint = None

    if color is not None:
        self.draw_cont += color_str

    # choose the paint operator
    if fill is not None:
        self.draw_cont += fill_str
        paint = "B" if color is not None else "f"  # fill with / without stroke
        if even_odd:
            paint += "*"
        self.draw_cont += paint + "\n"
    else:
        # NOTE(review): the path is stroked here even if 'color' is None
        self.draw_cont += "S\n"

    self.draw_cont += emc
    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        self.draw_cont = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat) + self.draw_cont

    self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
    self.draw_cont = ""
    self.lastPoint = None
    return
| 4028 | |
def commit(self, overlay: bool = True) -> None:
    """Update the page's /Contents object with Shape data.

    Finished drawings and inserted text are written to a new contents
    stream of the page - if there is anything to write. Afterwards the
    Shape is reset for potential re-use. An unfinished drawing segment
    is discarded by that reset.

    Args:
        overlay: controls whether data appear in foreground (default) or
            background.
    """
    CheckParent(self.page)  # doc may have died meanwhile

    # Encode into a local variable: 'self.totalcont' stays a string, so an
    # exception raised below leaves the Shape in a usable state.
    stream = (self.totalcont + self.text_cont).encode()

    if stream != b"":
        # make /Contents object with dummy stream
        xref = TOOLS._insert_contents(self.page, b" ", overlay)
        # update it with potential compression
        self.doc.update_stream(xref, stream)

    # clean up for potential re-use
    self.lastPoint = None
    self.rect = None
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
    return
| 4048 | |
| 4049 | |
def apply_redactions(page: Page, images: int = 2) -> bool:
    """Apply the redaction annotations of the page.

    Args:
        page: the PDF page.
        images: 0 - ignore images, 1 - remove complete overlapping image,
            2 - blank out overlapping image parts.
    Returns:
        False if the page has no redaction annotations, else True.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or if
            the MuPDF redaction step reports a failure.
    """

    def center_rect(annot_rect, text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
        Args:
            annot_rect: the annotation rectangle
            text: the text to insert.
            font: the fontname. Must be one of the CJK or Base-14 set, else
                the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not text:
            return annot_rect
        try:
            text_width = get_text_length(text, font, fsize)
        except ValueError:  # unsupported font
            return annot_rect
        line_height = fsize * 1.2
        limit = annot_rect.width
        h = math.ceil(text_width / limit) * line_height  # estimate rect height
        if h >= annot_rect.height:
            return annot_rect
        # work on a copy: the stored annotation rectangle must not change
        r = Rect(annot_rect)
        r.y0 = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
        return r

    CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    # save the annot values: they are needed after the annots are applied
    redact_annots = [
        annot._get_redact_values()
        for annot in page.annots(types=(PDF_ANNOT_REDACT,))
    ]

    if not redact_annots:  # any redactions on this page?
        return False  # no redactions

    rc = page._apply_redactions(images)  # call MuPDF redaction process step
    if not rc:  # should not happen really
        raise ValueError("Error applying redactions.")

    # now write replacement text in old redact rectangles
    shape = page.new_shape()
    for redact in redact_annots:
        annot_rect = redact["rect"]
        fill = redact["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" in redact:  # if we also have text
            text = redact["text"]
            align = redact.get("align", 0)
            fname = redact["fontname"]
            fsize = redact["fontsize"]
            color = redact["text_color"]
            # try finding vertical centered sub-rect
            trect = center_rect(annot_rect, text, fname, fsize)

            rc = -1
            while rc < 0 and fsize >= 4:  # while not enough room
                # (re-) try insertion
                rc = shape.insert_textbox(
                    trect,
                    text,
                    fontname=fname,
                    fontsize=fsize,
                    color=color,
                    align=align,
                )
                fsize -= 0.5  # reduce font if unsuccessful
    shape.commit()  # append new contents object
    return True
| 4140 | |
| 4141 | |
| 4142 # ------------------------------------------------------------------------------ | |
| 4143 # Remove potentially sensitive data from a PDF. Similar to the Adobe | |
| 4144 # Acrobat 'sanitize' function | |
| 4145 # ------------------------------------------------------------------------------ | |
def scrub(
    doc: Document,
    attached_files: bool = True,
    clean_pages: bool = True,
    embedded_files: bool = True,
    hidden_text: bool = True,
    javascript: bool = True,
    metadata: bool = True,
    redactions: bool = True,
    redact_images: int = 0,
    remove_links: bool = True,
    reset_fields: bool = True,
    reset_responses: bool = True,
    thumbnails: bool = True,
    xml_metadata: bool = True,
) -> None:
    """Remove potentially sensitive data from a PDF.

    Args:
        doc: the PDF document.
        attached_files: replace the content of file attachment annotations
            by a single space.
        clean_pages: clean the page contents. If False, 'hidden_text' and
            'redactions' are switched off, too.
        embedded_files: delete all embedded files.
        hidden_text: remove text written with render mode 3 ('3 Tr').
        javascript: replace JavaScript actions by an empty script.
        metadata: remove the standard metadata.
        redactions: apply the redaction annotations of each page.
        redact_images: passed to page.apply_redactions() as 'images'.
        remove_links: delete all links of each page.
        reset_fields: reset all form fields (widgets).
        reset_responses: delete the responses of all annotations.
        thumbnails: remove the page thumbnails.
        xml_metadata: remove XML metadata.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, or
            if an xref without an object definition is encountered.
    """

    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Should have status
                from after page.cleanContents().

        Returns:
            List of /Contents lines from which hidden text has been removed,
            or None if no text suppression operator was found.

        Notes:
            The input must have been created after the page's /Contents object(s)
            have been cleaned with page.cleanContents(). This ensures a standard
            formatting: one command per line, single spaces between operators.
            This allows for drastic simplification of this code.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        make_return = False
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True  # switch on
                out_lines.append(line)  # output it
                continue
            if line == b"ET":  # end of text object
                in_text = False  # switch off
                out_lines.append(line)  # output it
                continue
            if line == b"3 Tr":  # text suppression operator
                suppress = True  # switch on
                make_return = True
                continue
            # Any other 'Tr' line changes the text rendering ('3 Tr' was
            # handled above), the unstack command 'Q' also switches off.
            if line[-2:] == b"Tr" or line == b"Q":
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        return out_lines if make_return else None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if clean_pages is False:
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            for link in page.get_links():  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == PDF_ANNOT_REDACT:
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        # Remove page thumbnails before the page may be skipped below:
        # this does not depend on cleaning or on existing page contents.
        if thumbnails:
            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                doc.xref_set_key(page.xref, "Thumb", "null")

        if not (clean_pages or hidden_text):
            continue  # done with the page

        page.clean_contents()
        if not page.get_contents():
            continue
        if hidden_text:
            xref = page.get_contents()[0]  # only one b/o cleaning!
            cont = doc.xref_stream(xref)
            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
            if cont_lines:  # something was actually removed
                cont = b"\n".join(cont_lines)
                doc.update_stream(xref, cont)  # rewrite the page /Contents

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()

    # the xref table only needs scanning for these two options
    if not (xml_metadata or javascript):
        xref_limit = 0
    else:
        xref_limit = doc.xref_length()
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object
            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
            doc.update_object(xref, obj)  # update this object
            continue  # no further handling

        if not xml_metadata:
            continue

        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue

        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")
| 4297 | |
| 4298 | |
| 4299 def fill_textbox( | |
| 4300 writer: TextWriter, | |
| 4301 rect: rect_like, | |
| 4302 text: typing.Union[str, list], | |
| 4303 pos: point_like = None, | |
| 4304 font: typing.Optional[Font] = None, | |
| 4305 fontsize: float = 11, | |
| 4306 lineheight: OptFloat = None, | |
| 4307 align: int = 0, | |
| 4308 warn: bool = None, | |
| 4309 right_to_left: bool = False, | |
| 4310 small_caps: bool = False, | |
| 4311 ) -> tuple: | |
| 4312 """Fill a rectangle with text. | |
| 4313 | |
| 4314 Args: | |
| 4315 writer: TextWriter object (= "self") | |
| 4316 rect: rect-like to receive the text. | |
| 4317 text: string or list/tuple of strings. | |
| 4318 pos: point-like start position of first word. | |
| 4319 font: Font object (default Font('helv')). | |
| 4320 fontsize: the fontsize. | |
| 4321 lineheight: overwrite the font property | |
| 4322 align: (int) 0 = left, 1 = center, 2 = right, 3 = justify | |
| 4323 warn: (bool) text overflow action: none, warn, or exception | |
| 4324 right_to_left: (bool) indicate right-to-left language. | |
| 4325 """ | |
| 4326 rect = Rect(rect) | |
| 4327 if rect.is_empty: | |
| 4328 raise ValueError("fill rect must not empty.") | |
| 4329 if type(font) is not Font: | |
| 4330 font = Font("helv") | |
| 4331 | |
| 4332 def textlen(x): | |
| 4333 """Return length of a string.""" | |
| 4334 return font.text_length( | |
| 4335 x, fontsize=fontsize, small_caps=small_caps | |
| 4336 ) # abbreviation | |
| 4337 | |
| 4338 def char_lengths(x): | |
| 4339 """Return list of single character lengths for a string.""" | |
| 4340 return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps) | |
| 4341 | |
| 4342 def append_this(pos, text): | |
| 4343 return writer.append( | |
| 4344 pos, text, font=font, fontsize=fontsize, small_caps=small_caps | |
| 4345 ) | |
| 4346 | |
| 4347 tolerance = fontsize * 0.2 # extra distance to left border | |
| 4348 space_len = textlen(" ") | |
| 4349 std_width = rect.width - tolerance | |
| 4350 std_start = rect.x0 + tolerance | |
| 4351 | |
| 4352 def norm_words(width, words): | |
| 4353 """Cut any word in pieces no longer than 'width'.""" | |
| 4354 nwords = [] | |
| 4355 word_lengths = [] | |
| 4356 for w in words: | |
| 4357 wl_lst = char_lengths(w) | |
| 4358 wl = sum(wl_lst) | |
| 4359 if wl <= width: # nothing to do - copy over | |
| 4360 nwords.append(w) | |
| 4361 word_lengths.append(wl) | |
| 4362 continue | |
| 4363 | |
| 4364 # word longer than rect width - split it in parts | |
| 4365 n = len(wl_lst) | |
| 4366 while n > 0: | |
| 4367 wl = sum(wl_lst[:n]) | |
| 4368 if wl <= width: | |
| 4369 nwords.append(w[:n]) | |
| 4370 word_lengths.append(wl) | |
| 4371 w = w[n:] | |
| 4372 wl_lst = wl_lst[n:] | |
| 4373 n = len(wl_lst) | |
| 4374 else: | |
| 4375 n -= 1 | |
| 4376 return nwords, word_lengths | |
| 4377 | |
| 4378 def output_justify(start, line): | |
| 4379 """Justified output of a line.""" | |
| 4380 # ignore leading / trailing / multiple spaces | |
| 4381 words = [w for w in line.split(" ") if w != ""] | |
| 4382 nwords = len(words) | |
| 4383 if nwords == 0: | |
| 4384 return | |
| 4385 if nwords == 1: # single word cannot be justified | |
| 4386 append_this(start, words[0]) | |
| 4387 return | |
| 4388 tl = sum([textlen(w) for w in words]) # total word lengths | |
| 4389 gaps = nwords - 1 # number of word gaps | |
| 4390 gapl = (std_width - tl) / gaps # width of each gap | |
| 4391 for w in words: | |
| 4392 _, lp = append_this(start, w) # output one word | |
| 4393 start.x = lp.x + gapl # next start at word end plus gap | |
| 4394 return | |
| 4395 | |
| 4396 asc = font.ascender | |
| 4397 dsc = font.descender | |
| 4398 if not lineheight: | |
| 4399 if asc - dsc <= 1: | |
| 4400 lheight = 1.2 | |
| 4401 else: | |
| 4402 lheight = asc - dsc | |
| 4403 else: | |
| 4404 lheight = lineheight | |
| 4405 | |
| 4406 LINEHEIGHT = fontsize * lheight # effective line height | |
| 4407 width = std_width # available horizontal space | |
| 4408 | |
| 4409 # starting point of text | |
| 4410 if pos is not None: | |
| 4411 pos = Point(pos) | |
| 4412 else: # default is just below rect top-left | |
| 4413 pos = rect.tl + (tolerance, fontsize * asc) | |
| 4414 if not pos in rect: | |
| 4415 raise ValueError("Text must start in rectangle.") | |
| 4416 | |
| 4417 # calculate displacement factor for alignment | |
| 4418 if align == TEXT_ALIGN_CENTER: | |
| 4419 factor = 0.5 | |
| 4420 elif align == TEXT_ALIGN_RIGHT: | |
| 4421 factor = 1.0 | |
| 4422 else: | |
| 4423 factor = 0 | |
| 4424 | |
| 4425 # split in lines if just a string was given | |
| 4426 if type(text) is str: | |
| 4427 textlines = text.splitlines() | |
| 4428 else: | |
| 4429 textlines = [] | |
| 4430 for line in text: | |
| 4431 textlines.extend(line.splitlines()) | |
| 4432 | |
| 4433 max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1 | |
| 4434 | |
| 4435 new_lines = [] # the final list of textbox lines | |
| 4436 no_justify = [] # no justify for these line numbers | |
| 4437 for i, line in enumerate(textlines): | |
| 4438 if line in ("", " "): | |
| 4439 new_lines.append((line, space_len)) | |
| 4440 width = rect.width - tolerance | |
| 4441 no_justify.append((len(new_lines) - 1)) | |
| 4442 continue | |
| 4443 if i == 0: | |
| 4444 width = rect.x1 - pos.x | |
| 4445 else: | |
| 4446 width = rect.width - tolerance | |
| 4447 | |
| 4448 if right_to_left: # reverses Arabic / Hebrew text front to back | |
| 4449 line = writer.clean_rtl(line) | |
| 4450 tl = textlen(line) | |
| 4451 if tl <= width: # line short enough | |
| 4452 new_lines.append((line, tl)) | |
| 4453 no_justify.append((len(new_lines) - 1)) | |
| 4454 continue | |
| 4455 | |
| 4456 # we need to split the line in fitting parts | |
| 4457 words = line.split(" ") # the words in the line | |
| 4458 | |
| 4459 # cut in parts any words that are longer than rect width | |
| 4460 words, word_lengths = norm_words(std_width, words) | |
| 4461 | |
| 4462 n = len(words) | |
| 4463 while True: | |
| 4464 line0 = " ".join(words[:n]) | |
| 4465 wl = sum(word_lengths[:n]) + space_len * (len(word_lengths[:n]) - 1) | |
| 4466 if wl <= width: | |
| 4467 new_lines.append((line0, wl)) | |
| 4468 words = words[n:] | |
| 4469 word_lengths = word_lengths[n:] | |
| 4470 n = len(words) | |
| 4471 line0 = None | |
| 4472 else: | |
| 4473 n -= 1 | |
| 4474 | |
| 4475 if len(words) == 0: | |
| 4476 break | |
| 4477 | |
| 4478 # ------------------------------------------------------------------------- | |
| 4479 # List of lines created. Each item is (text, tl), where 'tl' is the PDF | |
| 4480 # output length (float) and 'text' is the text. Except for justified text, | |
| 4481 # this is output-ready. | |
| 4482 # ------------------------------------------------------------------------- | |
| 4483 nlines = len(new_lines) | |
| 4484 if nlines > max_lines: | |
| 4485 msg = "Only fitting %i of %i lines." % (max_lines, nlines) | |
| 4486 if warn == True: | |
| 4487 print("Warning: " + msg) | |
| 4488 elif warn == False: | |
| 4489 raise ValueError(msg) | |
| 4490 | |
| 4491 start = Point() | |
| 4492 no_justify += [len(new_lines) - 1] # no justifying of last line | |
| 4493 for i in range(max_lines): | |
| 4494 try: | |
| 4495 line, tl = new_lines.pop(0) | |
| 4496 except IndexError: | |
| 4497 break | |
| 4498 | |
| 4499 if right_to_left: # Arabic, Hebrew | |
| 4500 line = "".join(reversed(line)) | |
| 4501 | |
| 4502 if i == 0: # may have different start for first line | |
| 4503 start = pos | |
| 4504 | |
| 4505 if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width: | |
| 4506 output_justify(start, line) | |
| 4507 start.x = std_start | |
| 4508 start.y += LINEHEIGHT | |
| 4509 continue | |
| 4510 | |
| 4511 if i > 0 or pos.x == std_start: # left, center, right alignments | |
| 4512 start.x += (width - tl) * factor | |
| 4513 | |
| 4514 append_this(start, line) | |
| 4515 start.x = std_start | |
| 4516 start.y += LINEHEIGHT | |
| 4517 | |
| 4518 return new_lines # return non-written lines | |
| 4519 | |
| 4520 | |
| 4521 # ------------------------------------------------------------------------ | |
| 4522 # Optional Content functions | |
| 4523 # ------------------------------------------------------------------------ | |
def get_oc(doc: Document, xref: int) -> int:
    """Look up the optional content object of an image or form xobject.

    Args:
        doc: the PDF document.
        xref: (int) xref number of an image or form xobject.

    Returns:
        The xref number stored under the object's "OC" key, or 0 if that
        key does not hold an indirect reference.

    Raises:
        ValueError: if the document is closed or encrypted, or if the
            object's "Subtype" is neither "/Image" nor "/Form".
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")

    kind, subtype = doc.xref_get_key(xref, "Subtype")
    if not (kind == "name" and subtype in ("/Image", "/Form")):
        raise ValueError("bad object type at xref %i" % xref)

    kind, value = doc.xref_get_key(xref, "OC")
    if kind == "xref":
        # value has the form "nnn 0 R" - keep the object number only
        return int(value.replace("0 R", ""))
    return 0
| 4540 | |
| 4541 | |
def set_oc(doc: Document, xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        doc: the PDF document.
        xref: (int) xref number of an image or form xobject.
        oc: (int) xref number of an OCG or OCMD. Use 0 to remove an
            existing optional content reference.

    Raises:
        ValueError: if the document is closed or encrypted, if 'xref' is
            not an image or form xobject, if 'oc' is negative, or if 'oc'
            is positive but its "Type" is neither "/OCG" nor "/OCMD".
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc < 0:  # can never be a valid object number
        raise ValueError("bad xref %i" % oc)

    if oc == 0:
        # Removal request: only touch the object if it has an "OC" key.
        # Never fall through - that would write the reference "0 0 R".
        if "OC" in doc.xref_get_keys(xref):
            doc.xref_set_key(xref, "OC", "null")
        return None

    t, name = doc.xref_get_key(oc, "Type")
    if t != "name" or name not in ("/OCG", "/OCMD"):
        raise ValueError("bad object type at xref %i" % oc)
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None
| 4563 | |
| 4564 | |
def set_ocmd(
    doc: Document,
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Create or update an OCMD object in a PDF document.

    Args:
        doc: the PDF document.
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.

    Raises:
        ValueError: for unknown OCG xrefs, an unknown policy, a malformed
            visibility expression, or if 'xref' is not an existing OCMD.
    """

    all_ocgs = set(doc.get_ocgs().keys())
    # lower-case user input -> PDF name spelling
    pols = {
        "anyon": "AnyOn",
        "allon": "AllOn",
        "anyoff": "AnyOff",
        "alloff": "AllOff",
    }

    def ve_maker(ve):
        """Return the PDF array source of a (nested) visibility expression."""
        # Note: '% (ve,)' prevents a tuple 've' from being unpacked as
        # individual format arguments (which raised TypeError before).
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        operator = ve[0]
        if not isinstance(operator, str) or operator.lower() not in (
            "and",
            "or",
            "not",
        ):
            raise ValueError("bad operand: %s" % (operator,))
        if operator.lower() == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        parts = ["[/%s" % operator.title()]
        for x in ve[1:]:
            if type(x) is int:  # an OCG xref
                if x not in all_ocgs:
                    raise ValueError("bad OCG %i" % x)
                parts.append(" %i 0 R" % x)
            else:  # must be a nested expression
                parts.append(" %s" % ve_maker(x))
        parts.append("]")
        return "".join(parts)

    text = "<</Type/OCMD"

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
        if s:
            msg = "bad OCGs: %s" % s
            raise ValueError(msg)
        text += "/OCGs[" + " ".join("%i 0 R" % x for x in ocgs) + "]"

    if policy:
        policy = str(policy).lower()
        if policy not in pols:
            raise ValueError("bad policy: %s" % policy)
        text += "/P/%s" % pols[policy]

    if ve:
        text += "/VE%s" % ve_maker(ve)

    text += ">>"

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref
| 4637 | |
| 4638 | |
def get_ocmd(doc: Document, xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.

    Args:
        doc: the PDF document.
        xref: (int) xref number of the OCMD.

    Returns:
        dict with keys "xref", "ocgs", "policy" and "ve". Items not found in
        the object source are None.

    Raises:
        ValueError: if 'xref' is out of range, the object is no OCMD, or the
            /P or /VE entries cannot be interpreted.
    """

    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        # array items look like "nnn 0 R": keep the object numbers only
        ocgs = [int(n) for n in text[p0 + 6 : p1].replace("0 R", " ").split()]

    p0 = text.find("/P/")  # look for /P policy key
    if p0 < 0:
        policy = None
    else:
        # Read the PDF name following "/P/" letter by letter. Searching for
        # "ff" / "on" instead fails for 'AnyOn' and 'AllOn' (capital "O").
        p1 = p0 + 3
        while p1 < textlen and text[p1].isalpha():
            p1 += 1
        policy = text[p0 + 3 : p1]
        if policy not in ("AllOn", "AnyOn", "AnyOff", "AllOff"):
            raise ValueError("bad object at xref")  # some irregular syntax

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding last ']'.
        p1 = p0
        while lp < 1 or lp != rp:  # until brackets are balanced
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        # turn the PDF array source into JSON: operators become strings ...
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        # ... and indirect references become plain integers
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        try:
            ve = json.loads(ve)
        except ValueError:  # includes json.JSONDecodeError
            print("bad /VE key: ", ve)
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
| 4703 | |
| 4704 | |
| 4705 """ | |
| 4706 Handle page labels for PDF documents. | |
| 4707 | |
| 4708 Reading | |
| 4709 ------- | |
| 4710 * compute the label of a page | |
| 4711 * find page number(s) having the given label. | |
| 4712 | |
| 4713 Writing | |
| 4714 ------- | |
| 4715 Supports setting (defining) page labels for PDF documents. | |
| 4716 | |
| 4717 A big Thank You goes to WILLIAM CHAPMAN who contributed the idea and | |
| 4718 significant parts of the following code during late December 2020 | |
| 4719 through early January 2021. | |
| 4720 """ | |
| 4721 | |
| 4722 | |
def rule_dict(item):
    """Make a Python dict from a PDF page label rule.

    The rule string is split at "/" characters, so it is expected in
    compressed PDF syntax. A prefix that itself contains "/" is not
    supported by this approach.

    Args:
        item -- a tuple (pno, rule) with the start page number and the rule
                string like <</S/D...>>.
    Returns:
        A dict like
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
        Key 'style' is only present if the rule contains an /S entry.
    """
    # Jorj McKie, 2021-01-06

    pno, rule = item
    parts = rule[2:-2].split("/")[1:]  # strip "<<" and ">>"
    d = {"startpage": pno, "prefix": "", "firstpagenum": 1}
    skip = False
    for i, part in enumerate(parts):
        if skip:  # this part has already been processed
            skip = False  # deactivate skipping again
            continue
        if part == "S":  # style specification
            if i + 1 < len(parts):  # tolerate a dangling /S
                d["style"] = parts[i + 1]  # next part has the style
                skip = True  # do not process next part again
        elif part == "Type":
            # optional "/Type/PageLabel": the value must not be mistaken
            # for a prefix ("PageLabel" starts with "P")
            skip = True
        elif part.startswith("St"):  # start page number specification
            d["firstpagenum"] = int(part[2:])
        elif part.startswith("P"):  # prefix specification: extract the string
            d["prefix"] = part[1:].replace("(", "").replace(")", "")
    return d
| 4755 | |
| 4756 | |
def get_label_pno(pgNo, labels):
    """Return the label for this page number.

    Args:
        pgNo: page number, 0-based.
        labels: result of doc._get_page_labels(), i.e. items whose first
            element is the start page number of a rule. Need not be sorted.
    Returns:
        The label (str) of the page number. An empty string is returned if
        no rule starts at or before 'pgNo'.
    """
    # Jorj McKie, 2021-01-06

    # Select the rule with the largest start page not exceeding pgNo.
    # Among equal start pages the last one wins, which for sorted input
    # is the same choice as taking the last applicable item.
    best = None
    for entry in labels:
        if entry[0] <= pgNo and (best is None or entry[0] >= best[0]):
            best = entry
    if best is None:  # no rule covers this page
        return ""

    rule = rule_dict(best)
    prefix = rule.get("prefix", "")
    style = rule.get("style", "")
    pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"]
    return construct_label(style, prefix, pagenumber)
| 4774 | |
| 4775 | |
def get_label(page):
    """Compute the page label of a PDF page.

    Args:
        page: page object. Its owning document is reached via 'page.parent'.
    Returns:
        The label (str) of the page, or an empty string if the document
        reports no page label definitions.
    """
    # Jorj McKie, 2021-01-06

    rules = page.parent._get_page_labels()
    if rules:
        rules.sort()  # order the rules before looking up the page
        return get_label_pno(page.number, rules)
    return ""
| 4791 | |
| 4792 | |
def get_page_numbers(doc, label, only_one=False):
    """Return a list of page numbers with the given label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) label.
        only_one: (bool) stop searching after first hit.
    Returns:
        List of page numbers having this label. The list is empty if
        'label' is empty or the document has no page label definitions.
    """
    # Jorj McKie, 2021-01-06

    numbers = []
    if not label:
        return numbers
    labels = doc._get_page_labels()
    if not labels:
        return numbers
    # same preparation as in get_label(): look up rules in sorted order
    labels = sorted(labels)
    for i in range(doc.page_count):
        if get_label_pno(i, labels) == label:
            numbers.append(i)
            if only_one:
                break
    return numbers
| 4818 | |
| 4819 | |
def construct_label(style, prefix, pno) -> str:
    """Construct a label based on style, prefix and page number.

    Args:
        style: (str) PDF numbering style: "D" (decimal), "r" / "R" (lower /
            upper case Roman), "a" / "A" (lower / upper case letters). Any
            other value produces no numeric portion.
        prefix: (str) text put in front of the numeric portion.
        pno: (int) value of the numeric portion; 1 denotes the first value.
    Returns:
        The string 'prefix' followed by the numeric portion.
    """
    # William Chapman, 2021-01-06

    n_str = ""
    if style == "D":
        n_str = str(pno)
    elif style == "r":
        n_str = integerToRoman(pno).lower()
    elif style == "R":
        n_str = integerToRoman(pno).upper()
    elif style in ("a", "A"):
        # PDF letter styles: A..Z for values 1..26, then AA..ZZ for the
        # next 26 values, then AAA..ZZZ, and so on (one repeated letter).
        if pno >= 1:
            repeat, index = divmod(pno - 1, 26)
            if style == "a":
                alphabet = string.ascii_lowercase
            else:
                alphabet = string.ascii_uppercase
            n_str = alphabet[index] * (repeat + 1)
    return prefix + n_str
| 4837 | |
| 4838 | |
def integerToLetter(i) -> str:
    """Returns letter sequence string for integer i.

    The mapping is zero-based: 0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB",
    ..., 701 -> "ZZ", 702 -> "AAA".

    Args:
        i: (int) non-negative integer.
    Returns:
        Upper case letter sequence.
    Raises:
        ValueError: if 'i' is negative.
    """
    # William Chapman, Jorj McKie, 2021-01-06

    if i < 0:
        raise ValueError("bad integer: %i" % i)

    ls = string.ascii_uppercase
    # Determine the number of letters (n) and the offset (a) of 'i' within
    # all sequences of that length. Integer arithmetic only, which stays
    # exact for arbitrarily large values.
    n, a = 1, i
    block = 26  # number of sequences having n letters
    while block <= a:
        a -= block
        block *= 26
        n += 1

    letters = []
    for j in reversed(range(n)):  # most significant letter first
        f, a = divmod(a, 26 ** j)
        letters.append(ls[f])
    return "".join(letters)
| 4855 | |
| 4856 | |
def integerToRoman(num: int) -> str:
    """Convert an integer to its upper case Roman numeral.

    Works through the value / symbol table from largest to smallest,
    emitting each symbol as often as its value fits into the remainder.
    """
    # William Chapman, Jorj McKie, 2021-01-06

    table = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    pieces = []
    remainder = num
    for value, symbol in table:
        count = remainder // value
        pieces.append(symbol * count)
        remainder -= value * count
        if remainder <= 0:  # nothing left to convert
            break
    return "".join(pieces)
| 4886 | |
| 4887 | |
def get_page_labels(doc):
    """List the page label definitions of a PDF document.

    Args:
        doc: PDF document (resp. 'self').
    Returns:
        A list with one dictionary per item of doc._get_page_labels(),
        each converted by 'rule_dict' to the format
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    # Jorj McKie, 2021-01-10
    definitions = []
    for entry in doc._get_page_labels():
        definitions.append(rule_dict(entry))
    return definitions
| 4899 | |
| 4900 | |
def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    The caller's 'labels' list is not modified.

    Args:
        doc: PDF document (resp. 'self').
        labels: list of label dictionaries like:
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
        as returned by get_page_labels().
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            The start page number followed by the PDF label rule string
            wrapped in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:  # 1 is the default: omit
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all labels rules.

        Args:
            labels: (list) dictionaries as created by function 'rule_dict'.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        # sort a copy - do not reorder the list owned by the caller
        ordered = sorted(labels, key=lambda x: x["startpage"])
        return "".join(create_label_str(label) for label in ordered)

    doc._set_page_labels(create_nums(labels))
| 4944 | |
| 4945 | |
| 4946 # End of Page Label Code ------------------------------------------------- | |
| 4947 | |
| 4948 | |
def has_links(doc: Document) -> bool:
    """Tell whether any page of the PDF has a link.

    Scans the items of doc.page_annot_xrefs() page by page and stops at
    the first one whose type (second element) equals PDF_ANNOT_LINK.

    Raises:
        ValueError: if the document is closed or is not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    return any(
        entry[1] == PDF_ANNOT_LINK
        for pno in range(doc.page_count)
        for entry in doc.page_annot_xrefs(pno)
    )
| 4960 | |
| 4961 | |
def has_annots(doc: Document) -> bool:
    """Tell whether any page of the PDF has an annotation.

    Scans the items of doc.page_annot_xrefs() page by page and stops at
    the first one whose type (second element) is neither PDF_ANNOT_LINK
    nor PDF_ANNOT_WIDGET.

    Raises:
        ValueError: if the document is closed or is not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    return any(
        entry[1] != PDF_ANNOT_LINK and entry[1] != PDF_ANNOT_WIDGET
        for pno in range(doc.page_count)
        for entry in doc.page_annot_xrefs(pno)
    )
| 4973 | |
| 4974 | |
| 4975 # ------------------------------------------------------------------- | |
| 4976 # Functions to recover the quad contained in a text extraction bbox | |
| 4977 # ------------------------------------------------------------------- | |
def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> Quad:
    """Compute the quad located inside the bbox.

    The bbox may be any of the resp. tuples occurring inside the given span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line or None, in which
            case 'span["dir"]' is used.
        span: (dict) the span. May be from get_texttrace() method.
        bbox: (tuple) the bbox of the span or any of its characters.
    Returns:
        The quad which is wrapped by the bbox.
    """
    if line_dir is None:
        line_dir = span["dir"]
    cos, sin = line_dir
    bbox = Rect(bbox)  # make it a rect

    # Height factor: 1 (i.e. just the fontsize) if small glyph heights are
    # set, else the span's ascender - descender difference.
    if TOOLS.set_small_glyph_heights():
        factor = 1
    else:
        factor = span["ascender"] - span["descender"]
    height = factor * span["size"]  # the quad's rectangle height

    # The following are distances from the bbox corners, at which we find
    # the respective quad points. The computation depends on in which
    # quadrant the text writing angle is located.
    hs = height * sin
    hc = height * cos
    if hc >= 0 and hs <= 0:  # quadrant 1
        ul, ur = bbox.bl - (0, hc), bbox.tr + (hs, 0)
        ll, lr = bbox.bl - (hs, 0), bbox.tr + (0, hc)
    elif hc <= 0 and hs <= 0:  # quadrant 2
        ul, ur = bbox.br + (hs, 0), bbox.tl - (0, hc)
        ll, lr = bbox.br + (0, hc), bbox.tl - (hs, 0)
    elif hc <= 0 and hs >= 0:  # quadrant 3
        ul, ur = bbox.tr - (0, hc), bbox.bl + (hs, 0)
        ll, lr = bbox.tr - (hs, 0), bbox.bl + (0, hc)
    else:  # quadrant 4
        ul, ur = bbox.tl + (hs, 0), bbox.br - (0, hc)
        ll, lr = bbox.tl + (0, hc), bbox.br - (hs, 0)
    return Quad(ul, ur, ll, lr)
| 5026 | |
| 5027 | |
def recover_quad(line_dir: tuple, span: dict) -> Quad:
    """Compute the quadrilateral of a complete text span.

    Validates the arguments and delegates to 'recover_bbox_quad' using
    the span's own bbox.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: the span.
    Returns:
        The quadrilateral enveloping the span's text.
    Raises:
        ValueError: if 'line_dir' is not a tuple of length 2 or 'span' is
            not a dict.
    """
    dir_ok = type(line_dir) is tuple and len(line_dir) == 2
    if not dir_ok:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    span_bbox = span["bbox"]
    return recover_bbox_quad(line_dir, span, span_bbox)
| 5042 | |
| 5043 | |
def recover_line_quad(line: dict, spans: list = None) -> Quad:
    """Calculate the line quad for 'dict' / 'rawdict' text extractions.

    The lower quad points are those of the first, resp. last span quad.
    The upper points are determined by the maximum span quad height.
    From this, compute a rect with bottom-left in (0, 0), convert this to a
    quad and rotate and shift back to cover the text of the spans.

    Args:
        line: (dict) the line. Keys "dir" and - if 'spans' is omitted -
            "spans" are accessed.
        spans: (list, optional) sub-list of spans to consider.
    Returns:
        Quad covering selected spans.
    Raises:
        ValueError: if the span list is empty.
    """
    if spans is None:  # no sub-selection: take all spans
        spans = line["spans"]
    if len(spans) == 0:
        raise ValueError("bad span list")

    line_dir = line["dir"]  # text direction
    cos, sin = line_dir  # values unused; unpacking requires a pair

    first_quad = recover_quad(line_dir, spans[0])
    # with a single span, first and last quad coincide
    last_quad = recover_quad(line_dir, spans[-1]) if len(spans) > 1 else first_quad

    line_ll = first_quad.ll  # lower-left of line quad
    line_lr = last_quad.lr  # lower-right of line quad

    # matrix mapping the base line to the x-axis such that line_ll goes
    # to (0, 0)
    mat0 = planish_line(line_ll, line_lr)
    x_lr = line_lr * mat0

    small = TOOLS.set_small_glyph_heights()  # small glyph heights?

    # largest span height: fontsize times 1 or times (ascender - descender)
    h = max(
        s["size"] * (1 if small else (s["ascender"] - s["descender"]))
        for s in spans
    )

    line_quad = Rect(0, -h, x_lr.x, 0).quad  # line rectangle as a quad
    line_quad *= ~mat0  # rotate back and shift back
    return line_quad
| 5087 | |
| 5088 | |
def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> Quad:
    """Calculate the span quad for 'dict' / 'rawdict' text extractions.

    Notes:
        There are two execution paths:
        1. For the full span quad, the result of 'recover_quad' is returned.
        2. For the quad of a sub-list of characters, the quads of the first
           and the last character are computed and joined. This is only
           supported for the "rawdict" extraction option.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line. If None,
            'span["dir"]' is used.
        span: (dict) the span.
        chars: (list, optional) sub-list of characters to consider.
    Returns:
        Quad covering selected characters.
    Raises:
        ValueError: if 'chars' is given but the span has no "chars" key.
    """
    if line_dir is None:  # must be a span from get_texttrace()
        line_dir = span["dir"]
    if chars is None:  # no sub-selection: quad of the whole span
        return recover_quad(line_dir, span)
    if "chars" not in span.keys():
        raise ValueError("need 'rawdict' option to sub-select chars")

    first_quad = recover_char_quad(line_dir, span, chars[0])
    # with a single character, first and last quad coincide
    if len(chars) > 1:
        last_quad = recover_char_quad(line_dir, span, chars[-1])
    else:
        last_quad = first_quad

    span_ll = first_quad.ll  # lower-left of span quad
    span_lr = last_quad.lr  # lower-right of span quad

    # matrix mapping the base line to the x-axis such that span_ll goes
    # to (0, 0)
    mat0 = planish_line(span_ll, span_lr)
    x_lr = span_lr * mat0

    small = TOOLS.set_small_glyph_heights()  # small glyph heights?
    if small:
        h = span["size"] * 1
    else:
        h = span["size"] * (span["ascender"] - span["descender"])

    span_quad = Rect(0, -h, x_lr.x, 0).quad  # span rectangle as a quad
    span_quad *= ~mat0  # rotate back and shift back
    return span_quad
| 5132 | |
| 5133 | |
def recover_char_quad(line_dir: tuple, span: dict, char: dict) -> Quad:
    """Recover the quadrilateral of a text character.

    This requires the "rawdict" option of text extraction.

    Args:
        line_dir: (tuple) 'line["dir"]' of the span's line. If None,
            'span["dir"]' is used.
        span: (dict) the span dict.
        char: (dict) the character dict, whose "bbox" item is used. A tuple
            is also accepted, in which case its item 3 is taken as the bbox.
    Returns:
        The quadrilateral enveloping the character.
    Raises:
        ValueError: if 'line_dir' is not a tuple of length 2, 'span' is not
            a dict, or 'char' is neither a dict nor a tuple.
    """
    if line_dir is None:
        line_dir = span["dir"]
    if type(line_dir) is not tuple or len(line_dir) != 2:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    if type(char) is dict:
        bbox = Rect(char["bbox"])
    elif type(char) is tuple:
        bbox = Rect(char[3])
    else:
        # report the argument that is actually at fault
        raise ValueError("bad char argument")

    return recover_bbox_quad(line_dir, span, bbox)
| 5160 | |
| 5161 | |
| 5162 # ------------------------------------------------------------------- | |
| 5163 # Building font subsets using fontTools | |
| 5164 # ------------------------------------------------------------------- | |
| 5165 def subset_fonts(doc: Document, verbose: bool = False) -> None: | |
| 5166 """Build font subsets of a PDF. Requires package 'fontTools'. | |
| 5167 | |
| 5168 Eligible fonts are potentially replaced by smaller versions. Page text is | |
| 5169 NOT rewritten and thus should retain properties like being hidden or | |
| 5170 controlled by optional content. | |
| 5171 """ | |
| 5172 # Font binaries: - "buffer" -> (names, xrefs, (unicodes, glyphs)) | |
| 5173 # An embedded font is uniquely defined by its fontbuffer only. It may have | |
| 5174 # multiple names and xrefs. | |
| 5175 # Once the sets of used unicodes and glyphs are known, we compute a | |
| 5176 # smaller version of the buffer using package fontTools. | |
| 5177 font_buffers = {} | |
| 5178 | |
| 5179 def get_old_widths(xref): | |
| 5180 """Retrieve old font '/W' and '/DW' values.""" | |
| 5181 df = doc.xref_get_key(xref, "DescendantFonts") | |
| 5182 if df[0] != "array": # only handle xref specifications | |
| 5183 return None, None | |
| 5184 df_xref = int(df[1][1:-1].replace("0 R", "")) | |
| 5185 widths = doc.xref_get_key(df_xref, "W") | |
| 5186 if widths[0] != "array": # no widths key found | |
| 5187 widths = None | |
| 5188 else: | |
| 5189 widths = widths[1] | |
| 5190 dwidths = doc.xref_get_key(df_xref, "DW") | |
| 5191 if dwidths[0] != "int": | |
| 5192 dwidths = None | |
| 5193 else: | |
| 5194 dwidths = dwidths[1] | |
| 5195 return widths, dwidths | |
| 5196 | |
| 5197 def set_old_widths(xref, widths, dwidths): | |
| 5198 """Restore the old '/W' and '/DW' in subsetted font. | |
| 5199 | |
| 5200 If either parameter is None or evaluates to False, the corresponding | |
| 5201 dictionary key will be set to null. | |
| 5202 """ | |
| 5203 df = doc.xref_get_key(xref, "DescendantFonts") | |
| 5204 if df[0] != "array": # only handle xref specs | |
| 5205 return None | |
| 5206 df_xref = int(df[1][1:-1].replace("0 R", "")) | |
| 5207 if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[ | |
| 5208 0 | |
| 5209 ] != "null": | |
| 5210 doc.xref_set_key(df_xref, "W", "null") | |
| 5211 else: | |
| 5212 doc.xref_set_key(df_xref, "W", widths) | |
| 5213 if (type(dwidths) is not str or not dwidths) and doc.xref_get_key( | |
| 5214 df_xref, "DW" | |
| 5215 )[0] != "null": | |
| 5216 doc.xref_set_key(df_xref, "DW", "null") | |
| 5217 else: | |
| 5218 doc.xref_set_key(df_xref, "DW", dwidths) | |
| 5219 return None | |
| 5220 | |
def set_subset_fontname(new_xref):
    """Mark the font at 'new_xref' as a subset via a random name prefix.

    Six randomly chosen upper case ASCII letters followed by '+' are
    inserted in front of the "/BaseFont" value of the font object and,
    if the font has a descendant font with a FontDescriptor given as an
    xref, in front of the descriptor's '/FontName' value.
    Uses 'doc' from the enclosing scope.
    """
    # something like 'ABCDEF+'
    tag = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"

    font_src = doc.xref_object(new_xref, compressed=True)
    font_src = font_src.replace("/BaseFont/", f"/BaseFont/{tag}")

    descendants = doc.xref_get_key(new_xref, "DescendantFonts")
    if descendants[0] == "array":
        # array source looks like '[123 0 R]'
        desc_xref = int(descendants[1][1:-1].replace("0 R", ""))
        descriptor = doc.xref_get_key(desc_xref, "FontDescriptor")
        if descriptor[0] == "xref":
            descr_xref = int(descriptor[1].replace("0 R", ""))
            descr_src = doc.xref_object(descr_xref, compressed=True)
            doc.update_object(
                descr_xref,
                descr_src.replace("/FontName/", f"/FontName/{tag}"),
            )

    # write back the renamed font object itself
    doc.update_object(new_xref, font_src)
    return None
| 5243 | |
def build_subset(buffer, unc_set, gid_set):
    """Build font subset using fontTools.

    The font and the list of required unicodes / glyph ids are handed to
    the fontTools subsetter via files in a private temporary directory,
    which is removed again before returning.

    Args:
        buffer: (bytes) the font given as a binary buffer.
        unc_set: (set) required unicodes. If it contains 0xFFFD (the
            "error" unicode), glyph ids are used for subsetting instead.
        gid_set: (set) required glyph ids.
    Returns:
        Either None if subsetting is unsuccessful or the subset font buffer.
    Raises:
        ImportError: if fontTools is not installed.
    Notes:
        Neither 'unc_set' nor 'gid_set' is modified.
    """
    try:
        import fontTools.subset as fts
    except ImportError:
        print("This method requires fontTools to be installed.")
        raise
    import shutil

    # A private directory avoids clashes between concurrent runs, which
    # fixed file names directly in the shared temp directory would cause.
    tmp_dir = tempfile.mkdtemp()
    try:
        oldfont_path = os.path.join(tmp_dir, "oldfont.ttf")
        newfont_path = os.path.join(tmp_dir, "newfont.ttf")
        uncfile_path = os.path.join(tmp_dir, "uncfile.txt")
        args = [
            oldfont_path,
            "--retain-gids",
            f"--output-file={newfont_path}",
            # NOTE(review): no shell is involved, so the quotes are passed
            # on literally - confirm fontTools treats this as wildcard.
            "--layout-features='*'",
            "--passthrough-tables",
            "--ignore-missing-glyphs",
            "--ignore-missing-unicodes",
            "--symbol-cmap",
        ]

        # store glyph ids or unicodes as file
        if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
            args.append(f"--gids-file={uncfile_path}")
            # glyph 189 is always requested (reason not visible here)
            lines = ["%i\n" % gid for gid in set(gid_set) | {189}]
        else:
            args.append(f"--unicodes-file={uncfile_path}")
            # unicode 255 is always requested (reason not visible here)
            lines = ["%04x\n" % unc for unc in set(unc_set) | {255}]
        with open(uncfile_path, "w") as unc_file:
            unc_file.writelines(lines)

        with open(oldfont_path, "wb") as fontfile:  # store fontbuffer as a file
            fontfile.write(buffer)

        try:  # invoke fontTools subsetter
            fts.main(args)
            font = Font(fontfile=newfont_path)
            new_buffer = font.buffer
            if len(font.valid_codepoints()) == 0:
                new_buffer = None
        except Exception:
            # deliberately best effort: any subsetting problem means
            # "no subset", the caller then keeps the original font
            new_buffer = None
    finally:
        # best-effort cleanup, never mask the actual result
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return new_buffer
| 5318 | |
def repl_fontnames(doc):
    """Populate 'font_buffers'.

    For each font candidate, store its xref and the list of names
    by which PDF text may refer to it (there may be multiple).

    'font_buffers' (taken from the enclosing scope) is keyed by the font
    binary. Each value is a tuple (name_set, xref_set, subsets) where
    'subsets' is a pair of sets (unicodes, glyph ids) filled in later.

    Args:
        doc: the PDF document to scan.
    """

    def norm_name(name):
        """Recreate font name that contains PDF hex codes.

        E.g. #20 -> space, chr(32)

        The name is scanned once from left to right. A '#' that is not
        followed by two hex digits is kept as is, and decoded characters
        are never decoded again (so '#23' yields a plain '#').
        """
        if "#" not in name:
            return name
        parts = []
        pos = 0
        length = len(name)
        while pos < length:
            code = name[pos + 1 : pos + 3]
            if (
                name[pos] == "#"
                and len(code) == 2
                and code[0] in string.hexdigits
                and code[1] in string.hexdigits
            ):
                parts.append(chr(int(code, 16)))
                pos += 3
            else:
                parts.append(name[pos])
                pos += 1
        return "".join(parts)

    def get_fontnames(doc, item):
        """Return a list of fontnames for an item of page.get_fonts().

        There may be multiple names e.g. for Type0 fonts.
        """
        fontname = item[3]
        names = [fontname]
        # value looks like '/Name': drop the leading slash
        fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
        fontname = norm_name(fontname)
        if fontname not in names:
            names.append(fontname)
        descendents = doc.xref_get_key(item[0], "DescendantFonts")
        if descendents[0] != "array":
            return names
        descendents = descendents[1][1:-1]  # strip the array brackets
        if descendents.endswith(" 0 R"):  # indirect: read the object source
            xref = int(descendents[:-4])
            descendents = doc.xref_object(xref, compressed=True)
        p1 = descendents.find("/BaseFont")
        if p1 < 0:
            return names
        p2 = descendents.find("/", p1 + 1)  # slash starting the name value
        if p2 < 0:
            return names
        # The name ends at the next key ('/') or at the dictionary end
        # ('>>'), whichever comes first. Ignore "not found" (-1) results,
        # which would otherwise win the min() and corrupt the slice.
        ends = [
            p
            for p in (
                descendents.find("/", p2 + 1),
                descendents.find(">>", p2 + 1),
            )
            if p >= 0
        ]
        end = min(ends) if ends else len(descendents)
        fontname = norm_name(descendents[p2 + 1 : end])
        if fontname not in names:
            names.append(fontname)
        return names

    for i in range(doc.page_count):
        for f in doc.get_page_fonts(i, full=True):
            font_xref = f[0]  # font xref
            font_ext = f[1]  # font file extension
            basename = f[3]  # font basename

            if font_ext not in (  # skip if not supported by fontTools
                "otf",
                "ttf",
                "woff",
                "woff2",
            ):
                continue
            # skip fonts which already are subsets
            if len(basename) > 6 and basename[6] == "+":
                continue

            extr = doc.extract_font(font_xref)
            fontbuffer = extr[-1]
            names = get_fontnames(doc, f)
            name_set, xref_set, subsets = font_buffers.get(
                fontbuffer, (set(), set(), (set(), set()))
            )
            xref_set.add(font_xref)
            name_set.update(names)
            # also register the name stored inside the font binary
            font = Font(fontbuffer=fontbuffer)
            name_set.add(font.name)
            del font
            font_buffers[fontbuffer] = (name_set, xref_set, subsets)
    return None
| 5396 | |
def find_buffer_by_name(name):
    """Return the first 'font_buffers' key whose name set contains 'name'.

    'font_buffers' is taken from the enclosing scope. None is returned
    if no entry knows the name.
    """
    for candidate, (known_names, _, _) in font_buffers.items():
        if name in known_names:
            return candidate
    return None
| 5403 | |
| 5404 # ----------------- | |
| 5405 # main function | |
| 5406 # ----------------- | |
| 5407 repl_fontnames(doc) # populate font information | |
| 5408 if not font_buffers: # nothing found to do | |
| 5409 if verbose: | |
| 5410 print("No fonts to subset.") | |
| 5411 return 0 | |
| 5412 | |
| 5413 old_fontsize = 0 | |
| 5414 new_fontsize = 0 | |
| 5415 for fontbuffer in font_buffers.keys(): | |
| 5416 old_fontsize += len(fontbuffer) | |
| 5417 | |
| 5418 # Scan page text for usage of subsettable fonts | |
| 5419 for page in doc: | |
| 5420 # go through the text and extend set of used glyphs by font | |
| 5421 # we use a modified MuPDF trace device, which delivers us glyph ids. | |
| 5422 for span in page.get_texttrace(): | |
| 5423 if type(span) is not dict: # skip useless information | |
| 5424 continue | |
| 5425 fontname = span["font"][:33] # fontname for the span | |
| 5426 buffer = find_buffer_by_name(fontname) | |
| 5427 if buffer is None: | |
| 5428 continue | |
| 5429 name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer] | |
| 5430 for c in span["chars"]: | |
| 5431 set_ucs.add(c[0]) # unicode | |
| 5432 set_gid.add(c[1]) # glyph id | |
| 5433 font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid)) | |
| 5434 | |
| 5435 # build the font subsets | |
| 5436 for old_buffer in font_buffers.keys(): | |
| 5437 name_set, xref_set, subsets = font_buffers[old_buffer] | |
| 5438 new_buffer = build_subset(old_buffer, subsets[0], subsets[1]) | |
| 5439 fontname = list(name_set)[0] | |
| 5440 if new_buffer == None or len(new_buffer) >= len(old_buffer): | |
| 5441 # subset was not created or did not get smaller | |
| 5442 if verbose: | |
| 5443 print(f"Cannot subset '{fontname}'.") | |
| 5444 continue | |
| 5445 if verbose: | |
| 5446 print(f"Built subset of font '{fontname}'.") | |
| 5447 val = doc._insert_font(fontbuffer=new_buffer) # store subset font in PDF | |
| 5448 new_xref = val[0] # get its xref | |
| 5449 set_subset_fontname(new_xref) # tag fontname as subset font | |
| 5450 font_str = doc.xref_object( # get its object definition | |
| 5451 new_xref, | |
| 5452 compressed=True, | |
| 5453 ) | |
| 5454 # walk through the original font xrefs and replace each by the subset def | |
| 5455 for font_xref in xref_set: | |
| 5456 # we need the original '/W' and '/DW' width values | |
| 5457 width_table, def_width = get_old_widths(font_xref) | |
| 5458 # ... and replace original font definition at xref with it | |
| 5459 doc.update_object(font_xref, font_str) | |
| 5460 # now copy over old '/W' and '/DW' values | |
| 5461 if width_table or def_width: | |
| 5462 set_old_widths(font_xref, width_table, def_width) | |
| 5463 # 'new_xref' remains unused in the PDF and must be removed | |
| 5464 # by garbage collection. | |
| 5465 new_fontsize += len(new_buffer) | |
| 5466 | |
| 5467 return old_fontsize - new_fontsize | |
| 5468 | |
| 5469 | |
| 5470 # ------------------------------------------------------------------- | |
| 5471 # Copy XREF object to another XREF | |
| 5472 # ------------------------------------------------------------------- | |
def xref_copy(doc: Document, source: int, target: int, *, keep: list = None) -> None:
    """Make the PDF object at xref 'target' a copy of the one at 'source'.

    Args:
        doc: PDF document object
        source: xref number to copy from
        target: xref number to copy to, this xref must already exist
        keep: optional list of 1st level keys of target which are not
            set to null before the source items are copied over.
    Notes:
        Comparable to the copy() method of Python dictionaries. If the
        source is a stream object, its raw stream is transferred as well.
    """
    if doc.xref_is_stream(source):
        # take the raw stream so the source compression is maintained
        raw = doc.xref_stream_raw(source)
        doc.update_stream(
            target,
            raw,
            compress=False,  # keeps source compression
            new=True,  # in case target is no stream
        )

    # wipe all target keys except the ones the caller wants to retain
    retained = [] if keep is None else keep
    for name in doc.xref_get_keys(target):
        if name not in retained:
            doc.xref_set_key(target, name, "null")

    # transfer every dictionary item of the source
    for name in doc.xref_get_keys(source):
        doc.xref_set_key(target, name, doc.xref_get_key(source, name)[1])
    return None
