comparison src_classic/utils.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 # ------------------------------------------------------------------------
2 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com
3 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html
4 #
5 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a
6 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
7 # maintained and developed by Artifex Software, Inc. https://artifex.com.
8 # ------------------------------------------------------------------------
9 import io
10 import json
11 import math
12 import os
13 import random
14 import string
15 import tempfile
16 import typing
17 import warnings
18
19 from fitz_old import *
20
# Value of the TESSDATA_PREFIX environment variable at import time
# (None when the variable is not set).
TESSDATA_PREFIX = os.environ.get("TESSDATA_PREFIX")

# Plain strings used as annotation placeholders for "-like" arguments.
point_like = "point_like"
rect_like = "rect_like"
matrix_like = "matrix_like"
quad_like = "quad_like"

# ByteString is gone from typing in 3.14, and collections.abc.Buffer is
# available from 3.12 only - so fall back to an explicit union of types.
try:
    ByteString = typing.ByteString
except AttributeError:
    ByteString = bytes | bytearray | memoryview

# Shorthand aliases used in signatures throughout this module.
AnyType = typing.Any
OptInt = typing.Optional[int]
OptFloat = typing.Union[float, None]
OptStr = typing.Union[str, None]
OptDict = typing.Union[dict, None]
OptBytes = typing.Union[ByteString, None]
OptSeq = typing.Union[typing.Sequence, None]
41
42 """
43 This is a collection of functions to extend PyMuPDF.
44 """
45
46
def write_text(page: Page, **kwargs) -> None:
    """Write the text of one or more TextWriter objects to a page.

    Keyword Args:
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one or more TextWriter objects.
        opacity: forwarded unchanged to TextWriter.write_text.
        color: forwarded unchanged to TextWriter.write_text.
        overlay: put in foreground or background.
        keep_proportion: maintain aspect ratio of rectangle sides.
        rotate: arbitrary rotation angle.
        oc: the xref of an optional content object

    Raises:
        ValueError: if 'page' is not a Page, if an unknown keyword is
            passed, or if no writer is given.
    """
    if type(page) is not Page:
        raise ValueError("bad page parameter")

    allowed = {
        "rect",
        "writers",
        "opacity",
        "color",
        "overlay",
        "keep_proportion",
        "rotate",
        "oc",
    }
    unknown = {key for key in kwargs if key not in allowed}
    if unknown:
        raise ValueError("bad keywords: " + str(unknown))

    rect = kwargs.get("rect")
    writers = kwargs.get("writers")
    opacity = kwargs.get("opacity")
    color = kwargs.get("color")
    overlay = bool(kwargs.get("overlay", True))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    rotate = int(kwargs.get("rotate", 0))
    oc = int(kwargs.get("oc", 0))

    if not writers:
        raise ValueError("need at least one TextWriter")

    if type(writers) is TextWriter:
        if rotate == 0 and rect is None:
            # Single writer, no rotation, no target rectangle: the writer
            # can put its text on the page directly.
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        writers = (writers,)

    # General case: write everything to a page of a temporary document,
    # then show that page on the target page.
    clip = writers[0].text_rect
    textdoc = Document()
    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
    for text_writer in writers:
        clip |= text_writer.text_rect
        text_writer.write_text(tpage, opacity=opacity, color=color)

    if rect is None:
        rect = clip

    page.show_pdf_page(
        rect,
        textdoc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )
    # drop the references to the temporary objects
    textdoc = None
    tpage = None
115
116
def show_pdf_page(*args, **kwargs) -> int:
    """Show page number 'pno' of PDF 'src' in rectangle 'rect'.

    Positional parameters are (page, rect, src) or (page, rect, src, pno).

    Args:
        rect: (rect-like) where to place the source image
        src: (document) source PDF
        pno: (int) source page number, positional or keyword (default 0).
            Negative values count from the end of 'src'.
        overlay: (bool) put in foreground
        keep_proportion: (bool) do not change width-height-ratio
        rotate: (float) rotation angle in degrees
        oc: (int) passed on to the page's internal _show_pdf_page
        clip: (rect-like) part of source page rectangle
    Returns:
        xref of inserted object (for reuse)
    Raises:
        ValueError: on a wrong number of positional parameters, if either
            document is no PDF, if 'rect' or the resulting source rectangle
            is empty or infinite, if the source has no pages (negative
            'pno'), if the source page has no contents, or if source and
            target are the same document.
    """
    if len(args) not in (3, 4):
        raise ValueError("bad number of positional parameters")
    pno = None
    if len(args) == 3:
        page, rect, src = args
    else:
        page, rect, src, pno = args
    if pno is None:
        pno = int(kwargs.get("pno", 0))
    overlay = bool(kwargs.get("overlay", True))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    rotate = float(kwargs.get("rotate", 0))
    oc = int(kwargs.get("oc", 0))
    clip = kwargs.get("clip")

    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate
            the source center to the origin, (2) rotate, (3) scale,
            (4) translate to the target's center.
        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees
        Returns:
            Transformation matrix.
        """
        # center points of source and target rectangles
        smp = (sr.tl + sr.br) / 2.0
        tmp = (tr.tl + tr.br) / 2.0

        # m moves the source center to (0, 0), then rotates
        m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= Matrix(fw, fh)  # concat scale matrix
        m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return JM_TUPLE(m)

    CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not src.is_pdf:
        raise ValueError("is no PDF")

    # 'rect' is documented as rect-like: convert it, so that tuples and
    # lists work as they do for insert_image.
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    if pno < 0:  # support negative page numbers
        page_count = src.page_count
        if page_count < 1:
            # adding a zero page count repeatedly would never terminate
            raise ValueError("source document has no pages")
        pno %= page_count
    src_page = src[pno]  # load source page
    if src_page.get_contents() == []:
        raise ValueError("nothing to show - source page empty")

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # reference names already used on the page: XObjects, images, fonts
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = src._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of src[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref
244
245
def replace_image(page: Page, xref: int, *, filename=None, pixmap=None, stream=None):
    """Replace the image referred to by xref.

    The object definition stored under xref is overwritten with a new image.
    The page's appearance instructions stay intact, so the new image is
    displayed with the same bbox, rotation etc.
    Providing a small, fully transparent image gives an effect as if the
    image had been deleted.
    A typical use is replacing large images by a smaller version, e.g. with
    a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.

    Raises:
        ValueError: if xref is not an image, or if not exactly one of
            filename / stream / pixmap is given.
    """
    owner = page.parent  # the owning document
    if not owner.xref_is_image(xref):
        raise ValueError("xref not an image")

    given = sum(1 for source in (filename, stream, pixmap) if source)
    if given != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")

    # insert new image anywhere in page
    inserted_xref = page.insert_image(
        page.rect,
        filename=filename,
        stream=stream,
        pixmap=pixmap,
    )
    owner.xref_copy(inserted_xref, xref)  # copy over new to old

    # new image insertion has created a new /Contents source,
    # which we will set to spaces now
    newest_contents = page.get_contents()[-1]
    owner.update_stream(newest_contents, b" ")
275
276
def delete_image(page: Page, xref: int):
    """Delete the image referred to by xref.

    The image is not removed: it is replaced by a small transparent Pixmap
    via method Page.replace_image.

    Args:
        xref: xref of the image to delete.
    """
    # a 1 x 1 gray pixmap with alpha channel (any dimension would do) ...
    transparent = fitz_old.Pixmap(fitz_old.csGRAY, (0, 0, 1, 1), 1)
    # ... with all sample bytes cleared to 0x00, i.e. 100% transparent
    transparent.clear_with()
    page.replace_image(xref, pixmap=transparent)
289
290
def insert_image(page, rect, **kwargs):
    """Insert an image for display in a rectangle.

    Args:
        rect: (rect_like) position of image on the page.
        alpha: (int, optional) set to 0 if image has no transparency.
        filename: (str, Path, file object) image filename.
        height: (int) passed on to the page's internal _insert_image.
        keep_proportion: (bool) keep width / height ratio (default).
        mask: (bytes, optional) image consisting of alpha values to use.
        oc: (int) xref of OCG or OCMD to declare as Optional Content.
        overlay: (bool) put in foreground (default) or background.
        pixmap: (Pixmap) use this as image.
        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
        stream: (bytes) use this as image.
        width: (int) passed on to the page's internal _insert_image.
        xref: (int) use this as image.

    'page' and 'rect' are positional, all other parameters are keywords.

    If 'xref' is given, that image is used. Other input options are ignored.
    Else, exactly one of pixmap, stream or filename must be given.

    'alpha=0' for non-transparent images improves performance significantly.
    Affects stream and filename only.

    Optimum transparent insertions are possible by using filename / stream in
    conjunction with a 'mask' image of alpha values.

    Returns:
        xref (int) of inserted image. Re-use as argument for multiple insertions.

    Raises:
        ValueError: if the document is no PDF, on unknown keywords, or on
            invalid image source, mask, rotate or rect arguments.
        FileNotFoundError: if 'filename' does not exist.
    """
    CheckParent(page)
    doc = page.parent
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    valid_keys = {
        "alpha",
        "filename",
        "height",
        "keep_proportion",
        "mask",
        "oc",
        "overlay",
        "pixmap",
        "rotate",
        "stream",
        "width",
        "xref",
    }
    s = set(kwargs.keys()).difference(valid_keys)
    if s:
        raise ValueError(f"bad key argument(s): {s}.")
    filename = kwargs.get("filename")
    pixmap = kwargs.get("pixmap")
    stream = kwargs.get("stream")
    mask = kwargs.get("mask")
    rotate = int(kwargs.get("rotate", 0))
    width = int(kwargs.get("width", 0))
    height = int(kwargs.get("height", 0))
    alpha = int(kwargs.get("alpha", -1))
    oc = int(kwargs.get("oc", 0))
    xref = int(kwargs.get("xref", 0))
    keep_proportion = bool(kwargs.get("keep_proportion", True))
    overlay = bool(kwargs.get("overlay", True))

    if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")

    # Reduce 'filename' to a plain string.
    if filename and not isinstance(filename, str):
        if hasattr(filename, "absolute"):  # e.g. a pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # e.g. an open file object
            filename = filename.name
        else:
            raise ValueError("bad filename")

    if filename and not os.path.exists(filename):
        raise FileNotFoundError("No such file: '%s'" % filename)
    elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
        raise ValueError("stream must be bytes-like / BytesIO")
    elif pixmap and type(pixmap) is not Pixmap:
        raise ValueError("pixmap must be a Pixmap")
    if mask and not (stream or filename):
        raise ValueError("mask requires stream or filename")
    if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
        raise ValueError("mask must be bytes-like / BytesIO")

    rotate %= 360  # normalize the angle into 0 <= rotate < 360
    if rotate not in (0, 90, 180, 270):
        raise ValueError("bad rotate value")

    r = Rect(rect)
    if r.is_empty or r.is_infinite:
        raise ValueError("rect must be finite and not empty")
    clip = r * ~page.transformation_matrix

    # Create a unique image reference name: collect the names already used
    # by the page's images, XObjects and fonts ...
    ilst = [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
    # ... and count up until the candidate is not among them.
    n = "fzImg"  # 'fitz image'
    i = 0
    _imgname = n + "0"  # first name candidate
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)  # try new name

    digests = doc.InsertedImages
    xref, digests = page._insert_image(
        filename=filename,
        pixmap=pixmap,
        stream=stream,
        imask=mask,
        clip=clip,
        overlay=overlay,
        oc=oc,
        xref=xref,
        rotate=rotate,
        keep_proportion=keep_proportion,
        width=width,
        height=height,
        alpha=alpha,
        _imgname=_imgname,
        digests=digests,
    )
    if digests is not None:
        # store the digests returned by the insertion back on the document
        doc.InsertedImages = digests

    return xref
424
425
def search_for(*args, **kwargs) -> list:
    """Search for a string on a page.

    Positional parameters are exactly (page, text).

    Args:
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created TextPage
    Returns:
        a list of rectangles or quads, each containing one occurrence.
    Raises:
        ValueError: on a wrong number of positional parameters, or if
            'textpage' does not belong to this page.
    """
    if len(args) != 2:
        raise ValueError("bad number of positional parameters")
    page, text = args
    quads = kwargs.get("quads", 0)
    clip = kwargs.get("clip")
    textpage = kwargs.get("textpage")
    if clip is not None:
        clip = Rect(clip)
    flags = kwargs.get(
        "flags",
        TEXT_DEHYPHENATE
        | TEXT_PRESERVE_WHITESPACE
        | TEXT_PRESERVE_LIGATURES
        | TEXT_MEDIABOX_CLIP,
    )

    CheckParent(page)
    tp = textpage
    if tp is None:
        # no TextPage supplied: create a temporary one ('clip' and 'flags'
        # are only used here)
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")
    rlist = tp.search(text, quads=quads)
    if textpage is None:
        del tp  # drop the temporary TextPage
    return rlist
464
465
def search_page_for(
    doc: Document,
    pno: int,
    text: str,
    quads: bool = False,
    clip: rect_like = None,
    flags: int = TEXT_DEHYPHENATE
    | TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP,
    textpage: TextPage = None,
) -> list:
    """Search for a string on a document page given by its number.

    Notes:
        Convenience function calling search_for on page doc[pno].
    Args:
        pno: page number
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: reuse a prepared textpage
    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """
    target_page = doc[pno]
    options = {
        "quads": quads,
        "clip": clip,
        "flags": flags,
        "textpage": textpage,
    }
    return target_page.search_for(text, **options)
498
499
def get_text_blocks(
    page: Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
) -> list:
    """Return the text blocks on a page.

    Notes:
        Lines in a block are concatenated with line breaks.
    Args:
        clip: used when a new TextPage is created.
        flags: (int) control the amount of data parsed into the textpage.
        textpage: reuse this TextPage instead of creating a new one.
        sort: if True, sort the blocks by items 3 and 0 of each block.
    Returns:
        A list of the blocks. Each item contains the containing rectangle
        coordinates, text lines, block type and running block number.
    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    CheckParent(page)
    if flags is None:
        flags = (
            TEXT_PRESERVE_WHITESPACE
            | TEXT_PRESERVE_IMAGES
            | TEXT_PRESERVE_LIGATURES
            | TEXT_MEDIABOX_CLIP
        )

    temporary = textpage is None
    if temporary:
        tp = page.get_textpage(clip=clip, flags=flags)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    blocks = tp.extractBLOCKS()
    if temporary:
        del tp  # drop the TextPage created above

    if sort is True:
        blocks.sort(key=lambda block: (block[3], block[0]))
    return blocks
537
538
def get_text_words(
    page: Page,
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
    delimiters=None,
) -> list:
    """Return the text words as a list with the bbox for each word.

    Args:
        clip: used when a new TextPage is created.
        flags: (int) control the amount of data parsed into the textpage.
        textpage: reuse this TextPage instead of creating a new one.
        sort: if True, sort the words by items 3 and 0 of each word tuple.
        delimiters: (str,list) characters to use as word delimiters

    Returns:
        Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).

    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    CheckParent(page)
    if flags is None:
        flags = TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES | TEXT_MEDIABOX_CLIP

    temporary = textpage is None
    if temporary:
        tp = page.get_textpage(clip=clip, flags=flags)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    words = tp.extractWORDS(delimiters)
    if temporary:
        del tp  # drop the TextPage created above

    if sort is True:
        words.sort(key=lambda word: (word[3], word[0]))
    return words
573
574
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage: TextPage = None,
) -> str:
    """Return the result of TextPage.extractTextbox for a rectangle.

    Args:
        rect: passed to extractTextbox.
        textpage: reuse this TextPage. If None, one is created with
            page.get_textpage() and dropped again afterwards.
    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    temporary = textpage is None
    if temporary:
        tp = page.get_textpage()
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    text = tp.extractTextbox(rect)
    if temporary:
        del tp
    return text
589
590
def get_text_selection(
    page: Page,
    p1: point_like,
    p2: point_like,
    clip: rect_like = None,
    textpage: TextPage = None,
):
    """Return the result of TextPage.extractSelection for two points.

    Args:
        p1, p2: passed to extractSelection.
        clip: used when a new TextPage is created.
        textpage: reuse this TextPage. If None, one is created with
            flags=TEXT_DEHYPHENATE and dropped again afterwards.
    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    CheckParent(page)
    temporary = textpage is None
    if temporary:
        tp = page.get_textpage(clip=clip, flags=TEXT_DEHYPHENATE)
    else:
        tp = textpage
        if getattr(tp, "parent") != page:
            raise ValueError("not a textpage of this page")

    selection = tp.extractSelection(p1, p2)
    if temporary:
        del tp
    return selection
608
609
def get_textpage_ocr(
    page: Page,
    flags: int = 0,
    language: str = "eng",
    dpi: int = 72,
    full: bool = False,
    tessdata: str = None,
) -> TextPage:
    """Create a Textpage from combined results of normal and OCR text parsing.

    Args:
        flags: (int) control content becoming part of the result.
        language: (str) specify expected language(s). Default is "eng" (English).
        dpi: (int) resolution in dpi, default 72.
        full: (bool) whether to OCR the full page image, or only its images (default)
        tessdata: (str) passed to Pixmap.pdfocr_tobytes. If given, environment
            variable TESSDATA_PREFIX is not required.
    Raises:
        RuntimeError: if neither TESSDATA_PREFIX is set nor 'tessdata' given.
    """
    CheckParent(page)
    if not (os.getenv("TESSDATA_PREFIX") or tessdata):
        raise RuntimeError("No OCR support: TESSDATA_PREFIX not set")

    def full_ocr(page, dpi, language, flags):
        """OCR the page's pixmap at the desired dpi, return the TextPage."""
        scale = dpi / 72
        pix = page.get_pixmap(matrix=Matrix(scale, scale))
        ocr_bytes = pix.pdfocr_tobytes(
            compress=False,
            language=language,
            tessdata=tessdata,
        )
        ocr_pdf = Document("pdf", ocr_bytes)
        ocr_page = ocr_pdf.load_page(0)
        # map coordinates of the OCR page back to those of 'page'
        unzoom = page.rect.width / ocr_page.rect.width
        ctm = Matrix(unzoom, unzoom) * page.derotation_matrix
        tpage = ocr_page.get_textpage(flags=flags, matrix=ctm)
        ocr_pdf.close()
        pix = None
        tpage.parent = weakref.proxy(page)
        return tpage

    # if OCR for the full page, OCR its pixmap @ desired dpi
    if full is True:
        return full_ocr(page, dpi, language, flags)

    # For partial OCR, make a normal textpage, then extend it with text that
    # is OCRed from each image.
    # Because of this, we need the images flag bit set ON.
    tpage = page.get_textpage(flags=flags)
    page_blocks = page.get_text("dict", flags=TEXT_PRESERVE_IMAGES)["blocks"]
    for block in page_blocks:
        if block["type"] != 1:  # only look at images
            continue
        bbox = Rect(block["bbox"])
        if bbox.width <= 3 or bbox.height <= 3:  # ignore tiny stuff
            continue
        try:
            pix = Pixmap(block["image"])  # get image pixmap
            if pix.n - pix.alpha != 3:  # we need to convert this to RGB!
                pix = Pixmap(csRGB, pix)
            if pix.alpha:  # must remove alpha channel
                pix = Pixmap(pix, 0)
            img_bytes = pix.pdfocr_tobytes(language=language, tessdata=tessdata)
            imgdoc = Document("pdf", img_bytes)  # pdf with OCRed page
            imgpage = imgdoc.load_page(0)  # read image as a page
            pix = None
            # compute matrix to transform coordinates back to that of 'page'
            imgrect = imgpage.rect  # page size of image PDF
            shrink = Matrix(1 / imgrect.width, 1 / imgrect.height)
            mat = shrink * block["transform"]
            imgpage.extend_textpage(tpage, flags=0, matrix=mat)
            imgdoc.close()
        except RuntimeError:
            # give up on partial OCR and OCR the whole page instead
            tpage = None
            print("Falling back to full page OCR")
            return full_ocr(page, dpi, language, flags)

    return tpage
684
685
def get_image_info(page: Page, hashes: bool = False, xrefs: bool = False) -> list:
    """Extract image information only from a TextPage.

    The result is stored on the page as '_image_info' when hashes were
    computed, and that stored value is reused by later calls.

    Args:
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
            Ignored if the document is no PDF.
    """
    doc = page.parent
    is_pdf = doc.is_pdf
    if is_pdf:
        if xrefs:
            hashes = True
    else:
        xrefs = False

    imginfo = getattr(page, "_image_info", None)
    if imginfo and not xrefs:
        return imginfo

    if not imginfo:
        tp = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
        imginfo = tp.extractIMGINFO(hashes=hashes)
        del tp
        if hashes:
            page._image_info = imginfo

    if not (xrefs and is_pdf):
        return imginfo

    # map the digest of every image in the page's image list to its xref
    digests = {}
    for entry in page.get_images():
        image_xref = entry[0]
        pix = Pixmap(doc, image_xref)
        digests[pix.digest] = image_xref
        del pix

    # look up each info item by its digest; 0 means "no xref found"
    for info in imginfo:
        info["xref"] = digests.get(info["digest"], 0)
    return imginfo
722
723
def get_image_rects(page: Page, name, transform=False) -> list:
    """Return list of image positions on a page.

    The image is identified by comparing its digest with the digests in
    the page's image info.

    Args:
        name: (str, list, int) image identification. May be reference name, an
            item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.
    Returns:
        A list of Rect objects or tuples of (Rect, Matrix) for all image
        locations on the page.
    Raises:
        ValueError: if a reference name matches no image or several images.
    """
    name_type = type(name)
    if name_type in (list, tuple):
        xref = name[0]
    elif name_type is int:
        xref = name
    else:
        candidates = [img for img in page.get_images() if img[7] == name]
        if candidates == []:
            raise ValueError("bad image name")
        if len(candidates) != 1:
            raise ValueError("multiple image names found")
        xref = candidates[0][0]

    pix = Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
    digest = pix.digest
    del pix

    matching = [im for im in page.get_image_info(hashes=True) if im["digest"] == digest]
    if transform:
        return [(Rect(im["bbox"]), Matrix(im["transform"])) for im in matching]
    return [Rect(im["bbox"]) for im in matching]
759
760
def get_text(
    page: Page,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
    delimiters=None,
):
    """Extract text from a page or an annotation.

    This is a unifying wrapper for various methods of the TextPage class.

    Args:
        option: (str) text, words, blocks, html, dict, json, rawdict, rawjson,
            xhtml or xml. Case is ignored.
        clip: (rect-like) restrict output to this area.
        flags: bit switches to e.g. exclude images or decompose ligatures.
            If None, a default depending on 'option' is used.
        textpage: reuse this TextPage and make no new one. If specified,
            'flags' and 'clip' are ignored.
        sort: (bool) passed on to the extraction method (not used for
            html, xml and xhtml).
        delimiters: passed on to get_text_words (option "words" only).

    Returns:
        the output of methods get_text_words / get_text_blocks or TextPage
        methods extractText, extractHTML, extractDICT, extractJSON,
        extractRAWDICT, extractRAWJSON, extractXHTML or extractXML
        respectively.
        Default and misspelling choice is "text".

    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    # NOTE(review): 'fitz' is not imported in the visible part of this
    # file - presumably it is provided by the star import; confirm.
    formats = {
        "text": fitz.TEXTFLAGS_TEXT,
        "html": fitz.TEXTFLAGS_HTML,
        "json": fitz.TEXTFLAGS_DICT,
        "rawjson": fitz.TEXTFLAGS_RAWDICT,
        "xml": fitz.TEXTFLAGS_XML,
        "xhtml": fitz.TEXTFLAGS_XHTML,
        "dict": fitz.TEXTFLAGS_DICT,
        "rawdict": fitz.TEXTFLAGS_RAWDICT,
        "words": fitz.TEXTFLAGS_WORDS,
        "blocks": fitz.TEXTFLAGS_BLOCKS,
    }
    option = option.lower()
    if option not in formats:
        option = "text"  # unknown options fall back to plain text
    if flags is None:
        flags = formats[option]

    # "words" and "blocks" have their own functions
    if option == "words":
        return get_text_words(
            page,
            clip=clip,
            flags=flags,
            textpage=textpage,
            sort=sort,
            delimiters=delimiters,
        )
    if option == "blocks":
        return get_text_blocks(
            page, clip=clip, flags=flags, textpage=textpage, sort=sort
        )

    CheckParent(page)
    cb = None
    if option in ("html", "xml", "xhtml"):  # no clipping for MuPDF functions
        clip = page.cropbox
    if clip is not None:
        clip = Rect(clip)
        cb = None
    elif type(page) is Page:
        cb = page.cropbox

    # TextPage with or without images
    tp = textpage
    if tp is None:
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(tp, "parent") != page:
        raise ValueError("not a textpage of this page")

    if option == "json":
        t = tp.extractJSON(cb=cb, sort=sort)
    elif option == "rawjson":
        t = tp.extractRAWJSON(cb=cb, sort=sort)
    elif option == "dict":
        t = tp.extractDICT(cb=cb, sort=sort)
    elif option == "rawdict":
        t = tp.extractRAWDICT(cb=cb, sort=sort)
    elif option == "html":
        t = tp.extractHTML()
    elif option == "xml":
        t = tp.extractXML()
    elif option == "xhtml":
        t = tp.extractXHTML()
    else:
        t = tp.extractText(sort=sort)

    if textpage is None:
        del tp  # drop the temporary TextPage
    return t
855
856
def get_page_text(
    doc: Document,
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: TextPage = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().
    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: passed on to page.get_text().
        flags: passed on to page.get_text().
        textpage: passed on to page.get_text().
        sort: passed on to page.get_text().
    Returns:
        output from page.get_text().
    """
    # 'textpage' must be forwarded as well - otherwise the argument would
    # be accepted but silently have no effect.
    return doc[pno].get_text(
        option,
        clip=clip,
        flags=flags,
        textpage=textpage,
        sort=sort,
    )
877
878
def get_pixmap(
    page: Page,
    *,
    matrix: matrix_like = Identity,
    dpi=None,
    colorspace: Colorspace = csRGB,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> Pixmap:
    """Create pixmap of page.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given, matrix is ignored.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations
    Raises:
        ValueError: if the colorspace's 'n' is not 1, 3 or 4.
    """
    CheckParent(page)
    if dpi:
        # a given dpi overrides 'matrix': zoom relative to 72 dpi
        scale = dpi / 72
        matrix = Matrix(scale, scale)

    if type(colorspace) is str:
        # any string other than gray / cmyk selects RGB
        named_spaces = {"GRAY": csGRAY, "CMYK": csCMYK}
        colorspace = named_spaces.get(colorspace.upper(), csRGB)
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    display_list = page.get_displaylist(annots=annots)
    pix = display_list.get_pixmap(
        matrix=matrix,
        colorspace=colorspace,
        alpha=alpha,
        clip=clip,
    )
    display_list = None
    if dpi:
        pix.set_dpi(dpi, dpi)
    return pix
920
921
def get_page_pixmap(
    doc: Document,
    pno: int,
    *,
    matrix: matrix_like = Identity,
    dpi=None,
    colorspace: Colorspace = csRGB,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> Pixmap:
    """Create pixmap of document page by page number.

    Notes:
        Convenience function calling page.get_pixmap.
    Args:
        pno: (int) page number
        matrix: Matrix for transformation (default: Identity).
        dpi: passed on to page.get_pixmap.
        colorspace: (str,Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) include alpha channel
        annots: (bool) also render annotations
    """
    render_options = {
        "matrix": matrix,
        "dpi": dpi,
        "colorspace": colorspace,
        "clip": clip,
        "alpha": alpha,
        "annots": annots,
    }
    target_page = doc[pno]
    return target_page.get_pixmap(**render_options)
953
954
def getLinkDict(ln) -> dict:
    """Convert a link-like object into a link dictionary.

    Args:
        ln: an object with a 'dest' attribute and optionally a 'rect'
            attribute (this module passes links and outline items).
    Returns:
        A dict with keys "kind" and "xref" (always 0 here), "from" if
        'ln.rect' is available, plus keys depending on the kind of the
        destination: "uri", "page", "to", "zoom", "file" or "name".
    """
    dest = ln.dest  # read once instead of on every access
    nl = {"kind": dest.kind, "xref": 0}
    try:
        nl["from"] = ln.rect
    except Exception:
        # best effort: without a usable 'rect' there is no "from" entry
        pass

    # target point: only coordinates flagged as valid are taken over
    pnt = Point(0, 0)
    if dest.flags & LINK_FLAG_L_VALID:
        pnt.x = dest.lt.x
    if dest.flags & LINK_FLAG_T_VALID:
        pnt.y = dest.lt.y

    if dest.kind == LINK_URI:
        nl["uri"] = dest.uri

    elif dest.kind == LINK_GOTO:
        nl["page"] = dest.page
        nl["to"] = pnt
        if dest.flags & LINK_FLAG_R_IS_ZOOM:
            nl["zoom"] = dest.rb.x
        else:
            nl["zoom"] = 0.0

    elif dest.kind == LINK_GOTOR:
        nl["file"] = dest.fileSpec.replace("\\", "/")
        nl["page"] = dest.page
        if dest.page < 0:
            nl["to"] = dest.dest
        else:
            nl["to"] = pnt
            if dest.flags & LINK_FLAG_R_IS_ZOOM:
                nl["zoom"] = dest.rb.x
            else:
                nl["zoom"] = 0.0

    elif dest.kind == LINK_LAUNCH:
        nl["file"] = dest.fileSpec.replace("\\", "/")

    elif dest.kind == LINK_NAMED:
        nl["name"] = dest.named

    else:
        nl["page"] = dest.page

    return nl
1000
1001
def get_links(page: Page) -> list:
    """Create a list of all links contained in a PDF page.

    Notes:
        see PyMuPDF documentation for details.
    Returns:
        A list of link dictionaries as made by getLinkDict. For a PDF,
        "xref" and "id" are filled in from the page's link annotations,
        provided their number equals the number of links found.
    """
    CheckParent(page)

    # follow the chain of links starting at the page's first link
    links = []
    current = page.first_link
    while current:
        links.append(getLinkDict(current))
        current = current.next

    if links and page.parent.is_pdf:
        link_annots = [a for a in page.annot_xrefs() if a[1] == PDF_ANNOT_LINK]
        if len(link_annots) == len(links):
            for link, annot in zip(links, link_annots):
                link["xref"] = annot[0]
                link["id"] = annot[2]
    return links
1023
1024
def get_toc(
    doc: Document,
    simple: bool = True,
) -> list:
    """Create a table of contents.

    Args:
        simple: a bool to control output. Returns a list, where each entry
            consists of outline level, title, page number and link
            destination (if simple = False). For details see PyMuPDF's
            documentation.
    Raises:
        ValueError: if the document is closed.
    """

    def collect(item, entries, level):
        """Follow the outline item chain, appending one entry per item."""
        while item:
            title = item.title or " "  # items without title get a space

            # page is -1 for external items and for items without uri
            page = -1
            if not item.is_external and item.uri:
                if item.page == -1:
                    page = doc.resolve_link(item.uri)[0] + 1
                else:
                    page = item.page + 1

            entry = [level, title, page]
            if not simple:
                entry.append(getLinkDict(item))
            entries.append(entry)

            if item.down:  # descend into sub-items, one level deeper
                entries = collect(item.down, entries, level + 1)
            item = item.next
        return entries

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    first_item = doc.outline
    if not first_item:
        return []

    toc = collect(first_item, [], 1)
    if doc.is_pdf and simple is False:
        doc._extend_toc_items(toc)
    return toc
1079
1080
def del_toc_item(
    doc: Document,
    idx: int,
) -> None:
    """Delete a TOC / bookmark item.

    Args:
        idx: index of the item in the list returned by
            doc.get_outline_xrefs().
    """
    outline_xrefs = doc.get_outline_xrefs()
    doc._remove_toc_item(outline_xrefs[idx])
1088
1089
def set_toc_item(
    doc: Document,
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx: (int) desired index of the TOC list, as created by get_toc.
        dest_dict: (dict) destination dictionary as created by get_toc(False).
            Outrules all other parameters. If None, the remaining parameters
            are used to make a dest dictionary. The dictionary passed in is
            not modified.
        kind: (int) kind of link (LINK_GOTO, etc.). If None, then only the
            title will be updated. If LINK_NONE, the TOC item will be deleted.
        pno: (int) page number (1-based like in get_toc). Required if LINK_GOTO.
        uri: (str) the URL, required if LINK_URI.
        title: (str) the new title. No change if None.
        to: (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename: (str) destination filename, required for LINK_GOTOR and
            LINK_LAUNCH.
        zoom: (float) a zoom factor for the target location (LINK_GOTO).
    Raises:
        ValueError: if the page number is invalid for LINK_GOTO, if no
            action string can be built for the destination, or if a color
            in dest_dict is not a triple of values between 0 and 1.
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        # Work on a shallow copy: the coordinate conversion below must not
        # leak into the caller's dictionary. Otherwise using the same
        # dictionary a second time would flip the y-coordinate back.
        dest_dict = dict(dest_dict)
        if dest_dict["kind"] == LINK_GOTO:
            pno = dest_dict["page"]  # used as is here (no "- 1" adjustment)
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            # Point(...) makes a new object, so the caller's point is untouched
            to = Point(dest_dict.get("to", Point(72, 36)))
            to.y = page_height - to.y  # measure y from the other page edge
            dest_dict["to"] = to
        action = getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")
        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold  # bit 0: italic, bit 1: bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],  # strip the leading "/A"
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = Point(72, page_height - 36)
        else:
            to = Point(to)
            to.y = page_height - to.y

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = getDestStr(page_xref, ddict)
    # an empty string (unsupported kind) fails this check as well
    if not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)
1185
1186
def get_area(*args) -> float:
    """Calculate the area of a rectangle.

    Positional arguments:
        1. the rectangle; its 'width' and 'height' attributes are used.
        2. optional unit, one of 'px' (default), 'in', 'cm', or 'mm'.
    Returns:
        width * height, scaled by the squared conversion factor of the unit.
    """
    rect = args[0]
    unit = args[1] if len(args) > 1 else "px"
    # per unit: (numerator, denominator) of the linear conversion factor
    conversions = {
        "px": (1, 1),
        "in": (1.0, 72.0),
        "cm": (2.54, 72.0),
        "mm": (25.4, 72.0),
    }
    numerator, denominator = conversions[unit]
    factor = (numerator / denominator) ** 2  # areas scale quadratically
    return factor * rect.width * rect.height
1197
1198
def set_metadata(doc: Document, m: dict) -> None:
    """Update the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata. The keys "format" and
            "encryption" are accepted but not written. Values that are
            false, or equal to "none" or "null", are stored as "null".
            An empty dictionary sets the "Info" key itself to "null".
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, if
            'm' is not a dict, or if it contains unsupported keys.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(m) is not dict:
        raise ValueError("bad metadata")
    # metadata key -> PDF key; None marks keys that are never written
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    diff_set = set(m).difference(keymap)
    if diff_set:
        raise ValueError("bad dict key(s): %s" % diff_set)

    # xref -1 addresses the object holding the "Info" key
    # (the PDF trailer according to the PyMuPDF documentation)
    t, temp = doc.xref_get_key(-1, "Info")
    if t != "xref":
        info_xref = 0
    else:
        info_xref = int(temp.replace("0 R", ""))  # "n 0 R" -> n

    if not m:
        if info_xref != 0:  # remove existing metadata
            doc.xref_set_key(-1, "Info", "null")
            # re-initialize like the update path does, so the document
            # object does not keep showing the removed values
            doc.init_doc()
        return  # nothing (else) to do

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)

    for key, val in m.items():
        pdf_key = keymap[key]
        if pdf_key is None:  # informational key: not stored in the PDF
            continue
        if not bool(val) or val in ("none", "null"):
            val = "null"
        else:
            val = get_pdf_str(val)
        doc.xref_set_key(info_xref, pdf_key, val)
    doc.init_doc()
    return
1256
1257
def getDestStr(xref: int, ddict: dict) -> str:
    """Calculate the PDF action string.

    Args:
        xref: xref number of the target page; only used for "GoTo" actions.
        ddict: destination dictionary, or a number. A number is used as
            the second coordinate of a "GoTo" destination on page 'xref'.
    Returns:
        A string starting with "/A", or an empty string if 'ddict' is
        false, has no "kind", or its kind is LINK_NONE or not handled here.

    Notes:
        Supports Link annotations and outline items (bookmarks).
    """
    if not ddict:
        return ""
    str_goto = "/A<</S/GoTo/D[%i 0 R/XYZ %g %g %g]>>"
    str_gotor1 = "/A<</S/GoToR/D[%s /XYZ %g %g %g]/F<</F%s/UF%s/Type/Filespec>>>>"
    str_gotor2 = "/A<</S/GoToR/D%s/F<</F%s/UF%s/Type/Filespec>>>>"
    str_launch = "/A<</S/Launch/F<</F%s/UF%s/Type/Filespec>>>>"
    str_uri = "/A<</S/URI/URI%s>>"

    if type(ddict) in (int, float):
        return str_goto % (xref, 0, ddict, 0)

    d_kind = ddict.get("kind", LINK_NONE)

    if d_kind == LINK_NONE:
        return ""

    if d_kind == LINK_GOTO:
        d_zoom = ddict.get("zoom", 0)
        d_left, d_top = ddict.get("to", Point(0, 0))
        return str_goto % (xref, d_left, d_top, d_zoom)

    if d_kind == LINK_URI:
        return str_uri % (get_pdf_str(ddict["uri"]),)

    if d_kind == LINK_LAUNCH:
        fspec = get_pdf_str(ddict["file"])
        return str_launch % (fspec, fspec)

    if d_kind == LINK_GOTOR:
        if ddict["page"] < 0:
            # no page number: "to" is passed through get_pdf_str instead
            fspec = get_pdf_str(ddict["file"])
            return str_gotor2 % (get_pdf_str(ddict["to"]), fspec, fspec)

        fspec = get_pdf_str(ddict["file"])
        # "to" and "zoom" are optional, like for LINK_GOTO above. Callers
        # such as set_toc_item pass to=None when no target was given.
        to = ddict.get("to")
        to = Point(0, 0) if to is None else Point(to)
        return str_gotor1 % (
            ddict["page"],
            to.x,
            to.y,
            ddict.get("zoom", 0),
            fspec,
            fspec,
        )

    return ""
1314
1315
def set_toc(
    doc: Document,
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally a fourth item: either a number (top margin on the
            page) or a destination dictionary. None or '()' remove the TOC.
            Neither 'toc' nor the dictionaries in it are modified.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.
    Returns:
        the number of inserted items, or the number of removed items respectively.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or if
            'toc' fails the validity checks.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    if type(t0) not in (list, tuple):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    for i in range(toclen - 1):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        if t2[0] > t1[0] + 1:  # a level may grow by at most 1 per row
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    doc._delToC()  # del old outlines; the xrefs it returns are not reused

    # entry zero is the outline root xref number, followed by one new xref
    # per TOC row
    xref = [doc._getOLRootNumber()]
    xref.extend(doc.get_new_xref() for _ in range(toclen))

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as dictionaries - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = get_pdf_str(o[1])  # title
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                if type(o[3]) is dict:
                    # Copy it: "to" is replaced below, and writing that into
                    # the caller's dictionary would flip the y-coordinate
                    # again whenever the same TOC is passed in once more.
                    dest_dict = dict(o[3])
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    point = +dest_dict["to"]  # unary plus: work on a copy
                    point.y = page_height - point.y
                    dest_dict["to"] = point
        d = {
            "first": -1,
            "count": 0,
            "last": -1,
            "prev": -1,
            "next": -1,
            "dest": getDestStr(page_xref, dest_dict),
            "top": dest_dict["to"],
            "title": title,
            "parent": lvltab[lvl - 1],
            "xref": xref[i + 1],
            "color": dest_dict.get("color"),
            "flags": dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0),
        }
        lvltab[lvl] = i + 1
        parent = olitems[lvltab[lvl - 1]]  # the parent entry

        if (
            dest_dict.get("collapse") or collapse and lvl > collapse
        ):  # suppress expansion
            parent["count"] -= 1  # make /Count negative
        else:
            parent["count"] += 1  # positive /Count

        if parent["first"] == -1:  # first child of this parent
            parent["first"] = i + 1
            parent["last"] = i + 1
        else:  # chain to the parent's previous last child
            d["prev"] = parent["last"]
            prev = olitems[parent["last"]]
            prev["next"] = i + 1
            parent["last"] = i + 1
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # ------------------------------------------------------------------------------
    for i, ol in enumerate(olitems):
        # The outline root (i == 0) has no "dest", "next", "parent", "prev"
        # and "title" keys - these are looked up with defaults.
        txt = "<<"
        if ol["count"] != 0:
            txt += "/Count %i" % ol["count"]
        txt += ol.get("dest", "")
        if ol["first"] > -1:
            txt += "/First %i 0 R" % xref[ol["first"]]
        if ol["last"] > -1:
            txt += "/Last %i 0 R" % xref[ol["last"]]
        if ol.get("next", -1) > -1:
            txt += "/Next %i 0 R" % xref[ol["next"]]
        if ol.get("parent", -1) > -1:
            txt += "/Parent %i 0 R" % xref[ol["parent"]]
        if ol.get("prev", -1) > -1:
            txt += "/Prev %i 0 R" % xref[ol["prev"]]
        if "title" in ol:
            txt += "/Title" + ol["title"]

        if ol.get("color") and len(ol["color"]) == 3:
            txt += "/C[ %g %g %g]" % tuple(ol["color"])
        if ol.get("flags", 0) > 0:
            txt += "/F %i" % ol["flags"]

        if i == 0:  # special: this is the outline root
            txt += "/Type/Outlines"  # so add the /Type entry
        txt += ">>"
        doc.update_object(xref[i], txt)  # insert the PDF object

    doc.init_doc()
    return toclen
1491
1492
def do_links(
    doc1: Document,
    doc2: Document,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
) -> None:
    """Insert links contained in copied page range into destination PDF.

    Parameter values **must** equal those of method insert_pdf(), which must
    have been previously executed.

    Args:
        doc1: the destination PDF, which receives the links.
        doc2: the source PDF, whose links are read.
        from_page: first source page; a negative value means 0, a too
            large value means the last page.
        to_page: last source page; a value outside the document means the
            last page. May be smaller than from_page (reversed range).
        start_at: destination page number of the first copied page.
    Raises:
        ValueError: if start_at is negative.
    """

    def cre_annot(lnk, xref_dst, pno_src, ctm):
        """Create annotation object string for a passed-in link.

        Returns an empty string for link kinds not handled here.
        """
        # link rectangle, transformed with ctm
        rect = "%g %g %g %g" % tuple(lnk["from"] * ctm)
        kind = lnk["kind"]

        if kind == LINK_GOTO:
            template = annot_skel["goto1"]
            # the target must be one of the copied pages: use the xref of
            # its counterpart in the destination PDF
            position = pno_src.index(lnk["page"])
            target = lnk["to"] * ctm
            return template % (
                xref_dst[position],
                target.x,
                target.y,
                lnk["zoom"],
                rect,
            )

        if kind == LINK_GOTOR:
            if lnk["page"] >= 0:
                template = annot_skel["gotor1"]
                target = lnk.get("to", Point(0, 0))
                if type(target) is not Point:
                    target = Point(0, 0)
                return template % (
                    lnk["page"],
                    target.x,
                    target.y,
                    lnk["zoom"],
                    lnk["file"],
                    lnk["file"],
                    rect,
                )
            template = annot_skel["gotor2"]
            # first and last character of the get_pdf_str result are dropped
            dest_name = get_pdf_str(lnk["to"])[1:-1]
            return template % (dest_name, lnk["file"], rect)

        if kind == LINK_LAUNCH:
            return annot_skel["launch"] % (lnk["file"], lnk["file"], rect)

        if kind == LINK_URI:
            return annot_skel["uri"] % (lnk["uri"], rect)

        return ""

    # validate & normalize parameters
    if from_page < 0:
        fp = 0
    else:
        fp = min(from_page, doc2.page_count - 1)

    if 0 <= to_page < doc2.page_count:
        tp = to_page
    else:
        tp = doc2.page_count - 1

    if start_at < 0:
        raise ValueError("'start_at' must be >= 0")

    step = 1 if fp <= tp else -1  # page range could be reversed

    # lists of source / destination page numbers
    pno_src = list(range(fp, tp + step, step))
    pno_dst = [start_at + offset for offset in range(len(pno_src))]

    # lists of source / destination page xrefs
    xref_src = []
    xref_dst = []
    for p_src, p_dst in zip(pno_src, pno_dst):
        xref_src.append(doc2.page_xref(p_src))
        xref_dst.append(doc1.page_xref(p_dst))

    # create the links for each copied page in destination PDF
    for p_src, p_dst in zip(pno_src, pno_dst):
        page_src = doc2[p_src]  # load source page
        links = page_src.get_links()  # get all its links
        if not links:  # no links there
            continue
        ctm = ~page_src.transformation_matrix
        page_dst = doc1[p_dst]  # load destination page
        link_tab = []  # annotation strings for this page
        for link in links:
            if link["kind"] == LINK_GOTO and link["page"] not in pno_src:
                continue  # GOTO link target not in copied pages
            annot_text = cre_annot(link, xref_dst, pno_src, ctm)
            if annot_text:
                link_tab.append(annot_text)
        if link_tab:
            page_dst._addAnnot_FromString(tuple(link_tab))

    return
1611
1612
def getLinkText(page: Page, lnk: dict) -> str:
    """Create the PDF annotation object string for a link dictionary.

    Args:
        page: the page the link belongs to. Its transformation matrix,
            parent document and annot_xrefs() are used.
        lnk: the link dictionary. Keys read here: "kind" and "from", and
            depending on the kind "page", "to", "zoom", "file", "uri",
            "name". The optional keys "id" and "xref" identify an
            existing link.
    Returns:
        The filled-in 'annot_skel' template of the link kind with a /NM
        entry added, or an empty string if the kind is not handled here.
    """
    ictm = ~page.transformation_matrix
    rect = "%g %g %g %g" % tuple(lnk["from"] * ictm)
    kind = lnk["kind"]

    if kind == LINK_GOTO:
        if lnk["page"] >= 0:
            template = annot_skel["goto1"]
            target_xref = page.parent.page_xref(lnk["page"])
            target = lnk.get("to", Point(0, 0)) * ictm
            annot = template % (
                target_xref,
                target.x,
                target.y,
                lnk.get("zoom", 0),
                rect,
            )
        else:  # no page number: "to" is passed through get_pdf_str
            annot = annot_skel["goto2"] % (get_pdf_str(lnk["to"]), rect)

    elif kind == LINK_GOTOR:
        if lnk["page"] >= 0:
            template = annot_skel["gotor1"]
            target = lnk.get("to", Point(0, 0))
            if type(target) is not Point:  # anything else is replaced
                target = Point(0, 0)
            annot = template % (
                lnk["page"],
                target.x,
                target.y,
                lnk.get("zoom", 0),
                lnk["file"],
                lnk["file"],
                rect,
            )
        else:
            annot = annot_skel["gotor2"] % (
                get_pdf_str(lnk["to"]),
                lnk["file"],
                rect,
            )

    elif kind == LINK_LAUNCH:
        annot = annot_skel["launch"] % (lnk["file"], lnk["file"], rect)

    elif kind == LINK_URI:
        annot = annot_skel["uri"] % (lnk["uri"], rect)

    elif kind == LINK_NAMED:
        annot = annot_skel["named"] % (lnk["name"], rect)

    else:
        annot = ""

    if not annot:
        return annot

    # ids of the existing link annotations of the page, keyed by xref
    link_names = {
        x[0]: x[2] for x in page.annot_xrefs() if x[1] == PDF_ANNOT_LINK
    }

    old_name = lnk.get("id", "")  # id value in the argument

    if old_name and (lnk["xref"], old_name) in link_names.items():
        name = old_name  # no new name if this is an update only
    else:
        # use the first "<stem>-L<i>" not taken by another link of the page
        stem = TOOLS.set_annot_stem() + "-L%i"
        i = 0
        name = stem % i
        while name in link_names.values():
            i += 1
            name = stem % i

    # add /NM key to object definition
    # NOTE(review): this replaces every "/Link" occurrence in the string,
    # which would include one inside a uri or file name - confirm intent
    return annot.replace("/Link", "/Link/NM(%s)" % name)
1688
1689
def delete_widget(page: Page, widget: Widget) -> Widget:
    """Delete widget from page and return the next one.

    Args:
        page: the page containing the widget.
        widget: the widget to delete. All its attributes are removed, so
            the object is unusable afterwards.
    Returns:
        The value of widget.next, read before the deletion.
    Raises:
        ValueError: if 'widget' has no '_annot' attribute (or it is None).
    """
    CheckParent(page)
    annot = getattr(widget, "_annot", None)
    if annot is None:
        raise ValueError("bad type: widget")

    successor = widget.next  # fetch before the widget is dismantled
    page.delete_annot(annot)
    widget._annot.__del__()
    widget._annot.parent = None
    widget.__dict__.clear()  # strip all attributes from the widget
    return successor
1704
1705
def update_link(page: Page, lnk: dict) -> None:
    """Update a link on the current page.

    Args:
        lnk: link dictionary; its "xref" entry names the object that is
            overwritten with the string generated by getLinkText.
    Raises:
        ValueError: if getLinkText returns an empty string for the link.
    """
    CheckParent(page)
    annot_source = getLinkText(page, lnk)
    if not annot_source:
        raise ValueError("link kind not supported")

    page.parent.update_object(lnk["xref"], annot_source, page=page)
1715
1716
def insert_link(page: Page, lnk: dict, mark: bool = True) -> None:
    """Insert a new link for the current page.

    Args:
        lnk: link dictionary, converted to an annotation string by
            getLinkText.
        mark: not used by this function.
    Raises:
        ValueError: if getLinkText returns an empty string for the link.
    """
    CheckParent(page)
    annot_source = getLinkText(page, lnk)
    if not annot_source:
        raise ValueError("link kind not supported")
    page._addAnnot_FromString((annot_source,))
1725
1726
def insert_textbox(
    page: Page,
    rect: rect_like,
    buffer: typing.Union[str, list],
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Notes:
        Creates a Shape object and calls its same-named method. The shape
        is committed only if that method returns a non-negative value.
    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background
        (all remaining parameters are passed to the Shape method unchanged)
    Returns:
        unused or deficit rectangle area (float)
    """
    shape = page.new_shape()
    text_options = {
        "fontsize": fontsize,
        "lineheight": lineheight,
        "fontname": fontname,
        "fontfile": fontfile,
        "set_simple": set_simple,
        "encoding": encoding,
        "color": color,
        "fill": fill,
        "expandtabs": expandtabs,
        "render_mode": render_mode,
        "border_width": border_width,
        "align": align,
        "rotate": rotate,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    rc = shape.insert_textbox(rect, buffer, **text_options)
    if rc >= 0:  # a negative result leaves the page untouched
        shape.commit(overlay)
    return rc
1795
1796
def insert_text(
    page: Page,
    point: point_like,
    text: typing.Union[str, list],
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
):
    """Insert text starting at a given point.

    Notes:
        Creates a Shape object and calls its same-named method. The shape
        is committed only if that method returns a non-negative value.
    Parameters:
        point: (point-like) passed to the Shape method as text position.
        text: the text, a string or a list.
        overlay: passed to Shape.commit().
        (all remaining parameters are passed to the Shape method unchanged)
    Returns:
        The return value of Shape.insert_text().
    """
    shape = page.new_shape()
    text_options = {
        "fontsize": fontsize,
        "lineheight": lineheight,
        "fontname": fontname,
        "fontfile": fontfile,
        "set_simple": set_simple,
        "encoding": encoding,
        "color": color,
        "fill": fill,
        "border_width": border_width,
        "render_mode": render_mode,
        "rotate": rotate,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    rc = shape.insert_text(point, text, **text_options)
    if rc >= 0:  # a negative result leaves the page untouched
        shape.commit(overlay)
    return rc
1841
1842
def new_page(
    doc: Document,
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> Page:
    """Create and return a new page object.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).
    Returns:
        A Page object: the item found at index 'pno' of the document after
        the insertion.
    """
    doc._newPage(pno, width=width, height=height)
    created_page = doc[pno]
    return created_page
1860
1861
def insert_page(
    doc: Document,
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Create a new PDF page and insert some text.

    Notes:
        Function combining Document.new_page() and Page.insert_text().
        For parameter details see these methods. The text is inserted at
        point (50, 72).
    Returns:
        0 if no text was given, otherwise the result of Page.insert_text().
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if not text:  # the page is created in any case
        return 0
    return page.insert_text(
        (50, 72),
        text,
        fontsize=fontsize,
        fontname=fontname,
        fontfile=fontfile,
        color=color,
    )
1891
1892
def draw_line(
    page: Page,
    p1: point_like,
    p2: point_like,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc=0,
) -> Point:
    """Draw a line from point p1 to point p2.

    Notes:
        Uses a new Shape of the page: draws the line, applies the drawing
        properties via finish() (path not closed) and commits the shape.
    Returns:
        The return value of Shape.draw_line().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "dashes": dashes,
        "width": width,
        "closePath": False,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_line(Point(p1), Point(p2))
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
1926
1927
def draw_squiggle(
    page: Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a squiggly line from point p1 to point p2.

    Notes:
        Uses a new Shape of the page: draws the line with the given
        breadth, applies the drawing properties via finish() (path not
        closed) and commits the shape.
    Returns:
        The return value of Shape.draw_squiggle().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "dashes": dashes,
        "width": width,
        "closePath": False,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
1962
1963
def draw_zigzag(
    page: Page,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a zigzag line from point p1 to point p2.

    Notes:
        Uses a new Shape of the page: draws the line with the given
        breadth, applies the drawing properties via finish() (path not
        closed) and commits the shape.
    Returns:
        The return value of Shape.draw_zigzag().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "dashes": dashes,
        "width": width,
        "closePath": False,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
1998
1999
def draw_rect(
    page: Page,
    rect: rect_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
    radius=None,
) -> Point:
    """Draw a rectangle. See Shape class method for details.

    Notes:
        Uses a new Shape of the page: draws Rect(rect), passing 'radius'
        on, applies the drawing properties via finish() and commits the
        shape.
    Returns:
        The return value of Shape.draw_rect().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_rect(Rect(rect), radius=radius)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2034
2035
def draw_quad(
    page: Page,
    quad: quad_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a quadrilateral.

    Notes:
        Uses a new Shape of the page: draws Quad(quad), applies the
        drawing properties via finish() and commits the shape.
    Returns:
        The return value of Shape.draw_quad().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_quad(Quad(quad))
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2069
2070
def draw_polyline(
    page: Page,
    points: list,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    closePath: bool = False,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw multiple connected line segments.

    Notes:
        Uses a new Shape of the page: draws the polyline through
        'points', applies the drawing properties (including closePath)
        via finish() and commits the shape.
    Returns:
        The return value of Shape.draw_polyline().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "closePath": closePath,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_polyline(points)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2106
2107
def draw_circle(
    page: Page,
    center: point_like,
    radius: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    morph: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle given its center and radius.

    Notes:
        Uses a new Shape of the page: draws the circle, applies the
        drawing properties via finish() and commits the shape.
    Returns:
        The return value of Shape.draw_circle().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_circle(Point(center), radius)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2141
2142
def draw_oval(
    page: Page,
    rect: typing.Union[rect_like, quad_like],
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    morph: OptSeq = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw an oval given its containing rectangle or quad.

    Notes:
        Uses a new Shape of the page: draws the oval ('rect' is passed on
        unconverted), applies the drawing properties via finish() and
        commits the shape.
    Returns:
        The return value of Shape.draw_oval().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_oval(rect)
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2176
2177
def draw_curve(
    page: Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.

    Notes:
        Uses a new Shape of the page: draws the curve, applies the
        drawing properties (including closePath) via finish() and commits
        the shape.
    Returns:
        The return value of Shape.draw_curve().
    """
    shape = page.new_shape()
    finish_args = {
        "color": color,
        "fill": fill,
        "dashes": dashes,
        "width": width,
        "lineCap": lineCap,
        "lineJoin": lineJoin,
        "morph": morph,
        "closePath": closePath,
        "stroke_opacity": stroke_opacity,
        "fill_opacity": fill_opacity,
        "oc": oc,
    }
    result = shape.draw_curve(Point(p1), Point(p2), Point(p3))
    shape.finish(**finish_args)
    shape.commit(overlay)
    return result
2215
2216
def draw_bezier(
    page: Page,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

    A new shape is created on ``page``, the curve is added to it, the
    drawing properties are applied with ``finish`` and the shape is
    committed using ``overlay``.

    Args:
        page: the page to draw on.
        p1: start point of the curve.
        p2: first control point.
        p3: second control point.
        p4: end point of the curve.
        morph: passed unchanged to ``finish``; annotated as a sequence
            like in the sibling draw functions of this module.
        overlay: passed unchanged to ``commit``.
        (all remaining keyword arguments are passed unchanged to ``finish``)

    Returns:
        The value returned by the shape's ``draw_bezier`` call.
    """
    img = page.new_shape()
    Q = img.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
    img.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    img.commit(overlay)

    return Q
2255
2256
def draw_sector(
    page: Page,
    center: point_like,
    point: point_like,
    beta: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    fullSector: bool = True,
    morph: OptSeq = None,
    width: float = 1,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle sector on a page.

    The sector is defined by the circle center, one arc end point and the
    angle of the arc.

    Args:
        center: center of circle
        point: arc end point
        beta: angle of arc (degrees)
        fullSector: connect arc ends with center

    Returns:
        The value returned by the shape's ``draw_sector`` call.
    """
    shape = page.new_shape()
    end_point = shape.draw_sector(
        Point(center),
        Point(point),
        beta,
        fullSector=fullSector,
    )
    # Collect the drawing properties first, then hand them over in one go.
    properties = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**properties)
    shape.commit(overlay)
    return end_point
2302
2303
2304 # ----------------------------------------------------------------------
2305 # Name: wx.lib.colourdb.py
2306 # Purpose: Adds a bunch of colour names and RGB values to the
2307 # colour database so they can be found by name
2308 #
2309 # Author: Robin Dunn
2310 #
2311 # Created: 13-March-2001
2312 # Copyright: (c) 2001-2017 by Total Control Software
2313 # Licence: wxWindows license
2314 # Tags: phoenix-port, unittest, documented
2315 # ----------------------------------------------------------------------
2316
2317
def getColorList() -> list:
    """Return only the colour names known to this module.

    The names are taken from the first item of every entry delivered by
    getColorInfoList(), in the same order.

    :rtype: list of strings
    """
    names = []
    for entry in getColorInfoList():
        names.append(entry[0])
    return names
2325
2326
def getColorInfoList() -> list:
    """
    Returns the list of colour name/value tuples used by this module.

    Every entry is a 4-tuple ``(NAME, red, green, blue)``: the name is an
    upper-case string and the three components are integers in the range
    0 to 255. The list is ordered by colour family; a new list object is
    built on every call.

    :rtype: list of tuples
    """

    return [
        ("ALICEBLUE", 240, 248, 255),
        ("ANTIQUEWHITE", 250, 235, 215),
        ("ANTIQUEWHITE1", 255, 239, 219),
        ("ANTIQUEWHITE2", 238, 223, 204),
        ("ANTIQUEWHITE3", 205, 192, 176),
        ("ANTIQUEWHITE4", 139, 131, 120),
        ("AQUAMARINE", 127, 255, 212),
        ("AQUAMARINE1", 127, 255, 212),
        ("AQUAMARINE2", 118, 238, 198),
        ("AQUAMARINE3", 102, 205, 170),
        ("AQUAMARINE4", 69, 139, 116),
        ("AZURE", 240, 255, 255),
        ("AZURE1", 240, 255, 255),
        ("AZURE2", 224, 238, 238),
        ("AZURE3", 193, 205, 205),
        ("AZURE4", 131, 139, 139),
        ("BEIGE", 245, 245, 220),
        ("BISQUE", 255, 228, 196),
        ("BISQUE1", 255, 228, 196),
        ("BISQUE2", 238, 213, 183),
        ("BISQUE3", 205, 183, 158),
        ("BISQUE4", 139, 125, 107),
        ("BLACK", 0, 0, 0),
        ("BLANCHEDALMOND", 255, 235, 205),
        ("BLUE", 0, 0, 255),
        ("BLUE1", 0, 0, 255),
        ("BLUE2", 0, 0, 238),
        ("BLUE3", 0, 0, 205),
        ("BLUE4", 0, 0, 139),
        ("BLUEVIOLET", 138, 43, 226),
        ("BROWN", 165, 42, 42),
        ("BROWN1", 255, 64, 64),
        ("BROWN2", 238, 59, 59),
        ("BROWN3", 205, 51, 51),
        ("BROWN4", 139, 35, 35),
        ("BURLYWOOD", 222, 184, 135),
        ("BURLYWOOD1", 255, 211, 155),
        ("BURLYWOOD2", 238, 197, 145),
        ("BURLYWOOD3", 205, 170, 125),
        ("BURLYWOOD4", 139, 115, 85),
        ("CADETBLUE", 95, 158, 160),
        ("CADETBLUE1", 152, 245, 255),
        ("CADETBLUE2", 142, 229, 238),
        ("CADETBLUE3", 122, 197, 205),
        ("CADETBLUE4", 83, 134, 139),
        ("CHARTREUSE", 127, 255, 0),
        ("CHARTREUSE1", 127, 255, 0),
        ("CHARTREUSE2", 118, 238, 0),
        ("CHARTREUSE3", 102, 205, 0),
        ("CHARTREUSE4", 69, 139, 0),
        ("CHOCOLATE", 210, 105, 30),
        ("CHOCOLATE1", 255, 127, 36),
        ("CHOCOLATE2", 238, 118, 33),
        ("CHOCOLATE3", 205, 102, 29),
        ("CHOCOLATE4", 139, 69, 19),
        ("COFFEE", 156, 79, 0),
        ("CORAL", 255, 127, 80),
        ("CORAL1", 255, 114, 86),
        ("CORAL2", 238, 106, 80),
        ("CORAL3", 205, 91, 69),
        ("CORAL4", 139, 62, 47),
        ("CORNFLOWERBLUE", 100, 149, 237),
        ("CORNSILK", 255, 248, 220),
        ("CORNSILK1", 255, 248, 220),
        ("CORNSILK2", 238, 232, 205),
        ("CORNSILK3", 205, 200, 177),
        ("CORNSILK4", 139, 136, 120),
        ("CYAN", 0, 255, 255),
        ("CYAN1", 0, 255, 255),
        ("CYAN2", 0, 238, 238),
        ("CYAN3", 0, 205, 205),
        ("CYAN4", 0, 139, 139),
        ("DARKBLUE", 0, 0, 139),
        ("DARKCYAN", 0, 139, 139),
        ("DARKGOLDENROD", 184, 134, 11),
        ("DARKGOLDENROD1", 255, 185, 15),
        ("DARKGOLDENROD2", 238, 173, 14),
        ("DARKGOLDENROD3", 205, 149, 12),
        ("DARKGOLDENROD4", 139, 101, 8),
        ("DARKGREEN", 0, 100, 0),
        ("DARKGRAY", 169, 169, 169),
        ("DARKKHAKI", 189, 183, 107),
        ("DARKMAGENTA", 139, 0, 139),
        ("DARKOLIVEGREEN", 85, 107, 47),
        ("DARKOLIVEGREEN1", 202, 255, 112),
        ("DARKOLIVEGREEN2", 188, 238, 104),
        ("DARKOLIVEGREEN3", 162, 205, 90),
        ("DARKOLIVEGREEN4", 110, 139, 61),
        ("DARKORANGE", 255, 140, 0),
        ("DARKORANGE1", 255, 127, 0),
        ("DARKORANGE2", 238, 118, 0),
        ("DARKORANGE3", 205, 102, 0),
        ("DARKORANGE4", 139, 69, 0),
        ("DARKORCHID", 153, 50, 204),
        ("DARKORCHID1", 191, 62, 255),
        ("DARKORCHID2", 178, 58, 238),
        ("DARKORCHID3", 154, 50, 205),
        ("DARKORCHID4", 104, 34, 139),
        ("DARKRED", 139, 0, 0),
        ("DARKSALMON", 233, 150, 122),
        ("DARKSEAGREEN", 143, 188, 143),
        ("DARKSEAGREEN1", 193, 255, 193),
        ("DARKSEAGREEN2", 180, 238, 180),
        ("DARKSEAGREEN3", 155, 205, 155),
        ("DARKSEAGREEN4", 105, 139, 105),
        ("DARKSLATEBLUE", 72, 61, 139),
        ("DARKSLATEGRAY", 47, 79, 79),
        ("DARKTURQUOISE", 0, 206, 209),
        ("DARKVIOLET", 148, 0, 211),
        ("DEEPPINK", 255, 20, 147),
        ("DEEPPINK1", 255, 20, 147),
        ("DEEPPINK2", 238, 18, 137),
        ("DEEPPINK3", 205, 16, 118),
        ("DEEPPINK4", 139, 10, 80),
        ("DEEPSKYBLUE", 0, 191, 255),
        ("DEEPSKYBLUE1", 0, 191, 255),
        ("DEEPSKYBLUE2", 0, 178, 238),
        ("DEEPSKYBLUE3", 0, 154, 205),
        ("DEEPSKYBLUE4", 0, 104, 139),
        ("DIMGRAY", 105, 105, 105),
        ("DODGERBLUE", 30, 144, 255),
        ("DODGERBLUE1", 30, 144, 255),
        ("DODGERBLUE2", 28, 134, 238),
        ("DODGERBLUE3", 24, 116, 205),
        ("DODGERBLUE4", 16, 78, 139),
        ("FIREBRICK", 178, 34, 34),
        ("FIREBRICK1", 255, 48, 48),
        ("FIREBRICK2", 238, 44, 44),
        ("FIREBRICK3", 205, 38, 38),
        ("FIREBRICK4", 139, 26, 26),
        ("FLORALWHITE", 255, 250, 240),
        ("FORESTGREEN", 34, 139, 34),
        ("GAINSBORO", 220, 220, 220),
        ("GHOSTWHITE", 248, 248, 255),
        ("GOLD", 255, 215, 0),
        ("GOLD1", 255, 215, 0),
        ("GOLD2", 238, 201, 0),
        ("GOLD3", 205, 173, 0),
        ("GOLD4", 139, 117, 0),
        ("GOLDENROD", 218, 165, 32),
        ("GOLDENROD1", 255, 193, 37),
        ("GOLDENROD2", 238, 180, 34),
        ("GOLDENROD3", 205, 155, 29),
        ("GOLDENROD4", 139, 105, 20),
        ("GREEN YELLOW", 173, 255, 47),
        ("GREEN", 0, 255, 0),
        ("GREEN1", 0, 255, 0),
        ("GREEN2", 0, 238, 0),
        ("GREEN3", 0, 205, 0),
        ("GREEN4", 0, 139, 0),
        ("GREENYELLOW", 173, 255, 47),
        ("GRAY", 190, 190, 190),
        ("GRAY0", 0, 0, 0),
        ("GRAY1", 3, 3, 3),
        ("GRAY10", 26, 26, 26),
        ("GRAY100", 255, 255, 255),
        ("GRAY11", 28, 28, 28),
        ("GRAY12", 31, 31, 31),
        ("GRAY13", 33, 33, 33),
        ("GRAY14", 36, 36, 36),
        ("GRAY15", 38, 38, 38),
        ("GRAY16", 41, 41, 41),
        ("GRAY17", 43, 43, 43),
        ("GRAY18", 46, 46, 46),
        ("GRAY19", 48, 48, 48),
        ("GRAY2", 5, 5, 5),
        ("GRAY20", 51, 51, 51),
        ("GRAY21", 54, 54, 54),
        ("GRAY22", 56, 56, 56),
        ("GRAY23", 59, 59, 59),
        ("GRAY24", 61, 61, 61),
        ("GRAY25", 64, 64, 64),
        ("GRAY26", 66, 66, 66),
        ("GRAY27", 69, 69, 69),
        ("GRAY28", 71, 71, 71),
        ("GRAY29", 74, 74, 74),
        ("GRAY3", 8, 8, 8),
        ("GRAY30", 77, 77, 77),
        ("GRAY31", 79, 79, 79),
        ("GRAY32", 82, 82, 82),
        ("GRAY33", 84, 84, 84),
        ("GRAY34", 87, 87, 87),
        ("GRAY35", 89, 89, 89),
        ("GRAY36", 92, 92, 92),
        ("GRAY37", 94, 94, 94),
        ("GRAY38", 97, 97, 97),
        ("GRAY39", 99, 99, 99),
        ("GRAY4", 10, 10, 10),
        ("GRAY40", 102, 102, 102),
        ("GRAY41", 105, 105, 105),
        ("GRAY42", 107, 107, 107),
        ("GRAY43", 110, 110, 110),
        ("GRAY44", 112, 112, 112),
        ("GRAY45", 115, 115, 115),
        ("GRAY46", 117, 117, 117),
        ("GRAY47", 120, 120, 120),
        ("GRAY48", 122, 122, 122),
        ("GRAY49", 125, 125, 125),
        ("GRAY5", 13, 13, 13),
        ("GRAY50", 127, 127, 127),
        ("GRAY51", 130, 130, 130),
        ("GRAY52", 133, 133, 133),
        ("GRAY53", 135, 135, 135),
        ("GRAY54", 138, 138, 138),
        ("GRAY55", 140, 140, 140),
        ("GRAY56", 143, 143, 143),
        ("GRAY57", 145, 145, 145),
        ("GRAY58", 148, 148, 148),
        ("GRAY59", 150, 150, 150),
        ("GRAY6", 15, 15, 15),
        ("GRAY60", 153, 153, 153),
        ("GRAY61", 156, 156, 156),
        ("GRAY62", 158, 158, 158),
        ("GRAY63", 161, 161, 161),
        ("GRAY64", 163, 163, 163),
        ("GRAY65", 166, 166, 166),
        ("GRAY66", 168, 168, 168),
        ("GRAY67", 171, 171, 171),
        ("GRAY68", 173, 173, 173),
        ("GRAY69", 176, 176, 176),
        ("GRAY7", 18, 18, 18),
        ("GRAY70", 179, 179, 179),
        ("GRAY71", 181, 181, 181),
        ("GRAY72", 184, 184, 184),
        ("GRAY73", 186, 186, 186),
        ("GRAY74", 189, 189, 189),
        ("GRAY75", 191, 191, 191),
        ("GRAY76", 194, 194, 194),
        ("GRAY77", 196, 196, 196),
        ("GRAY78", 199, 199, 199),
        ("GRAY79", 201, 201, 201),
        ("GRAY8", 20, 20, 20),
        ("GRAY80", 204, 204, 204),
        ("GRAY81", 207, 207, 207),
        ("GRAY82", 209, 209, 209),
        ("GRAY83", 212, 212, 212),
        ("GRAY84", 214, 214, 214),
        ("GRAY85", 217, 217, 217),
        ("GRAY86", 219, 219, 219),
        ("GRAY87", 222, 222, 222),
        ("GRAY88", 224, 224, 224),
        ("GRAY89", 227, 227, 227),
        ("GRAY9", 23, 23, 23),
        ("GRAY90", 229, 229, 229),
        ("GRAY91", 232, 232, 232),
        ("GRAY92", 235, 235, 235),
        ("GRAY93", 237, 237, 237),
        ("GRAY94", 240, 240, 240),
        ("GRAY95", 242, 242, 242),
        ("GRAY96", 245, 245, 245),
        ("GRAY97", 247, 247, 247),
        ("GRAY98", 250, 250, 250),
        ("GRAY99", 252, 252, 252),
        ("HONEYDEW", 240, 255, 240),
        ("HONEYDEW1", 240, 255, 240),
        ("HONEYDEW2", 224, 238, 224),
        ("HONEYDEW3", 193, 205, 193),
        ("HONEYDEW4", 131, 139, 131),
        ("HOTPINK", 255, 105, 180),
        ("HOTPINK1", 255, 110, 180),
        ("HOTPINK2", 238, 106, 167),
        ("HOTPINK3", 205, 96, 144),
        ("HOTPINK4", 139, 58, 98),
        ("INDIANRED", 205, 92, 92),
        ("INDIANRED1", 255, 106, 106),
        ("INDIANRED2", 238, 99, 99),
        ("INDIANRED3", 205, 85, 85),
        ("INDIANRED4", 139, 58, 58),
        ("IVORY", 255, 255, 240),
        ("IVORY1", 255, 255, 240),
        ("IVORY2", 238, 238, 224),
        ("IVORY3", 205, 205, 193),
        ("IVORY4", 139, 139, 131),
        ("KHAKI", 240, 230, 140),
        ("KHAKI1", 255, 246, 143),
        ("KHAKI2", 238, 230, 133),
        ("KHAKI3", 205, 198, 115),
        ("KHAKI4", 139, 134, 78),
        ("LAVENDER", 230, 230, 250),
        ("LAVENDERBLUSH", 255, 240, 245),
        ("LAVENDERBLUSH1", 255, 240, 245),
        ("LAVENDERBLUSH2", 238, 224, 229),
        ("LAVENDERBLUSH3", 205, 193, 197),
        ("LAVENDERBLUSH4", 139, 131, 134),
        ("LAWNGREEN", 124, 252, 0),
        ("LEMONCHIFFON", 255, 250, 205),
        ("LEMONCHIFFON1", 255, 250, 205),
        ("LEMONCHIFFON2", 238, 233, 191),
        ("LEMONCHIFFON3", 205, 201, 165),
        ("LEMONCHIFFON4", 139, 137, 112),
        ("LIGHTBLUE", 173, 216, 230),
        ("LIGHTBLUE1", 191, 239, 255),
        ("LIGHTBLUE2", 178, 223, 238),
        ("LIGHTBLUE3", 154, 192, 205),
        ("LIGHTBLUE4", 104, 131, 139),
        ("LIGHTCORAL", 240, 128, 128),
        ("LIGHTCYAN", 224, 255, 255),
        ("LIGHTCYAN1", 224, 255, 255),
        ("LIGHTCYAN2", 209, 238, 238),
        ("LIGHTCYAN3", 180, 205, 205),
        ("LIGHTCYAN4", 122, 139, 139),
        ("LIGHTGOLDENROD", 238, 221, 130),
        ("LIGHTGOLDENROD1", 255, 236, 139),
        ("LIGHTGOLDENROD2", 238, 220, 130),
        ("LIGHTGOLDENROD3", 205, 190, 112),
        ("LIGHTGOLDENROD4", 139, 129, 76),
        ("LIGHTGOLDENRODYELLOW", 250, 250, 210),
        ("LIGHTGREEN", 144, 238, 144),
        ("LIGHTGRAY", 211, 211, 211),
        ("LIGHTPINK", 255, 182, 193),
        ("LIGHTPINK1", 255, 174, 185),
        ("LIGHTPINK2", 238, 162, 173),
        ("LIGHTPINK3", 205, 140, 149),
        ("LIGHTPINK4", 139, 95, 101),
        ("LIGHTSALMON", 255, 160, 122),
        ("LIGHTSALMON1", 255, 160, 122),
        ("LIGHTSALMON2", 238, 149, 114),
        ("LIGHTSALMON3", 205, 129, 98),
        ("LIGHTSALMON4", 139, 87, 66),
        ("LIGHTSEAGREEN", 32, 178, 170),
        ("LIGHTSKYBLUE", 135, 206, 250),
        ("LIGHTSKYBLUE1", 176, 226, 255),
        ("LIGHTSKYBLUE2", 164, 211, 238),
        ("LIGHTSKYBLUE3", 141, 182, 205),
        ("LIGHTSKYBLUE4", 96, 123, 139),
        ("LIGHTSLATEBLUE", 132, 112, 255),
        ("LIGHTSLATEGRAY", 119, 136, 153),
        ("LIGHTSTEELBLUE", 176, 196, 222),
        ("LIGHTSTEELBLUE1", 202, 225, 255),
        ("LIGHTSTEELBLUE2", 188, 210, 238),
        ("LIGHTSTEELBLUE3", 162, 181, 205),
        ("LIGHTSTEELBLUE4", 110, 123, 139),
        ("LIGHTYELLOW", 255, 255, 224),
        ("LIGHTYELLOW1", 255, 255, 224),
        ("LIGHTYELLOW2", 238, 238, 209),
        ("LIGHTYELLOW3", 205, 205, 180),
        ("LIGHTYELLOW4", 139, 139, 122),
        ("LIMEGREEN", 50, 205, 50),
        ("LINEN", 250, 240, 230),
        ("MAGENTA", 255, 0, 255),
        ("MAGENTA1", 255, 0, 255),
        ("MAGENTA2", 238, 0, 238),
        ("MAGENTA3", 205, 0, 205),
        ("MAGENTA4", 139, 0, 139),
        ("MAROON", 176, 48, 96),
        ("MAROON1", 255, 52, 179),
        ("MAROON2", 238, 48, 167),
        ("MAROON3", 205, 41, 144),
        ("MAROON4", 139, 28, 98),
        ("MEDIUMAQUAMARINE", 102, 205, 170),
        ("MEDIUMBLUE", 0, 0, 205),
        ("MEDIUMORCHID", 186, 85, 211),
        ("MEDIUMORCHID1", 224, 102, 255),
        ("MEDIUMORCHID2", 209, 95, 238),
        ("MEDIUMORCHID3", 180, 82, 205),
        ("MEDIUMORCHID4", 122, 55, 139),
        ("MEDIUMPURPLE", 147, 112, 219),
        ("MEDIUMPURPLE1", 171, 130, 255),
        ("MEDIUMPURPLE2", 159, 121, 238),
        ("MEDIUMPURPLE3", 137, 104, 205),
        ("MEDIUMPURPLE4", 93, 71, 139),
        ("MEDIUMSEAGREEN", 60, 179, 113),
        ("MEDIUMSLATEBLUE", 123, 104, 238),
        ("MEDIUMSPRINGGREEN", 0, 250, 154),
        ("MEDIUMTURQUOISE", 72, 209, 204),
        ("MEDIUMVIOLETRED", 199, 21, 133),
        ("MIDNIGHTBLUE", 25, 25, 112),
        ("MINTCREAM", 245, 255, 250),
        ("MISTYROSE", 255, 228, 225),
        ("MISTYROSE1", 255, 228, 225),
        ("MISTYROSE2", 238, 213, 210),
        ("MISTYROSE3", 205, 183, 181),
        ("MISTYROSE4", 139, 125, 123),
        ("MOCCASIN", 255, 228, 181),
        ("MUPDFBLUE", 37, 114, 172),
        ("NAVAJOWHITE", 255, 222, 173),
        ("NAVAJOWHITE1", 255, 222, 173),
        ("NAVAJOWHITE2", 238, 207, 161),
        ("NAVAJOWHITE3", 205, 179, 139),
        ("NAVAJOWHITE4", 139, 121, 94),
        ("NAVY", 0, 0, 128),
        ("NAVYBLUE", 0, 0, 128),
        ("OLDLACE", 253, 245, 230),
        ("OLIVEDRAB", 107, 142, 35),
        ("OLIVEDRAB1", 192, 255, 62),
        ("OLIVEDRAB2", 179, 238, 58),
        ("OLIVEDRAB3", 154, 205, 50),
        ("OLIVEDRAB4", 105, 139, 34),
        ("ORANGE", 255, 165, 0),
        ("ORANGE1", 255, 165, 0),
        ("ORANGE2", 238, 154, 0),
        ("ORANGE3", 205, 133, 0),
        ("ORANGE4", 139, 90, 0),
        ("ORANGERED", 255, 69, 0),
        ("ORANGERED1", 255, 69, 0),
        ("ORANGERED2", 238, 64, 0),
        ("ORANGERED3", 205, 55, 0),
        ("ORANGERED4", 139, 37, 0),
        ("ORCHID", 218, 112, 214),
        ("ORCHID1", 255, 131, 250),
        ("ORCHID2", 238, 122, 233),
        ("ORCHID3", 205, 105, 201),
        ("ORCHID4", 139, 71, 137),
        ("PALEGOLDENROD", 238, 232, 170),
        ("PALEGREEN", 152, 251, 152),
        ("PALEGREEN1", 154, 255, 154),
        ("PALEGREEN2", 144, 238, 144),
        ("PALEGREEN3", 124, 205, 124),
        ("PALEGREEN4", 84, 139, 84),
        ("PALETURQUOISE", 175, 238, 238),
        ("PALETURQUOISE1", 187, 255, 255),
        ("PALETURQUOISE2", 174, 238, 238),
        ("PALETURQUOISE3", 150, 205, 205),
        ("PALETURQUOISE4", 102, 139, 139),
        ("PALEVIOLETRED", 219, 112, 147),
        ("PALEVIOLETRED1", 255, 130, 171),
        ("PALEVIOLETRED2", 238, 121, 159),
        ("PALEVIOLETRED3", 205, 104, 137),
        ("PALEVIOLETRED4", 139, 71, 93),
        ("PAPAYAWHIP", 255, 239, 213),
        ("PEACHPUFF", 255, 218, 185),
        ("PEACHPUFF1", 255, 218, 185),
        ("PEACHPUFF2", 238, 203, 173),
        ("PEACHPUFF3", 205, 175, 149),
        ("PEACHPUFF4", 139, 119, 101),
        ("PERU", 205, 133, 63),
        ("PINK", 255, 192, 203),
        ("PINK1", 255, 181, 197),
        ("PINK2", 238, 169, 184),
        ("PINK3", 205, 145, 158),
        ("PINK4", 139, 99, 108),
        ("PLUM", 221, 160, 221),
        ("PLUM1", 255, 187, 255),
        ("PLUM2", 238, 174, 238),
        ("PLUM3", 205, 150, 205),
        ("PLUM4", 139, 102, 139),
        ("POWDERBLUE", 176, 224, 230),
        ("PURPLE", 160, 32, 240),
        ("PURPLE1", 155, 48, 255),
        ("PURPLE2", 145, 44, 238),
        ("PURPLE3", 125, 38, 205),
        ("PURPLE4", 85, 26, 139),
        ("PY_COLOR", 240, 255, 210),
        ("RED", 255, 0, 0),
        ("RED1", 255, 0, 0),
        ("RED2", 238, 0, 0),
        ("RED3", 205, 0, 0),
        ("RED4", 139, 0, 0),
        ("ROSYBROWN", 188, 143, 143),
        ("ROSYBROWN1", 255, 193, 193),
        ("ROSYBROWN2", 238, 180, 180),
        ("ROSYBROWN3", 205, 155, 155),
        ("ROSYBROWN4", 139, 105, 105),
        ("ROYALBLUE", 65, 105, 225),
        ("ROYALBLUE1", 72, 118, 255),
        ("ROYALBLUE2", 67, 110, 238),
        ("ROYALBLUE3", 58, 95, 205),
        ("ROYALBLUE4", 39, 64, 139),
        ("SADDLEBROWN", 139, 69, 19),
        ("SALMON", 250, 128, 114),
        ("SALMON1", 255, 140, 105),
        ("SALMON2", 238, 130, 98),
        ("SALMON3", 205, 112, 84),
        ("SALMON4", 139, 76, 57),
        ("SANDYBROWN", 244, 164, 96),
        ("SEAGREEN", 46, 139, 87),
        ("SEAGREEN1", 84, 255, 159),
        ("SEAGREEN2", 78, 238, 148),
        ("SEAGREEN3", 67, 205, 128),
        ("SEAGREEN4", 46, 139, 87),
        ("SEASHELL", 255, 245, 238),
        ("SEASHELL1", 255, 245, 238),
        ("SEASHELL2", 238, 229, 222),
        ("SEASHELL3", 205, 197, 191),
        ("SEASHELL4", 139, 134, 130),
        ("SIENNA", 160, 82, 45),
        ("SIENNA1", 255, 130, 71),
        ("SIENNA2", 238, 121, 66),
        ("SIENNA3", 205, 104, 57),
        ("SIENNA4", 139, 71, 38),
        ("SKYBLUE", 135, 206, 235),
        ("SKYBLUE1", 135, 206, 255),
        ("SKYBLUE2", 126, 192, 238),
        ("SKYBLUE3", 108, 166, 205),
        ("SKYBLUE4", 74, 112, 139),
        ("SLATEBLUE", 106, 90, 205),
        ("SLATEBLUE1", 131, 111, 255),
        ("SLATEBLUE2", 122, 103, 238),
        ("SLATEBLUE3", 105, 89, 205),
        ("SLATEBLUE4", 71, 60, 139),
        ("SLATEGRAY", 112, 128, 144),
        ("SNOW", 255, 250, 250),
        ("SNOW1", 255, 250, 250),
        ("SNOW2", 238, 233, 233),
        ("SNOW3", 205, 201, 201),
        ("SNOW4", 139, 137, 137),
        ("SPRINGGREEN", 0, 255, 127),
        ("SPRINGGREEN1", 0, 255, 127),
        ("SPRINGGREEN2", 0, 238, 118),
        ("SPRINGGREEN3", 0, 205, 102),
        ("SPRINGGREEN4", 0, 139, 69),
        ("STEELBLUE", 70, 130, 180),
        ("STEELBLUE1", 99, 184, 255),
        ("STEELBLUE2", 92, 172, 238),
        ("STEELBLUE3", 79, 148, 205),
        ("STEELBLUE4", 54, 100, 139),
        ("TAN", 210, 180, 140),
        ("TAN1", 255, 165, 79),
        ("TAN2", 238, 154, 73),
        ("TAN3", 205, 133, 63),
        ("TAN4", 139, 90, 43),
        ("THISTLE", 216, 191, 216),
        ("THISTLE1", 255, 225, 255),
        ("THISTLE2", 238, 210, 238),
        ("THISTLE3", 205, 181, 205),
        ("THISTLE4", 139, 123, 139),
        ("TOMATO", 255, 99, 71),
        ("TOMATO1", 255, 99, 71),
        ("TOMATO2", 238, 92, 66),
        ("TOMATO3", 205, 79, 57),
        ("TOMATO4", 139, 54, 38),
        ("TURQUOISE", 64, 224, 208),
        ("TURQUOISE1", 0, 245, 255),
        ("TURQUOISE2", 0, 229, 238),
        ("TURQUOISE3", 0, 197, 205),
        ("TURQUOISE4", 0, 134, 139),
        ("VIOLET", 238, 130, 238),
        ("VIOLETRED", 208, 32, 144),
        ("VIOLETRED1", 255, 62, 150),
        ("VIOLETRED2", 238, 58, 140),
        ("VIOLETRED3", 205, 50, 120),
        ("VIOLETRED4", 139, 34, 82),
        ("WHEAT", 245, 222, 179),
        ("WHEAT1", 255, 231, 186),
        ("WHEAT2", 238, 216, 174),
        ("WHEAT3", 205, 186, 150),
        ("WHEAT4", 139, 126, 102),
        ("WHITE", 255, 255, 255),
        ("WHITESMOKE", 245, 245, 245),
        ("YELLOW", 255, 255, 0),
        ("YELLOW1", 255, 255, 0),
        ("YELLOW2", 238, 238, 0),
        ("YELLOW3", 205, 205, 0),
        ("YELLOW4", 139, 139, 0),
        ("YELLOWGREEN", 154, 205, 50),
    ]
2880
2881
def getColorInfoDict() -> dict:
    """Return the colour table as a dictionary.

    Keys are the lower-cased colour names, values are the remaining items
    of the corresponding getColorInfoList() entry.
    """
    return {entry[0].lower(): entry[1:] for entry in getColorInfoList()}
2887
2888
def getColor(name: str) -> tuple:
    """Retrieve RGB color in PDF format by name.

    Args:
        name: colour name. It is converted with ``upper()`` and compared
            with the names delivered by getColorInfoList().

    Returns:
        a triple of floats in range 0 to 1. In case of name-not-found
        (this includes objects without an ``upper`` method, e.g. None),
        "white" is returned.
    """
    white = (1, 1, 1)
    try:
        wanted = name.upper()
    except AttributeError:  # not string-like; treated like an unknown name
        return white

    # One pass over the table: the first entry with a matching name wins.
    for entry in getColorInfoList():
        if entry[0] == wanted:
            return (entry[1] / 255.0, entry[2] / 255.0, entry[3] / 255.0)
    return white
2900
2901
def getColorHSV(name: str) -> tuple:
    """Retrieve the hue, saturation, value triple of a color name.

    Args:
        name: colour name. It is converted with ``upper()`` and compared
            with the names delivered by getColorInfoList().

    Returns:
        a triple (degree, percent, percent): hue and saturation are rounded
        to integers, value is rounded to one decimal place. If not found
        (this includes objects without an ``upper`` method, e.g. None),
        (-1, -1, -1) is returned.
    """
    not_found = (-1, -1, -1)
    try:
        wanted = name.upper()
    except AttributeError:  # not string-like; treated like an unknown name
        return not_found

    # One pass over the table: the first entry with a matching name wins.
    x = next((entry for entry in getColorInfoList() if entry[0] == wanted), None)
    if x is None:
        return not_found

    r = x[1] / 255.0
    g = x[2] / 255.0
    b = x[3] / 255.0
    cmax = max(r, g, b)
    V = round(cmax * 100, 1)
    cmin = min(r, g, b)
    delta = cmax - cmin
    if delta == 0:  # gray: hue is undefined, use 0
        hue = 0
    elif cmax == r:
        hue = 60.0 * (((g - b) / delta) % 6)
    elif cmax == g:
        hue = 60.0 * (((b - r) / delta) + 2)
    else:
        hue = 60.0 * (((r - g) / delta) + 4)

    H = int(round(hue))

    if cmax == 0:  # black: avoid division by zero
        sat = 0
    else:
        sat = delta / cmax
    S = int(round(sat * 100))

    return (H, S, V)
2938
2939
def _get_font_properties(doc: Document, xref: int) -> tuple:
    """Return name, extension, type, ascender and descender of a font.

    Args:
        doc: the document; its ``extract_font`` method is called.
        xref: passed to ``doc.extract_font``.

    Returns:
        The tuple (fontname, ext, stype, asc, dsc). Ascender and descender
        start out as 0.8 and -0.2 and are returned unchanged if ``ext`` is
        the empty string. Otherwise they are taken from a Font object
        created from the font buffer or, without a buffer, from the font
        name. Whenever no Font object can be used, the values present at
        that moment are enlarged by 20%.
    """
    fontname, ext, stype, buffer = doc.extract_font(xref)
    asc = 0.8
    dsc = -0.2
    if ext == "":
        return fontname, ext, stype, asc, dsc

    if buffer:
        try:
            font = Font(fontbuffer=buffer)
            asc = font.ascender
            dsc = font.descender
            bbox = font.bbox
            if asc - dsc < 1:
                # too small a line height: extend downwards to the bbox if
                # necessary and choose asc such that asc - dsc equals 1
                if bbox.y0 < dsc:
                    dsc = bbox.y0
                asc = 1 - dsc
        except Exception:  # best effort; do not swallow KeyboardInterrupt etc.
            asc *= 1.2
            dsc *= 1.2
        return fontname, ext, stype, asc, dsc
    if ext != "n/a":
        try:
            font = Font(fontname)
            asc = font.ascender
            dsc = font.descender
        except Exception:  # best effort; do not swallow KeyboardInterrupt etc.
            asc *= 1.2
            dsc *= 1.2
    else:
        asc *= 1.2
        dsc *= 1.2
    return fontname, ext, stype, asc, dsc
2973
2974
def get_char_widths(
    doc: Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
) -> list:
    """Get list of glyph information of a font.

    Notes:
        Must be provided by its XREF number. If we already dealt with the
        font, it will be recorded in doc.FontInfos. Otherwise we insert an
        entry there.
        Finally we return the glyphs for the font. This is a list of
        (glyph, width) where glyph is an integer controlling the char
        appearance, and width is a float controlling the char's spacing:
        width * fontsize is the actual space.
        For 'simple' fonts, glyph == ord(char) will usually be true.
        Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.

    Args:
        doc: the document owning the font.
        xref: xref number of the font.
        limit: requested number of glyphs; values below 256 are raised to 256.
        idx: passed unchanged to doc._get_char_widths.
        fontdict: optional font information used when the font is not yet
            recorded. It must contain the keys "name", "ext", "type",
            "ordering" and "simple" (the latter two are read, but
            recomputed afterwards).

    Returns:
        The glyph list, or None for fonts recognized as CJK by their name.

    Raises:
        ValueError: if the font extension is the empty string.
    """
    fontinfo = CheckFontInfo(doc, xref)
    if fontinfo is None:  # not recorded yet: create it
        if fontdict is None:
            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
            fontdict = {
                "name": name,
                "type": stype,
                "ext": ext,
                "ascender": asc,
                "descender": dsc,
            }
        else:
            name = fontdict["name"]
            ext = fontdict["ext"]
            stype = fontdict["type"]
            # both values are overwritten below; the lookups only require
            # that the keys are present
            ordering = fontdict["ordering"]
            simple = fontdict["simple"]

        if ext == "":
            raise ValueError("xref is not a font")

        # check for 'simple' fonts
        if stype in ("Type1", "MMType1", "TrueType"):
            simple = True
        else:
            simple = False

        # check for CJK fonts
        if name in ("Fangti", "Ming"):
            ordering = 0
        elif name in ("Heiti", "Song"):
            ordering = 1
        elif name in ("Gothic", "Mincho"):
            ordering = 2
        elif name in ("Dotum", "Batang"):
            ordering = 3
        else:
            ordering = -1

        fontdict["simple"] = simple

        # glyph tables of these two fonts are provided directly
        if name == "ZapfDingbats":
            glyphs = zapf_glyphs
        elif name == "Symbol":
            glyphs = symbol_glyphs
        else:
            glyphs = None

        fontdict["glyphs"] = glyphs
        fontdict["ordering"] = ordering
        fontinfo = [xref, fontdict]
        doc.FontInfos.append(fontinfo)
    else:
        # font already recorded: reuse the stored information
        fontdict = fontinfo[1]
        glyphs = fontdict["glyphs"]
        simple = fontdict["simple"]
        ordering = fontdict["ordering"]

    if glyphs is None:
        oldlimit = 0
    else:
        oldlimit = len(glyphs)

    mylimit = max(256, limit)  # never deal with less than 256 glyphs

    if mylimit <= oldlimit:  # the glyphs we have are sufficient
        return glyphs

    if ordering < 0:  # not a CJK font
        glyphs = doc._get_char_widths(
            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
        )
    else:  # CJK fonts use char codes and width = 1
        glyphs = None

    # store the (possibly extended) glyph list in the font's record
    fontdict["glyphs"] = glyphs
    fontinfo[1] = fontdict
    UpdateFontInfo(doc, fontinfo)

    return glyphs
3071
3072
3073 class Shape(object):
3074 """Create a new shape."""
3075
@staticmethod
def horizontal_angle(C, P):
    """Return the angle between the horizontal and the connection from C to P.

    The arcus sine of the absolute y-component of the unit vector from C
    to P is computed first. Its ambiguity is then resolved by checking the
    quadrant in which that vector is located.
    """
    direction = Point(P - C).unit  # unit vector 'C' -> 'P'
    angle = math.asin(abs(direction.y))  # absolute angle from horizontal
    if direction.x < 0:
        # left half: bottom-left yields a negative, top-left a positive angle
        angle = math.pi - angle
        return -angle if direction.y <= 0 else angle
    # right half: top-right is kept, bottom-right changes the sign
    return angle if direction.y >= 0 else -angle
3095
def __init__(self, page: Page):
    """Set up an empty shape for a page.

    Raises:
        ValueError: if the page's parent document is not a PDF.
    """
    CheckParent(page)
    self.page = page
    self.doc = page.parent
    if not self.doc.is_pdf:
        raise ValueError("is no PDF")

    # page geometry
    self.height, self.width = page.mediabox_size.y, page.mediabox_size.x
    self.x, self.y = page.cropbox_position.x, page.cropbox_position.y

    # page transformation matrix and its inverse
    self.pctm = page.transformation_matrix
    self.ipctm = ~self.pctm

    # nothing has been drawn or written yet
    self.draw_cont = self.text_cont = self.totalcont = ""
    self.lastPoint = self.rect = None
3115
def updateRect(self, x):
    """Extend the shape's rectangle such that it also covers x.

    Args:
        x: an argument of length 2 is treated as a point, anything else as
            a rectangle.
    """
    is_point = len(x) == 2

    if self.rect is None:  # first item: it defines the rectangle
        self.rect = Rect(x, x) if is_point else Rect(x)
        return

    if is_point:
        pt = Point(x)
        left, top, right, bottom = pt.x, pt.y, pt.x, pt.y
    else:
        box = Rect(x)
        left, top, right, bottom = box.x0, box.y0, box.x1, box.y1

    self.rect.x0 = min(self.rect.x0, left)
    self.rect.y0 = min(self.rect.y0, top)
    self.rect.x1 = max(self.rect.x1, right)
    self.rect.y1 = max(self.rect.y1, bottom)
3136
def draw_line(self, p1: point_like, p2: point_like) -> Point:
    """Draw a line between two points.

    A "move" command is only emitted if the line does not start at the
    last point of the shape. Returns the end point of the line.
    """
    start, end = Point(p1), Point(p2)
    move_cmd = "%g %g m\n"
    line_cmd = "%g %g l\n"

    if not (self.lastPoint == start):
        self.draw_cont += move_cmd % JM_TUPLE(start * self.ipctm)
        self.lastPoint = start
        self.updateRect(start)

    self.draw_cont += line_cmd % JM_TUPLE(end * self.ipctm)
    self.updateRect(end)
    self.lastPoint = end
    return end
3150
def draw_polyline(self, points: list) -> Point:
    """Draw several connected line segments.

    A "move" command for the first point is only emitted if it differs
    from the last point of the shape; every further point adds a line.
    Returns the last point of the list (an empty list causes an
    IndexError).
    """
    for index, item in enumerate(points):
        pt = Point(item)
        if index > 0:
            self.draw_cont += "%g %g l\n" % JM_TUPLE(pt * self.ipctm)
        elif not (self.lastPoint == pt):
            self.draw_cont += "%g %g m\n" % JM_TUPLE(pt * self.ipctm)
            self.lastPoint = pt
        self.updateRect(item)

    self.lastPoint = Point(points[-1])
    return self.lastPoint
3164
def draw_bezier(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
) -> Point:
    """Draw a standard cubic Bezier curve.

    The curve runs from p1 to p4 with p2 and p3 as control points. A
    "move" command is only emitted if p1 differs from the last point of
    the shape. Returns the end point of the curve.
    """
    start, ctrl1, ctrl2, end = (Point(p) for p in (p1, p2, p3, p4))

    if not (self.lastPoint == start):
        self.draw_cont += "%g %g m\n" % JM_TUPLE(start * self.ipctm)

    # coordinates of the "c" operator: both control points, then the end
    coords = []
    for pt in (ctrl1, ctrl2, end):
        coords.extend(pt * self.ipctm)
    self.draw_cont += "%g %g %g %g %g %g c\n" % JM_TUPLE(coords)

    for pt in (start, ctrl1, ctrl2, end):
        self.updateRect(pt)

    self.lastPoint = end
    return end
3188
def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
    """Draw an ellipse inside a tetrapod.

    Args:
        tetra: a rectangle (four numbers) or a quad (four points).

    Returns:
        The mid point of the left side, where the drawing starts and ends.

    Raises:
        ValueError: if the argument does not have a length of four.
    """
    if len(tetra) != 4:
        raise ValueError("invalid arg length")
    # numbers indicate a rectangle, anything else is taken as a quad
    q = Rect(tetra).quad if hasattr(tetra[0], "__float__") else Quad(tetra)

    # mid points of the four sides
    mid_top = q.ul + (q.ur - q.ul) * 0.5
    mid_right = q.ur + (q.lr - q.ur) * 0.5
    mid_bottom = q.ll + (q.lr - q.ll) * 0.5
    mid_left = q.ul + (q.ll - q.ul) * 0.5

    if not (self.lastPoint == mid_left):
        self.draw_cont += "%g %g m\n" % JM_TUPLE(mid_left * self.ipctm)
        self.lastPoint = mid_left

    # one curve per corner, starting and ending at the left mid point
    arcs = (
        (mid_left, q.ll, mid_bottom),
        (mid_bottom, q.lr, mid_right),
        (mid_right, q.ur, mid_top),
        (mid_top, q.ul, mid_left),
    )
    for begin, corner, finish in arcs:
        self.draw_curve(begin, corner, finish)

    self.updateRect(q.rect)
    self.lastPoint = mid_left
    return mid_left
3212
def draw_circle(self, center: point_like, radius: float) -> Point:
    """Draw a circle given its center and radius.

    The circle is drawn as a 360 degree sector without connections to
    the center, starting at the point left of the center.

    Raises:
        ValueError: if the radius is not greater than EPSILON.
    """
    if radius > EPSILON:
        middle = Point(center)
        start = middle - (radius, 0)
        return self.draw_sector(middle, start, 360, fullSector=False)
    raise ValueError("radius must be positive")
3220
def draw_curve(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
) -> Point:
    """Draw a curve between points using one control point.

    The two control points of the resulting cubic Bezier curve are placed
    on the lines from p1 to p2 and from p3 to p2, at the fraction kappa of
    the respective distance.
    """
    kappa = 0.55228474983
    start, helper, end = Point(p1), Point(p2), Point(p3)
    ctrl1 = start + (helper - start) * kappa
    ctrl2 = end + (helper - end) * kappa
    return self.draw_bezier(start, ctrl1, ctrl2, end)
3235
def draw_sector(
    self,
    center: point_like,
    point: point_like,
    beta: float,
    fullSector: bool = True,
) -> Point:
    """Draw a circle sector.

    Args:
        center: center of the circle.
        point: start point of the arc on the circle's perimeter.
        beta: angle of the arc in degrees. Its sign selects the drawing
            direction; values beyond +/-360 are reduced by full turns.
        fullSector: if true, additionally connect start point, center and
            arc end point with straight lines.
    Returns:
        The end point of the arc, which is also stored in `self.lastPoint`.
        If the (reduced) angle is too small for any arc to be drawn, this
        is the start point.
    Raises:
        ValueError: if the distance between `center` and `point` is not
            greater than EPSILON.
    """
    center = Point(center)
    point = Point(point)
    l3 = "%g %g m\n"
    l4 = "%g %g %g %g %g %g c\n"
    l5 = "%g %g l\n"
    betar = math.radians(-beta)
    w360 = math.radians(math.copysign(360, betar)) * (-1)
    w90 = math.radians(math.copysign(90, betar))
    w45 = w90 / 2
    while abs(betar) > 2 * math.pi:
        betar += w360  # bring angle below 360 degrees
    if not (self.lastPoint == point):
        self.draw_cont += l3 % JM_TUPLE(point * self.ipctm)
        self.lastPoint = point
    C = center
    P = point
    # End point of the arc. Start from 'point' so that a vanishing angle
    # yields a sane end point instead of the coordinate origin.
    Q = point
    S = P - C  # vector 'center' -> 'point'
    rad = abs(S)  # circle radius

    if not rad > EPSILON:
        raise ValueError("radius must be positive")

    alfa = self.horizontal_angle(center, point)
    while abs(betar) > abs(w90):  # draw 90 degree arcs
        q1 = C.x + math.cos(alfa + w90) * rad
        q2 = C.y + math.sin(alfa + w90) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
        r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
        R = Point(r1, r2)  # crossing point of tangents
        kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
        kappa = kappah * abs(P - Q)
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4 % JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        )

        betar -= w90  # reduce parm angle by 90 deg
        alfa += w90  # advance start angle by 90 deg
        P = Q  # advance to arc end point

    # draw (remaining) arc
    if abs(betar) > 1e-3:  # significant degrees left?
        beta2 = betar / 2
        q1 = C.x + math.cos(alfa + betar) * rad
        q2 = C.y + math.sin(alfa + betar) * rad
        Q = Point(q1, q2)  # the arc's end point
        r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
        r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
        R = Point(r1, r2)  # crossing point of tangents
        # kappa height is 4/3 of segment height
        kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
        kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
        cp1 = P + (R - P) * kappa  # control point 1
        cp2 = Q + (R - Q) * kappa  # control point 2
        self.draw_cont += l4 % JM_TUPLE(
            list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
        )

    if fullSector:
        # start point -> center -> arc end point
        self.draw_cont += l3 % JM_TUPLE(point * self.ipctm)
        self.draw_cont += l5 % JM_TUPLE(center * self.ipctm)
        self.draw_cont += l5 % JM_TUPLE(Q * self.ipctm)
    self.lastPoint = Q
    return self.lastPoint
3309
def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
    """Draw a rectangle, optionally with rounded corners.

    Args:
        rect: the rectangle to draw.
        radius: if not None, the rectangle will have rounded corners.
            This is the radius of the curvature, given as percentage of
            the rectangle width or height. Valid are values 0 < v <= 0.5.
            A single number is applied to the shorter side. A sequence
            of two values is applied to width and height separately.
            A value of (0.5, 0.5) will draw an ellipse.
    Returns:
        The value of `self.lastPoint` after drawing.
    Raises:
        ValueError: if `radius` is neither None, a number nor a sequence
            of two items, or if a value is outside 0 < v <= 0.5.
    """
    box = Rect(rect)
    if radius is None:  # plain rectangle: one 're' operator
        self.draw_cont += "%g %g %g %g re\n" % JM_TUPLE(
            list(box.bl * self.ipctm) + [box.width, box.height]
        )
        self.updateRect(box)
        self.lastPoint = box.tl
        return self.lastPoint

    # rounded corners: derive horizontal / vertical corner offsets
    if hasattr(radius, "__float__"):
        if radius <= 0 or radius > 0.5:
            raise ValueError(f"bad radius value {radius}.")
        side = min(box.width, box.height) * radius
        dx = (side, 0)
        dy = (0, side)
    elif hasattr(radius, "__len__") and len(radius) == 2:
        rx, ry = radius
        dx = (rx * box.width, 0)
        dy = (0, ry * box.height)
        if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
            raise ValueError(f"bad radius value {radius}.")
    else:
        raise ValueError(f"bad radius value {radius}.")

    # left edge first, then alternate corner arc / straight edge
    pen = self.draw_line(box.tl + dy, box.bl - dy)
    rounded = (
        (box.bl, box.bl + dx, box.br - dx),
        (box.br, box.br - dy, box.tr + dy),
        (box.tr, box.tr - dx, box.tl + dx),
    )
    for corner, arc_end, edge_end in rounded:
        pen = self.draw_curve(pen, corner, arc_end)
        pen = self.draw_line(pen, edge_end)
    # the last corner closes the outline
    self.lastPoint = self.draw_curve(pen, box.tl, box.tl + dy)

    self.updateRect(box)
    return self.lastPoint
3360
def draw_quad(self, quad: quad_like) -> Point:
    """Draw the outline of a quad as a closed polyline.

    The corners are visited in the order ul, ll, lr, ur and back to ul.

    Returns:
        Whatever `draw_polyline` returns.
    """
    corners = Quad(quad)
    outline = [corners.ul, corners.ll, corners.lr, corners.ur]
    outline.append(corners.ul)  # back to the start
    return self.draw_polyline(outline)
3365
def draw_zigzag(
    self,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
) -> Point:
    """Draw a zig-zagged line from p1 to p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested step size. It is revised so that a whole
            number of full phases (4 steps each) fits between the points.
    Returns:
        The end point p2.
    Raises:
        ValueError: if the points are too close for one full phase.
    """
    start = Point(p1)
    stop = Point(p2)
    dist = abs(stop - start)  # distance of points
    steps = 4 * int(round(dist / (4 * breadth), 0))  # always take full phases
    if steps < 4:
        raise ValueError("points too close")
    unit = dist / steps  # revised breadth
    # util_hor_matrix is inverted to map the x-axis based points back
    to_line = ~Matrix(util_hor_matrix(start, stop))

    # only odd steps are turning points: 1 (mod 4) above, 3 (mod 4) below
    edges = [
        Point(i, -1 if i % 4 == 1 else 1) * unit * to_line
        for i in range(1, steps, 2)
    ]
    self.draw_polyline([start] + edges + [stop])  # add start and end points
    return stop
3394
def draw_squiggle(
    self,
    p1: point_like,
    p2: point_like,
    breadth=2,
) -> Point:
    """Draw a squiggly line from p1 to p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested step size. It is revised so that a whole
            number of full phases (4 steps each) fits between the points.
    Returns:
        The end point p2.
    Raises:
        ValueError: if the points are too close for one full phase.
    """
    start = Point(p1)
    stop = Point(p2)
    dist = abs(stop - start)  # distance of points
    steps = 4 * int(round(dist / (4 * breadth), 0))  # always take full phases
    if steps < 4:
        raise ValueError("points too close")
    unit = dist / steps  # revised breadth
    # util_hor_matrix is inverted to map the x-axis based points back
    to_line = ~Matrix(util_hor_matrix(start, stop))
    k = 2.4142135623765633  # y of draw_curve helper point

    # helper points alternate above / below, the others sit on the line
    y_by_phase = {1: -k, 3: k}
    nodes = [start]
    nodes.extend(
        Point(i, y_by_phase.get(i % 4, 0)) * unit * to_line
        for i in range(1, steps)
    )
    nodes.append(stop)

    # every other node is a curve end point, those in between steer
    for i in range(0, len(nodes) - 2, 2):
        self.draw_curve(nodes[i], nodes[i + 1], nodes[i + 2])
    return stop
3431
3432 # ==============================================================================
3433 # Shape.insert_text
3434 # ==============================================================================
def insert_text(
    self,
    point: point_like,
    buffer: typing.Union[str, list],
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    render_mode: int = 0,
    border_width: float = 0.05,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> int:
    """Insert text lines starting at a point.

    Args:
        point: start position of the first line.
        buffer: the text, either a string (split at line breaks) or a
            list / tuple of lines.
        fontsize: font size.
        lineheight: factor applied to fontsize to get the line height.
            If omitted, it is derived from the font's ascender and
            descender, or 1.2 if their difference is not above 1.
        fontname: font reference name, a leading '/' is removed.
        fontfile: passed on to `page.insert_font`.
        set_simple: passed on to `page.insert_font`.
        encoding: passed on to `page.insert_font`.
        color: stroke color.
        fill: fill color. With render_mode 0 and no fill given, the
            stroke color is used.
        render_mode: text rendering mode (PDF 'Tr' operator).
        border_width: line width as a fraction of fontsize, only written
            if render_mode > 0.
        rotate: a multiple of 90 degrees.
        morph: optional (fixpoint, matrix) pair checked by `CheckMorph`.
        stroke_opacity: passed on to `page._set_opacity` as CA.
        fill_opacity: passed on to `page._set_opacity` as ca.
        oc: passed on to `page._get_optional_content`.
    Returns:
        Number of lines written. Output stops as soon as the remaining
        space is less than one line height. 0 if there is no text.
    Raises:
        ValueError: if `rotate` is not a multiple of 90.
    """
    # ensure 'text' is a list of strings, worth dealing with
    if not buffer:
        return 0

    if type(buffer) not in (list, tuple):
        text = buffer.splitlines()
    else:
        text = buffer

    if not text:
        return 0

    point = Point(point)
    try:
        maxcode = max([ord(c) for c in " ".join(text)])
    except (TypeError, ValueError):
        # non-string items (TypeError) or no characters at all (ValueError)
        return 0

    # ensure valid 'fontname'
    fname = fontname
    if fname.startswith("/"):
        fname = fname[1:]

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontinfo = CheckFontInfo(self.doc, xref)

    fontdict = fontinfo[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]
    if lineheight:
        lheight = fontsize * lineheight
    elif ascender - descender <= 1:
        lheight = fontsize * 1.2
    else:
        lheight = fontsize * (ascender - descender)

    if maxcode > 255:
        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    else:
        glyphs = fontdict["glyphs"]

    # glyph table handed to getTJstr is the same for every line
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs
    text = [getTJstr(t, tj_glyphs, simple, ordering) for t in text]

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if not fill and render_mode == 0:  # ensure fill color when 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    morphing = CheckMorph(morph)
    if rotate % 90 != 0:
        raise ValueError("bad rotate value")
    rot = rotate % 360  # text rotate = 0, 90, 270, 180

    templ1 = "\nq\n%s%sBT\n%s1 0 0 1 %g %g Tm\n/%s %g Tf "
    templ2 = "TJ\n0 -%g TD\n"
    tj = templ2[:2]  # the bare show-text operator
    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height
    width = self.width

    # setting up for standard rotation directions
    if morphing:
        m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
        mat = ~m1 * morph[1] * m1
        cm = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat)
    else:
        cm = ""

    # case rotate = 0
    top = height - point.y - self.y  # start of 1st char
    left = point.x + self.x  # start of 1. char
    space = top  # space available
    if rot == 90:
        left = height - point.y - self.y
        top = -point.x - self.x
        cm += cmp90
        space = width - abs(top)
    elif rot == 270:
        left = -height + point.y + self.y
        top = point.x + self.x
        cm += cmm90
        space = abs(top)
    elif rot == 180:
        left = -point.x - self.x
        top = -height + point.y + self.y
        cm += cm180
        space = abs(point.y + self.y)

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is None:
        alpha = ""
    else:
        alpha = "/%s gs\n" % alpha
    nres = templ1 % (bdc, alpha, cm, left, top, fname, fontsize)

    if render_mode > 0:
        nres += "%i Tr " % render_mode
        nres += "%g w " % (border_width * fontsize)

    if color is not None:
        nres += color_str
    if fill is not None:
        nres += fill_str

    # =========================================================================
    # start text insertion
    # =========================================================================
    nres += text[0]
    nlines = 1  # set output line counter
    if len(text) > 1:
        nres += templ2 % lheight  # line 1, also sets the line distance
    else:
        nres += tj
    for i in range(1, len(text)):
        if space < lheight:
            break  # no space left on page
        if i > 1:
            nres += "\nT* "
        nres += text[i] + tj
        space -= lheight
        nlines += 1

    nres += "\nET\n%sQ\n" % emc

    # =====================================================================
    # end of text insertion
    # =====================================================================
    # remember the text commands, they are written by commit()
    self.text_cont += nres
    return nlines
3615
3616 # =========================================================================
3617 # Shape.insert_textbox
3618 # =========================================================================
def insert_textbox(
    self,
    rect: rect_like,
    buffer: typing.Union[str, list],
    fontname: OptStr = "helv",
    fontfile: OptStr = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    border_width: float = 0.05,
    align: int = 0,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Args:
        rect -- the textbox to fill
        buffer -- text to be inserted (string, or list / tuple of lines)
        fontname -- font reference name, a leading '/' is removed
        fontfile -- name of a font file
        fontsize -- font size
        lineheight -- overwrite the font property
        color -- stroke color
        fill -- fill color (defaults to color for render_mode 0)
        render_mode -- text rendering control
        border_width -- thickness of glyph borders as percentage of fontsize
        expandtabs -- handles tabulators with string function
        align -- 0, 1, 2, 3 for left, center, right, justified
        rotate -- 0, 90, 180, or 270 degrees
        morph -- morph box with a matrix and a fixpoint
    Returns:
        If non-negative: the unused length of the box in direction of the
        line progress. If negative: the length that is missing to fit the
        text - nothing has been written in this case.
    Raises:
        ValueError: if rect is empty or infinite, or if rotate is no
            multiple of 90.
    """
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("text box must be finite and not empty")

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    # determine opacity / transparency
    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is None:
        alpha = ""
    else:
        alpha = "/%s gs\n" % alpha

    if rotate % 90 != 0:
        raise ValueError("rotate must be multiple of 90")
    rot = rotate % 360

    # is buffer worth of dealing with?
    if not buffer:
        return rect.height if rot in (0, 180) else rect.width

    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height

    fname = fontname
    if fname.startswith("/"):
        fname = fname[1:]

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontinfo = CheckFontInfo(self.doc, xref)

    fontdict = fontinfo[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight_factor = lineheight
    elif ascender - descender <= 1:
        lheight_factor = 1.2
    else:
        lheight_factor = ascender - descender
    lheight = fontsize * lheight_factor

    # make one string from buffer, it is split into lines further down
    if type(buffer) in (list, tuple):
        t0 = "\n".join(buffer)
    else:
        t0 = buffer

    # a sequence without any characters (e.g. [""]) is like an empty buffer
    if not t0:
        return rect.height if rot in (0, 180) else rect.width

    maxcode = max([ord(c) for c in t0])
    # replace invalid char codes for simple fonts
    if simple and maxcode > 255:
        t0 = "".join([c if ord(c) < 256 else "?" for c in t0])

    t0 = t0.splitlines()

    glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs

    # ----------------------------------------------------------------------
    # calculate pixel length of a string
    # ----------------------------------------------------------------------
    def pixlen(x):
        """Calculate pixel length of x."""
        if ordering < 0:
            return sum([glyphs[ord(c)][1] for c in x]) * fontsize
        else:
            return len(x) * fontsize

    # ---------------------------------------------------------------------

    if ordering < 0:
        blen = glyphs[32][1] * fontsize  # pixel size of space character
    else:
        blen = fontsize

    text = ""  # output buffer

    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        cm = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat)
    else:
        cm = ""

    # ---------------------------------------------------------------------
    # adjust for text orientation / rotation
    # ---------------------------------------------------------------------
    progr = 1  # direction of line progress
    c_pnt = Point(0, fontsize * ascender)  # used for line progress
    if rot == 0:  # normal orientation
        point = rect.tl + c_pnt  # line 1 is 'lheight' below top
        maxwidth = rect.width  # pixels available in one line
        maxheight = rect.height  # available text height

    elif rot == 90:  # rotate counter clockwise
        c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
        point = rect.bl + c_pnt  # line 1 'lheight' away from left
        maxwidth = rect.height  # pixels available in one line
        maxheight = rect.width  # available text height
        cm += cmp90

    elif rot == 180:  # text upside down
        # progress upwards in y direction
        c_pnt = -Point(0, fontsize * ascender)
        point = rect.br + c_pnt  # line 1 'lheight' above bottom
        maxwidth = rect.width  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.height  # available text height
        cm += cm180

    else:  # rotate clockwise (270 or -90)
        # progress from right to left
        c_pnt = -Point(fontsize * ascender, 0)
        point = rect.tr + c_pnt  # line 1 'lheight' left of right
        maxwidth = rect.height  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.width  # available text height
        cm += cmm90

    # =====================================================================
    # line loop
    # =====================================================================
    just_tab = []  # 'justify' indicators per line

    for i, line in enumerate(t0):
        line_t = line.expandtabs(expandtabs).split(" ")  # split into words
        lbuff = ""  # init line buffer
        rest = maxwidth  # available line pixels
        # =================================================================
        # word loop
        # =================================================================
        for word in line_t:
            pl_w = pixlen(word)  # pixel len of word
            if rest >= pl_w:  # does it fit on the line?
                lbuff += word + " "  # yes, append word
                rest -= pl_w + blen  # update available line space
                continue  # next word

            # word doesn't fit - output line (if not empty)
            if lbuff:
                lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                text += lbuff  # append to total text
                just_tab.append(True)  # can align-justify

            lbuff = ""  # re-init line buffer
            rest = maxwidth  # re-init avail. space

            if pl_w <= maxwidth:  # word shorter than 1 line?
                lbuff = word + " "  # start the line with it
                rest = maxwidth - pl_w - blen  # update free space
                continue

            # long word: split across multiple lines - char by char ...
            if len(just_tab) > 0:
                just_tab[-1] = False  # cannot align-justify
            for c in word:
                if pixlen(lbuff) <= maxwidth - pixlen(c):
                    lbuff += c
                else:  # line full
                    lbuff += "\n"  # close line
                    text += lbuff  # append to text
                    just_tab.append(False)  # cannot align-justify
                    lbuff = c  # start new line with this char

            lbuff += " "  # finish long word
            rest = maxwidth - pixlen(lbuff)  # long word stored

        if lbuff:  # unprocessed line content?
            text += lbuff.rstrip()  # append to text
            just_tab.append(False)  # cannot align-justify

        if i < len(t0) - 1:  # not the last line?
            text += "\n"  # insert line break

    # compute used part of the textbox
    if text.endswith("\n"):
        text = text[:-1]
    lb_count = text.count("\n") + 1  # number of lines written

    # text height = line count * line height plus one descender value
    text_height = lheight * lb_count - descender * fontsize

    more = text_height - maxheight  # difference to height limit
    if more > EPSILON:  # landed too much outside rect
        return (-1) * more  # return deficit, don't output

    more = abs(more)
    if more < EPSILON:
        more = 0  # don't bother with epsilons
    nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
    templ = "1 0 0 1 %g %g Tm /%s %g Tf "
    # center, right, justify: output each line with its own specifics
    text_t = text.splitlines()  # split text in lines again
    if just_tab:
        just_tab[-1] = False  # never justify last line
    for i, t in enumerate(text_t):
        pl = maxwidth - pixlen(t)  # length of empty line part
        pnt = point + c_pnt * (i * lheight_factor)  # text start of line
        if align == 1:  # center: right shift by half width
            if rot in (0, 180):
                pnt = pnt + Point(pl / 2, 0) * progr
            else:
                pnt = pnt - Point(0, pl / 2) * progr
        elif align == 2:  # right: right shift by full width
            if rot in (0, 180):
                pnt = pnt + Point(pl, 0) * progr
            else:
                pnt = pnt - Point(0, pl) * progr
        elif align == 3:  # justify
            spaces = t.count(" ")  # number of spaces in line
            if spaces > 0 and just_tab[i]:  # if any, and we may justify
                spacing = pl / spaces  # make every space this much larger
            else:
                spacing = 0  # keep normal space length
        top = height - pnt.y - self.y
        left = pnt.x + self.x
        if rot == 90:
            left = height - pnt.y - self.y
            top = -pnt.x - self.x
        elif rot == 270:
            left = -height + pnt.y + self.y
            top = pnt.x + self.x
        elif rot == 180:
            left = -pnt.x - self.x
            top = -height + pnt.y + self.y

        nres += templ % (left, top, fname, fontsize)

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += "%g w " % (border_width * fontsize)

        if align == 3:
            nres += "%g Tw " % spacing

        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str
        nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)

    nres += "ET\n%sQ\n" % emc

    self.text_cont += nres
    self.updateRect(rect)
    return more
3937
def finish(
    self,
    width: float = 1,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    dashes: OptStr = None,
    even_odd: bool = False,
    morph: OptSeq = None,
    closePath: bool = True,
    fill_opacity: float = 1,
    stroke_opacity: float = 1,
    oc: int = 0,
) -> None:
    """Finish the current drawing segment.

    Notes:
        Wraps the draw commands collected so far with colors, opacity,
        dashes, line style and width, optional content and morphing,
        and chooses the paint operator. Optionally the path is closed
        by connecting last to first point. The result is appended to
        `self.totalcont` and the draw buffer is emptied. Nothing
        happens if no draw commands have been collected.
    """
    if self.draw_cont == "":  # treat empty contents as no-op
        return

    if width == 0:  # border color makes no sense then
        color = None
    elif color is None:  # vice versa
        width = 0
    stroke_cmd = ColorCode(color, "c")  # ensure proper color string
    fill_cmd = ColorCode(fill, "f")  # ensure proper fill string

    oc_name = self.page._get_optional_content(oc)
    emc = ""
    if oc_name is not None:
        self.draw_cont = "/OC /%s BDC\n" % oc_name + self.draw_cont
        emc = "EMC\n"

    gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if gstate is not None:
        self.draw_cont = "/%s gs\n" % gstate + self.draw_cont

    if width not in (0, 1):
        self.draw_cont += "%g w\n" % width

    # line style commands are put in front of what we have so far
    if lineCap != 0:
        self.draw_cont = "%i J\n" % lineCap + self.draw_cont
    if lineJoin != 0:
        self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
    if dashes not in (None, "", "[] 0"):
        self.draw_cont = "%s d\n" % dashes + self.draw_cont

    if closePath:
        self.draw_cont += "h\n"
        self.lastPoint = None

    if color is not None:
        self.draw_cont += stroke_cmd

    # choose paint operator: fill and stroke, fill only, or stroke only
    if fill is None:
        paint = "S"
    else:
        self.draw_cont += fill_cmd
        paint = "B" if color is not None else "f"
        if even_odd:
            paint += "*"
    self.draw_cont += paint + "\n"

    self.draw_cont += emc
    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        self.draw_cont = "%g %g %g %g %g %g cm\n" % JM_TUPLE(mat) + self.draw_cont

    self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
    self.draw_cont = ""
    self.lastPoint = None
4028
def commit(self, overlay: bool = True) -> None:
    """Update the page's /Contents object with Shape data.

    Args:
        overlay: passed on to `TOOLS._insert_contents`; controls whether
            data appear in foreground (default) or background.

    Notes:
        Finished drawings and text are encoded and written to a new
        contents stream of the page. Afterwards the Shape is reset for
        potential re-use. The Shape's buffers are only touched after
        writing has succeeded, so they stay usable if writing fails.
    """
    CheckParent(self.page)  # doc may have died meanwhile

    # build the stream in a local: self.totalcont must remain a string
    stream = (self.totalcont + self.text_cont).encode()

    if stream != b"":
        # make /Contents object with dummy stream
        xref = TOOLS._insert_contents(self.page, b" ", overlay)
        # replace the dummy by the real data
        self.doc.update_stream(xref, stream)

    self.lastPoint = None  # clean up ...
    self.rect = None  #
    self.draw_cont = ""  # for potential ...
    self.text_cont = ""  # ...
    self.totalcont = ""  # re-use
    return
4048
4049
def apply_redactions(page: Page, images: int = 2) -> bool:
    """Apply the redaction annotations of the page.

    Args:
        page: the PDF page.
        images: 0 - ignore images, 1 - remove complete overlapping image,
            2 - blank out overlapping image parts.
    Returns:
        False if the page has no redaction annotations, else True.
    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or
            if the MuPDF redaction step reports failure.
    """

    def center_rect(annot_rect, text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
        Args:
            annot_rect: the annotation rectangle (is not modified)
            text: the text to insert.
            font: the fontname. If 'get_text_length' rejects it with a
                ValueError, the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not text:
            return annot_rect
        try:
            text_width = get_text_length(text, font, fsize)
        except ValueError:  # unsupported font
            return annot_rect
        line_height = fsize * 1.2
        limit = annot_rect.width
        h = math.ceil(text_width / limit) * line_height  # estimate rect height
        if h >= annot_rect.height:
            return annot_rect
        # work on a copy: the caller's rectangle must stay as it is
        r = Rect(annot_rect)
        r.y0 = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
        return r

    CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    # save the values of all redaction annots before they get applied
    redact_annots = [
        annot._get_redact_values()
        for annot in page.annots(types=(PDF_ANNOT_REDACT,))
    ]

    if not redact_annots:  # any redactions on this page?
        return False  # no redactions

    rc = page._apply_redactions(images)  # call MuPDF redaction process step
    if not rc:  # should not happen really
        raise ValueError("Error applying redactions.")

    # now write replacement text in old redact rectangles
    shape = page.new_shape()
    for redact in redact_annots:
        annot_rect = redact["rect"]
        fill = redact["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" in redact:  # if we also have text
            text = redact["text"]
            align = redact.get("align", 0)
            fname = redact["fontname"]
            fsize = redact["fontsize"]
            color = redact["text_color"]
            # try finding vertical centered sub-rect
            trect = center_rect(annot_rect, text, fname, fsize)

            rc = -1
            while rc < 0 and fsize >= 4:  # while not enough room
                # (re-) try insertion
                rc = shape.insert_textbox(
                    trect,
                    text,
                    fontname=fname,
                    fontsize=fsize,
                    color=color,
                    align=align,
                )
                fsize -= 0.5  # reduce font if unsuccessful
    shape.commit()  # append new contents object
    return True
4140
4141
4142 # ------------------------------------------------------------------------------
4143 # Remove potentially sensitive data from a PDF. Similar to the Adobe
4144 # Acrobat 'sanitize' function
4145 # ------------------------------------------------------------------------------
def scrub(
    doc: Document,
    attached_files: bool = True,
    clean_pages: bool = True,
    embedded_files: bool = True,
    hidden_text: bool = True,
    javascript: bool = True,
    metadata: bool = True,
    redactions: bool = True,
    redact_images: int = 0,
    remove_links: bool = True,
    reset_fields: bool = True,
    reset_responses: bool = True,
    thumbnails: bool = True,
    xml_metadata: bool = True,
) -> None:
    """Remove potentially sensitive data from a PDF document (in place).

    Args:
        doc: the PDF document. Must be open and not encrypted.
        attached_files: overwrite the content of file attachment annotations.
        clean_pages: clean the page /Contents. If False, 'hidden_text' and
            'redactions' are switched off as well.
        embedded_files: delete all embedded files.
        hidden_text: remove text written with render mode 3 ("3 Tr").
        javascript: replace every /JavaScript action by an empty one.
        metadata: reset the standard metadata.
        redactions: apply redaction annotations found on a page.
        redact_images: passed as 'images' to 'page.apply_redactions()'.
        remove_links: delete all links of every page.
        reset_fields: reset all form fields (widgets).
        reset_responses: call 'delete_responses()' for every annotation.
        thumbnails: set the page's /Thumb key to null.
        xml_metadata: delete XML metadata and /Metadata references.

    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, or if
            an xref without object definition is encountered.
    """

    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Should have status
                from after page.clean_contents().

        Returns:
            List of /Contents lines from which hidden text has been removed,
            or None if no text suppression operator was found.

        Notes:
            The input must have been created after the page's /Contents object(s)
            have been cleaned with page.clean_contents(). This ensures a standard
            formatting: one command per line, single spaces between operators.
            This allows for drastic simplification of this code.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        make_return = False  # becomes True once a "3 Tr" was seen
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True  # switch on
                out_lines.append(line)  # output it
                continue
            if line == b"ET":  # end of text object
                in_text = False  # switch off
                out_lines.append(line)  # output it
                continue
            if line == b"3 Tr":  # text suppression operator
                suppress = True  # switch on
                make_return = True
                continue
            # Any other render mode operator ends suppression. "3 Tr" itself
            # never arrives here. The former extra test 'line[0] != b"3"'
            # compared an int with bytes and therefore was always true.
            if line.endswith(b"Tr"):
                suppress = False  # text rendering changed
                out_lines.append(line)
                continue
            if line == b"Q":  # unstack command also switches off
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        if make_return:
            return out_lines
        return None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if clean_pages is False:
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            links = page.get_links()  # list of all links on page
            for link in links:  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == PDF_ANNOT_REDACT:
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        # Thumbnail removal only touches the page object. Do it before the
        # early exits below, so it also happens for pages without /Contents
        # and when 'clean_pages' is False.
        if thumbnails:  # remove page thumbnails?
            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                doc.xref_set_key(page.xref, "Thumb", "null")

        if not (clean_pages or hidden_text):
            continue  # done with the page

        page.clean_contents()
        if not page.get_contents():
            continue
        if hidden_text:
            xref = page.get_contents()[0]  # only one b/o cleaning!
            cont = doc.xref_stream(xref)
            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
            if cont_lines:  # something was actually removed
                cont = b"\n".join(cont_lines)
                doc.update_stream(xref, cont)  # rewrite the page /Contents

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()
    if not (xml_metadata or javascript):
        xref_limit = 0  # nothing to do in the xref loop
    else:
        xref_limit = doc.xref_length()
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object
            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
            doc.update_object(xref, obj)  # update this object
            continue  # no further handling

        if not xml_metadata:
            continue

        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue

        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")
4297
4298
def fill_textbox(
    writer: TextWriter,
    rect: rect_like,
    text: typing.Union[str, list],
    pos: point_like = None,
    font: typing.Optional[Font] = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    align: int = 0,
    warn: bool = None,
    right_to_left: bool = False,
    small_caps: bool = False,
) -> tuple:
    """Fill a rectangle with text.

    Args:
        writer: TextWriter object (= "self")
        rect: rect-like to receive the text.
        text: string or list/tuple of strings.
        pos: point-like start position of first word.
        font: Font object (default Font('helv')).
        fontsize: the fontsize.
        lineheight: overwrite the font property
        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
        warn: (bool) text overflow action: None = ignore, True = print a
            warning, False = raise an exception.
        right_to_left: (bool) indicate right-to-left language.
        small_caps: (bool) passed on to the font's length computations and
            to 'writer.append()'.

    Returns:
        List of (text, length) items for the lines that did not fit in the
        rectangle and were not written.

    Raises:
        ValueError: if the rectangle is empty, the start position is outside
            the rectangle, or text does not fit and 'warn' is False.
    """
    rect = Rect(rect)
    if rect.is_empty:
        raise ValueError("fill rect must not empty.")
    if type(font) is not Font:
        font = Font("helv")

    def textlen(x):
        """Return length of a string."""
        return font.text_length(
            x, fontsize=fontsize, small_caps=small_caps
        )  # abbreviation

    def char_lengths(x):
        """Return list of single character lengths for a string."""
        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)

    def append_this(pos, text):
        """Write 'text' at 'pos' using the font settings of this call."""
        return writer.append(
            pos, text, font=font, fontsize=fontsize, small_caps=small_caps
        )

    tolerance = fontsize * 0.2  # extra distance to left border
    space_len = textlen(" ")
    std_width = rect.width - tolerance
    std_start = rect.x0 + tolerance

    def norm_words(width, words):
        """Cut any word in pieces no longer than 'width'."""
        nwords = []
        word_lengths = []
        for w in words:
            wl_lst = char_lengths(w)
            wl = sum(wl_lst)
            if wl <= width:  # nothing to do - copy over
                nwords.append(w)
                word_lengths.append(wl)
                continue

            # word longer than rect width - split it in parts
            n = len(wl_lst)
            while n > 0:
                wl = sum(wl_lst[:n])
                if wl <= width:
                    nwords.append(w[:n])
                    word_lengths.append(wl)
                    w = w[n:]
                    wl_lst = wl_lst[n:]
                    n = len(wl_lst)
                else:
                    n -= 1
        return nwords, word_lengths

    def output_justify(start, line):
        """Justified output of a line."""
        # ignore leading / trailing / multiple spaces
        words = [w for w in line.split(" ") if w != ""]
        nwords = len(words)
        if nwords == 0:
            return
        if nwords == 1:  # single word cannot be justified
            append_this(start, words[0])
            return
        tl = sum([textlen(w) for w in words])  # total word lengths
        gaps = nwords - 1  # number of word gaps
        gapl = (std_width - tl) / gaps  # width of each gap
        for w in words:
            _, lp = append_this(start, w)  # output one word
            start.x = lp.x + gapl  # next start at word end plus gap
        return

    asc = font.ascender
    dsc = font.descender
    if not lineheight:
        if asc - dsc <= 1:
            lheight = 1.2
        else:
            lheight = asc - dsc
    else:
        lheight = lineheight

    LINEHEIGHT = fontsize * lheight  # effective line height
    width = std_width  # available horizontal space

    # starting point of text
    if pos is not None:
        pos = Point(pos)
    else:  # default is just below rect top-left
        pos = rect.tl + (tolerance, fontsize * asc)
    if pos not in rect:
        raise ValueError("Text must start in rectangle.")

    # calculate displacement factor for alignment
    if align == TEXT_ALIGN_CENTER:
        factor = 0.5
    elif align == TEXT_ALIGN_RIGHT:
        factor = 1.0
    else:
        factor = 0

    # split in lines if just a string was given
    if type(text) is str:
        textlines = text.splitlines()
    else:
        textlines = []
        for line in text:
            textlines.extend(line.splitlines())

    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1

    new_lines = []  # the final list of textbox lines
    no_justify = []  # no justify for these line numbers
    for i, line in enumerate(textlines):
        if line in ("", " "):
            new_lines.append((line, space_len))
            width = rect.width - tolerance
            no_justify.append((len(new_lines) - 1))
            continue
        if i == 0:  # first line may start right of the standard start
            width = rect.x1 - pos.x
        else:
            width = rect.width - tolerance

        if right_to_left:  # reverses Arabic / Hebrew text front to back
            line = writer.clean_rtl(line)
        tl = textlen(line)
        if tl <= width:  # line short enough
            new_lines.append((line, tl))
            no_justify.append((len(new_lines) - 1))
            continue

        # we need to split the line in fitting parts
        words = line.split(" ")  # the words in the line

        # cut in parts any words that are longer than rect width
        words, word_lengths = norm_words(std_width, words)

        n = len(words)
        while True:
            line0 = " ".join(words[:n])
            wl = sum(word_lengths[:n]) + space_len * (len(word_lengths[:n]) - 1)
            if wl <= width:
                new_lines.append((line0, wl))
                words = words[n:]
                word_lengths = word_lengths[n:]
                n = len(words)
                line0 = None
                # Every continuation line starts at 'std_start', so the full
                # standard width is available from now on. Without this reset
                # a first word wider than the (shorter) first-line width but
                # within 'std_width' would never fit: the loop kept appending
                # empty lines forever.
                width = std_width
            else:
                n -= 1

            if len(words) == 0:
                break

    # -------------------------------------------------------------------------
    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
    # output length (float) and 'text' is the text. Except for justified text,
    # this is output-ready.
    # -------------------------------------------------------------------------
    nlines = len(new_lines)
    if nlines > max_lines:
        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
        # deliberately '==' and not truthiness: None means "stay silent"
        if warn == True:
            print("Warning: " + msg)
        elif warn == False:
            raise ValueError(msg)

    start = Point()
    no_justify += [len(new_lines) - 1]  # no justifying of last line
    for i in range(max_lines):
        try:
            line, tl = new_lines.pop(0)
        except IndexError:  # all lines have been written
            break

        if right_to_left:  # Arabic, Hebrew
            line = "".join(reversed(line))

        if i == 0:  # may have different start for first line
            start = pos

        if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
            output_justify(start, line)
            start.x = std_start
            start.y += LINEHEIGHT
            continue

        if i > 0 or pos.x == std_start:  # left, center, right alignments
            start.x += (width - tl) * factor

        append_this(start, line)
        start.x = std_start
        start.y += LINEHEIGHT

    return new_lines  # return non-written lines
4519
4520
4521 # ------------------------------------------------------------------------
4522 # Optional Content functions
4523 # ------------------------------------------------------------------------
def get_oc(doc: Document, xref: int) -> int:
    """Return optional content object xref for an image or form xobject.

    Args:
        doc: the PDF document. Must be open and not encrypted.
        xref: (int) xref number of an image or form xobject.

    Returns:
        The xref number stored under the object's /OC key, or 0 if that key
        is not an indirect reference.

    Raises:
        ValueError: if the document is closed or encrypted, or if the object
            is neither an image nor a form xobject.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    t, oc = doc.xref_get_key(xref, "OC")
    if t != "xref":
        return 0
    # The reference looks like "<number> <generation> R". Take the first
    # token so that a non-zero generation number does not break parsing.
    return int(oc.split()[0])
4540
4541
def set_oc(doc: Document, xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        doc: the PDF document. Must be open and not encrypted.
        xref: (int) xref number of an image or form xobject
        oc: (int) xref number of an OCG or OCMD. Use 0 to remove an existing
            /OC entry.

    Raises:
        ValueError: if the document is closed or encrypted, 'xref' is neither
            an image nor a form xobject, or 'oc' is negative or does not
            point to an OCG / OCMD.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc < 0:
        raise ValueError("bad object type at xref %i" % oc)
    if oc == 0:
        # Removal request: only touch the object if it has an /OC key.
        # Formerly a missing key fell through and "0 0 R" was written.
        if "OC" in doc.xref_get_keys(xref):
            doc.xref_set_key(xref, "OC", "null")
        return None
    t, name = doc.xref_get_key(oc, "Type")
    if t != "name" or name not in ("/OCG", "/OCMD"):
        raise ValueError("bad object type at xref %i" % oc)
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None
4563
4564
def set_ocmd(
    doc: Document,
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Create or update an OCMD object in a PDF document.

    Args:
        doc: the PDF document.
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.

    Raises:
        ValueError: for unknown OCG xrefs, an unknown policy, a malformed
            visibility expression, or if 'xref' is not an OCMD.
    """

    all_ocgs = set(doc.get_ocgs().keys())

    def ve_maker(ve):
        """Return the PDF array string for a (nested) visibility expression."""
        # Format via a 1-tuple: 've' itself may be a tuple, which the '%'
        # operator would otherwise unpack as multiple arguments.
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        operator = ve[0]
        if not isinstance(operator, str) or operator.lower() not in (
            "and",
            "or",
            "not",
        ):
            raise ValueError("bad operand: %s" % (operator,))
        if operator.lower() == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        parts = ["/%s" % operator.title()]
        for x in ve[1:]:
            if type(x) is int:  # must be the xref of a known OCG
                if x not in all_ocgs:
                    raise ValueError("bad OCG %i" % x)
                parts.append("%i 0 R" % x)
            else:  # a nested expression
                parts.append(ve_maker(x))
        return "[" + " ".join(parts) + "]"

    text = "<</Type/OCMD"

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
        if s:
            msg = "bad OCGs: %s" % s
            raise ValueError(msg)
        text += "/OCGs[" + " ".join("%i 0 R" % x for x in ocgs) + "]"

    if policy:
        policy = str(policy).lower()
        pols = {
            "anyon": "AnyOn",
            "allon": "AllOn",
            "anyoff": "AnyOff",
            "alloff": "AllOff",
        }
        if policy not in pols:
            raise ValueError("bad policy: %s" % policy)
        text += "/P/%s" % pols[policy]

    if ve:
        text += "/VE%s" % ve_maker(ve)

    text += ">>"

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref
4637
4638
def get_ocmd(doc: Document, xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.

    Args:
        doc: the PDF document.
        xref: (int) xref number of the OCMD.

    Returns:
        A dict {"xref": int, "ocgs": list or None, "policy": str or None,
        "ve": list or None}.

    Raises:
        ValueError: if 'xref' is out of range, the object is no OCMD, or its
            /P or /VE entry cannot be interpreted.
    """

    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
        ocgs = list(map(int, ocgs))

    p0 = text.find("/P/")  # look for /P policy key
    if p0 < 0:
        policy = None
    else:
        # Read the name following "/P/". The former search for the lowercase
        # substrings "ff" / "on" could not find "AnyOn" or "AllOn" (capital
        # "O"), so these policies were rejected as irregular syntax.
        p1 = p0 + 3
        while p1 < textlen and text[p1].isalnum():
            p1 += 1
        policy = text[p0 + 3 : p1]
        if policy not in ("AllOn", "AllOff", "AnyOn", "AnyOff"):
            raise ValueError("bad object at xref")  # some irregular syntax

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding last ']'.
        p1 = p0
        while lp < 1 or lp != rp:  # until brackets are balanced
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        # turn the PDF array into JSON: operators become strings, indirect
        # references become plain integers, nested arrays get separators
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        try:
            ve = json.loads(ve)
        except ValueError:  # includes json.JSONDecodeError
            print("bad /VE key: ", ve)
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
4703
4704
4705 """
4706 Handle page labels for PDF documents.
4707
4708 Reading
4709 -------
4710 * compute the label of a page
4711 * find page number(s) having the given label.
4712
4713 Writing
4714 -------
4715 Supports setting (defining) page labels for PDF documents.
4716
4717 A big Thank You goes to WILLIAM CHAPMAN who contributed the idea and
4718 significant parts of the following code during late December 2020
4719 through early January 2021.
4720 """
4721
4722
def rule_dict(item):
    """Make a Python dict from a PDF page label rule.

    Args:
        item -- a tuple (pno, rule) with the start page number and the rule
                string like <</S/D...>>.
    Returns:
        A dict like
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
        Key 'style' is only present if the rule contains an /S entry.
    """
    # Jorj McKie, 2021-01-06

    pno, rule = item
    tokens = rule[2:-2].split("/")[1:]  # strip "<<" and ">>"
    d = {"startpage": pno, "prefix": "", "firstpagenum": 1}
    skip = False
    for i, token in enumerate(tokens):
        if skip:  # this token has already been processed
            skip = False  # deactivate skipping again
            continue
        if token == "S":  # style specification
            d["style"] = tokens[i + 1]  # next token has the style
            skip = True  # do not process next token again
            continue
        if token == "Type":  # optional "/Type/PageLabel" entry
            # skip its value, else "PageLabel" would be taken for a prefix
            skip = True
            continue
        if token.startswith("P"):  # prefix specification: extract the string
            d["prefix"] = token[1:].replace("(", "").replace(")", "")
            continue
        if token.startswith("St"):  # start page number specification
            d["firstpagenum"] = int(token[2:])
    return d
4755
4756
def get_label_pno(pgNo, labels):
    """Return the label for this page number.

    Args:
        pgNo: page number, 0-based.
        labels: result of doc._get_page_labels(), i.e. a list of items
            (start page number, rule string). Need not be sorted.
    Returns:
        The label (str) of the page number. An empty string is returned if
        no rule starts at or before this page.
    """
    # Jorj McKie, 2021-01-06

    # Select the rule with the largest start page not exceeding pgNo. Among
    # equal start pages the last one wins, like with a sorted list.
    best = None
    for entry in labels:
        if entry[0] <= pgNo and (best is None or entry[0] >= best[0]):
            best = entry
    if best is None:  # page is not covered by any rule
        return ""
    rule = rule_dict(best)
    prefix = rule.get("prefix", "")
    style = rule.get("style", "")
    pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"]
    return construct_label(style, prefix, pagenumber)
4774
4775
def get_label(page):
    """Return the label for this PDF page.

    Args:
        page: page object.
    Returns:
        The label (str) of the page. An empty string is returned if the
        owning document defines no page labels.
    """
    # Jorj McKie, 2021-01-06

    rules = page.parent._get_page_labels()
    if rules:
        rules.sort()  # order by start page number
        return get_label_pno(page.number, rules)
    return ""
4791
4792
def get_page_numbers(doc, label, only_one=False):
    """Return a list of page numbers with the given label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) label.
        only_one: (bool) stop searching after first hit.
    Returns:
        List of page numbers having this label. The list is empty if 'label'
        is empty or the document defines no page labels.
    """
    # Jorj McKie, 2021-01-06

    numbers = []
    if not label:
        return numbers
    labels = doc._get_page_labels()
    if not labels:  # no page labels defined
        return numbers
    labels = sorted(labels)  # order by start page, like get_label() does
    for i in range(doc.page_count):
        if get_label_pno(i, labels) == label:
            numbers.append(i)
            if only_one:
                break
    return numbers
4818
4819
def construct_label(style, prefix, pno) -> str:
    """Construct a label based on style, prefix and page number.

    Styles: "D" decimal, "r" / "R" lower / upper case roman numerals,
    "a" / "A" lower / upper case letters. Any other style yields the prefix
    only.
    """
    # William Chapman, 2021-01-06
    # NOTE(review): 'pno' is handed to integerToLetter() unchanged, which
    # maps 0 to "A" - confirm this is the intended numbering for styles a / A.

    if style == "D":
        numeric = str(pno)
    elif style in ("r", "R"):
        roman = integerToRoman(pno)
        numeric = roman.lower() if style == "r" else roman.upper()
    elif style in ("a", "A"):
        letters = integerToLetter(pno)
        numeric = letters.lower() if style == "a" else letters.upper()
    else:  # no or unknown style: no numeric portion
        numeric = ""
    return prefix + numeric
4837
4838
def integerToLetter(i) -> str:
    """Returns letter sequence string for integer i.

    The sequence is A, B, ..., Z, AA, AB, ..., ZZ, AAA, ... where 0 maps
    to "A" (upper case letters are returned).
    """
    # William Chapman, Jorj McKie, 2021-01-06

    letters = string.ascii_uppercase
    # Determine the number of letters: skip all shorter sequences. Pure
    # integer arithmetic is used - no float round trip via math.pow().
    length, rest = 1, i
    while 26**length <= rest:
        rest -= 26**length
        length += 1

    # 'rest' is now the index within the sequences of this length: write
    # it as a base 26 number with exactly 'length' digits.
    chars = []
    for exponent in reversed(range(length)):
        digit, rest = divmod(rest, 26**exponent)
        chars.append(letters[digit])
    return "".join(chars)
4855
4856
def integerToRoman(num: int) -> str:
    """Return roman numeral for an integer.

    Upper case letters are returned. Zero yields an empty string.
    """
    # William Chapman, Jorj McKie, 2021-01-06

    numerals = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    pieces = []
    remaining = num
    # greedy conversion: use each value as often as it fits
    for value, symbol in numerals:
        count = remaining // value
        pieces.append(symbol * count)
        remaining -= value * count
        if remaining <= 0:
            break
    return "".join(pieces)
4886
4887
def get_page_labels(doc):
    """Return page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
    Returns:
        A list of dictionaries, one per label rule, with the format
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    # Jorj McKie, 2021-01-10
    rules = []
    for entry in doc._get_page_labels():
        rules.append(rule_dict(entry))  # convert rule string to dict
    return rules
4899
4900
def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
        labels: list of label dictionaries like:
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
        as returned by get_page_labels(). The list is not modified.
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            The start page number followed by the PDF label rule wrapped
            in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:  # 1 is the default: omit it
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all labels rules.

        Args:
            labels: (list) dictionaries as created by function 'rule_dict'.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        # sort a copy: do not reorder the caller's list as a side effect
        ordered = sorted(labels, key=lambda x: x["startpage"])
        return "".join([create_label_str(label) for label in ordered])

    doc._set_page_labels(create_nums(labels))
4944
4945
4946 # End of Page Label Code -------------------------------------------------
4947
4948
def has_links(doc: Document) -> bool:
    """Check whether there are links on any page.

    Raises:
        ValueError: if the document is closed or is no PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # item[1] of each 'page_annot_xrefs' entry is compared with the link type
    return any(
        item[1] == PDF_ANNOT_LINK
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
4960
4961
def has_annots(doc: Document) -> bool:
    """Check whether there are annotations on any page.

    Links and widgets (form fields) do not count as annotations here.

    Raises:
        ValueError: if the document is closed or is no PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    excluded = (PDF_ANNOT_LINK, PDF_ANNOT_WIDGET)
    return any(
        item[1] not in excluded
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
4973
4974
4975 # -------------------------------------------------------------------
4976 # Functions to recover the quad contained in a text extraction bbox
4977 # -------------------------------------------------------------------
def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> Quad:
    """Compute the quad located inside the bbox.

    The bbox may be any of the resp. tuples occurring inside the given span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line or None, in which
            case 'span["dir"]' is used.
        span: (dict) the span. May be from get_texttrace() method.
        bbox: (tuple) the bbox of the span or any of its characters.
    Returns:
        The quad which is wrapped by the bbox.
    """
    direction = span["dir"] if line_dir is None else line_dir
    cos, sin = direction
    box = Rect(bbox)  # make it a rect

    # height factor: just the fontsize for small glyph heights, otherwise
    # the font's ascender / descender difference
    if TOOLS.set_small_glyph_heights():
        factor = 1
    else:
        factor = span["ascender"] - span["descender"]
    height = factor * span["size"]  # the quad's rectangle height

    # The following are distances from the bbox corners, at which we find the
    # respective quad points. The computation depends on in which quadrant
    # the text writing angle is located.
    hs = height * sin
    hc = height * cos
    if hc >= 0 and hs <= 0:  # quadrant 1
        upper_left = box.bl - (0, hc)
        upper_right = box.tr + (hs, 0)
        lower_left = box.bl - (hs, 0)
        lower_right = box.tr + (0, hc)
    elif hc <= 0 and hs <= 0:  # quadrant 2
        upper_left = box.br + (hs, 0)
        upper_right = box.tl - (0, hc)
        lower_left = box.br + (0, hc)
        lower_right = box.tl - (hs, 0)
    elif hc <= 0 and hs >= 0:  # quadrant 3
        upper_left = box.tr - (0, hc)
        upper_right = box.bl + (hs, 0)
        lower_left = box.tr - (hs, 0)
        lower_right = box.bl + (0, hc)
    else:  # quadrant 4
        upper_left = box.tl + (hs, 0)
        upper_right = box.br - (0, hc)
        lower_left = box.tl + (0, hc)
        lower_right = box.br - (hs, 0)
    return Quad(upper_left, upper_right, lower_left, lower_right)
5026
5027
def recover_quad(line_dir: tuple, span: dict) -> Quad:
    """Recover the quadrilateral of a text span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: the span.
    Returns:
        The quadrilateral enveloping the span's text.
    Raises:
        ValueError: if 'line_dir' is no 2-tuple or 'span' is no dict.
    """
    dir_ok = type(line_dir) is tuple and len(line_dir) == 2
    if not dir_ok:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    span_bbox = span["bbox"]
    return recover_bbox_quad(line_dir, span, span_bbox)
5042
5043
def recover_line_quad(line: dict, spans: list = None) -> Quad:
    """Calculate the line quad for 'dict' / 'rawdict' text extractions.

    The lower quad points are those of the first, resp. last span quad.
    The upper points are determined by the maximum span quad height.
    From this, compute a rect with bottom-left in (0, 0), convert this to a
    quad and rotate and shift back to cover the text of the spans.

    Args:
        line: (dict) the line; its "dir" and (by default) "spans" are used.
        spans: (list, optional) sub-list of spans to consider.
    Returns:
        Quad covering selected spans.
    Raises:
        ValueError: if the list of spans is empty.
    """
    if spans is None:  # no sub-selection: take all spans
        spans = line["spans"]
    if len(spans) == 0:
        raise ValueError("bad span list")
    line_dir = line["dir"]  # text direction
    _cos, _sin = line_dir  # values unused; unpacking kept from the original

    first_quad = recover_quad(line_dir, spans[0])
    if len(spans) > 1:
        last_quad = recover_quad(line_dir, spans[-1])
    else:  # last = first
        last_quad = first_quad

    base_start = first_quad.ll  # lower-left of line quad
    base_end = last_quad.lr  # lower-right of line quad

    # map base line to x-axis such that base_start goes to (0, 0)
    to_axis = planish_line(base_start, base_end)
    mapped_end = base_end * to_axis

    use_small = TOOLS.set_small_glyph_heights()  # small glyph heights?

    # maximum height over all selected spans
    max_height = max(
        s["size"] * (1 if use_small else (s["ascender"] - s["descender"]))
        for s in spans
    )

    flat_rect = Rect(0, -max_height, mapped_end.x, 0)  # line rectangle
    result = flat_rect.quad  # make it a quad and:
    result *= ~to_axis  # rotate back and shift back
    return result
5087
5088
def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> Quad:
    """Calculate the span quad for 'dict' / 'rawdict' text extractions.

    Notes:
        There are two execution paths:
        1. For the full span quad, the result of 'recover_quad' is returned.
        2. For the quad of a sub-list of characters, the char quads are
           computed and joined. This is only supported for the "rawdict"
           extraction option.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line. If None,
            'span["dir"]' is used.
        span: (dict) the span.
        chars: (list, optional) sub-list of characters to consider.
    Returns:
        Quad covering selected characters.
    Raises:
        ValueError: if characters are given, but the span has no "chars".
    """
    if line_dir is None:  # must be a span from get_texttrace()
        line_dir = span["dir"]
    if chars is None:  # no sub-selection: full span quad
        return recover_quad(line_dir, span)
    if "chars" not in span.keys():
        raise ValueError("need 'rawdict' option to sub-select chars")

    first_quad = recover_char_quad(line_dir, span, chars[0])
    if len(chars) > 1:
        last_quad = recover_char_quad(line_dir, span, chars[-1])
    else:  # last = first
        last_quad = first_quad

    base_start = first_quad.ll  # lower-left of span quad
    base_end = last_quad.lr  # lower-right of span quad

    # map base line to x-axis such that base_start goes to (0, 0)
    to_axis = planish_line(base_start, base_end)
    mapped_end = base_end * to_axis

    if TOOLS.set_small_glyph_heights():  # small glyph heights?
        height = span["size"] * 1
    else:
        height = span["size"] * (span["ascender"] - span["descender"])

    flat_rect = Rect(0, -height, mapped_end.x, 0)  # span rectangle
    result = flat_rect.quad  # make it a quad and:
    result *= ~to_axis  # rotate back and shift back
    return result
5132
5133
def recover_char_quad(line_dir: tuple, span: dict, char: dict) -> Quad:
    """Recover the quadrilateral of a text character.

    This requires the "rawdict" option of text extraction.

    Args:
        line_dir: (tuple) 'line["dir"]' of the span's line. If None,
            'span["dir"]' is used.
        span: (dict) the span dict.
        char: (dict) the character dict. A tuple is accepted as well, in
            which case its item 3 is taken as the bbox.
    Returns:
        The quadrilateral enveloping the character.
    Raises:
        ValueError: if 'line_dir' is no 2-tuple, 'span' is no dict, or
            'char' is neither a dict nor a tuple.
    """
    if line_dir is None:
        line_dir = span["dir"]
    if type(line_dir) is not tuple or len(line_dir) != 2:
        raise ValueError("bad line dir argument")
    if type(span) is not dict:
        raise ValueError("bad span argument")
    if type(char) is dict:
        bbox = Rect(char["bbox"])
    elif type(char) is tuple:
        bbox = Rect(char[3])
    else:
        # formerly reported as "bad span argument", blaming the wrong parameter
        raise ValueError("bad char argument")

    return recover_bbox_quad(line_dir, span, bbox)
5160
5161
5162 # -------------------------------------------------------------------
5163 # Building font subsets using fontTools
5164 # -------------------------------------------------------------------
def subset_fonts(doc: Document, verbose: bool = False) -> int:
    """Build font subsets of a PDF. Requires package 'fontTools'.

    Eligible fonts are potentially replaced by smaller versions. Page text is
    NOT rewritten and thus should retain properties like being hidden or
    controlled by optional content.

    Args:
        doc: the PDF document whose embedded fonts should be subsetted.
        verbose: if True, print a progress line per font.
    Returns:
        0 if no eligible font was found. Otherwise the total size of all
        candidate font buffers minus the total size of the subset buffers
        that were actually built.
    Raises:
        ImportError: if package 'fontTools' is not installed.
    """
    import re  # local import: only needed to decode PDF name escapes

    # Font binaries: - "buffer" -> (names, xrefs, (unicodes, glyphs))
    # An embedded font is uniquely defined by its fontbuffer only. It may have
    # multiple names and xrefs.
    # Once the sets of used unicodes and glyphs are known, we compute a
    # smaller version of the buffer using package fontTools.
    font_buffers = {}
    hex_escape = re.compile(r"#([0-9A-Fa-f]{2})")

    def descendant_xref(xref):
        """Return the xref of the single descendant font of 'xref'.

        Returns None unless '/DescendantFonts' is an array that consists of
        exactly one indirect reference of the form 'nnn 0 R'.
        """
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":
            return None
        ref = df[1][1:-1].strip()  # drop the array brackets
        if not ref.endswith(" 0 R"):
            return None
        try:
            return int(ref[:-4])
        except ValueError:  # e.g. more than one array entry
            return None

    def get_old_widths(xref):
        """Retrieve old font '/W' and '/DW' values.

        Returns a pair (widths, dwidths) of strings. An item is None if the
        respective key is missing or not of the expected type.
        """
        df_xref = descendant_xref(xref)
        if df_xref is None:  # only handle xref specifications
            return None, None
        w_type, w_value = doc.xref_get_key(df_xref, "W")
        dw_type, dw_value = doc.xref_get_key(df_xref, "DW")
        widths = w_value if w_type == "array" else None
        dwidths = dw_value if dw_type == "int" else None
        return widths, dwidths

    def set_old_widths(xref, widths, dwidths):
        """Restore the old '/W' and '/DW' in subsetted font.

        If either parameter is None or evaluates to False, the corresponding
        dictionary key will be set to null (if it is present at all).
        """
        df_xref = descendant_xref(xref)
        if df_xref is None:  # only handle xref specs
            return None
        for key, value in (("W", widths), ("DW", dwidths)):
            if isinstance(value, str) and value:
                doc.xref_set_key(df_xref, key, value)
            elif doc.xref_get_key(df_xref, key)[0] != "null":
                doc.xref_set_key(df_xref, key, "null")
            # else: nothing to restore and nothing to remove
        return None

    def set_subset_fontname(new_xref):
        """Generate a name prefix to tag a font as subset.

        We use a random generator to select 6 upper case ASCII characters.
        The prefixed name must be put in the font xref as the "/BaseFont" value
        and in the FontDescriptor object as the '/FontName' value.
        """
        # The following generates a prefix like 'ABCDEF+'
        prefix = "".join(random.choices(string.ascii_uppercase, k=6)) + "+"
        font_str = doc.xref_object(new_xref, compressed=True)
        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
        df_xref = descendant_xref(new_xref)
        if df_xref is not None:
            fd = doc.xref_get_key(df_xref, "FontDescriptor")
            if fd[0] == "xref":
                fd_xref = int(fd[1].replace("0 R", ""))
                fd_str = doc.xref_object(fd_xref, compressed=True)
                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
                doc.update_object(fd_xref, fd_str)
        doc.update_object(new_xref, font_str)
        return None

    def build_subset(buffer, unc_set, gid_set):
        """Build font subset using fontTools.

        Args:
            buffer: (bytes) the font given as a binary buffer.
            unc_set: (set) used unicodes. Modified in place.
            gid_set: (set) used glyph ids. Modified in place.
        Returns:
            Either None if subsetting is unsuccessful or the subset font buffer.
        """
        try:
            import fontTools.subset as fts
        except ImportError:
            print("This method requires fontTools to be installed.")
            raise

        # A private directory avoids clashes between concurrent runs and is
        # removed together with all work files when the block is left.
        with tempfile.TemporaryDirectory() as tmp_dir:
            oldfont_path = os.path.join(tmp_dir, "oldfont.ttf")
            newfont_path = os.path.join(tmp_dir, "newfont.ttf")
            uncfile_path = os.path.join(tmp_dir, "uncfile.txt")
            args = [
                oldfont_path,
                "--retain-gids",
                f"--output-file={newfont_path}",
                # NOTE(review): the quote characters are passed literally to
                # fontTools (no shell is involved) - confirm this is intended.
                "--layout-features='*'",
                "--passthrough-tables",
                "--ignore-missing-glyphs",
                "--ignore-missing-unicodes",
                "--symbol-cmap",
            ]

            # store glyph ids or unicodes as file
            with open(uncfile_path, "w") as unc_file:
                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
                    args.append(f"--gids-file={uncfile_path}")
                    gid_set.add(189)  # always requested; reason not visible here
                    unc_file.writelines("%i\n" % gid for gid in gid_set)
                else:
                    args.append(f"--unicodes-file={uncfile_path}")
                    unc_set.add(255)  # always requested; reason not visible here
                    unc_file.writelines("%04x\n" % unc for unc in unc_set)

            with open(oldfont_path, "wb") as fontfile:  # store fontbuffer as a file
                fontfile.write(buffer)

            try:  # invoke fontTools subsetter
                fts.main(args)
                font = Font(fontfile=newfont_path)
                new_buffer = font.buffer
                if len(font.valid_codepoints()) == 0:
                    new_buffer = None
            except Exception:  # best effort: any failure means "no subset"
                new_buffer = None
        return new_buffer

    def repl_fontnames(doc):
        """Populate 'font_buffers'.

        For each font candidate, store its xref and the list of names
        by which PDF text may refer to it (there may be multiple).
        """

        def norm_name(name):
            """Recreate font name that contains PDF hex codes.

            E.g. #20 -> space, chr(32)
            """
            # Single pass: decoded characters are never re-interpreted, so
            # an escaped '#' (#23) cannot start a new escape sequence.
            return hex_escape.sub(lambda m: chr(int(m.group(1), 16)), name)

        def get_fontnames(doc, item):
            """Return a list of fontnames for an item of page.get_fonts().

            There may be multiple names e.g. for Type0 fonts.
            """
            names = [item[3]]
            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
            fontname = norm_name(fontname)
            if fontname not in names:
                names.append(fontname)
            descendents = doc.xref_get_key(item[0], "DescendantFonts")
            if descendents[0] != "array":
                return names
            descendents = descendents[1][1:-1]
            if descendents.endswith(" 0 R"):
                xref = int(descendents[:-4])
                descendents = doc.xref_object(xref, compressed=True)
            p1 = descendents.find("/BaseFont")
            if p1 >= 0:
                p2 = descendents.find("/", p1 + 1)  # start of the name value
                # The name ends at the next key or at the end of the
                # dictionary, whichever comes first. Ignore failed searches.
                ends = [
                    pos
                    for pos in (
                        descendents.find("/", p2 + 1),
                        descendents.find(">>", p2 + 1),
                    )
                    if pos >= 0
                ]
                end = min(ends) if ends else len(descendents)
                fontname = norm_name(descendents[p2 + 1 : end])
                if fontname not in names:
                    names.append(fontname)
            return names

        for pno in range(doc.page_count):
            for f in doc.get_page_fonts(pno, full=True):
                font_xref = f[0]  # font xref
                font_ext = f[1]  # font file extension
                basename = f[3]  # font basename

                # skip if not supported by fontTools
                if font_ext not in ("otf", "ttf", "woff", "woff2"):
                    continue
                # skip fonts which already are subsets
                if len(basename) > 6 and basename[6] == "+":
                    continue

                extr = doc.extract_font(font_xref)
                fontbuffer = extr[-1]
                names = get_fontnames(doc, f)
                name_set, xref_set, subsets = font_buffers.get(
                    fontbuffer, (set(), set(), (set(), set()))
                )
                xref_set.add(font_xref)
                name_set.update(names)
                font = Font(fontbuffer=fontbuffer)
                name_set.add(font.name)
                del font
                font_buffers[fontbuffer] = (name_set, xref_set, subsets)
        return None

    def find_buffer_by_name(name):
        """Return the font buffer known under 'name', or None."""
        for buffer, (name_set, _, _) in font_buffers.items():
            if name in name_set:
                return buffer
        return None

    # -----------------
    # main function
    # -----------------
    repl_fontnames(doc)  # populate font information
    if not font_buffers:  # nothing found to do
        if verbose:
            print("No fonts to subset.")
        return 0

    old_fontsize = sum(len(fontbuffer) for fontbuffer in font_buffers)
    new_fontsize = 0

    # Scan page text for usage of subsettable fonts
    for page in doc:
        # go through the text and extend set of used glyphs by font
        # we use a modified MuPDF trace device, which delivers us glyph ids.
        for span in page.get_texttrace():
            if type(span) is not dict:  # skip useless information
                continue
            fontname = span["font"][:33]  # fontname for the span
            buffer = find_buffer_by_name(fontname)
            if buffer is None:
                continue
            # the sets are updated in place, no need to store them back
            _, _, (set_ucs, set_gid) = font_buffers[buffer]
            for c in span["chars"]:
                set_ucs.add(c[0])  # unicode
                set_gid.add(c[1])  # glyph id

    # build the font subsets
    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
        fontname = next(iter(name_set))
        if new_buffer is None or len(new_buffer) >= len(old_buffer):
            # subset was not created or did not get smaller
            if verbose:
                print(f"Cannot subset '{fontname}'.")
            continue
        if verbose:
            print(f"Built subset of font '{fontname}'.")
        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
        new_xref = val[0]  # get its xref
        set_subset_fontname(new_xref)  # tag fontname as subset font
        font_str = doc.xref_object(  # get its object definition
            new_xref,
            compressed=True,
        )
        # walk through the original font xrefs and replace each by the subset def
        for font_xref in xref_set:
            # we need the original '/W' and '/DW' width values
            width_table, def_width = get_old_widths(font_xref)
            # ... and replace original font definition at xref with it
            doc.update_object(font_xref, font_str)
            # now copy over old '/W' and '/DW' values
            if width_table or def_width:
                set_old_widths(font_xref, width_table, def_width)
        # 'new_xref' remains unused in the PDF and must be removed
        # by garbage collection.
        new_fontsize += len(new_buffer)

    # NOTE(review): fonts that could not be subsetted contribute their full
    # size to the result - confirm whether this is the intended metric.
    return old_fontsize - new_fontsize
5468
5469
5470 # -------------------------------------------------------------------
5471 # Copy XREF object to another XREF
5472 # -------------------------------------------------------------------
def xref_copy(
    doc: Document,
    source: int,
    target: int,
    *,
    keep: typing.Optional[list] = None,
) -> None:
    """Copy a PDF dictionary object to another one given their xref numbers.

    Args:
        doc: PDF document object
        source: source xref number
        target: target xref number, the xref must already exist
        keep: an optional list of 1st level keys in target that should not be
            removed before copying.
    Notes:
        This works similar to the copy() method of dictionaries in Python. The
        source may be a stream object. Keys listed in 'keep' are still
        overwritten if the source contains them too. Copying an object onto
        itself is a no-op.
    """
    if source == target:
        # Emptying the target below would also empty the source and thus
        # destroy the object instead of copying it.
        return None

    if doc.xref_is_stream(source):
        # read new xref stream, maintaining compression
        stream = doc.xref_stream_raw(source)
        doc.update_stream(
            target,
            stream,
            compress=False,  # keeps source compression
            new=True,  # in case target is no stream
        )

    # empty the target completely, observe exceptions
    protected = [] if keep is None else keep
    for key in doc.xref_get_keys(target):
        if key not in protected:
            doc.xref_set_key(target, key, "null")

    # copy over all source dict items
    for key in doc.xref_get_keys(source):
        _, value = doc.xref_get_key(source, key)
        doc.xref_set_key(target, key, value)
    return None
5498 keep = []
5499 for key in doc.xref_get_keys(target):
5500 if key in keep:
5501 continue
5502 doc.xref_set_key(target, key, "null")
5503 # copy over all source dict items
5504 for key in doc.xref_get_keys(source):
5505 item = doc.xref_get_key(source, key)
5506 doc.xref_set_key(target, key, item[1])
5507 return None