comparison src/__init__.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children d77477b4e151 a6bc019ac0b2
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 '''
2 PyMuPDF implemented on top of MuPDF Python bindings.
3
4 License:
5
6 SPDX-License-Identifier: GPL-3.0-only
7 '''
8
9 # To reduce startup times, we don't import everything we require here.
10 #
11 import atexit
12 import binascii
13 import collections
14 import inspect
15 import io
16 import math
17 import os
18 import pathlib
19 import glob
20 import re
21 import string
22 import sys
23 import tarfile
24 import time
25 import typing
26 import warnings
27 import weakref
28 import zipfile
29
30 from . import extra
31
32
33 # Set up g_out_log and g_out_message from environment variables.
34 #
35 # PYMUPDF_MESSAGE controls the destination of user messages (from function
36 # `pymupdf.message()`).
37 #
38 # PYMUPDF_LOG controls the destination of internal development logging (from
39 # function `pymupdf.log()`).
40 #
41 # For syntax, see _make_output()'s `text` arg.
42 #
43
def _make_output(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        default=None,
        ):
    '''
    Returns a stream that writes to a specified destination, which can be a
    file descriptor, a file, an existing stream or Python's `logging` system.

    If several destinations are specified, the first of fd, stream, path,
    path_append, pylogging* that is not None wins. If none is specified we
    return <default>.

    Args:
        text: text specification of destination.
            fd:<int> - write to file descriptor.
            path:<str> - write to file.
            path+:<str> - append to file.
            logging:<items> - write to Python `logging` module.
                items: comma-separated <name=value> pairs.
                    level=<int>
                    name=<str>.
                    Other names are ignored.

        fd: an int file descriptor.
        stream: something with methods .write(text) and .flush().
            If specified we simply return <stream>.
        path: a file path.
            If specified we return a stream that writes to this file.
        path_append: a file path.
            If specified we return a stream that appends to this file.
        pylogging*:
            if any of these args is not None, we return a stream that writes to
            Python's `logging` module.

            pylogging:
                Unused other than to activate use of logging module.
            pylogging_logger:
                A logging.Logger; If None, set from <pylogging_name>.
            pylogging_level:
                An int log level, if None we use
                pylogging_logger.getEffectiveLevel().
            pylogging_name:
                Only used if <pylogging_logger> is None:
                    If <pylogging_name> is None, we set it to 'pymupdf'.
                    Then we do: pylogging_logger = logging.getLogger(pylogging_name)
        default: returned unchanged if no destination is specified.

    Raises:
        AssertionError if <text> has an unrecognised prefix, if a `logging:`
        item lacks `=`, or if <stream> lacks .write()/.flush().
    '''
    if text is not None:
        # Textual specification, for example from an environment variable.
        if text.startswith('fd:'):
            fd = int(text[3:])
        elif text.startswith('path:'):
            path = text[5:]
        elif text.startswith('path+:'):
            # Documented form. Strip the whole `path+:` prefix; stripping only
            # `path+` would leave the colon as first character of the filename.
            path_append = text[len('path+:'):]
        elif text.startswith('path+'):
            # Undocumented colon-less form `path+<str>`; kept because earlier
            # versions accepted it.
            path_append = text[5:]
        elif text.startswith('logging:'):
            pylogging = True
            items_d = dict()
            for item in text[8:].split(','):
                if not item:
                    continue
                nv = item.split('=', 1)
                assert len(nv) == 2, f'Need `=` in {item=}.'
                n, v = nv
                items_d[n] = v
            pylogging_level = items_d.get('level')
            if pylogging_level is not None:
                pylogging_level = int(pylogging_level)
            pylogging_name = items_d.get('name', 'pymupdf')
        else:
            assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'

    if fd is not None:
        # closefd=False: we do not own the descriptor, so closing the returned
        # stream must not close it.
        ret = open(fd, mode='w', closefd=False)
    elif stream is not None:
        assert hasattr(stream, 'write')
        assert hasattr(stream, 'flush')
        ret = stream
    elif path is not None:
        ret = open(path, 'w')
    elif path_append is not None:
        ret = open(path_append, 'a')
    elif (0
            or pylogging is not None
            or pylogging_logger is not None
            or pylogging_level is not None
            or pylogging_name is not None
            ):
        import logging
        if pylogging_logger is None:
            if pylogging_name is None:
                pylogging_name = 'pymupdf'
            pylogging_logger = logging.getLogger(pylogging_name)
        assert isinstance(pylogging_logger, logging.Logger)
        if pylogging_level is None:
            pylogging_level = pylogging_logger.getEffectiveLevel()
        class Out:
            def write(self, text):
                # `logging` module appends newlines, but so does the `print()`
                # functions in our caller message() and log() fns, so we need to
                # remove them here.
                text = text.rstrip('\n')
                if text:
                    pylogging_logger.log(pylogging_level, text)
            def flush(self):
                pass
        ret = Out()
    else:
        ret = default
    return ret
160
# Set stream used by PyMuPDF messaging.
_g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout)

# Set stream used by PyMuPDF development/debugging logging.
_g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout)

# Things for testing logging.
# While _g_log_items_active is true, log() appends each item to _g_log_items.
_g_log_items = list()
_g_log_items_active = False
170
def _log_items():
    '''
    Returns the list of items recorded by log() (for testing).
    '''
    return _g_log_items
173
def _log_items_active(active):
    '''
    Sets whether log() records its items in the list returned by
    _log_items() (for testing).
    '''
    global _g_log_items_active
    _g_log_items_active = active
177
def _log_items_clear():
    '''
    Empties, in place, the list returned by _log_items().
    '''
    _g_log_items.clear()
180
181
def set_messages(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF messages. See _make_output() for details.

    If no destination is specified, the current destination is kept.
    '''
    global _g_out_message
    _g_out_message = _make_output(
            text=text, fd=fd, stream=stream,
            path=path, path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            # Fall back to whatever is currently in use.
            default=_g_out_message,
            )
210
def set_log(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF development/debugging logging. See
    _make_output() for details.

    If no destination is specified, the current destination is kept.
    '''
    global _g_out_log
    _g_out_log = _make_output(
            text=text, fd=fd, stream=stream,
            path=path, path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            # Fall back to whatever is currently in use.
            default=_g_out_log,
            )
240
def log(text='', caller=1):
    '''
    For development/debugging diagnostics.

    Prefixes <text> with `<filename>:<line>:<function>(): ` taken from stack
    frame number <caller> (1 is our immediate caller), optionally records the
    result in the test list, and prints it to the log stream if one is set.
    '''
    # inspect.stack() must be called directly from this function so that
    # <caller> indexes frames relative to log() itself.
    try:
        frames = inspect.stack(context=0)
    except StopIteration:
        frames = None
    if frames is not None:
        record = frames[caller]
        try:
            where = os.path.relpath(record.filename)
        except Exception:   # Can fail on windows.
            where = record.filename
        text = f'{where}:{record.lineno}:{record.function}(): {text}'
    if _g_log_items_active:
        _g_log_items.append(text)
    if _g_out_log:
        print(text, file=_g_out_log, flush=1)
262
263
def message(text=''):
    '''
    For user messages.

    Prints <text> to the current message stream; does nothing if that stream
    is false.
    '''
    # It looks like `print()` does nothing if sys.stdout is None (without
    # raising an exception), but we don't rely on this.
    if not _g_out_message:
        return
    print(text, file=_g_out_message, flush=1)
272
273
def exception_info():
    '''
    Writes the current exception's formatted traceback to log().
    '''
    from traceback import format_exc
    log('exception_info:')
    log(format_exc())
278
279
# PDF names must not contain these characters:
# (all of string.whitespace, the characters ()<>[]{}/% and the NUL character).
INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0))
282
def get_env_bool(name, default):
    '''
    Reads a boolean setting from environment variable $<name>.

    Returns `default` if $<name> is unset, `True` if it is '1' and `False` if
    it is '0'; any other value assert-fails. A result that differs from
    `default` is reported with log().
    '''
    raw = os.environ.get(name)
    if raw is None:
        value = default
    elif raw in ('0', '1'):
        value = (raw == '1')
    else:
        assert 0, f'Unrecognised value for {name}: {raw!r}'
    if value != default:
        log(f'Using non-default setting from {name}: {raw!r}')
    return value
300
def get_env_int(name, default):
    '''
    Reads an integer setting from environment variable $<name>.

    Returns `default` if $<name> is unset, otherwise `int($<name>)`. A result
    that differs from `default` is reported with log().

    Raises:
        ValueError if $<name> is set but is not a valid integer.
    '''
    v = os.environ.get(name)
    if v is None:
        ret = default
    else:
        ret = int(v)
    if ret != default:
        log(f'Using non-default setting from {name}: {v}')
    return ret
314
# All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
# true.
# Taken from $PYMUPDF_EXCEPTIONS_VERBOSE (an int); defaults to 1.
g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)

# $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`.
# Must be '1' or '0' if set; defaults to True.
#
g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
322
323
324 # Global switches
325 #
326
class _Globals:
    '''
    Holder for the global switches; every switch starts as 0.
    '''
    def __init__(self):
        # Chained assignment binds the targets left to right.
        self.no_device_caching \
                = self.small_glyph_heights \
                = self.subset_fontnames \
                = self.skip_quad_corrections \
                = 0

_globals = _Globals()
335
336
# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
# as of 2023-01-20 11:51:40
#
# $MUPDF_CPPYY unset: use the SWIG bindings.
# $MUPDF_CPPYY empty: `import mupdf_cppyy` from the normal import path.
# $MUPDF_CPPYY otherwise: path of the mupdf_cppyy source file to load.
#
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # Use .get() so that an unset $PYTHONPATH is reported as None instead of
    # aborting the import with KeyError.
    log( f'{__file__}: $PYTHONPATH={os.environ.get("PYTHONPATH")}')
    if mupdf_cppyy == '':
        import mupdf_cppyy
    else:
        # Import the submodule explicitly; `import importlib` alone does not
        # promise that `importlib.machinery` is available.
        import importlib.machinery
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    if hasattr(mupdf, 'internal_check_ndebug'):
        mupdf.internal_check_ndebug()
    mupdf.reinit_singlethreaded()
366
def _int_rc(text):
    '''
    Converts string to int, ignoring trailing 'rc...'.

    Everything from the first occurrence of 'rc' onwards is discarded before
    conversion, e.g. '4rc1' gives 4.
    '''
    number, _, _ = text.partition('rc')
    return int(number)
375
# Basic version information.
#
# (We use `noqa F401` to avoid flake8 errors such as `F401
# '._build.mupdf_location' imported but unused`.)
#
from ._build import mupdf_location      # noqa F401
from ._build import pymupdf_git_branch  # noqa F401
from ._build import pymupdf_git_diff    # noqa F401
from ._build import pymupdf_git_sha     # noqa F401
from ._build import pymupdf_version     # noqa F401
from ._build import swig_version        # noqa F401
from ._build import swig_version_tuple  # noqa F401

mupdf_version = mupdf.FZ_VERSION

# Removed in PyMuPDF-1.26.1.
pymupdf_date = None

# Versions as tuples; useful when comparing versions.
# Each dot-separated component is converted with _int_rc(), so a trailing
# 'rc...' in a component is ignored.
#
pymupdf_version_tuple = tuple( [_int_rc(i) for i in pymupdf_version.split('.')])
mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')])

# Check that the version string agrees with the separate major/minor/patch
# numbers exposed by the bindings.
assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
        f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'

# Legacy version information.
# The date items are always None.
#
version = (pymupdf_version, mupdf_version, None)
VersionFitz = mupdf_version
VersionBind = pymupdf_version
VersionDate = None
408
409
410 # String formatting.
411
def _format_g(value, *, fmt='%g'):
    '''
    Returns `value` formatted with mupdf.fz_format_double() using format
    string `fmt`.

    If `value` is a list or tuple, we return a space-separated string of
    formatted values; nested lists/tuples are handled recursively.
    '''
    if not isinstance(value, (list, tuple)):
        return mupdf.fz_format_double(fmt, value)
    joined = ''
    for item in value:
        piece = _format_g(item, fmt=fmt)
        # A separator is only inserted once we have accumulated some text.
        joined = f'{joined} {piece}' if joined else piece
    return joined

format_g = _format_g
431
# ByteString is gone from typing in 3.14.
# collections.abc.Buffer available from 3.12 only
try:
    ByteString = typing.ByteString
except AttributeError:
    # No typing.ByteString: use an explicit union of the built-in binary types.
    ByteString = bytes | bytearray | memoryview

# Names required by class method typing annotations.
OptBytes = typing.Optional[ByteString]
OptDict = typing.Optional[dict]
OptFloat = typing.Optional[float]
OptInt = typing.Union[int, None]
OptSeq = typing.Optional[typing.Sequence]
OptStr = typing.Optional[str]

# Plain strings bound to these names at this point in the module.
# NOTE(review): `Page` is used with isinstance() further down, so it is
# presumably rebound to the real class later in the file - confirm.
Page = 'Page_forward_decl'
Point = 'Point_forward_decl'

matrix_like = 'matrix_like'
point_like = 'point_like'
quad_like = 'quad_like'
rect_like = 'rect_like'
454
455
def _as_fz_document(document):
    '''
    Returns document as a mupdf.FzDocument, upcasting as required. Raises
    'document closed' exception if closed.

    Accepts a Document, a mupdf.FzDocument or a mupdf.PdfDocument; anything
    else (including None) assert-fails.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        # Continue with the wrapped MuPDF object.
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
473
def _as_pdf_document(document, required=True):
    '''
    Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e.
    `document` is not actually a `PdfDocument`) then we assert-fail if `required`
    is true (the default) else return a `mupdf.PdfDocument` with `.m_internal`
    false.

    Raises ValueError('document closed') if given a closed Document.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        # Continue with the wrapped MuPDF object.
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        pdf = mupdf.PdfDocument(document)
        if required:
            assert pdf.m_internal
        return pdf
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
496
def _as_fz_page(page):
    '''
    Returns page as a mupdf.FzPage, upcasting as required.

    Accepts a Page, a mupdf.PdfPage or a mupdf.FzPage; anything else
    (including None) assert-fails.
    '''
    if isinstance(page, Page):
        # Continue with the wrapped MuPDF object.
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
511
def _as_pdf_page(page, required=True):
    '''
    Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page`
    is not actually a `PdfPage`) then we assert-fail if `required` is true (the
    default) else return a `mupdf.PdfPage` with `.m_internal` false.
    '''
    if isinstance(page, Page):
        # Continue with the wrapped MuPDF object.
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        pdf_page = mupdf.pdf_page_from_fz_page(page)
        if required:
            assert pdf_page.m_internal
        return pdf_page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
531
532
def _pdf_annot_page(annot):
    '''
    Wrapper for mupdf.pdf_annot_page() which raises an exception if <annot>
    is not bound to a page instead of returning a mupdf.PdfPage with
    `.m_internal=None`.

    [Some other MuPDF functions such as `pdf_update_annot()` already raise a
    similar exception if a pdf_annot's .page field is null.]
    '''
    owner = mupdf.pdf_annot_page(annot)
    if owner.m_internal:
        return owner
    raise RuntimeError('Annot is not bound to a page')
546
547
548 # Fixme: we don't support JM_MEMORY=1.
549 JM_MEMORY = 0
550
551 # Classes
552 #
553
554 class Annot:
555
def __init__(self, annot):
    """Wrap `annot`, which must be a mupdf.PdfAnnot, as `self.this`."""
    assert isinstance(annot, mupdf.PdfAnnot)
    self.this = annot
559
def __repr__(self):
    """Return "'<type name>' annotation on <parent>"; parent is shown as
    '<>' if there is no `.parent` attribute."""
    owner = getattr(self, 'parent', '<>')
    kind = self.type[1]
    return f"'{kind!s}' annotation on {owner!s}"
563
def __str__(self):
    """Same text as __repr__()."""
    text = self.__repr__()
    return text
566
def _erase(self):
    """Reset `.thisown` to False if it is currently set and true."""
    owned = getattr(self, "thisown", False)
    if not owned:
        return
    self.thisown = False
570
def _get_redact_values(self):
    """Collect the values of a redaction annotation in a dict.

    Returns None if this is not a redaction annotation or if reading the
    annotation raised an exception. Otherwise the dict holds overlay text,
    alignment, rect, text color, font name, font size and fill color (plus
    the xref of '/RO' if that key is present).
    """
    annot = self.this
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
        return

    result = dict()
    try:
        ro = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
        if ro.m_internal:
            message_warning("Ignoring redaction key '/RO'.")
            result[dictkey_xref] = mupdf.pdf_to_num(ro)

        overlay = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
        if overlay.m_internal:
            result[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            result[dictkey_text] = ''

        quadding = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
        # Alignment defaults to 0 when '/Q' is absent.
        result[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return

    if not result:
        return result
    result["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    result["text_color"] = text_color
    result["fontname"] = fontname
    result["fontsize"] = fontsize
    result["fill"] = self.colors["fill"]
    return result
609
def _getAP(self):
    """Return the content of the '/AP/N' appearance stream.

    With `g_use_extra` this is delegated to extra.Annot_getAP(). Otherwise
    we return None if '/AP/N' is not a stream or could not be loaded.
    """
    annot = self.this
    assert isinstance(annot, mupdf.PdfAnnot)
    if g_use_extra:
        data = extra.Annot_getAP(annot)
        assert isinstance(data, bytes)
        return data

    appearance = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(annot),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not mupdf.pdf_is_stream(appearance):
        return None
    buffer_ = mupdf.pdf_load_stream(appearance)
    if buffer_ and buffer_.m_internal:
        return JM_BinFromBuffer(buffer_)
    return None
628
def _setAP(self, buffer_, rect=0):
    """Replace the content of the '/AP/N' appearance stream by `buffer_`.

    If `rect` is true, the stream's '/BBox' is also set to the annotation's
    '/Rect'. Exceptions are not propagated; they are only reported if
    `g_exceptions_verbose` is true.
    """
    try:
        annot = self.this
        annot_obj = mupdf.pdf_annot_obj(annot)
        page = _pdf_annot_page(annot)
        appearance = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        # '/AP/N' must exist and must be a stream.
        if not appearance.m_internal or not mupdf.pdf_is_stream(appearance):
            raise RuntimeError(MSG_BAD_APN)
        content = JM_BufferFromBytes(buffer_)
        if not content.m_internal:
            raise ValueError(MSG_BAD_BUFFER)
        JM_update_stream(page.doc(), appearance, content, 1)
        if rect:
            mupdf.pdf_dict_put_rect(
                    appearance,
                    PDF_NAME('BBox'),
                    mupdf.pdf_dict_get_rect(annot_obj, PDF_NAME('Rect')),
                    )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
648
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
    """Update the annotation's appearance via MuPDF, then optionally set
    opacity and blend mode.

    Args:
        opacity: only applied if 0 <= opacity < 1.
        blend_mode: if true, stored as '/BM'.
        fill_color: passed to JM_color_FromSequence() to get the number of
            components and their values.
        rotate: if >= 0 and the annotation type is one of those listed
            below, stored as '/Rotate'.

    Returns:
        True.

    Raises:
        Any exception from the update is reported with message() (and with
        exception_info() if g_exceptions_verbose) and then re-raised.
    """
    annot = self.this
    assert annot.m_internal
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    pdf = page.doc()
    type_ = mupdf.pdf_annot_type( annot)
    nfcol, fcol = JM_color_FromSequence(fill_color)

    try:
        # remove fill color from unsupported annots
        # or if so requested
        if nfcol == 0 or type_ not in (
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON
                ):
            mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
        elif nfcol > 0:
            mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])

        # Only store a rotation if one was requested and the annotation type
        # is in the list below.
        insert_rot = 1 if rotate >= 0 else 0
        if type_ not in (
                mupdf.PDF_ANNOT_CARET,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_FREE_TEXT,
                mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                mupdf.PDF_ANNOT_INK,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_STAMP,
                mupdf.PDF_ANNOT_TEXT,
                ):
            insert_rot = 0

        if insert_rot:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

        # insert fill color
        if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
            # For FreeText the fill color is set as the annotation color.
            if nfcol > 0:
                mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
        elif nfcol > 0:
            # Otherwise store the components as array '/IC'.
            col = mupdf.pdf_new_array(page.doc(), nfcol)
            for i in range( nfcol):
                mupdf.pdf_array_push_real(col, fcol[i])
            mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
        mupdf.pdf_dirty_annot(annot)
        mupdf.pdf_update_annot(annot)  # let MuPDF update
        pdf.resynth_required = 0
    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot update annot: {e}')
        raise

    if (opacity < 0 or opacity >= 1) and not blend_mode:    # no opacity, no blend_mode
        return True

    try:    # create or update /ExtGState
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                PDF_NAME('AP'),
                PDF_NAME('N')
                )
        if not ap.m_internal:   # should never happen
            raise RuntimeError( MSG_BAD_APN)

        resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
        if not resources.m_internal:    # no Resources yet: make one
            resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

        # Graphics state dictionary holding the opacity / blend mode values;
        # the same values are also written to the annotation object itself.
        alp0 = mupdf.pdf_new_dict( page.doc(), 3)
        if opacity >= 0 and opacity < 1:
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
            mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)

        if blend_mode:
            mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)

        extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
        if not extg.m_internal: # no ExtGState yet: make one
            extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)

        # Store the graphics state under key '/H' of /ExtGState.
        mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)

    except Exception as e:
        if g_exceptions_verbose:    exception_info()
        message( f'cannot set opacity or blend mode\n: {e}')
        raise

    return True
747
@property
def apn_bbox(self):
    """annotation appearance bbox

    This is '/BBox' of '/AP/N' (or an infinite rectangle if '/AP/N' is
    missing), multiplied by the parent's transformation and derotation
    matrices.
    """
    CheckParent(self)
    appearance = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(self.this),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if appearance.m_internal:
        fz_rect = mupdf.pdf_dict_get_rect(appearance, PDF_NAME('BBox'))
    else:
        fz_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    bbox = Rect(JM_py_from_rect(fz_rect)) * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
764
@property
def apn_matrix(self):
    """annotation appearance matrix

    This is '/Matrix' of '/AP/N' as a Matrix. If '/AP/N' is missing we
    return the conversion of a default-constructed mupdf.FzMatrix instead.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        appearance = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if not appearance.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        fz_matrix = mupdf.pdf_dict_get_matrix(appearance, mupdf.PDF_ENUM_NAME_Matrix)
        return Matrix(JM_py_from_matrix(fz_matrix))
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
788
@property
def blendmode(self):
    """annotation BlendMode

    Taken from the annotation's own '/BM' if present, otherwise from the
    first '/BM' entry found in the /AP/N/Resources/ExtGState dictionaries.
    None if there is none.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    direct = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if direct.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(direct))

    # loop through the /AP/N/Resources/ExtGState objects
    states = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(states):
        return None
    for i in range(mupdf.pdf_dict_len(states)):
        state = mupdf.pdf_dict_get_val(states, i)
        if not mupdf.pdf_is_dict(state):
            continue
        for j in range(mupdf.pdf_dict_len(state)):
            key = mupdf.pdf_dict_get_key(state, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                value = mupdf.pdf_dict_get_val(state, j)
                return JM_UnicodeFromStr(mupdf.pdf_to_name(value))
    return None
820
@property
def border(self):
    """Border information.

    An empty dict for annotation types other than those listed below.
    """
    CheckParent(self)
    with_border = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if self.type[0] in with_border:
        return JM_annot_border(mupdf.pdf_annot_obj(self.this))
    return dict()
839
def clean_contents(self, sanitize=1):
    """Clean appearance contents stream.

    `sanitize` is passed on to the filter options.
    """
    CheckParent(self)
    annot = self.this
    document = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
    options = _make_PdfFilterOptions(
            recurse=1,
            instance_forms=0,
            ascii=0,
            sanitize=sanitize,
            )
    mupdf.pdf_filter_annot_contents(document, annot, options)
847
@property
def colors(self):
    """Color definitions.

    Exceptions are reported if `g_exceptions_verbose` is true and then
    re-raised.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
859
def delete_responses(self):
    """Delete 'Popup' and responding annotations."""
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # Delete responding annotations until JM_find_annot_irt() finds no more.
    while True:
        response = JM_find_annot_irt(annot)
        if not response:
            break
        mupdf.pdf_delete_annot(page, response)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))

    # Remove entries of the page's '/Annots' whose '/Parent' is this
    # annotation. We go backwards so that deletions do not shift the indices
    # still to be visited.
    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    changed = False
    for index in reversed(range(mupdf.pdf_array_len(annots))):
        entry = mupdf.pdf_array_get(annots, index)
        parent = mupdf.pdf_dict_get(entry, PDF_NAME('Parent'))
        if not entry.m_internal:
            continue
        if mupdf.pdf_objcmp(parent, annot_obj):
            continue
        mupdf.pdf_array_delete(annots, index)
        changed = True
    if changed:
        mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
886
@property
def file_info(self):
    """Attached file information.

    Returns a dict with the file name, description, length and size of the
    attached file. Length and size are -1 if not present.

    Raises:
        TypeError if this is not a FileAttachment annotation.
        Whatever RAISEPY() raises if '/FS/EF/F' is missing.
    """
    CheckParent(self)
    res = dict()
    length = -1
    size = -1
    desc = None
    # Initialise like `desc`, so that a file specification with neither '/UF'
    # nor '/F' no longer fails with UnboundLocalError when building the result.
    # NOTE(review): this assumes JM_EscapeStrFromStr() accepts None, like
    # JM_UnicodeFromStr() is already expected to for `desc` - confirm.
    filename = None
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    type_ = mupdf.pdf_annot_type(annot)
    if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # Prefer '/UF' over '/F' for the file name.
    o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
    if o.m_internal:
        filename = mupdf.pdf_to_text_string(o)
    else:
        o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)

    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)

    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_descr] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
936
@property
def flags(self):
    """Flags field."""
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
943
def get_file(self):
    """Retrieve attached file content.

    Raises TypeError if this is not a FileAttachment annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    embedded = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not embedded.m_internal:
        RAISEPY("bad PDF: file entry not found", JM_Exc_FileDataError)
    return JM_BinFromBuffer(mupdf.pdf_load_stream(embedded))
958
def get_oc(self):
    """Get annotation optional content reference.

    This is the object number of '/OC', or 0 if there is no such entry.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ref = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
    return mupdf.pdf_to_num(ref) if ref.m_internal else 0
969
970 # PyMuPDF doesn't seem to have this .parent member, but removing it breaks
971 # 11 tests...?
972 #@property
def get_parent(self):
    """Return `.parent`; if that attribute does not exist yet, create a Page
    for the annotation's page, store it as `.parent` and return it."""
    try:
        return self.parent
    except AttributeError:
        pass
    pdf_page = _pdf_annot_page(self.this)
    assert isinstance(pdf_page, mupdf.PdfPage)
    if pdf_page.m_internal:
        owner_doc = Document(pdf_page.doc())
    else:
        owner_doc = None
    parent = Page(pdf_page, owner_doc)
    # Stored as a normal (strong) reference, not as a weakref proxy.
    self.parent = parent
    return parent
986
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """annotation Pixmap

    Args:
        matrix: transformation to apply; ignored if `dpi` is given.
        dpi: if true, scale by dpi/72 in both directions and store the
            value in the returned Pixmap.
        colorspace: a colorspace, or one of the strings "gray", "rgb",
            "cmyk" (any case). If false (including an unknown string), the
            result of mupdf.fz_device_rgb() is used.
        alpha: passed to mupdf.pdf_new_pixmap_from_annot().
    """

    CheckParent(self)
    by_name = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
    if type(colorspace) is str:
        colorspace = by_name.get(colorspace.lower(), None)
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    cs = colorspace
    if not cs:
        cs = mupdf.fz_device_rgb()

    pixmap = Pixmap(
            mupdf.pdf_new_pixmap_from_annot(
                    self.this,
                    ctm,
                    cs,
                    mupdf.FzSeparations(0),
                    alpha,
                    )
            )
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
1006
def get_sound(self):
    """Retrieve sound stream.

    Returns a dict with key 'stream' plus those of 'rate', 'channels',
    'bps', 'encoding' and 'compression' that are present in '/Sound'.

    Raises TypeError if this is not a Sound annotation or has no '/Sound'.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if annot_type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY("unsupported sound stream", JM_Exc_FileDataError)

    res = dict()

    def store(key, obj, convert):
        # Only entries that exist in '/Sound' are reported.
        if obj.m_internal:
            res[key] = convert(obj)

    store('rate', mupdf.pdf_dict_get(sound, PDF_NAME('R')), mupdf.pdf_to_real)
    store('channels', mupdf.pdf_dict_get(sound, PDF_NAME('C')), mupdf.pdf_to_int)
    store('bps', mupdf.pdf_dict_get(sound, PDF_NAME('B')), mupdf.pdf_to_int)
    store('encoding', mupdf.pdf_dict_get(sound, PDF_NAME('E')), mupdf.pdf_to_name)
    store('compression', mupdf.pdf_dict_gets(sound, "CO"), mupdf.pdf_to_name)

    res['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return res
1038
def get_textpage(self, clip=None, flags=0):
    """Make annotation TextPage.

    If `clip` is true it is set as clip rectangle in the text extraction
    options. The result's `.parent` is a weakref proxy of our parent.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions(flags)
    if clip:
        assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
        options.clip = JM_rect_from_py(clip).internal()
        options.flags |= mupdf.FZ_STEXT_CLIP_RECT
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    parent = self.get_parent()
    # Do not wrap a proxy in another proxy.
    already_proxy = isinstance(parent, weakref.ProxyType)
    textpage.parent = parent if already_proxy else weakref.proxy(parent)
    return textpage
1057
@property
def has_popup(self):
    """Check if annotation has a Popup."""
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    return bool(popup.m_internal)
1065
@property
def info(self):
    """Various information details.

    Returns a dict with content, name, title (= author), creation date,
    modification date, subject and id (PDF key /NM).
    """
    CheckParent(self)
    annot = self.this

    def text_of(obj):
        return JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj))

    details = dict()
    details[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))

    name = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Name'))
    details[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(name))

    # Title (= author)
    title = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('T'))
    details[dictkey_title] = text_of(title)

    # CreationDate
    created = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "CreationDate")
    details[dictkey_creationDate] = text_of(created)

    # ModDate
    modified = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('M'))
    details[dictkey_modDate] = text_of(modified)

    # Subj (stored as returned by pdf_to_text_string(), without
    # JM_UnicodeFromStr()).
    subject = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "Subj")
    details[dictkey_subject] = mupdf.pdf_to_text_string(subject)

    # Identification (PDF key /NM)
    ident = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
    details[dictkey_id] = text_of(ident)

    return details
1099
@property
def irt_xref(self):
    '''
    annotation IRT xref; 0 if the annotation has no /IRT entry
    '''
    irt = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('IRT'))
    return mupdf.pdf_to_num(irt) if irt.m_internal else 0
1111
@property
def is_open(self):
    """Get 'open' status of annotation or its Popup."""
    CheckParent(self)
    annot = self.this
    return mupdf.pdf_annot_is_open(annot)
1117
@property
def language(self):
    """annotation language; None if the language is unset"""
    lang_code = mupdf.pdf_annot_language(self.this)
    if lang_code != mupdf.FZ_LANG_UNSET:
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        return mupdf.fz_string_from_text_language2(lang_code)
    return None
1127
@property
def line_ends(self):
    """Line end codes as a (start, end) pair.

    None for annotations that have no line ending styles.
    """
    CheckParent(self)
    annot = self.this
    if mupdf.pdf_annot_has_line_ending_styles(annot):
        return (
            mupdf.pdf_annot_line_start_style(annot),
            mupdf.pdf_annot_line_end_style(annot),
        )
    return None
1139
@property
def next(self):
    """Next annotation.

    Widgets are followed via pdf_next_widget(), everything else via
    pdf_next_annot(). Returns None when there is no successor; a successor
    of widget type is returned as a filled Widget object.
    """
    CheckParent(self)
    current = self.this
    assert isinstance(current, mupdf.PdfAnnot)
    assert current.m_internal
    if mupdf.pdf_annot_type(current) == mupdf.PDF_ANNOT_WIDGET:
        successor = mupdf.pdf_next_widget(current)
    else:
        successor = mupdf.pdf_next_annot(current)

    result = Annot(successor) if successor.m_internal else None
    if not result:
        return None
    result.thisown = True
    assert result.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    # register the new object with its parent
    result.parent._annot_refs[id(result)] = result

    if result.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return result
    widget = Widget()
    TOOLS._fill_widget(result, widget)
    return widget
1165
@property
def opacity(self):
    """Opacity; -1 if the /CA entry is not a number."""
    CheckParent(self)
    ca = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_CA)
    if not mupdf.pdf_is_number(ca):
        return -1
    return mupdf.pdf_to_real(ca)
1176
@property
def popup_rect(self):
    """annotation 'Popup' rectangle

    Starts from the infinite rectangle when there is no 'Popup' entry.
    The result is multiplied with the parent's transformation matrix and
    then with its derotation matrix.
    """
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    if popup.m_internal:
        raw = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
    else:
        raw = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    result = Rect(JM_py_from_rect(raw)) * self.get_parent().transformation_matrix
    result *= self.get_parent().derotation_matrix
    return result
1195
@property
def popup_xref(self):
    """annotation 'Popup' xref; 0 if there is no 'Popup' entry"""
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    return mupdf.pdf_to_num(popup) if popup.m_internal else 0
1207
@property
def rect(self):
    """annotation rectangle

    Uses the `extra` module when g_use_extra is set, otherwise
    pdf_bound_annot(); the result is multiplied with the parent's
    derotation matrix.
    """
    raw = extra.Annot_rect3(self.this) if g_use_extra else mupdf.pdf_bound_annot(self.this)
    bounds = Rect(raw)

    # Caching self.parent_() reduces 1000x from 0.07 to 0.04.
    owner = self.get_parent()
    bounds *= owner.derotation_matrix
    return bounds
1227
@property
def rect_delta(self):
    '''
    annotation delta values to rectangle

    Read from the /RD array. Returns a 4-tuple in which the third and
    fourth array values are negated, or None if /RD does not have exactly
    four items.
    '''
    rd = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('RD'))
    if mupdf.pdf_array_len(rd) != 4:
        return None
    deltas = [mupdf.pdf_to_real(mupdf.pdf_array_get(rd, i)) for i in range(4)]
    return (deltas[0], deltas[1], -deltas[2], -deltas[3])
1242
@property
def rotation(self):
    """annotation rotation; -1 if there is no /Rotate entry"""
    CheckParent(self)
    rotate_obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
    if rotate_obj.m_internal:
        return mupdf.pdf_to_int(rotate_obj)
    return -1
1252
def set_apn_bbox(self, bbox):
    """
    Set annotation appearance bbox.

    `bbox` is multiplied with the page's rotation matrix and the inverse of
    its transformation matrix before being stored as /BBox of the /AP /N
    object.

    Raises:
        RuntimeError: the annotation has no /AP /N object.
    """
    CheckParent(self)
    page = self.get_parent()
    bbox *= page.rotation_matrix * ~page.transformation_matrix
    normal_ap = mupdf.pdf_dict_getl(mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N'))
    if not normal_ap.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_rect(normal_ap, PDF_NAME('BBox'), JM_rect_from_py(bbox))
1269
def set_apn_matrix(self, matrix):
    """Set annotation appearance matrix.

    Raises:
        RuntimeError: the annotation has no /AP /N object.
    """
    CheckParent(self)
    normal_ap = mupdf.pdf_dict_getl(mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N'))
    if not normal_ap.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_matrix(normal_ap, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
1280
def set_blendmode(self, blend_mode):
    """Set annotation BlendMode (stored as name under key /BM)."""
    CheckParent(self)
    mupdf.pdf_dict_put_name(mupdf.pdf_annot_obj(self.this), PDF_NAME('BM'), blend_mode)
1287
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either a dict, or direct arguments width, style, dashes or clouds.

    Args:
        border: dict with any of the keys "width", "style", "dashes",
            "clouds". If it is not a dict, one is built from the other
            arguments. A dict passed in is not modified.
        width: border width; None is treated like -1.
        style: border style.
        dashes: sequence of ints; a sequence containing any non-int item
            is replaced by None.
        clouds: cloudy border value; None is treated like -1.

    Returns:
        None (after a message) if the annotation type supports no border,
        otherwise the result of JM_annot_set_border().
    """
    CheckParent(self)
    atype, atname = self.type[:2]  # annotation type
    border_types = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
    )
    cloudy_types = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
    )
    if atype not in border_types:
        message(f"Cannot set border for '{atname}'.")
        return None
    if atype not in cloudy_types and clouds > 0:
        message(f"Cannot set cloudy border for '{atname}'.")
        clouds = -1  # do not set border effect

    if type(border) is dict:
        # Work on a shallow copy: the defaults and normalisations below
        # must not leak into the caller's dictionary.
        border = dict(border)
    else:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}
    border.setdefault("width", -1)
    border.setdefault("style", None)
    border.setdefault("dashes", None)
    border.setdefault("clouds", -1)
    if border["width"] is None:
        border["width"] = -1
    if border["clouds"] is None:
        border["clouds"] = -1

    dash_spec = border["dashes"]
    if hasattr(dash_spec, "__getitem__"):  # ensure sequence items are integers
        dash_spec = tuple(dash_spec)
        if all(isinstance(item, int) for item in dash_spec):
            border["dashes"] = dash_spec
        else:
            border["dashes"] = None

    annot_obj = mupdf.pdf_annot_obj(self.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    return JM_annot_set_border(border, pdf, annot_obj)
1334
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict or the direct arguments.

    The stroke color is written to key "C", the fill color to key "IC" of
    the annotation object. An empty list / tuple writes "[]", None leaves
    the key untouched. A truthy fill color for an annotation type that
    takes no fill only produces a warning message.

    Raises:
        ValueError: the annotation is a FreeText annotation.
    """
    if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
        raise ValueError("cannot be used for FreeText annotations")

    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")

    fill_annots = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_REDACT,
    )

    def store(key, color):
        # write one color array to the annotation's xref under `key`
        if color in ([], ()):
            doc.xref_set_key(self.xref, key, "[]")
            return
        if color is None:
            return
        if hasattr(color, "__float__"):
            color = [float(color)]
        CheckColor(color)
        assert len(color) in (1, 3, 4)
        doc.xref_set_key(self.xref, key, f"[{_format_g(color)}]")

    store("C", stroke)

    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return
    store("IC", fill)
1375
def set_flags(self, flags):
    """Set annotation flags."""
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
1381
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """Set various properties.

    If `info` is a dict, all values are taken from it and the other
    arguments are ignored. Only truthy values are written. Title, dates
    and subject are written only if pdf_annot_has_author() is true for the
    annotation.
    """
    CheckParent(self)
    if type(info) is dict:  # build the args from the dictionary
        content, title, creationDate, modDate, subject = [
            info.get(key, None)
            for key in ("content", "title", "creationDate", "modDate", "subject")
        ]
    annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(annot)
    if content:
        mupdf.pdf_set_annot_contents(annot, content)
    if not is_markup:
        return
    if title:  # title (= author)
        mupdf.pdf_set_annot_author(annot, title)
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate)
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate)
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject))
1411
def set_irt_xref(self, xref):
    '''
    Set annotation IRT xref

    Raises:
        ValueError: `xref` is outside the document's xref range, or the
            object it refers to has no valid annotation subtype.
    '''
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()):
        raise ValueError( MSG_BAD_XREF)
    target = mupdf.pdf_new_indirect( page.doc(), xref, 0)
    subtype_name = mupdf.pdf_to_name( mupdf.pdf_dict_get( target, PDF_NAME('Subtype')))
    # a negative type number means the subtype is no annotation type
    if mupdf.pdf_annot_type_from_string( subtype_name) < 0:
        raise ValueError( MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('IRT'), target)
1427
def set_language(self, language=None):
    """Set annotation language; a falsy value unsets it."""
    CheckParent(self)
    annot = self.this
    if language:
        lang_code = mupdf.fz_text_language_from_string(language)
    else:
        lang_code = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_annot_language(annot, lang_code)
1437
def set_line_ends(self, start, end):
    """Set line end codes.

    Only warns if the annotation has no line ending styles.
    """
    CheckParent(self)
    annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(annot, start, end)
1446
def set_name(self, name):
    """Set /Name (icon) of annotation."""
    CheckParent(self)
    mupdf.pdf_dict_put_name(mupdf.pdf_annot_obj(self.this), PDF_NAME('Name'), name)
1453
def set_oc(self, oc=0):
    """Set / remove annotation OC xref.

    A falsy `oc` deletes the /OC key, anything else is passed on to
    JM_add_oc_object().
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    if oc:
        JM_add_oc_object(mupdf.pdf_get_bound_document(annot_obj), annot_obj, oc)
    else:
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
1463
def set_opacity(self, opacity):
    """Set opacity.

    A value for which _INRANGE(opacity, 0.0, 1.0) is false sets opacity 1.
    A value below 1 also sets the page's `transparency` attribute to 1.
    """
    CheckParent(self)
    annot = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(annot, opacity)
        if opacity < 1.0:
            _pdf_annot_page(annot).transparency = 1
    else:
        mupdf.pdf_set_annot_opacity(annot, 1)
1475
def set_open(self, is_open):
    """Set 'open' status of annotation or its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
1481
def set_popup(self, rect):
    '''
    Create annotation 'Popup' or update rectangle.

    `rect` is transformed with the page's rotation matrix first.
    '''
    CheckParent(self)
    annot = self.this
    page_rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_rotation)
    mupdf.pdf_set_annot_popup(annot, popup_rect)
1492
def set_rect(self, rect):
    """Set annotation rectangle.

    `rect` is transformed with the page's rotation matrix first.

    Returns:
        None on success; False (after a message) if MuPDF raised an
        exception while setting the rectangle.

    Raises:
        ValueError: the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    annot = self.this

    page_rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
    target = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_rotation)
    if mupdf.fz_is_empty_rect(target) or mupdf.fz_is_infinite_rect(target):
        raise ValueError( MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(annot, target)
    except Exception as e:
        message(f'cannot set rect: {e}')
        return False
    return None
1508
def set_rotation(self, rotate=0):
    """Set annotation rotation.

    Does nothing for annotation types outside the list below. The angle is
    brought into the range 0 <= angle < 360; for FreeText annotations an
    angle that is no multiple of 90 becomes 0.
    """
    CheckParent(self)

    annot = self.this
    annot_type = mupdf.pdf_annot_type(annot)
    rotatable = (
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
    )
    if annot_type not in rotatable:
        return

    angle = rotate
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360
    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
        angle = 0
    mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Rotate'), angle)
1538
@property
def type(self):
    """annotation type

    Returns:
        The string 'null' if there is no underlying annotation object.
        Otherwise a tuple (type number, type string), extended by a third
        item with the value of the /IT entry if that entry exists and is
        a PDF name.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    c = mupdf.pdf_string_from_annot_type(type_)
    o = mupdf.pdf_dict_gets( mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a /IT entry that is a name can be reported. The previous test
    # was inverted: it dropped /IT exactly when it was a name.
    if not o.m_internal or not mupdf.pdf_is_name(o):
        return (type_, c)
    it = mupdf.pdf_to_name(o)
    return (type_, c, it)
1552
def update(self,
        blend_mode: OptStr =None,
        opacity: OptFloat =None,
        fontsize: float =0,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        border_color: OptSeq =None,
        fill_color: OptSeq =None,
        cross_out: bool =True,
        rotate: int =-1,
        ):
    """Update annot appearance.

    Notes:
        Depending on the annot type, some parameters make no sense,
        while others are only available in this method to achieve the
        desired result. This is especially true for 'FreeText' annots.
    Args:
        blend_mode: set the blend mode, all annotations.
        opacity: set the opacity, all annotations.
        fontsize: set fontsize, 'FreeText' only.
        fontname: set the font, 'FreeText' only.
        border_color: set border color, 'FreeText' only.
        text_color: set text color, 'FreeText' only.
        fill_color: set fill color, all annotations.
        cross_out: draw diagonal lines, 'Redact' only.
        rotate: set rotation, 'FreeText' and some others.
    Raises:
        ValueError: border_color given, but the annotation has no /RC entry.
        RuntimeError: the appearance update returned False.
    """
    annot_obj = mupdf.pdf_annot_obj(self.this)

    if border_color:
        is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
        if not is_rich_text:
            raise ValueError("cannot set border_color if rich_text is False")
    Annot.update_timing_test()
    CheckParent(self)
    def color_string(cs, code):
        """Return valid PDF color operator for a given color sequence.
        """
        cc = ColorCode(cs, code)
        if not cc:
            return b""
        return (cc + "\n").encode()

    annot_type = self.type[0]  # get the annot type

    dt = self.border.get("dashes", None)  # get the dashes spec
    bwidth = self.border.get("width", -1)  # get border line width
    stroke = self.colors["stroke"]  # get the stroke color
    if fill_color is not None:
        fill = fill_color
    else:
        fill = self.colors["fill"]
    rect = None  # self.rect  # prevent MuPDF fiddling with it
    apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
    if rotate != -1:  # sanitize rotation value
        while rotate < 0:
            rotate += 360
        while rotate >= 360:
            rotate -= 360
        if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
            rotate = 0

    #------------------------------------------------------------------
    # handle opacity and blend mode
    #------------------------------------------------------------------
    if blend_mode is None:
        blend_mode = self.blendmode
    if not hasattr(opacity, "__float__"):
        opacity = self.opacity

    if 0 <= opacity < 1 or blend_mode:
        opa_code = "/H gs\n"  # then we must reference this 'gs'
    else:
        opa_code = ""

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        CheckColor(text_color)
        CheckColor(fill_color)
        tcol, fname, fsize = TOOLS._parse_da(self)

        # read and update default appearance as necessary
        if fsize <= 0:
            fsize = 12
        if text_color:
            tcol = text_color
        if fontname:
            fname = fontname
        if fontsize > 0:
            fsize = fontsize
        JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
        blend_mode = None  # not supported for free text annotations!

    #------------------------------------------------------------------
    # now invoke MuPDF to update the annot appearance
    #------------------------------------------------------------------
    val = self._update_appearance(
        opacity=opacity,
        blend_mode=blend_mode,
        fill_color=fill,
        rotate=rotate,
    )
    if val is False:
        raise RuntimeError("Error updating annotation.")

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        # in absence of previous opacity, we may need to modify the AP
        ap = self._getAP()
        if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
            self._setAP(b"/H gs\n" + ap)
        return

    bfill = color_string(fill, "f")
    bstroke = color_string(stroke, "c")

    p_ctm = self.get_parent().transformation_matrix
    imat = ~p_ctm  # inverse page transf. matrix

    if dt:
        dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
        dashes = dashes.encode("utf-8")
    else:
        dashes = None

    if self.line_ends:
        line_end_le, line_end_ri = self.line_ends
    else:
        line_end_le, line_end_ri = 0, 0  # init line end codes

    # read contents as created by MuPDF
    ap = self._getAP()
    ap_tab = ap.splitlines()  # split in single lines
    ap_updated = False  # assume we did nothing

    if annot_type == mupdf.PDF_ANNOT_REDACT:
        if cross_out:  # create crossed-out rect
            ap_updated = True
            ap_tab = ap_tab[:-1]
            # expects exactly five remaining lines: one leading line and
            # the four corner lines of the rectangle
            _, LL, LR, UR, UL = ap_tab
            ap_tab.append(LR)
            ap_tab.append(LL)
            ap_tab.append(UR)
            ap_tab.append(LL)
            ap_tab.append(UL)
            ap_tab.append(b"S")

        if bwidth > 0 or bstroke != b"":
            ap_updated = True
            ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
            for line in ap_tab:
                if line.endswith(b"w"):
                    continue
                if line.endswith(b"RG") and bstroke != b"":
                    line = bstroke[:-1]  # color operator without its newline
                ntab.append(line)
            ap_tab = ntab

        ap = b"\n".join(ap_tab)

    if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        ap = b"\n".join(ap_tab[:-1]) + b"\n"
        ap_updated = True
        if bfill != b"":
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + bfill + b"b"  # close, fill, and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke
        else:
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + b"s"  # close and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke

    if dashes is not None:  # handle dashes
        ap = dashes + ap
        # reset dashing - only applies for LINE annots with line ends given
        ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
        ap_updated = True

    if opa_code:
        ap = opa_code.encode("utf-8") + ap
        ap_updated = True

    ap = b"q\n" + ap + b"\nQ\n"
    #----------------------------------------------------------------------
    # the following handles line end symbols for 'Polygon' and 'Polyline'
    #----------------------------------------------------------------------
    if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):

        le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                    TOOLS._le_diamond, TOOLS._le_openarrow,
                    TOOLS._le_closedarrow, TOOLS._le_butt,
                    TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                    TOOLS._le_slash)
        le_funcs_range = range(1, len(le_funcs))
        d = 2 * max(1, self.border["width"])
        rect = self.rect + (-d, -d, d, d)  # enlarged rect for the symbols
        ap_updated = True
        points = self.vertices
        if line_end_le in le_funcs_range:
            p1 = Point(points[0]) * imat
            p2 = Point(points[1]) * imat
            left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
            ap += left.encode()
        if line_end_ri in le_funcs_range:
            p1 = Point(points[-2]) * imat
            p2 = Point(points[-1]) * imat
            left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
            ap += left.encode()

    if ap_updated:
        if rect:  # rect modified here?
            self.set_rect(rect)
            self._setAP(ap, rect=1)
        else:
            self._setAP(ap, rect=0)

    #-------------------------------
    # handle annotation rotations
    #-------------------------------
    if annot_type not in (  # only these types are supported
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
    ):
        return

    rot = self.rotation  # get value from annot object
    if rot == -1:  # nothing to change
        return

    M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

    if rot == 0:  # undo rotations
        if abs(apnmat - Matrix(1, 1)) < 1e-5:
            return  # matrix already is a no-op
        quad = self.rect.morph(M, ~apnmat)  # derotate rect
        # set_rect() is the method this class defines; the former call to
        # "setRect" referred to a name that is not defined here.
        self.set_rect(quad.rect)
        self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
        return

    mat = Matrix(rot)
    quad = self.rect.morph(M, mat)
    self.set_rect(quad.rect)
    self.set_apn_matrix(apnmat * mat)
1805
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update attached file.

    Args:
        buffer_: new file content.
        filename: new value for /F and /UF (also stored as /Contents).
        ufilename: new value for /UF; applied after `filename`.
        desc: new value for /Desc.

    Raises:
        TypeError: the annotation is no file attachment.
        ValueError: `buffer_` is truthy but gave no valid buffer.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    # the object for file content
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not stream.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))
    targets = (stream, fs)  # both objects receive the text entries

    # file content given
    res = JM_BufferFromBytes(buffer_)
    if buffer_ and not res.m_internal:
        raise ValueError( MSG_BAD_BUFFER)
    if res:
        JM_update_stream(pdf, stream, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    if filename:
        for key in ('F', 'UF'):
            for target in targets:
                mupdf.pdf_dict_put_text_string(target, PDF_NAME(key), filename)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    if ufilename:
        for target in targets:
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('UF'), ufilename)

    if desc:
        for target in targets:
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('Desc'), desc)
1848
1849 @staticmethod
1850 def update_timing_test():
1851 total = 0
1852 for i in range( 30*1000):
1853 total += i
1854 return total
1855
@property
def vertices(self):
    """annotation vertex points

    Returns:
        A list of (x, y) tuples taken from the first entry found among
        /Vertices, /L, /QuadPoints and /CL; else a list of such lists if
        there is an /InkList; else None. Every point is transformed with
        the page transformation matrix concatenated with the page's
        derotation matrix.
    """
    CheckParent(self)
    annot = self.this
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    page_ctm = mupdf.FzMatrix()  # page transformation matrix
    dummy = mupdf.FzRect()  # Out-param for mupdf.pdf_page_transform().
    mupdf.pdf_page_transform(page, dummy, page_ctm)
    page_ctm = mupdf.fz_concat(page_ctm, JM_derotate_page_matrix(page))

    def points_of(array):
        # Every pair of floats is one point, transformed separately.
        pts = []
        for k in range(0, mupdf.pdf_array_len(array), 2):
            raw = mupdf.FzPoint(
                mupdf.pdf_to_real(mupdf.pdf_array_get(array, k)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(array, k + 1)),
            )
            moved = mupdf.fz_transform_point(raw, page_ctm)
            pts.append((moved.x, moved.y))
        return pts

    #----------------------------------------------------------------
    # The following objects occur in different annotation types,
    # so the first one present is used.
    #----------------------------------------------------------------
    flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('L'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_gets(annot_obj, 'CL')

    if flat.m_internal:
        # lists with 1-level depth
        return points_of(flat)

    ink = mupdf.pdf_dict_gets(annot_obj, 'InkList')
    if ink.m_internal:
        # InkList has 2-level lists
        return [
            points_of(mupdf.pdf_array_get(ink, i))
            for i in range(mupdf.pdf_array_len(ink))
        ]
    return None
1909
@property
def xref(self):
    """annotation xref number"""
    CheckParent(self)
    return mupdf.pdf_to_num(mupdf.pdf_annot_obj(self.this))
1916
1917
1918 class Archive:
1919 def __init__( self, *args):
1920 '''
1921 Archive(dirname [, path]) - from folder
1922 Archive(file [, path]) - from file name or object
1923 Archive(data, name) - from memory item
1924 Archive() - empty archive
1925 Archive(archive [, path]) - from archive
1926 '''
1927 self._subarchives = list()
1928 self.this = mupdf.fz_new_multi_archive()
1929 if args:
1930 self.add( *args)
1931
1932 def __repr__( self):
1933 return f'Archive, sub-archives: {len(self._subarchives)}'
1934
1935 def _add_arch( self, subarch, path=None):
1936 mupdf.fz_mount_multi_archive( self.this, subarch, path)
1937
1938 def _add_dir( self, folder, path=None):
1939 sub = mupdf.fz_open_directory( folder)
1940 mupdf.fz_mount_multi_archive( self.this, sub, path)
1941
1942 def _add_treeitem( self, memory, name, path=None):
1943 buff = JM_BufferFromBytes( memory)
1944 sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
1945 mupdf.fz_tree_archive_add_buffer( sub, name, buff)
1946 mupdf.fz_mount_multi_archive( self.this, sub, path)
1947
1948 def _add_ziptarfile( self, filepath, type_, path=None):
1949 if type_ == 1:
1950 sub = mupdf.fz_open_zip_archive( filepath)
1951 else:
1952 sub = mupdf.fz_open_tar_archive( filepath)
1953 mupdf.fz_mount_multi_archive( self.this, sub, path)
1954
1955 def _add_ziptarmemory( self, memory, type_, path=None):
1956 buff = JM_BufferFromBytes( memory)
1957 stream = mupdf.fz_open_buffer( buff)
1958 if type_==1:
1959 sub = mupdf.fz_open_zip_archive_with_stream( stream)
1960 else:
1961 sub = mupdf.fz_open_tar_archive_with_stream( stream)
1962 mupdf.fz_mount_multi_archive( self.this, sub, path)
1963
1964 def add( self, content, path=None):
1965 '''
1966 Add a sub-archive.
1967
1968 Args:
1969 content:
1970 The content to be added. May be one of:
1971 `str` - must be path of directory or file.
1972 `bytes`, `bytearray`, `io.BytesIO` - raw data.
1973 `zipfile.Zipfile`.
1974 `tarfile.TarFile`.
1975 `pymupdf.Archive`.
1976 A two-item tuple `(data, name)`.
1977 List or tuple (but not tuple with length 2) of the above.
1978 path: (str) a "virtual" path name, under which the elements
1979 of content can be retrieved. Use it to e.g. cope with
1980 duplicate element names.
1981 '''
1982 def is_binary_data(x):
1983 return isinstance(x, (bytes, bytearray, io.BytesIO))
1984
1985 def make_subarch(entries, mount, fmt):
1986 subarch = dict(fmt=fmt, entries=entries, path=mount)
1987 if fmt != "tree" or self._subarchives == []:
1988 self._subarchives.append(subarch)
1989 else:
1990 ltree = self._subarchives[-1]
1991 if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
1992 self._subarchives.append(subarch)
1993 else:
1994 ltree["entries"].extend(subarch["entries"])
1995 self._subarchives[-1] = ltree
1996
1997 if isinstance(content, pathlib.Path):
1998 content = str(content)
1999
2000 if isinstance(content, str):
2001 if os.path.isdir(content):
2002 self._add_dir(content, path)
2003 return make_subarch(os.listdir(content), path, 'dir')
2004 elif os.path.isfile(content):
2005 assert isinstance(path, str) and path != '', \
2006 f'Need name for binary content, but {path=}.'
2007 with open(content) as f:
2008 ff = f.read()
2009 self._add_treeitem(ff, path)
2010 return make_subarch([path], None, 'tree')
2011 else:
2012 raise ValueError(f'Not a file or directory: {content!r}')
2013
2014 elif is_binary_data(content):
2015 assert isinstance(path, str) and path != '' \
2016 f'Need name for binary content, but {path=}.'
2017 self._add_treeitem(content, path)
2018 return make_subarch([path], None, 'tree')
2019
2020 elif isinstance(content, zipfile.ZipFile):
2021 filename = getattr(content, "filename", None)
2022 if filename is None:
2023 fp = content.fp.getvalue()
2024 self._add_ziptarmemory(fp, 1, path)
2025 else:
2026 self._add_ziptarfile(filename, 1, path)
2027 return make_subarch(content.namelist(), path, 'zip')
2028
2029 elif isinstance(content, tarfile.TarFile):
2030 filename = getattr(content.fileobj, "name", None)
2031 if filename is None:
2032 fp = content.fileobj
2033 if not isinstance(fp, io.BytesIO):
2034 fp = fp.fileobj
2035 self._add_ziptarmemory(fp.getvalue(), 0, path)
2036 else:
2037 self._add_ziptarfile(filename, 0, path)
2038 return make_subarch(content.getnames(), path, 'tar')
2039
2040 elif isinstance(content, Archive):
2041 self._add_arch(content, path)
2042 return make_subarch([], path, 'multi')
2043
2044 if isinstance(content, tuple) and len(content) == 2:
2045 # covers the tree item plus path
2046 data, name = content
2047 assert isinstance(name, str), f'Unexpected {type(name)=}'
2048 if is_binary_data(data):
2049 self._add_treeitem(data, name, path=path)
2050 elif isinstance(data, str):
2051 if os.path.isfile(data):
2052 with open(data, 'rb') as f:
2053 ff = f.read()
2054 self._add_treeitem(ff, name, path=path)
2055 else:
2056 assert 0, f'Unexpected {type(data)=}.'
2057 return make_subarch([name], path, 'tree')
2058
2059 elif hasattr(content, '__getitem__'):
2060 # Deal with sequence of disparate items.
2061 for item in content:
2062 self.add(item, path)
2063 return
2064
2065 else:
2066 raise TypeError(f'Unrecognised type {type(content)}.')
2067 assert 0
2068
2069 @property
2070 def entry_list( self):
2071 '''
2072 List of sub archives.
2073 '''
2074 return self._subarchives
2075
def has_entry(self, name):
    """Return MuPDF's answer to whether the archive has an entry `name`."""
    archive = self.this
    return mupdf.fz_has_archive_entry(archive, name)
2078
def read_entry(self, name):
    """Read archive entry `name` and return its content.

    The MuPDF buffer obtained for the entry is converted with
    `JM_BinFromBuffer()` before being returned.
    """
    return JM_BinFromBuffer(mupdf.fz_read_archive_entry(self.this, name))
2082
2083
class Xml:
    """Python wrapper around a MuPDF DOM node (`mupdf.FzXml`).

    The wrapped node is kept in `self.this`. Navigation properties return a
    new `Xml` wrapper, or `None` when MuPDF reports no node. The `add_*`
    methods create child elements; the `set_*` methods write CSS into the
    `style` attribute, either directly or via nested `span` elements.
    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        pass

    def __init__(self, rhs):
        """Wrap an existing `mupdf.FzXml` or parse a `str` as HTML5."""
        if isinstance(rhs, mupdf.FzXml):
            self.this = rhs
        elif isinstance(rhs, str):
            buff = mupdf.fz_new_buffer_from_copied_data(rhs)
            self.this = mupdf.fz_parse_xml_from_html5(buff)
        else:
            assert 0, f'Unsupported type for rhs: {type(rhs)}'

    def _get_node_tree(self):
        """Return a flat list of `(depth, text)` items describing the tree."""
        def show_node(node, items, shift):
            while node is not None:
                if node.is_text:
                    items.append((shift, f'"{node.text}"'))
                    node = node.next
                    continue
                items.append((shift, f"({node.tagname}"))
                for k, v in node.get_attributes().items():
                    items.append((shift, f"={k} '{v}'"))
                child = node.first_child
                if child:
                    items = show_node(child, items, shift + 1)
                items.append((shift, f"){node.tagname}"))
                node = node.next
            return items

        return show_node(self, [], 0)

    def add_bullet_list(self):
        """Add bulleted list ("ul" tag)"""
        child = self.create_element("ul")
        self.append_child(child)
        return child

    def add_class(self, text):
        """Set some class via CSS. Replaces complete class spec."""
        cls = self.get_attribute_value("class")
        if cls is not None and text in cls:
            return self
        self.remove_attribute("class")
        if cls is None:
            cls = text
        else:
            cls += " " + text
        self.set_attribute("class", cls)
        return self

    def add_code(self, text=None):
        """Add a "code" tag"""
        child = self.create_element("code")
        if isinstance(text, str):
            child.append_child(self.create_text_node(text))
        prev = self.span_bottom()
        if prev is None:
            prev = self
        prev.append_child(child)
        return self

    def add_codeblock(self):
        """Add monospaced lines ("pre" node)"""
        child = self.create_element("pre")
        self.append_child(child)
        return child

    def add_description_list(self):
        """Add description list ("dl" tag)"""
        child = self.create_element("dl")
        self.append_child(child)
        return child

    def add_division(self):
        """Add "div" tag"""
        child = self.create_element("div")
        self.append_child(child)
        return child

    def add_header(self, level=1):
        """Add header tag.

        If this node is itself a header or a paragraph, the new header is
        appended to this node's parent instead of to this node.

        Raises:
            ValueError: if `level` is not one of 1..6.
        """
        if level not in range(1, 7):
            raise ValueError("Header level must be in [1, 6]")
        this_tag = self.tagname
        new_tag = f"h{level}"
        child = self.create_element(new_tag)
        if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
            self.append_child(child)
            return child
        self.parent.append_child(child)
        return child

    def add_horizontal_line(self):
        """Add horizontal line ("hr" tag)"""
        child = self.create_element("hr")
        self.append_child(child)
        return child

    def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
        """Add image node (tag "img")."""
        child = self.create_element("img")
        if width is not None:
            child.set_attribute("width", f"{width}")
        if height is not None:
            child.set_attribute("height", f"{height}")
        if imgfloat is not None:
            child.set_attribute("style", f"float: {imgfloat}")
        if align is not None:
            child.set_attribute("align", f"{align}")
        child.set_attribute("src", f"{name}")
        self.append_child(child)
        return child

    def add_link(self, href, text=None):
        """Add a hyperlink ("a" tag). `href` is used as text if none given."""
        child = self.create_element("a")
        if not isinstance(text, str):
            text = href
        child.set_attribute("href", href)
        child.append_child(self.create_text_node(text))
        prev = self.span_bottom()
        if prev is None:
            prev = self
        prev.append_child(child)
        return self

    def add_list_item(self):
        """Add item ("li" tag) under a (numbered or bulleted) list.

        Raises:
            ValueError: if this node is neither an "ol" nor a "ul" element.
        """
        if self.tagname not in ("ol", "ul"):
            # One formatted message instead of a tuple of exception args.
            raise ValueError(f"cannot add list item to {self.tagname}")
        child = self.create_element("li")
        self.append_child(child)
        return child

    def add_number_list(self, start=1, numtype=None):
        """Add numbered list ("ol" tag)"""
        child = self.create_element("ol")
        if start > 1:
            child.set_attribute("start", str(start))
        if numtype is not None:
            child.set_attribute("type", numtype)
        self.append_child(child)
        return child

    def add_paragraph(self):
        """Add "p" tag (as a sibling if this node is itself a "p")."""
        child = self.create_element("p")
        if self.tagname != "p":
            self.append_child(child)
        else:
            self.parent.append_child(child)
        return child

    def add_span(self):
        """Add "span" tag"""
        child = self.create_element("span")
        self.append_child(child)
        return child

    def add_style(self, text):
        """Set some style via CSS style. Replaces complete style spec."""
        style = self.get_attribute_value("style")
        if style is not None and text in style:
            return self
        self.remove_attribute("style")
        if style is None:
            style = text
        else:
            style += ";" + text
        self.set_attribute("style", style)
        return self

    def add_subscript(self, text=None):
        """Add a subscript ("sub" tag)"""
        child = self.create_element("sub")
        if isinstance(text, str):
            child.append_child(self.create_text_node(text))
        prev = self.span_bottom()
        if prev is None:
            prev = self
        prev.append_child(child)
        return self

    def add_superscript(self, text=None):
        """Add a superscript ("sup" tag)"""
        child = self.create_element("sup")
        if isinstance(text, str):
            child.append_child(self.create_text_node(text))
        prev = self.span_bottom()
        if prev is None:
            prev = self
        prev.append_child(child)
        return self

    def add_text(self, text):
        """Add text. Line breaks are honored."""
        lines = text.splitlines()
        line_count = len(lines)
        prev = self.span_bottom()
        if prev is None:
            prev = self

        for i, line in enumerate(lines):
            prev.append_child(self.create_text_node(line))
            if i < line_count - 1:
                # A "br" element separates consecutive lines.
                prev.append_child(self.create_element("br"))
        return self

    def append_child(self, child):
        """Append `child` (an `Xml`) to this node's children."""
        mupdf.fz_dom_append_child(self.this, child.this)

    def append_styled_span(self, style):
        """Append a "span" carrying `style` below the deepest stacked span.

        Returns the node the new span was appended to.
        """
        span = self.create_element("span")
        span.add_style(style)
        prev = self.span_bottom()
        if prev is None:
            prev = self
        prev.append_child(span)
        return prev

    def bodytag(self):
        """Return the node obtained from `fz_dom_body()`."""
        return Xml(mupdf.fz_dom_body(self.this))

    def clone(self):
        """Return a wrapper for a clone of this node."""
        ret = mupdf.fz_dom_clone(self.this)
        return Xml(ret)

    @staticmethod
    def color_text(color):
        """Convert a color spec to CSS text.

        A `str` is returned unchanged. An `int` is converted with
        `sRGB_to_rgb()` and a tuple / list is formatted as `rgb(r, g, b)`.
        Any other value is returned unchanged.
        """
        if type(color) is str:
            return color
        if type(color) is int:
            # Assumes sRGB_to_rgb() returns an (r, g, b) sequence. Formatting
            # it inside "rgb(...)" would double the parentheses.
            return f"rgb{tuple(sRGB_to_rgb(color))}"
        if type(color) in (tuple, list):
            return f"rgb{tuple(color)}"
        return color

    def create_element(self, tag):
        """Create (but do not attach) an element with tag name `tag`."""
        return Xml(mupdf.fz_dom_create_element(self.this, tag))

    def create_text_node(self, text):
        """Create (but do not attach) a text node containing `text`."""
        return Xml(mupdf.fz_dom_create_text_node(self.this, text))

    def debug(self):
        """Print a list of the node tree below self."""
        items = self._get_node_tree()
        for item in items:
            message("  " * item[0] + item[1].replace("\n", "\\n"))

    def find(self, tag, att, match):
        """Return the node found by `fz_dom_find()`, or None."""
        ret = mupdf.fz_dom_find(self.this, tag, att, match)
        if ret.m_internal:
            return Xml(ret)
        return None

    def find_next(self, tag, att, match):
        """Return the node found by `fz_dom_find_next()`, or None."""
        ret = mupdf.fz_dom_find_next(self.this, tag, att, match)
        if ret.m_internal:
            return Xml(ret)
        return None

    @property
    def first_child(self):
        """First child node, or None (text nodes have no children)."""
        if mupdf.fz_xml_text(self.this):
            # text node, has no child.
            return None
        # Pass the wrapped node, as every other MuPDF call in this class does.
        ret = mupdf.fz_dom_first_child(self.this)
        if ret.m_internal:
            return Xml(ret)
        return None

    def get_attribute_value(self, key):
        """Return the value of attribute `key` as reported by MuPDF."""
        assert key
        return mupdf.fz_dom_attribute(self.this, key)

    def get_attributes(self):
        """Return a dict of all attributes; None for a text node."""
        if mupdf.fz_xml_text(self.this):
            # text node, has no attributes.
            return None
        result = dict()
        i = 0
        while 1:
            val, key = mupdf.fz_dom_get_attribute(self.this, i)
            if not val or not key:
                break
            result[key] = val
            i += 1
        return result

    def insert_after(self, node):
        mupdf.fz_dom_insert_after(self.this, node.this)

    def insert_before(self, node):
        mupdf.fz_dom_insert_before(self.this, node.this)

    def insert_text(self, text):
        """Append `text` directly to this node, honoring line breaks."""
        lines = text.splitlines()
        line_count = len(lines)
        for i, line in enumerate(lines):
            self.append_child(self.create_text_node(line))
            if i < line_count - 1:
                self.append_child(self.create_element("br"))
        return self

    @property
    def is_text(self):
        """Check if this is a text node."""
        return self.text is not None

    @property
    def last_child(self):
        """Return last child node."""
        child = self.first_child
        if child is None:
            return None
        while True:
            following = child.next
            if not following:
                return child
            child = following

    @property
    def next(self):
        """Next sibling, or None."""
        ret = mupdf.fz_dom_next(self.this)
        if ret.m_internal:
            return Xml(ret)
        return None

    @property
    def parent(self):
        """Parent node, or None."""
        ret = mupdf.fz_dom_parent(self.this)
        if ret.m_internal:
            return Xml(ret)
        return None

    @property
    def previous(self):
        """Previous sibling, or None."""
        ret = mupdf.fz_dom_previous(self.this)
        if ret.m_internal:
            return Xml(ret)
        return None

    def remove(self):
        """Remove this node from the DOM."""
        mupdf.fz_dom_remove(self.this)

    def remove_attribute(self, key):
        assert key
        mupdf.fz_dom_remove_attribute(self.this, key)

    @property
    def root(self):
        """Wrapper for the node returned by `fz_xml_root()`."""
        return Xml(mupdf.fz_xml_root(self.this))

    def set_align(self, align):
        """Set text alignment via CSS style.

        `align` may be a CSS keyword string or one of the TEXT_ALIGN_*
        constants.

        Raises:
            ValueError: for an unrecognised `align`.
        """
        text = "text-align: %s"
        if isinstance(align, str):
            t = align
        elif align == TEXT_ALIGN_LEFT:
            t = "left"
        elif align == TEXT_ALIGN_CENTER:
            t = "center"
        elif align == TEXT_ALIGN_RIGHT:
            t = "right"
        elif align == TEXT_ALIGN_JUSTIFY:
            t = "justify"
        else:
            raise ValueError(f"Unrecognised {align=}")
        text = text % t
        self.add_style(text)
        return self

    def set_attribute(self, key, value):
        assert key
        mupdf.fz_dom_add_attribute(self.this, key, value)

    def set_bgcolor(self, color):
        """Set background color via CSS style"""
        text = "background-color: %s" % self.color_text(color)
        self.add_style(text)  # does not work on span level
        return self

    def set_bold(self, val=True):
        """Set bold on / off via CSS style"""
        if val:
            val = "bold"
        else:
            val = "normal"
        text = "font-weight: %s" % val
        self.append_styled_span(text)
        return self

    def set_color(self, color):
        """Set text color via CSS style"""
        text = "color: %s" % self.color_text(color)
        self.append_styled_span(text)
        return self

    def set_columns(self, cols):
        """Set number of text columns via CSS style"""
        text = f"columns: {cols}"
        self.append_styled_span(text)
        return self

    def set_font(self, font):
        """Set font-family name via CSS style"""
        text = "font-family: %s" % font
        self.append_styled_span(text)
        return self

    def set_fontsize(self, fontsize):
        """Set font size via CSS style; non-str values get a "px" unit."""
        if type(fontsize) is str:
            px = ""
        else:
            px = "px"
        text = f"font-size: {fontsize}{px}"
        self.append_styled_span(text)
        return self

    def set_id(self, unique):
        """Set a unique id.

        Raises:
            ValueError: if a node with this id is found from the root.
        """
        # check uniqueness
        root = self.root
        if root.find(None, "id", unique):
            raise ValueError(f"id '{unique}' already exists")
        self.set_attribute("id", unique)
        return self

    def set_italic(self, val=True):
        """Set italic on / off via CSS style"""
        if val:
            val = "italic"
        else:
            val = "normal"
        text = "font-style: %s" % val
        self.append_styled_span(text)
        return self

    def set_leading(self, leading):
        """Set inter-line spacing value via CSS style - block-level only."""
        text = f"-mupdf-leading: {leading}"
        self.add_style(text)
        return self

    def set_letter_spacing(self, spacing):
        """Set inter-letter spacing value via CSS style"""
        text = f"letter-spacing: {spacing}"
        self.append_styled_span(text)
        return self

    def set_lineheight(self, lineheight):
        """Set line height name via CSS style - block-level only."""
        text = f"line-height: {lineheight}"
        self.add_style(text)
        return self

    def set_margins(self, val):
        """Set margin values via CSS style"""
        # The CSS shorthand property is "margin"; "margins" does not exist.
        text = "margin: %s" % val
        self.append_styled_span(text)
        return self

    def set_opacity(self, opacity):
        """Set opacity via CSS style"""
        text = f"opacity: {opacity}"
        self.append_styled_span(text)
        return self

    def set_pagebreak_after(self):
        """Insert a page break after this node."""
        text = "page-break-after: always"
        self.add_style(text)
        return self

    def set_pagebreak_before(self):
        """Insert a page break before this node."""
        text = "page-break-before: always"
        self.add_style(text)
        return self

    def set_properties(
            self,
            align=None,
            bgcolor=None,
            bold=None,
            color=None,
            columns=None,
            font=None,
            fontsize=None,
            indent=None,
            italic=None,
            leading=None,
            letter_spacing=None,
            lineheight=None,
            margins=None,
            pagebreak_after=None,
            pagebreak_before=None,
            word_spacing=None,
            unqid=None,
            cls=None,
            ):
        """Set any or all properties of a node.

        To be used for existing nodes preferably.

        The styles are first collected on a temporary "div" added under the
        root, then joined with ";" and written to this node's `style`
        attribute. The temporary node is removed afterwards.
        """
        root = self.root
        temp = root.add_division()
        if align is not None:
            temp.set_align(align)
        if bgcolor is not None:
            temp.set_bgcolor(bgcolor)
        if bold is not None:
            temp.set_bold(bold)
        if color is not None:
            temp.set_color(color)
        if columns is not None:
            temp.set_columns(columns)
        if font is not None:
            temp.set_font(font)
        if fontsize is not None:
            temp.set_fontsize(fontsize)
        if indent is not None:
            temp.set_text_indent(indent)
        if italic is not None:
            temp.set_italic(italic)
        if leading is not None:
            temp.set_leading(leading)
        if letter_spacing is not None:
            temp.set_letter_spacing(letter_spacing)
        if lineheight is not None:
            temp.set_lineheight(lineheight)
        if margins is not None:
            temp.set_margins(margins)
        if pagebreak_after is not None:
            temp.set_pagebreak_after()
        if pagebreak_before is not None:
            temp.set_pagebreak_before()
        if word_spacing is not None:
            temp.set_word_spacing(word_spacing)
        if unqid is not None:
            self.set_id(unqid)
        if cls is not None:
            self.add_class(cls)

        styles = []
        top_style = temp.get_attribute_value("style")
        if top_style is not None:
            styles.append(top_style)
        # Walk down the chain of nested spans below the temporary node.
        child = temp.first_child
        while child:
            child_style = child.get_attribute_value("style")
            if child_style is not None:
                # Skip nodes without a style so that join() cannot fail.
                styles.append(child_style)
            child = child.first_child
        self.set_attribute("style", ";".join(styles))
        temp.remove()
        return self

    def set_text_indent(self, indent):
        """Set text indentation name via CSS style - block-level only."""
        text = f"text-indent: {indent}"
        self.add_style(text)
        return self

    def set_underline(self, val="underline"):
        """Set text decoration via CSS style"""
        text = "text-decoration: %s" % val
        self.append_styled_span(text)
        return self

    def set_word_spacing(self, spacing):
        """Set inter-word spacing value via CSS style"""
        text = f"word-spacing: {spacing}"
        self.append_styled_span(text)
        return self

    def span_bottom(self):
        """Find deepest level in stacked spans.

        Returns None if the last non-text child is not a "span".
        """
        parent = self
        child = self.last_child
        if child is None:
            return None
        # Skip trailing text nodes to find the last element child.
        while child.is_text:
            child = child.previous
            if child is None:
                break
        if child is None or child.tagname != "span":
            return None

        while True:
            if child is None:
                return parent
            if child.tagname in ("a", "sub", "sup", "body") or child.is_text:
                child = child.next
                continue
            if child.tagname == "span":
                parent = child
                child = child.first_child
            else:
                return parent

    @property
    def tagname(self):
        """Tag name as reported by `fz_xml_tag()`."""
        return mupdf.fz_xml_tag(self.this)

    @property
    def text(self):
        """Text as reported by `fz_xml_text()`."""
        return mupdf.fz_xml_text(self.this)

    # These inline tags are created exactly like "code".
    add_var = add_code
    add_samp = add_code
    add_kbd = add_code
2695
2696
class Colorspace:
    """Wrapper around a MuPDF colorspace (`mupdf.FzColorspace`)."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        `type_` may be an existing `mupdf.FzColorspace` (wrapped as is) or
        one of CS_GRAY, CS_CMYK, CS_RGB. Any other value selects RGB.
        """
        if isinstance(type_, mupdf.FzColorspace):
            self.this = type_
        elif type_ == CS_GRAY:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
        elif type_ == CS_CMYK:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
        else:
            # CS_RGB and every unrecognised value select RGB.
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

    def __repr__(self):
        # Map the component count to a short name. Counts without a name,
        # including counts above 4, yield an empty string rather than an
        # IndexError.
        x = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (x, self.name)

    def _name(self):
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
2728
2729
class DeviceWrapper:
    """Holds a MuPDF device in `self.this`.

    The device is either taken over as is or created from the positional
    arguments, depending on which pattern `args_match()` accepts.
    """

    def __init__(self, *args):
        """Accepted argument patterns:

        (mupdf.FzDevice,)           - use the given device.
        (Pixmap, clip)              - draw device for the pixmap, limited to
                                      `clip` unless that irect is infinite.
        (mupdf.FzDisplayList,)      - list device for the display list.
        (mupdf.FzStextPage, flags)  - stext device with options from `flags`.

        Raises:
            Exception: if `args` matches none of the patterns.
        """
        if args_match(args, mupdf.FzDevice):
            (self.this,) = args
            return

        if args_match(args, Pixmap, None):
            pixmap, clip = args
            clip_irect = JM_irect_from_py(clip)
            if mupdf.fz_is_infinite_irect(clip_irect):
                self.this = mupdf.fz_new_draw_device(mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox(mupdf.FzMatrix(), pixmap, clip_irect)
            return

        if args_match(args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device(display_list)
            return

        if args_match(args, mupdf.FzStextPage, None):
            textpage, flags = args
            options = mupdf.FzStextOptions(flags)
            self.this = mupdf.fz_new_stext_device(textpage, options)
            return

        raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
2751
2752
class DisplayList:
    """Wrapper around a MuPDF display list (`mupdf.FzDisplayList`)."""

    def __del__(self):
        # Only exact DisplayList instances reset the flag; subclasses are
        # left untouched.
        if type(self) is not DisplayList:
            return
        self.thisown = False

    def __init__(self, *args):
        """Create from a `mupdf.FzRect` or wrap a `mupdf.FzDisplayList`."""
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            assert 0, f'Unrecognised {args=}'

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Render the display list to a pixmap.

        A `colorspace` that is not a `Colorspace` instance selects RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Bounds of the display list as a `Rect`."""
        val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
        val = Rect(val)
        return val

    def run(self, dw, m, area):
        """Run the display list through the device held by `dw`.

        Args:
            dw: device wrapper. Its `device` attribute is used when present;
                otherwise `this`, which is what `DeviceWrapper` sets.
            m: matrix, converted with `JM_matrix_from_py()`.
            area: rectangle, converted with `JM_rect_from_py()`.
        """
        device = getattr(dw, "device", None)
        if device is None:
            # DeviceWrapper stores its MuPDF device in `.this` only.
            device = dw.this
        mupdf.fz_run_display_list(
                self.this,
                device,
                JM_matrix_from_py(m),
                JM_rect_from_py(area),
                mupdf.FzCookie(),
                )
2797
# When g_use_extra is truthy, bind a module-level alias to the
# FzDocument_insert_pdf function of the `extra` module. The alias is not
# defined otherwise.
if g_use_extra:
    extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
2800
2801
2802 class Document:
2803
2804 def __contains__(self, loc) -> bool:
2805 if type(loc) is int:
2806 if loc < self.page_count:
2807 return True
2808 return False
2809 if type(loc) not in (tuple, list) or len(loc) != 2:
2810 return False
2811 chapter, pno = loc
2812 if (0
2813 or not isinstance(chapter, int)
2814 or chapter < 0
2815 or chapter >= self.chapter_count
2816 ):
2817 return False
2818 if (0
2819 or not isinstance(pno, int)
2820 or pno < 0
2821 or pno >= self.chapter_page_count(chapter)
2822 ):
2823 return False
2824 return True
2825
2826 def __delitem__(self, i)->None:
2827 if not self.is_pdf:
2828 raise ValueError("is no PDF")
2829 if type(i) is int:
2830 return self.delete_page(i)
2831 if type(i) in (list, tuple, range):
2832 return self.delete_pages(i)
2833 if type(i) is not slice:
2834 raise ValueError("bad argument type")
2835 pc = self.page_count
2836 start = i.start if i.start else 0
2837 stop = i.stop if i.stop else pc
2838 step = i.step if i.step else 1
2839 while start < 0:
2840 start += pc
2841 if start >= pc:
2842 raise ValueError("bad page number(s)")
2843 while stop < 0:
2844 stop += pc
2845 if stop > pc:
2846 raise ValueError("bad page number(s)")
2847 return self.delete_pages(range(start, stop, step))
2848
2849 def __enter__(self):
2850 return self
2851
2852 def __exit__(self, *args):
2853 self.close()
2854
# Typing-only overloads describing the accepted index kinds.
@typing.overload
def __getitem__(self, i: int = 0) -> Page:
    ...

if sys.version_info >= (3, 9):
    # Builtin generics in annotations need Python 3.9 or later.
    @typing.overload
    def __getitem__(self, i: slice) -> list[Page]:
        ...

    @typing.overload
    def __getitem__(self, i: tuple[int, int]) -> Page:
        ...

def __getitem__(self, i=0):
    """Return the page(s) addressed by `i`.

    A slice yields a list of pages. An int page number or a two-int tuple
    `(chapter, pno)` yields the page loaded with `load_page()`.

    Raises:
        IndexError: if `i` is not contained in the document.
    """
    if isinstance(i, slice):
        page_numbers = range(*i.indices(len(self)))
        return [self[number] for number in page_numbers]
    is_page_number = isinstance(i, int)
    is_location = (
            isinstance(i, tuple)
            and len(i) == 2
            and all(isinstance(x, int) for x in i)
            )
    assert is_page_number or is_location, \
            f'Invalid item number: {i=}.'
    if i not in self:
        raise IndexError(f"page {i} not in document")
    return self.load_page(i)
2876
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
    """Creates a document. Use 'open' as a synonym.

    Notes:
        Basic usages:
        open() - new PDF document
        open(filename) - string or pathlib.Path, must have supported
                file extension.
        open(type, buffer) - type: valid extension, buffer: bytes object.
        open(stream=buffer, filetype=type) - keyword version of previous.
        open(filename, filetype=type) - filename with unrecognized extension.
        rect, width, height, fontsize: layout reflowable document
        on open (e.g. EPUB). Ignored if n/a.

    Raises:
        TypeError: for an unsupported `stream` or `filename` type.
        EmptyFileError: if the stream or the file has zero length.
        FileNotFoundError: if `filename` does not exist.
        FileDataError: if `filename` is not a regular file, or if MuPDF
            fails to open the stream / file, or if an SVG document cannot
            be converted.
    """
    # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
    # then restore its original value in a `finally:` block.
    #
    global JM_mupdf_show_errors
    JM_mupdf_show_errors_old = JM_mupdf_show_errors
    JM_mupdf_show_errors = 0

    try:
        # Initialise the instance state before any document is opened.
        self.is_closed    = False
        self.is_encrypted = False
        self.is_encrypted = False
        self.metadata    = None
        self.FontInfos   = []
        self.Graftmaps   = {}
        self.ShownPages  = {}
        self.InsertedImages  = {}
        self._page_refs  = weakref.WeakValueDictionary()
        if isinstance(filename, mupdf.PdfDocument):
            # Wrap an existing PdfDocument; none of the opening / layout
            # steps below are executed (the `finally:` clause still is).
            pdf_document = filename
            self.this = pdf_document
            self.this_is_pdf = True
            return

        # Layout size: an explicit (non-infinite) `rect` overrides
        # `width` / `height`.
        w = width
        h = height
        r = JM_rect_from_py(rect)
        if not mupdf.fz_is_infinite_rect(r):
            w = r.x1 - r.x0
            h = r.y1 - r.y0

        self._name = filename
        self.stream = stream

        if stream is not None:
            # Open from memory.
            if filename is not None and filetype is None:
                # 2025-05-06: Use <filename> as the filetype. This is
                # reversing precedence - we used to use <filename> if both
                # were set.
                filetype = filename
            # Normalise the stream to bytes / memoryview.
            if isinstance(stream, (bytes, memoryview)):
                pass
            elif isinstance(stream, bytearray):
                stream = bytes(stream)
            elif isinstance(stream, io.BytesIO):
                stream = stream.getvalue()
            else:
                raise TypeError(f"bad stream: {type(stream)=}.")
            self.stream = stream

            assert isinstance(stream, (bytes, memoryview))
            if len(stream) == 0:
                # MuPDF raise an exception for this but also generates
                # warnings, which is not very helpful for us. So instead we
                # raise a specific exception.
                raise EmptyFileError('Cannot open empty stream.')

            stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
            try:
                doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
            except Exception as e:
                if g_exceptions_verbose > 1:    exception_info()
                raise FileDataError('Failed to open stream') from e

        elif filename:
            # Open from a file.
            assert not stream
            # Normalise `filename` to str: objects with an `absolute`
            # attribute are converted with str(), objects with a `name`
            # attribute contribute that name.
            if isinstance(filename, str):
                pass
            elif hasattr(filename, "absolute"):
                filename = str(filename)
            elif hasattr(filename, "name"):
                filename = filename.name
            else:
                raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
            self._name = filename

            # Generate our own specific exceptions. This avoids MuPDF
            # generating warnings etc.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"no such file: '{filename}'")
            elif not os.path.isfile(filename):
                raise FileDataError(f"'{filename}' is no file")
            elif os.path.getsize(filename) == 0:
                raise EmptyFileError(f'Cannot open empty file: {filename=}.')

            if filetype:
                # Override the type implied by <filename>.  MuPDF does not
                # have a way to do this directly so we open via a stream.
                try:
                    fz_stream = mupdf.fz_open_file(filename)
                    doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
            else:
                try:
                    doc = mupdf.fz_open_document(filename)
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError(f'Failed to open file {filename!r}.') from e

        else:
            # Neither stream nor filename: create a new PDF.
            pdf = mupdf.PdfDocument()
            doc = mupdf.FzDocument(pdf)

        # Lay out the document: with the requested size if one was given,
        # otherwise with 400 x 600 / font size 11 for reflowable documents.
        if w > 0 and h > 0:
            mupdf.fz_layout_document(doc, w, h, fontsize)
        elif mupdf.fz_is_document_reflowable(doc):
            mupdf.fz_layout_document(doc, 400, 600, 11)

        self.this = doc

        # fixme: not sure where self.thisown gets initialised in PyMuPDF.
        #
        self.thisown = True

        if self.thisown:
            self._graft_id = TOOLS.gen_id()
            if self.needs_pass:
                self.is_encrypted = True
            else: # we won't init until doc is decrypted
                self.init_doc()
            # the following hack detects invalid/empty SVG files, which else may lead
            # to interpreter crashes
            if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
                try:
                    _ = self.convert_to_pdf()  # this seems to always work
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError("cannot open broken document") from e

        if g_use_extra:
            # Select the page-count function of the `extra` module that
            # matches the type of the wrapped document.
            self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
            if self.this_is_pdf:
                self.page_count2 = extra.page_count_pdf
            else:
                self.page_count2 = extra.page_count_fz
    finally:
        JM_mupdf_show_errors = JM_mupdf_show_errors_old
3029
3030 def __len__(self) -> int:
3031 return self.page_count
3032
3033 def __repr__(self) -> str:
3034 m = "closed " if self.is_closed else ""
3035 if self.stream is None:
3036 if self.name == "":
3037 return m + "Document(<new PDF, doc# %i>)" % self._graft_id
3038 return m + "Document('%s')" % (self.name,)
3039 return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
3040
3041 def _addFormFont(self, name, font):
3042 """Add new form font."""
3043 if self.is_closed or self.is_encrypted:
3044 raise ValueError("document closed or encrypted")
3045 pdf = _as_pdf_document(self, required=0)
3046 if not pdf.m_internal:
3047 return
3048 fonts = mupdf.pdf_dict_getl(
3049 mupdf.pdf_trailer( pdf),
3050 PDF_NAME('Root'),
3051 PDF_NAME('AcroForm'),
3052 PDF_NAME('DR'),
3053 PDF_NAME('Font'),
3054 )
3055 if not fonts.m_internal or not mupdf.pdf_is_dict( fonts):
3056 raise RuntimeError( "PDF has no form fonts yet")
3057 k = mupdf.pdf_new_name( name)
3058 v = JM_pdf_obj_from_str( pdf, font)
3059 mupdf.pdf_dict_put( fonts, k, v)
3060
3061 def _delToC(self):
3062 """Delete the TOC."""
3063 if self.is_closed or self.is_encrypted:
3064 raise ValueError("document closed or encrypted")
3065 xrefs = [] # create Python list
3066 pdf = _as_pdf_document(self, required=0)
3067 if not pdf.m_internal:
3068 return xrefs # not a pdf
3069 # get the main root
3070 root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
3071 # get the outline root
3072 olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
3073 if not olroot.m_internal:
3074 return xrefs # no outlines or some problem
3075
3076 first = mupdf.pdf_dict_get(olroot, PDF_NAME('First')) # first outline
3077
3078 xrefs = JM_outline_xrefs(first, xrefs)
3079 xref_count = len(xrefs)
3080
3081 olroot_xref = mupdf.pdf_to_num(olroot) # delete OL root
3082 mupdf.pdf_delete_object(pdf, olroot_xref) # delete OL root
3083 mupdf.pdf_dict_del(root, PDF_NAME('Outlines')) # delete OL root
3084
3085 for i in range(xref_count):
3086 _, xref = JM_INT_ITEM(xrefs, i)
3087 mupdf.pdf_delete_object(pdf, xref) # delete outline item
3088 xrefs.append(olroot_xref)
3089 val = xrefs
3090 self.init_doc()
3091 return val
3092
def _delete_page(self, pno):
    """Delete page number `pno` from the PDF.

    If the document has a reverse page map afterwards, the page tree is
    dropped.
    """
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page(pdf, pno)
    has_rev_page_map = pdf.m_internal.rev_page_map
    if has_rev_page_map:
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
3098
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: if `_INRANGE()` rejects `xref` for the bounds 1 and
            xref table length - 1.
    """
    pdf = _as_pdf_document(self)
    highest_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest_xref):
        raise ValueError(MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
3105
def _embeddedFileGet(self, idx):
    """Return the stream content of embedded file entry number `idx`."""
    pdf_doc = _as_pdf_document(self)
    name_pairs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf_doc),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # the array alternates name / file specification, hence the odd index
    filespec = mupdf.pdf_array_get(name_pairs, 2 * idx + 1)
    stream = mupdf.pdf_dict_getl(filespec, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream))
3120
def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
    """Translate an embedded file name or number into its entry index.

    Raises:
        ValueError: if `item` is neither a known name nor a valid index.
    """
    known = self.embfile_names()
    if item in known:
        return known.index(item)
    if item in range(len(known)):
        return item
    raise ValueError("'%s' not in EmbeddedFiles array." % str(item))
3131
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Append a new name / file specification pair to the EmbeddedFiles array.

    The array is created first if the catalog does not have one yet.

    Returns:
        The xref of the /EF/F object of the new file specification.

    Raises:
        TypeError: if no buffer could be made from `buffer_`.
    """
    pdf_doc = _as_pdf_document(self)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError(MSG_BAD_BUFFER)

    name_pairs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf_doc),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_pairs):
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
        name_pairs = mupdf.pdf_new_array(pdf_doc, 6)  # an even number!
        mupdf.pdf_dict_putl(
            catalog,
            name_pairs,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
        )
    filespec = JM_embed_file(pdf_doc, content, filename, ufilename, desc, 1)
    stream = mupdf.pdf_dict_getl(filespec, PDF_NAME('EF'), PDF_NAME('F'))
    stream_xref = mupdf.pdf_to_num(stream)
    mupdf.pdf_array_push(name_pairs, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(name_pairs, filespec)
    return stream_xref
3162
def _embfile_del(self, idx):
    """Remove embedded file entry number `idx` from the EmbeddedFiles array.

    Args:
        idx: entry number as delivered by `_embeddedFileIndex()`, i.e. the
            position within the list of embedded file names.

    The array is a flat sequence of (name, file specification) pairs, so
    entry `idx` occupies array positions `2*idx` and `2*idx + 1` - the same
    addressing used by the other `_embfile_*` helpers. Deleting positions
    `idx` / `idx + 1` would remove the wrong elements for every `idx > 0`.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    pos = 2 * idx
    # delete the higher position first so the lower one stays valid
    mupdf.pdf_array_delete(names, pos + 1)
    mupdf.pdf_array_delete(names, pos)
3174
def _embfile_info(self, idx, infodict):
    """Store details of embedded file entry number `idx` in `infodict`.

    Adds the collection item xref (0 if there is no /CI), file name,
    unicode file name, description, size and length. Size and length are
    -1 if the respective PDF keys are missing.

    Returns:
        The xref of the entry's /EF/F object.
    """
    pdf_doc = _as_pdf_document(self)
    name_pairs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf_doc),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    filespec = mupdf.pdf_array_get(name_pairs, 2 * idx + 1)

    collection = mupdf.pdf_dict_get(filespec, PDF_NAME('CI'))
    collection_xref = mupdf.pdf_to_num(collection) if collection.m_internal else 0
    infodict["collection"] = collection_xref

    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('F')))
    infodict[dictkey_filename] = JM_EscapeStrFromStr(text)

    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('UF')))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(text)

    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('Desc')))
    infodict[dictkey_descr] = JM_UnicodeFromStr(text)

    stream = mupdf.pdf_dict_getl(filespec, PDF_NAME('EF'), PDF_NAME('F'))
    stream_xref = mupdf.pdf_to_num(stream)

    length_obj = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    stored_length = mupdf.pdf_to_int(length_obj) if length_obj.m_internal else -1

    # prefer /DL, fall back to /Params/Size
    size_obj = mupdf.pdf_dict_get(stream, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    original_size = mupdf.pdf_to_int(size_obj) if size_obj.m_internal else -1

    infodict[dictkey_size] = original_size
    infodict[dictkey_length] = stored_length
    return stream_xref
3221
def _embfile_names(self, namelist):
    """Append the names of all embedded files to `namelist`."""
    pdf_doc = _as_pdf_document(self)
    name_pairs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf_doc),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_pairs):
        return
    # names sit at the even positions of the array
    for pos in range(0, mupdf.pdf_array_len(name_pairs), 2):
        raw = mupdf.pdf_to_text_string(mupdf.pdf_array_get(name_pairs, pos))
        namelist.append(JM_EscapeStrFromStr(raw))
3241
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update content and / or properties of embedded file entry `idx`.

    Args:
        idx: entry number within the list of embedded file names.
        buffer_: new file content; a false value leaves the content as is.
        filename: new /F value, ignored if false.
        ufilename: new /UF value, ignored if false.
        desc: new /Desc value, ignored if false.

    Returns:
        The xref of the entry's /EF/F object.

    Raises:
        TypeError: if `buffer_` is given but no buffer can be made from it.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)

    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    # `buffer_` is the caller's Python object (e.g. bytes) and has no
    # `m_internal` attribute - only the converted buffer may be tested
    # that way. Validate the conversion like _embfile_add() does.
    if buffer_:
        res = JM_BufferFromBytes(buffer_)
        if not res.m_internal:
            raise TypeError( MSG_BAD_BUFFER)
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    xref = mupdf.pdf_to_num(filespec)
    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
3277
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    Each entry of `items` must hold a dictionary at index 3. That
    dictionary is updated in place with the outline item's xref and -
    depending on the item's PDF keys - italic / bold, collapse, color
    and zoom values.

    Raises:
        ValueError: if the document is closed, or an item has no
            dictionary at index 3.
        IndexError: if the number of outline xrefs differs from the
            number of items.
    """
    if self.is_closed:
        raise ValueError("document closed")
    # delegate to the `extra` implementation when it is enabled
    if g_use_extra:
        return extra.Document_extend_toc_items( self.this, items)
    pdf = _as_pdf_document(self)
    # keys used in the item dictionaries
    zoom = "zoom"
    bold = "bold"
    italic = "italic"
    collapse = "collapse"

    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        return
    olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
    if not olroot.m_internal:
        return
    first = mupdf.pdf_dict_get(olroot, PDF_NAME('First'))
    if not first.m_internal:
        return
    xrefs = []
    xrefs = JM_outline_xrefs(first, xrefs)
    n = len(xrefs)
    m = len(items)
    if not n:
        return
    # items and outline xrefs are matched by position below
    if n != m:
        raise IndexError( "internal error finding outline xrefs")

    # update all TOC item dictionaries
    for i in range(n):
        xref = int(xrefs[i])
        item = items[i]
        itemdict = item[3]
        if not isinstance(itemdict, dict):
            raise ValueError( "need non-simple TOC format")
        itemdict[dictkey_xref] = xrefs[i]
        bm = mupdf.pdf_load_object(pdf, xref)
        # /F: 1 = italic, 2 = bold, 3 = both; other values set nothing
        flags = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('F')))
        if flags == 1:
            itemdict[italic] = True
        elif flags == 2:
            itemdict[bold] = True
        elif flags == 3:
            itemdict[italic] = True
            itemdict[bold] = True
        # negative /Count: collapsed, positive: not collapsed, 0: key not set
        count = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('Count')))
        if count < 0:
            itemdict[collapse] = True
        elif count > 0:
            itemdict[collapse] = False
        # /C is only used if it is an array of exactly three numbers
        col = mupdf.pdf_dict_get(bm, PDF_NAME('C'))
        if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3:
            color = (
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)),
            )
            itemdict[dictkey_color] = color
        # zoom is the 5th element of a destination array, else 0
        z=0
        obj = mupdf.pdf_dict_get(bm, PDF_NAME('Dest'))
        if not obj.m_internal or not mupdf.pdf_is_array(obj):
            # no usable /Dest: look at the action's /D instead
            obj = mupdf.pdf_dict_getl(bm, PDF_NAME('A'), PDF_NAME('D'))
        if mupdf.pdf_is_array(obj) and mupdf.pdf_array_len(obj) == 5:
            z = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, 4))
        itemdict[zoom] = float(z)
        item[3] = itemdict
        items[i] = item
3347
def _forget_page(self, page: Page):
    """Drop `page` from the document's page reference dictionary, if present."""
    self._page_refs.pop(id(page), None)
3354
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of (glyph, advance) tuples for a font.

    The font is a CJK font if `ordering` >= 0, else the Base-14 font named
    `bfname` if MuPDF knows it, else the font stored at `xref`. At least
    256 character codes are looked at, more if `limit` is larger.

    Raises:
        Exception: if no font buffer is available for `xref`.
    """
    pdf_doc = _as_pdf_document(self)
    code_limit = max(limit, 256)
    is_cjk = ordering >= 0
    if is_cjk:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            fontbuf = JM_get_fontbuffer(pdf_doc, xref)
            if not fontbuf.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, fontbuf, idx, 0)

    widths = []
    for code in range(code_limit):
        gid = mupdf.fz_encode_character(font, code)
        advance = mupdf.fz_advance_glyph(font, gid, 0)
        if is_cjk:
            # CJK fonts report the character code instead of the glyph id
            gid = code
        widths.append((gid, advance if gid > 0 else 0.0))
    return widths
3384
def _get_page_labels(self):
    """Return the list of page label definitions found under /Root/PageLabels.

    Looks for a direct /Nums entry first, then for /Kids/Nums, and finally
    treats /Kids as an array whose items each carry a /Nums entry.
    """
    pdf_doc = _as_pdf_document(self)
    labels = []
    key = mupdf.pdf_new_name("PageLabels")
    tree = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'), key)
    if not tree.m_internal:
        return labels

    # simple case: direct /Nums object
    direct = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(tree, PDF_NAME('Nums')))
    if direct.m_internal:
        JM_get_page_labels(labels, direct)
        return labels

    # case: /Kids/Nums
    nested = mupdf.pdf_resolve_indirect(
        mupdf.pdf_dict_getl(tree, PDF_NAME('Kids'), PDF_NAME('Nums'))
    )
    if nested.m_internal:
        JM_get_page_labels(labels, nested)
        return labels

    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(tree, PDF_NAME('Kids')))
    if kids.m_internal and mupdf.pdf_is_array(kids):
        for pos in range(mupdf.pdf_array_len(kids)):
            kid = mupdf.pdf_array_get(kids, pos)
            kid_nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
            JM_get_page_labels(labels, kid_nums)
    return labels
3416
def _getMetadata(self, key):
    """Look up metadata item `key`; return '' if the lookup raises."""
    try:
        value = mupdf.fz_lookup_metadata2(self.this, key)
    except Exception:
        if g_exceptions_verbose > 2:
            exception_info()
        value = ''
    return value
3424
def _getOLRootNumber(self):
    """Return the xref of the outline root, creating the object if missing.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if outlines.m_internal:
        return mupdf.pdf_to_num(outlines)

    # no outline root yet: make one and register it in the catalog
    fresh = mupdf.pdf_new_dict(pdf_doc, 4)
    mupdf.pdf_dict_put(fresh, PDF_NAME('Type'), PDF_NAME('Outlines'))
    reference = mupdf.pdf_add_object(pdf_doc, fresh)
    mupdf.pdf_dict_put(catalog, PDF_NAME('Outlines'), reference)
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    return mupdf.pdf_to_num(outlines)
3441
def _getPDFfileid(self):
    """Get PDF file id.

    Returns:
        None if the document is not a PDF. Otherwise a list with one
        hexlified bytes object per item of the trailer's /ID array; the
        list is empty if there is no /ID entry.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    idlist = []
    identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if identity.m_internal:
        for i in range(mupdf.pdf_array_len(identity)):
            text = mupdf.pdf_to_text_string(mupdf.pdf_array_get(identity, i))
            # hexlify() only accepts bytes-like objects, so encode the
            # text first (same approach as embfile_info() for checksums)
            idlist.append(binascii.hexlify(text.encode()))
    return idlist
3457
def _getPageInfo(self, pno, what):
    """List fonts, images, XObjects used on a page.

    Args:
        pno: page number; negative values count from the end.
        what: selector passed through to JM_scan_resources().

    Returns:
        List of the items found in the page's (inheritable) /Resources.

    Raises:
        ValueError: if the document is closed or encrypted, or `pno` does
            not denote an existing page.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if isinstance(doc, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(doc)
    else:
        page_count = mupdf.fz_count_pages(doc)
    # A document without pages has no valid page number. Checking this
    # first also keeps the normalisation loop below from running forever.
    if page_count <= 0:
        raise ValueError( MSG_BAD_PAGENO)
    n = pno  # pno < 0 is allowed
    while n < 0:
        n += page_count  # make it non-negative
    if n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    pdf = _as_pdf_document(self)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
    liste = []
    tracer = []
    if rsrc.m_internal:
        JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
    return liste
3477
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert a font given as a file or as binary data.

    Raises ValueError if neither `fontfile` nor `fontbuffer` is given.
    '''
    pdf_doc = _as_pdf_document(self)
    if fontfile or fontbuffer:
        return JM_insert_font(pdf_doc, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
    raise ValueError(MSG_FILE_OR_BUFFER)
3487
def _loadOutline(self):
    """Load the document outline; return None if loading raises."""
    fz_doc = self.this
    assert isinstance(fz_doc, mupdf.FzDocument)
    try:
        outline = mupdf.fz_load_outline(fz_doc)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return None
    else:
        return Outline(outline)
3498
def _make_page_map(self):
    """Former builder of the page number -> page object array.

    Raises ValueError for a closed document and fails an assertion
    otherwise.
    """
    if self.is_closed:
        raise ValueError("document closed")
    assert 0, '_make_page_map() is no-op'
3504
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference.

    Args:
        pno: number of the source page.
        nb: number of the target page.
        before: if true, insert in front of the target page, else after it.
        copy: if true, the source reference is kept (copy), else it is
            removed from its old position (move).
    """
    pdf = _as_pdf_document(self)
    same = 0
    # get the two page objects -----------------------------------
    # locate the /Kids arrays and indices in each

    page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno)

    kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids'))

    page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb)
    kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids'))
    if before: # calc index of source page in target /Kids
        pos = i2
    else:
        pos = i2 + 1

    # same /Kids array? ------------------------------------------
    # a result of 0 is treated as "both pages share one /Kids array"
    same = mupdf.pdf_objcmp( kids1, kids2)

    # put source page in target /Kids array ----------------------
    if not copy and same != 0: # update parent in page object
        mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2)
    mupdf.pdf_array_insert( kids2, page1, pos)

    if same != 0: # different /Kids arrays ----------------------
        parent = parent2
        while parent.m_internal: # increase /Count objects in parents
            count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
            parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        if not copy: # delete original item
            mupdf.pdf_array_delete( kids1, i1)
            parent = parent1
            while parent.m_internal: # decrease /Count objects in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count - 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
    else: # same /Kids array
        if copy: # source page is copied
            parent = parent2
            while parent.m_internal: # increase /Count object in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        else:
            # The page was already inserted at `pos` above. If that was
            # at or before the old position, the old entry has shifted
            # up by one.
            if i1 < pos:
                mupdf.pdf_array_delete( kids1, i1)
            else:
                mupdf.pdf_array_delete( kids1, i1 + 1)
    if pdf.m_internal.rev_page_map: # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
3560
def _newPage(self, pno=-1, width=595, height=842):
    """Create an empty PDF page of the given size and return it.

    Raises:
        ValueError: if the document is closed or encrypted, or (in the
            non-`extra` code path) if `pno` is less than -1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if g_use_extra:
        extra._newPage(self.this, pno, width, height)
    else:
        pdf_doc = _as_pdf_document(self)
        if pno < -1:
            raise ValueError(MSG_BAD_PAGENO)
        # unit rectangle stretched to the requested page size
        page_rect = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        page_rect.x1 = width
        page_rect.y1 = height
        empty_contents = mupdf.FzBuffer()
        # create /Resources and /Contents objects
        page_resources = mupdf.pdf_add_new_dict(pdf_doc, 1)
        new_page = mupdf.pdf_add_page(pdf_doc, page_rect, 0, page_resources, empty_contents)
        mupdf.pdf_insert_page(pdf_doc, pno, new_page)
        # fixme: pdf->dirty = 1;

    self._reset_page_refs()
    return self[pno]
3584
def _remove_links_to(self, numbers):
    """Hand the page numbers `numbers` over to `_remove_dest_range()`."""
    _remove_dest_range(_as_pdf_document(self), numbers)
3588
def _remove_toc_item(self, xref):
    """ "Remove" the bookmark at `xref` by letting it point to nowhere.

    /Dest and /A are deleted and the item color is set to (0.8, 0.8, 0.8).
    """
    pdf_doc = _as_pdf_document(self)
    bookmark = mupdf.pdf_new_indirect(pdf_doc, xref, 0)
    for key in ('Dest', 'A'):
        mupdf.pdf_dict_del(bookmark, PDF_NAME(key))
    gray = mupdf.pdf_new_array(pdf_doc, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real(gray, 0.8)
    mupdf.pdf_dict_put(bookmark, PDF_NAME('C'), gray)
3599
def _reset_page_refs(self):
    """Erase every page registered with the document and empty the registry.

    Does nothing if the document is closed or has no `is_closed` attribute.
    """
    if getattr(self, "is_closed", True):
        return
    # iterate over a snapshot of the registered pages
    for registered in list(self._page_refs.values()):
        if registered:
            registered._erase()
    self._page_refs.clear()
3610
def _set_page_labels(self, labels):
    """Replace the catalog's /PageLabels by a /Nums array holding `labels`.

    An empty /Nums array is stored first; its text representation in the
    catalog source is then replaced by one containing `labels`.
    """
    pdf_doc = _as_pdf_document(self)
    key = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
    mupdf.pdf_dict_del(catalog, key)
    empty_nums = mupdf.pdf_new_array(pdf_doc, 0)
    mupdf.pdf_dict_putl(catalog, empty_nums, key, PDF_NAME('Nums'))

    catalog_xref = self.pdf_catalog()
    source = self.xref_object(catalog_xref, compressed=True)
    self.update_object(catalog_xref, source.replace("/Nums[]", "/Nums[%s]" % labels))
3622
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Update properties of the bookmark at `xref`.

    title: new /Title, ignored if false.
    action: source of the new /A object (replaces /Dest), ignored if false.
    flags: always stored as /F.
    color: three numbers stored as /C; a false value other than None
        deletes /C, None leaves it alone.
    collapse: True / False flips the sign of an existing /Count if needed,
        None leaves it alone.
    '''
    pdf_doc = _as_pdf_document(self)
    bookmark = mupdf.pdf_new_indirect(pdf_doc, xref, 0)
    if title:
        mupdf.pdf_dict_put_text_string(bookmark, PDF_NAME('Title'), title)
    if action:
        mupdf.pdf_dict_del(bookmark, PDF_NAME('Dest'))
        action_obj = JM_pdf_obj_from_str(pdf_doc, action)
        mupdf.pdf_dict_put(bookmark, PDF_NAME('A'), action_obj)
    mupdf.pdf_dict_put_int(bookmark, PDF_NAME('F'), flags)

    if color:
        rgb = mupdf.pdf_new_array(pdf_doc, 3)
        for k in range(3):
            mupdf.pdf_array_push_real(rgb, color[k])
        mupdf.pdf_dict_put(bookmark, PDF_NAME('C'), rgb)
    elif color is not None:
        mupdf.pdf_dict_del(bookmark, PDF_NAME('C'))

    if collapse is None:
        return
    if not mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int(bookmark, PDF_NAME('Count'))
    must_flip = (count < 0 and collapse is False) or (count > 0 and collapse is True)
    if must_flip:
        mupdf.pdf_dict_put_int(bookmark, PDF_NAME('Count'), -count)
3650
@property
def FormFonts(self):
    """List of the font resource names in /Root/AcroForm/DR/Font.

    None if the document is not a PDF, an empty list if there is no such
    font dictionary.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if not pdf_doc.m_internal:
        return None
    font_dict = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf_doc),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    names = []
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        return names
    for pos in range(mupdf.pdf_dict_len(font_dict)):
        key = mupdf.pdf_dict_get_key(font_dict, pos)
        names.append(JM_UnicodeFromStr(mupdf.pdf_to_name(key)))
    return names
3671
def add_layer(self, name, creator=None, on=None):
    """Add a new optional content layer configuration and re-read the OCGs."""
    pdf_doc = _as_pdf_document(self)
    JM_add_layer_config(pdf_doc, name, creator, on)
    # make MuPDF re-read the optional content information
    mupdf.ll_pdf_read_ocg(pdf_doc.m_internal)
3677
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: name of the new OCG.
        config: number of the configuration (item of /Configs) to put the
            OCG in; -1 selects the default configuration /D.
        on: if true, the OCG is entered in the configuration's /ON array,
            else in its /OFF array.
        intent: a name, or a sequence of names, for the /Intent array;
            "View" is used if this is false.
        usage: /Subtype of the /CreatorInfo usage entry; "Artwork" is
            used if this is false.

    Returns:
        The xref of the new OCG.

    Raises:
        ValueError: if `config` does not denote an existing configuration.
    """
    pdf = _as_pdf_document(self)

    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # sequence of intent names: push every usable item instead of
        # failing an assertion; items that are no strings are skipped
        for item in intent:
            if isinstance(item, str) and item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    if usage:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage)
    else:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)

    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)

    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError( MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError( MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)

    # register the initial visibility state
    state_key = PDF_NAME('ON') if on else PDF_NAME('OFF')
    obj = mupdf.pdf_dict_get(cfg, state_key)
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, state_key, 1)
    mupdf.pdf_array_push(obj, indocg)

    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)

    return mupdf.pdf_to_num(indocg)
3747
def authenticate(self, password):
    """Try to decrypt the document with `password`.

    Returns the result of MuPDF's password check. On success the document
    is flagged as not encrypted and gets (re-)initialized.
    """
    if self.is_closed:
        raise ValueError("document closed")
    result = mupdf.fz_authenticate_password(self.this, password)
    if not result:
        return result
    # the doc is decrypted successfully and we init the outline
    self.is_encrypted = False
    self.init_doc()
    self.thisown = True
    return result
3759
def can_save_incrementally(self):
    """Tell whether the document can be saved incrementally (False for non-PDFs)."""
    pdf_doc = _as_pdf_document(self, required=0)
    if pdf_doc.m_internal:
        return mupdf.pdf_can_be_saved_incrementally(pdf_doc)
    return False
3766
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Turn annotations and / or form fields into permanent page content.

    Notes:
        The selected annotations or widgets become regular page content
        (text, vector graphics, as appropriate). Pages keep their
        appearance, but no longer contain annotations, respectively
        fields. Baking the widgets means the PDF stops being a Form PDF.

    Args:
        annots: convert annotations
        widgets: convert form fields

    """
    pdf_doc = _as_pdf_document(self)
    bake_annots = int(annots)
    bake_widgets = int(widgets)
    mupdf.pdf_bake_document(pdf_doc, bake_annots, bake_widgets)
3784
@property
def chapter_count(self):
    """Number of chapters in the document."""
    if not self.is_closed:
        return mupdf.fz_count_chapters(self.this)
    raise ValueError("document closed")
3791
def chapter_page_count(self, chapter):
    """Number of pages in chapter number `chapter`.

    Raises:
        ValueError: if the document is closed or there is no such chapter.
    """
    if self.is_closed:
        raise ValueError("document closed")
    fz_doc = self.this
    total = mupdf.fz_count_chapters(fz_doc)
    if chapter < 0 or chapter >= total:
        raise ValueError("bad chapter number")
    return mupdf.fz_count_chapter_pages(fz_doc, chapter)
3801
def close(self):
    """Close the document and release what it holds.

    Raises:
        ValueError: if the document is closed already (or has no
            `is_closed` attribute).
    """
    if getattr(self, "is_closed", True):
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    self._reset_page_refs()
    self.is_closed = True
    self.Graftmaps = {}  # Fixes test_3140().
    self.this = None
3819
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert a page range of the document to PDF, optionally rotated.

    Out-of-range page numbers are clamped to the document. MuPDF warnings
    recorded during the conversion are forwarded to `message()`.
    Returns what `JM_convert_to_pdf()` delivers.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last_page = mupdf.fz_count_pages(fz_doc) - 1

    first = from_page
    if first < 0:
        first = 0
    if first > last_page:
        first = last_page
    final = to_page
    if final < 0 or final > last_page:
        final = last_page

    warnings_before = len(JM_mupdf_warnings_store)
    result = JM_convert_to_pdf(fz_doc, first, final, rotate)
    warnings_after = len(JM_mupdf_warnings_store)
    for pos in range(warnings_before, warnings_after):
        message(f'{JM_mupdf_warnings_store[pos]}')
    return result
3842
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    No new page object is made: the document merely gets a second
    reference to the same page.
    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.
    """
    if self.is_closed:
        raise ValueError("document closed")

    page_count = len(self)
    source_ok = pno in range(page_count)
    target_ok = to in range(-1, page_count)
    if not (source_ok and target_ok):
        raise ValueError("bad page number(s)")
    if to == -1:
        # append: insert behind the last page
        return self._move_copy_page(pno, page_count - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
3867
def del_xml_metadata(self):
    """Remove the /Metadata entry from the PDF catalog."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del(catalog, PDF_NAME('Metadata'))
3876
def delete_page(self, pno: int =-1):
    """Delete a single page from a PDF by delegating to `delete_pages()`."""
    outcome = self.delete_pages(pno)
    return outcome
3881
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a list/tuple/range object, which can contain arbitrary
        page numbers.
        Or a single integer page number.

    Raises:
        ValueError: if the document is no PDF or is closed, if the
            arguments are missing or malformed, or if a page number is
            outside the document.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if self.is_closed:
        raise ValueError("document closed")

    page_count = self.page_count  # page count of document

    def from_end(n):
        # Map a negative page number to its position counted from the
        # end. Without pages there is nothing it could refer to (and
        # repeatedly adding a page count of 0 would never terminate).
        if n < 0:
            if page_count <= 0:
                raise ValueError("bad page number(s)")
            n %= page_count
        return n

    if kw:  # check if keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = from_end(kw.get("from_page", -1))  # first page to delete
        t = from_end(kw.get("to_page", -1))  # last page to delete
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    else:
        # `args` is a tuple, so it must be tested for emptiness - a
        # comparison with [] can never be true
        if not args or len(args) > 2:
            raise ValueError("need 1 or 2 positional arguments")
        if len(args) == 2:
            f, t = args
            if not (type(f) is int and type(t) is int):
                raise ValueError("both arguments must be int")
            if f > t:
                f, t = t, f
            if not f <= t < page_count:
                raise ValueError("bad page number(s)")
            numbers = tuple(range(f, t + 1))
        elif isinstance(args[0], int):
            numbers = (from_end(args[0]),)
        else:
            numbers = tuple(args[0])

    # convert to int before building the set, so values that only become
    # equal through the conversion cannot survive as duplicates
    numbers = sorted({int(n) for n in numbers})
    if not numbers:
        message("nothing to delete")
        return
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")
    frozen_numbers = frozenset(numbers)
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        if toc[i][2] - 1 in frozen_numbers:
            self._remove_toc_item(xref)  # remove target in PDF object

    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
3950
def embfile_add(self,
        name: str,
        buffer_: ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description, default: the name
    Returns:
        The xref delivered by `_embfile_add()` for the new item.
    Raises:
        ValueError: if an item called `name` exists already.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    if filename is None:
        filename = name
    if ufilename is None:
        ufilename = filename
    if desc is None:
        desc = name
    xref = self._embfile_add(
        name,
        buffer_=buffer_,
        filename=filename,
        ufilename=ufilename,
        desc=desc,
    )
    # creation and modification date are the same for a new item
    date = get_pdf_str(get_pdf_now())
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    self.xref_set_key(xref, "Params/CreationDate", date)
    self.xref_set_key(xref, "Params/ModDate", date)
    return xref
3990
def embfile_count(self) -> int:
    """Return how many entries the EmbeddedFiles array has."""
    all_names = self.embfile_names()
    return len(all_names)
3994
def embfile_del(self, item: typing.Union[int, str]):
    """Remove one entry from EmbeddedFiles.

    Notes:
        `item` is the name or the index of an EmbeddedFiles entry.
        The data is physically removed only when saving to a new
        file with a suitable garbage option.
    Args:
        item: name or number of item.
    Returns:
        Whatever `_embfile_del()` returns.
    """
    entry_no = self._embeddedFileIndex(item)
    return self._embfile_del(entry_no)
4009
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Return the content of one entry of the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        (bytes) The file content.
    """
    entry_no = self._embeddedFileIndex(item)
    content = self._embeddedFileGet(entry_no)
    return content
4020
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Collect information about one entry of the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        Information dictionary. Creation date, modification date and
        checksum are only present if the PDF has the respective keys.
    """
    entry_no = self._embeddedFileIndex(item)
    info = {"name": self.embfile_names()[entry_no]}
    xref = self._embfile_info(entry_no, info)

    for pdf_key, info_key in (
        ("Params/CreationDate", "creationDate"),
        ("Params/ModDate", "modDate"),
    ):
        kind, value = self.xref_get_key(xref, pdf_key)
        if kind != "null":
            info[info_key] = value

    kind, checksum = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        info["checksum"] = binascii.hexlify(checksum.encode()).decode()
    return info
4042
def embfile_names(self) -> list:
    """Return the names of all EmbeddedFiles entries as a new list."""
    collected = []
    self._embfile_names(collected)
    return collected
4048
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Change an item of the EmbeddedFiles array.

    Notes:
        The arguments are handed to the internal `_embfile_upd()`;
        omitted (None) arguments are intended to leave the respective
        value unchanged. The "Params/ModDate" key of the file object is
        always set to the current time.
    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.
    Returns:
        The xref returned by `_embfile_upd()`.
    """
    idx = self._embeddedFileIndex(item)
    xref = self._embfile_upd(
            idx,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    # Stamp the modification date on the updated file object.
    date = get_pdf_now()
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
4079
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Look up the font stored at `xref`.

    Returns the four items name, extension, subtype and content, either
    as a tuple or - if `named` is true - as a dictionary. All items are
    empty if the object's Type is not 'Font' or its Subtype starts with
    'CIDFontType'. The content is empty if `info_only` is true or the
    font extension is 'n/a'.
    '''
    pdf = _as_pdf_document(self)
    font_obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(font_obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(font_obj, PDF_NAME('Subtype'))

    is_font = mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
    if is_font and not mupdf.pdf_to_name(subtype).startswith('CIDFontType'):
        # Prefer /BaseFont, fall back to /Name.
        basefont = mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))
        if basefont.m_internal and not mupdf.pdf_is_null(basefont):
            name_obj = basefont
        else:
            name_obj = mupdf.pdf_dict_get(font_obj, PDF_NAME('Name'))

        ext = JM_get_fontextension(pdf, xref)
        if ext == 'n/a' or info_only:
            content = b''
        else:
            content = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))

        font_name = JM_EscapeStrFromStr(mupdf.pdf_to_name(name_obj))
        font_ext = JM_UnicodeFromStr(ext)
        font_type = JM_UnicodeFromStr(mupdf.pdf_to_name(subtype))
    else:
        font_name = font_ext = font_type = ''
        content = b''

    if named:
        return {
                dictkey_name: font_name,
                dictkey_ext: font_ext,
                dictkey_type: font_type,
                dictkey_content: content,
                }
    return font_name, font_ext, font_type, content
4128
def extract_image(self, xref):
    """Return a dictionary describing the image stored at `xref`.

    Raises ValueError if the document is closed or encrypted, if `xref`
    is out of range, or if the object's Subtype is not 'Image'.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    pdf = _as_pdf_document(self)
    if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
        raise ValueError( MSG_BAD_XREF)

    img_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    img_subtype = mupdf.pdf_dict_get(img_obj, PDF_NAME('Subtype'))
    if not mupdf.pdf_name_eq(img_subtype, PDF_NAME('Image')):
        raise ValueError( "not an image")

    # Object number of /SMask (or /Mask), 0 if there is none.
    mask_obj = mupdf.pdf_dict_geta(img_obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
    smask = mupdf.pdf_to_num(mask_obj) if mask_obj.m_internal else 0

    image = mupdf.pdf_load_image(pdf, img_obj)
    result = {}
    _make_image_dict(image, result)
    result[dictkey_smask] = smask
    result[dictkey_cs_name] = mupdf.fz_colorspace_name(image.colorspace())
    return result
4158
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Call `save()` with this method's own set of default values.

    Every option is forwarded to `save()` as a keyword argument.
    '''
    options = {
            'garbage': garbage,
            'clean': clean,
            'deflate': deflate,
            'deflate_images': deflate_images,
            'deflate_fonts': deflate_fonts,
            'incremental': incremental,
            'ascii': ascii,
            'expand': expand,
            'linear': linear,
            'pretty': pretty,
            'encryption': encryption,
            'permissions': permissions,
            'owner_pw': owner_pw,
            'user_pw': user_pw,
            'no_new_id': no_new_id,
            'preserve_metadata': preserve_metadata,
            'use_objstms': use_objstms,
            'compression_effort': compression_effort,
            }
    return self.save(filename, **options)
4205
def find_bookmark(self, bm):
    """Return (chapter, page) of bookmark `bm` after layouting a document."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    found = mupdf.fz_lookup_bookmark2(self.this, bm)
    return found.chapter, found.page
4212
def fullcopy_page(self, pno, to=-1):
    """Insert a deep copy of page `pno` at position `to`.

    Annotations are copied too, except those of subtype 'Popup' and
    those having an 'IRT' entry. Raises ValueError for page numbers
    outside the document.
    """
    pdf = _as_pdf_document(self)
    page_count = mupdf.pdf_count_pages(pdf)
    try:
        last = page_count - 1
        if not _INRANGE(pno, 0, last) or not _INRANGE(to, -1, last):
            raise ValueError( MSG_BAD_PAGENO)

        src_page = mupdf.pdf_resolve_indirect(mupdf.pdf_lookup_page_obj(pdf, pno))
        new_page = mupdf.pdf_deep_copy_obj(src_page)
        old_annots = mupdf.pdf_dict_get(new_page, PDF_NAME('Annots'))

        # Rebuild the annotation array from copies of the kept annotations.
        if old_annots.m_internal:
            annot_count = mupdf.pdf_array_len(old_annots)
            new_annots = mupdf.pdf_new_array(pdf, annot_count)
            for idx in range(annot_count):
                annot = mupdf.pdf_array_get(old_annots, idx)
                subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq(subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_dict_gets(annot, "IRT").m_internal:
                    continue
                # Store the copy as a new object and work on its reference.
                annot_copy = mupdf.pdf_deep_copy_obj(mupdf.pdf_resolve_indirect(annot))
                annot_xref = mupdf.pdf_create_object(pdf)
                mupdf.pdf_update_object(pdf, annot_xref, annot_copy)
                annot_copy = mupdf.pdf_new_indirect(pdf, annot_xref, 0)
                mupdf.pdf_dict_del(annot_copy, PDF_NAME('Popup'))
                mupdf.pdf_dict_del(annot_copy, PDF_NAME('P'))
                mupdf.pdf_array_push(new_annots, annot_copy)
            mupdf.pdf_dict_put(new_page, PDF_NAME('Annots'), new_annots)

        # Read the content stream(s) of the source page ...
        src_contents = JM_read_contents(src_page)

        # ... and give the copy its own /Contents object.
        if src_contents and src_contents.m_internal:
            contents = mupdf.pdf_add_stream(
                    pdf,
                    mupdf.fz_new_buffer_from_copied_data(b" "),
                    mupdf.PdfObj(),
                    0,
                    )
            JM_update_stream(pdf, contents, src_contents, 1)
            mupdf.pdf_dict_put(new_page, PDF_NAME('Contents'), contents)

        # Store the copy as a new object and insert its indirect reference.
        page_xref = mupdf.pdf_create_object(pdf)
        mupdf.pdf_update_object(pdf, page_xref, new_page)
        new_page = mupdf.pdf_new_indirect(pdf, page_xref, 0)
        mupdf.pdf_insert_page(pdf, to, new_page)
    finally:
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)

    self._reset_page_refs()
4268
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    `config` -1 selects the 'D' entry of OCProperties, any other value
    is an index into its 'Configs' array. Returns None if the document
    has no OCProperties; raises ValueError if the entry does not exist.
    """
    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return
    if config != -1:
        configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        layer = mupdf.pdf_array_get(configs, config)
    else:
        layer = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    if not layer.m_internal:
        raise ValueError( MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays(layer)
4290
def get_layers(self):
    """Return a list of dictionaries (number, name, creator), one per
    optional content layer configuration."""
    pdf = _as_pdf_document(self)
    count = mupdf.pdf_count_layer_configs(pdf)
    if count == 1:
        # A single configuration is only reported if 'Configs' is an array.
        configs = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                PDF_NAME('Root'),
                PDF_NAME('OCProperties'),
                PDF_NAME('Configs'),
                )
        if not mupdf.pdf_is_array(configs):
            count = 0
    layers = []
    info = mupdf.PdfLayerConfig()
    for number in range(count):
        mupdf.pdf_layer_config_info(pdf, number, info)
        layers.append({
                "number": number,
                "name": info.name,
                "creator": info.creator,
                })
    return layers
4315
def get_new_xref(self):
    """Create a new object in the PDF and return its xref."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
4325
def get_ocgs(self):
    """Return a dictionary of the optional content groups, keyed by xref.

    Each value is a dictionary with the keys "name", "intent" (list of
    names), "on" and "usage". The result is empty if OCProperties/OCGs
    is not an array.
    """
    creator_info = mupdf.pdf_new_name("CreatorInfo")
    pdf = _as_pdf_document(self)
    ocgs = mupdf.pdf_dict_getl(
            mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')),
            PDF_NAME('OCProperties'),
            PDF_NAME('OCGs'),
            )
    result = {}
    if not mupdf.pdf_is_array(ocgs):
        return result
    for idx in range(mupdf.pdf_array_len(ocgs)):
        ocg = mupdf.pdf_array_get(ocgs, idx)
        xref = mupdf.pdf_to_num(ocg)
        name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))

        # Usage/CreatorInfo/Subtype, None if absent.
        usage_obj = mupdf.pdf_dict_getl(ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype'))
        usage = mupdf.pdf_to_name(usage_obj) if usage_obj.m_internal else None

        # /Intent is either a single name or an array of names.
        intents = []
        intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name(intent):
                intents.append(mupdf.pdf_to_name(intent))
            elif mupdf.pdf_is_array(intent):
                for pos in range(mupdf.pdf_array_len(intent)):
                    entry = mupdf.pdf_array_get(intent, pos)
                    if mupdf.pdf_is_name(entry):
                        intents.append(mupdf.pdf_to_name(entry))

        hidden = mupdf.pdf_is_ocg_hidden(pdf, mupdf.PdfObj(), usage, ocg)
        result[xref] = {
                "name": name,
                "intent": intents,
                "on": not hidden,
                "usage": usage,
                }
    return result
4368
def get_outline_xrefs(self):
    """Return the list of outline xref numbers.

    The list is empty for a non-PDF or if any of Root, Outlines or
    First is missing.
    """
    xrefs = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs
    # Walk trailer -> Root -> Outlines -> First, giving up at the first gap.
    node = mupdf.pdf_trailer(pdf)
    for key in ('Root', 'Outlines', 'First'):
        node = mupdf.pdf_dict_get(node, PDF_NAME(key))
        if not node.m_internal:
            return xrefs
    return JM_outline_xrefs(node, xrefs)
4386
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Return the list of fonts used on a page.

    `pno` may be a page number or an object with a `number` attribute.
    Unless `full` is true, the last item of every entry is dropped.
    For a non-PDF an empty tuple is returned.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    fonts = self._getPageInfo(pno, 1)
    return fonts if full else [font[:-1] for font in fonts]
4404
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Return the list of images used on a page.

    Unless `full` is true, the last item of every entry is dropped.
    For a non-PDF an empty tuple is returned.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    images = self._getPageInfo(pno, 2)
    return images if full else [image[:-1] for image in images]
4416
def get_page_xobjects(self, pno: int) -> list:
    """Return the list of XObjects used on a page.

    For a non-PDF an empty tuple is returned.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self.is_pdf:
        return self._getPageInfo(pno, 3)
    return ()
4426
def get_sigflags(self):
    """Return the /SigFlags value, or -1 for a non-PDF or if it is absent."""
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return -1   # not a PDF
    flags_obj = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('SigFlags'),
            )
    if not flags_obj.m_internal:
        return -1
    return mupdf.pdf_to_int(flags_obj)
4442
def get_xml_metadata(self):
    """Return the document's XML metadata as a string.

    The result is '' for a non-PDF or if Root/Metadata does not exist.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return ''
    xml = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('Metadata'),
            )
    if xml is None or not xml.m_internal:
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(xml))
4459
def init_doc(self):
    """(Re-)load the outline and fill the `metadata` dictionary.

    Every metadata value is fetched with `_getMetadata()`. The
    'encryption' entry is None if the reported value is the string
    'None'.

    Raises:
        ValueError: the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()
    # metadata key -> key understood by _getMetadata()
    lookup_keys = {
            'format': 'format',
            'title': 'info:Title',
            'author': 'info:Author',
            'subject': 'info:Subject',
            'keywords': 'info:Keywords',
            'creator': 'info:Creator',
            'producer': 'info:Producer',
            'creationDate': 'info:CreationDate',
            'modDate': 'info:ModDate',
            'trapped': 'info:Trapped',
            }
    self.metadata = {k: self._getMetadata(v) for k, v in lookup_keys.items()}
    # Look the value up once instead of once for the test and once for the result.
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
4481
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document into this PDF.

    `infile` may be a Pixmap, a Document, or anything `Document()`
    accepts. A Pixmap is turned into a PNG document (after conversion to
    RGB if its colorspace has more than 3 components). A non-PDF source
    is converted to PDF first. The remaining parameters are passed on to
    insert_pdf().
    '''
    if isinstance(infile, Pixmap):
        if infile.colorspace.n > 3:
            infile = Pixmap(csRGB, infile)
        src = Document("png", infile.tobytes())
    elif isinstance(infile, Document):
        src = infile
    else:
        src = Document(infile)

    if not src:
        raise ValueError("bad infile parameter")
    if not src.is_pdf:
        src = Document("pdf", src.convert_to_pdf())

    return self.insert_pdf(
            src,
            from_page=from_page,
            to_page=to_page,
            start_at=start_at,
            rotate=rotate,
            links=links,
            annots=annots,
            show_progress=show_progress,
            final=final,
            )
4524
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only. The passed value is not used: the
            Graftmap is always looked up in (or created for)
            `self.Graftmaps`.

    Raises:
        ValueError: document closed or encrypted, or source and target
            are the same object.
        TypeError: source or target is not a PDF (only checked when the
            `extra` implementation is not used).

    Copy sequence reversed if from_page > to_page."""

    # Insert pages from a source PDF into this PDF.
    # For reconstructing the links (_do_links method), we must know the
    # actual insertion point even if start_at was specified as -1.
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")
    outCount = self.page_count
    srcCount = docsrc.page_count

    # local copies of page numbers
    fp = from_page
    tp = to_page
    sa = start_at

    # normalize page numbers
    fp = max(fp, 0)                 # -1 = first page
    fp = min(fp, srcCount - 1)      # but do not exceed last page

    if tp < 0:
        tp = srcCount - 1           # -1 = last page
    tp = min(tp, srcCount - 1)      # but do not exceed last page

    if sa < 0:
        sa = outCount               # -1 = behind last page
    sa = min(sa, outCount)          # but that is also the limit

    if len(docsrc) > show_progress > 0:
        inname = os.path.basename(docsrc.name)
        if not inname:
            inname = "memory PDF"
        outname = os.path.basename(self.name)
        if not outname:
            outname = "memory PDF"
        message("Inserting '%s' at '%s'" % (inname, outname))

    # retrieve / make a Graftmap to avoid duplicate objects
    isrt = docsrc._graft_id
    _gmap = self.Graftmaps.get(isrt, None)
    if _gmap is None:
        _gmap = Graftmap(self)
        self.Graftmaps[isrt] = _gmap

    if g_use_extra:
        # The extra implementation receives the page numbers as given
        # by the caller, not the normalized ones.
        extra_FzDocument_insert_pdf(
                self.this,
                docsrc.this,
                from_page,
                to_page,
                start_at,
                rotate,
                links,
                annots,
                show_progress,
                final,
                _gmap,
                )
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)

        if not pdfout.m_internal or not pdfsrc.m_internal:
            raise TypeError( "source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)

    self._reset_page_refs()
    if links:
        self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
    if widgets:
        self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
    if final == 1:
        # Drop the Graftmap: no more insertions from this source expected.
        self.Graftmaps[isrt] = None
4643
@property
def is_dirty(self):
    """True if the PDF has unsaved changes, False otherwise or for a non-PDF."""
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_has_unsaved_changes(pdf))
    return False
4651
@property
def is_fast_webaccess(self):
    '''
    Tell whether the PDF was linearized. False for a non-PDF.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False    # gracefully handle non-PDF
    return mupdf.pdf_doc_was_linearized(pdf)
4661
@property
def is_form_pdf(self):
    """Either False or the length of the Root/AcroForm/Fields array.

    False is returned for a non-PDF, if Fields is not an array, or if
    looking it up raises an exception.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False
    try:
        fields = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                mupdf.PDF_ENUM_NAME_Root,
                mupdf.PDF_ENUM_NAME_AcroForm,
                mupdf.PDF_ENUM_NAME_Fields,
                )
        count = mupdf.pdf_array_len(fields) if mupdf.pdf_is_array(fields) else -1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return False
    return count if count >= 0 else False
4684
@property
def is_pdf(self):
    """Tell whether the underlying document is a PDF."""
    if isinstance(self.this, mupdf.PdfDocument):
        return True
    # Use the low-level ll_pdf_specifics() here: mupdf.pdf_specifics()
    # would end up creating a new PdfDocument, which is ok but a little
    # unnecessary.
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
4699
@property
def is_reflowable(self):
    """Tell whether the document can be re-layouted."""
    if not self.is_closed:
        return bool(mupdf.fz_is_document_reflowable(self))
    raise ValueError("document closed")
4706
@property
def is_repaired(self):
    """True if the PDF was repaired, False otherwise or for a non-PDF."""
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_was_repaired(pdf))
    return False
4717
def journal_can_do(self):
    """Return a dictionary telling whether 'undo' and 'redo' are possible."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    can_undo = mupdf.pdf_can_undo(pdf)
    can_redo = mupdf.pdf_can_redo(pdf)
    return {'undo': bool(can_undo), 'redo': bool(can_redo)}
4728
def journal_enable(self):
    """Switch on journalling for this document."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_enable_journal(_as_pdf_document(self))
4735
def journal_is_enabled(self):
    """Check if journalling is enabled.

    Returns:
        (bool) True if the PDF has a journal, else False. The result is
        converted to bool so that the internal journal object is not
        handed out to callers.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    return bool(pdf.m_internal and pdf.m_internal.journal)
4743
def journal_load(self, filename):
    """Load a journal.

    `filename` is either a str (the journal is loaded from that file)
    or data from which a buffer can be made. An error is raised if the
    PDF has no journal afterwards.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        stream = mupdf.fz_open_buffer(JM_BufferFromBytes(filename))
        mupdf.pdf_deserialise_journal(pdf, stream)
    else:
        mupdf.pdf_load_journal(pdf, filename)
    if not pdf.m_internal.journal:
        RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
4757
def journal_op_name(self, step):
    """Return the operation name of journal step `step`."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return mupdf.pdf_undoredo_step(_as_pdf_document(self), step)
4765
def journal_position(self):
    """Return the two values reported by mupdf.pdf_undoredo_state()."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    position, steps = mupdf.pdf_undoredo_state(pdf)
    return position, steps
4774
def journal_redo(self):
    """Redo one journal step. Always returns True."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_redo(_as_pdf_document(self))
    return True
4782
def journal_save(self, filename):
    """Save the journal.

    `filename` is either a str (file to write to) or an object that
    JM_new_output_fileptr() can wrap as an output.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        out = JM_new_output_fileptr(filename)
        mupdf.pdf_write_journal(pdf, out)
        out.fz_close_output()
    else:
        mupdf.pdf_save_journal(pdf, filename)
4794
def journal_start_op(self, name=None):
    """Start a journalling operation.

    A non-empty `name` starts a named operation, otherwise an implicit
    one is started. Raises RuntimeError if the PDF has no journal.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf.m_internal.journal:
        raise RuntimeError( "Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf)
    else:
        mupdf.pdf_begin_operation(pdf, name)
4806
def journal_stop_op(self):
    """Finish the current journalling operation."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_end_operation(_as_pdf_document(self))
4813
def journal_undo(self):
    """Undo one journal step. Always returns True."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_undo(_as_pdf_document(self))
    return True
4821
@property
def language(self):
    """Document language as a string.

    None for a non-PDF or if the language is unset.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    lang = mupdf.pdf_document_language(pdf)
    if lang != mupdf.FZ_LANG_UNSET:
        return mupdf.fz_string_from_text_language2(lang)
    return
4832
@property
def last_location(self):
    """(chapter, page) of the last page."""
    if self.is_closed:
        raise ValueError("document closed")
    loc = mupdf.fz_last_page(self.this)
    return loc.chapter, loc.page
4840
def layer_ui_configs(self):
    """Return a list of dictionaries, one per layer UI configuration item.

    Each has the keys "number", "text", "depth", "type", "on" and
    "locked". The type is "checkbox" (1), "radiobox" (2) or "label"
    (anything else).
    """
    type_names = {1: "checkbox", 2: "radiobox"}
    pdf = _as_pdf_document(self)
    info = mupdf.PdfLayerConfigUi()
    count = mupdf.pdf_count_layer_config_ui(pdf)
    items = []
    for number in range(count):
        mupdf.pdf_layer_config_ui_info(pdf, number, info)
        items.append({
                "number": number,
                "text": info.text,
                "depth": info.depth,
                "type": type_names.get(info.type, "label"),
                "on": info.selected,
                "locked": info.locked,
                })
    return items
4865
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    The page size is taken from `rect` unless that converts to an
    infinite rectangle, in which case `width` and `height` are used.
    Nothing happens for a document that is not reflowable. Raises
    ValueError if the resulting width or height is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if not mupdf.fz_is_document_reflowable(doc):
        return
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r):
        w, h = width, height
    else:
        w = r.x1 - r.x0
        h = r.y1 - r.y0
    if w <= 0.0 or h <= 0.0:
        raise ValueError( "bad page size")
    mupdf.fz_layout_document(doc, w, h, fontsize)

    # Page objects and metadata are stale after a new layout.
    self._reset_page_refs()
    self.init_doc()
4885
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter. None is
    treated as 0. A negative page number counts from the end.

    Raises:
        ValueError: document closed or encrypted, or page not in document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        np = self.page_count
        # Repeatedly adding a page count of 0 would never end.
        if np <= 0:
            raise ValueError("page not in document")
        page_id %= np
    if isinstance(page_id, int):
        page = mupdf.fz_load_page(self.this, page_id)
    else:
        chapter, pagenum = page_id
        page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    val = Page(page, self)

    val.thisown = True
    val.parent = self
    # Register the page with the document.
    self._page_refs[id(val)] = val
    val._annot_refs = weakref.WeakValueDictionary()
    val.number = page_id
    return val
4915
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    A negative `pno` counts from the end of the document.

    Raises:
        ValueError: document closed, or `pno` not in the document
            (this includes every `pno` for a document without pages).
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages(this_doc)
    if pno < 0:
        # Repeatedly adding a page count of 0 would never end.
        if page_count <= 0:
            raise ValueError( MSG_BAD_PAGENO)
        pno %= page_count
    if pno >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(this_doc, pno)
    return loc.chapter, loc.page
4929
def make_bookmark(self, loc):
    """Make a page pointer for location `loc` before layouting the document."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2(self.this.m_internal, location.internal())
4937
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    The result is None if there is no catalog, {} if MarkInfo is absent
    or not a dictionary, else a dictionary in which "Marked",
    "UserProperties" and "Suspects" default to False. An entry is set
    to True if its value in the PDF is "true".
    """
    xref = self.pdf_catalog()
    if xref == 0:
        return None
    rc = self.xref_get_key(xref, "MarkInfo")
    kind = rc[0]
    if kind == "null":
        return {}
    if kind == "xref":
        # indirect reference: read the referenced object's source
        xref = int(rc[1].split()[0])
        source = self.xref_object(xref, compressed=True)
    elif kind == "dict":
        source = rc[1]
    else:
        source = None
    if source is None or source[:2] != "<<" or source[-2:] != ">>":
        return {}
    valid = {"Marked": False, "UserProperties": False, "Suspects": False}
    # Items look like "Key value"; the part before the first "/" is skipped.
    for entry in source[2:-2].split("/")[1:]:
        try:
            key, value = entry.split()
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            return valid
        if value == "true":
            valid[key] = True
    return valid
4967
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: document closed or a page number out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total = len(self)
    numbers_ok = pno in range(total) and to in range(-1, total)
    if not numbers_ok:
        raise ValueError("bad page number(s)")
    # The trailing arguments are the 'before' and 'copy' flags.
    if to == -1:
        # "after the last page" = not before the last page
        return self._move_copy_page(pno, total - 1, 0, 0)
    return self._move_copy_page(pno, to, 1, 0)
4987
@property
def name(self):
    """Return the stored document name (`self._name`)."""
    stored = self._name
    return stored
4991
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None only inquires the current value and leaves the PDF
            unchanged. Any other value sets the key: to true if `value`
            is truthy, else to false.
    Returns:
        None if this is not a Form PDF, or - for an inquiry - if the
        key is not present as a boolean. For an inquiry otherwise the
        current value as a bool. When setting, `value` itself.
    """
    if not self.is_form_pdf:
        return None

    pdf = _as_pdf_document(self)
    appkey = "NeedAppearances"

    form = mupdf.pdf_dict_getp(
            mupdf.pdf_trailer(pdf),
            "Root/AcroForm",
            )
    if value is None:
        # Pure inquiry: must not modify the document.
        app = mupdf.pdf_dict_gets(form, appkey)
        if mupdf.pdf_is_bool(app):
            return bool(mupdf.pdf_to_bool(app))
        return None

    if value:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
    else:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
    return value
5015
@property
def needs_pass(self):
    """Tell whether a password is required."""
    if self.is_closed:
        raise ValueError("document closed")
    # fz_needs_password() is given an FzDocument.
    if isinstance(self.this, mupdf.FzDocument):
        document = self.this
    else:
        document = self.this.super()
    return mupdf.fz_needs_password(document)
5024
def next_location(self, page_id):
    """Return (chapter, page) of the page following `page_id`.

    An int `page_id` is taken as a page of chapter 0. The result is ()
    if `page_id` is the last location of the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        return ()
    fz_doc = _as_fz_document(self)
    chapter = page_id[0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[1]
    successor = mupdf.fz_next_page(fz_doc, mupdf.fz_make_location(chapter, pno))
    return successor.chapter, successor.page
5045
def page_annot_xrefs(self, n):
    """Return the annotation xref list of page number `n`.

    A negative `n` counts from the end of the document.

    Raises:
        ValueError: `n` is not in the document (only checked here when
            the `extra` implementation is not used).
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)

    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    if n < 0:
        # Repeatedly adding a page count of 0 would never end.
        if page_count <= 0:
            raise ValueError( MSG_BAD_PAGENO)
        n %= page_count
    # Valid page numbers are 0 .. page_count - 1.
    if n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    return JM_get_annot_xref_list(page_obj)
5063
@property
def page_count(self):
    """Number of pages."""
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        return self.page_count2(self)
    # Pick the counting function that matches the wrapped document type.
    if isinstance(self.this, mupdf.FzDocument):
        counter = mupdf.fz_count_pages
    else:
        counter = mupdf.pdf_count_pages
    return counter(self.this)
5075
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    A negative `pno` counts from the end of the document.

    Returns:
        The CropBox as a Rect.
    Raises:
        ValueError: document closed, or `pno` not in the document.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    if n < 0:
        # Repeatedly adding a page count of 0 would never end.
        if page_count <= 0:
            raise ValueError( MSG_BAD_PAGENO)
        n %= page_count
    pdf = _as_pdf_document(self)
    if n >= page_count:
        raise ValueError( MSG_BAD_PAGENO)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    cropbox = JM_cropbox(pageref)
    return Rect(JM_py_from_rect(cropbox))
5095
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: either a (chapter, pno) pair or an int. An int is treated
            as a page of chapter 0; negative ints count from the end.
    Returns:
        The page number computed by fz_page_number_from_location().
    Raises:
        ValueError: if `page_id` is not contained in the document.
    """
    if type(page_id) is int:
        np = self.page_count
        # Wrap negative numbers once. With zero pages the value stays
        # negative and is rejected by the containment check below instead
        # of looping forever.
        if page_id < 0 and np > 0:
            page_id %= np
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location( chapter, pno)
    return mupdf.fz_page_number_from_location( self.this, loc)
5109
def page_xref(self, pno):
    """Get xref of page number.

    Args:
        pno: (int) 0-based page number. Negative values count from the end.
    Returns:
        (int) the xref of the page object.
    Raises:
        ValueError: if the document is closed or `pno` is out of range.
    """
    # Check for a closed document first, so the fast path below is never
    # handed the internals of a closed document.
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    # Wrap negative numbers once. A modulo (instead of a `while` loop) cannot
    # spin forever when the document has no pages.
    if n < 0 and page_count > 0:
        n %= page_count
    pdf = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
5126
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    Returns None if there is no PDF catalog. "SinglePage" is the fallback
    whenever the catalog entry is missing or is not a PDF name.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    entry = self.xref_get_key(catalog, "PageLayout")
    if entry[0] == "name":
        # strip the leading "/" of the PDF name
        return entry[1][1:]
    return "SinglePage"
5140
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    Returns None if there is no PDF catalog. "UseNone" is the fallback
    whenever the catalog entry is missing or is not a PDF name.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    entry = self.xref_get_key(catalog, "PageMode")
    if entry[0] == "name":
        # strip the leading "/" of the PDF name
        return entry[1][1:]
    return "UseNone"
5154
# Choose the return annotation of pages(): the subscripted form is only
# usable where `collections.abc.Iterable` supports `[...]`.
if sys.implementation.version < (3, 9):
    # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
    _pages_ret = collections.abc.Iterable
else:
    _pages_ret = collections.abc.Iterable[Page]
5160
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    Arguments have the same meaning as for the range() built-in.
    A document without pages yields nothing. Being a generator, argument
    errors surface when iteration starts.
    """
    total = self.page_count
    if not total:
        return

    # start: default 0, negative values wrap around from the end
    start = start or 0
    while start < 0:
        start += total
    if start not in range(total):
        raise ValueError("bad start page number")

    # stop: missing or too large values are clamped to the page count
    if stop is None or not stop <= total:
        stop = total

    # step: zero is invalid, a missing step follows the direction start -> stop
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if start > stop else 1

    for page_no in range(start, stop, step):
        yield self.load_page(page_no)
5189
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns 0 when the document has no underlying PDF.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        trailer = mupdf.pdf_trailer(pdf)
        # the catalog is the object referenced by the trailer's /Root key
        return mupdf.pdf_to_num(mupdf.pdf_dict_get(trailer, PDF_NAME('Root')))
    return 0
5199
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to xref_object() with xref -1 and passes both flags through.
    """
    flags = {"compressed": compressed, "ascii": ascii}
    return self.xref_object(-1, **flags)
5203
@property
def permissions(self):
    """Document permissions.

    Returns 0 while the document is encrypted. For a PDF, the value of
    pdf_document_permissions() is returned; for other document types a
    PDF-style bit field is derived from fz_has_permission().
    """
    if self.is_encrypted:
        return 0
    doc = self.this
    pdf = mupdf.pdf_document_from_fz_document(doc)

    # for PDF return result of standard function
    if pdf.m_internal:
        return mupdf.pdf_document_permissions(pdf)

    # otherwise simulate the PDF return value:
    # start with everything granted, then clear each denied permission
    perm = 0xFFFFFFFC
    flag_pairs = (
        (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
        (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
        (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
        (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
    )
    for fz_flag, pdf_bit in flag_pairs:
        if not mupdf.fz_has_permission(doc, fz_flag):
            perm ^= pdf_bit
    return perm
5228
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    Args:
        page_id: either a (chapter, pno) pair or an int, which is taken as
            a page number of chapter 0.
    Returns:
        A (chapter, page) tuple, or the empty tuple for the very first page.
    Raises:
        ValueError: if the document is closed or encrypted, or if `page_id`
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    # Compare as tuple so that a list like [0, 0] is recognised as the first
    # page too (a list never equals a tuple).
    if tuple(page_id) == (0, 0):
        return ()
    chapter, pno = page_id
    loc = mupdf.fz_make_location(chapter, pno)
    prev_loc = mupdf.fz_previous_page(self.this, loc)
    return prev_loc.chapter, prev_loc.page
5244
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The passed-in page object is released and erased, the page with the same
    number is loaded again, and the annotation references of the old page
    object are copied to the new one.

    Args:
        page: the page object to be replaced.
    Returns:
        The newly loaded page object.
    """
    old_annots = {}  # copy annot references to here
    pno = page.number  # save the page number
    for k, v in page._annot_refs.items():  # save the annot dictionary
        old_annots[k] = v

    # When we call `self.load_page()` below, it will end up in
    # fz_load_chapter_page(), which will return any matching page in the
    # document's list of non-ref-counted loaded pages, instead of actually
    # reloading the page.
    #
    # We want to assert that we have actually reloaded the fz_page, and not
    # simply returned the same `fz_page*` pointer from the document's list
    # of non-ref-counted loaded pages.
    #
    # So we first remove our reference to the `fz_page*`. This will
    # decrement .refs, and if .refs was 1, this is guaranteed to free the
    # `fz_page*` and remove it from the document's list if it was there. So
    # we are guaranteed that our returned `fz_page*` is from a genuine
    # reload, even if it happens to reuse the original block of memory.
    #
    # However if the original .refs is greater than one, there must be
    # other references to the `fz_page` somewhere, and we require that
    # these other references are not keeping the page in the document's
    # list. We check that we are returning a newly loaded page by
    # asserting that our returned `fz_page*` is different from the original
    # `fz_page*` - the original was not freed, so a new `fz_page` cannot
    # reuse the same block of memory.
    #

    # Remember ref count and pointer value before dropping our reference.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()  # remove the page
    page = None
    # NOTE(review): presumably empties the MuPDF store so no cached data of
    # the old page survives - confirm against TOOLS.store_shrink().
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page

    # copy annot refs over to the new dictionary
    #page_proxy = weakref.proxy(page)
    for k, v in old_annots.items():
        annot = old_annots[k]
        #annot.parent = page_proxy  # refresh parent to new page
        page._annot_refs[k] = annot
    if refs_old == 1:
        # We know that `page.this = None` will have decremented the ref
        # count to zero so we are guaranteed that the new `fz_page` is a
        # new page even if it happens to have reused the same block of
        # memory.
        pass
    else:
        # Check that the new `fz_page*` is different from the original.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
5303
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format
    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        A missing or unresolvable uri gives page_id -1 (or (-1, -1)) and
        coordinates 0, 0.
    """
    # result for "no destination", in the requested page_id format
    unresolved = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
    if not uri:
        return unresolved
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        # best effort: a failing lookup is reported as "no destination"
        if g_exceptions_verbose: exception_info()
        return unresolved
    if chapters:
        return (loc.chapter, loc.page), xp, yp
    return mupdf.fz_page_number_from_location(self.this, loc), xp, yp
5329
def rewrite_images(
        self,
        dpi_threshold=None,
        dpi_target=0,
        quality=0,
        lossy=True,
        lossless=True,
        bitonal=True,
        color=True,
        gray=True,
        set_to_gray=False,
        options=None,
        ):
    """Rewrite images in a PDF document.

    The typical use case is to reduce the size of the PDF by recompressing
    images. Default parameters will convert all images to JPEG where
    possible, using the specified resolutions and quality. Exclude
    undesired images by setting parameters to False.
    Args:
        dpi_threshold: look at images with a larger DPI only.
        dpi_target: change eligible images to this DPI.
        quality: Quality of the recompressed images (0-100).
        lossy: process lossy image types (e.g. JPEG).
        lossless: process lossless image types (e.g. PNG).
        bitonal: process black-and-white images (e.g. FAX)
        color: process colored images.
        gray: process gray images.
        set_to_gray: whether to change the PDF to gray at process start.
        options: (PdfImageRewriterOptions) Custom options for image
            rewriting (optional). Expert use only. If provided, other
            parameters are ignored, except set_to_gray.
    Raises:
        ValueError: if `dpi_target` is not smaller than `dpi_threshold`, or
            if the options object carries attributes that a fresh
            PdfImageRewriterOptions does not have.
    """
    quality_str = str(quality)
    if not dpi_threshold:
        dpi_threshold = dpi_target = 0
    if dpi_target > 0 and dpi_target >= dpi_threshold:
        # f-string, so that the offending values actually show in the message
        raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
    template_opts = mupdf.PdfImageRewriterOptions()
    dir1 = set(dir(template_opts))  # for checking that only existing options are set
    if not options:
        opts = mupdf.PdfImageRewriterOptions()
        # Collect (attribute prefix, recompress method) for every image
        # class that was selected by the caller.
        selected = []
        if bitonal:
            selected.append(("bitonal", mupdf.FZ_RECOMPRESS_FAX))
        for enabled, colorspace in ((color, "color"), (gray, "gray")):
            if not enabled:
                continue
            if lossless:
                selected.append((f"{colorspace}_lossless", mupdf.FZ_RECOMPRESS_JPEG))
            if lossy:
                selected.append((f"{colorspace}_lossy", mupdf.FZ_RECOMPRESS_JPEG))
        # Every image class uses the same five settings.
        for prefix, method in selected:
            setattr(opts, f"{prefix}_image_recompress_method", method)
            setattr(opts, f"{prefix}_image_subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
            setattr(opts, f"{prefix}_image_subsample_to", dpi_target)
            setattr(opts, f"{prefix}_image_subsample_threshold", dpi_threshold)
            setattr(opts, f"{prefix}_image_recompress_quality", quality_str)
    else:
        opts = options

    dir2 = set(dir(opts))  # checking that only possible options were used
    invalid_options = dir2 - dir1
    if invalid_options:
        raise ValueError(f"Invalid options: {invalid_options}")

    if set_to_gray:
        self.recolor(1)
    pdf = _as_pdf_document(self)
    mupdf.pdf_rewrite_images(pdf, opts)
5416
def recolor(self, components=1):
    """Change the color component count on all pages.

    Args:
        components: (int) desired color component count, one of 1, 3, 4.

    Invokes the same-named method for all pages.
    Raises ValueError if the document is not a PDF.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    for page_no in range(self.page_count):
        page = self.load_page(page_no)
        page.recolor(components)
5429
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    Takes no arguments besides the document itself.
    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included. The result is stored in `self._resolved_names` and
    returned unchanged by subsequent calls.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
        Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
         '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # this is a backward listing of page xref to page number
    page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary."""
        templ_dict = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        # the destination is either the array itself or the /D entry of a dict
        if val.pdf_is_array():
            array = obj_string(val)
        elif val.pdf_is_dict():
            array = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return templ_dict

        # replace PDF "null" by zero, omit the square brackets
        array = array.replace("null", "0")[1:-1]

        # find stuff before first "/"
        idx = array.find("/")
        if idx < 1:  # this has no target page spec
            templ_dict["dest"] = array  # return the orig. string
            return templ_dict

        subval = array[:idx].strip()  # stuff before "/"
        array = array[idx:]  # stuff from "/" onwards
        templ_dict["dest"] = array
        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if array.startswith("/XYZ"):
            del templ_dict["dest"]  # don't return orig string in this case

            # make a list of the 3 tokens following "/XYZ"
            array_list = array.split()[1:4]  # omit "/XYZ"

            # fill up missing tokens with "0" strings
            while len(array_list) < 3:  # fill up if too short
                array_list.append("0")  # add missing values

            # make list of 3 floats: x, y and zoom
            t = list(map(float, array_list))  # the resulting x, y, z values
            templ_dict["to"] = (t[0], t[1])
            templ_dict["zoom"] = t[2]

        # extract page number
        if subval.endswith("0 R"):  # page xref given?
            # unknown page xrefs map to -1
            templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
        else:  # naked page number given
            templ_dict["page"] = int(subval)
        return templ_dict

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        # length of the PDF dictionary
        name_count = mupdf.pdf_dict_len(pdf_dict)

        # extract key-val of each dict item
        for i in range(name_count):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if key.pdf_is_name():  # this should always be true!
                dict_key = key.pdf_to_name()
            else:
                message(f"key {i} is no /Name")
                dict_key = None

            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)

    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

    dest_dict = {}

    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")

    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)

    # extract destinations new style (PDF 1.2+)
    # (processed second, so its entries replace equal-named old style ones)
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)

    self._resolved_names = dest_dict  # store result or reuse
    return dest_dict
5560
def save(
        self,
        filename,
        garbage=0,
        clean=0,
        deflate=0,
        deflate_images=0,
        deflate_fonts=0,
        incremental=0,
        ascii=0,
        expand=0,
        linear=0,
        no_new_id=0,
        appearance=0,
        pretty=0,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    """Save PDF to file, pathlib.Path or file pointer.

    `filename` may be a str, an object with an `open` attribute (taken as
    pathlib.Path), an object with a `name` attribute (its name is used as
    file name), or an object with a `seek` attribute, which is written to
    directly. The remaining arguments are copied into a PdfWriteOptions
    object.

    Raises:
        ValueError: for a closed or encrypted document, an unsupported
            `filename`, conflicting options, a document without pages or
            passwords longer than 40 characters.
    """
    # From %pythonprepend save
    #
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    # normalize the output target
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        elif not hasattr(filename, "seek"):  # assume file object
            raise ValueError("filename must be str, Path or file object")

    # plausibility checks
    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if linear and use_objstms:
        raise ValueError("'linear' and 'use_objstms' cannot both be requested")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental and (self.name != filename or self.stream):
        raise ValueError("incremental needs original file")
    if any(pw and len(pw) > 40 for pw in (user_pw, owner_pw)):
        raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)

    # transfer the arguments to the write options
    opts = mupdf.PdfWriteOptions()
    opts.do_incremental = incremental
    opts.do_ascii = ascii
    opts.do_compress = deflate
    opts.do_compress_images = deflate_images
    opts.do_compress_fonts = deflate_fonts
    opts.do_decompress = expand
    opts.do_garbage = garbage
    opts.do_pretty = pretty
    opts.do_linear = linear
    opts.do_clean = clean
    opts.do_sanitize = clean
    opts.dont_regenerate_id = no_new_id
    opts.do_appearance = appearance
    opts.do_encrypt = encryption
    opts.permissions = permissions
    opts.do_preserve_metadata = preserve_metadata
    opts.do_use_objstms = use_objstms
    opts.compression_effort = compression_effort
    # the owner password falls back to the user password
    effective_owner_pw = owner_pw if owner_pw is not None else user_pw
    if effective_owner_pw is not None:
        opts.opwd_utf8_set_value(effective_owner_pw)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)

    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)
    if isinstance(filename, str):
        #log( 'calling mupdf.pdf_save_document()')
        mupdf.pdf_save_document(pdf, filename, opts)
    else:
        # anything else is a file-like object: write through an output wrapper
        out = JM_new_output_fileptr(filename)
        #log( f'{type(out)=} {type(out.this)=}')
        mupdf.pdf_write_document(pdf, out, opts)
        out.fz_close_output()
5649
def save_snapshot(self, filename):
    """Save a file snapshot suitable for journalling.

    `filename` may be a str, an object with an `open` attribute (taken as
    pathlib.Path) or an object with a `name` attribute. The snapshot must
    not be written to the document's own file.
    """
    if self.is_closed:
        raise ValueError("doc is closed")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        else:
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name:
        raise ValueError("cannot snapshot to original")
    mupdf.pdf_save_snapshot(_as_pdf_document(self), filename)
5666
def saveIncr(self):
    """ Save PDF incrementally"""
    # write back to the document's own file, keeping its current encryption
    target = self.name
    return self.save(target, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
5670
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    Args:
        pyliste: non-empty sequence of page numbers, each within
            range(len(self)).
    Raises:
        ValueError: for a closed, encrypted or non-PDF document, a
            non-sequence argument or invalid page numbers.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")

    # only smallest and largest number need to be checked
    valid_range = range(len(self))
    if len(pyliste) == 0 or not (
            min(pyliste) in valid_range and max(pyliste) in valid_range
            ):
        raise ValueError("bad page number(s)")

    # get underlying pdf document,
    pdf = _as_pdf_document(self)
    # create page sub-pdf via pdf_rearrange_pages2().
    #
    # We use PDF_CLEAN_STRUCTURE_KEEP (where available) otherwise we lose
    # structure tree which, for example, breaks test_3705.
    if mupdf_version_tuple >= (1, 25, 3):
        structure_args = (mupdf.PDF_CLEAN_STRUCTURE_KEEP,)
    else:
        structure_args = ()
    mupdf.pdf_rearrange_pages2(pdf, pyliste, *structure_args)

    # remove any existing pages with their kids
    self._reset_page_refs()
5700
def set_language(self, language=None):
    """Set the document language of the PDF.

    A false `language` value (None, empty string) selects FZ_LANG_UNSET.
    Always returns True.
    """
    pdf = _as_pdf_document(self)
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_document_language(pdf, lang)
    return True
5709
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    Args:
        config: -1 for the default configuration (/D), else index into
            the /Configs array.
        basestate: "ON", "OFF" or "Unchanged" (case-insensitive).
        on, off, locked: list or tuple of OCG identifiers as reported by
            get_ocgs().
        rbgroups: list or tuple of lists / tuples of such identifiers.
    Raises:
        ValueError: closed document, no optional content, wrongly typed
            argument, unknown OCGs, bad basestate or bad configuration.
    """
    if self.is_closed:
        raise ValueError("document closed")
    known = set(self.get_ocgs().keys())
    if not known:
        raise ValueError("document has no optional content")

    sequence_types = (list, tuple)

    if on:
        if type(on) not in sequence_types:
            raise ValueError("bad type: 'on'")
        unknown = set(on).difference(known)
        if unknown:
            raise ValueError("bad OCGs in 'on': %s" % unknown)

    if off:
        if type(off) not in sequence_types:
            raise ValueError("bad type: 'off'")
        unknown = set(off).difference(known)
        if unknown:
            raise ValueError("bad OCGs in 'off': %s" % unknown)

    if locked:
        if type(locked) not in sequence_types:
            raise ValueError("bad type: 'locked'")
        unknown = set(locked).difference(known)
        if unknown:
            raise ValueError("bad OCGs in 'locked': %s" % unknown)

    if rbgroups:
        if type(rbgroups) not in sequence_types:
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in sequence_types:
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group).difference(known)
            if unknown:
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        # accept any letter case, store the spelling used in the PDF
        spellings = {"ON": "ON", "OFF": "OFF", "UNCHANGED": "Unchanged"}
        basestate = spellings.get(str(basestate).upper())
        if basestate is None:
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return
    if config == -1:
        obj = mupdf.pdf_dict_get( ocp, PDF_NAME('D'))
    else:
        configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
        obj = mupdf.pdf_array_get( configs, config)
    if not obj.m_internal:
        raise ValueError( MSG_BAD_OC_CONFIG)
    JM_set_ocg_arrays( obj, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg( pdf.m_internal)
5774
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    Args:
        number: sequence number of the UI configuration, or its text.
        action: 1 = toggle, 2 = deselect, anything else = select.
    Raises:
        ValueError: if a given text matches no UI configuration.
    """
    # The user might have given the name instead of sequence number,
    # so select by that name and continue with corresp. number
    if isinstance(number, str):
        matches = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number]
        if not matches:
            raise ValueError(f"bad OCG '{number}'.")
        number = matches[0]  # this is the number for the name
    pdf = _as_pdf_document(self)
    if action == 1:
        change = mupdf.pdf_toggle_layer_config_ui
    elif action == 2:
        change = mupdf.pdf_deselect_layer_config_ui
    else:
        change = mupdf.pdf_select_layer_config_ui
    change(pdf, number)
5791
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    Args:
        markinfo: dict with any of the keys "Marked", "UserProperties" and
            "Suspects"; values must stringify to true / false. Keys not
            given are written as false.
    Returns:
        False if `markinfo` is empty or no dict, else True.
    Raises:
        ValueError: document is no PDF, unknown key or bad value.
    """
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not (markinfo and isinstance(markinfo, dict)):
        return False
    valid = {"Marked": False, "UserProperties": False, "Suspects": False}

    if not set(valid.keys()).issuperset(markinfo.keys()):
        badkeys = f"bad MarkInfo key(s): {set(markinfo.keys()).difference(valid.keys())}"
        raise ValueError(badkeys)

    # overlay the defaults with the caller's values, then build the PDF dict
    valid.update(markinfo)
    entries = []
    for key, value in valid.items():
        value = str(value).lower()
        if value not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{value}'")
        entries.append(f"/{key} {value}")
    self.xref_set_key(xref, "MarkInfo", "<<" + "".join(entries) + ">>")
    return True
5814
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    The value is matched case-insensitively and may have a leading "/".
    Returns True on success, raises ValueError otherwise.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")
    if pagelayout[0] == "/":
        pagelayout = pagelayout[1:]
    # look up the official spelling by its lower-case form
    spellings = {name.lower(): name for name in valid}
    official = spellings.get(pagelayout.lower())
    if official is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(xref, "PageLayout", f"/{official}")
    return True
5830
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    The value is matched case-insensitively and may have a leading "/".
    Returns True on success, raises ValueError otherwise.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")
    if pagemode[0] == "/":
        pagemode = pagemode[1:]
    # look up the official spelling by its lower-case form
    spellings = {name.lower(): name for name in valid}
    official = spellings.get(pagemode.lower())
    if official is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(xref, "PageMode", f"/{official}")
    return True
5846
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    Args:
        metadata: (str) the XML text; it is stored UTF-8 encoded.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    payload = mupdf.fz_new_buffer_from_copied_data( metadata.encode('utf-8'))
    existing = mupdf.pdf_dict_get( root, PDF_NAME('Metadata'))
    if existing.m_internal:
        # catalog already has a metadata stream: replace its content
        JM_update_stream( pdf, existing, payload, 0)
        return
    # otherwise create a new stream object and hook it into the catalog
    created = mupdf.pdf_add_stream( pdf, payload, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put( created, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put( created, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), created)
5864
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    Args:
        config: number of the layer configuration; negative values are
            ignored.
        as_default: if true, also make the selected configuration the
            default one.
    Raises:
        ValueError: if `config` is 1 or larger but the PDF has no
            non-empty /Configs array.
    """
    pdf = _as_pdf_document(self)
    cfgs = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            PDF_NAME('Configs')
            )
    has_configs = mupdf.pdf_is_array( cfgs) and mupdf.pdf_array_len( cfgs)
    if not has_configs:
        if config >= 1:
            raise ValueError( MSG_BAD_OC_LAYER)
        return
    if config < 0:
        return
    mupdf.pdf_select_layer_config( pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default( pdf)
        mupdf.ll_pdf_read_ocg( pdf.m_internal)
5884
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: (int) number of the object to replace.
        text: (str) PDF source of the new object definition.
        page: optional page whose links are refreshed afterwards.
    Raises:
        ValueError: if the document is closed or encrypted, or if `xref`
            is outside 1 .. xref length - 1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1):
        # same exception as the other xref-checking methods use
        raise ValueError( MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links( _as_pdf_page(page))
5899
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: (int) number of the object whose stream is replaced.
        stream: the new stream content, handed to JM_BufferFromBytes().
        new: not used by this method; kept for compatibility.
        compress: passed on to JM_update_stream().
    Raises:
        ValueError: document closed or encrypted, `xref` outside
            1 .. xref length - 1, or object is no PDF dictionary.
        TypeError: if `stream` cannot be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # the largest valid xref is xreflen-1
    if not 1 <= xref < xreflen:
        raise ValueError( MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError( MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res.m_internal:
        raise TypeError( MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    pdf.dirty = 1
5917
@property
def version_count(self):
    '''
    Count versions of PDF document.

    Returns 0 if the document has no underlying PDF.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
5927
def write(
        self,
        garbage=False,
        clean=False,
        deflate=False,
        deflate_images=False,
        deflate_fonts=False,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        no_new_id=False,
        appearance=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    """Save the document to memory and return it as a bytes object.

    All arguments are passed on to save() as keyword arguments; the
    output target is an in-memory io.BytesIO.
    """
    from io import BytesIO
    save_options = {
        "garbage": garbage,
        "clean": clean,
        "no_new_id": no_new_id,
        "appearance": appearance,
        "deflate": deflate,
        "deflate_images": deflate_images,
        "deflate_fonts": deflate_fonts,
        "incremental": incremental,
        "ascii": ascii,
        "expand": expand,
        "linear": linear,
        "pretty": pretty,
        "encryption": encryption,
        "permissions": permissions,
        "owner_pw": owner_pw,
        "user_pw": user_pw,
        "preserve_metadata": preserve_metadata,
        "use_objstms": use_objstms,
        "compression_effort": compression_effort,
    }
    memory_file = BytesIO()
    self.save(memory_file, **save_options)
    return memory_file.getvalue()
5975
@property
def xref(self):
    """PDF xref number of page."""
    CheckParent(self)
    # the lookup is done by the parent via our number
    owner = self.parent
    return owner.page_xref(self.number)
5981
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Args:
        xref: (int) object number, or -1 for the PDF trailer.
        key: (str) the key, handed to pdf_dict_getp().
    Returns:
        A tuple (type, text) of two strings. ("null", "null") is returned
        if the object or the key does not exist.
    Raises:
        ValueError: if `xref` is neither -1 nor in 1 .. xref length - 1.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError( MSG_BAD_XREF)
    if xref > 0:
        obj = mupdf.pdf_load_object(pdf, xref)
    else:
        obj = mupdf.pdf_trailer(pdf)
    missing = ("null", "null")
    if not obj.m_internal:
        return missing
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return missing

    # Classify the value. `text` stays None for kinds which are rendered
    # from the object source further down.
    text = None
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        text = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        text = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        text = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        text = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        text = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"
    if text is None:
        text = JM_UnicodeFromBuffer(JM_object_to_buffer(subobj, 1, 0))
    return (kind, text)
6031
def xref_get_keys(self, xref):
    """Return the key names of the PDF dictionary object at 'xref'.

    Pass -1 to list the keys of the PDF trailer. The result is a list of
    names in dictionary order; it is empty if the dictionary length is 0.

    Raises:
        ValueError: `xref` is neither -1 nor within `1 .. xreflen-1`.
    """
    pdf = _as_pdf_document(self)
    xref_count = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xref_count-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    source = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    return [
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(source, index))
        for index in range(mupdf.pdf_dict_len(source))
    ]
6050
def xref_is_font(self, xref):
    """Return True if the object at 'xref' has the value '/Font' under key 'Type'.

    Raises:
        ValueError: the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    type_entry = self.xref_get_key(xref, "Type")
    return type_entry[1] == "/Font"
6058
def xref_is_image(self, xref):
    """Return True if the object at 'xref' has the value '/Image' under key 'Subtype'.

    Raises:
        ValueError: the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    subtype_entry = self.xref_get_key(xref, "Subtype")
    return subtype_entry[1] == "/Image"
6066
def xref_is_stream(self, xref=0):
    """Return True if 'xref' refers to a stream object.

    False is returned when the document has no PDF representation.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False  # not a PDF
6073
def xref_is_xobject(self, xref):
    """Return True if the object at 'xref' has the value '/Form' under key 'Subtype'.

    Raises:
        ValueError: the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    subtype_entry = self.xref_get_key(xref, "Subtype")
    return subtype_entry[1] == "/Form"
6081
def xref_length(self):
    """Return the length of the xref table, or 0 if the document is not a PDF."""
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_xref_len(pdf)
6089
def xref_object(self, xref, compressed=0, ascii=0):
    """Return the source of the object at 'xref' as a string.

    Args:
        xref: object number; -1 selects the PDF trailer.
        compressed, ascii: forwarded to the serialiser unchanged.

    Raises:
        ValueError: the document is closed, or `xref` is neither -1 nor
            within `1 .. xreflen-1`.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # Optimised implementation provided by the 'extra' module.
        return extra.xref_object(self.this, xref, compressed, ascii)

    pdf = _as_pdf_document(self)
    xref_count = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xref_count-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    resolved = mupdf.pdf_resolve_indirect(obj)
    return JM_EscapeStrFromBuffer(JM_object_to_buffer(resolved, compressed, ascii))
6108
def xref_set_key(self, xref, key, value):
    """Set the value of a key in a PDF dictionary object.

    Args:
        xref: number of the object to change; -1 selects the PDF trailer.
        key: non-empty string. Apart from "/", it must not contain any of
            the characters in `INVALID_NAME_CHARS`.
        value: non-empty string. If it starts with "/", the remainder must
            not contain any of the characters in `INVALID_NAME_CHARS`.

    Raises:
        ValueError: the document is closed, `key` or `value` is rejected,
            or `xref` is neither -1 nor within `1 .. xreflen-1`.
    """
    if self.is_closed:
        raise ValueError("document closed")

    key_is_valid = (
        bool(key)
        and isinstance(key, str)
        and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
    )
    if not key_is_valid:
        raise ValueError("bad 'key'")

    if not isinstance(value, str) or not value:
        raise ValueError("bad 'value'")
    if value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set():
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    xref_count = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xref_count-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)

    is_trailer = xref == -1
    obj = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    new_obj = JM_set_object_value(obj, key, value)
    if not new_obj.m_internal:
        return  # did not work: skip update

    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, new_obj)
        return

    # The trailer is updated by copying every entry of the new dictionary
    # into the existing trailer object.
    for index in range(mupdf.pdf_dict_len(new_obj)):
        mupdf.pdf_dict_put(
            obj,
            mupdf.pdf_dict_get_key(new_obj, index),
            mupdf.pdf_dict_get_val(new_obj, index),
        )
6146
def xref_stream(self, xref):
    """Return the decompressed stream of the object at 'xref'.

    None is returned if the object is not a stream.

    Raises:
        ValueError: the document is closed or encrypted, or `xref` is
            neither -1 nor within `1 .. xreflen-1`.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xref_count = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xref_count-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
6164
def xref_stream_raw(self, xref):
    """Return the stream of the object at 'xref' without decompressing it.

    None is returned if the object is not a stream.

    Raises:
        ValueError: the document is closed or encrypted, or `xref` is
            neither -1 nor within `1 .. xreflen-1`.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xref_count = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xref_count-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
6182
def xref_xml_metadata(self):
    """Return the xref of the document's XML metadata, or 0 if there is none.

    The number is taken from the 'Metadata' entry of the trailer's 'Root'
    dictionary. A missing 'Root' is reported through `RAISEPY()` with
    `JM_Exc_FileDataError`.
    """
    pdf = _as_pdf_document(self)
    trailer = mupdf.pdf_trailer(pdf)
    root = mupdf.pdf_dict_get(trailer, PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    metadata = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    return mupdf.pdf_to_num(metadata) if metadata.m_internal else 0
6194
# Named slots of an instance. '__dict__' is listed as well, so further
# attributes can still be assigned to instances.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')

# Read-only view of the private '_outline' attribute.
outline = property(lambda self: self._outline)
# 'tobytes' is a second name for 'write'.
tobytes = write
# 'is_stream' is a second name for 'xref_is_stream'.
is_stream = xref_is_stream
6200
# Alias: calling `open(...)` from this module constructs a `Document`.
# Note that this rebinds the name of the builtin `open` in this namespace.
open = Document
6202
6203
class DocumentWriter:
    """Wrapper around `mupdf.FzDocumentWriter`, usable as a context manager.

    Leaving the `with` block calls `close()`.
    """

    def __init__(self, path, options=''):
        """Create the underlying MuPDF document writer.

        Args:
            path: destination of the output. A `str` is used as a file
                path. An object with an `absolute` attribute is converted
                with `str()`. Otherwise, if the object has a `name`
                attribute, that attribute's value is used. Anything that
                is not a `str` after these steps is handed to
                `JM_new_output_fileptr()`.
            options: option string forwarded to `mupdf.FzDocumentWriter`.
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # Need to keep the Python JM_new_output_fileptr_Output instance
        # alive for the lifetime of this DocumentWriter, otherwise calls
        # to virtual methods implemented in Python fail. So we make it a
        # member of this DocumentWriter.
        #
        # Unrelated to this, mupdf.FzDocumentWriter will set
        # self._out.m_internal to null because ownership is passed in.
        #
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def begin_page(self, mediabox):
        """Start a new page with the given mediabox and return a `DeviceWrapper` for it."""
        page_rect = JM_rect_from_py(mediabox)
        return DeviceWrapper(mupdf.fz_begin_page(self.this, page_rect))

    def end_page(self):
        """Finish the page started by `begin_page()`."""
        mupdf.fz_end_page(self.this)

    def close(self):
        """Close the underlying MuPDF document writer."""
        mupdf.fz_close_document_writer(self.this)
6246
6247
class Font:
    """Python wrapper around a MuPDF font, which is stored in `self.this`."""

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
            self,
            fontname=None,
            fontfile=None,
            fontbuffer=None,
            script=0,
            language=None,
            ordering=-1,
            is_bold=0,
            is_italic=0,
            is_serif=0,
            embed=1,
            ):
        """Create the font via `JM_get_font()`.

        Args:
            fontname: font name. Some names are treated specially:
                "cjk", "china-t", "china-ts" select ordering 0, names
                starting with "china-s" ordering 1, with "japan" ordering 2
                and with "korea" ordering 3. Names found in
                `fitz_fontdescriptors` are loaded from the optional
                package `pymupdf_fonts`. Any other name is looked up in
                `Base14_fontdict` if `ordering` is negative.
            fontfile: forwarded to `JM_get_font()`.
            fontbuffer: font data as `bytes`, `bytearray`, or an object
                with a `getvalue()` method.
            script, ordering, is_bold, is_italic, is_serif, embed:
                forwarded to `JM_get_font()`.
            language: converted with `mupdf.fz_text_language_from_string()`.

        Raises:
            ValueError: `fontbuffer` cannot be converted to `bytes`.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors.keys():
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        font = JM_get_font(fontname, fontfile,
                fontbuffer, script, lang, ordering,
                is_bold, is_italic, is_serif, embed)
        self.this = font

    def __repr__(self):
        return "Font('%s')" % self.name

    def _encode_glyph(self, ucs, script, lang, small_caps):
        """Return `(gid, font)` for the unicode code point `ucs`.

        With `small_caps` the glyph is looked up in this font only, so the
        returned font is always `self.this`. (The previous code bound the
        font only for `gid >= 0` and failed with an unbound local variable
        otherwise.) Without `small_caps` the lookup may fall back to
        another font, which is then returned.
        """
        if small_caps:
            return mupdf.fz_encode_character_sc(self.this, ucs), self.this
        return mupdf.fz_encode_character_with_fallback(self.this, ucs, script, lang)

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Return the result of `fz_font_bbox()` of the underlying font."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Return a copy of the font's buffer content."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return a list with the advance of each character of 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            gid, font = self._encode_glyph(ord(ch), script, lang, small_caps)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Return the font flags as a dict, or None if no flags are available."""
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        #log( '{=f}')
        if mupdf_cppyy:
            # cppyy includes remaining higher bits, so the individual flags
            # are peeled off the value of the first field one by one.
            v = [f.is_mono]
            def b(bits):
                ret = v[0] & ((1 << bits)-1)
                v[0] = v[0] >> bits
                return ret
            is_mono = b(1)
            is_serif = b(1)
            is_bold = b(1)
            is_italic = b(1)
            ft_substitute = b(1)
            ft_stretch = b(1)
            fake_bold = b(1)
            fake_italic = b(1)
            has_opentype = b(1)
            invalid_bbox = b(1)
            cjk_lang = b(1)
            embed = b(1)
            never_embed = b(1)
            # NOTE(review): no separate 'cjk' bit is decoded here; 'cjk'
            # reports the same value as 'cjk-lang'. Confirm against the
            # layout of fz_font_flags_t.
            return {
                "mono": is_mono,
                "serif": is_serif,
                "bold": is_bold,
                "italic": is_italic,
                "substitute": ft_substitute,
                "stretch": ft_stretch,
                "fake-bold": fake_bold,
                "fake-italic": fake_italic,
                "opentype": has_opentype,
                "invalid-bbox": invalid_bbox,
                'cjk': cjk_lang,
                'cjk-lang': cjk_lang,
                'embed': embed,
                'never-embed': never_embed,
            }
        return {
            "mono": f.is_mono,
            "serif": f.is_serif,
            "bold": f.is_bold,
            "italic": f.is_italic,
            "substitute": f.ft_substitute,
            "stretch": f.ft_stretch,
            "fake-bold": f.fake_bold,
            "fake-italic": f.fake_italic,
            "opentype": f.has_opentype,
            "invalid-bbox": f.invalid_bbox,
            'cjk': f.cjk,
            'cjk-lang': f.cjk_lang,
            'embed': f.embed,
            'never-embed': f.never_embed,
        }

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode_glyph(chr_, script, lang, small_caps)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode_glyph(char, script, lang, small_caps)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Return the `glyph_count` field of the underlying font."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Return the glyph id that the font lookup yields for this unicode.

        With `fallback` the lookup may use fallback fonts; otherwise only
        this font is searched (optionally for a small-caps glyph).
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        elif small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr)
        else:
            gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True; see pymupdf commit ef4056ee4da2.

        The code that formerly followed the unconditional return (checks of
        type 3 procs, the `ft_substitute` flag and
        `pdf_font_writing_supported()`) could never run and was removed.
        """
        return True

    @property
    def name(self):
        """Return the font name."""
        return mupdf.fz_font_name(self.this)

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises:
            TypeError: `text` is not a `str`.
        """
        lang = mupdf.fz_text_language_from_string(language)
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        rc = 0
        for ch in text:
            gid, font = self._encode_glyph(ord(ch), script, lang, small_caps)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        Returns sorted list of valid unicodes of a fz_font.
        '''
        ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
        return sorted({item.ucs for item in ucs_gids})
6501
6502
class Graftmap:
    """Holds a MuPDF graft map (`self.this`) created for a PDF document."""

    def __init__(self, doc):
        """Create a new graft map for the PDF document behind `doc`."""
        target_pdf = _as_pdf_document(doc)
        self.this = mupdf.pdf_new_graft_map(target_pdf)
        self.thisown = True

    def __del__(self):
        # Only exact Graftmap instances have their ownership flag cleared.
        if type(self) is Graftmap:
            self.thisown = False
6515
6516
class Link:
    """Python wrapper around a `mupdf.FzLink`, which is stored in `self.this`."""

    # Class-level default.
    page = -1

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __del__(self):
        self._erase()

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def __str__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def _erase(self):
        """Detach the link from its parent and drop ownership."""
        self.parent = None
        self.thisown = False

    def _border(self, doc, xref):
        """Return `JM_annot_border()` of the link object, or None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return `JM_annot_colors()` of the link object; None if `doc` is no PDF.

        Raises:
            ValueError: no object could be created for `xref`.
        """
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _setBorder(self, border, doc, xref):
        """Apply `border` via `JM_annot_set_border()`; None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        doc = self.parent.parent
        if doc.is_closed or doc.is_encrypted:
            raise ValueError("document closed or encrypted")

        # External links and "#..." URIs are not resolved here.
        needs_no_resolving = self.is_external or self.uri.startswith("#")
        uri = None if needs_no_resolving else doc.resolve_link(self.uri)
        return linkDest(self, uri, doc)

    @property
    def flags(self)->int:
        """Return the integer stored under key 'F' of the link object, else 0."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        text = doc.xref_get_key(self.xref, "F")[1]
        return 0 if text == "null" else int(text)

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        internal = self.this.m_internal
        if not internal or not internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(internal.uri))

    @property
    def next(self):
        """Next link."""
        if not self.this.m_internal:
            return None
        CheckParent(self)
        # The `extra.Link_next()` variant is deliberately not used here.
        successor = self.this.next()
        if not successor.m_internal:
            return None

        link = Link(successor)
        link.thisown = True
        link.parent = self.parent  # copy owning page from prev link
        link.parent._annot_refs[id(link)] = link
        if self.xref > 0:  # prev link has an xref
            link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            position = link_xrefs.index(self.xref) + 1
            link.xref = link_xrefs[position]
            link.id = link_ids[position]
        else:
            link.xref = 0
            link.id = ""
        return link

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to expect exceptions from us, so we
        # ensure that we raise on error.
        if self.this is None or not self.this.m_internal:
            raise Exception('self.this.m_internal not available')
        return Rect(JM_py_from_rect(self.this.rect()))

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the border; a non-dict `border` is replaced by the keyword values."""
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors."""
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke in ([], ()):
            # An empty sequence stores an empty color array.
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        color_array = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", color_array)

    def set_flags(self, flags):
        """Store `flags` under key 'F' of the link object.

        Raises:
            ValueError: the document is no PDF or `flags` is not an `int`.
        """
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if type(flags) is not int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        internal = self.this.m_internal
        return internal.uri if internal else ''
6698
6699
class Matrix:
    """Matrix with the six coefficients a, b, c, d, e, f.

    Instances behave like a sequence of length 6 (indexing, iteration,
    `len()`) and support arithmetic with numbers and with other sequences
    of length 6.
    """

    def __abs__(self):
        """Return the Euclidean norm of the six coefficients."""
        return math.sqrt(sum(c * c for c in self))

    def __add__(self, m):
        """Add a number to every coefficient, or add a 6-sequence element-wise."""
        if hasattr(m, "__float__"):
            return Matrix(self.a + m, self.b + m, self.c + m,
                    self.d + m, self.e + m, self.f + m)
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(self.a + m[0], self.b + m[1], self.c + m[2],
                self.d + m[3], self.e + m[4], self.f + m[5])

    def __bool__(self):
        """False only if all six coefficients are zero."""
        return not (max(self) == min(self) == 0)

    def __eq__(self, mat):
        """Equal to any 6-sequence with the same coefficients."""
        if not hasattr(mat, "__len__"):
            return False
        return len(mat) == 6 and not (self - mat)

    def __getitem__(self, i):
        return (self.a, self.b, self.c, self.d, self.e, self.f)[i]

    def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
        """
        Matrix() - all zeros
        Matrix(a, b, c, d, e, f)
        Matrix(zoom-x, zoom-y) - zoom
        Matrix(shear-x, shear-y, 1) - shear
        Matrix(degree) - rotate
        Matrix(Matrix) - new copy
        Matrix(sequence) - from 'sequence'
        Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.

        Explicit keyword args a, b, c, d, e, f override any earlier settings if
        not None.

        Raises:
            ValueError: more than six positional arguments, or a
                combination of arguments that matches none of the forms
                above.
        """
        if not args:
            self.a = self.b = self.c = self.d = self.e = self.f = 0.0
        elif len(args) > 6:
            raise ValueError("Matrix: bad seq len")
        elif len(args) == 6:  # 6 numbers
            self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
        elif len(args) == 1:  # either an angle or a sequ
            if isinstance(args[0], mupdf.FzMatrix):
                self.a = args[0].a
                self.b = args[0].b
                self.c = args[0].c
                self.d = args[0].d
                self.e = args[0].e
                self.f = args[0].f
            elif hasattr(args[0], "__float__"):
                # Rotation by the given number of degrees; sine and cosine
                # are rounded to 8 decimals.
                theta = math.radians(args[0])
                c_ = round(math.cos(theta), 8)
                s_ = round(math.sin(theta), 8)
                self.a = self.d = c_
                self.b = s_
                self.c = -s_
                self.e = self.f = 0.0
            else:
                self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0])
        elif len(args) == 2 or (len(args) == 3 and args[2] == 0):
            # Zoom matrix.
            self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \
                    0.0, 0.0, float(args[1]), 0.0, 0.0
        elif len(args) == 3 and args[2] == 1:
            # Shear matrix.
            self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \
                    float(args[1]), float(args[0]), 1.0, 0.0, 0.0
        else:
            raise ValueError("Matrix: bad args")

        # Override with explicit args if specified.
        if a is not None: self.a = a
        if b is not None: self.b = b
        if c is not None: self.c = c
        if d is not None: self.d = d
        if e is not None: self.e = e
        if f is not None: self.f = f

    def __invert__(self):
        """Calculate inverted matrix."""
        m1 = Matrix()
        m1.invert(self)
        return m1

    def __len__(self):
        return 6

    def __mul__(self, m):
        """Multiply every coefficient by a number, or concatenate with matrix `m`."""
        if hasattr(m, "__float__"):
            return Matrix(self.a * m, self.b * m, self.c * m,
                    self.d * m, self.e * m, self.f * m)
        m1 = Matrix(1, 1)
        return m1.concat(self, m)

    def __neg__(self):
        return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f)

    def __pos__(self):
        return Matrix(self)

    def __repr__(self):
        return "Matrix" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coefficient number `i` (0 to 5) to `float(v)`."""
        v = float(v)
        if   i == 0: self.a = v
        elif i == 1: self.b = v
        elif i == 2: self.c = v
        elif i == 3: self.d = v
        elif i == 4: self.e = v
        elif i == 5: self.f = v
        else:
            raise IndexError("index out of range")
        return

    def __sub__(self, m):
        """Subtract a number from every coefficient, or subtract a 6-sequence element-wise."""
        if hasattr(m, "__float__"):
            return Matrix(self.a - m, self.b - m, self.c - m,
                    self.d - m, self.e - m, self.f - m)
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(self.a - m[0], self.b - m[1], self.c - m[2],
                self.d - m[3], self.e - m[4], self.f - m[5])

    def __truediv__(self, m):
        """Divide every coefficient by a number, or concatenate with the inverse of `m`.

        Raises:
            ZeroDivisionError: `m` is a matrix that cannot be inverted.
        """
        if hasattr(m, "__float__"):
            return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m,
                    self.d * 1./m, self.e * 1./m, self.f * 1./m)
        m1 = util_invert_matrix(m)[1]
        if not m1:
            raise ZeroDivisionError("matrix not invertible")
        m2 = Matrix(1, 1)
        return m2.concat(self, m1)

    def concat(self, one, two):
        """Multiply two matrices and replace current one."""
        if not len(one) == len(two) == 6:
            raise ValueError("Matrix: bad seq len")
        self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
        return self

    def invert(self, src=None):
        """Calculate the inverted matrix. Return 0 if successful and replace
        current one. Else return 1 and do nothing.
        """
        if src is None:
            dst = util_invert_matrix(self)
        else:
            dst = util_invert_matrix(src)
        if dst[0] == 1:
            return 1
        self.a, self.b, self.c, self.d, self.e, self.f = dst[1]
        return 0

    @property
    def is_rectilinear(self):
        """True if rectangles are mapped to rectangles."""
        return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \
            (abs(self.a) < EPSILON and abs(self.d) < EPSILON)

    def prerotate(self, theta):
        """Calculate pre rotation and replace current matrix.

        `theta` is an angle in degrees. Multiples of 90 degrees (within
        EPSILON) are handled without trigonometric functions.

        Raises:
            ValueError: `theta` is infinite.
        """
        theta = float(theta)
        if math.isinf(theta):
            raise ValueError("Matrix: bad rotation angle")
        # Normalise to 0 <= theta < 360 in one step. The former loops that
        # added / subtracted 360 repeatedly never ended for values so large
        # that 360 is lost in float rounding.
        theta %= 360.0
        if theta >= 360.0:
            # The modulo of a tiny negative angle can round to exactly 360.
            theta = 0.0

        if abs(0 - theta) < EPSILON:
            pass

        elif abs(90.0 - theta) < EPSILON:
            a = self.a
            b = self.b
            self.a = self.c
            self.b = self.d
            self.c = -a
            self.d = -b

        elif abs(180.0 - theta) < EPSILON:
            self.a = -self.a
            self.b = -self.b
            self.c = -self.c
            self.d = -self.d

        elif abs(270.0 - theta) < EPSILON:
            a = self.a
            b = self.b
            self.a = -self.c
            self.b = -self.d
            self.c = a
            self.d = b

        else:
            rad = math.radians(theta)
            s = math.sin(rad)
            c = math.cos(rad)
            a = self.a
            b = self.b
            self.a = c * a + s * self.c
            self.b = c * b + s * self.d
            self.c =-s * a + c * self.c
            self.d =-s * b + c * self.d

        return self

    def prescale(self, sx, sy):
        """Calculate pre scaling and replace current matrix."""
        sx = float(sx)
        sy = float(sy)
        self.a *= sx
        self.b *= sx
        self.c *= sy
        self.d *= sy
        return self

    def preshear(self, h, v):
        """Calculate pre shearing and replace current matrix."""
        h = float(h)
        v = float(v)
        # Keep the old a, b: they are needed after self.a, self.b changed.
        a, b = self.a, self.b
        self.a += v * self.c
        self.b += v * self.d
        self.c += h * a
        self.d += h * b
        return self

    def pretranslate(self, tx, ty):
        """Calculate pre translation and replace current matrix."""
        tx = float(tx)
        ty = float(ty)
        self.e += tx * self.a + ty * self.c
        self.f += tx * self.b + ty * self.d
        return self

    # Additional names for some of the methods above.
    __nonzero__ = __bool__
    __inv__ = __invert__
    __div__ = __truediv__
    norm = __abs__
6940
6941
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]"""

    def __hash__(self):
        return hash((1, 0, 0, 1, 0, 0))

    def __init__(self):
        Matrix.__init__(self, 1.0, 1.0)

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        """Keep the six coefficients fixed; store any other attribute normally.

        Assignments to a and d always store 1.0, assignments to b, c, e
        and f always store 0.0. The names are compared as whole names: the
        former substring tests (`name in "ad"`, `name in "bcef"`) also
        caught names like "ad", "bc" or "ef" and discarded their value.
        """
        if name in ("a", "d"):
            self.__dict__[name] = 1.0
        elif name in ("b", "c", "e", "f"):
            self.__dict__[name] = 0.0
        else:
            self.__dict__[name] = value

    # NOTE(review): nothing visible in this class refers to 'checkargs';
    # presumably it was meant to replace modifying methods - confirm.
    def checkargs(*args):
        raise NotImplementedError("Identity is readonly")
6964
# A ready-made IdentityMatrix instance.
Identity = IdentityMatrix()
6966
6967
class linkDest:
    """link or outline destination details"""

    def __init__(self, obj, rlink, document=None):
        """Derive the destination details from `obj`.

        Args:
            obj: object providing the attributes `is_external`, `page`
                and `uri`.
            rlink: if truthy and the uri of `obj` does not start with "#",
                its first three items are used to build a
                "#page=...&zoom=0,...,..." uri (item 0 is a 0-based page
                number).
            document: used to look up named destinations through its
                `resolve_names()` method.

        The results are stored in the attributes `dest`, `file_spec`,
        `flags`, `is_map`, `is_uri`, `kind`, `lt`, `named`, `new_window`,
        `page`, `rb` and `uri`.
        """
        isExt = obj.is_external
        isInt = not isExt
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            """Split 'k1=v1&k2=v2...' into a dict; items without '=' map to None.

            The text to parse is the argument. (Previously the argument
            was ignored and `self.uri[1:]` was parsed instead, which
            removed the first character of uris not starting with "#".)
            """
            ret = dict()
            for item in uri.split('&'):
                eq = item.find('=')
                if eq >= 0:
                    ret[item[:eq]] = item[eq+1:]
                else:
                    ret[item] = None
            return ret

        def unescape(name):
            """Unescape '%AB' substrings to chr(0xAB)."""
            split = name.replace("%%", "%25")  # take care of escaped '%'
            split = split.split("%")
            newname = split[0]
            for item in split[1:]:
                piece = item[:2]
                newname += chr(int(piece, base=16))
                newname += item[2:]
            return newname

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if obj.is_external:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE

        if isInt and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    # Page number plus target point.
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float(m.group(3)), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        # Page number only.
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        assert document
                        if document and m:
                            named = unescape(m.group(1))
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)

        if obj.is_external:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        # File plus page number: a link into another file.
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
7070
class Widget:
    '''
    Class describing a PDF form field ("widget").

    The field properties are plain Python attributes; `update()` writes them
    to the PDF.  Attributes `parent` and `_annot` are not created by
    `__init__()`: the methods using them expect that they were attached by
    the code that created the widget.
    '''

    def __init__(self):
        self.border_color = None
        self.border_style = "S"
        self.border_width = 0
        self.border_dashes = None
        self.choice_values = None  # choice fields only
        self.rb_parent = None  # radio buttons only: xref of owning parent

        self.field_name = None  # field name
        self.field_label = None  # field label
        self.field_value = None
        self.field_flags = 0
        self.field_display = 0
        self.field_type = 0  # valid range 1 through 7
        self.field_type_string = None  # field type as string

        self.fill_color = None
        self.button_caption = None  # button caption
        self.is_signed = None  # True / False if signature
        self.text_color = (0, 0, 0)
        self.text_font = "Helv"
        self.text_fontsize = 0
        self.text_maxlen = 0  # text fields only
        self.text_format = 0  # text fields only
        self._text_da = ""  # /DA = default appearance

        self.script = None  # JavaScript (/A)
        self.script_stroke = None  # JavaScript (/AA/K)
        self.script_format = None  # JavaScript (/AA/F)
        self.script_change = None  # JavaScript (/AA/V)
        self.script_calc = None  # JavaScript (/AA/C)
        self.script_blur = None  # JavaScript (/AA/Bl)
        self.script_focus = None  # JavaScript (/AA/Fo) codespell:ignore

        self.rect = None  # annot value
        self.xref = 0  # annot value

    def __repr__(self):
        # There is no reliable self.parent, so only widget data is shown.
        return f'Widget:(field_type={self.field_type_string} script={self.script})'

    def _adjust_font(self):
        """Ensure text_font is from our list and correctly spelled.

        The comparison is case-insensitive; anything unknown or empty
        becomes "Helv".
        """
        if not self.text_font:
            self.text_font = "Helv"
            return
        wanted = self.text_font.lower()
        for fontname in ("Cour", "TiRo", "Helv", "ZaDb"):
            if wanted == fontname.lower():
                self.text_font = fontname
                return
        self.text_font = "Helv"

    def _checker(self):
        """Any widget type checks.

        Raises:
            ValueError: if field_type is not in range 1 through 7.
        """
        if self.field_type not in range(1, 8):
            raise ValueError("bad field type")

        # if setting a radio button to ON, first set Off all buttons
        # in the group - this is not done by MuPDF:
        if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
            # so we are about setting this button to ON/True
            # check other buttons in same group and set them to 'Off'
            doc = self.parent.parent
            kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
            if kids_type == "array":
                xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
                for xref in xrefs:
                    if xref != self.xref:
                        doc.xref_set_key(xref, "AS", "/Off")
        # the calling method will now set the intended button to on and
        # will find everything prepared for correct functioning.

    def _parse_da(self):
        """Extract font name, size and color from default appearance string (/DA object).

        Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.

        Recognized operators are "Tf" (font and size), "g" (gray), "rg"
        (RGB) and "k" (CMYK, which is what `update()` writes for a
        4-component text color).  An operator that is not preceded by
        enough operands is ignored.  The /DA string is cleared afterwards.
        """
        if not self._text_da:
            return
        font = "Helv"
        fsize = 0
        col = (0, 0, 0)
        dat = self._text_da.split()  # split on any whitespace
        for i, item in enumerate(dat):
            # The 'i >= n' guards prevent negative indices from silently
            # picking up tokens from the end of the list.
            if item == "Tf" and i >= 2:
                font = dat[i - 2][1:]  # strip the leading '/'
                fsize = float(dat[i - 1])
            elif item == "g" and i >= 1:  # unicolor text
                col = [float(dat[i - 1])]
            elif item == "rg" and i >= 3:  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
            elif item == "k" and i >= 4:  # CMYK colored text
                col = [float(f) for f in dat[i - 4:i]]
        self.text_font = font
        self.text_fontsize = fsize
        self.text_color = col
        self._text_da = ""

    def _validate(self):
        """Validate the class entries.

        Normalizes empty values and raises ValueError for a bad rectangle,
        a missing field name, or script entries which are not strings.
        Color checks are delegated to CheckColor() and field type checks
        to _checker().
        """
        if (self.rect.is_infinite
            or self.rect.is_empty
           ):
            raise ValueError("bad rect")

        if not self.field_name:
            raise ValueError("field name missing")

        if self.field_label == "Unnamed":
            self.field_label = None
        CheckColor(self.border_color)
        CheckColor(self.fill_color)
        if not self.text_color:
            self.text_color = (0, 0, 0)
        CheckColor(self.text_color)

        if not self.border_width:
            self.border_width = 0

        if not self.text_fontsize:
            self.text_fontsize = 0

        self.border_style = self.border_style.upper()[0:1]

        # standardize content of JavaScript entries
        btn_type = self.field_type in (
            mupdf.PDF_WIDGET_TYPE_BUTTON,
            mupdf.PDF_WIDGET_TYPE_CHECKBOX,
            mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
        )
        if not self.script:
            self.script = None
        elif type(self.script) is not str:
            raise ValueError("script content must be a string")

        # buttons cannot have the following script actions
        for attr in (
            "script_calc",
            "script_change",
            "script_format",
            "script_stroke",
            "script_blur",
            "script_focus",
        ):
            value = getattr(self, attr)
            if btn_type or not value:
                setattr(self, attr, None)
            elif type(value) is not str:
                raise ValueError(f"{attr} content must be a string")

        self._checker()  # any field_type specific checks

    def _sync_flags(self):
        """Propagate the field flags.

        If this widget has a "/Parent", set its field flags and that of all
        its /Kids widgets to the value of the current widget.
        Only possible for widgets existing in the PDF.

        Returns True or False.
        """
        if not self.xref:
            return False  # no xref: widget not in the PDF
        doc = self.parent.parent  # the owning document
        assert doc
        pdf = _as_pdf_document(doc)
        # load underlying PDF object
        pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
        Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
        if not Parent.pdf_is_dict():
            return False  # no /Parent: nothing to do

        # put the field flags value into the parent field flags:
        Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        # also put that value into all kids of the Parent
        kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            message("warning: malformed PDF, Parent has no Kids array")
            return False  # no /Kids: should never happen!

        for i in range(kids.pdf_array_len()):  # walk through all kids
            # access kid widget, and do some precautionary checks
            kid = kids.pdf_array_get(i)
            if not kid.pdf_is_dict():
                continue
            xref = kid.pdf_to_num()  # get xref of the kid
            if xref == self.xref:  # skip self widget
                continue
            subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
            if not subtype.pdf_to_name() == "Widget":
                continue
            # put the field flags value into the kid field flags:
            kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        return True  # all done

    @staticmethod
    def _state_names(source):
        """Return the first word following each '/' in `source`."""
        chunks = (part.split() for part in source.split("/")[1:])
        # Skip empty chunks (e.g. from '//') instead of failing on them.
        return [words[0] for words in chunks if words]

    def _appearance_states(self, doc, key):
        """Return the state names stored under `key` of this widget, or None.

        `key` is "AP/N" or "AP/D".  The entry may be an inline dictionary
        or a reference to another object; any other type gives None.
        """
        entry = doc.xref_get_key(self.xref, key)
        if entry[0] == "dict":
            # strip the enclosing '<<' and '>>'
            return self._state_names(entry[1][2:-2])
        if entry[0] == "xref":
            target = int(entry[1].split(" ")[0])
            return self._state_names(doc.xref_object(target))
        return None

    def button_states(self):
        """Return the on/off state names for button widgets.

        A button may have 'normal' or 'pressed down' appearances. While the 'Off'
        state is usually called like this, the 'On' state is often given a name
        relating to the functional context.

        Returns:
            None if the widget is no checkbox / radio button (field types 2
            and 5) or has no `parent` attribute.  Otherwise a dictionary
            with keys "normal" and "down"; each value is a list of state
            names, or None if the respective appearance entry is neither a
            dictionary nor a reference.
        """
        if self.field_type not in (2, 5):
            return None  # no button type
        if not hasattr(self, "parent"):  # field does not yet exist on page
            return None
        doc = self.parent.parent
        return {
            "normal": self._appearance_states(doc, "AP/N"),
            "down": self._appearance_states(doc, "AP/D"),
        }

    @property
    def next(self):
        return self._annot.next

    def on_state(self):
        """Return the "On" value for button widgets.

        This is useful for radio buttons mainly. Checkboxes will always return
        "Yes". Radio buttons will return the string that is unequal to "Off"
        as returned by method button_states().
        If the radio button is new / being created, it does not yet have an
        "On" value. In this case, a warning is shown and True is returned.
        """
        if self.field_type not in (2, 5):
            return None  # no checkbox or radio button
        states = self.button_states() or {}
        for names in states.values():
            # an appearance without state names is reported as None
            for name in names or ():
                if name != "Off":
                    return name
        message("warning: radio button has no 'On' value.")
        return True

    def reset(self):
        """Reset the field value to its default.
        """
        TOOLS._reset_widget(self._annot)

    def update(self, sync_flags=False):
        """Reflect Python object in the PDF.

        Args:
            sync_flags: if true, also propagate the field flags to the
                parent and its kids, see _sync_flags().

        Raises:
            ValueError: if validation fails or text_color does not have
                1, 3 or 4 components.
        """
        self._validate()

        self._adjust_font()  # ensure valid text_font name

        # now create the /DA string
        color_ops = {1: "g", 3: "rg", 4: "k"}  # gray, RGB, CMYK
        color_op = color_ops.get(len(self.text_color))
        if color_op is None:
            raise ValueError("bad text color")
        components = " ".join(f"{c:g}" for c in self.text_color)
        self._text_da = f"{components} {color_op} /{self.text_font:s} {self.text_fontsize:g} Tf"

        # if widget has a '/AA/C' script, make sure it is in the '/CO'
        # array of the '/AcroForm' dictionary.
        if self.script_calc:  # there is a "calculation" script:
            # make sure we are in the /CO array
            util_ensure_widget_calc(self._annot)

        # finally update the widget
        TOOLS._save_widget(self._annot, self)
        self._text_da = ""
        if sync_flags:
            self._sync_flags()  # propagate field flags to parent and kids
7409
7410
7411 from . import _extra
7412
7413
class Outline:
    '''
    Thin wrapper around a MuPDF outline object, kept in attribute `this`.

    Most properties read directly from `this.m_internal`.
    '''

    __slots__ = [ 'this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Like `dest` property but uses `document` to resolve destinations for
        kind=LINK_NAMED.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        '''First child item as an `Outline`, or None if there is none.'''
        child = self.this.down()
        return Outline(child) if child.m_internal else None

    @property
    def is_external(self):
        '''Whether the item's uri is an external link.'''
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            #
            return _extra.Outline_is_external( self.this)
        internal = self.this.m_internal
        if not internal:
            return False
        target = internal.uri
        if target is None:
            return False
        return mupdf.fz_is_external_link(target)

    @property
    def is_open(self):
        '''Value of the item's `is_open` field.'''
        return self.this.m_internal.is_open

    @property
    def next(self):
        '''Following item on the same level as an `Outline`, or None.'''
        sibling = self.this.next()
        return Outline(sibling) if sibling.m_internal else None

    @property
    def page(self):
        '''Page number stored in the item.'''
        return self.this.m_internal.page.page

    @property
    def title(self):
        '''Title text of the item.'''
        return self.this.m_internal.title

    @property
    def uri(self):
        '''The item's uri, or None if there is no underlying outline.'''
        internal = self.this.m_internal
        return internal.uri if internal else None

    @property
    def x(self):
        '''Value of the item's `x` field.'''
        return self.this.m_internal.x

    @property
    def y(self):
        '''Value of the item's `y` field.'''
        return self.this.m_internal.y
7494
7495
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    Args:
        recurse, instance_forms, ascii, no_update:
            copied to the equally named fields of the result.
        sanitize:
            if true, a sanitize filter factory is added to the result.
        sopts:
            optional mupdf.PdfSanitizeFilterOptions used by the sanitize
            filter; a default instance is created if omitted.
    '''
    options = mupdf.PdfFilterOptions()
    options.recurse = recurse
    options.instance_forms = instance_forms
    options.ascii = ascii
    options.no_update = no_update

    if not sanitize:
        return options

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if sopts:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
    else:
        sopts = mupdf.PdfSanitizeFilterOptions()

    class Factory(mupdf.PdfFilterFactory2):

        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                # Diagnostics, disabled by default.
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    self.sopts.internal(),
                    )

    sanitizer = Factory()
    options.add_factory(sanitizer.internal())
    # Keep a reference to the factory on the result (presumably so it stays
    # alive as long as the options are in use).
    options._factory = sanitizer
    return options
7556
7557
7558 class Page:
7559
def __init__(self, page, document):
    """Wrap a MuPDF page.

    Args:
        page: a mupdf.FzPage or mupdf.PdfPage.
        document: stored as `parent`.
    """
    assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
    self.this = page
    self.thisown = True
    self.last_point = None
    self.draw_cont = ''
    self._annot_refs = dict()
    self.parent = document
    # The page number is taken from the underlying MuPDF struct, if any.
    if not page.m_internal:
        self.number = None
    elif isinstance( page, mupdf.PdfPage):
        self.number = page.m_internal.super.number
    else:
        self.number = page.m_internal.number
7575
def __repr__(self):
    """Return the same text as `__str__()`."""
    # The statements that used to follow this return were unreachable and
    # duplicated what __str__() does.
    return self.__str__()
7585
7586 def __str__(self):
7587 #CheckParent(self)
7588 parent = getattr(self, 'parent', None)
7589 if isinstance(self.this.m_internal, mupdf.pdf_page):
7590 number = self.this.m_internal.super.number
7591 else:
7592 number = self.this.m_internal.number
7593 ret = f'page {number}'
7594 if parent:
7595 x = self.parent.name
7596 if self.parent.stream is not None:
7597 x = "<memory, doc# %i>" % (self.parent._graft_id,)
7598 if x == "":
7599 x = "<new PDF, doc# %i>" % self.parent._graft_id
7600 ret += f' of {x}'
7601 return ret
7602
def _add_caret_annot(self, point):
    """Create a caret annotation and return the MuPDF annotation object.

    If `point` is given, the annotation keeps its size and its top-left
    corner is moved to `point`.
    """
    if g_use_extra:
        return extra._add_caret_annot( self.this, JM_point_from_py(point))

    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_CARET)
    if point:
        origin = JM_point_from_py(point)
        box = mupdf.pdf_annot_rect(annot)
        moved = mupdf.FzRect(
                origin.x,
                origin.y,
                origin.x + box.x1 - box.x0,
                origin.y + box.y1 - box.y0,
                )
        mupdf.pdf_set_annot_rect(annot, moved)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return annot
7617
def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
    """Create a file attachment annotation at `point` and return it as `Annot`.

    Args:
        point: top-left corner of the annotation.
        buffer_: content of the file to embed.
        filename: file name, also stored as the annotation's /Contents.
        ufilename: unicode file name, defaults to `filename`.
        desc: description, defaults to `filename`.
        icon: optional icon name.

    Raises:
        TypeError: if `buffer_` cannot be converted to a MuPDF buffer.
    """
    pdf_page = self._pdf_page()
    unicode_name = ufilename or filename
    description = desc or filename
    origin = JM_point_from_py(point)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
    box = mupdf.pdf_annot_rect(annot)
    box = mupdf.fz_make_rect(
            origin.x,
            origin.y,
            origin.x + box.x1 - box.x0,
            origin.y + box.y1 - box.y0,
            )
    annot_flags = mupdf.PDF_ANNOT_IS_PRINT
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_flags(annot, annot_flags)

    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)

    filespec = JM_embed_file(pdf_page.doc(), content, filename, unicode_name, description, 1)
    mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), filespec)
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
    mupdf.pdf_update_annot(annot)
    # rect and flags are set once more after the appearance update
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_flags(annot, annot_flags)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
7644
def _add_freetext_annot(
        self, rect,
        text,
        fontsize=11,
        fontname=None,
        text_color=None,
        fill_color=None,
        border_color=None,
        border_width=0,
        dashes=None,
        callout=None,
        line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity=1,
        align=0,
        rotate=0,
        richtext=False,
        style=None,
        ):
    """Create a FreeText annotation in `rect` and return it as `Annot`.

    With `richtext` false, `text` becomes the annotation contents and a
    default appearance string is built from `text_color`, `fontname` and
    `fontsize`.  With `richtext` true, `text` is embedded in an XML body
    stored under key /RC, and `style` (if given) under key /DS.

    Raises:
        ValueError: if `border_color` is given while `richtext` is false,
            or if `rect` is infinite or empty.
    """
    # Rich text content: `text` wrapped in an XML body element.
    # NOTE(review): namespace '.../1999/xtml' looks like a typo for
    # '.../1999/xhtml' - confirm before changing.
    rc = f"""<?xml version="1.0"?>
<body xmlns="http://www.w3.org/1999/xtml"
xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">
{text}"""
    page = self._pdf_page()
    if border_color and not richtext:
        raise ValueError("cannot set border_color if rich_text is False")
    # without an explicit text color, the border color is used for text
    if border_color and not text_color:
        text_color = border_color
    # each call returns (number of components, components)
    nfcol, fcol = JM_color_FromSequence(fill_color)
    ntcol, tcol = JM_color_FromSequence(text_color)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError( MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
    annot_obj = mupdf.pdf_annot_obj(annot)

    #insert text as 'contents' or 'RC' depending on 'richtext'
    if not richtext:
        mupdf.pdf_set_annot_contents(annot, text)
    else:
        mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc)
        if style:
            mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style)

    mupdf.pdf_set_annot_rect(annot, r)

    # bring the rotation into range 0 <= rotate < 360
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    if rotate != 0:
        mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

    mupdf.pdf_set_annot_quadding(annot, align)

    # set the fill color only if one was given
    if nfcol > 0:
        mupdf.pdf_set_annot_color(annot, fcol[:nfcol])

    mupdf.pdf_set_annot_border_width(annot, border_width)
    mupdf.pdf_set_annot_opacity(annot, opacity)
    if dashes:
        for d in dashes:
            mupdf.pdf_add_annot_border_dash_item(annot, float(d))

    # Insert callout information
    if callout:
        mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
        mupdf.pdf_set_annot_callout_style(annot, line_end)
        point_count = len(callout)
        extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)

    # insert the default appearance string
    if not richtext:
        JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    val = Annot(annot)
    return val
7724
def _add_ink_annot(self, list):
    """Create an ink annotation and return it as `Annot`.

    Args:
        list: sequence of strokes, each a sequence of point-like items
            with exactly two entries.

    Raises:
        ValueError: if `list` is no sequence or a point does not have
            exactly two entries.
    """
    pdf_page = _as_pdf_page(self.this)
    if not PySequence_Check(list):
        raise ValueError( MSG_BAD_ARG_INK_ANNOT)
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(pdf_page, mupdf.FzRect(0), ctm)
    # points are transformed with the inverse of the page transformation
    inverse = mupdf.fz_invert_matrix(ctm)
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_INK)
    annot_obj = mupdf.pdf_annot_obj(annot)
    stroke_count = len(list)
    ink_list = mupdf.pdf_new_array(pdf_page.doc(), stroke_count)

    for stroke_no in range(stroke_count):
        points = list[stroke_no]
        point_count = len(points)
        coords = mupdf.pdf_new_array(pdf_page.doc(), 2 * point_count)

        for point_no in range(point_count):
            item = points[point_no]
            if not PySequence_Check(item) or PySequence_Size(item) != 2:
                raise ValueError( MSG_BAD_ARG_INK_ANNOT)
            transformed = mupdf.fz_transform_point(JM_point_from_py(item), inverse)
            mupdf.pdf_array_push_real(coords, transformed.x)
            mupdf.pdf_array_push_real(coords, transformed.y)

        mupdf.pdf_array_push(ink_list, coords)

    mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), ink_list)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
7756
def _add_line_annot(self, p1, p2):
    """Create a line annotation from `p1` to `p2` and return it as `Annot`."""
    annot = mupdf.pdf_create_annot(self._pdf_page(), mupdf.PDF_ANNOT_LINE)
    start = JM_point_from_py(p1)
    end = JM_point_from_py(p2)
    mupdf.pdf_set_annot_line(annot, start, end)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
7767
def _add_multiline(self, points, annot_type):
    """Create an annotation of `annot_type` with `points` as its vertices.

    Raises:
        ValueError: if there are fewer than two points or a point does
            not have exactly two entries.
    """
    pdf_page = self._pdf_page()
    if len(points) < 2:
        raise ValueError( MSG_BAD_ARG_POINTS)
    annot = mupdf.pdf_create_annot(pdf_page, annot_type)
    for item in points:
        if PySequence_Size(item) != 2:
            raise ValueError( MSG_BAD_ARG_POINTS)
        mupdf.pdf_add_annot_vertex(annot, JM_point_from_py(item))

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
7782
def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
    """Create a redaction annotation covering `quad` and return it as `Annot`.

    Args:
        quad: area to redact; the annotation rect is its enclosing rectangle.
        text: optional overlay text; requires `da_str`.
        da_str: default appearance string for the overlay text.
        align: text alignment, stored under key /Q.
        fill: optional fill color, stored under key /IC.
        text_color: not used by this method.
    """
    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_REDACT)
    area = mupdf.fz_rect_from_quad(JM_quad_from_py(quad))
    # TODO calculate de-rotated rect
    mupdf.pdf_set_annot_rect(annot, area)
    if fill:
        count, components = JM_color_FromSequence(fill)
        color_array = mupdf.pdf_new_array(pdf_page.doc(), count)
        for index in range(count):
            mupdf.pdf_array_push_real(color_array, components[index])
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), color_array)
    if text:
        assert da_str
        mupdf.pdf_dict_puts(
                mupdf.pdf_annot_obj(annot),
                "OverlayText",
                mupdf.pdf_new_text_string(text),
                )
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    # take an additional reference and wrap it in a new PdfAnnot
    kept = mupdf.ll_pdf_keep_annot(annot.m_internal)
    return Annot(mupdf.PdfAnnot( kept))
7812
def _add_square_or_circle(self, rect, annot_type):
    """Create an annotation of `annot_type` with rectangle `rect`.

    Raises:
        ValueError: if `rect` is infinite or empty.
    """
    pdf_page = self._pdf_page()
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError( MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot(pdf_page, annot_type)
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
7824
def _add_stamp_annot(self, rect, stamp=0):
    """Create a stamp annotation in `rect` and return it as `Annot`.

    Args:
        rect: rectangle of the stamp.
        stamp: index into the list of stamp names for a text stamp, or
            the image of an image stamp given as Pixmap, file name,
            bytes, bytearray or io.BytesIO.  Any other value selects the
            first stamp name.

    Raises:
        ValueError: if `rect` is infinite or empty.
    """
    rect = Rect(rect)
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError(MSG_BAD_RECT)
    pdf_page = self._pdf_page()
    stamp_names = [
            "Approved",
            "AsIs",
            "Confidential",
            "Departmental",
            "Experimental",
            "Expired",
            "Final",
            "ForComment",
            "ForPublicRelease",
            "NotApproved",
            "NotForPublicRelease",
            "Sold",
            "TopSecret",
            "Draft",
            ]
    image_data = None
    name = None
    if stamp in range(len(stamp_names)):
        name = stamp_names[stamp]
    elif isinstance(stamp, Pixmap):
        image_data = stamp.tobytes()
    elif isinstance(stamp, str):
        image_data = pathlib.Path(stamp).read_bytes()
    elif isinstance(stamp, (bytes, bytearray)):
        image_data = stamp
    elif isinstance(stamp, io.BytesIO):
        image_data = stamp.getvalue()
    else:
        name = stamp_names[0]

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_STAMP)
    if image_data:  # image stamp
        image = mupdf.fz_new_image_from_buffer(
                mupdf.fz_new_buffer_from_copied_data(image_data)
                )

        # compute image boundary box on page: scale the image to fit
        # into "rect" while keeping its aspect ratio
        img_w, img_h = image.w(), image.h()
        scale = min(rect.width / img_w, rect.height / img_h)
        width = img_w * scale  # bbox width
        height = img_h * scale  # bbox height

        # center the box on the center of "rect"
        center = (rect.tl + rect.br) / 2
        x0 = center.x - width / 2
        y0 = center.y - height / 2
        x1 = x0 + width
        y1 = y0 + height
        box = mupdf.fz_make_rect(x0, y0, x1, y1)
        mupdf.pdf_set_annot_rect(annot, box)
        mupdf.pdf_set_annot_stamp_image(annot, image)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
        mupdf.pdf_set_annot_contents(annot, "Image Stamp")
    else:  # text stamp
        mupdf.pdf_set_annot_rect(annot, box)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
        mupdf.pdf_set_annot_contents(annot, name)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
7892
def _add_text_annot(self, point, text, icon=None):
    """Create a text annotation at `point` and return it as `Annot`.

    The annotation keeps its size; its top-left corner is moved to
    `point`.  `icon` optionally names the icon to use.
    """
    pdf_page = self._pdf_page()
    origin = JM_point_from_py( point)
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_TEXT)
    box = mupdf.pdf_annot_rect(annot)
    box = mupdf.fz_make_rect(
            origin.x,
            origin.y,
            origin.x + box.x1 - box.x0,
            origin.y + box.y1 - box.y0,
            )
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_contents(annot, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
7906
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation of `annot_type` for `quads`.

    Returns the new annotation, registered with this page, or None if
    none was created.

    Raises:
        ValueError: if the document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")

    marker = Page__add_text_marker(self, quads, annot_type)
    if not marker:
        return None

    # The annotation refers to its page weakly; the page keeps the
    # annotation in its reference table.
    marker.parent = weakref.proxy(self)
    self._annot_refs[id(marker)] = marker
    return marker
7920
def _addAnnot_FromString(self, linklist):
    """Add links from list of object sources.

    Args:
        linklist: tuple of strings, each the PDF source of a link /
            annotation object.  Items that are empty or cannot be added
            are skipped with a message.

    Raises:
        ValueError: if `linklist` is not empty and is no tuple.
    """
    CheckParent(self)
    if g_use_extra:
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString( self.this, linklist)
    page = _as_pdf_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return

    # insert links from the provided sources
    if not isinstance(linklist, tuple):
        raise ValueError( "bad 'linklist' argument")
    # make sure the page has an /Annots array
    if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i, txtpy in enumerate(linklist):
        text = JM_StrAsChar(txtpy)
        if not text:
            # The index must be formatted into the text here: the other
            # call sites pass message() a single, fully formatted string.
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
            mupdf.pdf_array_push( annots, ind_obj)
        except Exception:
            # best effort: report the bad item and continue with the next
            if g_exceptions_verbose:    exception_info()
            message("skipping bad link / annot item %i.\n" % i)
7954
def _addWidget(self, field_type, field_name):
    """Create a form field of `field_type` named `field_name`.

    Returns the new widget as `Annot`.

    Raises:
        RuntimeError: if the widget could not be created.
    """
    pdf_page = self._pdf_page()
    widget = JM_create_widget(pdf_page.doc(), pdf_page, field_type, field_name)
    if not widget.m_internal:
        raise RuntimeError( "cannot create widget")
    JM_add_annot_id(widget, "W")
    return Annot(widget)
7963
def _apply_redactions(self, text, images, graphics):
    """Apply the redactions of the page.

    Args:
        text: how to treat text.
        images: how to treat images.
        graphics: how to treat vector graphics.

    Returns:
        The result of mupdf.pdf_redact_page().
    """
    pdf_page = self._pdf_page()
    options = mupdf.PdfRedactOptions()
    options.black_boxes = 0  # no black boxes
    options.text = text
    options.image_method = images
    options.line_art = graphics
    return mupdf.pdf_redact_page(pdf_page.doc(), pdf_page, options)
7973
def _erase(self):
    """Detach this page object from its document and from MuPDF.

    Annotation references are reset, the owning document is asked to
    forget the page, and the attributes linking to document and MuPDF
    page are cleared.
    """
    self._reset_annot_refs()
    try:
        self.parent._forget_page(self)
    except Exception:
        # best effort: only report the problem and go on with the cleanup
        exception_info()
    self.parent = None
    self.thisown = False
    self.number = None
    self.this = None
7985
def _count_q_balance(self):
    """Count missing graphic state pushs and pops.

    Returns:
        A pair of integers (push, pop). Push is the number of missing
        PDF "q" commands, pop is the number of "Q" commands.
        A balanced graphics state for the page will be reached if its
        /Contents is prepended with 'push' copies of string "q\n"
        and appended with 'pop' copies of "\nQ".
    """
    pdf_page = _as_pdf_page(self)  # the underlying PDF page
    page_obj = pdf_page.obj()
    resources = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    pdf = _as_pdf_document(self.parent)  # the underlying PDF document

    # the counting itself is done by MuPDF
    return mupdf.pdf_count_q_balance_outparams_fn(pdf, resources, contents)
8009
def _get_optional_content(self, oc: OptInt) -> OptStr:
    """Return the resource property name referring to optional content `oc`.

    Args:
        oc: xref of an OCG or OCMD object; None or 0 means "none".

    Returns:
        None if `oc` is None or 0.  Otherwise the name under which `oc`
        is listed in the page's resource properties; if it is not listed
        yet, it is added under the first unused name "MC0", "MC1", ...

    Raises:
        ValueError: if the object at `oc` is neither an OCG nor an OCMD.
    """
    if oc is None or oc == 0:
        return None
    source = self.parent.xref_object(oc, compressed=True)
    if "/Type/OCG" not in source and "/Type/OCMD" not in source:
        raise ValueError("bad optional content: 'oc'")

    # map xref -> property name
    name_of = {xref: name for name, xref in self._get_resource_properties()}
    if oc in name_of:
        return name_of[oc]

    # not listed yet: look for the first unused name
    counter = 0
    candidate = "MC%i" % counter
    while candidate in name_of.values():
        counter += 1
        candidate = "MC%i" % counter
    self._set_resource_property(candidate, oc)
    return candidate
8032
def _get_resource_properties(self):
    '''
    Return the page's Resource/Properties as delivered by
    JM_get_resource_properties().
    '''
    return JM_get_resource_properties(self._pdf_page().obj())
8040
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Run the page through a structured text device.

    Args:
        clip: area to consider; the page rectangle if None.
        flags: text extraction flags.
        matrix: transformation applied when running the page.

    Returns:
        A mupdf.FzStextPage.
    """
    if g_use_extra:
        ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
        tpage = mupdf.FzStextPage(ll_tpage)
        return tpage
    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    # (`clip` is converted only once; an earlier duplicate conversion,
    # whose result was overwritten right away, has been removed.)
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
    if isinstance(page, mupdf.FzPage):
        pass
    elif isinstance(page, mupdf.PdfPage):
        page = page.super()
    else:
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
8065
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Put an image on the page and return its xref.

    The image comes from exactly one source, tried in this order:
    `xref` (an image already in the PDF), `stream` (image bytes),
    `filename` (image file), `pixmap`.  The body is a port of code that used
    `goto`: the ``do_*`` flags below stand for the jump targets and each
    stage switches off the stages that must be skipped.

    Args:
        filename: image file path, read with ``mupdf.fz_read_file()``.
        pixmap: object whose ``.this`` is the MuPDF pixmap to insert.
        stream: image data in memory.
        imask: optional mask image data; only used on the stream/file path.
        clip, rotate, keep_proportion: passed to ``calc_image_matrix()``.
        overlay: passed to ``JM_insert_contents()``.
        oc: if truthy, passed to ``JM_add_oc_object()`` for a new image.
        width, height: initial image dimensions; overwritten on every path
            that inspects an actual image.
        xref: if > 0, re-use this existing image object.
        alpha: not read anywhere in this method.
        _imgname: resource name under which the image is registered.
        digests: dict mapping MD5 digest -> image xref, used to detect
            images that were inserted before; updated in place.
            NOTE(review): must not be None on the pixmap/stream paths.

    Returns:
        ``(img_xref, digests)`` if a new image object was added to the PDF,
        else ``(img_xref, None)``.

    Raises:
        ValueError: `xref` does not refer to an image (width + height is 0),
            or a mask was given for an image without compressed buffer.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drops the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0   # becomes 1 once `digests` received a new entry

    # Stage switches emulating the original goto labels.
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # Re-use an existing image: only its dimensions are needed.
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0

    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same pixmap was inserted before: refer to the existing object.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # Pixmap has an alpha channel: build a separate mask image
                # from a converted copy and attach it to the main image.
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # The digest covers the image bytes and, if present, the mask bytes.
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same data was inserted before: refer to the existing object.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # Re-create the image from its compressed data, now with the mask.
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1,  # interpolate
                0,  # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # A new image: store it in the PDF and remember its digest.
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # Register the image in /Resources/XObject (creating the
        # dictionaries if missing) and add the drawing command to the page.
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
8237
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it from this page.

    The font itself is created by JM_insert_font(); this method then makes
    sure the page has /Resources and /Resources/Font dictionaries and stores
    an indirect reference to the font under the name `fontname`.

    Returns:
        The value returned by JM_insert_font(), unchanged.

    Raises:
        RuntimeError: item 0 of that value yields no (non-zero) xref.
    """
    pdf_page = self._pdf_page()
    pdf_doc = pdf_page.doc()

    font_info = JM_insert_font(pdf_doc, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)

    # Locate (or create) /Resources and /Resources/Font.
    page_resources = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Resources'))
    if not page_resources.pdf_is_dict():
        page_resources = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME("Resources"), 5)
    font_dict = mupdf.pdf_dict_get(page_resources, PDF_NAME('Font'))
    if not font_dict.m_internal:
        # The page has no fonts yet.
        font_dict = mupdf.pdf_new_dict(pdf_doc, 5)
        mupdf.pdf_dict_putl(pdf_page.obj(), font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))

    # The font dictionary gets a named reference to the font object.
    _, font_xref = JM_INT_ITEM(font_info, 0)
    if not font_xref:
        raise RuntimeError( "cannot insert font")
    mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf_doc, font_xref, 0))
    return font_info
8258
def _load_annot(self, name, xref):
    """Look up an annotation of this page by name or by xref.

    Args:
        name: annotation name; only used when `xref` is 0.
        xref: annotation xref; any non-zero value takes precedence.

    Returns:
        An ``Annot`` wrapping the MuPDF annotation, or None if the lookup
        produced an empty MuPDF object.
    """
    pdf_page = self._pdf_page()
    if xref != 0:
        found = JM_get_annot_by_xref(pdf_page, xref)
    else:
        found = JM_get_annot_by_name(pdf_page, name)
    if not found.m_internal:
        return None
    return Annot(found)
8267
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Render the page via JM_pixmap_from_page() and wrap the result in a Pixmap."""
    return Pixmap(JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip))
8271
def _other_box(self, boxtype):
    """Read a page box stored under the key `boxtype` of the page object.

    Returns:
        The box converted with JM_py_from_rect(), or None if the page is no
        PDF page, the key does not hold an array, or the resulting
        rectangle is infinite.
    """
    box = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)  # "not found" marker
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_gets(pdf_page.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    if not mupdf.fz_is_infinite_rect(box):
        return JM_py_from_rect(box)
    return None
8282
def _pdf_page(self, required=True):
    """Return `self.this` as PDF page via _as_pdf_page(), forwarding `required`."""
    underlying = self.this
    return _as_pdf_page(underlying, required=required)
8285
8286 def _reset_annot_refs(self):
8287 """Invalidate / delete all annots of this page."""
8288 self._annot_refs.clear()
8289
8290 def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
8291
8292 if CA >= 1 and ca >= 1 and blendmode is None:
8293 return
8294 tCA = int(round(max(CA , 0) * 100))
8295 if tCA >= 100:
8296 tCA = 99
8297 tca = int(round(max(ca, 0) * 100))
8298 if tca >= 100:
8299 tca = 99
8300 gstate = "fitzca%02i%02i" % (tCA, tca)
8301
8302 if not gstate:
8303 return
8304 page = _as_pdf_page(self.this)
8305 resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
8306 if not resources.m_internal:
8307 resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
8308 extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
8309 if not extg.m_internal:
8310 extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)
8311 n = mupdf.pdf_dict_len(extg)
8312 for i in range(n):
8313 o1 = mupdf.pdf_dict_get_key(extg, i)
8314 name = mupdf.pdf_to_name(o1)
8315 if name == gstate:
8316 return gstate
8317 opa = mupdf.pdf_new_dict(page.doc(), 3)
8318 mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
8319 mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
8320 mupdf.pdf_dict_puts(extg, gstate, opa)
8321 return gstate
8322
8323 def _set_pagebox(self, boxtype, rect):
8324 doc = self.parent
8325 if doc is None:
8326 raise ValueError("orphaned object: parent is None")
8327
8328 if not doc.is_pdf:
8329 raise ValueError("is no PDF")
8330
8331 valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox")
8332
8333 if boxtype not in valid_boxes:
8334 raise ValueError("bad boxtype")
8335
8336 rect = Rect(rect)
8337 mb = self.mediabox
8338 rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
8339 if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1):
8340 raise ValueError(f"{boxtype} not in MediaBox")
8341
8342 doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]")
8343
def _set_resource_property(self, name, xref):
    """Register `xref` under `name` via JM_set_resource_property() on the page object."""
    page_obj = self._pdf_page().obj()
    JM_set_resource_property(page_obj, name, xref)
8347
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Display a page of another PDF on this page.

    The source page becomes a Form XObject ("fullpage").  A second XObject
    that draws it, built with `clip` and `matrix`, is registered under
    `_imgname` in the target page's resources and invoked from a new
    contents stream.

    Args:
        fz_srcpage: the source page.
        overlay: passed to JM_insert_contents().
        matrix: matrix-like for the referencing XObject.
        xref: passed to JM_xobject_from_page(); if non-zero it is also the
            return value.
        oc: if > 0, passed to JM_add_oc_object() for the referencing XObject.
        clip: rect-like for the referencing XObject.
        graftmap: object whose ``.this`` is passed to JM_xobject_from_page().
        _imgname: resource name of the referencing XObject.

    Returns:
        `xref` if non-zero, else the xref of the XObject made from the
        source page.
    """
    clip_rect = JM_rect_from_py(clip)
    transform = JM_matrix_from_py(matrix)
    result_xref = xref
    target_page = _as_pdf_page(self.this)
    target_obj = target_page.obj()
    target_pdf = target_page.doc()  # the PDF we are writing to
    ENSURE_OPERATION(target_pdf)

    # --- source page -> Form XObject ---------------------------------
    source_form = JM_xobject_from_page(target_pdf, fz_srcpage, xref, graftmap.this)
    if not result_xref:
        result_xref = mupdf.pdf_to_num(source_form)

    # --- referencing XObject (controls display on the target page) ---
    # Its own /Resources refer to the source form as "fullpage".
    inner_xobjects = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_puts(inner_xobjects, "fullpage", source_form)
    inner_resources = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_put(inner_resources, PDF_NAME('XObject'), inner_xobjects)

    inner_stream = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(inner_stream, "/fullpage Do")

    wrapper_form = mupdf.pdf_new_xobject(target_pdf, clip_rect, transform, inner_resources, inner_stream)
    if oc > 0:
        JM_add_oc_object(target_pdf, mupdf.pdf_resolve_indirect(wrapper_form), oc)

    # --- target page, step 1: register the XObject in /Resources -----
    page_resources = mupdf.pdf_dict_get_inheritable(target_obj, PDF_NAME('Resources'))
    if not page_resources.m_internal:
        page_resources = mupdf.pdf_dict_put_dict(target_obj,PDF_NAME('Resources'), 5)
    page_xobjects = mupdf.pdf_dict_get(page_resources, PDF_NAME('XObject'))
    if not page_xobjects.m_internal:
        page_xobjects = mupdf.pdf_dict_put_dict(page_resources, PDF_NAME('XObject'), 5)
    mupdf.pdf_dict_puts(page_xobjects, _imgname, wrapper_form)

    # --- target page, step 2: new contents object with the Do-command -
    draw_cmd = mupdf.fz_new_buffer(50)
    for piece in (" q /", _imgname, " Do Q "):
        mupdf.fz_append_string(draw_cmd, piece)

    JM_insert_contents(target_pdf, target_obj, draw_cmd, overlay)
    return result_xref
8404
def add_caret_annot(self, point: point_like) -> Annot:
    """Add a 'Caret' annotation at `point`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        raw_annot = self._add_caret_annot(point)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot( raw_annot)
    annot_postprocess(self, caret)
    assert hasattr( caret, 'parent')
    return caret
8417
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Add a 'Circle' (ellipse, oval) annotation inside `rect`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
8428
def add_file_annot(
        self,
        point: point_like,
        buffer_: ByteString,
        filename: str,
        ufilename: OptStr =None,
        desc: OptStr =None,
        icon: OptStr =None
        ) -> Annot:
    """Add a 'FileAttachment' annotation at `point`.

    All arguments are forwarded unchanged to ``_add_file_annot()``.  The
    rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    options = {"ufilename": ufilename, "desc": desc, "icon": icon}
    try:
        attachment = self._add_file_annot(point, buffer_, filename, **options)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
8453
def add_freetext_annot(
        self,
        rect: rect_like,
        text: str,
        *,
        fontsize: float =11,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        fill_color: OptSeq =None,
        border_color: OptSeq =None,
        border_width: float =0,
        dashes: OptSeq =None,
        callout: OptSeq =None,
        line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity: float =1,
        align: int =0,
        rotate: int =0,
        richtext=False,
        style=None,
        ) -> Annot:
    """Add a 'FreeText' annotation in `rect`.

    All arguments are forwarded unchanged to ``_add_freetext_annot()``.
    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    options = dict(
        fontsize=fontsize,
        fontname=fontname,
        text_color=text_color,
        fill_color=fill_color,
        border_color=border_color,
        border_width=border_width,
        dashes=dashes,
        callout=callout,
        line_end=line_end,
        opacity=opacity,
        align=align,
        rotate=rotate,
        richtext=richtext,
        style=style,
    )
    saved_rotation = annot_preprocess(self)
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
8501
def add_highlight_annot(self, quads=None, start=None,
        stop=None, clip=None) -> Annot:
    """Add a 'Highlight' annotation.

    If `quads` is None, the area to mark is determined by
    get_highlight_selection() from `start`, `stop` and `clip`; otherwise
    `quads` is checked with CheckMarkerArg() and used.
    """
    marked = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_HIGHLIGHT)
8511
def add_ink_annot(self, handwriting: list) -> Annot:
    """Add an 'Ink' ('handwriting') annotation.

    Args:
        handwriting: a list of lists of point_likes.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
8525
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Add a 'Line' annotation from `p1` to `p2`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
8536
def add_polygon_annot(self, points: list) -> Annot:
    """Add a 'Polygon' annotation through `points`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
8547
def add_polyline_annot(self, points: list) -> Annot:
    """Add a 'PolyLine' annotation through `points`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
8558
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Add a 'Square' (rectangle) annotation covering `rect`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
8569
def add_redact_annot(
        self,
        quad,
        text: OptStr =None,
        fontname: OptStr =None,
        fontsize: float =11,
        align: int =0,
        fill: OptSeq =None,
        text_color: OptSeq =None,
        cross_out: bool =True,
        ) -> Annot:
    """Add a 'Redact' annotation.

    Args:
        quad: area to redact, passed on to ``_add_redact_annot()``.
        text: replacement text.  Empty or whitespace-only text is treated
            like None; in that case font and color arguments are not
            evaluated at all.
        fontname: font for the text, default "Helv".
        fontsize: font size for the text, default 11.
        align: passed on to ``_add_redact_annot()``.
        fill: fill color, a number or a sequence; default (1, 1, 1).
        text_color: text color, a number or a sequence; default (0, 0, 0).
        cross_out: if true, the appearance stream is extended so that the
            rectangle shows its two diagonals.
    """
    da_str = None
    has_visible_text = bool(text) and not set(string.whitespace).issuperset(text)
    if not has_visible_text:
        text = None
    else:
        CheckColor(fill)
        CheckColor(text_color)
        fontname = fontname or "Helv"
        fontsize = fontsize or 11
        text_color = text_color or (0, 0, 0)
        # A single number is used for all three color components; longer
        # sequences are cut down to three components.
        if hasattr(text_color, "__float__"):
            text_color = (text_color,) * 3
        if len(text_color) > 3:
            text_color = text_color[:3]
        da_str = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(
            *text_color, f=fontname, s=fontsize
        )
        if fill is None:
            fill = (1, 1, 1)
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill,) * 3
            if len(fill) > 3:
                fill = fill[:3]

    saved_rotation = annot_preprocess(self)
    try:
        redaction = self._add_redact_annot(quad, text=text, da_str=da_str,
                align=align, fill=fill)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, redaction)

    if cross_out:
        # Change the appearance to show a crossed-out rectangle: keep all
        # lines but the last, which must be exactly five (a leading line
        # plus the four corner commands), then draw the diagonals.
        commands = redaction._getAP().splitlines()[:-1]
        _, LL, LR, UR, UL = commands
        commands.extend((LR, LL, UR, LL, UL, b"S"))
        redaction._setAP(b"\n".join(commands), 0)
    return redaction
8631
def add_squiggly_annot(
        self,
        quads=None,
        start=None,
        stop=None,
        clip=None,
        ) -> Annot:
    """Add a 'Squiggly' annotation.

    If `quads` is None, the area to mark is determined by
    get_highlight_selection() from `start`, `stop` and `clip`; otherwise
    `quads` is checked with CheckMarkerArg() and used.
    """
    marked = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_SQUIGGLY)
8645
def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
    """Add a ('rubber') 'Stamp' annotation in `rect`.

    `stamp` is forwarded unchanged to ``_add_stamp_annot()``.  The rotation
    value returned by annot_preprocess() is restored with set_rotation()
    (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
8656
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add a 'StrikeOut' annotation.

    If `quads` is None, the area to mark is determined by
    get_highlight_selection() from `start`, `stop` and `clip`; otherwise
    `quads` is checked with CheckMarkerArg() and used.
    """
    marked = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_STRIKE_OUT)
8664
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Add a 'Text' (sticky note) annotation at `point`.

    The rotation value returned by annot_preprocess() is restored with
    set_rotation() (if non-zero) even when creating the annotation fails.
    """
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
8675
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add an 'Underline' annotation.

    If `quads` is None, the area to mark is determined by
    get_highlight_selection() from `start`, `stop` and `clip`; otherwise
    `quads` is checked with CheckMarkerArg() and used.
    """
    marked = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_UNDERLINE)
8683
def add_widget(self, widget: Widget) -> Annot:
    """Add a 'Widget' (form field) described by `widget`.

    Returns:
        The new annotation, or None if ``_addWidget()`` returned a false
        value.  On success the annotation is registered in the page's
        `_annot_refs`, `widget` is linked to it and ``widget.update()`` is
        called.

    Raises:
        ValueError: the document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    new_annot = self._addWidget(widget.field_type, widget.field_name)
    if not new_annot:
        return None

    new_annot.thisown = True
    new_annot.parent = weakref.proxy(self)  # the owning page, held weakly
    self._annot_refs[id(new_annot)] = new_annot

    widget.parent = new_annot.parent
    widget._annot = new_annot
    widget.update()
    return new_annot
8701
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns:
        An empty list if the page has no underlying PDF page, otherwise the
        result of JM_get_annot_id_list() for it.
    """
    # Note: the original carried two consecutive string literals here; only
    # the first one was the docstring, the second was a no-op expression.
    CheckParent(self)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return []
    return JM_get_annot_id_list(page)
8712
def annot_xrefs(self):
    """Return xref information for annotations, fields and links.

    The value is whatever JM_get_annot_xref_list2() produces for this page;
    ``annots()`` reads item 0 of each entry as xref and item 1 as
    annotation type.
    """
    entries = JM_get_annot_xref_list2(self)
    return entries
8718
def annots(self, types=None):
    """Generator over the annotations of a page.

    Links, popups and widgets are never yielded.

    Args:
        types: (list) annotation types to select.  Anything without a
            ``__getitem__`` (e.g. None) selects all annotations.
            E.g. types=[PDF_ANNOT_LINE] will only yield line annotations.
    """
    excluded = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    filter_by_type = hasattr(types, "__getitem__")

    # Collect all xrefs first, then load the annotations one by one.
    selected = []
    for entry in self.annot_xrefs():
        annot_type = entry[1]
        if filter_by_type and annot_type not in types:
            continue
        if annot_type in excluded:
            continue
        selected.append(entry[0])

    for annot_xref in selected:
        loaded = self.load_annot(annot_xref)
        loaded._yielded=True
        yield loaded
8736
def recolor(self, components=1):
    """Convert colorspaces of objects on the page.

    Args:
        components: number of color components of the target colorspace;
            valid values are 1, 3 and 4.

    Raises:
        ValueError: `components` is none of 1, 3, 4.
    """
    allowed = (1, 3, 4)
    if components not in allowed:
        raise ValueError("components must be one of 1, 3, 4")
    pdf_doc = _as_pdf_document(self.parent)
    raw_options = mupdf.pdf_recolor_options()
    raw_options.num_comp = components
    mupdf.pdf_recolor_page(pdf_doc, self.number, mupdf.PdfRecolorOptions(raw_options))
8749
def clip_to_rect(self, rect):
    """Clip away page content outside the rectangle.

    Args:
        rect: rect-like; it is multiplied with the page's
            `transformation_matrix` before being passed to MuPDF.

    Raises:
        ValueError: `rect` is infinite or its intersection with the page
            rectangle is empty.
    """
    area = Rect(rect)
    if area.is_infinite or (area & self.rect).is_empty:
        raise ValueError("rect must not be infinite or empty")
    area *= self.transformation_matrix
    pdf_page = _as_pdf_page(self)
    mupdf.pdf_clip_page(pdf_page, JM_rect_from_py(area))
8759
@property
def artbox(self):
    """The ArtBox.

    Falls back to the CropBox if the page defines no ArtBox.  Otherwise the
    stored values are returned with their y coordinates mirrored against
    the MediaBox's y1.
    """
    stored = self._other_box("ArtBox")
    if stored is None:
        return self.cropbox
    media_top = self.mediabox.y1
    return Rect(stored[0], media_top - stored[3], stored[2], media_top - stored[1])
8768
@property
def bleedbox(self):
    """The BleedBox.

    Falls back to the CropBox if the page defines no BleedBox.  Otherwise
    the stored values are returned with their y coordinates mirrored
    against the MediaBox's y1.
    """
    stored = self._other_box("BleedBox")
    if stored is None:
        return self.cropbox
    media_top = self.mediabox.y1
    return Rect(stored[0], media_top - stored[3], stored[2], media_top - stored[1])
8777
def bound(self):
    """Get page rectangle.

    Returns:
        The rectangle reported by ``mupdf.fz_bound_page()`` as a Rect.  If
        that rectangle is infinite and the document is a PDF, a rectangle
        (0, 0, width, height) derived from the CropBox is returned instead,
        with width and height swapped unless the rotation is 0 or 180.  In
        that case the most recent MuPDF warning (if any) is passed to
        ``message()``.
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = Rect(mupdf.fz_bound_page(page))

    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # The warning store may be empty (e.g. after it was reset); the
        # former unconditional `...splitlines()[-1]` then raised IndexError.
        warning_lines = TOOLS.mupdf_warnings(reset=False).splitlines()
        if warning_lines:
            message(warning_lines[-1])

    return val
8795
def clean_contents(self, sanitize=1):
    """Filter the page's contents with MuPDF.

    Args:
        sanitize: passed to the filter options.  If false and the page is
            not yet wrapped (`is_wrapped`), ``wrap_contents()`` is called
            first.

    Does nothing further if the page is no PDF page.
    """
    if not (sanitize or self.is_wrapped):
        self.wrap_contents()
    pdf_page = _as_pdf_page( self.this, required=False)
    if pdf_page.m_internal:
        options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
        mupdf.pdf_filter_page_contents( pdf_page.doc(), pdf_page, options)
8804
@property
def cropbox(self):
    """The CropBox.

    For PDF pages the value comes from JM_cropbox(); for other page types
    it is the rectangle reported by ``mupdf.fz_bound_page()``.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        box = JM_cropbox(pdf_page.obj())
    else:
        box = mupdf.fz_bound_page(self.this)
    return Rect(box)
8817
@property
def cropbox_position(self):
    """The `tl` attribute (top-left point) of the CropBox."""
    box = self.cropbox
    return box.tl
8821
def delete_annot(self, annot):
    """Delete annot and return next one.

    Before `annot` itself is removed, every annotation found by
    JM_find_annot_irt() for it (annotations "in response to" it) is
    deleted as well.

    Args:
        annot: the annotation to delete; it is erased (``_erase()``)
            afterwards.

    Returns:
        An ``Annot`` wrapping the annotation that followed `annot`.  If it
        is truthy it is registered in this page's `_annot_refs` with this
        page as (weakly referenced) parent.
    """
    CheckParent(self)
    CheckParent(annot)

    page = self._pdf_page()
    while 1:
        # first loop through all /IRT annots and remove them
        irt_annot = JM_find_annot_irt(annot.this)
        if not irt_annot: # no more there
            break
        mupdf.pdf_delete_annot(page, irt_annot.this)
    # The successor must be fetched before `annot` is deleted.
    nextannot = mupdf.pdf_next_annot(annot.this) # store next
    mupdf.pdf_delete_annot(page, annot.this)
    val = Annot(nextannot)

    if val:
        val.thisown = True
        val.parent = weakref.proxy(self) # owning page object
        val.parent._annot_refs[id(val)] = val
    annot._erase()
    return val
8844
def delete_link(self, linkdict):
    """Delete a Link.

    Args:
        linkdict: a link dictionary; its xref entry identifies the link
            object and its "id" entry the Python-side reference in
            `_annot_refs`.  Anything that is not a dict is ignored.

    The link is removed from the page's /Annots array and its object is
    deleted from the PDF.  Nothing is deleted if the page is no PDF page,
    the xref is < 1, or the xref is not found in /Annots.  In all these
    cases - and after a successful deletion - the cleanup in `finished()`
    runs.  Always returns None.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return # have no dictionary

    def finished():
        # Best-effort cleanup of the Python-side link object; failures
        # (e.g. unknown id) are deliberately ignored.
        if linkdict["xref"] == 0: return
        try:
            linkid = linkdict["id"]
            linkobj = self._annot_refs[linkid]
            linkobj._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1: exception_info()
            pass

    page = _as_pdf_page(self.this, required=False)
    if not page.m_internal:
        return finished() # have no PDF
    # presumably dictkey_xref is the key "xref" also used in finished() -
    # it is defined outside this method
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return finished() # invalid xref
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return finished() # have no annotations
    len_ = mupdf.pdf_array_len( annots)
    if len_ == 0:
        return finished()
    # Search /Annots for the link; after the loop `i` is the index of the
    # match (only valid if xref == oxref).
    oxref = 0
    for i in range( len_):
        oxref = mupdf.pdf_to_num( mupdf.pdf_array_get( annots, i))
        if xref == oxref:
            break # found xref in annotations

    if xref != oxref:
        return finished() # xref not in annotations
    mupdf.pdf_array_delete( annots, i) # delete entry in annotations
    mupdf.pdf_delete_object( page.doc(), xref) # delete link object
    mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( page)

    return finished()
8888
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation.

    Uses the `extra` implementation when available, else
    JM_derotate_page_matrix() for PDF pages.
    """
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Matrix(JM_derotate_page_matrix(pdf_page))
    # Not a PDF page.
    # NOTE(review): this builds a Matrix from a unit *rectangle* - confirm
    # that the identity matrix is what results.
    return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
8898
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page into an existing text page.

    Args:
        tpage: object whose ``.this`` is a ``mupdf.FzStextPage``.
        flags: stored as `flags` of the stext options.
        matrix: matrix-like used as transformation of the page run.
    """
    target = tpage.this
    assert isinstance( target, mupdf.FzStextPage)
    stext_options = mupdf.FzStextOptions()
    stext_options.flags = flags
    transform = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(target, stext_options)
    mupdf.fz_run_page( self.this, device, transform, mupdf.FzCookie())
    mupdf.fz_close_device( device)
8909
@property
def first_annot(self):
    """First annotation.

    Returns None if the page is no PDF page or has no annotation.
    Otherwise the annotation is registered in `_annot_refs` and gets this
    page as (weakly referenced) parent.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_annot = mupdf.pdf_first_annot(pdf_page)
    if not raw_annot.m_internal:
        return None
    first = Annot(raw_annot)
    first.thisown = True
    first.parent = weakref.proxy(self)  # the owning page, held weakly
    self._annot_refs[id(first)] = first
    return first
8925
@property
def first_link(self):
    """First link on page, i.e. the result of ``load_links()``."""
    link = self.load_links()
    return link
8932
@property
def first_widget(self):
    """First widget/field.

    Returns:
        None if the page is no PDF page or has no widget.  Otherwise a
        ``Widget`` filled from the first widget annotation by
        ``TOOLS._fill_widget()``; the annotation itself is registered in
        `_annot_refs` with this page as (weakly referenced) parent.
    """
    CheckParent(self)
    # (A dead `annot = 0` initialisation was removed here.)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return
    annot = mupdf.pdf_first_widget(page)
    if not annot.m_internal:
        return
    val = Annot(annot)
    val.thisown = True
    val.parent = weakref.proxy(self) # owning page object
    self._annot_refs[id(val)] = val
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
8952
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return what it collected.

    The page is processed with rotation 0; a non-zero rotation is restored
    afterwards, also if processing raises.

    Args:
        layers: if truthy, the device is asked to include layer
            information.

    Returns:
        The list filled by the device created with JM_new_bbox_device().
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        inc_layers = bool(layers)
        dev = JM_new_bbox_device( rc, inc_layers)
        mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device( dev)
    finally:
        # Never leave the page with a modified rotation, matching the
        # try/finally handling of the add_*_annot() methods.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
8968
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    The page is processed with rotation 0; a non-zero rotation is restored
    afterwards, also if processing (or a user callback) raises.

    Args:
        extended: if truthy, clips are included.
        callback: if callable, it is handed to the line art device in place
            of the result list.
        method: passed to the line art device; if not None, no result list
            is returned either.

    Returns:
        None if `callback` is callable or `method` is not None, otherwise
        the list of paths.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = bool(extended)
        prect = mupdf.fz_bound_page(page)
        if g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = list()
            if callable(callback) or method is not None:
                dev = JM_new_lineart_device_Device(callback, clips, method)
            else:
                dev = JM_new_lineart_device_Device(rc, clips, method)
            # Mirror vertically against the page height.
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # Never leave the page with a modified rotation, matching the
        # try/finally handling of the add_*_annot() methods.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if callable(callback) or method is not None:
        return
    return rc
8999
def get_contents(self):
    """Get xrefs of /Contents objects.

    Returns:
        A list with one xref per item if /Contents is an array, a
        one-element list if it is a single object, else an empty list.
    """
    CheckParent(self)
    page_obj = _as_pdf_page(self.this).obj()
    contents = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        count = mupdf.pdf_array_len(contents)
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, index))
            for index in range(count)
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
9017
def get_displaylist(self, annots=1):
    """Make a DisplayList from the page for Pixmap generation.

    Args:
        annots: if truthy (default) the list is built from the page,
            otherwise from the page contents only.
    """
    CheckParent(self)
    if annots:
        builder = mupdf.fz_new_display_list_from_page
    else:
        builder = mupdf.fz_new_display_list_from_page_contents
    return DisplayList(builder(self.this))
9030
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics. The extended version includes clips.

    Note:
        The path dictionaries delivered by get_cdrawings() are modified in
        place: "rect" / "scissor" become Rect objects, the entries of
        "items" are rebuilt with Rect / Quad / Point objects, and paths of
        type "f" or "s" receive every key of the default key list (value
        None if it was missing).
    """
    default_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )

    def convert(entry):
        # One draw command: operator name followed by its operands.
        op = entry[0]
        operands = entry[1:]
        if op == "re":
            return ("re", Rect(operands[0]).normalize(), operands[1])
        if op == "qu":
            return ("qu", Quad(operands[0]))
        return tuple([op] + [Point(p) for p in operands])

    paths = self.get_cdrawings(extended=extended)
    for path in paths:
        kind = path["type"]
        if kind.startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if kind != "group":
            path["items"] = [convert(entry) for entry in path["items"]]
        if kind in ('f', 's'):
            for key in default_keys:
                path.setdefault(key, None)
    return paths
9078
class Drawpath(object):
    """Attribute-style view of a path dictionary from get_cdrawings()."""

    def __init__(self, **args):
        # Every keyword becomes an instance attribute of the same name.
        vars(self).update(args)
9083
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths themselves (`paths`), the object keeps the total
    number of appended paths (`path_count`) and one counter per path type
    ("clip", "group", "f", "s", "fs").
    """

    def __getitem__(self, item):
        return self.paths[item]

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return len(self.paths)

    def append(self, path):
        """Append a path and update the counters matching its type."""
        self.paths.append(path)
        self.path_count += 1
        if path.type == "clip":
            self.clip_count += 1
        elif path.type == "group":
            self.group_count += 1
        elif path.type == "f":
            self.fill_count += 1
        elif path.type == "s":
            self.stroke_count += 1
        elif path.type == "fs":
            self.fillstroke_count += 1

    def _parents(self, i, kind):
        """Return the enclosing paths of type `kind` for path number `i`.

        Walks backwards from path `i` and collects every path of the
        requested type whose level is below the level of path `i` and
        below the level of the parent collected last.

        Raises:
            IndexError: if the list is empty or `i` is beyond its end.
        """
        # An empty list has no valid index. Without this check a negative
        # index would never become non-negative below.
        if self.path_count == 0 or i >= self.path_count:
            raise IndexError("bad path index")
        if i < 0:
            # Same result as repeatedly adding path_count until i >= 0.
            i %= self.path_count
        level = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= level:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept strictly smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: for an empty list or an index beyond its end."""
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: for an empty list or an index beyond its end."""
        return self._parents(i, "group")
9174
def get_lineart(self) -> object:
    """Get page drawings paths.

    Note:
        Runs get_cdrawings() in extended mode and wraps every path
        dictionary in a Drawpath object, collected in a Drawpathlist.
        "rect" / "scissor" become Rect objects and the entries of "items"
        are rebuilt with Rect / Quad / Point objects.
        Paths of type "f" get the stroke-related attributes set to None.
        In contrast to get_drawings(), this output is an object.
    """

    def convert(entry):
        # One draw command: operator name followed by its operands.
        op = entry[0]
        operands = entry[1:]
        if op == "re":
            return ("re", Rect(operands[0]).normalize(), operands[1])
        if op == "qu":
            return ("qu", Quad(operands[0]))
        return tuple([op] + [Point(p) for p in operands])

    # Attributes forced to None on paths of type "f".
    none_on_fill = (
        "stroke_opacity",
        "dashes",
        "line_join",
        "line_cap",
        "color",
        "width",
    )

    result = self.Drawpathlist()
    for raw in self.get_cdrawings(extended=True):
        entry = self.Drawpath(**raw)
        if entry.type == "clip":
            entry.scissor = Rect(raw["scissor"])
        else:
            entry.rect = Rect(raw["rect"])
        if entry.type != "group":
            entry.items = [convert(item) for item in raw["items"]]
        if entry.type == "f":
            for attr in none_on_fill:
                setattr(entry, attr, None)
        result.append(entry)
    return result
9220
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    Returns:
        Identity if the page is not rotated, otherwise the inverse of the
        matrix that was prepended to the page contents.
    """
    angle = self.rotation  # normalized rotation value
    if angle == 0:
        return Identity  # nothing to do

    box = self.mediabox  # current mediabox

    # Translation applied before the derotation, depending on the angle.
    if angle == 90:
        shift = (box.y1 - box.x1 - box.x0 - box.y0, 0)  # horizontal shift
    elif angle == 270:
        shift = (0, box.x1 - box.y1 - box.y0 - box.x0)  # vertical shift
    else:  # angle is 180
        shift = (-2 * box.x0, -2 * box.y0)

    # Prefix the page contents with the combined matrix.
    derotate = Matrix(1, 0, 0, 1, *shift) * self.derotation_matrix
    command = (_format_g(tuple(derotate)) + ' cm ').encode('utf8')
    _ = TOOLS._insert_contents(self, command, False)  # prepend to page contents

    # Quarter turns exchange x- and y-coordinates of the mediabox.
    if angle in (90, 270):
        x0, y0, x1, y1 = box
        box.x0 = y0
        box.y0 = x0
        box.x1 = y1
        box.y1 = x1
        self.set_mediabox(box)

    self.set_rotation(0)
    inverse = ~derotate  # inverse of the derotation matrix

    for annot in self.annots():  # modify rectangles of annotations
        # TODO: only try to set rectangle for applicable annot types
        annot.set_rect(annot.rect * inverse)

    for link in self.get_links():  # modify 'from' rectangles of links
        moved = link["from"] * inverse
        self.delete_link(link)
        link["from"] = moved
        try:  # invalid links remain deleted
            self.insert_link(link)
        except Exception:
            pass

    for widget in self.widgets():  # modify field rectangles
        widget.rect = widget.rect * inverse
        widget.update()

    return inverse
9274
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
    final_filter: bool = True,
) -> list:
    """Join rectangles of neighboring vector graphic items.

    Args:
        clip: optional rect-like to restrict the page area to consider.
            Defaults to the page rectangle.
        drawings: (optional) output of a previous "get_drawings()". If
            None, "get_drawings()" is called.
        x_tolerance: horizontal neighborhood threshold.
        y_tolerance: vertical neighborhood threshold.
        final_filter: if true (default), only return joined rectangles
            whose width exceeds x_tolerance and whose height exceeds
            y_tolerance. If false, return all joined rectangles.

    Notes:
        Vector graphics (also called line-art or drawings) usually consist
        of independent items like rectangles, lines or curves to jointly
        form table grid lines or bar, line, pie charts and similar.
        This method identifies rectangles wrapping these disparate items.

    Returns:
        A list of Rect items sorted by (y1, x0), each wrapping line-art
        items that are close enough to be considered forming a common
        vector graphic.
    """
    CheckParent(self)
    parea = self.rect  # the default clipping area
    if clip is not None:
        parea = Rect(clip)
    if drawings is None:  # no previous output to re-use
        drawings = self.get_drawings()

    def span(a, b):
        """Return the two values in ascending order."""
        return (a, b) if b > a else (b, a)

    def are_neighbors(r1, r2):
        """Tell whether (potentially invalid) rectangles r1, r2 are neighbors.

        They are neighbors unless they are separated by more than the
        tolerance in x- or in y-direction.
        """
        lo1_x, hi1_x = span(r1.x0, r1.x1)
        lo1_y, hi1_y = span(r1.y0, r1.y1)
        lo2_x, hi2_x = span(r2.x0, r2.x1)
        lo2_y, hi2_y = span(r2.y0, r2.y1)
        return not (
            hi1_x < lo2_x - x_tolerance
            or lo1_x > hi2_x + x_tolerance
            or hi1_y < lo2_y - y_tolerance
            or lo1_y > hi2_y + y_tolerance
        )

    def inside_clip(rect):
        """Tell whether rect is contained in the clip area."""
        return (
            rect.x0 >= parea.x0
            and rect.x1 <= parea.x1
            and rect.y0 >= parea.y0
            and rect.y1 <= parea.y1
        )

    def reading_order(rect):
        return (rect.y1, rect.x0)

    # rectangles of all vector graphics contained in the clip
    prects = sorted(
        (p["rect"] for p in drawings if inside_clip(p["rect"])),
        key=reading_order,
    )

    new_rects = []  # the joined rectangles

    # ---------------------------------------------------------------------
    # Repeatedly take the first rectangle and absorb all of its neighbors.
    # ---------------------------------------------------------------------
    while prects:  # the algorithm will empty this list
        joined = +prects[0]  # copy of first rectangle
        changed = True
        while changed:  # re-scan as long as the joined rectangle grew
            changed = False
            for idx in range(len(prects) - 1, 0, -1):  # from back to front
                candidate = prects[idx]
                if are_neighbors(candidate, joined):
                    joined |= candidate.tl  # extend by top-left corner
                    joined |= candidate.br  # extend by bottom-right corner
                    del prects[idx]
                    changed = True

        new_rects.append(joined)
        del prects[0]
        prects = sorted(set(prects), key=reading_order)

    new_rects = sorted(set(new_rects), key=reading_order)
    if final_filter:
        return [
            r for r in new_rects
            if r.width > x_tolerance and r.height > y_tolerance
        ]
    return new_rects
9373
def get_fonts(self, full=False):
    """Return the owning document's font list for this page number.

    `full` is passed through to the document's get_page_fonts().
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_fonts(self.number, full=full)
9378
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    'name' is either an item of the image list, or the referencing
    name string - elem[7] of the resp. item.
    Option 'transform' also returns the image transformation matrix.

    Returns:
        A rectangle, or a tuple (rectangle, matrix) if 'transform' is
        truthy. If the image cannot be located, the rectangle is
        Rect(1, 1, -1, -1) and the matrix is Matrix().

    Raises:
        ValueError: document closed or encrypted, 'name' is a list / tuple
            not ending with an int, or the name string matches no image or
            more than one image of the page.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    # Result for "not found", shaped like the regular result.
    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    if transform:
        rc = (inf_rect, null_mat)
    else:
        rc = inf_rect

    if isinstance(name, (list, tuple)):
        if type(name[-1]) is not int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)

    xref = item[-1]
    if xref != 0 or transform:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # Return the "not found" value in the shape requested by
            # 'transform' (previously always the bare rectangle).
            return rc

    # Only reached for xref == 0 and a falsy 'transform'.
    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)

    if not bool(val):
        return rc

    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break

        # Falsy 'transform' values that do not compare equal to 0.
        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break

    return rc
9442
def get_images(self, full=False):
    """Return the owning document's image list for this page number.

    `full` is passed through to the document's get_page_images().
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_images(self.number, full=full)
9447
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Returns:
        List of items (name, xref, type), where type is one of "ocg" / "ocmd",
        and name is the property name. Properties whose object source
        contains neither "/Type/OCG" nor "/Type/OCMD" are left out.
    """
    # Marker looked for in the compressed object source -> reported type.
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    found = []
    for pname, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        octype = next(
            (label for marker, label in markers if marker in source),
            None,
        )
        if octype is not None:
            found.append((pname, xref, octype))
    return found
9466
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: converted with JM_matrix_from_py() and used both for the
            SVG dimensions and for running the page.
        text_as_path: 1 selects FZ_SVG_TEXT_AS_PATH, any other value
            FZ_SVG_TEXT_AS_TEXT.

    Returns:
        The SVG output, converted by JM_EscapeStrFromBuffer().
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    ctm = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_option = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
    # SVG dimensions are those of the transformed page rectangle.
    bounds = mupdf.fz_transform_rect(page_bounds, ctm)
    width = bounds.x1 - bounds.x0
    height = bounds.y1 - bounds.y0

    buffer = mupdf.fz_new_buffer(1024)
    output = mupdf.FzOutput(buffer)
    device = mupdf.fz_new_svg_device(output, width, height, text_option, 1)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(device)
    output.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer)
9490
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage=None,  #: TextPage = None,
) -> str:
    """Return the result of extractTextbox(rect) for the page.

    Args:
        page: the page.
        rect: passed to the text page's extractTextbox().
        textpage: text page to use. If None, a temporary one is made with
            page.get_textpage().

    Raises:
        ValueError: if the `parent` of a given text page is not `page`.
    """
    if textpage is None:
        # No text page supplied: use a throw-away one.
        return page.get_textpage().extractTextbox(rect)
    if getattr(textpage, "parent") != page:
        raise ValueError("not a textpage of this page")
    return textpage.extractTextbox(rect)
9505
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for the page.

    The page rotation is temporarily set to 0 while the text page is
    built and is restored afterwards, also if building fails.

    Args:
        clip: passed to _get_textpage().
        flags: passed to _get_textpage().
        matrix: passed to _get_textpage(); Matrix(1, 1) if None.

    Returns:
        A TextPage whose `parent` is a weak proxy of this page.
    """
    CheckParent(self)
    ctm = Matrix(1, 1) if matrix is None else matrix
    saved_rotation = self.rotation
    was_rotated = saved_rotation != 0
    if was_rotated:
        self.set_rotation(0)
    try:
        raw = self._get_textpage(clip, flags=flags, matrix=ctm)
    finally:
        if was_rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw)
    result.parent = weakref.proxy(self)
    return result
9521
def get_texttrace(self):
    """Run the page through a text-trace device and return its output.

    The page rotation is temporarily set to 0 while the page is run and is
    restored afterwards, even if running the page raises.

    Returns:
        The list that was handed to the text-trace device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        # Matrix that flips the y-axis relative to the page bottom.
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # Always undo the temporary de-rotation, also on errors.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
9542
def get_xobjects(self):
    """Return the owning document's xobject list for this page number."""
    CheckParent(self)
    document = self.parent
    return document.get_page_xobjects(self.number)
9547
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                set_simple=False, wmode=0, encoding=0):
    """Make a font available to the page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is dropped.
        fontfile: font file given as str, as an object with an `absolute`
            attribute (converted with str()), or as an object with a
            `name` attribute.
        fontbuffer: font data; replaced by pymupdf_fonts.myfont(fontname)
            if the lower-cased name is a key of fitz_fontdescriptors.
        set_simple, wmode, encoding: passed on to _insertFont().

    Returns:
        The font xref, or the falsy result of _insertFont() if that
        failed.

    Raises:
        ValueError: page has no parent, bad characters in `fontname`, or
            unsupported `fontfile` type.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars:
        raise ValueError(f"bad fontname chars {inv_chars}")

    known = CheckFont(self, fontname)
    if known is not None:  # font already in font list of page
        xref = known[0]
        if not CheckFontInfo(doc, xref):
            # document font list lacks it: get_char_widths builds the entry
            doc.get_char_widths(xref)
        return xref

    # ----------------------------------------------------------------------
    # the font is not present for this page
    # ----------------------------------------------------------------------
    bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font

    serif = 0
    CJK_number = -1
    CJK_list_n = ["china-t", "china-s", "japan", "korea"]
    CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]

    # Look the name up in the first list, then in the second; the position
    # of the matching list is the serif value.
    for serif_value, cjk_names in enumerate((CJK_list_n, CJK_list_s)):
        try:
            CJK_number = cjk_names.index(fontname)
            serif = serif_value
            break
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1: exception_info()

    if fontname.lower() in fitz_fontdescriptors:
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # install the font for the page
    if fontfile is None:
        fontfile_str = None
    elif type(fontfile) is str:
        fontfile_str = fontfile
    elif hasattr(fontfile, "absolute"):
        fontfile_str = str(fontfile)
    elif hasattr(fontfile, "name"):
        fontfile_str = fontfile.name
    else:
        raise ValueError("bad fontfile")

    inserted = self._insertFont(
        fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
        wmode, serif, encoding, CJK_number,
    )
    if not inserted:  # did not work, error return
        return inserted

    xref = inserted[0]  # xref of installed font
    fontdict = inserted[1]

    if not CheckFontInfo(doc, xref):
        # need to create document font info
        doc.get_char_widths(xref, fontdict=fontdict)
    return xref
9630
@property
def is_wrapped(self):
    """True if _count_q_balance() reports (0, 0) for the page."""
    balance = self._count_q_balance()
    return balance == (0, 0)
9635
@property
def language(self):
    """Page language.

    None if the page is not a PDF page or no (inheritable) /Lang entry
    exists.
    """
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Lang'))
        if entry.m_internal:
            return mupdf.pdf_to_str_buf(entry)
    return None
9646
def links(self, kinds=None):
    """ Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If None,
               all links are returned. E.g. kinds=[LINK_URI]
               will only yield URI links.
    """
    for entry in self.get_links():
        if kinds is not None and entry["kind"] not in kinds:
            continue  # kind was not asked for
        yield entry
9659
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int).

    Returns:
        The annotation, registered with this page, or the falsy result
        of _load_annot().

    Raises:
        ValueError: if `ident` is neither exactly a str nor exactly an int.
    """
    CheckParent(self)
    ident_type = type(ident)
    if ident_type is str:
        name, xref = ident, 0
    elif ident_type is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    annot = self._load_annot(name, xref)
    if annot:
        annot.thisown = True
        annot.parent = weakref.proxy(self)
        self._annot_refs[id(annot)] = annot
    return annot
9682
def load_links(self):
    """Get first Link.

    Returns None if the page has no links. For PDF documents, `xref` and
    `id` of the link are taken from the first link entry of
    annot_xrefs(); otherwise they are 0 and "".
    """
    CheckParent(self)
    first = mupdf.fz_load_links(self.this)
    if not first.m_internal:
        return
    link = Link(first)
    link.thisown = True
    link.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(link)] = link
    # Defaults, kept unless a PDF link annotation is found below.
    link.xref = 0
    link.id = ""
    if self.parent.is_pdf:
        link_entries = [
            entry
            for entry in self.annot_xrefs()
            if entry[1] == mupdf.PDF_ANNOT_LINK
        ]
        if link_entries:
            head = link_entries[0]
            link.xref = head[0]
            link.id = head[2]
    return link
9706
9707 #----------------------------------------------------------------
9708 # page load widget by xref
9709 #----------------------------------------------------------------
def load_widget(self, xref):
    """Load a widget by its xref.

    Returns:
        A Widget filled via TOOLS._fill_widget(), or the falsy result of
        JM_get_widget_by_xref().
    """
    CheckParent(self)

    pdf_page = _as_pdf_page(self.this)
    annot = JM_get_widget_by_xref(pdf_page, xref)
    if not annot:
        return annot
    # Register the underlying annotation with this page.
    annot.thisown = True
    annot.parent = weakref.proxy(self)
    self._annot_refs[id(annot)] = annot
    widget = Widget()
    TOOLS._fill_widget(annot, widget)
    return widget
9727
@property
def mediabox(self):
    """The MediaBox.

    For pages that are not PDF pages, the bound of the page is returned.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Rect(JM_mediabox(pdf_page.obj()))
    return Rect(mupdf.fz_bound_page(self.this))
9738
@property
def mediabox_size(self):
    """Point built from the x1 and y1 coordinates of the MediaBox."""
    box = self.mediabox
    return Point(box.x1, box.y1)
9742
9743 #@property
9744 #def parent( self):
9745 # assert self._parent
9746 # if self._parent:
9747 # return self._parent
9748 # return Document( self.this.document())
9749
def read_contents(self):
    """All /Contents streams concatenated to one bytes object."""
    concatenated = TOOLS._get_all_contents(self)
    return concatenated
9753
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Replaces `self.this` by the result of the document's reload_page().
    """
    CheckParent(self)
    # fixme this looks wrong.
    self.this = self.parent.reload_page(self)
9761
@property
def rotation(self):
    """Page rotation (0 for pages that are not PDF pages)."""
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this, required=0)
    return JM_page_rotation(pdf_page) if pdf_page.m_internal else 0
9770
@property
def rotation_matrix(self) -> Matrix:
    """Reflects page rotation."""
    raw = TOOLS._rotate_matrix(self)
    return Matrix(raw)
9775
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its `device` attribute receives the page.
        m: matrix, converted with JM_matrix_from_py().
    """
    CheckParent(self)
    device = dw.device
    ctm = JM_matrix_from_py(m)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
9782
def set_artbox(self, rect):
    """Set the ArtBox."""
    box_name = "ArtBox"
    return self._set_pagebox(box_name, rect)
9786
def set_bleedbox(self, rect):
    """Set the BleedBox."""
    box_name = "BleedBox"
    return self._set_pagebox(box_name, rect)
9790
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: document closed, document is no PDF, `xref` outside
            range(1, xref_length()), or `xref` is no stream.
    """
    CheckParent(self)
    document = self.parent
    if document.is_closed:
        raise ValueError("document closed")
    if not document.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, document.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    if not document.xref_is_stream(xref):
        raise ValueError("xref is no stream")
    reference = "%i 0 R" % xref
    document.xref_set_key(self.xref, "Contents", reference)
9804
def set_cropbox(self, rect):
    """Set the CropBox. Will also change Page.rect."""
    box_name = "CropBox"
    return self._set_pagebox(box_name, rect)
9808
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string, converted with
            fz_text_language_from_string(). A falsy value removes the
            page's /Lang entry.
    """
    CheckParent(self)
    pdfpage = _as_pdf_page(self.this)
    if not language:
        mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
        return
    lang = mupdf.fz_text_language_from_string(language)
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    mupdf.pdf_dict_put_text_string(
        # The page object itself is needed here, i.e. obj() must be
        # called as in the deletion branch above.
        pdfpage.obj(),
        PDF_NAME('Lang'),
        mupdf.fz_string_from_text_language2(lang),
    )
9823
def set_mediabox(self, rect):
    """Set the MediaBox.

    Also removes the page's CropBox, ArtBox, BleedBox and TrimBox entries.

    Raises:
        ValueError: if `rect` is empty or infinite.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    mediabox = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
        raise ValueError(MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect(pdf_page.obj(), PDF_NAME('MediaBox'), mediabox)
    # Drop the other page boxes.
    for box_name in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del(pdf_page.obj(), PDF_NAME(box_name))
9838
def set_rotation(self, rotation):
    """Set page rotation.

    The value is passed through JM_norm_rotation() and stored as the
    page's /Rotate entry.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
9845
def set_trimbox(self, rect):
    """Set the TrimBox."""
    box_name = "TrimBox"
    return self._set_pagebox(box_name, rect)
9849
@property
def transformation_matrix(self):
    """Page transformation matrix.

    For pages that are not PDF pages, the conversion of a default
    FzMatrix is returned.
    """
    CheckParent(self)

    ctm = mupdf.FzMatrix()
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return JM_py_from_matrix(ctm)

    mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(pdf_page, mediabox, ctm)
    page_ctm = JM_py_from_matrix(ctm)

    if self.rotation % 360 == 0:
        return Matrix(page_ctm)
    # Rotated page: flip the y-axis relative to the cropbox height.
    return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
9868
@property
def trimbox(self):
    """The TrimBox

    Falls back to the cropbox if _other_box("TrimBox") returns None.
    """
    raw = self._other_box("TrimBox")
    if raw is None:
        return self.cropbox
    # Mirror the y-coordinates at the upper edge of the mediabox.
    top = self.mediabox.y1
    return Rect(raw[0], top - raw[3], raw[2], top - raw[1])
9877
def widgets(self, types=None):
    """ Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If None,
                all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
                will only yield text fields.
    """
    # xrefs of all widget annotations, collected before loading any widget
    widget_xrefs = [
        entry[0]
        for entry in self.annot_xrefs()
        if entry[1] == mupdf.PDF_ANNOT_WIDGET
    ]
    for xref in widget_xrefs:
        widget = self.load_widget(xref)
        if types is not None and widget.field_type not in types:
            continue  # field type was not asked for
        yield widget
9894
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Prepends / appends as many "q" / "Q" commands to the page contents as
    _count_q_balance() reports missing.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:
        # prepend required push commands
        TOOLS._insert_contents(self, b"q\n" * missing_push, False)
    if missing_pop > 0:
        # append required pop commands
        TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
9904
@property
def xref(self):
    """PDF xref number of page."""
    CheckParent(self)
    document = self.parent
    return document.page_xref(self.number)
9910
# Read-only property `rect`: its getter is the `bound` callable that is
# already defined in the enclosing scope.
rect = property(bound, doc="page rectangle")
9912
9913
9914 class Pixmap:
9915
9916 def __init__(self, *args):
9917 """
9918 Pixmap(colorspace, irect, alpha) - empty pixmap.
9919 Pixmap(colorspace, src) - copy changing colorspace.
9920 Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
9921 Pixmap(src, alpha=1) - copy and add or drop alpha channel.
9922 Pixmap(filename) - from an image in a file.
9923 Pixmap(image) - from an image in memory (bytes).
9924 Pixmap(colorspace, width, height, samples, alpha) - from samples data.
9925 Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.
9926 """
9927 # Cache for property `self.samples_mv`. Set here so __del_() sees it if
9928 # we raise.
9929 #
9930 self._samples_mv = None
9931
9932 # 2024-01-16: Experimental support for a memory-view of the underlying
9933 # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
9934 # not currently used.
9935 self._memory_view = None
9936
9937 if 0:
9938 pass
9939
9940 elif args_match(args,
9941 (Colorspace, mupdf.FzColorspace),
9942 (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
9943 ):
9944 # create empty pixmap with colorspace and IRect
9945 cs, rect = args
9946 alpha = 0
9947 pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
9948 self.this = pm
9949
9950 elif args_match(args,
9951 (Colorspace, mupdf.FzColorspace),
9952 (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
9953 (int, bool)
9954 ):
9955 # create empty pixmap with colorspace and IRect
9956 cs, rect, alpha = args
9957 pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
9958 self.this = pm
9959
9960 elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
9961 # copy pixmap, converting colorspace
9962 cs, spix = args
9963 if isinstance(cs, Colorspace):
9964 cs = cs.this
9965 elif cs is None:
9966 cs = mupdf.FzColorspace(None)
9967 if isinstance(spix, Pixmap):
9968 spix = spix.this
9969 if not mupdf.fz_pixmap_colorspace(spix).m_internal:
9970 raise ValueError( "source colorspace must not be None")
9971
9972 if cs.m_internal:
9973 self.this = mupdf.fz_convert_pixmap(
9974 spix,
9975 cs,
9976 mupdf.FzColorspace(),
9977 mupdf.FzDefaultColorspaces(None),
9978 mupdf.FzColorParams(),
9979 1
9980 )
9981 else:
9982 self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
9983 if not self.this.m_internal:
9984 raise RuntimeError( MSG_PIX_NOALPHA)
9985
9986 elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
9987 # add mask to a pixmap w/o alpha channel
9988 spix, mpix = args
9989 if isinstance(spix, Pixmap):
9990 spix = spix.this
9991 if isinstance(mpix, Pixmap):
9992 mpix = mpix.this
9993 spm = spix
9994 mpm = mpix
9995 if not spix.m_internal: # intercept NULL for spix: make alpha only pix
9996 dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
9997 if not dst.m_internal:
9998 raise RuntimeError( MSG_PIX_NOALPHA)
9999 else:
10000 dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
10001 self.this = dst
10002
10003 elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
10004 args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
10005 # create pixmap as scaled copy of another one
10006 if len(args) == 3:
10007 spix, w, h = args
10008 bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
10009 else:
10010 spix, w, h, clip = args
10011 bbox = JM_irect_from_py(clip)
10012
10013 src_pix = spix.this if isinstance(spix, Pixmap) else spix
10014 if not mupdf.fz_is_infinite_irect(bbox):
10015 pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
10016 else:
10017 pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
10018 self.this = pm
10019
10020 elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
10021 # Special raw construction where we set .this directly.
10022 _, pm = args
10023 if isinstance(pm, Pixmap):
10024 pm = pm.this
10025 self.this = pm
10026
10027 elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
10028 # Pixmap(struct Pixmap *spix, int alpha=1)
10029 # copy pixmap & add / drop the alpha channel
10030 spix = args[0]
10031 alpha = args[1] if len(args) == 2 else 1
10032 src_pix = spix.this if isinstance(spix, Pixmap) else spix
10033 if not _INRANGE(alpha, 0, 1):
10034 raise ValueError( "bad alpha value")
10035 cs = mupdf.fz_pixmap_colorspace(src_pix)
10036 if not cs.m_internal and not alpha:
10037 raise ValueError( "cannot drop alpha for 'NULL' colorspace")
10038 seps = mupdf.FzSeparations()
10039 n = mupdf.fz_pixmap_colorants(src_pix)
10040 w = mupdf.fz_pixmap_width(src_pix)
10041 h = mupdf.fz_pixmap_height(src_pix)
10042 pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
10043 pm.m_internal.x = src_pix.m_internal.x
10044 pm.m_internal.y = src_pix.m_internal.y
10045 pm.m_internal.xres = src_pix.m_internal.xres
10046 pm.m_internal.yres = src_pix.m_internal.yres
10047
10048 # copy samples data ------------------------------------------
10049 if 1:
10050 # We use our pixmap_copy() to get best performance.
10051 # test_pixmap.py:test_setalpha(): 3.9s t=0.0062
10052 extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
10053 elif 1:
10054 # Use memoryview.
10055 # test_pixmap.py:test_setalpha(): 4.6 t=0.51
10056 src_view = mupdf.fz_pixmap_samples_memoryview( src_pix)
10057 pm_view = mupdf.fz_pixmap_samples_memoryview( pm)
10058 if src_pix.alpha() == pm.alpha(): # identical samples
10059 #memcpy(tptr, sptr, w * h * (n + alpha));
10060 size = w * h * (n + alpha)
10061 pm_view[ 0 : size] = src_view[ 0 : size]
10062 else:
10063 tptr = 0
10064 sptr = 0
10065 # This is a little faster than calling
10066 # pm.fz_samples_set(), but still quite slow. E.g. reduces
10067 # test_pixmap.py:test_setalpha() from 6.7s to 4.5s.
10068 #
10069 # t=0.53
10070 pm_stride = pm.stride()
10071 pm_n = pm.n()
10072 pm_alpha = pm.alpha()
10073 src_stride = src_pix.stride()
10074 src_n = src_pix.n()
10075 #log( '{=pm_stride pm_n src_stride src_n}')
10076 for y in range( h):
10077 for x in range( w):
10078 pm_i = pm_stride * y + pm_n * x
10079 src_i = src_stride * y + src_n * x
10080 pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n]
10081 if pm_alpha:
10082 pm_view[ pm_i + n] = 255
10083 else:
10084 # Copy individual bytes from Python. Very slow.
10085 # test_pixmap.py:test_setalpha(): 6.89 t=2.601
10086 if src_pix.alpha() == pm.alpha(): # identical samples
10087 #memcpy(tptr, sptr, w * h * (n + alpha));
10088 for i in range(w * h * (n + alpha)):
10089 mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i))
10090 else:
10091 # t=2.56
10092 tptr = 0
10093 sptr = 0
10094 src_pix_alpha = src_pix.alpha()
10095 for i in range(w * h):
10096 #memcpy(tptr, sptr, n);
10097 for j in range(n):
10098 mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j))
10099 tptr += n
10100 if pm.alpha():
10101 mupdf.fz_samples_set(pm, tptr, 255)
10102 tptr += 1
10103 sptr += n + src_pix_alpha
10104 self.this = pm
10105
10106 elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
10107 # create pixmap from samples data
10108 cs, w, h, samples, alpha = args
10109 if isinstance(cs, Colorspace):
10110 cs = cs.this
10111 assert isinstance(cs, mupdf.FzColorspace)
10112 n = mupdf.fz_colorspace_n(cs)
10113 stride = (n + alpha) * w
10114 seps = mupdf.FzSeparations()
10115 pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
10116
10117 if isinstance( samples, (bytes, bytearray)):
10118 #log('using mupdf.python_buffer_data()')
10119 samples2 = mupdf.python_buffer_data(samples)
10120 size = len(samples)
10121 else:
10122 res = JM_BufferFromBytes(samples)
10123 if not res.m_internal:
10124 raise ValueError( "bad samples data")
10125 size, c = mupdf.fz_buffer_storage(res)
10126 samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
10127 if stride * h != size:
10128 raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
10129 mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
10130 self.this = pm
10131
10132 elif args_match(args, None):
10133 # create pixmap from filename, file object, pathlib.Path or memory
10134 imagedata, = args
10135 name = 'name'
10136 if hasattr(imagedata, "resolve"):
10137 fname = imagedata.__str__()
10138 if fname:
10139 img = mupdf.fz_new_image_from_file(fname)
10140 elif hasattr(imagedata, name):
10141 fname = imagedata.name
10142 if fname:
10143 img = mupdf.fz_new_image_from_file(fname)
10144 elif isinstance(imagedata, str):
10145 img = mupdf.fz_new_image_from_file(imagedata)
10146 else:
10147 res = JM_BufferFromBytes(imagedata)
10148 if not res.m_internal or not res.m_internal.len:
10149 raise ValueError( "bad image data")
10150 img = mupdf.fz_new_image_from_buffer(res)
10151
10152 # Original code passed null for subarea and ctm, but that's not
10153 # possible with MuPDF's python bindings. The equivalent is an
10154 # infinite rect and identify matrix scaled by img.w() and img.h().
10155 pm, w, h = mupdf.fz_get_pixmap_from_image(
10156 img,
10157 mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
10158 mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
10159 )
10160 xres, yres = mupdf.fz_image_resolution(img)
10161 pm.m_internal.xres = xres
10162 pm.m_internal.yres = yres
10163 self.this = pm
10164
10165 elif args_match(args, (Document, mupdf.FzDocument), int):
10166 # Create pixmap from PDF image identified by XREF number
10167 doc, xref = args
10168 pdf = _as_pdf_document(doc)
10169 xreflen = mupdf.pdf_xref_len(pdf)
10170 if not _INRANGE(xref, 1, xreflen-1):
10171 raise ValueError( MSG_BAD_XREF)
10172 ref = mupdf.pdf_new_indirect(pdf, xref, 0)
10173 type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
10174 if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
10175 and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
10176 and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
10177 ):
10178 raise ValueError( MSG_IS_NO_IMAGE)
10179 img = mupdf.pdf_load_image(pdf, ref)
10180 # Original code passed null for subarea and ctm, but that's not
10181 # possible with MuPDF's python bindings. The equivalent is an
10182 # infinite rect and identify matrix scaled by img.w() and img.h().
10183 pix, w, h = mupdf.fz_get_pixmap_from_image(
10184 img,
10185 mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
10186 mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
10187 )
10188 self.this = pix
10189
10190 else:
10191 text = 'Unrecognised args for constructing Pixmap:\n'
10192 for arg in args:
10193 text += f' {type(arg)}: {arg}\n'
10194 raise Exception( text)
10195
def __len__(self):
    """Return the value of the `size` property."""
    nbytes = self.size
    return nbytes
10198
def __repr__(self):
    """Return 'Pixmap(<colorspace name>, <irect>, <alpha>)'.

    The colorspace name is 'None' for a pixmap without colorspace.

    Instances of subclasses get the default `object` representation. The
    previous code returned None for them, which makes `repr()` raise
    TypeError because `__repr__` must return a string.
    """
    if type(self) is not Pixmap:
        return object.__repr__(self)
    cspace = self.colorspace
    name = cspace.this.m_internal.name if cspace else 'None'
    return "Pixmap(%s, %s, %s)" % (name, self.irect, self.alpha)
10205
def _tobytes(self, format_, jpg_quality):
    '''
    Write the pixmap into a MuPDF buffer in the requested image format and
    return the buffer content converted by JM_BinFromBuffer().

    format_ codes: 1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG. Any other
    value is written as PNG. jpg_quality is only used for JPEG.
    '''
    pixmap = self.this
    # Initial buffer capacity: one uncompressed copy of the samples.
    capacity = mupdf.fz_pixmap_stride(pixmap) * pixmap.h()
    buffer = mupdf.fz_new_buffer(capacity)
    output = mupdf.FzOutput(buffer)
    if format_ == 7:
        mupdf.fz_write_pixmap_as_jpeg(output, pixmap, jpg_quality, 0)
    else:
        if format_ == 2:
            write = mupdf.fz_write_pixmap_as_pnm
        elif format_ == 3:
            write = mupdf.fz_write_pixmap_as_pam
        elif format_ == 5:
            write = mupdf.fz_write_pixmap_as_psd
        elif format_ == 6:
            write = mupdf.fz_write_pixmap_as_ps
        else:
            # Code 1 and every unknown code produce PNG.
            write = mupdf.fz_write_pixmap_as_png
        write(output, pixmap)
    output.fz_close_output()
    return JM_BinFromBuffer(buffer)
10226
def _writeIMG(self, filename, format_, jpg_quality):
    """Save the pixmap to `filename` in the format selected by `format_`.

    format_ codes: 1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG. Any other
    value is saved as PNG. jpg_quality is only used for JPEG.
    """
    pixmap = self.this
    if format_ == 7:
        mupdf.fz_save_pixmap_as_jpeg(pixmap, filename, jpg_quality)
        return
    if format_ == 2:
        save_fn = mupdf.fz_save_pixmap_as_pnm
    elif format_ == 3:
        save_fn = mupdf.fz_save_pixmap_as_pam
    elif format_ == 5:
        save_fn = mupdf.fz_save_pixmap_as_psd
    elif format_ == 6:
        save_fn = mupdf.fz_save_pixmap_as_ps
    else:
        # Code 1 and every unknown code produce PNG.
        save_fn = mupdf.fz_save_pixmap_as_png
    save_fn(pixmap, filename)
10236
@property
def alpha(self):
    """Alpha channel indicator as returned by mupdf.fz_pixmap_alpha()."""
    pixmap = self.this
    return mupdf.fz_pixmap_alpha(pixmap)
10241
def clear_with(self, value=None, bbox=None):
    """Fill all color components with the same value.

    Without `value` the pixmap is cleared via mupdf.fz_clear_pixmap().
    With `value` only, the whole pixmap is set to it. With `value` and
    `bbox`, only that rectangle is set.
    """
    if value is None:
        mupdf.fz_clear_pixmap(self.this)
        return
    if bbox is None:
        mupdf.fz_clear_pixmap_with_value(self.this, value)
        return
    JM_clear_pixmap_rect_with_value(self.this, value, JM_irect_from_py(bbox))
10250
def color_count(self, colors=0, clip=None):
    '''
    Count the colors of the pixmap, optionally restricted to `clip`.

    Returns the result of JM_color_count() if `colors` is true, otherwise
    only its length.
    '''
    counts = JM_color_count(self.this, clip)
    return counts if colors else len(counts)
10260
def color_topusage(self, clip=None):
    """Return the most frequent color and its usage ratio.

    Returns a tuple (ratio, pixel). If no pixels were counted, the result
    is (1, <self.n bytes of value 255>).
    """
    # A clip that contains the whole pixmap is replaced by the pixmap rect.
    if clip is not None and self.irect in Rect(clip):
        clip = self.irect
    usage = list(self.color_count(colors=True, clip=clip).items())
    total = sum(count for _, count in usage)
    if not total:
        return (1, bytes([255] * self.n))
    # max() keeps the first entry among equal counts.
    top_pixel, top_count = max(usage, key=lambda entry: entry[1])
    return (top_count / total, top_pixel)
10275
@property
def colorspace(self):
    """Pixmap Colorspace, or None if its name is "None"."""
    cspace = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
    return None if cspace.name == "None" else cspace
10283
def copy(self, src, bbox):
    """Copy bbox from another Pixmap.

    Args:
        src: source Pixmap.
        bbox: rect-like area to copy, converted with JM_irect_from_py().
    Raises:
        ValueError: the source has no colorspace, or source and target
            differ in their alpha setting.
    """
    pm = self.this
    src_pix = src.this
    # Check the wrapped pointer, as invert_irect() does. The wrapper object
    # returned by fz_pixmap_colorspace() exists even for a NULL colorspace.
    if not mupdf.fz_pixmap_colorspace(src_pix).m_internal:
        raise ValueError( "cannot copy pixmap with NULL colorspace")
    if pm.alpha() != src_pix.alpha():
        raise ValueError( "source and target alpha must be equal")
    mupdf.fz_copy_pixmap_rect(pm, src_pix, JM_irect_from_py(bbox), mupdf.FzDefaultColorspaces(None))
10293
@property
def digest(self):
    """MD5 digest of pixmap (bytes)."""
    return bytes(mupdf.fz_md5_pixmap2(self.this))
10299
def gamma_with(self, gamma):
    """Apply gamma correction with some float.

    gamma=1 is a no-op. For a pixmap without colorspace a warning is
    emitted and nothing is changed.
    """
    # Check the wrapped pointer, as invert_irect() does. The wrapper object
    # returned by fz_pixmap_colorspace() exists even for a NULL colorspace.
    if not mupdf.fz_pixmap_colorspace( self.this).m_internal:
        message_warning("colorspace invalid for function")
        return
    mupdf.fz_gamma_pixmap( self.this, gamma)
10307
@property
def h(self):
    """The height, as returned by mupdf.fz_pixmap_height()."""
    pixmap = self.this
    return mupdf.fz_pixmap_height(pixmap)
10312
def invert_irect(self, bbox=None):
    """Invert the colors inside a bbox.

    Returns False (after a warning) for a pixmap without colorspace,
    otherwise True. An infinite rectangle inverts the whole pixmap.
    """
    pixmap = self.this
    if not mupdf.fz_pixmap_colorspace(pixmap).m_internal:
        message_warning("ignored for stencil pixmap")
        return False
    area = JM_irect_from_py(bbox)
    if mupdf.fz_is_infinite_irect(area):
        mupdf.fz_invert_pixmap(pixmap)
    else:
        mupdf.fz_invert_pixmap_rect(pixmap, area)
    return True
10325
@property
def irect(self):
    """Pixmap bbox - an IRect object."""
    return JM_py_from_irect(mupdf.fz_pixmap_bbox(self.this))
10331
@property
def is_monochrome(self):
    """Result of mupdf.fz_is_pixmap_monochrome() for this pixmap."""
    pixmap = self.this
    return mupdf.fz_is_pixmap_monochrome(pixmap)
10336
@property
def is_unicolor(self):
    '''
    Check if pixmap has only one color.

    Compares every group of n samples against the first group and returns
    False at the first difference.
    '''
    pm = self.this
    n = pm.n()
    total = pm.w() * pm.h() * n

    def read_pixel(start):
        # The n sample values beginning at sample index `start`.
        return [mupdf.fz_samples_get(pm, start + k) for k in range(n)]

    reference = None
    for start in range(0, total, n):
        current = read_pixel(start)
        if start == 0:
            reference = current
        elif current != reference:
            return False
    return True
10358
@property
def n(self):
    """The size of one pixel.

    If g_use_extra is true, the first access replaces this property on the
    class with one that calls extra.pixmap_n() directly.
    """
    if g_use_extra:
        # Setting self.__class__.n gives a small reduction in overhead of
        # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
        #return extra.pixmap_n(self.this)
        def n2(self):
            return extra.pixmap_n(self.this)
        # Rebind the class attribute, so `self.n` below (and every later
        # access) resolves to the new property instead of this one.
        self.__class__.n = property(n2)
        return self.n
    return mupdf.fz_pixmap_components(self.this)
10371
def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
    '''
    Save pixmap as an OCR-ed PDF page.

    `filename` is either a str, which is passed to MuPDF as a file name,
    or any other object, which is wrapped with JM_new_output_fileptr().
    '''
    tessdata = get_tessdata(tessdata)
    options = mupdf.FzPdfocrOptions()
    options.compress = compress
    if language:
        options.language_set2( language)
    if tessdata:
        options.datadir_set2( tessdata)
    pixmap = self.this
    if not isinstance(filename, str):
        output = JM_new_output_fileptr( filename)
        try:
            mupdf.fz_write_pixmap_as_pdfocr( output, pixmap, options)
        finally:
            output.fz_close_output()    # Avoid MuPDF warning.
        return
    mupdf.fz_save_pixmap_as_pdfocr( pixmap, filename, 0, options)
10392
def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
    """Return the pixmap as an OCR-ed PDF page in memory.

    Args:
        compress: (bool) compress, default 1 (True).
        language: (str) language(s) occurring on page, default "eng" (English),
                multiples like "eng+ger" for English and German.
        tessdata: (str) folder name of Tesseract's language support. If None
                we use environment variable TESSDATA_PREFIX or search for
                Tesseract installation.
    Notes:
        On failure, make sure Tesseract is installed and you have set
        <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
        containing your Tesseract's language support data.
    """
    tessdata = get_tessdata(tessdata)
    sink = io.BytesIO()
    # pdfocr_save() writes into the in-memory stream.
    self.pdfocr_save(sink, compress=compress, language=language, tessdata=tessdata)
    return sink.getvalue()
10413
def pil_image(self):
    """Create a Pillow Image from the Pixmap.

    The Pillow mode is derived from the colorspace: "L" without
    colorspace, "L"/"LA" for one component, "RGB"/"RGBA" for three
    components (the second form if alpha is present), otherwise "CMYK".
    """
    try:
        from PIL import Image
    except ImportError:
        message("PIL/Pillow not installed")
        raise

    cspace = self.colorspace
    if not cspace:
        mode = "L"
    elif cspace.n == 1:
        mode = "LA" if self.alpha else "L"
    elif cspace.n == 3:
        mode = "RGBA" if self.alpha else "RGB"
    else:
        mode = "CMYK"

    return Image.frombytes(mode, (self.width, self.height), self.samples)
10434
def pil_save(self, *args, **kwargs):
    """Write to image file using Pillow.

    The pixmap is converted with pil_image() and all arguments are handed
    to the image's "save" method; see the Pillow documentation for their
    meaning. If no "dpi" keyword is given, (xres, yres) of the pixmap is
    used. Use this when other output formats are desired.
    """
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(*args, **kwargs)
10449
def pil_tobytes(self, *args, **kwargs):
    """Convert to an image in memory using Pillow.

    The pixmap is converted with pil_image() and saved into an in-memory
    stream via the image's "save" method; see the Pillow documentation for
    the meaning of the arguments. If no "dpi" keyword is given, (xres,
    yres) of the pixmap is used. Use this when other output formats are
    desired.
    """
    sink = io.BytesIO()
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(sink, *args, **kwargs)
    return sink.getvalue()
10466
def pixel(self, x, y):
    """Get color tuple of pixel (x, y).

    Last item is the alpha if Pixmap.alpha is true.

    Raises:
        ValueError: (x, y) lies outside the pixmap. This is the same
            exception and message that set_pixel() uses.
    """
    if g_use_extra:
        return extra.pixmap_pixel(self.this.m_internal, x, y)
    pm = self.this.m_internal
    if x < 0 or x >= pm.w or y < 0 or y >= pm.h:
        raise ValueError(MSG_PIXEL_OUTSIDE)
    n = pm.n
    # Offset of the pixel's first sample within the samples area.
    i = pm.stride * y + n * x
    return tuple( self.samples_mv[ i: i+n])
10484
@property
def samples(self)->bytes:
    """Copy of the samples memoryview as a bytes object."""
    return bytes( self.samples_mv)
10489
@property
def samples_mv(self):
    '''
    Pixmap samples memoryview.
    '''
    # The view is created on first access and cached in `_samples_mv`, so
    # that `__del__()` can release it; otherwise accessing it after we have
    # been destructed will fail, possibly crashing Python; this is #4155.
    cached = self._samples_mv
    if cached is None:
        cached = mupdf.fz_pixmap_samples_memoryview(self.this)
        self._samples_mv = cached
    return cached
10502
def _samples_mv_release(self):
    """Release the cached samples memoryview, if any, and forget it.

    The cache slot is reset to None, so the `samples_mv` property creates
    a fresh view on next access instead of handing out the released one.
    The test is `is not None` rather than truthiness: truth-testing a
    memoryview takes its length, which is 0 for an empty view and raises
    ValueError for an already released one.
    """
    mv = self._samples_mv
    if mv is None:
        return
    self._samples_mv = None
    mv.release()
10506
@property
def samples_ptr(self):
    """Value of mupdf.fz_pixmap_samples_int() for this pixmap."""
    pixmap = self.this
    return mupdf.fz_pixmap_samples_int(pixmap)
10510
def save(self, filename, output=None, jpg_quality=95):
    """Output as image in format determined by filename extension.

    Args:
        filename: a str, an object with an "absolute" attribute (converted
            with str()) or an object with a "name" attribute.
        output: (str) only use to overrule filename extension. Default is PNG.
            Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
        jpg_quality: quality passed on for JPEG output.
    Raises:
        ValueError: unknown format, alpha not supported by the format, or
            colorspace not supported by the format.
    """
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "psd": 5,
        "ps": 6,
        "jpg": 7,
        "jpeg": 7,
    }

    if type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
    if output is None:
        # Extension without the leading dot.
        output = os.path.splitext(filename)[1][1:]

    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        raise ValueError("'%s' cannot have alpha" % output)
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    return self._writeIMG(filename, idx, jpg_quality)
10551
def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
    """Set alpha channel to values contained in a byte array.
    If omitted, set alphas to 255.

    Args:
        alphavalues: (bytes) with length (width * height) or 'None'.
        premultiply: (bool, True) premultiply colors with alpha values.
        opaque: (tuple, length colorspace.n) this color receives opacity 0.
        matte: (tuple, length colorspace.n)) preblending background color.
    Raises:
        ValueError: the pixmap has no alpha channel, or `alphavalues` has
            fewer than width * height items.
        AssertionError: `alphavalues` is neither bytes nor bytearray.
    """
    pix = self.this
    if pix.alpha() == 0:
        raise ValueError( MSG_PIX_NOALPHA)
    n = mupdf.fz_pixmap_colorants(pix)
    w = mupdf.fz_pixmap_width(pix)
    h = mupdf.fz_pixmap_height(pix)
    balen = w * h * (n+1)
    colors = [0, 0, 0, 0]   # make this color opaque
    bgcolor = [0, 0, 0, 0]  # preblending background color
    zero_out = 0
    bground = 0
    # `opaque` and `matte` are only used if they are lists or tuples with
    # exactly n items; otherwise they are silently ignored.
    if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
        for i in range(n):
            colors[i] = opaque[i]
        zero_out = 1
    if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
        for i in range(n):
            bgcolor[i] = matte[i]
        bground = 1
    data = bytes()
    data_len = 0
    if alphavalues:
        if not isinstance(alphavalues, (bytes, bytearray)):
            # Raised explicitly, so the check also works under `python -O`.
            raise AssertionError(f'unexpected type for alphavalues: {type(alphavalues)}')
        data = alphavalues
        data_len = len(alphavalues)
        if data_len < w * h:
            raise ValueError( "bad alpha values")
    # Use C implementation for speed.
    mupdf.Pixmap_set_alpha_helper(
            balen,
            n,
            data_len,
            zero_out,
            mupdf.python_buffer_data( data),
            pix.m_internal,
            premultiply,
            bground,
            colors,
            bgcolor,
            )
10646
def tobytes(self, output="png", jpg_quality=95):
    '''
    Convert to binary image stream of desired type.

    Args:
        output: (str) image format name, case-insensitive; one of the keys
            of `valid_formats` below.
        jpg_quality: quality passed on for JPEG output.
    Raises:
        ValueError: unknown format, alpha not supported by the format, or
            colorspace not supported by the format.
    '''
    # NOTE(review): "tga" / "tpic" map to code 4, for which _tobytes() has
    # no dedicated writer and therefore produces PNG - confirm intent.
    valid_formats = {
            "png": 1,
            "pnm": 2,
            "pgm": 2,
            "ppm": 2,
            "pbm": 2,
            "pam": 3,
            "tga": 4,
            "tpic": 4,
            "psd": 5,
            "ps": 6,
            'jpg': 7,
            'jpeg': 7,
            }
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        # This message lacked the f-string prefix and showed "{output}"
        # literally; save() reports the actual format name.
        raise ValueError(f"'{output}' cannot have alpha")
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    barray = self._tobytes(idx, jpg_quality)
    return barray
10676
def set_dpi(self, xres, yres):
    """Set resolution in both dimensions."""
    raw = self.this.m_internal
    raw.xres = xres
    raw.yres = yres
10682
def set_origin(self, x, y):
    """Set top-left coordinates."""
    raw = self.this.m_internal
    raw.x = x
    raw.y = y
10688
def set_pixel(self, x, y, color):
    """Set color of pixel (x, y).

    Args:
        x, y: pixel position.
        color: sequence with (at least) one integer in range 0..255 per
            pixel component.
    Raises:
        ValueError: position outside the pixmap or bad color value.
    """
    if g_use_extra:
        return extra.set_pixel(self.this.m_internal, x, y, color)
    pm = self.this
    if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1):
        raise ValueError( MSG_PIXEL_OUTSIDE)
    n = pm.n()
    # Validate all components before the first one is written.
    for j in range(n):
        if not _INRANGE(color[j], 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
    stride = mupdf.fz_pixmap_stride( pm)
    # Offset of the pixel's first sample within the samples area.
    i = stride * y + n * x
    for j in range(n):
        pm.fz_samples_set(i + j, color[j])
10713
def set_rect(self, bbox, color):
    """Set color of all pixels in bbox.

    Returns the result of JM_fill_pixmap_rect_with_color() as a bool.
    Raises ValueError if a color component is outside 0..255.
    """
    pm = self.this
    components = []
    for j in range(pm.n()):
        value = color[j]
        if not _INRANGE(value, 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
        components.append(value)
    filled = JM_fill_pixmap_rect_with_color(pm, components, JM_irect_from_py(bbox))
    return bool(filled)
10728
def shrink(self, factor):
    """Divide width and height by 2**factor.

    E.g. factor=1 shrinks to 25% of original size (in place). A factor
    below 1 is ignored with a warning.
    """
    if factor < 1:
        message_warning("ignoring shrink factor < 1")
    else:
        mupdf.fz_subsample_pixmap( self.this, factor)
        # Pixmap has changed so clear our memory view.
        self._memory_view = None
        self._samples_mv_release()
10739
@property
def size(self):
    """Pixmap size as returned by mupdf.fz_pixmap_size()."""
    pixmap = self.this
    return mupdf.fz_pixmap_size(pixmap)
10744
@property
def stride(self):
    """Length of one image line (width * n)."""
    pixmap = self.this
    return pixmap.stride()
10749
def tint_with(self, black, white):
    """Tint colors with modifiers for black and white.

    Does nothing except emitting a message if the pixmap has no colorspace
    or one with more than 3 components.
    """
    cspace = self.colorspace
    if not cspace or cspace.n > 3:
        message("warning: colorspace invalid for function")
        return
    return mupdf.fz_tint_pixmap( self.this, black, white)
10756
@property
def w(self):
    """The width, as returned by mupdf.fz_pixmap_width()."""
    pixmap = self.this
    return mupdf.fz_pixmap_width(pixmap)
10761
def warp(self, quad, width, height):
    """Return pixmap from a warped quad.

    Raises ValueError if the quad is not convex.
    """
    if not quad.is_convex:
        raise ValueError("quad must be convex")
    q = JM_quad_from_py(quad)
    # Corner order handed to MuPDF: ul, ur, lr, ll.
    corners = [ q.ul, q.ur, q.lr, q.ll]
    return Pixmap( mupdf.fz_warp_pixmap( self.this, corners, width, height))
10769
@property
def x(self):
    """x component of Pixmap origin."""
    pixmap = self.this
    return mupdf.fz_pixmap_x(pixmap)
10774
@property
def xres(self):
    """Resolution in x direction."""
    pixmap = self.this
    return pixmap.xres()
10779
@property
def y(self):
    """y component of Pixmap origin."""
    pixmap = self.this
    return mupdf.fz_pixmap_y(pixmap)
10784
@property
def yres(self):
    """Resolution in y direction."""
    pixmap = self.this
    return pixmap.yres()
10789
# Aliases: `width` and `height` are bound to the same property objects as
# `w` and `h`.
width = w
height = h
10792
def __del__(self):
    """Release the cached samples memoryview when the object goes away.

    The view is looked up with getattr(), so an instance that never got
    its `_samples_mv` attribute is handled too. The test is `is not None`
    rather than truthiness: truth-testing a memoryview takes its length,
    which raises ValueError once the view has been released. release()
    itself may be called repeatedly.
    """
    mv = getattr(self, "_samples_mv", None)
    if mv is not None:
        mv.release()
10796
10797
# Remove the existing binding of the name `Point` before the class is
# defined below.
# NOTE(review): presumably an earlier placeholder definition - confirm.
del Point
class Point:
    """Point in the plane, stored in attributes `x` and `y`.

    Instances act as sequences of length 2 and take part in arithmetic
    with numbers, 2-item sequences and matrix-like 6-item sequences.
    """

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'

        Explicit keyword args x, y override earlier settings if not None.
        '''
        argc = len(args)
        if argc > 2:
            raise ValueError("Point: bad seq len")
        if argc == 0:
            self.x = 0.0
            self.y = 0.0
        elif argc == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        else:
            src = args[0]
            if isinstance(src, (mupdf.FzPoint, mupdf.fz_point)):
                # MuPDF point objects: coordinates are taken as they are.
                self.x = src.x
                self.y = src.y
            elif not hasattr(src, "__getitem__"):
                raise ValueError("Point: bad args")
            elif len(src) != 2:
                raise ValueError("Point: bad seq len")
            else:
                self.x = float(src[0])
                self.y = float(src[1])
        if x is not None:
            self.x = x
        if y is not None:
            self.y = y

    # ---- sequence protocol -------------------------------------------

    def __len__(self):
        return 2

    def __getitem__(self, i):
        coords = (self.x, self.y)
        return coords[i]

    def __setitem__(self, i, v):
        # The value is converted before the index is checked.
        v = float(v)
        if i == 0:
            self.x = v
            return None
        if i == 1:
            self.y = v
            return None
        raise IndexError("index out of range")

    def __hash__(self):
        return hash(tuple(self))

    def __repr__(self):
        return f"Point{tuple(self)}"

    # ---- truth value and comparison ----------------------------------

    def __bool__(self):
        """False only if both coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __nonzero__(self):
        return not (max(self) == min(self) == 0)

    def __eq__(self, p):
        if hasattr(p, "__len__"):
            # Equal if the difference is the zero point.
            return len(p) == 2 and not (self - p)
        return False

    # ---- arithmetic --------------------------------------------------

    def __abs__(self):
        """Euclidean length of the vector (x, y)."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __pos__(self):
        return Point(self)

    def __add__(self, p):
        """Add a number to both coordinates, or add a point-like."""
        if hasattr(p, "__float__"):
            dx = dy = p
        else:
            if len(p) != 2:
                raise ValueError("Point: bad seq len")
            dx, dy = p[0], p[1]
        return Point(self.x + dx, self.y + dy)

    def __sub__(self, p):
        """Subtract a number from both coordinates, or a point-like."""
        if hasattr(p, "__float__"):
            dx = dy = p
        else:
            if len(p) != 2:
                raise ValueError("Point: bad seq len")
            dx, dy = p[0], p[1]
        return Point(self.x - dx, self.y - dy)

    def __mul__(self, m):
        """Scale by a number, dot product with a 2-item sequence, or
        transform a copy with a matrix-like."""
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        if hasattr(m, "__getitem__") and len(m) == 2:
            # dot product
            return self.x * m[0] + self.y * m[1]
        return Point(self).transform(m)

    def __truediv__(self, m):
        """Divide by a number, or transform a copy with the inverse of a
        matrix-like."""
        if hasattr(m, "__float__"):
            return Point(self.x * 1.0 / m, self.y * 1.0 / m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("matrix not invertible")
        return Point(self).transform(inverse)

    # ---- geometry ----------------------------------------------------

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0,0)
        length = math.sqrt(squared)
        return Point(abs(self.x) / length, abs(self.y) / length)

    @property
    def unit(self):
        """Unit vector of the point."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0,0)
        length = math.sqrt(squared)
        return Point(self.x / length, self.y / length)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        args[0] is a point-like or rect-like, the optional args[1] is the
        unit of the result: "px" (default), "in", "cm" or "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        x = args[0]
        if len(x) == 2:
            x = Point(x)
        elif len(x) == 4:
            x = Rect(x)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        unit = args[1] if len(args) > 1 else "px"
        u = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.),
             "mm": (25.4, 72.)}
        numerator, denominator = u[unit]
        f = numerator / denominator

        if type(x) is Point:
            return abs(self - x) * f

        # from here on, x is a rectangle
        # as a safeguard, make a finite copy of it
        r = Rect(x.top_left, x.top_left)
        r = r | x.bottom_right
        if self in r:
            return 0.0
        if self.x > r.x1:
            # right of the rectangle
            if self.y >= r.y1:
                return self.distance_to(r.bottom_right, unit)
            if self.y <= r.y0:
                return self.distance_to(r.top_right, unit)
            return (self.x - r.x1) * f
        if r.x0 <= self.x <= r.x1:
            # vertically above or below the rectangle
            if self.y >= r.y1:
                return (self.y - r.y1) * f
            return (r.y0 - self.y) * f
        # left of the rectangle
        if self.y >= r.y1:
            return self.distance_to(r.bottom_left, unit)
        if self.y <= r.y0:
            return self.distance_to(r.top_left, unit)
        return (r.x0 - self.x) * f

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    __div__ = __truediv__
    norm = __abs__
10984
10985
10986 class Quad:
10987
def __abs__(self):
    """Product of the side lengths ul-ur and ul-ll; 0.0 if empty."""
    if not self.is_empty:
        return abs(self.ul - self.ur) * abs(self.ul - self.ll)
    return 0.0
10992
def __add__(self, q):
    """Add a number to every corner, or add a quad-like corner by corner."""
    if hasattr(q, "__float__"):
        d_ul = d_ur = d_ll = d_lr = q
    else:
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        d_ul, d_ur, d_ll, d_lr = q[0], q[1], q[2], q[3]
    return Quad(self.ul + d_ul, self.ur + d_ur, self.ll + d_ll, self.lr + d_lr)
10999
def __bool__(self):
    """True unless the quad is empty."""
    empty = self.is_empty
    return not empty
11002
def __contains__(self, x):
    """Test whether a point-like, rect-like or quad-like lies in the quad.

    Objects without a length and objects of a length other than 2 or 4
    are never contained. An empty rectangle is always contained.
    """
    try:
        length = x.__len__()
    except Exception:
        if g_exceptions_verbose > 1:    exception_info()
        return False
    if length == 2:
        return util_point_in_quad(x, self)
    if length != 4:
        return False
    if CheckRect(x):
        if Rect(x).is_empty:
            return True
        # Both defining corners of the rectangle must be inside.
        return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
    if CheckQuad(x):
        # All four corners must be inside.
        return all(util_point_in_quad(x[i], self) for i in range(4))
    return False
11023
def __eq__(self, quad):
    """Compare corner by corner with any object of length 4."""
    if not hasattr(quad, "__len__"):
        return False
    if len(quad) != 4:
        return False
    return (
            self.ul == quad[0] and
            self.ur == quad[1] and
            self.ll == quad[2] and
            self.lr == quad[3]
    )
11033
def __getitem__(self, i):
    """Corner by index, in the order ul, ur, ll, lr."""
    corners = (self.ul, self.ur, self.ll, self.lr)
    return corners[i]
11036
def __hash__(self):
    """Hash of the tuple of the quad's items."""
    items = tuple(self)
    return hash(items)
11039
def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
    '''
    Quad() - all zero points
    Quad(ul, ur, ll, lr)
    Quad(quad) - new copy
    Quad(sequence) - from 'sequence'

    Explicit keyword args ul, ur, ll, lr override earlier settings if not
    None.

    Raises ValueError for a wrong number of arguments or a single
    argument that is not a sequence of length 4.
    '''
    if not args:
        # One separate Point per corner. Binding all four names to a single
        # Point object would make an in-place change of one corner (e.g.
        # `q.ul.x = 1`) show up in all of them.
        self.ul, self.ur, self.ll, self.lr = Point(), Point(), Point(), Point()
    elif len(args) > 4:
        raise ValueError("Quad: bad seq len")
    elif len(args) == 4:
        self.ul, self.ur, self.ll, self.lr = map(Point, args)
    elif len(args) == 1:
        l = args[0]
        if isinstance(l, mupdf.FzQuad):
            # Keep the MuPDF object and copy its corners.
            self.this = l
            self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
        elif not hasattr(l, "__getitem__"):
            raise ValueError("Quad: bad args")
        elif len(l) != 4:
            raise ValueError("Quad: bad seq len")
        else:
            self.ul, self.ur, self.ll, self.lr = map(Point, l)
    else:
        raise ValueError("Quad: bad args")
    if ul is not None:  self.ul = Point(ul)
    if ur is not None:  self.ur = Point(ur)
    if ll is not None:  self.ll = Point(ll)
    if lr is not None:  self.lr = Point(lr)
11074
11075 def __len__(self):
11076 return 4
11077
11078 def __mul__(self, m):
11079 q = Quad(self)
11080 q = q.transform(m)
11081 return q
11082
11083 def __neg__(self):
11084 return Quad(-self.ul, -self.ur, -self.ll, -self.lr)
11085
11086 def __nonzero__(self):
11087 return not self.is_empty
11088
11089 def __pos__(self):
11090 return Quad(self)
11091
11092 def __repr__(self):
11093 return "Quad" + str(tuple(self))
11094
11095 def __setitem__(self, i, v):
11096 if i == 0: self.ul = Point(v)
11097 elif i == 1: self.ur = Point(v)
11098 elif i == 2: self.ll = Point(v)
11099 elif i == 3: self.lr = Point(v)
11100 else:
11101 raise IndexError("index out of range")
11102 return None
11103
11104 def __sub__(self, q):
11105 if hasattr(q, "__float__"):
11106 return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
11107 if len(q) != 4:
11108 raise ValueError("Quad: bad seq len")
11109 return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])
11110
11111 def __truediv__(self, m):
11112 if hasattr(m, "__float__"):
11113 im = 1. / m
11114 else:
11115 im = util_invert_matrix(m)[1]
11116 if not im:
11117 raise ZeroDivisionError("Matrix not invertible")
11118 q = Quad(self)
11119 q = q.transform(im)
11120 return q
11121
11122 @property
11123 def is_convex(self):
11124 """Check if quad is convex and not degenerate.
11125
11126 Notes:
11127 Check that for the two diagonals, the other two corners are not
11128 on the same side of the diagonal.
11129 Returns:
11130 True or False.
11131 """
11132 m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis
11133 p1 = self.ll * m # transform the
11134 p2 = self.ur * m # other two points
11135 if p1.y * p2.y > 0:
11136 return False
11137 m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis
11138 p1 = self.lr * m # transform the
11139 p2 = self.ul * m # remaining points
11140 if p1.y * p2.y > 0:
11141 return False
11142 return True
11143
11144 @property
11145 def is_empty(self):
11146 """Check whether all quad corners are on the same line.
11147
11148 This is the case if width or height is zero.
11149 """
11150 return self.width < EPSILON or self.height < EPSILON
11151
11152 @property
11153 def is_infinite(self):
11154 """Check whether this is the infinite quad."""
11155 return self.rect.is_infinite
11156
11157 @property
11158 def is_rectangular(self):
11159 """Check if quad is rectangular.
11160
11161 Notes:
11162 Some rotation matrix can thus transform it into a rectangle.
11163 This is equivalent to three corners enclose 90 degrees.
11164 Returns:
11165 True or False.
11166 """
11167
11168 sine = util_sine_between(self.ul, self.ur, self.lr)
11169 if abs(sine - 1) > EPSILON: # the sine of the angle
11170 return False
11171
11172 sine = util_sine_between(self.ur, self.lr, self.ll)
11173 if abs(sine - 1) > EPSILON:
11174 return False
11175
11176 sine = util_sine_between(self.lr, self.ll, self.ul)
11177 if abs(sine - 1) > EPSILON:
11178 return False
11179
11180 return True
11181
11182 def morph(self, p, m):
11183 """Morph the quad with matrix-like 'm' and point-like 'p'.
11184
11185 Return a new quad."""
11186 if self.is_infinite:
11187 return INFINITE_QUAD()
11188 delta = Matrix(1, 1).pretranslate(p.x, p.y)
11189 q = self * ~delta * m * delta
11190 return q
11191
11192 @property
11193 def rect(self):
11194 r = Rect()
11195 r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
11196 r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
11197 r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
11198 r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
11199 return r
11200
11201 def transform(self, m):
11202 """Replace quad by its transformation with matrix m."""
11203 if hasattr(m, "__float__"):
11204 pass
11205 elif len(m) != 6:
11206 raise ValueError("Matrix: bad seq len")
11207 self.ul *= m
11208 self.ur *= m
11209 self.ll *= m
11210 self.lr *= m
11211 return self
11212
11213 __div__ = __truediv__
11214 width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
11215 height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
11216
11217
class Rect:
    """Rectangle with float coordinates `x0`, `y0`, `x1`, `y1`.

    Acts as a sequence of length 4 yielding the coordinates in that order.
    """

    def __abs__(self):
        """Return the area; empty and infinite rectangles give 0.0."""
        if self.is_empty or self.is_infinite:
            return 0.0
        return (self.x1 - self.x0) * (self.y1 - self.y0)

    def __add__(self, p):
        """Add a number to all coordinates, or a 4-sequence item by item."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        dx0, dy0, dx1, dy1 = p[0], p[1], p[2], p[3]
        return Rect(self.x0 + dx0, self.y0 + dy0, self.x1 + dx1, self.y1 + dy1)

    def __and__(self, x):
        """Return a new rect: the intersection with rect-like `x`."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        other = Rect(x)
        return Rect(self).intersect(other)

    def __bool__(self):
        """False only if all four coordinates are zero."""
        highest = max(self)
        lowest = min(self)
        return not (highest == lowest == 0)

    def __contains__(self, x):
        """Membership test for a number, a point-like or a rect-/quad-like."""
        if hasattr(x, "__float__"):
            # A number is "contained" if it equals one of the coordinates.
            return x in tuple(self)
        size = len(x)
        if size == 2:
            return util_is_point_in_rect(x, self)
        if size != 4:
            return False
        r = INFINITE_RECT()
        try:
            r = Rect(x)
        except Exception:
            if g_exceptions_verbose > 1: exception_info()
            # Not usable as a rect: treat it as a quad and take its rect.
            r = Quad(x).rect
        inside_x = self.x0 <= r.x0 <= r.x1 <= self.x1
        return inside_x and self.y0 <= r.y0 <= r.y1 <= self.y1

    def __eq__(self, rect):
        """Equal if `rect` has length 4 and the difference is all zeros."""
        if hasattr(rect, "__len__"):
            return len(rect) == 4 and not (self - rect)
        return False

    def __getitem__(self, i):
        """Return coordinate `i` in the order x0, y0, x1, y1."""
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        # Argument parsing is delegated to util_make_rect(); the four values
        # it returns are stored as floats.
        left, top, right, bottom = util_make_rect(
            *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1
        )
        self.x0 = float(left)
        self.y0 = float(top)
        self.x1 = float(right)
        self.y1 = float(bottom)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Scale by a number, or return a copy transformed by matrix-like `m`."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        return Rect(self).transform(m)

    def __neg__(self):
        return Rect(-self.x0, -self.y0, -self.x1, -self.y1)

    # Python 2 spelling of `__bool__`.
    __nonzero__ = __bool__

    def __or__(self, x):
        """Return a new rect extended to include point-like or rect-like `x`."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        joined = Rect(self)
        size = len(x)
        if size == 2:
            return joined.include_point(x)
        if size == 4:
            return joined.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        """Return a copy."""
        return Rect(self)

    def __repr__(self):
        return "Rect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate `i` (0..3) to `float(v)`; other indices raise IndexError."""
        value = float(v)
        for index, attribute in enumerate(("x0", "y0", "x1", "y1")):
            if i == index:
                setattr(self, attribute, value)
                return None
        raise IndexError("index out of range")

    def __sub__(self, p):
        """Subtract a number from all coordinates, or a 4-sequence item by item."""
        if hasattr(p, "__float__"):
            return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        dx0, dy0, dx1, dy1 = p[0], p[1], p[2], p[3]
        return Rect(self.x0 - dx0, self.y0 - dy0, self.x1 - dx1, self.y1 - dy1)

    def __truediv__(self, m):
        """Divide by a number, or transform a copy with the inverse of matrix-like `m`."""
        if hasattr(m, "__float__"):
            return Rect(
                self.x0 * 1. / m,
                self.y0 * 1. / m,
                self.x1 * 1. / m,
                self.y1 * 1. / m,
            )
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        return Rect(self).transform(inverse)

    @property
    def bottom_left(self):
        """Point (x0, y1)."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Point (x1, y1)."""
        return Point(self.x1, self.y1)

    def contains(self, x):
        """Method form of the `in` test for point-like or rect-like `x`."""
        return self.__contains__(x)

    @property
    def height(self):
        """y1 - y0, but never less than zero."""
        return max(0, self.y1 - self.y0)

    def include_point(self, p):
        """Enlarge in place so that point-like `p` is included; return self."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
        return self

    def include_rect(self, r):
        """Enlarge in place so that rect-like `r` is included; return self."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite or self.is_infinite:
            # Anything joined with the infinite rect is the infinite rect.
            coords = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
        elif other.is_empty:
            return self
        elif self.is_empty:
            coords = (other.x0, other.y0, other.x1, other.y1)
        else:
            coords = util_union_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = coords
        return self

    def intersect(self, r):
        """Shrink in place to the part shared with rect-like `r`; return self."""
        if not len(r) == 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite:
            return self
        if self.is_infinite or other.is_empty:
            # In both cases the result is simply the other rectangle.
            coords = (other.x0, other.y0, other.x1, other.y1)
        elif self.is_empty:
            return self
        else:
            coords = util_intersect_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = coords
        return self

    def intersects(self, x):
        """Tell whether this rect and rect-like `x` share a non-empty area.

        Empty or infinite rectangles never intersect anything.
        """
        other = Rect(x)
        if self.is_empty or self.is_infinite:
            return False
        if other.is_empty or other.is_infinite:
            return False
        return (
            self.x0 < other.x1
            and other.x0 < self.x1
            and self.y0 < other.y1
            and other.y0 < self.y1
        )

    @property
    def is_empty(self):
        """True if x0 >= x1 or y0 >= y1."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if the coordinates are those of the infinite rectangle."""
        starts_at_min = self.x0 == self.y0 == FZ_MIN_INF_RECT
        return starts_at_min and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if x0 <= x1 and y0 <= y1."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        The result is a new quad; the infinite rect gives the infinite quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Swap coordinates in place so that x0 <= x1 and y0 <= y1; return self."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Quad built from the four corners tl, tr, bl, br."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def round(self):
        """Return the IRect made from util_round_rect()."""
        return IRect(util_round_rect(self))

    @property
    def top_left(self):
        """Point (x0, y0)."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Point (x1, y0)."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return the matrix that maps this rectangle onto rect-like `r`.

        Raises ValueError if either rectangle is infinite or empty.
        """
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move own top-left to the origin, scale to the target size, then
        # move to the target's top-left.
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(r.width / self.width, r.height / self.height)
        to_target = Matrix(1, 0, 0, 1, r.x0, r.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        """Transform in place with matrix-like `m` (length 6); return self."""
        if not len(m) == 6:
            raise ValueError("Matrix: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
        return self

    @property
    def width(self):
        """x1 - x0, but never less than zero."""
        return max(0, self.x1 - self.x0)

    # Python 2 spelling of `__truediv__`.
    __div__ = __truediv__

    # Short aliases.
    bl = bottom_left
    br = bottom_right
    irect = property(round)
    tl = top_left
    tr = top_right
11503
11504
class Story:
    '''
    Wrapper around a MuPDF story created from HTML text.

    The usual cycle is: `place()` the story into a rectangle, `draw()` it
    to a device, and repeat while `place()` reports that more content is
    left. `write()` and the `write_*()` helpers run this loop, and the
    `fit*()` methods search for a rectangle that holds the whole story.
    '''

    def __init__( self, html='', user_css=None, em=12, archive=None):
        '''
        Args:
            html: HTML source text; it is passed on UTF-8 encoded.
            user_css: passed on unchanged to the MuPDF story constructor.
            em: passed on unchanged to the MuPDF story constructor.
            archive: an `Archive`, or something `Archive()` accepts. If
                false, an empty `mupdf.FzArchive` is used.
        '''
        buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))
        if archive and not isinstance(archive, Archive):
            archive = Archive(archive)
        arch = archive.this if archive else mupdf.FzArchive( None)
        # Use mupdf.FzStoryS if the bindings provide it, otherwise
        # mupdf.FzStory.
        if hasattr(mupdf, 'FzStoryS'):
            self.this = mupdf.FzStoryS( buffer_, user_css, em, arch)
        else:
            self.this = mupdf.FzStory( buffer_, user_css, em, arch)

    def add_header_ids(self):
        '''
        Look for `<h1..6>` items in `self` and adds unique `id`
        attributes if not already present.

        Generated ids are `h_id_0`, `h_id_1`, ...; the counter only
        advances when an id is actually assigned.
        '''
        dom = self.body
        i = 0
        x = dom.find(None, None, None)
        while x:
            name = x.tagname
            if len(name) == 2 and name[0]=="h" and name[1] in "123456":
                attr = x.get_attribute_value("id")
                if not attr:
                    id_ = f"h_id_{i}"
                    #log(f"{name=}: setting {id_=}")
                    x.set_attribute("id", id_)
                    i += 1
            x = x.find_next(None, None, None)

    @staticmethod
    def add_pdf_links(document_or_stream, positions):
        """
        Adds links to PDF document.
        Args:
            document_or_stream:
                A PDF `Document` or raw PDF content, for example an
                `io.BytesIO` instance.
            positions:
                List of `ElementPosition`'s for `document_or_stream`,
                typically from Story.element_positions(). Only positions
                with bit 0 of `open_close` set are used. If two or more
                positions have the same id, the first one is used and
                later ones are ignored.
        Returns:
            `document_or_stream` if a `Document` instance, otherwise a
            new `Document` instance.
        We raise a RuntimeError if an `href` in `positions` refers to an
        internal position `#<name>` but no item in `positions` has `id =
        name`.
        """
        if isinstance(document_or_stream, Document):
            document = document_or_stream
        else:
            document = Document("pdf", document_or_stream)

        # Create dict from id to position, which we will use to find
        # link destinations.
        #
        id_to_position = dict()
        for position in positions:
            if (position.open_close & 1) and position.id:
                # The first position with a given id wins; duplicates are
                # ignored.
                if position.id not in id_to_position:
                    id_to_position[ position.id] = position

        # Insert links for all positions that have an `href`.
        #
        for position_from in positions:

            if (position_from.open_close & 1) and position_from.href:

                link = dict()
                link['from'] = Rect(position_from.rect)

                if position_from.href.startswith("#"):
                    #`<a href="#...">...</a>` internal link.
                    target_id = position_from.href[1:]
                    try:
                        position_to = id_to_position[ target_id]
                    except KeyError as e:
                        if g_exceptions_verbose > 1: exception_info()
                        raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
                    # Make link from `position_from`'s rect to top-left of
                    # `position_to`'s rect.
                    link["kind"] = LINK_GOTO
                    x0, y0, x1, y1 = position_to.rect
                    # This appears to work well with viewers which scroll
                    # to make destination point top-left of window.
                    link["to"] = Point(x0, y0)
                    # `page_num` is 1-based.
                    link["page"] = position_to.page_num - 1

                else:
                    # `<a href="...">...</a>` external link.
                    if position_from.href.startswith('name:'):
                        link['kind'] = LINK_NAMED
                        link['name'] = position_from.href[5:]
                    else:
                        link['kind'] = LINK_URI
                        link['uri'] = position_from.href

                #log(f'Adding link: {position_from.page_num=} {link=}.')
                document[position_from.page_num - 1].insert_link(link)

        return document

    @property
    def body(self):
        '''The body tag of the story's DOM.'''
        dom = self.document()
        return dom.bodytag()

    def document( self):
        '''Return the story's DOM wrapped as `Xml`.'''
        dom = mupdf.fz_story_document( self.this)
        return Xml( dom)

    def draw( self, device, matrix=None):
        '''
        Draw the story to `device` using `matrix`.

        If `device` is false, a `mupdf.FzDevice(None)` is used instead.
        '''
        ctm2 = JM_matrix_from_py( matrix)
        dev = device.this if device else mupdf.FzDevice( None)
        mupdf.fz_draw_story( self.this, dev, ctm2)

    def element_positions( self, function, args=None):
        '''
        Trigger a callback function to record where items have been placed.

        Args:
            function:
                Callable taking exactly one argument. It is called with
                an object that has members `depth`, `heading`, `id`,
                `rect`, `text`, `open_close`, `rect_num` and `href`.
            args:
                Optional dict; each item is additionally set as an
                attribute of the object passed to `function`. All keys
                must be strings that are valid identifiers. Any other
                kind of value is ignored.
        Raises:
            ValueError: invalid key in `args`, or `function` is not a
            callable with exactly one argument.
        '''
        # isinstance() so that dict subclasses are honoured rather than
        # silently discarded.
        if isinstance(args, dict):
            for k in args.keys():
                if not (isinstance(k, str) and k.isidentifier()):
                    raise ValueError(f"invalid key '{k}'")
        else:
            args = {}
        if not callable(function) or function.__code__.co_argcount != 1:
            raise ValueError("callback 'function' must be a callable with exactly one argument")

        class Position2:
            pass

        def function2( position):
            # Copy the members into a plain Python object before handing
            # it to the user's callback.
            position2 = Position2()
            position2.depth = position.depth
            position2.heading = position.heading
            position2.id = position.id
            position2.rect = JM_py_from_rect(position.rect)
            position2.text = position.text
            position2.open_close = position.open_close
            position2.rect_num = position.rectangle_num
            position2.href = position.href
            if args:
                for k, v in args.items():
                    setattr( position2, k, v)
            function( position2)
        mupdf.fz_story_positions( self.this, function2)

    def place( self, where):
        '''
        Place the story into rect-like `where`.

        Returns `(more, filled)`, where `more` is the value returned by
        `mupdf.fz_place_story()` (false when nothing is left to place) and
        `filled` is the filled rectangle converted to a Python value.
        '''
        where = JM_rect_from_py( where)
        filled = mupdf.FzRect()
        more = mupdf.fz_place_story( self.this, where, filled)
        return more, JM_py_from_rect( filled)

    def reset( self):
        '''Calls `mupdf.fz_reset_story()`.'''
        mupdf.fz_reset_story( self.this)

    def write(self, writer, rectfn, positionfn=None, pagefn=None):
        '''
        Places and draws the story into successive rectangles until
        `self.place()` reports that nothing is left.

        Args:
            writer:
                Object with `begin_page(mediabox)` and `end_page()`. If
                false, the story is drawn to a null device and no pages
                are created.
            rectfn:
                Called as `rectfn(rect_num, filled)` where `filled` is
                the result of the previous `place()` (initially an all
                zero rect). Must return `(mediabox, rect, ctm)`; a true
                `mediabox` means that a new page starts.
            positionfn:
                If true, called for every element position after each
                `place()`; the position gets an extra `.page_num` member.
            pagefn:
                If true, called as `pagefn(page_num, mediabox, dev,
                after)` with `after=0` right after a page was begun and
                `after=1` right before it is ended.
        '''
        dev = None
        page_num = 0
        rect_num = 0
        filled = Rect(0, 0, 0, 0)
        while 1:
            mediabox, rect, ctm = rectfn(rect_num, filled)
            rect_num += 1
            if mediabox:
                # new page.
                page_num += 1
            more, filled = self.place( rect)
            if positionfn:
                def positionfn2(position):
                    # We add a `.page_num` member to the
                    # `ElementPosition` instance.
                    position.page_num = page_num
                    positionfn(position)
                self.element_positions(positionfn2)
            if writer:
                if mediabox:
                    # new page.
                    if dev:
                        if pagefn:
                            # NOTE: `page_num` and `mediabox` already
                            # refer to the page that is about to start.
                            pagefn(page_num, mediabox, dev, 1)
                        writer.end_page()
                    dev = writer.begin_page( mediabox)
                    if pagefn:
                        pagefn(page_num, mediabox, dev, 0)
                self.draw( dev, ctm)
                if not more:
                    if pagefn:
                        pagefn( page_num, mediabox, dev, 1)
                    writer.end_page()
            else:
                self.draw(None, ctm)
            if not more:
                break

    @staticmethod
    def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
        '''
        Repeatedly creates and lays out a story from `contentfn(positions)`
        until the content no longer changes, then writes that final
        iteration to `writer`.

        `contentfn` is called with the list of positions collected in the
        previous iteration (initially empty) and must return the HTML
        content. `positionfn`, if true, is only called during the final
        (stable) iteration. The other args are passed on to `Story()` and
        `Story.write()`.
        '''
        positions = list()
        content = None
        # Iterate until stable.
        while 1:
            content_prev = content
            content = contentfn( positions)
            stable = content == content_prev
            story = Story(content, user_css, em, archive)

            if add_header_ids:
                story.add_header_ids()

            positions = list()
            def positionfn2(position):
                #log(f"write_stabilized(): {stable=} {positionfn=} {position=}")
                positions.append(position)
                if stable and positionfn:
                    positionfn(position)
            story.write(
                    writer if stable else None,
                    rectfn,
                    positionfn2,
                    pagefn,
                    )
            if stable:
                break

    @staticmethod
    def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
        '''
        Like `write_stabilized()`, but writes into an in-memory PDF and
        returns the `Document` produced by `Story.add_pdf_links()` for the
        recorded positions.
        '''
        stream = io.BytesIO()
        writer = DocumentWriter(stream)
        positions = []
        def positionfn2(position):
            #log(f"write_stabilized_with_links(): {position=}")
            positions.append(position)
            if positionfn:
                positionfn(position)
        Story.write_stabilized(writer, contentfn, rectfn, user_css, em, positionfn2, pagefn, archive, add_header_ids)
        writer.close()
        stream.seek(0)
        return Story.add_pdf_links(stream, positions)

    def write_with_links(self, rectfn, positionfn=None, pagefn=None):
        '''
        Like `write()`, but writes into an in-memory PDF and returns the
        `Document` produced by `Story.add_pdf_links()` for the recorded
        positions.
        '''
        stream = io.BytesIO()
        writer = DocumentWriter(stream)
        positions = []
        def positionfn2(position):
            #log(f"write_with_links(): {position=}")
            positions.append(position)
            if positionfn:
                positionfn(position)
        self.write(writer, rectfn, positionfn=positionfn2, pagefn=pagefn)
        writer.close()
        stream.seek(0)
        return Story.add_pdf_links(stream, positions)

    class FitResult:
        '''
        The result from a `Story.fit*()` method.

        Members:

        `big_enough`:
            `True` if the fit succeeded.
        `filled`:
            From the last call to `Story.place()`.
        `more`:
            `False` if the fit succeeded.
        `numcalls`:
            Number of calls made to `self.place()`.
        `parameter`:
            The successful parameter value, or the largest failing value.
        `rect`:
            The rect created from `parameter`.
        '''
        def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
            self.big_enough = big_enough
            self.filled = filled
            self.more = more
            self.numcalls = numcalls
            self.parameter = parameter
            self.rect = rect

        def __repr__(self):
            return (
                    f' big_enough={self.big_enough}'
                    f' filled={self.filled}'
                    f' more={self.more}'
                    f' numcalls={self.numcalls}'
                    f' parameter={self.parameter}'
                    f' rect={self.rect}'
                    )

    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
        '''
        Finds optimal rect that contains the story `self`.

        Returns a `Story.FitResult` instance.

        On success, the last call to `self.place()` will have been with the
        returned rectangle, so `self.draw()` can be used directly.

        Args:
            :arg fn:
                A callable taking a floating point `parameter` and returning a
                `pymupdf.Rect()`. If the rect is empty, we assume the story will
                not fit and do not call `self.place()`.

                Must guarantee that `self.place()` behaves monotonically when
                given rect `fn(parameter`) as `parameter` increases. This
                usually means that both width and height increase or stay
                unchanged as `parameter` increases.
            :arg pmin:
                Minimum parameter to consider; `None` for -infinity.
            :arg pmax:
                Maximum parameter to consider; `None` for +infinity.
            :arg delta:
                Maximum error in returned `parameter`.
            :arg verbose:
                If true we output diagnostics.
        '''
        def log(text):
            assert verbose
            message(f'fit(): {text}')

        assert isinstance(pmin, (int, float)) or pmin is None
        assert isinstance(pmax, (int, float)) or pmax is None

        class State:
            # Mutable search state shared by the nested helpers below.
            def __init__(self):
                self.pmin = pmin            # largest parameter known to fail
                self.pmax = pmax            # smallest parameter known to fit
                self.pmin_result = None     # FitResult of the last failure
                self.pmax_result = None     # FitResult of the last success
                self.result = None
                self.numcalls = 0           # number of self.place() calls
                self.last_p = None          # parameter of the latest update()
                if verbose:
                    self.pmin0 = pmin
                    self.pmax0 = pmax
        state = State()

        if verbose:
            log(f'starting. {state.pmin=} {state.pmax=}.')

        self.reset()

        def ret():
            # Builds the return value. If a fitting parameter is known, we
            # make sure the most recent self.place() was done with it.
            if state.pmax is not None:
                if state.last_p != state.pmax:
                    if verbose:
                        log(f'Calling update() with pmax, because was overwritten by later calls.')
                    big_enough = update(state.pmax)
                    assert big_enough
                result = state.pmax_result
            else:
                result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
            if verbose:
                log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
            return result

        def update(parameter):
            '''
            Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
            false, then `rect` is big enough to contain `self` and we
            set `state.pmax=parameter` and return True. Otherwise we set
            `state.pmin=parameter` and return False.
            '''
            rect = fn(parameter)
            assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
            if rect.is_empty:
                big_enough = False
                result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
                if verbose:
                    log(f'update(): not calling self.place() because rect is empty.')
            else:
                more, filled = self.place(rect)
                state.numcalls += 1
                big_enough = not more
                result = Story.FitResult(
                        filled=filled,
                        more=more,
                        numcalls=state.numcalls,
                        parameter=parameter,
                        rect=rect,
                        big_enough=big_enough,
                        )
                if verbose:
                    log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
            if big_enough:
                state.pmax = parameter
                state.pmax_result = result
            else:
                state.pmin = parameter
                state.pmin_result = result
            state.last_p = parameter
            return big_enough

        def opposite(p, direction):
            '''
            Returns same sign as `direction`, larger or smaller than `p` if
            direction is positive or negative respectively.
            '''
            if p is None or p==0:
                return direction
            if direction * p > 0:
                return 2 * p
            return -p

        if state.pmin is None:
            # Find an initial finite pmin value.
            if verbose: log(f'finding pmin.')
            parameter = opposite(state.pmax, -1)
            while 1:
                if not update(parameter):
                    break
                parameter *= 2
        else:
            if update(state.pmin):
                if verbose: log(f'{state.pmin=} is big enough.')
                return ret()

        if state.pmax is None:
            # Find an initial finite pmax value.
            if verbose: log(f'finding pmax.')
            parameter = opposite(state.pmin, +1)
            while 1:
                if update(parameter):
                    break
                parameter *= 2
        else:
            if not update(state.pmax):
                # No solution possible. Log before clearing pmax so that
                # the diagnostic shows the value that failed.
                if verbose: log(f'No solution possible {state.pmax=}.')
                state.pmax = None
                return ret()

        # Do binary search in pmin..pmax.
        if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
        while 1:
            if state.pmax - state.pmin < delta:
                return ret()
            parameter = (state.pmin + state.pmax) / 2
            update(parameter)

    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
        '''
        Finds smallest value `scale` in range `scale_min..scale_max` where
        `scale * rect` is large enough to contain the story `self`.

        Returns a `Story.FitResult` instance.

        :arg rect:
            Rect-like `(x0, y0, x1, y1)`. Its width and height are
            multiplied by `scale`; `(x0, y0)` stays fixed.
        :arg scale_min:
            Minimum scale to consider; must be >= 0.
        :arg scale_max:
            Maximum scale to consider, must be >= scale_min or `None` for
            infinite.
        :arg delta:
            Maximum error in returned scale.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0, x1, y1 = rect
        width = x1 - x0
        height = y1 - y0
        def fn(scale):
            return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
        return self.fit(fn, scale_min, scale_max, delta, verbose)

    def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
        '''
        Finds smallest height in range `height_min..height_max` where a rect
        with size `(width, height)` is large enough to contain the story
        `self`.

        Returns a `Story.FitResult` instance.

        :arg width:
            width of rect.
        :arg height_min:
            Minimum height to consider; must be >= 0.
        :arg height_max:
            Maximum height to consider, must be >= height_min or `None` for
            infinite.
        :arg origin:
            `(x0, y0)` of rect.
        :arg delta:
            Maximum error in returned height.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0 = origin
        x1 = x0 + width
        def fn(height):
            return Rect(x0, y0, x1, y0+height)
        return self.fit(fn, height_min, height_max, delta, verbose)

    def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
        '''
        Finds smallest width in range `width_min..width_max` where a rect with size
        `(width, height)` is large enough to contain the story `self`.

        Returns a `Story.FitResult` instance.

        :arg height:
            height of rect.
        :arg width_min:
            Minimum width to consider; must be >= 0.
        :arg width_max:
            Maximum width to consider, must be >= width_min or `None` for
            infinite.
        :arg origin:
            `(x0, y0)` of rect.
        :arg delta:
            Maximum error in returned width.
        :arg verbose:
            If true we output diagnostics.
        '''
        x0, y0 = origin
        y1 = y0 + height
        def fn(width):
            return Rect(x0, y0, x0+width, y1)
        return self.fit(fn, width_min, width_max, delta, verbose)
12048
12049
class TextPage:
    """Structured text of one page.

    Wraps a `mupdf.FzStextPage` (kept in `self.this`) and provides the
    `extract*()` output formats and text search.
    """

    def __init__(self, *args):
        """Create a TextPage.

        Args:
            args: either one `mupdf.FzRect` (mediabox for a new
                `mupdf.FzStextPage`) or one existing `mupdf.FzStextPage`.

        Raises:
            Exception: if `args` matches neither form.
        """
        if args_match(args, mupdf.FzRect):
            mediabox = args[0]
            self.this = mupdf.FzStextPage(mediabox)
        elif args_match(args, mupdf.FzStextPage):
            self.this = args[0]
        else:
            raise Exception(f'Unrecognised args: {args}')
        self.thisown = True
        self.parent = None

    def _extractText(self, format_):
        """Return the page text in the format selected by `format_`.

        1 = HTML, 3 = XML, 4 = XHTML; any other value gives plain text.
        """
        this_tpage = self.this
        res = mupdf.fz_new_buffer(1024)
        out = mupdf.FzOutput(res)
        # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
        # because there is no .refs member visible and no fz_keep_output() fn,
        # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
        # doesn't convert the returned fz_output* into a mupdf.FzOutput.
        #out = mupdf.FzOutput(out)
        if format_ == 1:
            mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
        elif format_ == 3:
            mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0)
        elif format_ == 4:
            mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0)
        else:
            JM_print_stext_page_as_text(res, this_tpage)
        out.fz_close_output()
        return JM_EscapeStrFromBuffer(res)

    def _getNewBlockList(self, page_dict, raw):
        """Let `JM_make_textpage_dict()` fill `page_dict` from this page."""
        JM_make_textpage_dict(self.this, page_dict, raw)

    def _textpage_dict(self, raw=False):
        """Return the page dict; `raw` is forwarded to the dict builder."""
        page_dict = {"width": self.rect.width, "height": self.rect.height}
        self._getNewBlockList(page_dict, raw)
        return page_dict

    @staticmethod
    def _apply_dict_options(val, cb, sort):
        """Apply the `cb` / `sort` options shared by the dict and JSON extractors.

        Args:
            val: page dict as returned by `_textpage_dict()`; modified in place.
            cb: if not None, its `width` / `height` replace the dict's values.
            sort: if true, sort `val["blocks"]` by bbox bottom, then bbox left.

        Returns:
            `val`.
        """
        if cb is not None:
            val["width"] = cb.width
            val["height"] = cb.height
        if sort:
            val["blocks"].sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
        return val

    @staticmethod
    def _dict_to_json(val):
        """Serialise a page dict to JSON; binary values become base64 text."""
        # Imported here (not at module level) to keep module start-up fast.
        import base64
        import json

        class b64encode(json.JSONEncoder):
            def default(self, s):
                # isinstance() so that subclasses of bytes / bytearray are
                # encoded too instead of silently becoming null.
                if isinstance(s, (bytes, bytearray)):
                    return base64.b64encode(s).decode()
                # Any other unsupported type is emitted as null, as before.
                return None

        return json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)

    def extractBLOCKS(self):
        """Return a list with text block information.

        Each item is `(x0, y0, x1, y1, text, block_no, block_type)`.
        """
        if g_use_extra:
            return extra.extractBLOCKS(self.this)
        block_n = -1
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
        res = mupdf.fz_new_buffer(1024)
        lines = []
        for block in this_tpage:
            block_n += 1
            blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                mupdf.fz_clear_buffer(res)  # set text buffer to empty
                last_char = 0
                for line in block:
                    linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                    for ch in line:
                        cbbox = JM_char_bbox(line, ch)
                        # Skip characters outside a finite page rectangle.
                        if (not JM_rects_overlap(tp_rect, cbbox)
                                and not mupdf.fz_is_infinite_rect(tp_rect)
                                ):
                            continue
                        JM_append_rune(res, ch.m_internal.c)
                        last_char = ch.m_internal.c
                        linerect = mupdf.fz_union_rect(linerect, cbbox)
                    # Terminate non-empty lines with a line feed (10) unless
                    # the last character already was one.
                    if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
                        mupdf.fz_append_byte(res, 10)
                    blockrect = mupdf.fz_union_rect(blockrect, linerect)
                text = JM_EscapeStrFromBuffer(res)
            elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
                    or mupdf.fz_is_infinite_rect(tp_rect)
                    ):
                img = block.i_image()
                cs = img.colorspace()
                text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                        mupdf.fz_colorspace_name(cs),
                        img.w(), img.h(), img.bpc()
                        )
                blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
            # blockrect is still empty if nothing of the block was accepted.
            if not mupdf.fz_is_empty_rect(blockrect):
                lines.append((
                        blockrect.x0,
                        blockrect.y0,
                        blockrect.x1,
                        blockrect.y1,
                        text,
                        block_n,
                        block.m_internal.type,
                        ))
        return lines

    def extractDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text spans."""
        return self._apply_dict_options(self._textpage_dict(raw=False), cb, sort)

    def extractHTML(self) -> str:
        """Return page content as a HTML string."""
        return self._extractText(1)

    def extractIMGINFO(self, hashes=0):
        """Return a list with image meta information.

        Args:
            hashes: if true, each item also gets a "digest" entry computed
                from the image's pixmap.
        """
        block_n = -1
        this_tpage = self.this
        rc = []
        for block in this_tpage:
            block_n += 1
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            img = block.i_image()
            img_size = 0
            mask = img.mask()
            has_mask = True if mask.m_internal else False
            compr_buff = mupdf.fz_compressed_image_buffer(img)
            if compr_buff.m_internal:
                img_size = compr_buff.fz_compressed_buffer_size()
                compr_buff = None
            if hashes:
                r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
                assert mupdf.fz_is_infinite_irect(r)
                m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
                pix, _w, _h = mupdf.fz_get_pixmap_from_image(img, r, m)
                digest = bytes(mupdf.fz_md5_pixmap2(pix))
            if img_size == 0:
                # No compressed buffer size available: use width * height * n.
                img_size = img.w() * img.h() * img.n()
            cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
            block_dict = dict()
            block_dict[dictkey_number] = block_n
            block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
            block_dict[dictkey_width] = img.w()
            block_dict[dictkey_height] = img.h()
            block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
            block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
            block_dict[dictkey_xres] = img.xres()
            block_dict[dictkey_yres] = img.yres()
            block_dict[dictkey_bpc] = img.bpc()
            block_dict[dictkey_size] = img_size
            if hashes:
                block_dict["digest"] = digest
            block_dict["has-mask"] = has_mask
            rc.append(block_dict)
        return rc

    def extractJSON(self, cb=None, sort=False) -> str:
        """Return 'extractDICT' converted to JSON format."""
        val = self._apply_dict_options(self._textpage_dict(raw=False), cb, sort)
        return self._dict_to_json(val)

    def extractRAWDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text characters."""
        return self._apply_dict_options(self._textpage_dict(raw=True), cb, sort)

    def extractRAWJSON(self, cb=None, sort=False) -> str:
        """Return 'extractRAWDICT' converted to JSON format."""
        val = self._apply_dict_options(self._textpage_dict(raw=True), cb, sort)
        return self._dict_to_json(val)

    def extractSelection(self, pointa, pointb):
        """Return the result of `mupdf.fz_copy_selection()` for the two points."""
        a = JM_point_from_py(pointa)
        b = JM_point_from_py(pointb)
        return mupdf.fz_copy_selection(self.this, a, b, 0)

    def extractText(self, sort=False) -> str:
        """Return simple, bare text on the page.

        With `sort` true, the text of the blocks from `extractBLOCKS()` is
        joined after ordering the blocks by bbox bottom, then bbox left.
        """
        if not sort:
            return self._extractText(0)
        blocks = self.extractBLOCKS()[:]
        blocks.sort(key=lambda b: (b[3], b[0]))
        return "".join([b[4] for b in blocks])

    def extractTextbox(self, rect):
        """Return the text that `JM_copy_rectangle()` extracts for `rect`."""
        this_tpage = self.this
        assert isinstance(this_tpage, mupdf.FzStextPage)
        area = JM_rect_from_py(rect)
        found = JM_copy_rectangle(this_tpage, area)
        return PyUnicode_DecodeRawUnicodeEscape(found)

    def extractWORDS(self, delimiters=None):
        """Return a list with text word information.

        Args:
            delimiters: forwarded to `JM_is_word_delimiter()` for every
                character.
        """
        if g_use_extra:
            return extra.extractWORDS(self.this, delimiters)
        buflen = 0
        last_char_rtl = 0
        block_n = -1
        wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)  # word bbox
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)

        buff = mupdf.fz_new_buffer(64)
        lines = []
        for block in this_tpage:
            block_n += 1
            if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            line_n = -1
            for line in block:
                line_n += 1
                word_n = 0  # word counter per line
                mupdf.fz_clear_buffer(buff)  # reset word buffer
                buflen = 0  # reset char counter
                for ch in line:
                    cbbox = JM_char_bbox(line, ch)
                    # Skip characters outside a finite page rectangle.
                    if (not JM_rects_overlap(tp_rect, cbbox)
                            and not mupdf.fz_is_infinite_rect(tp_rect)
                            ):
                        continue
                    word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                    this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                    # A delimiter or a change of writing direction ends the
                    # current word.
                    if word_delimiter or this_char_rtl != last_char_rtl:
                        if buflen == 0 and word_delimiter:
                            continue  # skip delimiters at line start
                        if not mupdf.fz_is_empty_rect(wbbox):
                            word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                        mupdf.fz_clear_buffer(buff)
                        buflen = 0  # reset char counter
                        if word_delimiter:
                            continue
                    # append one unicode character to the word
                    JM_append_rune(buff, ch.m_internal.c)
                    last_char_rtl = this_char_rtl
                    buflen += 1
                    # enlarge word bbox
                    wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch))
                # Flush the word still pending at the end of the line.
                if buflen and not mupdf.fz_is_empty_rect(wbbox):
                    word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                buflen = 0
        return lines

    def extractXHTML(self) -> str:
        """Return page content as a XHTML string."""
        return self._extractText(4)

    def extractXML(self) -> str:
        """Return page content as a XML string."""
        return self._extractText(3)

    def poolsize(self):
        """TextPage current poolsize."""
        tpage = self.this
        pool = mupdf.Pool(tpage.m_internal.pool)
        size = mupdf.fz_pool_size(pool)
        pool.m_internal = None  # Ensure that pool's destructor does not free the pool.
        return size

    @property
    def rect(self):
        """Page rectangle."""
        mediabox = self.this.m_internal.mediabox
        return Rect(JM_py_from_rect(mediabox))

    def search(self, needle, hit_max=0, quads=1):
        """Locate 'needle' returning rects or quads.

        Args:
            needle: text to search for.
            hit_max: accepted but not used by this implementation.
            quads: if true return `Quad` objects, otherwise `Rect` objects
                where overlapping hits on the same line are joined.
        """
        val = JM_search_stext_page(self.this, needle)
        if not val:
            return val
        # Convert the entries to quads or rects, keeping the same list object.
        for i, hit in enumerate(val):
            q = Quad(hit)
            val[i] = q if quads else q.rect
        if quads:
            return val
        items = len(val)
        i = 0  # join overlapping rects on the same line
        while i < items - 1:
            v1 = val[i]
            v2 = val[i + 1]
            if v1.y1 != v2.y1 or (v1 & v2).is_empty:
                i += 1
                continue  # no overlap on same line
            val[i] = v1 | v2  # join rectangles
            del val[i + 1]  # remove v2
            items -= 1  # reduce item count
        return val

    extractTEXT = extractText
12393
12394
class TextWriter:
    """Collects text spans and writes them to a PDF page of the same size."""

    def __init__(self, page_rect, opacity=1, color=None):
        """Stores text spans for later output on compatible PDF pages.

        Args:
            page_rect: rect-like; `write_text()` only accepts pages whose
                rect matches it.
            opacity: default opacity used by `write_text()`.
            color: default text color used by `write_text()`.
        """
        self.this = mupdf.fz_new_text()

        self.opacity = opacity
        self.color = color
        self.rect = Rect(page_rect)
        # Flips the y-axis within the page height; ictm is the inverse.
        self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height)
        self.ictm = ~self.ctm
        self.last_point = Point()
        self.last_point.__doc__ = "Position following last text insertion."
        self.text_rect = Rect()

        self.text_rect.__doc__ = "Accumulated area of text spans."
        self.used_fonts = set()
        self.thisown = True

    @property
    def _bbox(self):
        """Bounding box of all stored text as computed by `fz_bound_text()`."""
        val = JM_py_from_rect(mupdf.fz_bound_text(self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix()))
        return Rect(val)

    def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
        """Store 'text' at point 'pos' using 'font' and 'fontsize'.

        Args:
            pos: point-like start position.
            text: the string to store.
            font: a `Font`; `Font("helv")` is used if None.
            fontsize: font size.
            language: passed to `mupdf.fz_text_language_from_string()`.
            right_to_left: if true, `text` is passed through `clean_rtl()`
                and then reversed character by character before storing.
            small_caps: if non-zero, `JM_show_string_cs()` is used instead
                of `mupdf.fz_show_string()`.

        Returns:
            Tuple `(self.text_rect, self.last_point)`.

        Raises:
            ValueError: if `font.is_writable` is false.
        """
        pos = Point(pos) * self.ictm
        #log( '{font=}')
        if font is None:
            font = Font("helv")
        if not font.is_writable:
            if 0:
                # Disabled diagnostics, kept for debugging.
                log( f'{font.this.m_internal.name=}')
                log( f'{font.this.m_internal.t3matrix=}')
                log( f'{font.this.m_internal.bbox=}')
                log( f'{font.this.m_internal.glyph_count=}')
                log( f'{font.this.m_internal.use_glyph_bbox=}')
                log( f'{font.this.m_internal.width_count=}')
                log( f'{font.this.m_internal.width_default=}')
                log( f'{font.this.m_internal.has_digest=}')
                log( f'Unsupported font {font.name=}')
                if mupdf_cppyy:
                    import cppyy
                    log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
            raise ValueError("Unsupported font '%s'." % font.name)
        if right_to_left:
            text = self.clean_rtl(text)
            text = "".join(reversed(text))
            right_to_left = 0

        lang = mupdf.fz_text_language_from_string(language)
        p = JM_point_from_py(pos)
        trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
        markup_dir = 0
        wmode = 0
        if small_caps == 0:
            trm = mupdf.fz_show_string(self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        else:
            trm = JM_show_string_cs(self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        val = JM_py_from_matrix(trm)

        # The last two matrix entries give the position after the text.
        self.last_point = Point(val[-2:]) * self.ctm
        self.text_rect = self._bbox * self.ctm
        # Fonts flagged "mono" are remembered; write_text() passes them to
        # repair_mono_font().
        if font.flags["mono"] == 1:
            self.used_fonts.add(font)
        return self.text_rect, self.last_point

    def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
        """Store 'text' one character per line, starting at point 'pos'.

        Each following character is placed `fontsize * 1.2` further along
        the y-axis.

        Args:
            pos: point-like start position; it is not modified.
            text: the string to store.
            font, fontsize, language, small_caps: forwarded to `append()`.

        Returns:
            Tuple `(self.text_rect, self.last_point)`.
        """
        lheight = fontsize * 1.2
        # Work on a private Point so that the caller's object is left
        # untouched and tuple point-likes are accepted, as in append().
        pos = Point(pos)
        for c in text:
            self.append(pos, c, font=font, fontsize=fontsize,
                    language=language, small_caps=small_caps)
            pos.y += lheight
        return self.text_rect, self.last_point

    def clean_rtl(self, text):
        """Revert the sequence of Latin text parts.

        Text with right-to-left writing direction (Arabic, Hebrew) often
        contains Latin parts, which are written in left-to-right: numbers, names,
        etc. For output as PDF text we need *everything* in right-to-left.
        E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
        converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
        parts remain untouched.

        A word counts as Latin if it has at least two characters and all
        of its code points are <= 255.

        Args:
            text: str
        Returns:
            Massaged string.
        """
        if not text:
            return text
        # split into words at space boundaries
        words = text.split(" ")
        latin = [len(w) >= 2 and max(ord(c) for c in w) <= 255 for w in words]

        # revert character sequence for Latin only words
        for i, is_latin in enumerate(latin):
            if is_latin:
                words[i] = words[i][::-1]

        # Adjacent Latin words must revert their sequence, too. This covers
        # every maximal run, including one that ends at the end of the text.
        count = len(words)
        start = 0
        while start < count:
            if not latin[start]:
                start += 1
                continue
            stop = start
            while stop < count and latin[stop]:
                stop += 1
            if stop - start > 1:  # at least two consecutives?
                words[start:stop] = words[start:stop][::-1]
            start = stop

        return " ".join(words)

    def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
        """Write the text to a PDF page having the TextWriter's page size.

        Args:
            page: a PDF page having same size.
            color: override text color.
            opacity: override transparency.
            overlay: put in foreground or background.
            morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
            matrix: Matrix to be used instead of 'morph' argument.
            render_mode: (int) PDF render mode operator 'Tr'.
            oc: passed to `page._get_optional_content()`; if that returns a
                name, the text is wrapped in a `/OC ... BDC` / `EMC` pair.

        Raises:
            ValueError: if the page rect differs from the TextWriter's rect,
                if 'morph' is not a (Point, Matrix) pair, or if both
                'matrix' and 'morph' are given.
        """
        CheckParent(page)
        if abs(self.rect - page.rect) > 1e-3:
            raise ValueError("incompatible page rect")
        if morph is not None:
            if (type(morph) not in (tuple, list)
                    or type(morph[0]) is not Point
                    or type(morph[1]) is not Matrix
                    ):
                raise ValueError("morph must be (Point, Matrix) or None")
        if matrix is not None and morph is not None:
            raise ValueError("only one of matrix, morph is allowed")
        # Non-numeric values and the -1 default select the writer's opacity.
        if getattr(opacity, "__float__", None) is None or opacity == -1:
            opacity = self.opacity
        if color is None:
            color = self.color

        pdfpage = page._pdf_page()
        alpha = 1
        if opacity >= 0 and opacity < 1:
            alpha = opacity
        ncol = 1
        dev_color = [0, 0, 0, 0]
        if color:
            ncol, dev_color = JM_color_FromSequence(color)
        if ncol == 3:
            colorspace = mupdf.fz_device_rgb()
        elif ncol == 4:
            colorspace = mupdf.fz_device_cmyk()
        else:
            colorspace = mupdf.fz_device_gray()

        # Render the stored text into a scratch content buffer and a
        # scratch resources dictionary.
        resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
        contents = mupdf.fz_new_buffer(1024)
        dev = mupdf.pdf_new_pdf_device(pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
        #log( '=== {dev_color!r=}')
        mupdf.fz_fill_text(
                dev,
                self.this,
                mupdf.FzMatrix(),
                colorspace,
                dev_color,
                alpha,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                )
        mupdf.fz_close_device(dev)

        # copy generated resources into the one of the page
        max_alp, max_font = JM_merge_resources(pdfpage, resources)
        content = JM_EscapeStrFromBuffer(contents)
        old_cont_lines = content.splitlines()

        optcont = page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC" % optcont
            emc = "EMC"
        else:
            bdc = emc = ""

        new_cont_lines = ["q"]
        if bdc:
            new_cont_lines.append(bdc)

        cb = page.cropbox_position
        if page.rotation in (90, 270):
            delta = page.rect.height - page.rect.width
        else:
            delta = 0
        mb = page.mediabox
        if bool(cb) or mb.y0 != 0 or delta != 0:
            new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")

        if morph:
            p = morph[0] * self.ictm
            delta = Matrix(1, 1).pretranslate(p.x, p.y)
            matrix = ~delta * morph[1] * delta
        if morph or matrix:
            new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")

        for line in old_cont_lines:
            if line.endswith(" cm"):
                # Generated matrix operators are dropped.
                continue
            if line == "BT":
                new_cont_lines.append(line)
                new_cont_lines.append("%i Tr" % render_mode)
                continue
            if line.endswith(" gs"):
                # Renumber the /Alp resource by the page's current maximum.
                alp = int(line.split()[0][4:]) + max_alp
                line = "/Alp%i gs" % alp
            elif line.endswith(" Tf"):
                temp = line.split()
                fsize = float(temp[1])
                if render_mode != 0:
                    w = fsize * 0.05
                else:
                    w = 1
                new_cont_lines.append(_format_g(w) + " w")
                # Renumber the /F resource by the page's current maximum.
                font_no = int(temp[0][2:]) + max_font
                line = " ".join(["/F%i" % font_no] + temp[1:])
            # For fill color operators, the matching stroke color operator is
            # emitted as well, ahead of the original line.
            elif line.endswith(" rg"):
                new_cont_lines.append(line.replace("rg", "RG"))
            elif line.endswith(" g"):
                new_cont_lines.append(line.replace(" g", " G"))
            elif line.endswith(" k"):
                new_cont_lines.append(line.replace(" k", " K"))
            new_cont_lines.append(line)
        if emc:
            new_cont_lines.append(emc)
        new_cont_lines.append("Q\n")
        content = "\n".join(new_cont_lines).encode("utf-8")
        TOOLS._insert_contents(page, content, overlay=overlay)
        for font in self.used_fonts:
            repair_mono_font(page, font)
        return None
12651
12652
class IRect:
    """
    Rectangle with four coordinates x0, y0, x1, y1.

    Constructor forms:

    IRect() - all zeros
    IRect(x0, y0, x1, y1) - 4 coordinates
    IRect(top-left, x1, y1) - point and 2 coordinates
    IRect(x0, y0, bottom-right) - 2 coordinates and point
    IRect(top-left, bottom-right) - 2 points
    IRect(sequ) - new from sequence or rect-like

    Most arithmetic is delegated to the corresponding `Rect` method, and
    the result is rounded with `.round()`.
    """

    def __add__(self, p):
        total = Rect.__add__(self, p)
        return total.round()

    def __and__(self, x):
        common = Rect.__and__(self, x)
        return common.round()

    def __contains__(self, x):
        return Rect.__contains__(self, x)

    def __eq__(self, r):
        """Equal to any object of length 4 with the same four items."""
        if not hasattr(r, "__len__"):
            return False
        if len(r) != 4:
            return False
        return self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]

    def __getitem__(self, i):
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """Set the coordinates from the values `util_make_irect()` returns."""
        coords = util_make_irect(*args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0, self.y0, self.x1, self.y1 = coords

    def __len__(self):
        return 4

    def __mul__(self, m):
        product = Rect.__mul__(self, m)
        return product.round()

    def __neg__(self):
        return IRect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __or__(self, x):
        joined = Rect.__or__(self, x)
        return joined.round()

    def __pos__(self):
        return IRect(self)

    def __repr__(self):
        return "IRect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Store `int(v)` as coordinate number `i` (0..3)."""
        value = int(v)
        for position, attr in enumerate(("x0", "y0", "x1", "y1")):
            if i == position:
                setattr(self, attr, value)
                return None
        raise IndexError("index out of range")

    def __sub__(self, p):
        difference = Rect.__sub__(self, p)
        return difference.round()

    def __truediv__(self, m):
        quotient = Rect.__truediv__(self, m)
        return quotient.round()

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    @property
    def height(self):
        """y1 - y0, but never less than 0."""
        extent = self.y1 - self.y0
        return max(0, extent)

    def contains(self, x):
        """Check if x is in the rectangle."""
        return self.__contains__(x)

    def include_point(self, p):
        """Extend rectangle to include point p."""
        return self.rect.include_point(p).irect

    def include_rect(self, r):
        """Extend rectangle to include rectangle r."""
        return self.rect.include_rect(r).irect

    def intersect(self, r):
        """Restrict rectangle to intersection with rectangle r."""
        common = Rect.intersect(self, r)
        return common.round()

    def intersects(self, x):
        return Rect.intersects(self, x)

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        if self.x0 >= self.x1:
            return True
        return self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if rectangle is infinite."""
        return (
            self.x0 == self.y0 == FZ_MIN_INF_RECT
            and self.x1 == self.y1 == FZ_MAX_INF_RECT
        )

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        if self.x0 > self.x1:
            return False
        return self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if not self.is_infinite:
            return self.quad.morph(p, m)
        return INFINITE_QUAD()

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Replace rectangle with its valid version."""
        if self.x0 > self.x1:
            self.x0, self.x1 = self.x1, self.x0
        if self.y0 > self.y1:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    @property
    def rect(self):
        """Return Rect version of rectangle."""
        return Rect(self)

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect."""
        target = Rect(r)
        if self.is_infinite or self.is_empty or target.is_infinite or target.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move to the origin, scale to the target size, move to the target.
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(target.width / self.width, target.height / self.height)
        to_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        moved = Rect.transform(self, m)
        return moved.round()

    @property
    def width(self):
        """x1 - x0, but never less than 0."""
        extent = self.x1 - self.x0
        return max(0, extent)

    # Short aliases for the corner properties.
    br = bottom_right
    bl = bottom_left
    tl = top_left
    tr = top_right
12830
12831
12832 # Data
12833 #
12834
# Re-export the simple PDF_* and UCDN_SCRIPT_* values of the mupdf module as
# attributes of this module.
#
# mupdf.__dict__ is iterated directly; the alternative of using
# inspect.getmembers(mupdf) was noted to be slower (e.g. 0.019 instead of
# 0.004).
_self = sys.modules[__name__]
for _name, _value in mupdf.__dict__.items():
    if not _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
        continue
    if _name.startswith('PDF_ENUM_NAME_'):
        # Not a simple enum.
        continue
    setattr(_self, _name, _value)

# This is a macro so not preserved in mupdf C++/Python bindings.
#
PDF_SIGNATURE_DEFAULT_APPEARANCE = (
    mupdf.PDF_SIGNATURE_SHOW_LABELS
    | mupdf.PDF_SIGNATURE_SHOW_DN
    | mupdf.PDF_SIGNATURE_SHOW_DATE
    | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
    | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
    | mupdf.PDF_SIGNATURE_SHOW_LOGO
)

assert mupdf.UCDN_EAST_ASIAN_H == 1

# Flake8 incorrectly fails next two lines because we've dynamically added
# items to self.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE  # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM  # noqa: F821

# Remove the loop helpers from the module namespace again.
del _self, _name, _value
12882
AnyType = typing.Any

# Names of the 14 fonts listed as "Base14".
Base14_fontnames = (
    "Courier",
    "Courier-Oblique",
    "Courier-Bold",
    "Courier-BoldOblique",
    "Helvetica",
    "Helvetica-Oblique",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    "Times-Roman",
    "Times-Italic",
    "Times-Bold",
    "Times-BoldItalic",
    "Symbol",
    "ZapfDingbats",
)

# Maps lower-cased font names, and then four-letter abbreviations, to the
# font names above. A plain loop is used so that the loop variable `f`
# stays defined at module level, exactly as before.
Base14_fontdict = {}
for f in Base14_fontnames:
    Base14_fontdict[f.lower()] = f
Base14_fontdict.update({
    "helv": "Helvetica",
    "heit": "Helvetica-Oblique",
    "hebo": "Helvetica-Bold",
    "hebi": "Helvetica-BoldOblique",
    "cour": "Courier",
    "coit": "Courier-Oblique",
    "cobo": "Courier-Bold",
    "cobi": "Courier-BoldOblique",
    "tiro": "Times-Roman",
    "tibo": "Times-Bold",
    "tiit": "Times-Italic",
    "tibi": "Times-BoldItalic",
    "symb": "Symbol",
    "zadb": "ZapfDingbats",
})
12919
# Two names for the same tolerance value.
EPSILON = FLT_EPSILON = 1e-5

# largest 32bit integers surviving C float conversion roundtrips
# used by MuPDF to define infinite rectangles
FZ_MIN_INF_RECT = -(1 << 31)       # -0x80000000
FZ_MAX_INF_RECT = (1 << 31) - 128  # 0x7fffff80

JM_annot_id_stem = "fitz"
JM_mupdf_warnings_store = list()
JM_mupdf_show_errors = 1
JM_mupdf_show_warnings = 0
12932
12933
12934 # ------------------------------------------------------------------------------
12935 # Image recompression constants
12936 # ------------------------------------------------------------------------------
# Each name below is a module-level alias for the mupdf attribute of the
# same name. They are assigned explicitly, one per line, rather than injected
# dynamically.
FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
# Subsampling methods.
FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
12945
12946 # ------------------------------------------------------------------------------
12947 # Various PDF Optional Content Flags
12948 # ------------------------------------------------------------------------------
PDF_OC_ON, PDF_OC_TOGGLE, PDF_OC_OFF = range(3)

# ------------------------------------------------------------------------------
# link kinds and link flags
# ------------------------------------------------------------------------------
(
    LINK_NONE,
    LINK_GOTO,
    LINK_URI,
    LINK_LAUNCH,
    LINK_NAMED,
    LINK_GOTOR,
) = range(6)

# The link flags are single bits that can be combined.
LINK_FLAG_L_VALID = 1 << 0
LINK_FLAG_T_VALID = 1 << 1
LINK_FLAG_R_VALID = 1 << 2
LINK_FLAG_B_VALID = 1 << 3
LINK_FLAG_FIT_H = 1 << 4
LINK_FLAG_FIT_V = 1 << 5
LINK_FLAG_R_IS_ZOOM = 1 << 6

SigFlag_SignaturesExist = 1 << 0
SigFlag_AppendOnly = 1 << 1

# Stamp identifiers, numbered consecutively from 0.
(
    STAMP_Approved,
    STAMP_AsIs,
    STAMP_Confidential,
    STAMP_Departmental,
    STAMP_Experimental,
    STAMP_Expired,
    STAMP_Final,
    STAMP_ForComment,
    STAMP_ForPublicRelease,
    STAMP_NotApproved,
    STAMP_NotForPublicRelease,
    STAMP_Sold,
    STAMP_TopSecret,
    STAMP_Draft,
) = range(14)

(
    TEXT_ALIGN_LEFT,
    TEXT_ALIGN_CENTER,
    TEXT_ALIGN_RIGHT,
    TEXT_ALIGN_JUSTIFY,
) = range(4)

# The font property flags are single bits that can be combined.
TEXT_FONT_SUPERSCRIPT = 1 << 0
TEXT_FONT_ITALIC = 1 << 1
TEXT_FONT_SERIFED = 1 << 2
TEXT_FONT_MONOSPACED = 1 << 3
TEXT_FONT_BOLD = 1 << 4

(
    TEXT_OUTPUT_TEXT,
    TEXT_OUTPUT_HTML,
    TEXT_OUTPUT_JSON,
    TEXT_OUTPUT_XML,
    TEXT_OUTPUT_XHTML,
) = range(5)
13004
# Text extraction flags: each TEXT_* name is a module-level alias for the
# mupdf.FZ_STEXT_* attribute on the right-hand side.
TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT

# These aliases are only defined when mupdf_version_tuple is (1, 26) or
# later; with older versions the names do not exist in this module.
if mupdf_version_tuple >= (1, 26):
    TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
    TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
    TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
    TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
    TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
    TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
    TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS

# 2025-05-07: Non-standard names preserved for backwards compatibility.
# Both are second names for flags defined unconditionally above.
TEXT_STEXT_SEGMENT = TEXT_SEGMENT
TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
13031
# Flag presets, one per TEXTFLAGS_* name. Each preset lists its flags in
# full so that it can be changed without affecting the others.

TEXTFLAGS_WORDS = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_BLOCKS = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_DICT = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_PRESERVE_IMAGES
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

# The raw dict uses the same flags as the dict.
TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT

TEXTFLAGS_SEARCH = (
    TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_DEHYPHENATE
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_HTML = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_PRESERVE_IMAGES
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_XHTML = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_PRESERVE_IMAGES
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_XML = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_TEXT = (
    TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)
13092
# Simple text encoding options
TEXT_ENCODING_LATIN, TEXT_ENCODING_GREEK, TEXT_ENCODING_CYRILLIC = range(3)

TOOLS_JM_UNIQUE_ID = 0

# colorspace identifiers
CS_RGB, CS_GRAY, CS_CMYK = range(1, 4)
13104
# PDF Blend Modes
# Each value is the blend mode name as a string. PDF names are
# case-sensitive, so the spelling has to match the standard blend mode names
# of the PDF specification exactly (hence "SoftLight", not "Softlight").
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
PDF_BM_SoftLight = "SoftLight"
13122
13123
# Skeletons for the PDF source of link annotations, keyed by link kind.
# Every value is a callable which takes the variable parts positionally and
# returns the complete annotation object definition as a string.
annot_skel = dict(
    goto1=lambda a, b, c, d, e: (
        f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>"
        f"/Rect[{e}]/BS<</W 0>>/Subtype/Link>>"
    ),
    goto2=lambda a, b: (
        f"<</A<</S/GoTo/D{a}>>"
        f"/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"
    ),
    gotor1=lambda a, b, c, d, e, f, g: (
        f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]"
        f"/F<</F({e})/UF({f})/Type/Filespec>>>>"
        f"/Rect[{g}]/BS<</W 0>>/Subtype/Link>>"
    ),
    gotor2=lambda a, b, c: (
        f"<</A<</S/GoToR/D{a}/F({b})>>"
        f"/Rect[{c}]/BS<</W 0>>/Subtype/Link>>"
    ),
    launch=lambda a, b, c: (
        f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>"
        f"/Rect[{c}]/BS<</W 0>>/Subtype/Link>>"
    ),
    uri=lambda a, b: (
        f"<</A<</S/URI/URI({a})>>"
        f"/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"
    ),
    named=lambda a, b: (
        f"<</A<</S/GoTo/D({a})/Type/Action>>"
        f"/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"
    ),
)
13133
class FileDataError(RuntimeError):
    """Raised for documents with file structure issues."""
13137
class FileNotFoundError(RuntimeError):
    """Raised if file does not exist."""
    # Note: within this module the name shadows the builtin of the same name;
    # this class derives from RuntimeError, not from OSError.
13141
class EmptyFileError(FileDataError):
    """Raised when creating documents from zero-length data."""
13145
13146 # propagate exception class to C-level code
13147 #_set_FileDataError(FileDataError)
13148
# Module-level Colorspace objects, one per CS_* identifier.
csRGB, csGRAY, csCMYK = map(Colorspace, (CS_RGB, CS_GRAY, CS_CMYK))
13152
# Key names for dictionaries built by this module; for example
# JM_annot_border() uses dictkey_width, dictkey_dashes and dictkey_style.
#
# These don't appear to be visible in classic, but are used
# internally.
#
dictkey_align = "align"
dictkey_asc = "ascender"
dictkey_bidi = "bidi"
dictkey_bbox = "bbox"
dictkey_blocks = "blocks"
dictkey_bpc = "bpc"
dictkey_c = "c"
dictkey_chars = "chars"
dictkey_color = "color"
dictkey_colorspace = "colorspace"
dictkey_content = "content"
dictkey_creationDate = "creationDate"
dictkey_cs_name = "cs-name"
dictkey_da = "da"
dictkey_dashes = "dashes"
dictkey_descr = "description"
dictkey_desc = "descender"
dictkey_dir = "dir"
dictkey_effect = "effect"
dictkey_ext = "ext"
dictkey_filename = "filename"
dictkey_fill = "fill"
dictkey_flags = "flags"
dictkey_char_flags = "char_flags"
dictkey_font = "font"
dictkey_glyph = "glyph"
dictkey_height = "height"
dictkey_id = "id"
dictkey_image = "image"
dictkey_items = "items"
dictkey_length = "length"
dictkey_lines = "lines"
dictkey_matrix = "transform"  # note: the key text differs from the variable name
dictkey_modDate = "modDate"
dictkey_name = "name"
dictkey_number = "number"
dictkey_origin = "origin"
dictkey_rect = "rect"
dictkey_size = "size"
dictkey_smask = "smask"
dictkey_spans = "spans"
dictkey_stroke = "stroke"
dictkey_style = "style"
dictkey_subject = "subject"
dictkey_text = "text"
dictkey_title = "title"
dictkey_type = "type"
dictkey_ufilename = "ufilename"
dictkey_width = "width"
dictkey_wmode = "wmode"
dictkey_xref = "xref"
dictkey_xres = "xres"
dictkey_yres = "yres"
13209
13210
# Optional font support: if package `pymupdf_fonts` can be imported, take a
# copy of its font descriptors and give each one a "loader" entry from
# `fontbuffers`. Without the package the descriptor table is empty.
try:
    from pymupdf_fonts import fontdescriptors, fontbuffers
except ImportError:
    fitz_fontdescriptors = {}
else:
    fitz_fontdescriptors = fontdescriptors.copy()
    for k in fitz_fontdescriptors:
        fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
    del fontdescriptors, fontbuffers
13220
# Glyph list for the built-in font 'Symbol'.
# A tuple of 256 (code, width) pairs. Positions without an own entry hold
# the pair (183, 0.46).
symbol_glyphs = (
    ((183, 0.46),) * 32  # positions 0 - 31
    + (  # positions 32 - 126
        (32, 0.25), (33, 0.333), (34, 0.713), (35, 0.5), (36, 0.549), (37, 0.833), (38, 0.778), (39, 0.439),
        (40, 0.333), (41, 0.333), (42, 0.5), (43, 0.549), (44, 0.25), (45, 0.549), (46, 0.25), (47, 0.278),
        (48, 0.5), (49, 0.5), (50, 0.5), (51, 0.5), (52, 0.5), (53, 0.5), (54, 0.5), (55, 0.5),
        (56, 0.5), (57, 0.5), (58, 0.278), (59, 0.278), (60, 0.549), (61, 0.549), (62, 0.549), (63, 0.444),
        (64, 0.549), (65, 0.722), (66, 0.667), (67, 0.722), (68, 0.612), (69, 0.611), (70, 0.763), (71, 0.603),
        (72, 0.722), (73, 0.333), (74, 0.631), (75, 0.722), (76, 0.686), (77, 0.889), (78, 0.722), (79, 0.722),
        (80, 0.768), (81, 0.741), (82, 0.556), (83, 0.592), (84, 0.611), (85, 0.69), (86, 0.439), (87, 0.768),
        (88, 0.645), (89, 0.795), (90, 0.611), (91, 0.333), (92, 0.863), (93, 0.333), (94, 0.658), (95, 0.5),
        (96, 0.5), (97, 0.631), (98, 0.549), (99, 0.549), (100, 0.494), (101, 0.439), (102, 0.521), (103, 0.411),
        (104, 0.603), (105, 0.329), (106, 0.603), (107, 0.549), (108, 0.549), (109, 0.576), (110, 0.521), (111, 0.549),
        (112, 0.549), (113, 0.521), (114, 0.549), (115, 0.603), (116, 0.439), (117, 0.576), (118, 0.713), (119, 0.686),
        (120, 0.493), (121, 0.686), (122, 0.494), (123, 0.48), (124, 0.2), (125, 0.48), (126, 0.549),
    )
    + ((183, 0.46),) * 33  # positions 127 - 159
    + (  # positions 160 - 239
        (160, 0.25), (161, 0.62), (162, 0.247), (163, 0.549), (164, 0.167), (165, 0.713), (166, 0.5), (167, 0.753),
        (168, 0.753), (169, 0.753), (170, 0.753), (171, 1.042), (172, 0.713), (173, 0.603), (174, 0.987), (175, 0.603),
        (176, 0.4), (177, 0.549), (178, 0.411), (179, 0.549), (180, 0.549), (181, 0.576), (182, 0.494), (183, 0.46),
        (184, 0.549), (185, 0.549), (186, 0.549), (187, 0.549), (188, 1), (189, 0.603), (190, 1), (191, 0.658),
        (192, 0.823), (193, 0.686), (194, 0.795), (195, 0.987), (196, 0.768), (197, 0.768), (198, 0.823), (199, 0.768),
        (200, 0.768), (201, 0.713), (202, 0.713), (203, 0.713), (204, 0.713), (205, 0.713), (206, 0.713), (207, 0.713),
        (208, 0.768), (209, 0.713), (210, 0.79), (211, 0.79), (212, 0.89), (213, 0.823), (214, 0.549), (215, 0.549),
        (216, 0.713), (217, 0.603), (218, 0.603), (219, 1.042), (220, 0.987), (221, 0.603), (222, 0.987), (223, 0.603),
        (224, 0.494), (225, 0.329), (226, 0.79), (227, 0.79), (228, 0.786), (229, 0.713), (230, 0.384), (231, 0.384),
        (232, 0.384), (233, 0.384), (234, 0.384), (235, 0.384), (236, 0.494), (237, 0.494), (238, 0.494), (239, 0.494),
    )
    + ((183, 0.46),)  # position 240
    + (  # positions 241 - 254
        (241, 0.329), (242, 0.274), (243, 0.686), (244, 0.686), (245, 0.686), (246, 0.384), (247, 0.549),
        (248, 0.384), (249, 0.384), (250, 0.384), (251, 0.384), (252, 0.494), (253, 0.494), (254, 0.494),
    )
    + ((183, 0.46),)  # position 255
)
13479
13480
# Glyph list for the built-in font 'ZapfDingbats'.
# A tuple of 256 (code, width) pairs. Positions without an own entry hold
# the pair (183, 0.788).
zapf_glyphs = (
    ((183, 0.788),) * 32  # positions 0 - 31
    + (  # positions 32 - 126
        (32, 0.278), (33, 0.974), (34, 0.961), (35, 0.974), (36, 0.98), (37, 0.719), (38, 0.789), (39, 0.79),
        (40, 0.791), (41, 0.69), (42, 0.96), (43, 0.939), (44, 0.549), (45, 0.855), (46, 0.911), (47, 0.933),
        (48, 0.911), (49, 0.945), (50, 0.974), (51, 0.755), (52, 0.846), (53, 0.762), (54, 0.761), (55, 0.571),
        (56, 0.677), (57, 0.763), (58, 0.76), (59, 0.759), (60, 0.754), (61, 0.494), (62, 0.552), (63, 0.537),
        (64, 0.577), (65, 0.692), (66, 0.786), (67, 0.788), (68, 0.788), (69, 0.79), (70, 0.793), (71, 0.794),
        (72, 0.816), (73, 0.823), (74, 0.789), (75, 0.841), (76, 0.823), (77, 0.833), (78, 0.816), (79, 0.831),
        (80, 0.923), (81, 0.744), (82, 0.723), (83, 0.749), (84, 0.79), (85, 0.792), (86, 0.695), (87, 0.776),
        (88, 0.768), (89, 0.792), (90, 0.759), (91, 0.707), (92, 0.708), (93, 0.682), (94, 0.701), (95, 0.826),
        (96, 0.815), (97, 0.789), (98, 0.789), (99, 0.707), (100, 0.687), (101, 0.696), (102, 0.689), (103, 0.786),
        (104, 0.787), (105, 0.713), (106, 0.791), (107, 0.785), (108, 0.791), (109, 0.873), (110, 0.761), (111, 0.762),
        (112, 0.762), (113, 0.759), (114, 0.759), (115, 0.892), (116, 0.892), (117, 0.788), (118, 0.784), (119, 0.438),
        (120, 0.138), (121, 0.277), (122, 0.415), (123, 0.392), (124, 0.392), (125, 0.668), (126, 0.668),
    )
    + ((183, 0.788),) * 34  # positions 127 - 160
    + (  # positions 161 - 171
        (161, 0.732), (162, 0.544), (163, 0.544), (164, 0.91), (165, 0.667), (166, 0.76), (167, 0.76),
        (168, 0.776), (169, 0.595), (170, 0.694), (171, 0.626),
    )
    # positions 172 - 211 all have width 0.788
    + tuple((code, 0.788) for code in range(172, 212))
    + (  # positions 212 - 239
        (212, 0.894), (213, 0.838), (214, 1.016), (215, 0.458),
        (216, 0.748), (217, 0.924), (218, 0.748), (219, 0.918), (220, 0.927), (221, 0.928), (222, 0.928), (223, 0.834),
        (224, 0.873), (225, 0.828), (226, 0.924), (227, 0.924), (228, 0.917), (229, 0.93), (230, 0.931), (231, 0.463),
        (232, 0.883), (233, 0.836), (234, 0.836), (235, 0.867), (236, 0.867), (237, 0.696), (238, 0.696), (239, 0.874),
    )
    + ((183, 0.788),)  # position 240
    + (  # positions 241 - 253
        (241, 0.874), (242, 0.76), (243, 0.946), (244, 0.771), (245, 0.865), (246, 0.771), (247, 0.888),
        (248, 0.967), (249, 0.888), (250, 0.831), (251, 0.873), (252, 0.927), (253, 0.97),
    )
    + ((183, 0.788),) * 2  # positions 254 - 255
)
13739
13740
13741 # Functions
13742 #
13743
13744 def _read_samples( pixmap, offset, n):
13745 # fixme: need to be able to get a sample in one call, as a Python
13746 # bytes or similar.
13747 ret = []
13748 if not pixmap.samples():
13749 # mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
13750 return ret
13751 for i in range( n):
13752 ret.append( mupdf.fz_samples_get( pixmap, offset + i))
13753 return bytes( ret)
13754
13755
13756 def _INRANGE(v, low, high):
13757 return low <= v and v <= high
13758
13759
def _remove_dest_range(pdf, numbers):
    '''
    Delete link annotations whose destination is a page in `numbers`.

    Pages whose own number is in `numbers` are skipped. On all other pages,
    every 'Link' annotation is inspected: its destination is taken from the
    /D entry of a 'GoTo' action if the annotation has an /A entry (links
    with other action types are left alone), else from its /Dest entry. If
    the destination resolves to a page number contained in `numbers`, the
    annotation is removed from the page's /Annots array.

    Args:
        pdf: the PDF document.
        numbers: container of page numbers; only used for `in` tests.
    '''
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue

        pageref = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(pageref, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue

        # Go backwards, so that deleting an item does not shift the
        # positions of the items still to be visited.
        for j in reversed(range(mupdf.pdf_array_len(annots))):
            link = mupdf.pdf_array_get(annots, j)
            subtype = mupdf.pdf_dict_get(link, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue

            action = mupdf.pdf_dict_get(link, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(link, PDF_NAME('Dest'))
            if action.m_internal:
                action_type = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_type, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))

            pno = -1
            if mupdf.pdf_is_array(dest):
                target = mupdf.pdf_array_get(dest, 0)
                pno = mupdf.pdf_lookup_page_number(pdf, target)
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(pdf.super(), mupdf.pdf_to_text_string(dest))
                pno = location.page

            # A negative number means that the page lookup did not work.
            if pno >= 0 and pno in numbers:
                mupdf.pdf_array_delete(annots, j)
13794
13795
def ASSERT_PDF(cond):
    '''
    Check that `cond` wraps an actual PDF object.

    Asserts that `cond` is a `mupdf.PdfPage` or `mupdf.PdfDocument`, and
    raises an Exception with message MSG_IS_NO_PDF if its `m_internal` is
    not set.
    '''
    pdf_types = (mupdf.PdfPage, mupdf.PdfDocument)
    assert isinstance(cond, pdf_types), f'{type(cond)=} {cond=}'
    if cond.m_internal:
        return
    raise Exception(MSG_IS_NO_PDF)
13800
13801
def EMPTY_IRECT():
    '''Return an IRect with top-left at FZ_MAX_INF_RECT and bottom-right at FZ_MIN_INF_RECT.'''
    top_left = bottom_right = None  # placeholders, assigned right below
    top_left, bottom_right = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return IRect(top_left, top_left, bottom_right, bottom_right)
13804
13805
def EMPTY_QUAD():
    '''Return the `.quad` of the rectangle made by EMPTY_RECT().'''
    rect = EMPTY_RECT()
    return rect.quad
13808
13809
def EMPTY_RECT():
    '''Return a Rect with top-left at FZ_MAX_INF_RECT and bottom-right at FZ_MIN_INF_RECT.'''
    top_left, bottom_right = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return Rect(top_left, top_left, bottom_right, bottom_right)
13812
13813
def ENSURE_OPERATION(pdf):
    '''Raise an Exception unless JM_have_operation() reports true for `pdf`.'''
    if JM_have_operation(pdf):
        return
    raise Exception("No journalling operation started")
13817
13818
def INFINITE_IRECT():
    '''Return an IRect with top-left at FZ_MIN_INF_RECT and bottom-right at FZ_MAX_INF_RECT.'''
    top_left, bottom_right = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return IRect(top_left, top_left, bottom_right, bottom_right)
13821
13822
def INFINITE_QUAD():
    '''Return the `.quad` of the rectangle made by INFINITE_RECT().'''
    rect = INFINITE_RECT()
    return rect.quad
13825
13826
def INFINITE_RECT():
    '''Return a Rect with top-left at FZ_MIN_INF_RECT and bottom-right at FZ_MAX_INF_RECT.'''
    top_left, bottom_right = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return Rect(top_left, top_left, bottom_right, bottom_right)
13829
13830
def JM_BinFromBuffer(buffer_):
    '''
    Return the result of mupdf.fz_buffer_extract_copy() for `buffer_`, which
    must be a mupdf.FzBuffer. (Turns an fz_buffer into a Python bytes
    object.)
    '''
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
13838
13839
def JM_EscapeStrFromStr(c):
    '''
    Return a string with one character per byte of `c`'s UTF-8 encoding.

    Args:
        c: a `str` or None.
    Returns:
        '' if `c` is None. Otherwise a `str` in which every byte of the
        UTF-8 encoding of `c` (surrogate escapes turned back into their
        original bytes) appears as the character with that code point.
    '''
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str`
    # in which every invalid byte of the original `const char*` is
    # represented by a lone surrogate in the range U+DC80..U+DCFF.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to `bytes` with 'surrogateescape'
    # to get the original bytes back, and then map every byte to the
    # character with the same code point. Decoding as latin-1 does exactly
    # that (byte N becomes chr(N)) in one step, instead of appending to a
    # string once per byte.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    return c.encode('utf8', 'surrogateescape').decode('latin-1')
13867
13868
def JM_BufferFromBytes(stream):
    '''
    Make an fz_buffer from binary data.

    Args:
        stream: `bytes`, `bytearray`, or any object with a `getvalue()`
            method (for example io.BytesIO). If `getvalue()` returns a
            `str`, it is encoded as UTF-8.
    Returns:
        A buffer holding a copy of the data. For any other kind of `stream`
        an empty mupdf.FzBuffer is returned.
    Raises:
        Exception: if `getvalue()` returns neither str, bytes nor bytearray.
    '''
    if isinstance(stream, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()

    data = stream.getvalue()
    if isinstance(data, str):
        data = data.encode('utf-8')
    if not isinstance(data, (bytes, bytearray)):
        raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
    return mupdf.fz_new_buffer_from_copied_data(data)
13885
13886
def JM_FLOAT_ITEM(obj, idx):
    '''
    Return `obj[idx]` converted to float, or None if PySequence_Check()
    rejects `obj`.
    '''
    if PySequence_Check(obj):
        return float(obj[idx])
    return None
13891
def JM_INT_ITEM(obj, idx):
    '''
    Fetch a numeric item from a sequence.

    Returns:
        `(0, obj[idx])` if `idx` is below `len(obj)` and the item is an int
        or a float, else `(1, None)`.
    '''
    if idx >= len(obj):
        return 1, None
    item = obj[idx]
    if not isinstance(item, (int, float)):
        return 1, None
    return 0, item
13898
13899
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Render a page into a new pixmap with a draw device, so that separations
    can be supported.

    Args:
        doc: document; asked for its output intent.
        page: the page to render.
        ctm: matrix-like object, converted with JM_matrix_from_py().
        cs: colorspace to use, unless replaced by the output intent.
        alpha: passed to pixmap creation. If true the new pixmap is
            cleared, else it is filled with value 0xFF.
        annots: if true the whole page is run through the device, else the
            page contents only.
        clip: rect-like object, converted with JM_rect_from_py() and
            intersected with the page bounds.
    Returns:
        The new pixmap.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2

    # fixme: this is a build-time setting in MuPDF's config.h.
    FZ_ENABLE_SPOT_RENDERING = True
    spots = SPOTS_OVERPRINT_SIM if FZ_ENABLE_SPOT_RENDERING else SPOTS_NONE

    seps = None
    colorspace = cs

    matrix = JM_matrix_from_py(ctm)
    # Page bounds, restricted to the clip (a no-op if clip is not given),
    # then transformed and rounded to the pixmap's bbox.
    area = mupdf.fz_intersect_rect(mupdf.fz_bound_page(page), JM_rect_from_py(clip))
    bbox = mupdf.fz_round_rect(mupdf.fz_transform_rect(area, matrix))

    # The document's output intent: if present and having the same number
    # of components as `cs`, use it instead of the parameter.
    oi = mupdf.fz_document_output_intent(doc)
    if oi.m_internal and mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
        colorspace = mupdf.fz_keep_colorspace(oi)

    # Check if spots rendering is available and if so use separations.
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            n = mupdf.fz_count_separations(seps)
            if spots == SPOTS_FULL:
                behavior = mupdf.FZ_SEPARATION_SPOT
            else:
                behavior = mupdf.FZ_SEPARATION_COMPOSITE
            for i in range(n):
                mupdf.fz_set_separation_behavior(seps, i, behavior)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)

    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    dev = mupdf.fz_new_draw_device(matrix, pix)
    run_page = mupdf.fz_run_page if annots else mupdf.fz_run_page_contents
    run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
13968
13969
def JM_StrAsChar(x):
    '''Return `x` unchanged.'''
    # fixme: should encode (`x.encode('utf8')`), but swig doesn't pass bytes
    # to C as const char*.
    return x
13974
13975
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimal places.
    Items with an absolute value below 1e-4 become 0.
    '''
    return tuple(round(value, 5) if abs(value) >= 1e-4 else 0 for value in o)
13978
13979
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimal places.
    Items with an absolute value below 1e-3 become 0.
    '''
    return tuple(round(value, 3) if abs(value) >= 1e-3 else 0 for value in o)
13982
13983
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`.

    None gives '', `bytes` are decoded as UTF-8, and a `str` is returned
    unchanged. Anything else fails the assertion.
    '''
    if s is None:
        return ''
    if isinstance(s, bytes):
        return s.decode('utf8')
    assert isinstance(s, str), f'{type(s)=} {s=}'
    return s
13991
13992
def JM_add_annot_id(annot, stem):
    '''
    Give an annotation or widget a unique /NM key.

    The name has the form '<JM_annot_id_stem>-<stem><number>', using the
    smallest number from 0 upwards for which the name is not yet in the
    page's list of annotation ids.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = _pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    used_ids = JM_get_annot_id_list(page)

    counter = 0
    stem_id = f'{JM_annot_id_stem}-{stem}{counter}'
    while stem_id in used_ids:
        counter += 1
        stem_id = f'{JM_annot_id_stem}-{stem}{counter}'

    response = JM_StrAsChar(stem_id)
    mupdf.pdf_dict_puts(annot_obj, "NM", mupdf.pdf_new_string(response, len(response)))
    page.doc().m_internal.resynth_required = 0
14012
14013
def JM_add_oc_object(pdf, ref, xref):
    '''
    Store a reference to object `xref` under key /OC of dictionary `ref`.

    The referenced object must be a dictionary whose /Type is 'OCG' or
    'OCMD'; otherwise RAISEPY() is called with MSG_BAD_OC_REF and
    PyExc_ValueError.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)

    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    is_oc_type = any(
        mupdf.pdf_objcmp(type_, PDF_NAME(name)) == 0
        for name in ('OCG', 'OCMD')
    )
    if is_oc_type:
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
    else:
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
14028
14029
def JM_annot_border(annot_obj):
    '''
    Collect the border properties of an annotation object.

    Width and dashes are first read from the /Border array. If a /BS
    dictionary exists, its /W replaces the width, its /S gives the style,
    and the items of its /D array are appended to the dashes. The cloud
    intensity is entry /I of the /BE dictionary.

    Returns:
        A dict with keys dictkey_width (-1 if not found), dictkey_dashes (a
        tuple), dictkey_style (None if not found or empty) and 'clouds'
        (-1 if not found).
    '''
    dashes = []
    style = None
    width = -1
    clouds = -1

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            border_dash = mupdf.pdf_array_get(border, 3)
            dashes.extend(
                mupdf.pdf_to_int(mupdf.pdf_array_get(border_dash, i))
                for i in range(mupdf.pdf_array_len(border_dash))
            )

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        # An empty style name counts as "no style".
        style = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S'))) or None
        bs_dash = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dash.m_internal:
            dashes.extend(
                mupdf.pdf_to_int(mupdf.pdf_array_get(bs_dash, i))
                for i in range(mupdf.pdf_array_len(bs_dash))
            )

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
        dictkey_width: width,
        dictkey_dashes: tuple(dashes),
        dictkey_style: style,
        'clouds': clouds,
    }
14068
14069
def JM_annot_colors(annot_obj):
    '''
    Return the stroke and fill colors of an annotation object.

    Returns:
        A dict with keys dictkey_stroke and dictkey_fill. Each value is the
        list of numbers in the annotation's /C resp. /IC array, or an empty
        list if that entry is not an array.
    '''
    stroke = []
    fill = []

    c_arr = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C)
    if mupdf.pdf_is_array(c_arr):
        stroke.extend(
            mupdf.pdf_to_real(mupdf.pdf_array_get(c_arr, i))
            for i in range(mupdf.pdf_array_len(c_arr))
        )

    ic_arr = mupdf.pdf_dict_gets(annot_obj, "IC")
    if mupdf.pdf_is_array(ic_arr):
        fill.extend(
            mupdf.pdf_to_real(mupdf.pdf_array_get(ic_arr, i))
            for i in range(mupdf.pdf_array_len(ic_arr))
        )

    return {dictkey_stroke: stroke, dictkey_fill: fill}
14091
14092
def JM_annot_set_border(border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    The entries /BS, /BE and /Border are deleted and then /BS (and /BE for a
    positive clouds value) are written again. Any property not given in
    `border` keeps the value it had before.

    Args:
        border: dict which may have keys dictkey_width, dictkey_dashes,
            dictkey_style and 'clouds'. A width or clouds value that is
            missing, None or negative, and dashes or style that are missing
            or None, mean "keep the old value".
        doc: the PDF document, needed for creating the dashes array.
        annot_obj: the annotation object to modify.
    '''
    assert isinstance(border, dict)
    nwidth = border.get(dictkey_width)       # new width
    ndashes = border.get(dictkey_dashes)     # new dashes
    nstyle = border.get(dictkey_style)       # new style
    nclouds = border.get('clouds', -1)       # new clouds value

    # Get the old border properties before they are deleted below.
    oborder = JM_annot_border(annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Border'))

    # Populate border items: keep old values for any omitted new ones.
    # None is checked first, because comparing None with a number raises
    # a TypeError.
    if nwidth is None or nwidth < 0:
        nwidth = oborder.get(dictkey_width)    # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get(dictkey_dashes)  # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get(dictkey_style)    # no new style: keep old
    if nclouds is None or nclouds < 0:
        nclouds = oborder.get("clouds", -1)    # no new clouds: keep old

    dashlen = 0
    if isinstance(ndashes, tuple) and len(ndashes) > 0:
        dashlen = len(ndashes)
        darr = mupdf.pdf_new_array(doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int(darr, d)
        mupdf.pdf_dict_putl(annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # With dashes present the style is always 'D', else it is derived from
    # the style value.
    if dashlen == 0:
        style_obj = JM_get_border_style(nstyle)
    else:
        style_obj = PDF_NAME('D')
    mupdf.pdf_dict_putl(annot_obj, style_obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict(annot_obj, PDF_NAME('BE'), 2)
        effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put(effect, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int(effect, PDF_NAME('I'), nclouds)
14145
14146
def make_escape(ch):
    '''
    Return the text representing code point `ch` (an int).

    Code points 32 to 127 (except the backslash, 92) and the line feed (10)
    are returned as the character itself. Everything else becomes a unicode
    escape sequence: '\\uXXXX' up to 0xffff, '\\UXXXXXXXX' above, with
    '\\u005c' for the backslash and '\\ufffd' for surrogates
    (0xd800 - 0xdfff).
    '''
    if ch == 92:
        return "\\u005c"
    if ch == 10 or 32 <= ch <= 127:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    if ch > 0xffff:
        return "\\U%08x" % ch
    return "\\u%04x" % ch
14158
14159
def JM_append_rune(buff, ch):
    """
    Append code point `ch` to fz_buffer `buff`, in the form returned by
    make_escape() (non-ascii runes in unicode escape format).
    """
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
14165
14166
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Append one word to the wordlist `lines`.

    The new item is the tuple (x0, y0, x1, y1, text, block_n, line_n,
    word_n), with the coordinates taken from `wbbox` and the text from
    `buff` via JM_EscapeStrFromBuffer().

    Returns:
        The next word counter (`word_n + 1`) and a new empty mupdf.FzRect.
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append(
        (wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n)
    )
    empty_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    return word_n + 1, empty_rect
14184
14185
def JM_add_layer_config(pdf, name, creator, ON):
    '''
    Add an optional content (OC) configuration to the PDF catalog.

    A new configuration dictionary with /Name, optionally /Creator,
    /BaseState 'OFF' and an /ON array is appended to the /Configs array of
    the OC properties (the array is created if missing).

    Args:
        pdf: the PDF document.
        name: text for /Name.
        creator: text for /Creator, or None to omit the entry.
        ON: sequence of xref numbers. Items that are not numbers are
            skipped; the others are put into /ON if the /OCGs array
            contains them. May be None or empty.
    '''
    ocp = JM_ensure_ocproperties(pdf)
    configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array(configs):
        configs = mupdf.pdf_dict_put_array(ocp, PDF_NAME('Configs'), 1)

    config = mupdf.pdf_new_dict(pdf, 5)
    mupdf.pdf_dict_put_text_string(config, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string(config, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put(config, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    onarray = mupdf.pdf_dict_put_array(config, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
        for i in range(len(ON)):
            failed, xref = JM_INT_ITEM(ON, i)
            if failed == 1:
                continue
            ind = mupdf.pdf_new_indirect(pdf, xref, 0)
            if mupdf.pdf_array_contains(ocgs, ind):
                mupdf.pdf_array_push(onarray, ind)

    mupdf.pdf_array_push(configs, config)
14214
14215
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of the quad of character `ch`.

    If the line's wmode is set and the rectangle is less high than the
    character's size, its top is moved up so that the height equals the
    size.
    '''
    r = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode and r.y1 < r.y0 + ch.m_internal.size:
        r.y0 = r.y1 - ch.m_internal.size
    return r
14227
14228
def JM_char_font_flags(font, line, ch):
    '''
    Return an integer combining the TEXT_FONT_* bits of `font`.

    If both `line` and `ch` are given, the result of
    `detect_super_script(line, ch)` is added as well.
    '''
    flags = 0
    if line and ch:
        flags += detect_super_script(line, ch)
    # Each predicate contributes its bit when it returns a true value.
    for predicate, bit in (
            (mupdf.fz_font_is_italic, TEXT_FONT_ITALIC),
            (mupdf.fz_font_is_serif, TEXT_FONT_SERIFED),
            (mupdf.fz_font_is_monospaced, TEXT_FONT_MONOSPACED),
            (mupdf.fz_font_is_bold, TEXT_FONT_BOLD),
            ):
        flags += predicate(font) * bit
    return flags
14238
14239
def JM_char_quad(line, ch):
    '''
    Return the quad of character `ch` in text line `line`, re-computing it
    if the font's ascender / descender values make no sense.

    The stored quad is returned unchanged when quad corrections are
    switched off, when the line uses a non-zero write mode, or when
    ascender minus descender is at least 1 and small glyph heights are not
    requested.
    '''
    if 1 and g_use_extra:
        # Using the compiled helper reduces the time taken to extract text
        # from PyMuPDF.pdf from 20s to 15s.
        return mupdf.FzQuad(extra.JM_char_quad(line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # No special handling requested / never touch vertical write mode.
    if _globals.skip_quad_corrections or line.m_internal.wmode:
        return ch.quad

    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0:
        return mupdf.FzQuad(ch.m_internal.quad)     # no problem

    # Font bbox width; only replaced further down if the char width is ~0.
    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0
    if asc < 1e-3:
        # probably Tesseract glyphless font
        dsc = -0.1
        asc = 0.9
        asc_dsc = 1.0

    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc

    # Re-compute the quad with the adjusted ascender / descender values:
    # move ch->origin to (0,0) and de-rotate the quad, then adjust the
    # corners, re-rotate and move back to the ch->origin location.
    cosine = line.m_internal.dir.x
    sine = line.m_internal.dir.y
    derotate = mupdf.fz_make_matrix(cosine, -sine, sine, cosine, 0, 0)
    rotate = mupdf.fz_make_matrix(cosine, sine, -sine, cosine, 0, 0)
    if cosine == -1:
        # left-right flip
        derotate.d = 1
        rotate.d = 1
    to_origin = mupdf.fz_make_matrix(
            1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    from_origin = mupdf.fz_make_matrix(
            1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)

    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), to_origin)
    quad = mupdf.fz_transform_quad(quad, derotate)

    # Adjust vertical coordinates.
    if cosine == 1 and quad.ul.y > 0:
        # up-down flip
        upper, lower = asc, dsc
    else:
        upper, lower = -asc, -dsc
    quad.ul.y = upper
    quad.ur.y = upper
    quad.ll.y = lower
    quad.lr.y = lower

    # Adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0

    if quad.lr.x - quad.ll.x < FLT_EPSILON:
        glyph = mupdf.fz_encode_character(font, ch.m_internal.c)
        if glyph:
            fwidth = mupdf.fz_advance_glyph(font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x

    quad = mupdf.fz_transform_quad(quad, rotate)        # rotate back
    return mupdf.fz_transform_quad(quad, from_origin)   # translate back
14327
14328
def JM_choice_options(annot):
    '''
    Return the list of choices of a list box or combo box widget.

    Each entry of the widget's /Opt array yields one list item: a tuple of
    two strings if the entry is an array of length 2, else a single string.

    Returns None if `mupdf.pdf_choice_widget_options2()` reports no options
    (wrong widget type).
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)

    count = len(mupdf.pdf_choice_widget_options2(annot, 0))
    if count == 0:
        return  # wrong widget type

    optarr = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt'))
    choices = []
    for idx in range(count):
        entry = mupdf.pdf_array_get(optarr, idx)
        if mupdf.pdf_array_len(entry) == 2:
            first = mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 0))
            second = mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 1))
            choices.append((first, second))
        else:
            choices.append(mupdf.pdf_to_text_string(entry))
    return choices
14355
14356
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - this version also supports non-alpha pixmaps.

    Args:
        dest: the pixmap to modify.
        value: sample value to write. For a colorspace with 4 components
            (CMYK) the first three components are set to 0 and the fourth
            to `255 - value`.
        b: the rectangle to clear; it is intersected with the pixmap's
            bbox first.

    Returns:
        0 if the intersection is empty (nothing written), else 1.

    If the pixmap has an alpha channel, alpha is set to 255.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    stride = dest.stride()
    n = dest.n()
    has_alpha = dest.alpha()
    row_start = stride * (b.y0 - dest.y()) + n * (b.x0 - dest.x())

    # Build the sample values of one pixel once.
    # CMYK needs special handling (and potentially any other subtractive
    # colorspaces).
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        pixel = [0, 0, 0, 255 - value]
        if has_alpha:
            pixel.append(255)
    else:
        pixel = [value] * (n - 1)
        pixel.append(255 if has_alpha else value)

    # Process exactly `height` rows. The previous loop tested `y == 0`
    # before decrementing and therefore wrote one row too many.
    for _ in range(height):
        pos = row_start
        for _ in range(width):
            for sample in pixel:
                mupdf.fz_samples_set(dest, pos, sample)
                pos += 1
        row_start += stride
    return 1
14410
14411
def JM_color_FromSequence(color):
    '''
    Normalize a color specification.

    Args:
        color: a single int / float, or a list / tuple with 0, 1, 3 or 4
            components.

    Returns:
        `(n, components)` where `n` is the number of components and
        `components` is a copy of the input in which every value below 0
        or above 1 is replaced by 1. The copy is a tuple if `color` was a
        tuple, otherwise a list.
        `(-1, [])` is returned if `color` has an unsupported type or
        length.
    '''
    if isinstance(color, (int, float)):     # maybe just a single float
        color = [color]

    if not isinstance(color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    # Build a new sequence instead of assigning into a copy of the input:
    # a tuple copy is immutable, so clamping used to raise TypeError.
    clamped = [1 if (c < 0 or c > 1) else c for c in color]
    if isinstance(color, tuple):
        return len(clamped), tuple(clamped)
    return len(clamped), clamped
14428
14429
def JM_color_count(pm, clip):
    '''
    Count how often each pixel value occurs in pixmap `pm` within `clip`.

    Args:
        pm: the pixmap.
        clip: rectangle-like value (converted with JM_rect_from_py());
            it is rounded and intersected with the pixmap's bbox.

    Returns:
        A dict mapping each pixel value, as returned by
        `_read_samples(pm, offset, n)`, to its number of occurrences.
        The dict is empty if the intersection is empty.
    '''
    if g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)

    rc = dict()
    irect = mupdf.fz_pixmap_bbox(pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    # Check for an empty area before touching any samples: the start
    # offset computed below is meaningless for an empty rectangle.
    if mupdf.fz_is_empty_irect(irect):
        return rc

    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    row = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n

    # Count runs of equal pixels, so that the dict is only updated when the
    # pixel value changes.
    run_pixel = _read_samples(pm, row, n)
    run_len = 0
    for _ in range(height):
        for col in range(0, substride, n):
            pixel = _read_samples(pm, row + col, n)
            if pixel == run_pixel:
                run_len += 1
            else:
                rc[run_pixel] = rc.get(run_pixel, 0) + run_len
                run_pixel = pixel
                run_len = 1
        row += stride
    # Store the last run.
    rc[run_pixel] = rc.get(run_pixel, 0) + run_len
    return rc
14468
14469
def JM_compress_buffer(inbuffer):
    '''
    Return a new buffer holding the deflated content of `inbuffer`
    (compression level `mupdf.FZ_DEFLATE_BEST`).

    Returns None if deflating yields no data or a length of 0.
    '''
    data, compressed_length = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    if data and compressed_length != 0:
        compressed = mupdf.FzBuffer(
                mupdf.fz_new_buffer_from_data(data, compressed_length)
                )
        mupdf.fz_resize_buffer(compressed, compressed_length)
        return compressed
    return None
14484
14485
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters whose bbox overlaps `area`.

    `page` is iterated block by block; only blocks of type
    `mupdf.FZ_STEXT_BLOCK_TEXT` are considered. Every selected character
    is emitted via `make_escape()`. A "\\n" is inserted between the text of
    two lines that each contributed at least one character.
    '''
    pieces = []
    newline_pending = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            line_contributed = False
            for ch in line:
                if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                    continue
                line_contributed = True
                if newline_pending:
                    # separate from the previous contributing line
                    pieces.append("\n")
                    newline_pending = False
                pieces.append(make_escape(ch.m_internal.c))
            if line_contributed:
                newline_pending = True
    return "".join(pieces)
14507
14508
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert any MuPDF document to a PDF.

    Args:
        doc: the source document.
        fp: number of the first page to convert.
        tp: number of the last page to convert. If `fp > tp`, the pages
            are converted in descending order.
        rotate: page rotation, normalized with JM_norm_rotation().

    Returns:
        A bytes object containing the PDF.
    '''
    pdfout = mupdf.PdfDocument()
    if fp > tp:
        step, low, high = -1, tp, fp    # count backwards
    else:
        step, low, high = 1, fp, tp
    rot = JM_norm_rotation(rotate)

    # Interpret & write the document pages as PDF pages.
    pno = fp
    while _INRANGE(pno, low, high):
        page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(page)
        dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None
        page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
        mupdf.pdf_insert_page(pdfout, -1, page_obj)
        pno += step

    # PDF created - now write it to a buffer.
    # Prepare the write options structure.
    opts = mupdf.PdfWriteOptions()
    for option, setting in (
            ('do_garbage', 4),
            ('do_compress', 1),
            ('do_compress_images', 1),
            ('do_compress_fonts', 1),
            ('do_sanitize', 1),
            ('do_incremental', 0),
            ('do_ascii', 0),
            ('do_decompress', 0),
            ('do_linear', 0),
            ('do_clean', 1),
            ('do_pretty', 0),
            ):
        setattr(opts, option, setting)

    res = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(res)
    mupdf.pdf_write_document(pdfout, out, opts)
    out.fz_close_output()
    pdf_bytes = mupdf.fz_buffer_extract_copy(res)
    assert isinstance(pdf_bytes, bytes)
    return pdf_bytes
14558
14559
14560 # Copied from MuPDF v1.14
14561 # Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation of the given field type on `page`.

    The field name is stored under /T, and the widget is appended to the
    document's Root/AcroForm/Fields array (created if missing). For
    signature widgets, Root/AcroForm/SigFlags additionally gets the
    SigFlag_SignaturesExist and SigFlag_AppendOnly bits.

    If any step fails, the annotation is deleted again, SigFlags is
    restored for signature widgets, and the exception is re-raised.

    Returns:
        The new annotation.
    '''
    def put_acroform(key, value):
        # Store `value` under Root/AcroForm/<key> of the trailer.
        mupdf.pdf_dict_putl(
                mupdf.pdf_trailer(doc),
                value,
                PDF_NAME('Root'),
                PDF_NAME('AcroForm'),
                PDF_NAME(key),
                )

    is_signature = (type == mupdf.PDF_WIDGET_TYPE_SIGNATURE)
    old_sigflags = mupdf.pdf_to_int(
            mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags")
            )
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)

        if is_signature:
            sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
            put_acroform('SigFlags', mupdf.pdf_new_int(sigflags))

        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form.
        form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not form.m_internal:
            form = mupdf.pdf_new_array(doc, 1)
            put_acroform('Fields', form)
        # Cleanup relies on this statement being last.
        mupdf.pdf_array_push(form, annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        mupdf.pdf_delete_annot(page, annot)
        if is_signature:
            put_acroform('SigFlags', mupdf.pdf_new_int(old_sigflags))
        raise
    return annot
14610
14611
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    The (inheritable) /CropBox entry is used; if it is infinite or empty,
    the MediaBox from JM_mediabox() is taken instead. The y-coordinates of
    the result are measured from the MediaBox's y1:
    `y0 = mediabox.y1 - cropbox.y1` and `y1 = mediabox.y1 - cropbox.y0`.
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    cropbox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
            )
    if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
        cropbox = mediabox
    # Both new values are computed before either one is stored.
    cropbox.y0, cropbox.y1 = (
            mediabox.y1 - cropbox.y1,
            mediabox.y1 - cropbox.y0,
            )
    return cropbox
14630
14631
def JM_cropbox_size(page_obj):
    '''
    Return the size of a PDF page's CropBox as a point (width, height),
    using absolute values of the coordinate differences.
    '''
    box = JM_cropbox(page_obj)
    return mupdf.fz_make_point(abs(box.x1 - box.x0), abs(box.y1 - box.y0))
14638
14639
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix given by JM_rotate_page_matrix(page).
    '''
    return mupdf.fz_invert_matrix(JM_rotate_page_matrix(page))
14646
14647
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Builds a /Filespec dictionary with /CI, /EF, /F, /UF and /Desc entries.
    The file content `buf` is stored in a new stream object referenced by
    /EF/F, written with JM_update_stream(); its /DL, /Length and
    /Params/Size entries are set to the byte length of `buf`.

    Returns:
        The new file specification dictionary.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    for key, text in (('F', filename), ('UF', ufilename), ('Desc', desc)):
        mupdf.pdf_dict_put_text_string(filespec, PDF_NAME(key), text)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # Create the stream with placeholder content, then put the real data.
    placeholder = b' '
    stream = mupdf.pdf_add_stream(
            pdf,
            mupdf.fz_new_buffer_from_copied_data(placeholder),
            mupdf.PdfObj(),
            0,
            )
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('DL'), size)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('Length'), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
14683
14684
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning of the PDF catalog related to embedded files:
    (1) remove an empty /Collection entry
    (2) set /PageMode to UseAttachments if /Names/EmbeddedFiles/Names
        exists
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))

    collection = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
    collection_is_empty = (
            collection.m_internal and mupdf.pdf_dict_len(collection) == 0
            )
    if collection_is_empty:
        mupdf.pdf_dict_del(root, PDF_NAME('Collection'))

    embedded_names = mupdf.pdf_dict_getl(
            root,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
            )
    if embedded_names.m_internal:
        mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
14707
14708
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the content of `buff` as a str, decoded with
    PyUnicode_DecodeRawUnicodeEscape() using errors='replace'.

    Returns '' if `buff` has no underlying buffer.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
14715
14716
def JM_ensure_identity(pdf):
    '''
    Store an /ID in the PDF trailer if there is none yet.

    The new /ID is an array holding the same 16 random bytes twice.
    '''
    trailer_id = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if trailer_id.m_internal:
        return

    # Need to convert the raw bytes into a str to send to
    # mupdf.pdf_new_string(). chr() seems to work for this.
    rnd = ''.join(chr(byte) for byte in mupdf.fz_memrnd2(16))
    new_id = mupdf.pdf_dict_put_array(mupdf.pdf_trailer(pdf), PDF_NAME('ID'), 2)
    for _ in range(2):
        mupdf.pdf_array_push(new_id, mupdf.pdf_new_string(rnd, len(rnd)))
14732
def JM_ensure_ocproperties(pdf):
    '''
    Ensure that the catalog has an /OCProperties entry and return it.

    If missing, /OCProperties is created with an empty /OCGs array and a
    /D dictionary holding empty /ON, /OFF, /Order and /RBGroups arrays.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    ocp = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if not ocp.m_internal:
        ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
        mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
        default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
        for key in ('ON', 'OFF', 'Order', 'RBGroups'):
            mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
14749
14750
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv".

    The decision is based on the first two characters of `name` ("Co",
    "Ti", "Sy", "Za", each also accepted in all-lowercase). An empty name
    or any other prefix yields "Helv".
    '''
    if not name:
        return "Helv"
    for prefixes, abbreviation in (
            (("Co", "co"), "Cour"),
            (("Ti", "ti"), "TiRo"),
            (("Sy", "sy"), "Symb"),
            (("Za", "za"), "ZaDb"),
            ):
        if name.startswith(prefixes):
            return abbreviation
    return "Helv"
14765
14766
def JM_field_type_text(wtype):
    '''
    Return the name of a widget type as a string.

    `wtype` is compared with the `mupdf.PDF_WIDGET_TYPE_*` constants;
    "unknown" is returned if none of them matches.
    '''
    known_types = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, "Button"),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, "CheckBox"),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, "RadioButton"),
            (mupdf.PDF_WIDGET_TYPE_TEXT, "Text"),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, "ListBox"),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, "ComboBox"),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, "Signature"),
            )
    for type_code, text in known_types:
        if wtype == type_code:
            return text
    return "unknown"
14786
14787
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill a rectangle of pixmap `dest` with a color.

    Args:
        dest: the pixmap (`mupdf.FzPixmap`).
        col: indexable color; `col[i]` is written to component `i` of every
            pixel, for `i` in `range(dest.n())`.
        b: the rectangle; it is intersected with the pixmap's bbox first.

    Returns:
        0 if the intersection is empty (nothing written), else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    stride = dest.stride()
    row_start = stride * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    for _ in range(height):
        pos = row_start
        for _ in range(width):
            for component in range(dest.n()):
                mupdf.fz_samples_set(dest, pos, col[component])
                pos += 1
        row_start += stride
    return 1
14809
14810
def JM_find_annot_irt(annot):
    '''
    Return the first annotation whose /IRT key ("In Response To") points to
    annot. Used to remove the response chain of a given annotation.

    Returns None if the page has no such annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # Loop through MuPDF's internal annots array.
    candidate = mupdf.pdf_first_annot(page)
    while 1:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None     # end of list - nothing found
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        # pdf_objcmp() returns a false value for equal objects.
        if irt.m_internal and not mupdf.pdf_objcmp(irt, annot_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
14836
14837
def JM_font_ascender(font):
    '''
    Return the ascender value of `font`.

    This own version returns the fixed value 0.8 when
    `_globals.skip_quad_corrections` is set, else MuPDF's value.
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
14846
14847
def JM_font_descender(font):
    '''
    Return the descender value of `font`.

    This own version returns the fixed value -0.2 when
    `_globals.skip_quad_corrections` is set, else MuPDF's value.
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
14857
14858
def JM_is_word_delimiter(ch, delimiters):
    """Check if ch is a word delimiting character.

    Args:
        ch: the character's integer code.
        delimiters: optional iterable of extra delimiting characters.

    Returns:
        True if `ch` is at most 32, equals 160, lies in 0x202a..0x202e, or
        if `chr(ch)` equals one of the items in `delimiters`; else False.
    """
    # Covers any whitespace plus unicodes that switch between
    # right-to-left and left-to-right languages.
    if ch <= 32 or ch == 160 or 0x202a <= ch <= 0x202e:
        return True
    if not delimiters:
        return False    # no extra delimiters provided
    char = chr(ch)
    return any(delimiter == char for delimiter in delimiters)
14877
14878
def JM_is_rtl_char(ch):
    '''
    Return True if the character code `ch` lies in 0x590..0x900
    (both inclusive), else False.
    '''
    return not (ch < 0x590 or ch > 0x900)
14883
14884
def JM_font_name(font):
    '''
    Return the name of `font`.

    If the first '+' of the name is at index 6 and
    `_globals.subset_fontnames` is not set, the part up to and including
    that '+' is removed.
    '''
    assert isinstance(font, mupdf.FzFont)
    name = mupdf.fz_font_name(font)
    plus_pos = name.find('+')
    if not _globals.subset_fontnames and plus_pos == 6:
        return name[plus_pos + 1:]
    return name
14892
14893
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Store info of the fonts in dictionary `dict_` in Python list `fontlist`.

    For every dictionary-valued item, the tuple
    `(xref, ext, subtype, name, refname, encoding, stream_xref)` is
    appended. Items that are no dictionary cause a MuPDF warning and are
    skipped. `ext` is "n/a" if the xref is 0.

    Returns:
        Always 1.
    '''
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        fontdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        # Prefer /BaseFont; fall back to /Name if it is missing or null.
        name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        if not name.m_internal or mupdf.pdf_is_null(name):
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
        xref = mupdf.pdf_to_num(fontdict)
        ext = JM_get_fontextension(pdf, xref) if xref else "n/a"
        fontlist.append((
                xref,
                ext,
                mupdf.pdf_to_name(subtype),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                mupdf.pdf_to_name(refname),
                mupdf.pdf_to_name(encoding),
                stream_xref,
                ))
    return 1
14929
14930
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the /Form xobjects in dictionary `dict_` in Python list
    `imagelist`.

    For every item with /Subtype /Form, the tuple
    `(xref, refname, stream_xref, bbox)` is appended, where `bbox` is the
    /BBox transformed by /Matrix (an infinite rectangle if /BBox is
    missing). Items that are no dictionary cause a MuPDF warning and are
    skipped.

    Returns:
        Always 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        formdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(formdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(formdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(formdict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('Matrix'))
        mat = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect(mupdf.pdf_to_rect(bbox_obj), mat)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        xref = mupdf.pdf_to_num(formdict)

        imagelist.append((
                xref,
                mupdf.pdf_to_name(refname),
                stream_xref,
                JM_py_from_rect(bbox),
                ))
    return 1
14969
14970
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the images in dictionary `dict_` in Python list
    `imagelist`.

    For every item with /Subtype /Image, this tuple is appended:
    `(xref, smask, width, height, bpc, colorspace, alt. colorspace,
    refname, filter, stream_xref)`, where `smask` is the object number of
    /SMask or /Mask (0 if none). Items that are no dictionary cause a
    MuPDF warning and are skipped.

    Returns:
        Always 1.
    '''
    def lookup(obj, key, abbrev):
        # Value of `key` in `obj`, or of its abbreviation `abbrev`.
        return mupdf.pdf_dict_geta(obj, PDF_NAME(key), PDF_NAME(abbrev))

    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        imagedict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)
        smask = lookup(imagedict, 'SMask', 'Mask')
        smask_num = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        filter_ = lookup(imagedict, 'Filter', 'F')
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)   # first filter only

        altcs = mupdf.PdfObj(0)
        cs = lookup(imagedict, 'ColorSpace', 'CS')
        if mupdf.pdf_is_array(cs):
            cs_array = cs
            cs = mupdf.pdf_array_get(cs_array, 0)
            is_devicen = mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
            if is_devicen or mupdf.pdf_name_eq(cs, PDF_NAME('Separation')):
                # For these, item 2 of the array is the alternate colorspace.
                altcs = mupdf.pdf_array_get(cs_array, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)

        width = lookup(imagedict, 'Width', 'W')
        height = lookup(imagedict, 'Height', 'H')
        bpc = lookup(imagedict, 'BitsPerComponent', 'BPC')

        imagelist.append((
                xref,
                smask_num,
                mupdf.pdf_to_int(width),
                mupdf.pdf_to_int(height),
                mupdf.pdf_to_int(bpc),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
                stream_xref,
                ))
    return 1
15027
15028
def JM_get_annot_by_xref(page, xref):
    '''
    Retrieve an annotation of `page` by its xref.

    Raises:
        Exception: if no annotation of the page has this xref.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # Loop through MuPDF's internal annots array.
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("xref %d is not an annot of this page" % xref)
15047
15048
def JM_get_annot_by_name(page, name):
    '''
    Retrieve an annotation of `page` by its name (/NM key).

    Returns None if `name` is empty.

    Raises:
        Exception: if no annotation of the page has this name.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return
    # Loop through MuPDF's internal annots array.
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        response, len_ = mupdf.pdf_to_string(nm_obj)
        if name == response:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
15071
15072
def JM_get_annot_id_list(page):
    '''
    Return the list of /NM values (as text strings) of the entries in the
    page's /Annots array. Entries without /NM are skipped.
    '''
    annots = mupdf.pdf_dict_get(page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if not annots.m_internal:
        return []
    names = []
    for idx in range(mupdf.pdf_array_len(annots)):
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, idx), "NM")
        if nm_obj.m_internal:
            names.append(mupdf.pdf_to_text_string(nm_obj))
    return names
15086
def JM_get_annot_xref_list(page_obj):
    '''
    Return the xrefs and /NM ids of a page's annots, links and fields.

    The result is a list of tuples `(xref, annot type, /NM text)`, one for
    each entry of the page's /Annots array that has a /Subtype which maps
    to a known annotation type.
    '''
    if g_use_extra:
        return extra.JM_get_annot_xref_list(page_obj)

    result = []
    annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
    for idx in range(mupdf.pdf_array_len(annots)):
        annot_obj = mupdf.pdf_array_get(annots, idx)
        xref = mupdf.pdf_to_num(annot_obj)
        subtype = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Subtype'))
        if not subtype.m_internal:
            continue    # subtype is required
        annot_type = mupdf.pdf_annot_type_from_string(mupdf.pdf_to_name(subtype))
        if annot_type == mupdf.PDF_ANNOT_UNKNOWN:
            continue    # only accept valid annot types
        nm_obj = mupdf.pdf_dict_gets(annot_obj, "NM")
        result.append((xref, annot_type, mupdf.pdf_to_text_string(nm_obj)))
    return result
15110
15111
def JM_get_annot_xref_list2(page):
    '''
    Like JM_get_annot_xref_list(), but takes a page object and returns an
    empty list if `page._pdf_page(required=False)` has no underlying PDF
    page.
    '''
    pdf_page = page._pdf_page(required=False)
    if pdf_page.m_internal:
        return JM_get_annot_xref_list(pdf_page.obj())
    return list()
15117
15118
def JM_get_border_style(style):
    '''
    Return the pdf_obj "border style" for a Python str.

    Only the first character of `style` is looked at, in either case:
    "b", "d", "i", "u" and "s" select the corresponding
    `mupdf.PDF_ENUM_NAME_*` value. None or any other first character
    yields `mupdf.PDF_ENUM_NAME_S`.
    '''
    if style is None:
        return mupdf.PDF_ENUM_NAME_S
    for prefixes, value in (
            (("b", "B"), mupdf.PDF_ENUM_NAME_B),
            (("d", "D"), mupdf.PDF_ENUM_NAME_D),
            (("i", "I"), mupdf.PDF_ENUM_NAME_I),
            (("u", "U"), mupdf.PDF_ENUM_NAME_U),
            (("s", "S"), mupdf.PDF_ENUM_NAME_S),
            ):
        if style.startswith(prefixes):
            return value
    return mupdf.PDF_ENUM_NAME_S
15133
15134
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font from a number of parameters.

    The font source is chosen by the first of these that applies:
    1. `fontfile`: load the font from this file.
    2. `fontbuffer`: load the font from these bytes.
    3. `ordering` > -1: create a CJK font.
    4. `fontname`: a Base-14 font or a MuPDF builtin font.
    5. Otherwise: a NOTO font for `script` / `lang`, or else the fallback
       font for `script`, `lang`, `is_serif`, `is_bold`, `is_italic`.

    If the font allows it, its embedding flag is set to `embed`.

    Raises:
        RuntimeError: if the selected font has no underlying MuPDF font.
    '''
    def fertig(font):
        # Validate the font and finish it up ("fertig" = done).
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        font = mupdf.fz_new_font_from_file(None, fontfile, index, 0)
        return fertig(font)

    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        font = mupdf.fz_new_font_from_buffer(None, res, index, 0)
        return fertig(font)

    if ordering > -1:
        font = mupdf.fz_new_cjk_font(ordering)
        return fertig(font)

    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return fertig(font)
        font = mupdf.fz_new_builtin_font(fontname, is_bold, is_italic)
        return fertig(font)

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font(script, lang)
    font = None
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    # `font` is still None if no NOTO data was found: guard the attribute
    # access, so that the fallback font below is tried in that case.
    if font is not None and font.m_internal:
        return fertig(font)
    font = mupdf.fz_load_fallback_font(script, lang, is_serif, is_bold, is_italic)
    return fertig(font)
15195
15196
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    The font's /FontDescriptor is looked up (via the first item of
    /DescendantFonts if that entry exists). Its /FontFile, /FontFile2 and
    /FontFile3 entries are checked in this order; the last one present is
    loaded.

    Returns:
        The loaded font stream, or None if `xref` is below 1, the
        /FontDescriptor is missing, the /FontFile3 subtype is no name, or
        no font file entry exists. A message is issued in the last three
        cases.
    '''
    if xref < 1:
        return
    font_obj = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(font_obj, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        descendant = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        descriptor = mupdf.pdf_dict_get(descendant, PDF_NAME('FontDescriptor'))
    else:
        descriptor = mupdf.pdf_dict_get(font_obj, PDF_NAME('FontDescriptor'))

    if not descriptor.m_internal:
        message("invalid font - FontDescriptor missing")
        return

    stream = None

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj    # ext = "pfa"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj    # ext = "ttf"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj

        subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if subtype.m_internal and not mupdf.pdf_is_name(subtype):
            message("invalid font descriptor subtype")
            return

        # Known subtypes: Type1C ("cff"), CIDFontType0C ("cid") and
        # OpenType ("otf"). Anything else only triggers a warning.
        known_subtypes = ('Type1C', 'CIDFontType0C', 'OpenType')
        if not any(mupdf.pdf_name_eq(subtype, PDF_NAME(t)) for t in known_subtypes):
            # This used to be a plain string without the f prefix, so the
            # placeholder text itself was printed.
            message(f'warning: unhandled font type {mupdf.pdf_to_name(subtype)!r}')

    if not stream:
        message('warning: unhandled font type')
        return

    return mupdf.pdf_load_stream(stream)
15250
15251
def JM_get_resource_properties(ref):
    '''
    List the entries of /Resources/Properties (used for Marked Content).

    `ref` may be e.g. a page object or a Form XObject. Returns an empty
    tuple when the dictionary is missing or has no entries, otherwise a
    list of (name, object number) tuples.
    '''
    props = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not props.m_internal:
        return ()
    count = mupdf.pdf_dict_len(props)
    if count < 1:
        return ()

    def entry(idx):
        key_obj = mupdf.pdf_dict_get_key(props, idx)
        val_obj = mupdf.pdf_dict_get_val(props, idx)
        return mupdf.pdf_to_name(key_obj), mupdf.pdf_to_num(val_obj)

    return [entry(idx) for idx in range(count)]
15272
15273
def JM_get_widget_by_xref( page, xref):
    '''
    Find the widget of `page` whose annotation object number equals `xref`.

    Returns it wrapped in an Annot; raises Exception if the page has no
    such widget.
    '''
    widget = mupdf.pdf_first_widget( page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num( mupdf.pdf_annot_obj( widget)):
            return Annot( widget)
        widget = mupdf.pdf_next_widget( widget)
    raise Exception( f"xref {xref} is not a widget of this page")
15289
15290
def JM_get_widget_properties(annot, Widget):
    '''
    Populate a Python Widget object with the values from a PDF form field.
    Called by "Page.first_widget" and "Widget.next".

    Args:
        annot: annotation wrapper of the widget; its `.this` attribute is
            passed to the mupdf functions.
        Widget: object that receives the attributes; its `_parse_da()`
            method is called after `_text_da` has been set.
    '''
    #log( '{type(annot)=}')
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    #log( 'Have called mupdf.pdf_annot_obj()')
    page = _pdf_annot_page(annot.this)
    pdf = page.doc()
    tw = annot

    def SETATTR(key, value):
        # Set an attribute on the Widget being populated.
        setattr(Widget, key, value)

    def SETATTR_DROP(mod, key, value):
        # Original C code for this function deletes if PyObject* is NULL. We
        # don't have a representation for that in Python - e.g. None is not
        # represented by NULL.
        setattr(mod, key, value)

    #log( '=== + mupdf.pdf_widget_type(tw)')
    field_type = mupdf.pdf_widget_type(tw.this)
    #log( '=== - mupdf.pdf_widget_type(tw)')
    Widget.field_type = field_type
    # is_signed is True / False for signature fields and None otherwise.
    if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
        if mupdf.pdf_signature_is_signed(pdf, annot_obj):
            SETATTR("is_signed", True)
        else:
            SETATTR("is_signed",False)
    else:
        SETATTR("is_signed", None)
    SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj)))
    SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type)))

    field_name = mupdf.pdf_load_field_name(annot_obj)
    SETATTR_DROP(Widget, "field_name", field_name)

    def pdf_dict_get_inheritable_nonempty_label(node, key):
        '''
        This is a modified version of MuPDF's pdf_dict_get_inheritable(), with
        some changes:
        * Returns string from pdf_to_text_string() or None if not found.
        * Recurses to parent if current node exists but with empty string
        value.
        '''
        # `slow` trails behind `node` up the /Parent chain; meeting it again
        # means the chain is cyclic.
        slow = node
        halfbeat = 11   # Don't start moving slow pointer for a while.
        while 1:
            if not node.m_internal:
                return
            val = mupdf.pdf_dict_get(node, key)
            if val.m_internal:
                label = mupdf.pdf_to_text_string(val)
                if label:
                    return label
            node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
            if node.m_internal == slow.m_internal:
                raise Exception("cycle in resources")
            halfbeat -= 1
            if halfbeat == 0:
                slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent'))
                halfbeat = 2

    # In order to address #3950, we use our modified pdf_dict_get_inheritable()
    # to ignore empty-string child values.
    label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU'))
    if label is not None:
        SETATTR_DROP(Widget, "field_label", label)

    # Radio buttons take their value from /AS when present; everything else
    # (or an empty /AS name) falls back to pdf_field_value().
    fvalue = None
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent'))    # owning RB group
        if obj.m_internal:
            SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj))
        obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
        if obj.m_internal:
            fvalue = mupdf.pdf_to_name(obj)
    if not fvalue:
        fvalue = mupdf.pdf_field_value(annot_obj)
    SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue))

    SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj))

    # A border width of 0 (which is also what a missing /BS /W yields here)
    # is replaced by 1.
    border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
    if border_width == 0:
        border_width = 1
    SETATTR_DROP(Widget, "border_width", border_width)

    # border_dashes is only set when /BS /D is an array.
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        d = [0] * n
        for i in range(n):
            d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_dashes", d)

    SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this))

    SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this))

    # fill_color is only set when /MK /BG is an array.
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "fill_color", col)

    # border_color is only set when /MK /BC is an array.
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_color", col)

    SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot))

    da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
    SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da))

    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
    if obj.m_internal:
        SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj)))

    SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj))

    # call Py method to reconstruct text color, font name, size
    Widget._parse_da()

    # extract JavaScript action texts
    # /A is the field's own action; the /AA sub-keys K, F, V, C, Bl and Fo
    # are stored as script_stroke, _format, _change, _calc, _blur and _focus.
    s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A'))
    ss = JM_get_script(s)
    SETATTR_DROP(Widget, "script", ss)

    SETATTR_DROP(Widget, "script_stroke",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K')))
        )

    SETATTR_DROP(Widget, "script_format",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F')))
        )

    SETATTR_DROP(Widget, "script_change",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V')))
        )

    SETATTR_DROP(Widget, "script_calc",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C')))
        )

    SETATTR_DROP(Widget, "script_blur",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl')))
        )

    SETATTR_DROP(Widget, "script_focus",
        JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo')))
        )
15450
15451
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref

    Args:
        doc: the PDF document holding the font object.
        xref: object number of the font.

    Returns:
        "pfa" for /FontFile, "ttf" for /FontFile2, "cff", "cid" or "otf"
        depending on the /Subtype of /FontFile3, and "n/a" in all other
        cases (xref < 1, no /FontDescriptor, no font file, unknown subtype).
    '''
    if xref < 1:
        return "n/a"
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        # Use the descriptor of the first descendant font.
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        return "n/a"    # this is a base-14 font

    o = obj # we have the FontDescriptor

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        return "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        return "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return "n/a"
        if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
            return "cff"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
            return "cid"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
            return "otf"
        else:
            # Format the text here; message() is given one ready-made string.
            message(f"unhandled font type '{mupdf.pdf_to_name(obj)}'")

    return "n/a"
15494
15495
def JM_get_ocg_arrays_imp(arr):
    '''
    Return the object numbers of the entries of PDF array `arr`.

    Helper for JM_get_ocg_arrays(). Duplicates are dropped, keeping the
    order of first occurrence. Returns an empty list if `arr` is not a PDF
    array.
    '''
    if not mupdf.pdf_is_array( arr):
        return []
    n = mupdf.pdf_array_len( arr)
    nums = (mupdf.pdf_to_num( mupdf.pdf_array_get( arr, i)) for i in range(n))
    # dict keys keep insertion order, giving an ordered de-duplication
    # without rescanning the result list for every item.
    return list(dict.fromkeys(nums))
15510
15511
def JM_get_ocg_arrays(conf):
    '''
    Get OCG arrays from an optional-content configuration dictionary.

    Returns a dict that may contain "on", "off" and "locked" (lists of
    object numbers), "rbgroups" (a list of such lists) and "basestate"
    (a name). Entries that are missing or empty in `conf` are left out.
    '''
    result = {}
    for pdf_key, out_key in (('ON', "on"), ('OFF', "off"), ('Locked', 'locked')):
        numbers = JM_get_ocg_arrays_imp( mupdf.pdf_dict_get( conf, PDF_NAME(pdf_key)))
        if numbers:
            result[out_key] = numbers

    groups = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array( groups):
        rbgroups = [
            JM_get_ocg_arrays_imp( mupdf.pdf_array_get( groups, idx))
            for idx in range(mupdf.pdf_array_len( groups))
        ]
        if rbgroups:
            result["rbgroups"] = rbgroups

    base = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState'))
    if base.m_internal:
        result["basestate"] = mupdf.pdf_to_name( base)
    return result
15543
15544
def JM_get_page_labels(liste, nums):
    '''
    Append one (page number, label text) tuple to `liste` for every pair of
    consecutive entries in the PDF array `nums`.

    The first entry of each pair is read as an int; the second is rendered
    via JM_object_to_buffer() and decoded as UTF-8.
    '''
    for pos in range(0, mupdf.pdf_array_len(nums), 2):
        page_no = mupdf.pdf_to_int(
            mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos))
        )
        rule = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos + 1))
        raw = mupdf.fz_buffer_extract(JM_object_to_buffer(rule, 1, 0))
        assert isinstance(raw, bytes)
        liste.append( (page_no, raw.decode('utf-8')))
15556
15557
def JM_get_script(key):
    '''
    JavaScript extractor

    `key` is a PDF action dictionary. The value of /JS is returned when /S
    is '/JavaScript' and /JS is a non-empty string or stream; in every
    other case the result is None.
    '''
    if not key.m_internal:
        return None

    action_type = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if not action_type == "JavaScript":
        return None

    source = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not source.m_internal:
        return None

    if mupdf.pdf_is_string(source):
        text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(source))
    elif mupdf.pdf_is_stream(source):
        text = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(source))
    else:
        return None

    # do not return an empty script
    return text if text else None
15587
15588
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state

    Returns 0 only when the document has a journal and
    mupdf.pdf_undoredo_step(pdf, 0) is falsy; otherwise 1.
    '''
    if not pdf.m_internal.journal:
        return 1
    if mupdf.pdf_undoredo_step(pdf, 0):
        return 1
    return 0
15596
15597
def JM_image_extension(type_):
    '''
    return extension for MuPDF image type

    Unknown types yield "n/a".
    '''
    # Checked in this order; constants are looked up lazily, one at a time.
    known = (
        ('FZ_IMAGE_FAX', "fax"),
        ('FZ_IMAGE_RAW', "raw"),
        ('FZ_IMAGE_FLATE', "flate"),
        ('FZ_IMAGE_LZW', "lzw"),
        ('FZ_IMAGE_RLD', "rld"),
        ('FZ_IMAGE_BMP', "bmp"),
        ('FZ_IMAGE_GIF', "gif"),
        ('FZ_IMAGE_JBIG2', "jb2"),
        ('FZ_IMAGE_JPEG', "jpeg"),
        ('FZ_IMAGE_JPX', "jpx"),
        ('FZ_IMAGE_JXR', "jxr"),
        ('FZ_IMAGE_PNG', "png"),
        ('FZ_IMAGE_PNM', "pnm"),
        ('FZ_IMAGE_TIFF', "tiff"),
        # ('FZ_IMAGE_PSD', "psd") is deliberately not handled.
    )
    for const_name, extension in known:
        if type_ == getattr(mupdf, const_name):
            return extension
    return "n/a"
15618
15619
# fixme: need to avoid using a global for this.
# Shared accumulator for image information: JM_image_reporter() rebinds it
# to a fresh list before filtering a page, and JM_image_filter() appends one
# (name, quad) tuple to it per call.
g_img_info = None
15622
15623
def JM_image_filter(opaque, ctm, name, image):
    '''
    Record one image for JM_image_reporter().

    Appends (name, quad) to the global g_img_info, where quad is the unit
    rectangle transformed by `ctm` and then by the global
    g_img_info_matrix. `opaque` and `image` are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_quad = mupdf.fz_quad_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_UNIT))
    placed = mupdf.fz_transform_quad( unit_quad, ctm)
    placed = mupdf.fz_transform_quad( placed, g_img_info_matrix)
    g_img_info.append((name, JM_py_from_quad(placed)))
15631
15632
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image provided as bytes or bytearray.

    An fz_image is created from the data. When `keep_image` is true the
    data is copied into the image buffer and the image object is included
    in the result under dictkey_image; otherwise the buffer shares the
    data. Returns None for empty input, for fewer than 8 bytes and for an
    unrecognised image format.
    '''
    if not imagedata:
        return None # nothing given

    size = len( imagedata)
    if size < 8:
        message( "bad image data")
        return None
    #log( 'calling mfz_recognize_image_format with {c!r=}')
    fmt = mupdf.fz_recognize_image_format( imagedata)
    if fmt == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    make_buffer = (
        mupdf.fz_new_buffer_from_copied_data
        if keep_image
        else mupdf.fz_new_buffer_from_shared_data
    )
    buf = make_buffer( imagedata, size)
    image = mupdf.fz_new_image_from_buffer( buf)
    ctm = mupdf.fz_image_orientation_matrix( image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation( image)
    cs_name = mupdf.fz_colorspace_name( image.colorspace())
    profile = {
        dictkey_width: image.w(),
        dictkey_height: image.h(),
        "orientation": orientation,
        dictkey_matrix: JM_py_from_matrix(ctm),
        dictkey_xres: xres,
        dictkey_yres: yres,
        dictkey_colorspace: image.n(),
        dictkey_bpc: image.bpc(),
        dictkey_ext: JM_image_extension(fmt),
        dictkey_cs_name: cs_name,
    }

    if keep_image:
        profile[ dictkey_image] = image
    return profile
15675
15676
def JM_image_reporter(page):
    '''
    Run the page contents through a sanitize filter whose image callback
    forwards to JM_image_filter(), and return what was collected.

    Uses the globals g_img_info (accumulator, left as an empty list on
    return) and g_img_info_matrix (page transform). Returns a tuple of the
    items appended by JM_image_filter().
    '''
    global g_img_info, g_img_info_matrix

    doc = page.doc()
    g_img_info_matrix = mupdf.FzMatrix()
    mediabox = mupdf.FzRect()
    mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

    class _ImageCollector(mupdf.PdfSanitizeFilterOptions2):
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()

        def image_filter(self, ctx, ctm, name, image, scissor):
            JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

    # Keep a local reference to the callback object for the whole filter run.
    collector = _ImageCollector()

    options = _make_PdfFilterOptions(
        instance_forms=1,
        ascii=1,
        no_update=1,
        sanitize=1,
        sopts=collector,
    )

    g_img_info = []

    mupdf.pdf_filter_page_contents( doc, page, options)

    collected = tuple(g_img_info)
    g_img_info = []
    return collected
15709
15710
def JM_fitz_config():
    '''
    Return a dict of booleans describing the MuPDF build configuration.

    The "base14" and "tofu*" entries are True when the mupdf module has no
    attribute of the corresponding TOFU* name; "py-memory" mirrors
    JM_MEMORY; the remaining entries mirror the FZ_ENABLE_* and
    FZ_PLOTTERS_* values.
    '''
    def lacks(attr):
        return not hasattr(mupdf, attr)

    return {
        "base14": lacks('TOFU_BASE14'),
        "cbz": bool(mupdf.FZ_ENABLE_CBZ),
        "epub": bool(mupdf.FZ_ENABLE_EPUB),
        "html": bool(mupdf.FZ_ENABLE_HTML),
        "icc": bool(mupdf.FZ_ENABLE_ICC),
        "img": bool(mupdf.FZ_ENABLE_IMG),
        "jpx": bool(mupdf.FZ_ENABLE_JPX),
        "js": bool(mupdf.FZ_ENABLE_JS),
        "pdf": bool(mupdf.FZ_ENABLE_PDF),
        "plotter-cmyk": bool(mupdf.FZ_PLOTTERS_CMYK),
        "plotter-g": bool(mupdf.FZ_PLOTTERS_G),
        "plotter-n": bool(mupdf.FZ_PLOTTERS_N),
        "plotter-rgb": bool(mupdf.FZ_PLOTTERS_RGB),
        "py-memory": bool(JM_MEMORY),
        "svg": bool(mupdf.FZ_ENABLE_SVG),
        "tofu": lacks('TOFU'),
        "tofu-cjk": lacks('TOFU_CJK'),
        "tofu-cjk-ext": lacks('TOFU_CJK_EXT'),
        "tofu-cjk-lang": lacks('TOFU_CJK_LANG'),
        "tofu-emoji": lacks('TOFU_EMOJI'),
        "tofu-historic": lacks('TOFU_HISTORIC'),
        "tofu-sil": lacks('TOFU_SIL'),
        "tofu-symbol": lacks('TOFU_SYMBOL'),
        "xps": bool(mupdf.FZ_ENABLE_XPS),
    }
15748
15749
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Add buffer `newcont` to a page as a new, separate /Contents stream.

    A stream object is created from the buffer. If /Contents already is an
    array, the stream is appended (`overlay` true) or prepended to it.
    Otherwise a new array is stored as /Contents, holding the previous
    content object (if there was one) and the new stream in the order
    given by `overlay`.

    Returns the object number of the new stream.
    '''
    previous = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    stream = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(stream)

    if mupdf.pdf_is_array(previous):
        if overlay:
            mupdf.pdf_array_push(previous, stream)          # append new object
        else:
            mupdf.pdf_array_insert(previous, stream, 0)     # prepend new object
        return xref

    combined = mupdf.pdf_new_array(pdf, 5)
    if overlay and previous.m_internal:
        mupdf.pdf_array_push(combined, previous)
    mupdf.pdf_array_push(combined, stream)
    if not overlay and previous.m_internal:
        mupdf.pdf_array_push(combined, previous)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), combined)
    return xref
15778
15779
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF

    The font source is chosen in this order: a CJK font looked up via
    `ordering` (when > -1), a Base-14 font looked up via `bfname`, else a
    font loaded from `fontfile` or, if that is falsy, from `fontbuffer`.

    Returns:
        A list [xref, info], where info is a dict with the keys "name",
        "type", "ext", "simple", "ordering", "ascender" and "descender".
    '''
    font = None
    res = None
    data = None
    ixref = 0
    index = 0
    simple = 0
    value=None
    name=None
    subt=None
    exto = None

    ENSURE_OPERATION(pdf)
    # check for CJK font
    # `data` stays None unless a lookup below succeeds; the nested if/else
    # blocks stand in for the "goto weiter" jumps of the original C code.
    if ordering > -1:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        exto = "n/a"
        simple = 0
        #goto weiter;
    else:

        # check for PDF Base-14 font
        if bfname:
            data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            exto = "n/a"
            simple = 1
            #goto weiter;

        else:
            # Neither CJK nor Base-14: load the font from file or buffer.
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                res = JM_BufferFromBytes(fontbuffer)
                if not res.m_internal:
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)

            if not set_simple:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                simple = 0
            else:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                simple = 2
    #weiter: ;
    ixref = mupdf.pdf_to_num(font_obj)
    name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))

    subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))

    # Only fonts loaded from file / buffer get here without an extension.
    if not exto:
        exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))

    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    value = [
            ixref,
            {
                "name": name,               # base font name
                "type": subt,               # subtype
                "ext": exto,                # file extension
                "simple": bool(simple),     # simple font?
                "ordering": ordering,       # CJK font?
                "ascender": asc,
                "descender": dsc,
            },
            ]
    return value
15856
def JM_irect_from_py(r):
    '''
    Convert `r` to a mupdf.FzIrect.

    Accepts mupdf.FzIrect (returned unchanged), IRect, Rect, mupdf.FzRect
    or a sequence of four numbers, which are clamped to
    [FZ_MIN_INF_RECT, FZ_MAX_INF_RECT]. Anything else, or a sequence
    containing None, gives the infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        as_rect = mupdf.FzRect(r.x0, r.y0, r.x1, r.y1)
        return mupdf.FzIrect(as_rect)  # Uses fz_irect_from_rect().
    if isinstance(r, mupdf.FzRect):
        return mupdf.FzIrect(r)  # Uses fz_irect_from_rect().
    if not (r and PySequence_Check(r) and PySequence_Size(r) == 4):
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    coords = []
    for pos in range(4):
        value = r[pos]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_irect(coords[0], coords[1], coords[2], coords[3])
15885
def JM_listbox_value( annot):
    '''
    ListBox retrieve value

    Returns the text of /V if it is a single string, otherwise a list with
    one string per array entry (possibly empty). An entry that is itself
    an array contributes its second item.
    '''
    value = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( annot), PDF_NAME('V'))
    if mupdf.pdf_is_string( value):     # a single string
        return mupdf.pdf_to_text_string( value)

    # value is an array (may have len 0)
    selected = []
    for pos in range( mupdf.pdf_array_len( value)):
        entry = mupdf.pdf_array_get( value, pos)
        if mupdf.pdf_is_array( entry):
            entry = mupdf.pdf_array_get( entry, 1)
        selected.append( JM_UnicodeFromStr( mupdf.pdf_to_text_string( entry)))
    return selected
15908
15909
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Store a /DA (default appearance) string in the annotation's object.

    The string consists of a colour operator chosen by `ncol` (gray for
    fewer than 2 components, "rg" for 3, "k" otherwise; 2 is not allowed)
    followed by the font name and size with the "Tf" operator.
    '''
    # The string is assembled directly in Python rather than in a fz_buffer,
    # whose `unsigned char*` is awkward to hand to
    # pdf_dict_put_text_string() as a `const char*`.
    color_op = ''
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    font_op = f'/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(
        mupdf.pdf_annot_obj(annot),
        mupdf.PDF_ENUM_NAME_DA,
        color_op + font_op,
    )
15928
15929
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Split the characters of one text line into spans of identical style.

    Args:
        line_dict: dict of the line; receives the span list under
            dictkey_spans.
        line: the text line, iterated to obtain its characters.
        raw: if true, each span gets a list of per-character dicts
            (dictkey_chars); otherwise the span text (dictkey_text).
        buff: scratch buffer used to accumulate span text; it is cleared
            on entry and after each span.
        tp_rect: clip rectangle; characters not overlapping it are skipped
            unless it is infinite.

    Returns:
        The union rectangle of all spans that were appended.
    '''
    if g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    class char_style:
        # Style attributes of a character; a new span starts when they change.
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                # size == -1 marks "no character seen yet".
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0
        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # The colour is stored as `argb`; there is no `color` attribute.
            ret += f' {self.font} {self.argb} {self.asc} {self.desc}'
            return ret

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()

        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.bidi = ch.m_internal.bidi

        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
                    mupdf.fz_clear_buffer(buff)

                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None

            span = dict()
            asc = style.asc
            desc = style.desc
            # Substitute default values when the ascender is (close to) zero.
            if style.asc < 1e-3:
                asc = 0.9
                desc = -0.1

            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)

            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:   # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)

        # Unlike the spans flushed inside the loop, the last span is dropped
        # if its rectangle is empty.
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None
    # The span list is stored whether or not the line rectangle is empty.
    line_dict[dictkey_spans] = span_list
    return line_rect
16087
def _make_image_dict(img, img_dict):
    """Fill `img_dict` with information extracted from image `img`.

    Used by 'Document.extract_image' and by 'JM_make_image_block', both of
    which add some more specific information afterwards.

    The image bytes are the compressed buffer where one is usable; images
    without one (and JBIG2 / unknown / pre-BMP types) are converted to
    PNG, and 4-component JPEGs are re-encoded as JPEG.
    """
    img_type = img.fz_compressed_image_type()
    ext = JM_image_extension(img_type)

    # compressed image buffer if present, else None
    ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    must_convert = (
        not ll_cbuf
        or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
        or img_type < mupdf.FZ_IMAGE_BMP
    )
    if must_convert:
        # not an image with a compressed buffer: convert to PNG
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        res = mupdf.fz_new_buffer_from_image_as_png(img, color_params)
        ext = "png"
    elif ext == "jpeg" and img.n() == 4:
        # JPEG with CMYK: invert colors
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        res = mupdf.fz_new_buffer_from_image_as_jpeg(img, color_params, 95, 1)
    else:
        # copy the compressed buffer
        res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))

    data = JM_BinFromBuffer(res)
    img_dict[dictkey_width] = img.w()
    img_dict[dictkey_height] = img.h()
    img_dict[dictkey_ext] = ext
    img_dict[dictkey_colorspace] = img.n()
    img_dict[dictkey_xres] = img.xres()
    img_dict[dictkey_yres] = img.yres()
    img_dict[dictkey_bpc] = img.bpc()
    img_dict[dictkey_size] = len(data)
    img_dict[dictkey_image] = data
16129
def JM_make_image_block(block, block_dict):
    '''
    Fill `block_dict` for an image block: the image information from
    _make_image_dict(), the image mask as a PNG buffer under "mask" (None
    if there is no mask) and the block's transformation matrix.
    '''
    image = block.i_image()
    _make_image_dict(image, block_dict)
    mask = image.mask()
    if not mask.m_internal:
        block_dict["mask"] = None
    else:
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        png = mask.fz_new_buffer_from_image_as_png(color_params)
        block_dict["mask"] = png.fz_buffer_extract()
    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
16141
16142
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill `block_dict` for a text block with its bounding box and the list
    of line dicts.

    Lines whose bbox does not intersect `tp_rect` are skipped unless
    `tp_rect` is infinite. `raw` and `buff` are handed on to
    JM_make_spanlist().
    '''
    if g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
    lines = []
    block_bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        visible = mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox))
        if mupdf.fz_is_empty_rect(visible) and not mupdf.fz_is_infinite_rect(tp_rect):
            continue
        entry = {}
        line_bbox = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        block_bbox = mupdf.fz_union_rect(block_bbox, line_bbox)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(line_bbox)
        lines.append(entry)
    block_dict[dictkey_bbox] = JM_py_from_rect(block_bbox)
    block_dict[dictkey_lines] = lines
16164
16165
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dictionaries of stext page `tp` in `page_dict`.

    Blocks are numbered by their position in `tp`, counting skipped blocks
    too. Unless the page's mediabox is infinite, a block is skipped if its
    bbox has an empty intersection with the mediabox, and an image block is
    also skipped if its bbox is not contained in the mediabox.
    '''
    if g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
    text_buffer = mupdf.fz_new_buffer(128)
    tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
    clip_is_infinite = mupdf.fz_is_infinite_rect(tp_rect)
    blocks = []
    for block_no, block in enumerate(tp):
        block_type = block.m_internal.type
        is_image = block_type == mupdf.FZ_STEXT_BLOCK_IMAGE
        if not clip_is_infinite:
            block_bbox = mupdf.FzRect(block.m_internal.bbox)
            # images must lie completely inside the clip rectangle
            if is_image and not mupdf.fz_contains_rect(tp_rect, block_bbox):
                continue
            if mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, block_bbox)):
                continue

        block_dict = {dictkey_number: block_no, dictkey_type: block_type}
        if is_image:
            block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            JM_make_image_block(block, block_dict)
        else:
            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect)
        blocks.append(block_dict)
    page_dict[dictkey_blocks] = blocks
16197
16198
def JM_matrix_from_py(m):
    '''
    Convert a matrix-like Python object to a `mupdf.FzMatrix`.

    Args:
        m: a `mupdf.FzMatrix` (returned unchanged), a `Matrix`, or a sequence
            of six numbers.

    Returns:
        A `mupdf.FzMatrix`. A default-constructed `mupdf.FzMatrix()` is
        returned if `m` is empty, is not a sequence of length 6, or contains
        an item for which `JM_FLOAT_ITEM()` returns None.
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    coefficients = []
    for i in range(6):
        value = JM_FLOAT_ITEM(m, i)
        if value is None:
            # Must be a matrix like every other return path; this used to
            # return an FzRect, which callers cannot use as a matrix.
            return mupdf.FzMatrix()
        coefficients.append(value)
    return mupdf.FzMatrix(*coefficients)
16212
16213
def JM_mediabox(page_obj):
    '''
    return a PDF page's MediaBox

    The (inheritable) /MediaBox is replaced by (0, 0, 612, 792) if it is
    empty or infinite, and its corners are put in ascending order. If the
    result is less than 1 unit wide or high, the unit rectangle is returned
    instead.
    '''
    mediabox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
        mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1 = 0, 0, 612, 792

    left = mupdf.fz_min(mediabox.x0, mediabox.x1)
    top = mupdf.fz_min(mediabox.y0, mediabox.y1)
    right = mupdf.fz_max(mediabox.x0, mediabox.x1)
    bottom = mupdf.fz_max(mediabox.y0, mediabox.y1)
    normalized = mupdf.FzRect(left, top, right, bottom)

    too_narrow = normalized.x1 - normalized.x0 < 1
    too_flat = normalized.y1 - normalized.y0 < 1
    if too_narrow or too_flat:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalized
16241
16242
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy a range of pages (spage, epage) from a source PDF to a specified
    location (apage) of the target PDF.
    If spage > epage, the sequence of source pages is reversed.

    Each page is copied by `page_merge()`; the target position advances by
    one for every copied page. If `show_progress` is greater than zero, a
    message is emitted after every `show_progress` copied pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )
    total = mupdf.fz_absi(epage - spage) + 1   # total pages to copy

    # walk upwards for an ascending range, downwards otherwise; both ends
    # of the range are included
    step = 1 if spage < epage else -1
    for offset, page in enumerate(range(spage, epage + step, step)):
        page_merge(doc_des, doc_src, page, apage + offset, rotate, links, annots, graft_map)
        copied = offset + 1
        if show_progress > 0 and copied % show_progress == 0:
            message(f"Inserted {copied} of {total} pages.")
16295
16296
def JM_merge_resources( page, temp_res):
    '''
    Merge the /Resources object created by a text pdf device into the page.
    The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
    These need to be renamed (renumbered) to not overwrite existing page
    objects from previous executions.
    Returns the tuple (n, m) of the offsets that were added to the numbers
    of the copied /Alp and /F objects. n is -1 if `temp_res` has no
    /ExtGState dictionary.
    '''
    def highest_suffix(pdf_dict, prefix):
        # Largest number following `prefix` among the keys of `pdf_dict`,
        # or -1 if no key starts with `prefix`.
        highest = -1
        for idx in range(mupdf.pdf_dict_len(pdf_dict)):
            key_name = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(pdf_dict, idx))
            if key_name.startswith(prefix):
                highest = max(highest, mupdf.fz_atoi(key_name[len(prefix):]))
        return highest

    def copy_renumbered(source, target, prefix, offset):
        # Copy every entry of `source` to `target`, adding `offset` to the
        # number that follows the first len(prefix) characters of its key.
        for idx in range(mupdf.pdf_dict_len(source)):
            key_name = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(source, idx))
            number = mupdf.fz_atoi(key_name[len(prefix):]) + offset
            new_name = f'{prefix}{number}'
            mupdf.pdf_dict_puts(target, new_name, mupdf.pdf_dict_get_val(source, idx))

    # page objects /Resources, /Resources/ExtGState, /Resources/Font
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
    main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))

    # text pdf device objects /ExtGState, /Font
    temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))

    # Handle /Alp objects - only if the device created any
    max_alp = -1
    if mupdf.pdf_is_dict(temp_extg):
        if mupdf.pdf_is_dict(main_extg):
            # page already has an /ExtGState: continue after its highest /Alp
            max_alp = highest_suffix(main_extg, 'Alp')
        else:
            # create a /ExtGState for the page
            main_extg = mupdf.pdf_dict_put_dict(
                    resources,
                    PDF_NAME('ExtGState'),
                    mupdf.pdf_dict_len(temp_extg),
                    )
        max_alp += 1
        copy_renumbered(temp_extg, main_extg, 'Alp', max_alp)

    # Handle /F objects
    if mupdf.pdf_is_dict(main_fonts):
        # page already has fonts: continue after its highest /F
        max_fonts = highest_suffix(main_fonts, 'F')
    else:
        # create a Resources/Font for the page
        main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
        max_fonts = -1
    max_fonts += 1
    copy_renumbered(temp_fonts, main_fonts, 'F', max_fonts)
    return (max_alp, max_fonts)
16361
16362
def JM_mupdf_warning( text):
    '''
    redirect MuPDF warnings

    The text is always appended to `JM_mupdf_warnings_store`; it is also
    passed to `message()` if `JM_mupdf_show_warnings` is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
16370
16371
def JM_mupdf_error( text):
    '''
    redirect MuPDF errors

    The text is always appended to `JM_mupdf_warnings_store` (the same store
    that `JM_mupdf_warning()` uses); it is also passed to `message()` if
    `JM_mupdf_show_errors` is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
16376
16377
def JM_new_bbox_device(rc, inc_layers):
    '''
    Return a new `JM_new_bbox_device_Device` for the list `rc`.
    '''
    assert isinstance(rc, list)
    device = JM_new_bbox_device_Device(rc, inc_layers)
    return device
16381
16382
def JM_new_buffer_from_stext_page(page):
    '''
    make a buffer from an stext_page's text

    Only text blocks are used. A character is appended if the page's
    mediabox is infinite or overlaps the character's bbox. A newline byte
    is appended after every line and once more after every text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    newline = ord('\n')
    rect = mupdf.FzRect(page.m_internal.mediabox)
    unbounded = mupdf.fz_is_infinite_rect(rect)
    buf = mupdf.fz_new_buffer(256)
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                if unbounded or JM_rects_overlap(rect, JM_char_bbox(line, ch)):
                    mupdf.fz_append_rune(buf, ch.m_internal.c)
            mupdf.fz_append_byte(buf, newline)
        mupdf.fz_append_byte(buf, newline)
    return buf
16402
16403
def JM_new_javascript(pdf, value):
    '''
    make new PDF action object from JavaScript source
    Parameters are a PDF document and a Python string.
    Returns a PDF action object, or None if `value` is None or
    `JM_StrAsChar(value)` is None.
    '''
    # nothing to do if no argument was given or it is not convertible
    script = None if value is None else JM_StrAsChar(value)
    if script is None:
        return None

    # the script source goes into a stream object of its own
    script_buffer = mupdf.fz_new_buffer_from_copied_data(script.encode('utf8'))
    script_stream = mupdf.pdf_add_stream(pdf, script_buffer, mupdf.PdfObj(), 0)

    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), script_stream)
    return action
16424
16425
def JM_new_output_fileptr(bio):
    '''
    Return a new `JM_new_output_fileptr_Output` for `bio`.
    '''
    output = JM_new_output_fileptr_Output(bio)
    return output
16428
16429
def JM_norm_rotation(rotate):
    '''
    return normalized /Rotate value: one of 0, 90, 180, 270

    Args:
        rotate: rotation angle in degrees; may be negative or exceed 360.

    Returns:
        `rotate` reduced to the range 0 <= result < 360, or 0 if `rotate`
        is not a multiple of 90.
    '''
    # Being a multiple of 90 is not affected by adding or removing full
    # turns, so it can be tested first. This also rejects infinite and NaN
    # values, for which the modulo yields NaN.
    if rotate % 90 != 0:
        return 0
    # Modulo instead of repeated +/- 360: constant time for any magnitude
    # and it cannot loop forever on an infinite value.
    return rotate % 360
16441
16442
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object `what` into a new buffer and return that buffer.

    `compress` and `ascii` are passed on to `mupdf.pdf_print_obj()`. The
    output is closed and the buffer is terminated before it is returned.
    '''
    buffer_ = mupdf.fz_new_buffer(512)
    output = mupdf.FzOutput(buffer_)
    mupdf.pdf_print_obj(output, what, compress, ascii)
    output.fz_close_output()
    mupdf.fz_terminate_buffer(buffer_)
    return buffer_
16450
16451
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list

    The xref numbers are appended to 'xrefs' itself, which is also the
    return value.
    '''
    if not obj.m_internal:
        return xrefs
    thisobj = obj
    while thisobj.m_internal:
        newxref = mupdf.pdf_to_num( thisobj)
        # stop at an xref that was already collected, or at an object that
        # has a /Type entry
        if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal:
            # circular ref or top of chain: terminate
            break
        xrefs.append( newxref)
        first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First'))  # try go down
        if mupdf.pdf_is_dict( first):
            # collect the sub-items of this item before moving on
            xrefs = JM_outline_xrefs( first, xrefs)
        thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next'))  # try go next
        # NOTE: 'thisobj' has already been advanced to /Next here, so /Parent
        # is looked up on the next item, not on the item just processed.
        parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent'))  # get parent
        if not mupdf.pdf_is_dict( thisobj):
            thisobj = parent
    return xrefs
16475
16476
def JM_page_rotation(page):
    '''
    return a PDF page's /Rotate value: one of (0, 90, 180, 270)

    The (inheritable) /Rotate entry is read as an integer and normalized
    by `JM_norm_rotation()`.
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable(page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
16487
16488
def JM_pdf_obj_from_str(doc, src):
    '''
    create PDF object from given string (new in v1.14.0: MuPDF dropped it)

    `src` is encoded as UTF-8, wrapped in a buffer stream and parsed by
    `mupdf.pdf_parse_stm_obj()`.
    '''
    # fixme: seems inefficient to convert to bytes instance then make another
    # copy inside fz_new_buffer_from_copied_data(), but no other way?
    #
    encoded = bytes(src, 'utf8')
    source_buffer = mupdf.fz_new_buffer_from_copied_data(encoded)
    source_stream = mupdf.fz_open_buffer(source_buffer)
    lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, source_stream, lexbuf)
16501
16502
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Version of fz_new_pixmap_from_display_list (util.c) to also support
    rendering of only the 'clip' part of the displaylist rectangle

    The pixmap covers the display list bounds, intersected with `clip` and
    transformed by `ctm`. It is cleared (with alpha) or filled with 0xFF
    (without alpha) before the display list is run on it.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    bounds = mupdf.fz_bound_display_list(list_)
    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    bounds = mupdf.fz_intersect_rect(bounds, rclip)    # no-op if clip is not given
    irect = mupdf.fz_round_rect(mupdf.fz_transform_rect(bounds, matrix))

    assert isinstance( cs, mupdf.FzColorspace)

    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    # without a clip the whole list is drawn, else only the clip area
    if mupdf.fz_is_infinite_rect(rclip):
        dev = mupdf.fz_new_draw_device(matrix, pix)
        run_area = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        run_area = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), run_area, mupdf.FzCookie())

    mupdf.fz_close_device(dev)
    # Use special raw Pixmap constructor so we don't set alpha to true.
    return Pixmap( 'raw', pix)
16546
16547
def JM_point_from_py(p):
    '''
    PySequence to fz_point.

    A `mupdf.FzPoint` is returned unchanged and a `Point` is converted
    directly. Otherwise items 0 and 1 of `p` are used, each limited to the
    range FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. If `JM_FLOAT_ITEM()` returns
    None for either item, the point (0, 0) is returned.
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    fallback = mupdf.FzPoint(0, 0)
    x = JM_FLOAT_ITEM(p, 0)
    y = JM_FLOAT_ITEM(p, 1)
    if x is None or y is None:
        return fallback
    x = min(max(x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    y = min(max(y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    return mupdf.FzPoint(x, y)
16569
16570
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output: append the text of stext page `page` to buffer `res`.

    Only text blocks are used. A character is appended if the page's
    mediabox is infinite or overlaps the character's bbox. After each line
    a new-line character is appended, unless the last character appended
    for that line already was a new-line or nothing was appended for it.

    Args:
        res: the `mupdf.FzBuffer` receiving the text.
        page: the `mupdf.FzStextPage` to read.
    '''
    if g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    unbounded = mupdf.fz_is_infinite_rect(rect)

    # Note: an earlier counting pass over blocks / lines / characters was
    # removed. Its results were never used and it raised UnboundLocalError
    # for a page without blocks, a block without lines or a line without
    # characters.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0
            for ch in line:
                if unbounded or JM_rects_overlap(rect, JM_char_bbox(line, ch)):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            # 10 is the new-line character: do not produce a second one
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
16612
16613
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create a JavaScript PDF action.
    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be specified, so
    JavaScript actions can be stored for '/A' and '/AA/?' keys.

    Args:
        annot_obj: the PDF object receiving the action.
        key1: first level key.
        key2: second level key; None or an empty `mupdf.PdfObj` if there is
            no second level.
        value: the JavaScript source. If false, the existing entry is
            deleted instead.
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # owning PDF
    # "No second key" may be given as None or as an empty PdfObj. Decide this
    # once, so that None is accepted on every path and not only on deletion.
    has_key2 = key2 is not None and bool(key2.m_internal)

    # if no new script given, just delete corresponding key
    if not value:
        if not has_key2:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script as a PyUnicode string
    if not has_key2 or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if not has_key2:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
16645
16646
def JM_py_from_irect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of the attributes of `r`.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
16649
16650
def JM_py_from_matrix(m):
    '''
    Return the tuple (a, b, c, d, e, f) of the attributes of `m`.
    '''
    coefficients = (m.a, m.b, m.c, m.d, m.e, m.f)
    return coefficients
16653
16654
def JM_py_from_point(p):
    '''
    Return the tuple (x, y) of the attributes of `p`.
    '''
    coordinates = (p.x, p.y)
    return coordinates
16657
16658
def JM_py_from_quad(q):
    '''
    PySequence from fz_quad.

    Returns a tuple of four (x, y) tuples in the order ul, ur, ll, lr.
    '''
    ul = (q.ul.x, q.ul.y)
    ur = (q.ur.x, q.ur.y)
    ll = (q.ll.x, q.ll.y)
    lr = (q.lr.x, q.lr.y)
    return (ul, ur, ll, lr)
16669
16670
def JM_py_from_rect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of the attributes of `r`.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
16673
16674
def JM_quad_from_py(r):
    '''
    Convert a quad-like Python object to a `mupdf.FzQuad`.

    Args:
        r: a `mupdf.FzQuad` (returned unchanged), a `mupdf.FzRect`, a `Quad`,
            a sequence of four numbers (taken as a rectangle), or a tuple or
            list of four point-like pairs in the order ul, ur, ll, lr.

    Returns:
        A `mupdf.FzQuad`. For invalid input, a quad with all coordinates
        zero is returned. Point coordinates are limited to the range
        FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q

    # NOTE(review): kept as found - the meaning of this test depends on how
    # JM_FLOAT_ITEM() reports a non-numeric item; confirm against it.
    if JM_FLOAT_ITEM(r, 0) is None:
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect the 8 coordinates in the order ul, ur, ll, lr and build the
    # quad from them. (The previous code tried to set .x / .y on integer
    # placeholders, which raised AttributeError for every such input.)
    coords = []
    for obj in r:   # next point item
        if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
            return q    # invalid: cancel the rest
        for idx in (0, 1):
            value = JM_FLOAT_ITEM(obj, idx)
            if value is None:
                return q
            value = max( value, FZ_MIN_INF_RECT)
            value = min( value, FZ_MAX_INF_RECT)
            coords.append(value)
    return mupdf.fz_make_quad(*coords)
16718
16719
def JM_read_contents(pageref):
    '''
    Read and concatenate a PDF page's /Contents object(s) in a buffer

    If /Contents is an array, the streams among its items are joined, with
    one byte of value 32 inserted before every item except the first. A
    single /Contents object is loaded as a stream; without /Contents an
    empty buffer is returned.
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
    if not mupdf.pdf_is_array(contents):
        if contents.m_internal:
            return mupdf.pdf_load_stream(contents)
        return mupdf.FzBuffer(0)

    merged = mupdf.FzBuffer(1024)
    for i in range(mupdf.pdf_array_len(contents)):
        if i > 0:
            mupdf.fz_append_byte(merged, 32)
        part = mupdf.pdf_array_get(contents, i)
        if mupdf.pdf_is_stream(part):
            mupdf.fz_append_buffer(merged, mupdf.pdf_load_stream(part))
    return merged
16740
16741
def JM_rect_from_py(r):
    '''
    Convert a rect-like Python object to a `mupdf.FzRect`.

    A `mupdf.FzRect` is returned unchanged; `mupdf.FzIrect`, `Rect` and
    `IRect` are converted directly. Otherwise `r` must be a sequence of four
    numbers, each of which is limited to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. The infinite rectangle is returned
    for anything else.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, (Rect, IRect)):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    coords = []
    for i in range(4):
        value = JM_FLOAT_ITEM(r, i)
        if value is None:
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        coords.append(min(max(value, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    return mupdf.fz_make_rect(*coords)
16763
16764
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles `a` and `b` overlap, else 0.

    Rectangles that only share an edge or a corner do not overlap.
    '''
    apart_horizontally = a.x0 >= b.x1 or a.x1 <= b.x0
    apart_vertically = a.y0 >= b.y1 or a.y1 <= b.y0
    return 0 if apart_horizontally or apart_vertically else 1
16774
16775
def JM_refresh_links( page):
    '''
    refreshes the link and annotation tables of a page

    Nothing is done if `page` is None or empty, or if the page object has
    no /Annots entry. Otherwise the links are loaded again from /Annots and
    stored in the page.
    '''
    if page is None or not page.m_internal:
        return
    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return
    pdf = page.doc()
    page_number = mupdf.pdf_lookup_page_number(pdf, page.obj())
    # output arguments of pdf_page_transform()
    mediabox = mupdf.FzRect()
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(page, mediabox, ctm)
    links = mupdf.pdf_load_link_annots(pdf, page, annots, page_number, ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link(links.m_internal)
16791
16792
def JM_rotate_page_matrix(page):
    '''
    calculate page rotation matrices

    Returns a default `mupdf.FzMatrix()` if `page` is empty or its rotation
    is 0. Otherwise the matrix is built from the rotation and the size
    returned by `JM_cropbox_size()`.
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()     # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()     # no rotation
    size = JM_cropbox_size(page.obj())
    width, height = size.x, size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    # any other rotation value
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
16815
16816
def JM_search_stext_page(page, needle):
    '''
    Search stext page `page` for the string `needle`.

    Returns None if `needle` is empty, else the list `quads`. That list is
    handed to `on_highlight_char()` via `hits.quads` for every character
    inside a match - presumably this is what fills it (confirm against
    on_highlight_char()).

    The page text is taken from `JM_new_buffer_from_stext_page()`, and the
    page is then walked again in the same order. `haystack` is the index
    into that text of the character currently visited, so both walks must
    skip the same characters and account for the same new-lines.
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)

    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1

    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    haystack = 0
    # find_string() searches a slice, so its result is relative to 'haystack'
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads

    begin += haystack
    end += haystack
    inside = 0
    i = 0
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        # this character is not part of the page text either,
                        # so 'haystack' must not be advanced
                        #goto next_char;
                        continue
                # emulates the C label 'try_new_match': 'continue' jumps back
                # to it, 'break' leaves it
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # the current match is finished: look for the
                            # next one, starting at the current position
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # step over the new-line that ends every line in the page text
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # step over the new-line that ends every text block in the page text
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
16888
16889
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Step through /Resources, looking up image, xobject or font information

    `what` selects the information: 1 = fonts, 2 = images, 3 = form
    xobjects; nothing is done for any other value. Found items are gathered
    in `liste`. Resources of the entries in /XObject are scanned
    recursively; `tracer` lists the xref numbers already visited that way.
    '''
    if mupdf.pdf_mark_obj(rsrc):
        # this object is being scanned already
        mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
        return
    try:
        xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)

        if what == 1:       # lookup fonts
            fonts = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, fonts, liste, stream_xref)
        elif what == 2:     # look up images
            JM_gather_images(pdf, xobj, liste, stream_xref)
        elif what == 3:     # look up form xobjects
            JM_gather_forms(pdf, xobj, liste, stream_xref)
        else:               # should never happen
            return

        # check if we need to recurse into Form XObjects
        for i in range(mupdf.pdf_dict_len(xobj)):
            entry = mupdf.pdf_dict_get_val(xobj, i)
            sxref = mupdf.pdf_to_num(entry) if mupdf.pdf_is_stream(entry) else 0
            subrsrc = mupdf.pdf_dict_get(entry, mupdf.PDF_ENUM_NAME_Resources)
            if not subrsrc.m_internal:
                continue
            if sxref in tracer:
                mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
                return
            tracer.append(sxref)
            JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer)
    finally:
        mupdf.pdf_unmark_obj(rsrc)
16929
16930
def JM_set_choice_options(annot, liste):
    '''
    set ListBox / ComboBox values

    `liste` is a tuple or list whose items are either strings or pairs of
    two non-empty values. The resulting array is stored as /Opt of the
    annotation object. Nothing is done if `liste` is empty.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, len( liste))
    for item in liste:
        if isinstance(item, str):
            mupdf.pdf_array_push_text_string( options, item)
            continue
        # anything else must be a pair, stored as a sub-array
        assert isinstance( item, (tuple, list)) and len( item) == 2, 'bad choice field list'
        first, second = item
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
16957
16958
def JM_set_field_type(doc, obj, type):
    '''
    Set the field type

    Stores the /FT name belonging to widget type `type` in `obj` and sets or
    clears the matching bits of the /Ff entry. Nothing is changed for an
    unknown `type`. `doc` is not used.
    '''
    pushbutton = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
    radio = mupdf.PDF_BTN_FIELD_IS_RADIO
    combo = mupdf.PDF_CH_FIELD_IS_COMBO
    # (widget type, /FT name, /Ff bits to set, /Ff bits to clear)
    field_specs = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn', pushbutton, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn', radio, pushbutton),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn', 0, pushbutton | radio),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch', 0, combo),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch', combo, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )
    typename = None
    setbits = clearbits = 0
    for widget_type, ft_name, to_set, to_clear in field_specs:
        if type == widget_type:
            typename = PDF_NAME(ft_name)
            setbits, clearbits = to_set, to_clear
            break

    if typename is not None and typename.m_internal:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        bits = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        bits = (bits & ~clearbits) | setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), bits)
16995
16996
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value

    A placeholder string is stored under `key` in `obj`, the object is
    printed to text, the placeholder is replaced by `value` in that text,
    and a new PDF object is parsed from the result.

    Args:
        obj: the PDF dictionary object.
        key: the key; may be a path of sub-keys separated by '/'.
        value: text of the new value in PDF syntax.

    Returns:
        The new PDF object parsed from the modified text.

    Raises:
        Exception: if `key` does not exist yet and one of its parent paths
            is an indirect reference, or if the placeholder could not be
            stored under `key`.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)
    # split PDF key at path seps and take last key part
    *parent_keys, skey = key.split('/')

    testkey = mupdf.pdf_dict_getp(obj, key)     # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening if
        # indirect objects are referenced. So we check all higher level
        # sub-paths for indirect references.
        for depth in range(len(parent_keys), 0, -1):
            t = '/'.join(parent_keys[:depth])   # next high level
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                raise Exception("path to '%s' has indirects" % JM_StrAsChar(skey))
    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception("cannot insert value for '%s'" % key)
    if mupdf.pdf_to_text_string(testkey) != eyecatcher:
        raise Exception("cannot insert value for '%s'" % key)
    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace 'eyecatcher' by desired 'value'
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    return JM_pdf_obj_from_str(pdf, newstr)
17043
17044
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Set the entries /BaseState, /ON, /OFF, /Locked and /RBGroups of `conf`.

    /BaseState is only set if `basestate` is true. For each of the other
    arguments: None leaves the entry untouched, anything else deletes the
    entry and, if not empty, replaces it by an array of indirect references
    to the given xref numbers. `rbgroups` is a sequence of such xref lists,
    stored as one sub-array each.
    '''
    if basestate:
        mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)

    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del( conf, PDF_NAME(key))
        if xrefs:
            arr = mupdf.pdf_dict_put_array( conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp( arr, xrefs)

    if rbgroups is not None:
        mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
        if rbgroups:
            groups = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
            for group in rbgroups:
                sub_array = mupdf.pdf_array_push_array( groups, 1)
                JM_set_ocg_arrays_imp( sub_array, group)
17073
17074
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append an indirect reference to PDF array `arr` for every xref number
    in `list_`.
    '''
    pdf = mupdf.pdf_get_bound_document(arr)
    for xref in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(pdf, xref, 0))
17084
17085
def JM_set_resource_property(ref, name, xref):
    '''
    Insert an item into Resources/Properties (used for Marked Content)
    Arguments:
    (1) e.g. page object, Form XObject
    (2) marked content name
    (3) xref of the referenced object (insert as indirect reference)

    /Resources and /Resources/Properties are created if they do not exist.
    '''
    pdf = mupdf.pdf_get_bound_document(ref)
    target = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not target.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)

    resources = mupdf.pdf_dict_get(ref, PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1)

    properties = mupdf.pdf_dict_get(resources, PDF_NAME('Properties'))
    if not properties.m_internal:
        properties = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Properties'), 1)

    mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), target)
17105
17106
def JM_set_widget_properties(annot, Widget):
    '''
    Transfer the properties of a Python Widget object to its PDF form field.

    Called by "Page.add_widget" and "Annot.update_widget".

    Args:
        annot: the widget annotation, either an `Annot` or a
            `mupdf.PdfAnnot`. It must be bound to a page.
        Widget: object providing the properties. Attributes that do not exist
            are read as None.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()

    def GETATTR(name):
        # Missing Widget attributes are reported as None.
        return getattr(Widget, name, None)

    def real_array(components):
        # New PDF array holding the items of a Python list / tuple as reals.
        array = mupdf.pdf_new_array(pdf, len(components))
        for component in components:
            mupdf.pdf_array_push_real(array, component)
        return array

    field_type = GETATTR("field_type")

    # rectangle: Widget.rect transformed with the page's rotation matrix -----
    rect = mupdf.fz_transform_rect(
        JM_rect_from_py(GETATTR("rect")),
        JM_rotate_page_matrix(page),
    )
    mupdf.pdf_set_annot_rect(annot, rect)

    # fill color -------------------------------------------------------------
    fill_color = GETATTR("fill_color")
    if fill_color and PySequence_Check(fill_color):
        mupdf.pdf_field_set_fill_color(annot_obj, real_array(fill_color))

    # dashes (/BS/D) ---------------------------------------------------------
    border_dashes = GETATTR("border_dashes")
    if border_dashes and PySequence_Check(border_dashes):
        dashes = mupdf.pdf_new_array(pdf, len(border_dashes))
        for dash in border_dashes:
            mupdf.pdf_array_push_int(dashes, dash)
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))

    # border color (/MK/BC) --------------------------------------------------
    border_color = GETATTR("border_color")
    if border_color and PySequence_Check(border_color):
        mupdf.pdf_dict_putl(
            annot_obj,
            real_array(border_color),
            PDF_NAME('MK'),
            PDF_NAME('BC'),
        )

    # Widget attribute "text_format" is ignored - it may be used later.

    # field label (/TU) ------------------------------------------------------
    field_label = GETATTR("field_label")
    if field_label is not None:
        mupdf.pdf_dict_put_text_string(
            annot_obj,
            PDF_NAME('TU'),
            JM_StrAsChar(field_label),
        )

    # field name (/T): only written if it differs from the current name ------
    field_name = GETATTR("field_name")
    if field_name is not None:
        new_name = JM_StrAsChar(field_name)
        if new_name != mupdf.pdf_load_field_name(annot_obj):
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), new_name)

    # max text len: text fields only -----------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        text_maxlen = GETATTR("text_maxlen")
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)

    # display ----------------------------------------------------------------
    mupdf.pdf_field_set_display(annot_obj, GETATTR("field_display"))

    # choice values: list boxes and combo boxes only -------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        JM_set_choice_options(annot, GETATTR("choice_values"))

    # border style (/BS/S) ---------------------------------------------------
    style = JM_get_border_style(GETATTR("border_style"))
    mupdf.pdf_dict_putl(annot_obj, style, PDF_NAME('BS'), PDF_NAME('S'))

    # border width (/BS/W) ---------------------------------------------------
    border_width = GETATTR("border_width")
    mupdf.pdf_dict_putl(
        annot_obj,
        mupdf.pdf_new_real(border_width),
        PDF_NAME('BS'),
        PDF_NAME('W'),
    )

    # /DA string -------------------------------------------------------------
    da = JM_StrAsChar(GETATTR("_text_da"))
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    for unsupported in ('DS', 'RC'):    # not supported by MuPDF
        mupdf.pdf_dict_del(annot_obj, PDF_NAME(unsupported))

    # field flags (/Ff): the flag implied by the field type is added ---------
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)

    # button caption ---------------------------------------------------------
    caption = JM_StrAsChar(GETATTR("button_caption"))
    if caption:
        mupdf.pdf_field_set_button_caption(annot_obj, caption)

    # scripts: /A, then /AA/K, /AA/F, /AA/V, /AA/C, /AA/Bl and /AA/Fo --------
    JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), GETATTR("script"))
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), GETATTR("script_stroke"))
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), GETATTR("script_format"))
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), GETATTR("script_change"))
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), GETATTR("script_calc"))
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), GETATTR("script_blur"))
    # codespell:ignore (key "Fo")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), GETATTR("script_focus"))

    # field value ------------------------------------------------------------
    field_value = GETATTR("field_value")
    text = JM_StrAsChar(field_value)    # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if field_value:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
        else:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        on = mupdf.pdf_button_field_on_state(annot_obj).pdf_to_name()
        if field_value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    elif text:
        mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
        if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
            mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))

    # mark the annotation as changed and update it ---------------------------
    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
17299
17300
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Add the characters of `s` glyph by glyph to `text` and return the
    advanced matrix.

    Each character is encoded with fz_encode_character_sc() for `user_font`.
    If this yields glyph id 0, the glyph and its font come from
    fz_encode_character_with_fallback(). After each glyph, `trm` is
    pre-translated by the glyph advance: horizontally if `wmode` is 0,
    otherwise vertically (negative direction).

    Returns:
        The matrix after the last glyph.
    '''
    # NOTE(review): `pos` is advanced by the length that fz_chartorune()
    # reports but is used to slice `s` - presumably this assumes both count
    # the same units (bytes vs. characters); confirm for non-ASCII text.
    pos = 0
    while pos < len(s):
        consumed, ucs = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        gid = mupdf.fz_encode_character_sc(user_font, ucs)
        font = user_font
        if gid == 0:
            gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
        mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
        adv = mupdf.fz_advance_glyph(font, gid, wmode)
        shift = (adv, 0) if wmode == 0 else (0, -adv)
        trm = mupdf.fz_pre_translate(trm, *shift)
    return trm
17327
17328
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of a buffer as str, cut off at the first NUL character.

    The bytes are decoded with the default codec of `bytes.decode()`;
    undecodable bytes are replaced, not reported as an error.
    '''
    decoded = mupdf.fz_buffer_extract_copy(buff).decode(errors='replace')
    head, _, _ = decoded.partition(chr(0))
    return head
17336
17337
def message_warning(text):
    '''
    Emit `text` as a warning: it is passed to message() with the prefix
    "warning: ".
    '''
    warning = f'warning: {text}'
    message(warning)
17343
17344
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Replace the stream of PDF object `obj` by the content of `buffer_`.

    Args:
        doc: the PDF document.
        obj: the stream object to update.
        buffer_: buffer with the new (uncompressed) stream content.
        compress: if true, store a compressed version when this is beneficial.

    A compressed version is only stored if the buffer is longer than 30 bytes
    and compression actually makes it shorter; /Filter is then set to
    /FlateDecode. In all other cases the buffer is stored as it is.
    '''
    def store_compressed():
        # Store a compressed stream if worthwhile; tell whether that was done.
        raw_size, _ = mupdf.fz_buffer_storage(buffer_)
        if raw_size <= 30:      # ignore small stuff
            return False
        deflated = JM_compress_buffer(buffer_)
        assert isinstance(deflated, mupdf.FzBuffer)
        if not deflated.m_internal:
            return False
        deflated_size, _ = mupdf.fz_buffer_storage(deflated)
        if deflated_size >= raw_size:   # not worth the effort
            return False
        mupdf.pdf_dict_put(
            obj,
            mupdf.PDF_ENUM_NAME_Filter,
            mupdf.PDF_ENUM_NAME_FlateDecode,
        )
        mupdf.pdf_update_stream(doc, obj, deflated, 1)
        return True

    if compress and store_compressed():
        return
    mupdf.pdf_update_stream(doc, obj, buffer_, 0)
17367
17368
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Make an XObject in `pdfout` from a PDF page.

    Args:
        pdfout: the PDF receiving the XObject.
        fsrcpage: the source page; its `.this` is converted with
            _as_pdf_page().
        xref: if positive, the object with this xref is assumed to be usable
            instead, and only a reference to it is returned.
        gmap: a `mupdf.PdfGraftMap`; used for copying the page's resources
            if its `m_internal` is set.

    Returns:
        The XObject, or the indirect reference to `xref`.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    page_obj = _as_pdf_page(fsrcpage.this).obj()
    mediabox = mupdf.pdf_to_rect(
        mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
    )

    # Deep-copy the resources object of the source page, via the graft map
    # whenever there is one.
    src_resources = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('Resources'))
    if gmap.m_internal:
        resources = mupdf.pdf_graft_mapped_object(gmap, src_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, src_resources)

    # The contents of the source page become the stream of the XObject.
    contents = JM_read_contents(page_obj)
    xobj = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    JM_update_stream(pdfout, xobj, contents, 1)

    # Finally attach the copied resources.
    mupdf.pdf_dict_put(xobj, PDF_NAME('Resources'), resources)
    return xobj
17402
17403
def PySequence_Check(s):
    '''
    Return True if `s` is a tuple or a list (subclasses included), else False.
    '''
    accepted = (tuple, list)
    return isinstance(s, accepted)
17406
17407
def PySequence_Size(s):
    '''
    Return the number of items of `s`, as reported by len().
    '''
    size = len(s)
    return size
17410
17411
# Error message constants. The same messages are also defined in extra.i.
#
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"


# Names of exception types. They are given as strings and are used as the
# `exc` argument of RAISEPY().
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
17445
def RAISEPY( msg, exc):
    '''
    Raise an exception with message `msg`.

    Args:
        msg: the exception message.
        exc: the wanted exception type: either an exception class or the
            name of one (for example `PyExc_ValueError`, which is the string
            'ValueError'). A name is looked up in this module first and then
            in module `builtins`.

    Raises:
        The exception type identified by `exc`. If `exc` does not identify a
        subclass of `Exception`, a plain `Exception` is raised. Every raised
        type derives from `Exception`, so callers catching `Exception`
        are not affected by the more specific type.
    '''
    import builtins

    wanted = exc
    if isinstance(exc, str):
        wanted = globals().get(exc, getattr(builtins, exc, None))
    if not (isinstance(wanted, type) and issubclass(wanted, Exception)):
        wanted = Exception
    raise wanted(msg)
17450
17451
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    '''
    Decode `s` with codec 'raw_unicode_escape' and return the resulting str.

    Args:
        s: str or bytes-like object (bytes, bytearray, memoryview). A str is
            first encoded to UTF-8. Any false value, like None or an empty
            string, gives "".
        errors: error handling scheme, used for encoding and decoding.

    Raises:
        TypeError: if `s` is true but neither str nor bytes-like. (This used
            to end in an UnboundLocalError.)
    '''
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        raw = bytes(s)
    else:
        raise TypeError(
            f"expected str or bytes-like object, got {type(s).__name__}"
        )
    return raw.decode('raw_unicode_escape', errors=errors)
17462
17463
def CheckColor(c: OptSeq):
    """Raise ValueError if `c` is true but not a valid color.

    A valid color is a list or tuple of 1, 3 or 4 components, none of them
    below 0 or above 1. False values (None, empty sequence) are accepted
    without any check.
    """
    if not c:
        return
    malformed = (
        type(c) not in (list, tuple)
        or len(c) not in (1, 3, 4)
        or min(c) < 0
        or max(c) > 1
    )
    if malformed:
        raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
17473
17474
def CheckFont(page: Page, fontname: str) -> tuple:
    """Return the first entry of the page's font list whose item 4 (the
    reference name) equals `fontname`, or None if there is no such entry.
    """
    matches = (entry for entry in page.get_fonts() if entry[4] == fontname)
    return next(matches, None)
17481
17482
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Return the first entry of `doc.FontInfos` whose item 0 equals `xref`,
    or None if the document has no font info for it.
    """
    matches = (entry for entry in doc.FontInfos if xref == entry[0])
    return next(matches, None)
17489
17490
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the argument of text marker annotations.

    Args:
        quads: a rect-like, a quad-like or a sequence of such objects.
    Returns:
        A 1-tuple with the rect's quad if `quads` passes CheckRect(), a
        1-tuple with `quads` itself if it passes CheckQuad(), otherwise
        `quads` unchanged after all its items have been checked.
    Raises:
        ValueError: an item of the sequence is neither rect-like nor quad-like.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if not all(CheckRect(item) or CheckQuad(item) for item in quads):
        raise ValueError("bad quads entry")
    return quads
17501
17502
def CheckMorph(o: typing.Any) -> bool:
    """Validate a 'morph' parameter.

    Returns:
        False if `o` is false (no morphing requested), True if it is a valid
        morph parameter.
    Raises:
        ValueError: `o` is true but is not a list or tuple of length 2 whose
            first item has length 2 and whose second item has length 6 with
            the last two entries being zero.
    """
    if not bool(o):
        return False
    if type(o) not in (list, tuple) or len(o) != 2:
        raise ValueError("morph must be a sequence of length 2")
    fixpoint, matrix = o[0], o[1]
    if len(fixpoint) != 2 or len(matrix) != 6:
        raise ValueError("invalid morph param 0")
    e, f = matrix[4], matrix[5]
    if not (e == f == 0):
        raise ValueError("invalid morph param 1")
    return True
17513
17514
def CheckParent(o: typing.Any):
    """Do nothing - the check for orphaned objects is disabled.

    The function used to be followed by unreachable code after an
    unconditional `return`. That code is kept here as a comment only:

        if not hasattr(o, "parent") or o.parent is None:
            raise ValueError(f"orphaned object {type(o)=}: parent is None")

    Returns:
        Always None.
    """
    return
17519
17520
def CheckQuad(q: typing.Any) -> bool:
    """Check whether an object is a convex quad-like.

    Returns:
        False if no Quad can be made from `q`, otherwise the `is_convex`
        property of that Quad. The failure is reported via exception_info()
        if g_exceptions_verbose is greater than 1.
    """
    try:
        quad = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return quad.is_convex
17532
17533
def CheckRect(r: typing.Any) -> bool:
    """Check whether an object is a non-degenerate rect-like.

    Returns:
        False if no Rect can be made from `r`, or if that Rect is empty or
        infinite; True otherwise. A failing conversion is reported via
        exception_info() if g_exceptions_verbose is greater than 1.
    """
    try:
        rect = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    if rect.is_empty:
        return False
    return not rect.is_infinite
17545
17546
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Return the PDF color operator string for color `c`.

    Args:
        c: a color sequence of 1, 3 or 4 components, or a single number
            (anything with a `__float__` method), which is treated as a
            one-component color. A false value yields "".
        f: "c" selects the upper case operator (G, RG, K), any other value
            the lower case one (g, rg, k).
    Returns:
        The components as formatted by _format_g(), followed by the
        operator and a trailing space.
    Raises:
        ValueError: from CheckColor() for an invalid color.
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        c = (c,)
    CheckColor(c)

    count = len(c)
    if count == 1:
        operands, upper, lower = c[0], "G ", "g "
    elif count == 3:
        operands, upper, lower = tuple(c), "RG ", "rg "
    else:
        operands, upper, lower = tuple(c), "K ", "k "
    prefix = _format_g(operands) + " "
    return prefix + (upper if f == "c" else lower)
17563
17564
def Page__add_text_marker(self, quads, annot_type):
    '''
    Create a text marker annotation of type `annot_type` on the page.

    Args:
        self: the page; must provide `_pdf_page()`.
        quads: iterable of quad-likes, one per marked area.
        annot_type: the annotation type handed to pdf_create_annot().

    Returns:
        The new `Annot`, or None if creating it failed with an `Exception`
        (which is reported via exception_info() if g_exceptions_verbose is
        set).

    While the annotation is created, a page rotation different from 0 is
    temporarily set to 0. It is restored in a `finally` clause, i.e. exactly
    once and on every way out of the function.
    '''
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)
    try:
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            mupdf.pdf_add_annot_quad_point(annot, JM_quad_from_py(item))
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return None
    finally:
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation)
    return Annot(annot)
17586
17587
def PDF_NAME(x):
    '''
    Return attribute `PDF_ENUM_NAME_<x>` of module `mupdf`; `x` must be a str.
    '''
    assert isinstance(x, str)
    # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
    # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
    # explicit, but this seems to be ignored by SWIG. If SWIG started to
    # generate code that respected `explicit`, we would need to return
    # `mupdf.PdfObj(...)` of this value instead.
    #
    # [Compare with extra.i, where we define our own PDF_NAME2() macro that
    # returns a mupdf::PdfObj.]
    return getattr(mupdf, f'PDF_ENUM_NAME_{x}')
17600
17601
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store font info `info` in `doc.FontInfos`.

    The first entry whose item 0 equals `info[0]` (the xref) is replaced.
    If there is no such entry, `info` is appended.
    """
    key = info[0]
    for position, entry in enumerate(doc.FontInfos):
        if entry[0] == key:
            doc.FontInfos[position] = info
            return
    doc.FontInfos.append(info)
17613
17614
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types will
    match an item in <args>. `None` will match anything in <args>. `type(None)`
    will match an arg whose value is `None`.

    A tuple that contains `None` marks an arg with a default value: the arg
    may be missing. If it is present, it must match one of the other items of
    the tuple (a tuple containing nothing but `None` matches anything). Such
    a tuple is never handed to isinstance() unchanged, because isinstance()
    raises TypeError as soon as it has to look at a `None` item.
    '''
    j = 0
    for type_ in types:
        optional = isinstance(type_, tuple) and None in type_
        if j >= len(args):
            if optional:
                # arg is missing but has default value.
                continue
            return False
        if optional:
            # Only the real types take part in the type check.
            type_ = tuple(t for t in type_ if t is not None) or None
        if type_ is not None and not isinstance(args[j], type_):
            return False
        j += 1
    return j == len(args)
17638
17639
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    Args:
        width, height: dimensions of the image.
        tr: rect-like target rectangle.
        rotate: rotation angle, handed to fz_rotate(). For values other
            than 0 and 180 the roles of image width and height are swapped.
        keep: if true, keep the proportion of width to height, otherwise
            fill the target rectangle.

    Returns:
        The matrix: a shift by (-0.5, -0.5), followed by the rotation,
        the scaling and the translation to the center of `tr`.
    '''
    target = JM_rect_from_py(tr)
    rotation = mupdf.fz_rotate(rotate)
    target_w = target.x1 - target.x0
    target_h = target.y1 - target.y0

    # fw, fh: image width / height relative to the larger of both
    if keep:
        larger = max(width, height)
        fw, fh = width / larger, height / larger
    else:
        fw = fh = 1
    smaller = min(fw, fh)
    if rotate != 0 and rotate != 180:
        fw, fh = fh, fw

    # Default is the full target rectangle; otherwise shrink one dimension.
    w, h = target_w, target_h
    if fw < 1:
        if target_w / fw > target_h / fh:
            w = target_h * smaller
        else:
            h = target_w / smaller
    elif fw != fh:
        if target_w / fw > target_h / fh:
            w = target_h / smaller
        else:
            h = target_w * smaller

    center = mupdf.fz_make_point(
        (target.x0 + target.x1) / 2,
        (target.y0 + target.y1) / 2,
    )
    mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in (
        rotation,
        mupdf.fz_scale(w, h),
        mupdf.fz_translate(center.x, center.y),
    ):
        mat = mupdf.fz_concat(mat, step)
    return mat
17687
17688
def detect_super_script(line, ch):
    '''
    Tell whether character `ch` is a superscript within `line`.

    Only lines with wmode 0 and direction (1, 0) are examined; for all other
    lines the result is 0. For examined lines the result is True if the
    y-origin of the character is above (less than) the y-origin of the line's
    first character by more than 10% of the character's size, else False.
    '''
    ln = line.m_internal
    if ln.wmode != 0 or ln.dir.x != 1 or ln.dir.y != 0:
        return 0
    glyph = ch.m_internal
    return glyph.origin.y < ln.first_char.origin.y - glyph.size * 0.1
17693
17694
def dir_str(x):
    '''
    Return a multi-line description of `x`: a header line with `x`, its type
    and the number of names in dir(x), followed by one line per name.
    '''
    header = f'{x} {type(x)} ({len(dir(x))}):\n'
    return header + ''.join(f' {name}\n' for name in dir(x))
17700
17701
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
    operator.

    Notes:
        Each character of the input is converted to either 2 or 4 hex digits.
        A text which already starts with "[<" and ends with ">]" is returned
        unchanged, an empty text gives "[<>]".
    Args:
        text: the string to convert.
        glyphs: table indexed by character code; item 0 of an entry is used
            as the glyph number. May be None for a simple font.
        simple: true for a simple font: 2 hex digits per character. The
            character code itself is used if `glyphs` is None, otherwise
            (Symbol, ZapfDingbats) the glyph number. Characters with a code
            above 255 are replaced by "b7".
        ordering: only used if `simple` is false (4 hex digits per
            character): if negative, glyph numbers are used, otherwise
            (a CJK font) the character codes.
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text

    if not bool(text):
        return "[<>]"

    codes = [ord(char) for char in text]
    if simple:
        if glyphs is None:  # not Symbol, not ZapfDingbats: use char code
            digits = ("%02x" % code if code < 256 else "b7" for code in codes)
        else:  # Symbol or ZapfDingbats: use glyphs
            digits = ("%02x" % glyphs[code][0] if code < 256 else "b7" for code in codes)
    elif ordering < 0:  # not a CJK font: use the glyphs
        digits = ("%04x" % glyphs[code][0] for code in codes)
    else:  # CJK: use the char codes
        digits = ("%04x" % code for code in codes)

    return "[<" + "".join(digits) + ">]"
17737
17738
def get_pdf_str(s: str) -> str:
    """ Return a PDF string depending on its coding.

    Notes:
        Returns a string bracketed with either "()" or "<>" for hex values.
        If only printable ASCII then "(original)" is returned, with "(", ")"
        and the backslash being escaped by a backslash. Characters with a
        code of 128 to 255 are inserted as octal strings \\nnn, and
        backspace, tab, line feed, form feed and carriage return as
        \\b, \\t, \\n, \\f and \\r. All other characters up to code 127 are
        unsupported and replaced by \\267 (0xB7).
        As soon as a character beyond code 255 is found, a string
        "<feff[hexstring]>" is returned instead, where [hexstring] is the
        UTF-16BE encoding of the complete original.
    """
    if not bool(s):
        return "()"

    # escape sequences of the supported white space / control characters
    escapes = {8: "\\b", 9: "\\t", 10: "\\n", 12: "\\f", 13: "\\r"}

    pieces = []
    for char in s:
        code = ord(char)
        if code > 255:  # beyond 8-bit code range: all is UTF-16BE with BOM
            encoded = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
            return "<" + encoded.hex() + ">"  # brackets indicate hex
        if 31 < code < 127:  # in ASCII range
            pieces.append("\\" + char if char in "()\\" else char)
        elif code > 127:  # beyond ASCII
            pieces.append("\\%03o" % code)
        else:  # white space, or unsupported: replace by 0xB7
            pieces.append(escapes.get(code, "\\267"))

    return "(" + "".join(pieces) + ")"
17790
17791
def get_tessdata(tessdata=None):
    """Detect Tesseract language support folder.

    This function is used to enable OCR via Tesseract even if the language
    support folder is not specified directly or in environment variable
    TESSDATA_PREFIX.

    * If <tessdata> is set we return it directly.

    * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.

    * Otherwise we search for a Tesseract installation and return its language
      support folder: first by asking `tesseract --list-langs`, then via
      `where tesseract` (Windows) or `whereis` (other systems).

    * Otherwise we raise an exception.

    Raises:
        RuntimeError: no language support folder could be found.
    """
    if tessdata:
        return tessdata
    tessdata = os.getenv("TESSDATA_PREFIX")
    if tessdata:  # use environment variable if set
        return tessdata

    # Try to locate the tesseract-ocr installation.

    import subprocess

    def shell(command):
        # Run a command line; never raises for a failing command.
        return subprocess.run(command, shell=1, capture_output=1, check=0, text=True)

    cp = shell('tesseract --list-langs')
    if cp.returncode == 0:
        m = re.search('List of available languages in "(.+)"', cp.stdout)
        if m:
            return m.group(1)

    # Windows systems:
    if sys.platform == "win32":
        cp = shell("where tesseract")
        response = cp.stdout.strip()
        if cp.returncode or not response:
            raise RuntimeError("No tessdata specified and Tesseract is not installed")
        # If several lines are printed (one per match), the first one is used.
        executable = response.splitlines()[0].strip()
        dirname = os.path.dirname(executable)  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        # should not happen!
        raise RuntimeError(
            f"No tessdata specified and Tesseract installation has no {tessdata} folder"
        )

    # Unix-like systems:
    attempts = []
    for path in 'tesseract-ocr', 'tesseract':
        cp = shell(f'whereis {path}')
        if cp.returncode != 0:
            continue
        response = cp.stdout.strip().split()
        if len(response) != 2:
            continue
        # search tessdata in folder structure
        dirname = response[1]  # contains tesseract-ocr installation folder
        pattern = f"{dirname}/*/tessdata"
        attempts.append(pattern)
        tessdatas = sorted(glob.glob(pattern))
        if tessdatas:
            return tessdatas[-1]
    if attempts:
        text = 'No tessdata specified and no match for:\n'
        for attempt in attempts:
            text += f' {attempt}'
        raise RuntimeError(text)
    raise RuntimeError('No tessdata specified and Tesseract is not installed')
17860
17861
def css_for_pymupdf_font(
    fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Adds @font-face support for fonts contained in package pymupdf-fonts.

    Creates a CSS font-family for all fonts starting with string 'fontcode'.

    Note:
        The font naming convention in package pymupdf-fonts is "fontcode<sf>",
        where the suffix "sf" is either empty or one of "it", "bo" or "bi".
        These suffixes thus represent the regular, italic, bold or bold-italic
        variants of a font. For example, font code "notos" refers to fonts
        "notos" - "Noto Sans Regular"
        "notosit" - "Noto Sans Italic"
        "notosbo" - "Noto Sans Bold"
        "notosbi" - "Noto Sans Bold Italic"

        For this example, the function creates four CSS @font-face
        definitions and collectively assigns the font-family name "notos" to
        them (or the "name" value).

        The font buffers of all matching fonts are added to the archive
        provided as parameter, each under its font code.
        To use the font in pymupdf.Story, execute 'set_font(fontcode)'. The
        correct font weight (bold) or style (italic) will automatically be
        selected.

    Args:
        fontcode: (str) font code for naming the font variants to include.
            E.g. "notos" adds the notos, notosit, notosbo, notosbi fonts.
            A maximum of 4 font variants is accepted.
        CSS: (str) CSS string to add @font-face definitions to.
        archive: (Archive, mandatory) where to place the font buffers.
        name: (str) use this as family-name instead of 'fontcode'.
    Returns:
        Modified CSS, with appended @font-face statements for each font variant
        of fontcode.
    Raises:
        ValueError: 'archive' is no Archive, no font code starts with
            'fontcode', or more than four font codes do.
    """
    # @font-face template string
    CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")

    # select font codes starting with the passed-in string
    matching = [code for code in fitz_fontdescriptors.keys() if code.startswith(fontcode)]
    if not matching:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(matching) > 4:
        raise ValueError("fontcode too short")

    family = fontcode if name is None else name  # name of the font-family
    rules = []
    for code in matching:
        descriptor = fitz_fontdescriptors[code]
        is_bold = descriptor["bold"]
        is_italic = descriptor["italic"]
        archive.add(descriptor["loader"](), code)  # font buffer into archive
        weight = "font-weight: bold;" if is_bold else ""
        style = "font-style: italic;" if is_italic else ""
        rules.append(CSSFONT % (family, code, weight, style))
    return ("" if CSS is None else CSS) + "".join(rules)
17929
17930
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        text: the string to measure.
        fontname: name of the font (case is ignored).
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
    Raises:
        ValueError: the font is neither contained in Base14_fontdict nor one
            of the "china-", "japan" and "korea" names listed below.
    """
    fontname = fontname.lower()
    basename = Base14_fontdict.get(fontname, None)

    # Symbol and ZapfDingbats have glyph tables of their own. Item 1 of an
    # entry is used as the width; characters beyond code 255 use entry 183.
    glyph_table = None
    if basename == "Symbol":
        glyph_table = symbol_glyphs
    if basename == "ZapfDingbats":
        glyph_table = zapf_glyphs
    if glyph_table is not None:
        widths = (
            glyph_table[ord(char) if ord(char) < 256 else 183][1]
            for char in text
        )
        return sum(widths) * fontsize

    if fontname in Base14_fontdict:
        return util_measure_string(text, Base14_fontdict[fontname], fontsize, encoding)

    cjk_names = (
        "china-t",
        "china-s",
        "china-ts",
        "china-ss",
        "japan",
        "japan-s",
        "korea",
        "korea-s",
    )
    if fontname in cjk_names:
        # every character counts as one font size
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % fontname)
17971
17972
def image_profile(img: ByteString) -> dict:
    """ Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (any object with a `read` method).
    Returns:
        The result of TOOLS.image_profile() for the image data: a dictionary
        with keys width, height, colorspace.n, bpc, type, ext and size,
        where 'type' is the MuPDF image type (0 to 14) and 'ext' the suitable
        file extension.
    Raises:
        ValueError: `img` is none of the supported types.
    """
    if type(img) is io.BytesIO:
        return TOOLS.image_profile(img.getvalue())
    if hasattr(img, "read"):
        return TOOLS.image_profile(img.read())
    if type(img) in (bytes, bytearray):
        return TOOLS.image_profile(img)
    raise ValueError("bad argument 'img'")
17993
17994
def jm_append_merge(dev):
    '''
    Append the current path `dev.pathdict` to the list `dev.out`, or merge it
    into the last path of that list.

    (1) If `dev.method` is set, nothing is appended or merged: the method of
        `dev.out` with that name is called with the path dictionary, a
        message is issued if its result is false, and `dev.pathdict` is set
        to None.
    (2) Otherwise the path is merged if it is a stroke path (type 's')
        following a fill path (type 'f') with an equal list of items: keys
        missing in the previous path are copied into it and its type is
        changed to 'fs'.
    (3) In all other cases a copy of the path is appended.

    In cases (2) and (3) `dev.pathdict` is cleared afterwards.
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:  # function or method
        # callback.
        if dev.method is None:
            # fixme, this surely cannot happen?
            assert 0
        else:
            resp = getattr(dev.out, dev.method)(dev.pathdict)
        if not resp:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def append():
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    assert isinstance(dev.out, list)
    if not dev.out:  # always append first path
        return append()
    if dev.pathdict[ dictkey_type] != 's':  # if not stroke, then append
        return append()
    prev = dev.out[-1]  # previous path
    if prev[ dictkey_type] != 'f':  # if previous not fill, append
        return append()
    # last check: there must be the same list of items for "f" and "s".
    if prev[ dictkey_items] != dev.pathdict[ dictkey_items]:
        return append()

    # merge with no override
    try:
        for key, value in dev.pathdict.items():
            if key not in prev:
                prev[key] = value
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        merged = False
    else:
        merged = True

    if merged:
        prev[ dictkey_type] = 'fs'
        dev.pathdict.clear()
    else:
        message("could not merge stroke and fill path")
        append()
18064
18065
def jm_bbox_add_rect( dev, ctx, rect, code):
    '''
    Append an entry for `rect` to the list `dev.result`.

    The entry is the tuple (code, rect) where rect is converted by
    JM_py_from_rect(). If `dev.layers` is true, `dev.layer_name` is added
    as a third tuple item.
    '''
    if dev.layers:
        entry = (code, JM_py_from_rect(rect), dev.layer_name)
    else:
        entry = (code, JM_py_from_rect(rect))
    dev.result.append(entry)
18071
18072
def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params):
    '''
    Device callback: record the unit rectangle transformed by `ctm` under
    the code "fill-image".
    '''
    # Keep the unit rectangle in a named object while its internal struct
    # is in use.
    unit = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    bbox = mupdf.ll_fz_transform_rect(unit.internal(), ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "fill-image")
18077
18078
def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record the unit rectangle transformed by `ctm` under
    the code "fill-imgmask". Exceptions are re-raised after optional
    diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18085
18086
def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record the bounding box of a filled path under the
    code "fill-path".

    The box is computed by mupdf.ll_fz_bound_path() without stroke state.
    `even_odd` and the colour arguments are not used here. Exceptions are
    re-raised after optional diagnostics.
    '''
    try:
        jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path(path, None, ctm), "fill-path")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
18094
18095
def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params):
    '''
    Device callback: record the bounds of a shading under the code
    "fill-shade". Exceptions are re-raised after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_shade(shade, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18102
18103
def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args):
    '''
    Device callback: record the bounds of stroked text (stroke state
    included) under the code "stroke-text". Further arguments are ignored.
    Exceptions are re-raised after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18110
18111
def jm_bbox_fill_text( dev, ctx, text, ctm, *args):
    '''
    Device callback: record the bounds of filled text under the code
    "fill-text". Further arguments are ignored. Exceptions are re-raised
    after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18118
18119
def jm_bbox_ignore_text( dev, ctx, text, ctm):
    '''
    Device callback: record the bounds of ignored text under the code
    "ignore-text".
    '''
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "ignore-text")
18122
18123
def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record the bounding box of a stroked path (stroke
    state included) under the code "stroke-path". Exceptions are re-raised
    after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_path(path, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18130
18131
def jm_checkquad(dev):
    '''
    Check whether the last 4 line items of the current path form a quad.

    The lines are expected to be a polyline already (end point of a line
    equals start point of the next one), so only the polygon condition is
    checked: the end point of the last line must equal the start point of
    the first line.

    Returns 1 if the 4 line items were replaced by one "qu" item (in which
    case `dev.linecount` is reset to 0), otherwise 0.
    '''
    items = dev.pathdict[dictkey_items]
    count = len(items)
    first = count - 4   # index of the first of the 4 lines

    # Collect x / y of the 4 line start points: 8 floats.
    xy = []
    for offset in range(4):
        line = items[first + offset]
        start = JM_point_from_py(line[1])
        xy.append(start.x)
        xy.append(start.y)

    # `line` is the last line now: its end point must close the polygon.
    end = JM_point_from_py(line[2])
    if end.x != xy[0] or end.y != xy[1]:
        return 0    # not a polygon!

    # we have detected a quad
    dev.linecount = 0

    # A quad item is ("qu", (ul, ur, ll, lr)), each corner a pair of floats.
    # Relationship of the float list to the quad points:
    # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
    quad = mupdf.fz_make_quad(xy[0], xy[1], xy[6], xy[7], xy[2], xy[3], xy[4], xy[5])

    items[first] = ('qu', JM_py_from_quad(quad))    # replace first line by quad
    del items[first + 1 : count]                    # delete remaining 3 lines
    return 1
18169
18170
def jm_checkrect(dev):
    '''
    Check whether the last 3 path items represent a rectangle.

    Resets `dev.linecount` to 0 in every case. Returns 1 if the 3 items
    were replaced by one ("re", rect, orientation) item, otherwise 0.
    '''
    dev.linecount = 0
    items = dev.pathdict[dictkey_items]
    count = len(items)

    # The middle line is not needed for the check.
    first_line = items[count - 3]
    third_line = items[count - 1]
    ll = JM_point_from_py(first_line[1])
    lr = JM_point_from_py(first_line[2])
    ur = JM_point_from_py(third_line[1])
    ul = JM_point_from_py(third_line[2])

    # Assumption:
    # When decomposing rects, MuPDF always starts with a horizontal line,
    # followed by a vertical line, followed by a horizontal line.
    # First line: (ll, lr), third line: (ul, ur).
    is_rect = (
            ll.y == lr.y
            and ll.x == ul.x
            and ur.y == ul.y
            and ur.x == lr.x
            )
    if not is_rect:
        return 0

    # Orientation: +1 if `ul` lies above `lr` (smaller y), else -1.
    if ul.y < lr.y:
        orientation = 1
        r = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
    else:
        orientation = -1
        r = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)

    # Replace the 3 "l" items by one "re" item.
    items[count - 3] = ('re', JM_py_from_rect(r), orientation)
    del items[count - 2 : count]
    return 1
18218
18219
def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Call jm_trace_text_span() for every span of `text`, following the
    linked list that starts at `text.head` and continues via `.next`.
    '''
    span = text.head
    while span:
        jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno)
        span = span.next
18227
18228
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Append a dictionary describing one text span to the list `dev.out`.

    Args:
        dev: device object; `linewidth`, `layer_name` and `out` are used.
        span: a mupdf.fz_text_span.
        type_: text trace type, stored under key "type".
        ctm: a mupdf.fz_matrix, the current transformation matrix.
        colorspace, color: text color; converted to RGB if a colorspace
            is given, otherwise (0, 0, 0) is stored.
        alpha: stored under key "opacity".
        seqno: stored under key "seqno".

    The dictionary has the keys dir, font, wmode, flags, bidi_lvl,
    bidi_dir, ascender, descender, colorspace, color, size, opacity,
    linewidth, spacewidth, type, bbox, layer, seqno and chars, where
    "chars" is a tuple of (ucs, gid, origin, bbox) items.
    '''
    assert isinstance(span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan(span)
    assert isinstance(ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix(ctm)
    font = span.font()
    wmode = span.m_internal.wmode
    fontname = JM_font_name(font)

    # text transformation matrix, writing direction and font size
    mat = mupdf.fz_concat(span.trm(), ctm)
    direction = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat)
    fsize = math.sqrt(direction.x * direction.x + direction.y * direction.y)
    direction = mupdf.fz_normalize_vector(direction)

    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # effective ascender / descender
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)

    # font flags
    fflags = (
            mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
            + mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
            + mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
            + mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD
            )

    rot = mupdf.fz_make_matrix(direction.x, direction.y, -direction.y, direction.x, 0, 0)
    if direction.x == -1:   # left-right flip
        rot.d = 1

    # Same for all characters of the span: up-down flip?
    flipped = (
            (mat.d > 0 and (direction.x == 1 or direction.x == -1))
            or
            (mat.b != 0 and mat.b == -mat.c)
            )

    # walk through characters of span
    space_adv = 0
    last_adv = 0
    span_bbox = mupdf.FzRect()
    char_list = []
    for i in range(span.m_internal.len):
        item = span.items(i)
        adv = 0
        if item.gid >= 0:
            adv = mupdf.fz_advance_glyph(font, item.gid, wmode)
        adv *= fsize
        last_adv = adv
        if item.ucs == 32:
            space_adv = adv

        origin = mupdf.fz_transform_point(mupdf.fz_make_point(item.x, item.y), ctm)

        # rotation around the character origin
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, origin.x, origin.y))

        x0 = origin.x
        x1 = x0 + adv
        if flipped:
            y0 = origin.y + dscsize
            y1 = origin.y + ascsize
        else:
            y0 = origin.y - ascsize
            y1 = origin.y - dscsize
        char_bbox = mupdf.fz_transform_rect(mupdf.fz_make_rect(x0, y0, x1, y1), m1)

        char_list.append(
                (
                    item.ucs,
                    item.gid,
                    (origin.x, origin.y),
                    (char_bbox.x0, char_bbox.y0, char_bbox.x1, char_bbox.y1),
                )
                )
        if i == 0:
            span_bbox = char_bbox
        else:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
    chars = tuple(char_list)

    # No space character seen in the span: determine a space width.
    if not space_adv:
        if fflags & TEXT_FONT_MONOSPACED:
            space_adv = last_adv    # for mono, any char width suffices
        else:
            c, _out_font = mupdf.fz_encode_character_with_fallback(font, 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(font, c, wmode) * fsize
            if not space_adv:
                space_adv = last_adv

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    # width of character border, default: 5% of font size
    linewidth = dev.linewidth if dev.linewidth > 0 else fsize * 0.05

    # make the span dictionary
    span_dict = {
            'dir': JM_py_from_point(direction),
            'font': JM_EscapeStrFromStr(fontname),
            'wmode': wmode,
            'flags': fflags,
            'bidi_lvl': span.m_internal.bidi_level,
            'bidi_dir': span.m_internal.markup_dir,
            'ascender': asc,
            'descender': dsc,
            'colorspace': 3,
            'color': rgb,
            'size': fsize,
            'opacity': alpha,
            'linewidth': linewidth,
            'spacewidth': space_adv,
            'type': type_,
            'bbox': JM_py_from_rect(span_bbox),
            'layer': dev.layer_name,
            'seqno': seqno,
            'chars': chars,
            }
    dev.out.append(span_dict)
18379
18380
def jm_lineart_color(colorspace, color):
    '''
    Convert `color` given in `colorspace` to RGB.

    Returns the first 3 items of the conversion result, or an empty tuple
    if `colorspace` is false. Exceptions are re-raised after optional
    diagnostics.
    '''
    if not colorspace:
        return ()
    try:
        # The objects passed to mupdf.ll_fz_convert_color() must be kept in
        # named Python variables. Using a temporary like
        #
        #   mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
        #
        # seems to end up with a corrupted `params`.
        target = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        rgb = mupdf.ll_fz_convert_color(
                colorspace,
                color,
                target.m_internal,
                None,
                params.internal(),
                )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
    return rgb[:3]
18407
18408
def jm_lineart_drop_device(dev, ctx):
    '''
    Reset the device: `dev.out` is replaced by an empty list if it is a
    list, `dev.scissors` is always replaced by an empty list.
    '''
    out_is_list = isinstance(dev.out, list)
    if out_is_list:
        dev.out = list()
    dev.scissors = list()
18413
18414
def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record a filled path as a dictionary of type "f".

    The path items are created by jm_lineart_path(). If there are none,
    nothing is recorded and `dev.seqno` is left unchanged. Otherwise the
    dictionary is completed, handed to jm_append_merge() and `dev.seqno`
    is incremented. Exceptions are re-raised after optional diagnostics.
    '''
    try:
        assert isinstance(ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix(ctm)
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path(dev, ctx, path)
        pathdict = dev.pathdict
        if pathdict is None:
            return
        pathdict[dictkey_type] = "f"
        pathdict["even_odd"] = bool(even_odd)
        pathdict["fill_opacity"] = alpha
        pathdict["fill"] = jm_lineart_color(colorspace, color)
        pathdict[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        pathdict["seqno"] = dev.seqno
        pathdict['layer'] = dev.layer_name
        if dev.clips:
            # the nesting level is only recorded when clips are requested
            pathdict['level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18447
18448
18449 # There are 3 text trace types:
18450 # 0 - fill text (PDF Tr 0)
18451 # 1 - stroke text (PDF Tr 1)
18452 # 3 - ignore text (PDF Tr 3)
18453
def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: trace filled text (trace type 0) with the current
    `dev.seqno`, then increment `dev.seqno`.
    '''
    if 0:
        # Disabled diagnostics of the callback arguments.
        log(f'{type(ctx)=} {ctx=}')
        log(f'{type(dev)=} {dev=}')
        log(f'{type(text)=} {text=}')
        log(f'{type(ctm)=} {ctm=}')
        log(f'{type(colorspace)=} {colorspace=}')
        log(f'{type(color)=} {color=}')
        log(f'{type(alpha)=} {alpha=}')
        log(f'{type(color_params)=} {color_params=}')
    trace_type = 0  # fill text
    jm_trace_text(dev, text, trace_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
18466
18467
def jm_lineart_ignore_text(dev, text, ctm, *args):
    '''
    Device callback: trace ignored text (trace type 3) with the current
    `dev.seqno`, then increment `dev.seqno`.

    This function is installed as the `ignore_text` method of a device
    class. Like the other device callbacks it is then called with the
    positional arguments (dev, ctx, text, ctm). The historical call form
    (dev, text, ctm) without a context remains supported.

    Args:
        dev: the device object.
        text: the text object - or the context if 4 arguments are passed.
        ctm: the matrix - or the text object if 4 arguments are passed.
        args: empty, or the matrix if 4 arguments are passed.
    '''
    if args:
        # Called as (dev, ctx, text, ctm): the context arrived in `text`,
        # the text object in `ctm` and the matrix in args[0].
        text, ctm = ctm, args[0]
    # No colorspace / color for ignored text, opacity 1.
    jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
18472
18473
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker which converts the segments of a path into entries of the
    list `dev.pathdict[dictkey_items]`. The state of the walk (last point,
    first point, count of consecutive lines, path rectangle) is kept in the
    device object `dev`.
    '''

    def __init__(self, dev):
        super().__init__()
        self.use_virtual_moveto()
        self.use_virtual_lineto()
        self.use_virtual_curveto()
        self.use_virtual_closepath()
        self.dev = dev

    def closepath(self, ctx):    # trace_close().
        '''
        Close the current sub path. Three preceding consecutive lines are
        checked for being a rectangle first.
        '''
        dev = self.dev
        try:
            if dev.linecount == 3 and jm_checkrect(dev):
                # the 3 lines have been replaced by a rectangle
                return
            dev.linecount = 0   # reset # of consec. lines

            if not dev.havemove:
                dev.pathdict["closePath"] = True
            else:
                if dev.lastpoint != dev.firstpoint:
                    # explicit line back to the start of the sub path
                    closing_line = (
                            "l",
                            JM_py_from_point(dev.lastpoint),
                            JM_py_from_point(dev.firstpoint),
                            )
                    dev.pathdict[dictkey_items].append(closing_line)
                    dev.lastpoint = dev.firstpoint
                dev.pathdict["closePath"] = False

            dev.havemove = 0

        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
        '''
        Append a "c" item: last point plus the 3 transformed points.
        '''
        dev = self.dev
        try:
            dev.linecount = 0   # reset # of consec. lines
            points = [
                    mupdf.fz_make_point(x1, y1),
                    mupdf.fz_make_point(x2, y2),
                    mupdf.fz_make_point(x3, y3),
                    ]
            points = [mupdf.fz_transform_point(p, dev.ctm) for p in points]
            for p in points:
                dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, p)

            curve = (
                    "c",
                    JM_py_from_point(dev.lastpoint),
                    JM_py_from_point(points[0]),
                    JM_py_from_point(points[1]),
                    JM_py_from_point(points[2]),
                    )
            dev.lastpoint = points[2]
            dev.pathdict[dictkey_items].append(curve)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def lineto(self, ctx, x, y):   # trace_lineto().
        '''
        Append an "l" item from the last point to the transformed point.
        After 4 consecutive lines of a non-fill path, check for a quad.
        '''
        dev = self.dev
        try:
            target = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
            dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, target)
            line = (
                    'l',
                    JM_py_from_point(dev.lastpoint),
                    JM_py_from_point(target),
                    )
            dev.lastpoint = target
            dev.pathdict[dictkey_items].append(line)
            dev.linecount += 1  # counts consecutive lines
            if dev.linecount == 4 and dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(dev)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def moveto(self, ctx, x, y):   # trace_moveto().
        '''
        Start a new sub path at the transformed point.
        '''
        if 0 and isinstance(self.dev.pathdict, dict):
            # Disabled diagnostics.
            log(f'self.dev.pathdict:')
            for n, v in self.dev.pathdict.items():
                log( '    {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}')

        dev = self.dev
        try:
            start = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
            dev.lastpoint = start
            if mupdf.fz_is_infinite_rect(dev.pathrect):
                # first point of the path: start with a rect of this point
                dev.pathrect = mupdf.fz_make_rect(start.x, start.y, start.x, start.y)
            dev.firstpoint = dev.lastpoint
            dev.havemove = 1
            dev.linecount = 0   # reset # of consec. lines
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise
18587
18588
def jm_lineart_path(dev, ctx, path):
    '''
    Create the "items" list of the path dictionary `dev.pathdict`:
    * reset path rectangle, line count and last point of the device
    * create a new path dictionary with an empty items list
    * invoke fz_walk_path() with a Walker, which creates the single items
    * if no items were created, set `dev.pathdict` to None
    Exceptions are re-raised after optional diagnostics.
    '''
    try:
        dev.pathrect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint(0, 0)
        dev.pathdict = {dictkey_items: []}

        # First time we create a Walker instance is slow, e.g. 0.3s, then later
        # times run in around 0.01ms. If Walker is defined locally instead of
        # globally, each time takes 0.3s.
        #
        walker = Walker(dev)
        # Unlike fz_run_page(), fz_path_walker callbacks are not passed
        # a pointer to the struct, instead they get an arbitrary
        # void*. The underlying C++ Director callbacks use this void* to
        # identify the fz_path_walker instance so in turn we need to pass
        # arg=walker.m_internal.
        kept_path = mupdf.FzPath(mupdf.ll_fz_keep_path(path))
        mupdf.fz_walk_path(kept_path, walker, walker.m_internal)

        if not dev.pathdict[dictkey_items]:
            # nothing was added
            dev.pathdict = None
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18623
18624
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: record a stroked path as a dictionary of type "s".

    The path items are created by jm_lineart_path(). If there are none,
    nothing is recorded and `dev.seqno` is left unchanged. Otherwise the
    dictionary is completed with the stroke properties, handed to
    jm_append_merge() and `dev.seqno` is incremented.

    The value of key "dashes" is always a string: "[] 0" if there is no
    dash pattern, else "[ <len> <len> ... ] <phase>".

    Exceptions are re-raised after optional diagnostics.
    '''
    #log(f'{dev.pathdict=} {dev.clips=}')
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        # Factor applied to line width and dash values. It is only taken
        # from the matrix if a / d (or b / c) have equal absolute values.
        dev.pathfactor = 1
        if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
            dev.pathfactor = abs(ctm.a)
        elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
            dev.pathfactor = abs(ctm.b)
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path( dev, ctx, path)
        if dev.pathdict is None:
            return
        dev.pathdict[ dictkey_type] = 's'
        dev.pathdict[ 'stroke_opacity'] = alpha
        dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
        dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
        dev.pathdict[ 'lineCap'] = (
                stroke.start_cap,
                stroke.dash_cap,
                stroke.end_cap,
                )
        # NOTE(review): the line join value is multiplied by the path
        # factor as well - kept unchanged, confirm that this is intended.
        dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
        if 'closePath' not in dev.pathdict:
            dev.pathdict['closePath'] = False

        # output the "dashes" string
        if stroke.dash_len:
            parts = ['[ ']  # left bracket
            for i in range( stroke.dash_len):
                # We use mupdf python's SWIG-generated floats_getitem() fn to
                # access float *stroke.dash_list[].
                value = mupdf.floats_getitem( stroke.dash_list, i)  # stroke.dash_list[i].
                parts.append( f'{_format_g(dev.pathfactor * value)} ')
            parts.append( f'] {_format_g(dev.pathfactor * stroke.dash_phase)}')
            # Store a Python string like in the no-dashes case, not a
            # MuPDF buffer object.
            dev.pathdict[ 'dashes'] = ''.join( parts)
        else:
            dev.pathdict[ 'dashes'] = '[] 0'
        dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        dev.pathdict['layer'] = dev.layer_name
        dev.pathdict[ 'seqno'] = dev.seqno
        if dev.clips:
            dev.pathdict[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1

    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
18678
18679
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Device callback: record a clip path as a dictionary of type "clip".

    Does nothing unless `dev.clips` is true. If the path has no items,
    nothing is recorded. Otherwise the scissor is computed, the dictionary
    is handed to jm_append_merge() and `dev.depth` is incremented.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    pathdict = dev.pathdict
    if pathdict is None:
        return
    pathdict[dictkey_type] = 'clip'
    pathdict['even_odd'] = bool(even_odd)
    pathdict.setdefault('closePath', False)
    pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    pathdict['level'] = dev.depth
    pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
18699
18700
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Device callback: record a stroke clip path as a dictionary of type
    "clip" with "even_odd" set to None.

    Does nothing unless `dev.clips` is true. If the path has no items,
    nothing is recorded. Otherwise the scissor is computed, the dictionary
    is handed to jm_append_merge() and `dev.depth` is incremented.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)   # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    # The key must be the value of constant `dictkey_type` (as in
    # jm_lineart_clip_path()), not the literal string 'dictkey_type':
    # jm_append_merge() reads the path type via this constant.
    dev.pathdict[dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
18719
18720
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''
    Device callback: if `dev.clips` is true, push a new scissor via
    compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
18726
18727
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''
    Device callback: if `dev.clips` is true, push a new scissor via
    compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
18733
18734
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''
    Device callback: if `dev.clips` is true, push a new scissor via
    compute_scissor() and increment `dev.depth`.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
18740
18741
def jm_lineart_pop_clip(dev, ctx):
    '''
    Device callback: remove the last entry of `dev.scissors` and decrement
    `dev.depth`. Nothing happens if `dev.clips` is false or if there are
    no scissors.
    '''
    if dev.clips and dev.scissors:
        dev.scissors.pop()
        dev.depth -= 1
18750
18751
def jm_lineart_begin_layer(dev, ctx, name):
    '''
    Device callback: store `name` as the current layer name of the device,
    or the empty string if `name` is false.
    '''
    dev.layer_name = name or ""
18757
18758
def jm_lineart_end_layer(dev, ctx):
    '''
    Device callback: reset the current layer name of the device to the
    empty string.
    '''
    dev.layer_name = ""
18761
18762
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    Device callback: record the start of a group as a dictionary of type
    "group" and increment `dev.depth`. Does nothing unless `dev.clips` is
    true.
    '''
    if not dev.clips:
        return
    group = dict()
    group["type"] = "group"
    group["rect"] = JM_py_from_rect(bbox)
    group["isolated"] = bool(isolated)
    group["knockout"] = bool(knockout)
    group["blendmode"] = mupdf.fz_blendmode_name(blendmode)
    group["opacity"] = alpha
    group["level"] = dev.depth
    group["layer"] = dev.layer_name
    dev.pathdict = group
    jm_append_merge(dev)
    dev.depth += 1
18779
18780
def jm_lineart_end_group(dev, ctx):
    '''
    Device callback: decrement `dev.depth` at the end of a group. Does
    nothing unless `dev.clips` is true.
    '''
    if dev.clips:
        dev.depth -= 1
18786
18787
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback: trace stroked text (trace type 1) with the current
    `dev.seqno`, then increment `dev.seqno`.
    '''
    trace_type = 1  # stroke text
    jm_trace_text(dev, text, trace_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
18791
18792
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked paths: remember the line width of `stroke`
    in `dev.linewidth` and increase the sequence number.
    '''
    width = stroke.linewidth
    dev.linewidth = width
    jm_increase_seqno(dev, ctx)
18796
18797
def jm_increase_seqno( dev, ctx, *_ignored):
    '''
    Device callback: increment `dev.seqno`. All arguments after `ctx` are
    ignored. Exceptions are re-raised after optional diagnostics.
    '''
    try:
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
18804
18805
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute the matrix which maps the line from p1 to p2 to the x-axis,
    such that the line keeps its length and p1 * matrix = Point(0, 0).

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis
        at the same distance from Point(0, 0). The matrix is computed by
        util_hor_matrix() and combines a rotation and a translation.
    """
    return Matrix(util_hor_matrix(Point(p1), Point(p2)))
18820
18821
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    PDF filter options with a virtual image filter, which passes every
    image to JM_image_filter().
    '''

    def __init__(self):
        super().__init__()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        '''
        Call JM_image_filter() with `ctm` wrapped in a mupdf.FzMatrix.
        Returns 0 under cppyy, else None.
        '''
        assert isinstance(ctm, mupdf.fz_matrix)
        matrix = mupdf.FzMatrix(ctm)
        JM_image_filter(self, matrix, name, image)
        if not mupdf_cppyy:
            return None
        # cppyy doesn't appear to treat returned None as nullptr,
        # resulting in obscure 'python exception' exception.
        return 0
18834
18835
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device which records the bounding boxes of paths, texts, shadings and
    images in the list `result`. If `layers` is true, the current layer
    name is recorded with each box.
    '''

    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        self.layer_name = ""
        # Enable the virtual methods for all callbacks defined below.
        for callback in (
                'fill_path',
                'stroke_path',
                'fill_text',
                'stroke_text',
                'ignore_text',
                'fill_shade',
                'fill_image',
                'fill_image_mask',
                'begin_layer',
                'end_layer',
                ):
            getattr(self, f'use_virtual_{callback}')()

    # layer tracking
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    # bounding box recording
    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
18865
18866
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    MuPDF output which forwards write, seek, tell and truncate requests to
    the Python file-like object `bio`.
    '''

    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        for callback in ('write', 'seek', 'tell', 'truncate'):
            getattr(self, f'use_virtual_{callback}')()

    def seek( self, ctx, offset, whence):
        '''Forward to `bio.seek()` and return its result.'''
        bio = self.bio
        return bio.seek(offset, whence)

    def tell( self, ctx):
        '''Return the result of `bio.tell()`.'''
        return self.bio.tell()

    def truncate( self, ctx):
        '''Forward to `bio.truncate()` and return its result.'''
        bio = self.bio
        return bio.truncate()

    def write(self, ctx, data_raw, data_length):
        '''
        Convert the raw data to Python bytes, pass them to `bio.write()`
        and return its result.
        '''
        return self.bio.write(mupdf.raw_to_python_bytes(data_raw, data_length))
18889
18890
def compute_scissor(dev):
    '''
    Compute the scissor of a new clip, append it to `dev.scissors` and
    return it.

    The scissor is `dev.pathrect`, intersected with the last scissor in
    `dev.scissors` if there is one: every scissor of a clip is a sub
    rectangle of the preceding clip scissor if the clip level is larger.
    '''
    if dev.scissors is None:
        dev.scissors = []
    if dev.scissors:
        previous = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(previous, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
18907
18908
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()

    Args:
        out: destination of the path dictionaries.
        clips: whether clip paths and groups are recorded, too.
        method: name of a callback method of `out` or None - see
            jm_append_merge().
    '''
    def __init__(self, out, clips, method):
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # These must be called (not just referenced) to enable the text
        # callbacks assigned below, which keep the sequence number in
        # step with text output.
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        self.out = out
        self.seqno = 0          # sequence number of the next object
        self.depth = 0          # nesting level of clips / groups
        self.clips = clips
        self.method = method

        self.scissors = None    # list of clip scissors, see compute_scissor()
        self.layer_name = ""    # optional content name

        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0      # count of consecutive lines
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    fill_path = jm_lineart_fill_path
    stroke_path = jm_lineart_stroke_path
    clip_image_mask = jm_lineart_clip_image_mask
    clip_path = jm_lineart_clip_path
    clip_stroke_path = jm_lineart_clip_stroke_path
    clip_text = jm_lineart_clip_text
    clip_stroke_text = jm_lineart_clip_stroke_text

    # Objects other than paths only increase the sequence number.
    fill_text = jm_increase_seqno
    stroke_text = jm_increase_seqno
    ignore_text = jm_increase_seqno

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    pop_clip = jm_lineart_pop_clip

    begin_group = jm_lineart_begin_group
    end_group = jm_lineart_end_group

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
18990
18991
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()

    The callbacks enabled in __init__ are implemented by the module-level
    functions bound to the class attributes of the same name below.
    '''

    def __init__(self, out):
        '''
        Enable the virtual callbacks and initialise the device state.

        Args:
            out: stored as `self.out`.
        '''
        super().__init__()
        for enable in (
            self.use_virtual_fill_path,
            self.use_virtual_stroke_path,
            self.use_virtual_fill_text,
            self.use_virtual_stroke_text,
            self.use_virtual_ignore_text,
            self.use_virtual_fill_shade,
            self.use_virtual_fill_image,
            self.use_virtual_fill_image_mask,
            self.use_virtual_begin_layer,
            self.use_virtual_end_layer,
        ):
            enable()

        self.out = out

        # counters / settings
        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        # path and text tracing state
        self.pathdict = dict()
        self.scissors = list()
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    # text callbacks
    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text

    # non-text callbacks
    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth
    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    # layer callbacks
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
19044
19045
def ConversionHeader(i: str, filename: OptStr ="unknown"):
    """Return the opening text of a document in output format 'i'.

    Args:
        i: format name, compared case-insensitively: "html", "json", "xml"
            or "xhtml". Any other value selects an empty header.
        filename: inserted verbatim (no escaping) as the document name of
            the "xml" and "json" headers.
    Returns:
        The header string for the chosen format.
    """
    fmt = i.lower()
    import textwrap

    html_head = textwrap.dedent("""
        <!DOCTYPE html>
        <html>
        <head>
        <style>
        body{background-color:gray}
        div{position:relative;background-color:white;margin:1em auto}
        p{position:absolute;margin:0}
        img{position:absolute}
        </style>
        </head>
        <body>
        """)

    xml_head = textwrap.dedent("""
        <?xml version="1.0"?>
        <document name="%s">
        """
        % filename
    )

    xhtml_head = textwrap.dedent("""
        <?xml version="1.0"?>
        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
        <html xmlns="http://www.w3.org/1999/xhtml">
        <head>
        <style>
        body{background-color:gray}
        div{background-color:white;margin:1em;padding:1em}
        p{white-space:pre-wrap}
        </style>
        </head>
        <body>
        """)

    json_head = '{"document": "%s", "pages": [\n' % filename

    # All headers are built up front, then one is picked by name.
    headers = {
        "html": html_head,
        "json": json_head,
        "xml": xml_head,
        "xhtml": xhtml_head,
    }
    return headers.get(fmt, "")
19098
19099
def ConversionTrailer(i: str):
    """Return the closing text of a document in output format 'i'.

    Args:
        i: format name, compared case-insensitively: "html", "json", "xml"
            or "xhtml". Any other value selects an empty trailer.
    Returns:
        The trailer string for the chosen format.
    """
    html_tail = "</body>\n</html>\n"
    trailers = {
        "html": html_tail,
        "json": "]\n}",
        "xml": "</document>\n",
        "xhtml": html_tail,  # XHTML closes exactly like HTML
    }
    return trailers.get(i.lower(), "")
19119
19120
def annot_preprocess(page: "Page") -> int:
    """Get a page ready for inserting an annotation.

    Raises:
        ValueError: if the page's document is not a PDF.
    Returns:
        The rotation the page had on entry. A non-zero rotation is reset
        to 0 on the page before returning.
    """
    CheckParent(page)
    document = page.parent
    if not document.is_pdf:
        raise ValueError("is no PDF")
    rotation_on_entry = page.rotation
    is_rotated = rotation_on_entry != 0
    if is_rotated:
        page.set_rotation(0)
    return rotation_on_entry
19134
19135
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Clean up after annotation insertion.

    Set ownership flag and store annotation in page annotation dictionary.

    Args:
        page: the Page that received the annotation; becomes `annot.parent`.
        annot: the new Annot; registered in `page._annot_refs` under its
            `id()` and flagged with `thisown = True`.
    """
    #annot.parent = weakref.proxy(page)
    assert isinstance( page, Page)
    assert isinstance( annot, Annot)
    annot.parent = page
    page._annot_refs[id(annot)] = annot
    annot.thisown = True
19147
19148
def canon(c):
    '''
    Returns the canonical form of code point <c> used for text matching.

    No-break space, line separator, paragraph separator, CR, LF and TAB all
    become a plain space; ASCII upper case letters become lower case; any
    other code point is returned unchanged.
    '''
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    space_like = (0xA0, 0x2028, 0x2029, ord('\r'), ord('\n'), ord('\t'))
    if c in space_like:
        return ord(' ')
    if ord('A') <= c <= ord('Z'):
        return c + (ord('a') - ord('A'))
    return c
19160
19161
def chartocanon(s):
    '''
    Decodes the first character of str <s> with mupdf.fz_chartorune() and
    returns (n, c): n is the first value reported by fz_chartorune(), c is
    the decoded rune after canon().
    '''
    assert isinstance(s, str)
    consumed, rune = mupdf.fz_chartorune(s)
    return consumed, canon(rune)
19167
19168
def dest_is_valid(o, page_count, page_object_nums, names_list):
    '''
    Returns 1 if the destination of dict <o> is usable, else 0.

    A /A action of type GoTo must have its /D entry in <names_list>. A /Dest
    that is a string must be in <names_list>; any other /Dest must have a
    first array item referring to one of <page_object_nums>. A missing /Dest
    counts as valid.
    '''
    action = mupdf.pdf_dict_get(o, PDF_NAME('A'))
    is_goto = mupdf.pdf_name_eq(
        mupdf.pdf_dict_get(action, PDF_NAME('S')),
        PDF_NAME('GoTo'),
    )
    if is_goto and not string_in_names_list(
        mupdf.pdf_dict_get(action, PDF_NAME('D')),
        names_list,
    ):
        return 0

    dest = mupdf.pdf_dict_get(o, PDF_NAME('Dest'))
    if not dest.m_internal:
        return 1
    if mupdf.pdf_is_string(dest):
        return string_in_names_list(dest, names_list)
    target = mupdf.pdf_array_get(dest, 0)
    return 1 if dest_is_valid_page(target, page_object_nums, page_count) else 0
19195
19196
def dest_is_valid_page(obj, page_object_nums, pagecount):
    '''
    Returns 1 if the object number of <obj> is non-zero and equals one of the
    first <pagecount> entries of <page_object_nums>, else 0.
    '''
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    is_page = any(page_object_nums[i] == num for i in range(pagecount))
    return 1 if is_page else 0
19206
19207
def find_string(s, needle):
    '''
    Tries match_string() at every start index of <s> in turn.

    Returns (start, end) for the first index where match_string() succeeds;
    end is match_string()'s result plus that start index. Returns
    (None, None) if no index matches.
    '''
    assert isinstance(s, str)
    for start in range(len(s)):
        relative_end = match_string(s[start:], needle)
        if relative_end is None:
            continue
        return start, relative_end + start
    return None, None
19216
19217
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns:
        Local time as "D:YYYYMMDDHHMMSS", followed by the UTC offset that is
        in effect right now, formatted as +HH'MM' or -HH'MM'. Nothing is
        appended when the offset is zero.
    '''
    import time
    now = time.localtime()
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)

    # Use the offset that applies to `now` (seconds east of UTC). The
    # DST-only time.altzone is wrong whenever DST is not in effect.
    offset = getattr(now, "tm_gmtoff", None)
    if offset is None:
        offset = -(time.altzone if now.tm_isdst > 0 else time.timezone)
    if offset == 0:
        return tstamp

    sign = "+" if offset > 0 else "-"
    # Take abs() before dividing: floor division of a negative offset would
    # round e.g. -5h30m to 6 hours.
    hours, minutes = divmod(abs(offset) // 60, 60)
    return f"{tstamp}{sign}{hours:02d}'{minutes:02d}'"
19235
19236
class ElementPosition:
    """Object form of a dictionary with element position information.

    Instances start without any attributes of their own.
    """

    def __init__(self):
        """Create an instance without setting any attribute."""
19242
19243
def make_story_elpos():
    '''
    Returns a new ElementPosition instance.
    '''
    return ElementPosition()
19246
19247
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    if clip is None:
        clip = page.rect
    clip = Rect(clip)
    # Convert to Point so that any point_like (e.g. a plain tuple) works;
    # the code below needs the .x / .y attributes.
    start = clip.tl if start is None else Point(start)
    stop = clip.br if stop is None else Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract the text blocks inside 'clip', with all extraction flags off
    blocks = page.get_text(
        "dict", flags=0, clip=clip,
    )["blocks"]

    lines = []  # will return this list of rectangles
    for b in blocks:
        bbox = Rect(b["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in b["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if lines == []:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    bboxf = lines.pop(0)
    if bboxf.y0 - start.y <= 0.1 * bboxf.height:  # close enough?
        r = Rect(start.x, bboxf.y0, bboxf.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, bboxf)  # insert again

    if lines == []:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    bboxl = lines.pop()
    if stop.y - bboxl.y1 <= 0.1 * bboxl.height:  # close enough?
        r = Rect(bboxl.tl, stop.x, bboxl.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(bboxl)  # append again

    return lines
19319
19320
def glyph_name_to_unicode(name: str) -> int:
    """Look up 'name' with unicodedata.lookup() and return its code point.

    Returns:
        The code point, or 65533 if the lookup or the conversion fails
        for any reason.
    """
    import unicodedata
    try:
        return ord(unicodedata.lookup(name))
    except Exception:
        return 65533
19329
19330
def hdist(dir, a, b):
    '''
    Returns mupdf.fz_abs() of the dot product of vector a->b with <dir>.
    '''
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    along_dir = delta_x * dir.x + delta_y * dir.y
    return mupdf.fz_abs(along_dir)
19335
19336
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Split a rectangle into a grid of equally sized cells.

    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Raises:
        ValueError: if 'rect' is empty or infinite.
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")
    origin = rect.tl

    cell_height = rect.height / rows
    cell_width = rect.width / cols
    shift_right = (cell_width, 0, cell_width, 0)  # moves a cell one column
    shift_down = (0, cell_height, 0, cell_height)  # moves a cell one row

    # top row: start at the top-left cell and keep stepping to the right
    cell = Rect(origin, origin.x + cell_width, origin.y + cell_height)
    top_row = [cell]
    for _ in range(cols - 1):
        cell += shift_right
        top_row.append(cell)

    # every further row is the row above it, moved down by one cell height
    table = [top_row]
    for _ in range(rows - 1):
        table.append([above + shift_down for above in table[-1]])

    return table
19378
19379
def util_ensure_widget_calc(annot):
    '''
    Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO

    Creates the CO array if AcroForm has none, then appends an indirect
    reference to the widget unless its xref is already listed.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")  # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
    )

    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)

    entries = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    already_listed = any(
        mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, i)) == xref
        for i in range(entries)
    )
    if not already_listed:
        mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
19406
19407
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    Returns (x0, y0, x1, y1) derived from <args>. Afterwards each of p0, p1,
    x0, y0, x1, y1 that is not None overrides the matching value(s); the
    single coordinates are applied after the points.

    Accepts following forms for <args>:
        () returns all zeros.
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    Where top-left and bottom-right are (x, y) or something with .x, .y
    members; rect is something with .x0, .y0, .x1, and .y1 members.

    Raises Exception if <args> matches none of these forms.
    '''
    def get_xy(arg):
        # (x, y) of a point-like item, or (None, None) if it is not one.
        if isinstance(arg, (list, tuple)) and len(arg) == 2:
            return arg[0], arg[1]
        if isinstance(arg, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return arg.x, arg.y
        return None, None

    def make_tuple(a):
        # Flatten a point, a rect or a scalar into a sequence of numbers.
        if isinstance(a, tuple):
            return a
        if isinstance(a, Point):
            return a.x, a.y
        if isinstance(a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return a.x0, a.y0, a.x1, a.y1
        if not isinstance(a, (list, tuple)):
            a = a,
        return a

    def handle_args():
        count = len(args)
        if count == 0:
            return 0, 0, 0, 0
        if count == 1:
            arg = args[0]
            if isinstance(arg, (list, tuple)):
                if len(arg) == 2:
                    # (top-left, bottom-right) given as one sequence
                    first, second = arg
                    ret = *first, *second
                    assert len(ret) == 4
                    return ret
                if len(arg) == 3:
                    # one point plus two scalars, in either order
                    head, middle, tail = arg
                    ret = (*make_tuple(head), *make_tuple(middle), *make_tuple(tail))
                    assert len(ret) == 4
                    return ret
            ret = make_tuple(arg)
            assert len(ret) == 4, f'{arg=} {ret=}'
            return ret
        if count == 2:
            ret = get_xy(args[0]) + get_xy(args[1])
            assert len(ret) == 4
            return ret
        if count == 3:
            px, py = get_xy(args[0])
            if (px, py) != (None, None):
                return px, py, args[1], args[2]
            px, py = get_xy(args[2])
            if (px, py) != (None, None):
                return args[0], args[1], px, py
        elif count == 4:
            return args[0], args[1], args[2], args[3]
        raise Exception( f'Unrecognised args: {args}')

    left, top, right, bottom = handle_args()
    # keyword overrides: points first, then single coordinates
    if p0 is not None:
        left, top = get_xy(p0)
    if p1 is not None:
        right, bottom = get_xy(p1)
    left = left if x0 is None else x0
    top = top if y0 is None else y0
    right = right if x1 is None else x1
    bottom = bottom if y1 is None else y1
    return left, top, right, bottom
19490
19491
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Integer variant of util_make_rect(): takes the same arguments, rounds
    the first two coordinates down and the last two up, and returns all
    four as ints.
    '''
    left, top, right, bottom = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
    return (
        int(math.floor(left)),
        int(math.floor(top)),
        int(math.ceil(right)),
        int(math.ceil(bottom)),
    )
19504
19505
def util_round_rect( rect):
    '''
    Passes <rect> through mupdf.fz_round_rect() and returns the result
    converted by JM_py_from_irect().
    '''
    fz_rect = JM_rect_from_py(rect)
    rounded = mupdf.fz_round_rect(fz_rect)
    return JM_py_from_irect(rounded)
19508
19509
def util_transform_rect( rect, matrix):
    '''
    Transforms <rect> by <matrix>, using extra.util_transform_rect() when
    g_use_extra is set and mupdf.fz_transform_rect() otherwise.
    '''
    if not g_use_extra:
        fz_rect = JM_rect_from_py(rect)
        fz_matrix = JM_matrix_from_py(matrix)
        return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
    return extra.util_transform_rect( rect, matrix)
19514
19515
def util_intersect_rect( r1, r2):
    '''
    Returns mupdf.fz_intersect_rect() of <r1> and <r2>, converted by
    JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
19523
19524
def util_is_point_in_rect( p, r):
    '''
    Returns the result of mupdf.fz_is_point_inside_rect() for point <p> and
    rectangle <r>.
    '''
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
19530
def util_include_point_in_rect( r, p):
    '''
    Returns mupdf.fz_include_point_in_rect() of rectangle <r> and point <p>,
    converted by JM_py_from_rect().
    '''
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
19538
19539
def util_point_in_quad( P, Q):
    '''
    Returns the result of mupdf.fz_is_point_inside_quad() for point <P> and
    quad <Q>.
    '''
    return mupdf.fz_is_point_inside_quad(
        JM_point_from_py(P),
        JM_quad_from_py(Q),
    )
19544
19545
def util_transform_point( point, matrix):
    '''
    Returns mupdf.fz_transform_point() of <point> and <matrix>, converted by
    JM_py_from_point().
    '''
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
19553
19554
def util_union_rect( r1, r2):
    '''
    Returns mupdf.fz_union_rect() of <r1> and <r2>, converted by
    JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
19562
19563
def util_concat_matrix( m1, m2):
    '''
    Returns mupdf.fz_concat() of <m1> and <m2> (in this order), converted by
    JM_py_from_matrix().
    '''
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
19571
19572
def util_invert_matrix(matrix):
    '''
    Inverts <matrix> in Python.

    Returns:
        (0, (a, b, c, d, e, f)) with the inverse matrix, or (1, ()) if the
        determinant lies within sys.float_info.epsilon of zero.
    '''
    # mupdf.fz_invert_matrix() would be an alternative; the inversion is
    # deliberately done in Python here.
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    if det < -sys.float_info.epsilon or det > sys.float_info.epsilon:
        dst = mupdf.FzMatrix()
        rdet = 1 / det
        dst.a = src.d * rdet
        dst.b = -src.b * rdet
        dst.c = -src.c * rdet
        dst.d = a * rdet
        # The translation part is computed from the values stored in dst.
        a = -src.e * dst.a - src.f * dst.c
        dst.f = -src.e * dst.b - src.f * dst.d
        dst.e = a
        return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)

    return 1, ()
19610
19611
def util_measure_string( text, fontname, fontsize, encoding):
    '''
    Returns the width of <text> when written with a Base-14 font.

    Args:
        text: str to measure.
        fontname: passed to mupdf.fz_new_base14_font().
        fontsize: factor applied to the summed glyph advances.
        encoding: mupdf.PDF_SIMPLE_ENCODING_GREEK selects ISO 8859-7,
            mupdf.PDF_SIMPLE_ENCODING_CYRILLIC selects Windows-1251, any
            other value selects Windows-1252.
    Returns:
        Sum of the glyph advances of all characters, times <fontsize>.
    '''
    font = mupdf.fz_new_base14_font(fontname)
    # The encoding is the same for every character, so pick the converter once.
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        from_unicode = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        from_unicode = mupdf.fz_windows_1251_from_unicode
    else:
        from_unicode = mupdf.fz_windows_1252_from_unicode

    w = 0
    # Walk the str by code point. Advancing a str index by the UTF-8 byte
    # count from fz_chartorune() would skip the characters that follow any
    # non-ASCII character.
    for ch in text:
        c = from_unicode(ord(ch))
        if c < 0:
            c = 0xB7  # character not available in the encoding
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
19632
19633
def util_sine_between(C, P, Q):
    '''
    For points C, P, Q compute the sine between lines CP and QP.

    C is moved by -P, rotated with the normalized vector P->Q and normalized
    again; the y value of the result is returned.
    '''
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)
    q = JM_point_from_py(Q)
    direction = mupdf.fz_normalize_vector(mupdf.fz_make_point(q.x - p.x, q.y - p.y))
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y)
    rotate = mupdf.fz_make_matrix(direction.x, -direction.y, direction.y, direction.x, 0, 0)
    mapped = mupdf.fz_transform_point(c, mupdf.fz_concat(shift, rotate))
    return mupdf.fz_normalize_vector(mapped).y
19646
19647
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps the line through points C and P onto the
    x-axis: C is shifted to (0,0), then a rotation built from the normalized
    vector C->P is applied.
    '''
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)

    # (cosine, sine) of vector P-C
    direction = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y))

    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y)
    rotate = mupdf.fz_make_matrix(direction.x, -direction.y, direction.y, direction.x, 0, 0)
    combined = mupdf.fz_concat(shift, rotate)
    return JM_py_from_matrix(combined)
19662
19663
def match_string(h0, n0):
    '''
    Tests whether haystack <h0> starts with needle <n0>.

    Characters are compared in canonical form (see chartocanon()); a run of
    spaces on either side counts as one space.

    Returns:
        The offset into <h0> just behind the last matched character if the
        whole needle was matched, else None.
    '''
    # NOTE(review): offsets advance by the first value returned by
    # chartocanon() but are used as str indices - presumably these are UTF-8
    # byte counts; confirm for non-ASCII text.
    h = 0
    n = 0
    e = h
    delta_h, hc = chartocanon(h0[h:])
    h += delta_h
    delta_n, nc = chartocanon(n0[n:])
    n += delta_n
    # `nc != 0` ends the loop once the needle is exhausted. Without it, a
    # haystack ending together with the needle gives hc == nc == 0 on every
    # further round and the loop would never end.
    while hc == nc and nc != 0:
        e = h
        if hc == ord(' '):
            while 1:
                delta_h, hc = chartocanon(h0[h:])
                h += delta_h
                if hc != ord(' '):
                    break
        else:
            delta_h, hc = chartocanon(h0[h:])
            h += delta_h
        if nc == ord(' '):
            while 1:
                delta_n, nc = chartocanon(n0[n:])
                n += delta_n
                if nc != ord(' '):
                    break
        else:
            delta_n, nc = chartocanon(n0[n:])
            n += delta_n
    return None if nc != 0 else e
19693
19694
def on_highlight_char(hits, line, ch):
    '''
    Adds the quad of character <ch> of text line <line> to <hits>.

    If the left edge of the character's quad lies within the fuzz limits of
    the right edge of the last stored quad, the converted copy of that last
    quad is extended instead; otherwise the character's quad is appended to
    hits.quads and hits.len is incremented.
    '''
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    char_size = ch.m_internal.size
    vfuzz = char_size * hits.vfuzz
    hfuzz = char_size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        end = JM_quad_from_py(hits.quads[hits.len - 1])
        direction = line.m_internal.dir
        adjacent = (
            hdist(direction, end.lr, ch_quad.ll) < hfuzz
            and vdist(direction, end.lr, ch_quad.ll) < vfuzz
            and hdist(direction, end.ur, ch_quad.ul) < hfuzz
            and vdist(direction, end.ur, ch_quad.ul) < vfuzz
        )
        if adjacent:
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
19718
19719
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.
    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Args:
        doc_des: target PDF document.
        doc_src: source PDF document.
        page_from: number of the page to copy in <doc_src>.
        page_to: position at which the copy is inserted in <doc_des>.
        rotate: rotation to set on the copy; -1 keeps the copied value.
        links: only forwarded to extra.page_merge().
        copy_annots: if true, also copy the page's annotations.
        graft_map: its `.this` is handed to mupdf.pdf_graft_mapped_object().
    '''
    if g_use_extra:
        #log( 'Calling C++ extra.page_merge()')
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # page-level keys we want to copy
    copied_keys = [
        PDF_NAME(key)
        for key in (
            'Contents',
            'Resources',
            'MediaBox',
            'CropBox',
            'BleedBox',
            'TrimBox',
            'ArtBox',
            'Rotate',
            'UserUnit',
        )
    ]
    page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # make new page dict in dest doc
    page_dict = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))

    # graft every key the source page has (inherited values included)
    for key in copied_keys:
        value = mupdf.pdf_dict_get_inheritable(page_ref, key)
        if value.m_internal:
            #log( '{=type(graft_map) type(graft_map.this)}')
            mupdf.pdf_dict_put(page_dict, key, mupdf.pdf_graft_mapped_object(graft_map.this, value))

    # Copy annotations, but skip Link, Popup, IRT, Widget types.
    # Keys P (parent) and Popup are removed from each copied annotation.
    if copy_annots:
        old_annots = mupdf.pdf_dict_get(page_ref, PDF_NAME('Annots'))
        n = mupdf.pdf_array_len(old_annots)
        if n > 0:
            new_annots = mupdf.pdf_dict_put_array(page_dict, PDF_NAME('Annots'), n)
            for i in range(n):
                o = mupdf.pdf_array_get(old_annots, i)
                if not o.m_internal or not mupdf.pdf_is_dict(o):
                    continue  # skip non-dict items
                if mupdf.pdf_dict_gets(o, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get(o, PDF_NAME('Subtype'))
                if any(
                    mupdf.pdf_name_eq(subtype, PDF_NAME(skipped))
                    for skipped in ('Link', 'Popup', 'Widget')
                ):
                    continue
                mupdf.pdf_dict_del(o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del(o, PDF_NAME('P'))
                copy_o = mupdf.pdf_graft_mapped_object(graft_map.this, o)
                annot = mupdf.pdf_new_indirect(doc_des, mupdf.pdf_to_num(copy_o), 0)
                mupdf.pdf_array_push(new_annots, annot)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int(page_dict, PDF_NAME('Rotate'), rotate)

    # add the page dictionary to dest PDF and insert it at the wanted position
    ref = mupdf.pdf_add_object(doc_des, page_dict)
    mupdf.pdf_insert_page(doc_des, page_to, ref)
19789
19790
def paper_rect(s: str) -> Rect:
    """Return a Rect for the paper size indicated in string 's'.

    's' is handed to function 'paper_size' and must conform to its argument.
    The Rect has its top-left corner at (0, 0).
    """
    paper_width, paper_height = paper_size(s)
    return Rect(0.0, 0.0, paper_width, paper_height)
19796
19797
def paper_size(s: str) -> tuple:
    """Return a tuple (width, height) for a given paper format string.

    Notes:
        'A4-L' will return (842, 595), the values for A4 landscape.
        Suffix '-P' and no suffix return the portrait tuple.
        An unknown format returns (-1, -1).
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    if name.endswith("-p"):
        name = name[:-2]
    portrait = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (portrait[1], portrait[0])
    return portrait
19816
19817
def paper_sizes():
    """Known paper formats @ 72 dpi as a dictionary. Key is the format string
    like "a4" for ISO-A4. Value is the tuple (width, height).

    A new dictionary is built on every call.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    formats = (
        # name, width, height
        ("a0", 2384, 3370),
        ("a1", 1684, 2384),
        ("a10", 74, 105),
        ("a2", 1191, 1684),
        ("a3", 842, 1191),
        ("a4", 595, 842),
        ("a5", 420, 595),
        ("a6", 298, 420),
        ("a7", 210, 298),
        ("a8", 147, 210),
        ("a9", 105, 147),
        ("b0", 2835, 4008),
        ("b1", 2004, 2835),
        ("b10", 88, 125),
        ("b2", 1417, 2004),
        ("b3", 1001, 1417),
        ("b4", 709, 1001),
        ("b5", 499, 709),
        ("b6", 354, 499),
        ("b7", 249, 354),
        ("b8", 176, 249),
        ("b9", 125, 176),
        ("c0", 2599, 3677),
        ("c1", 1837, 2599),
        ("c10", 79, 113),
        ("c2", 1298, 1837),
        ("c3", 918, 1298),
        ("c4", 649, 918),
        ("c5", 459, 649),
        ("c6", 323, 459),
        ("c7", 230, 323),
        ("c8", 162, 230),
        ("c9", 113, 162),
        ("card-4x6", 288, 432),
        ("card-5x7", 360, 504),
        ("commercial", 297, 684),
        ("executive", 522, 756),
        ("invoice", 396, 612),
        ("ledger", 792, 1224),
        ("legal", 612, 1008),
        ("legal-13", 612, 936),
        ("letter", 612, 792),
        ("monarch", 279, 540),
        ("tabloid-extra", 864, 1296),
    )
    return {name: (width, height) for name, width, height in formats}
19873
def pdf_lookup_page_loc(doc, needle):
    '''
    Thin wrapper: returns mupdf.pdf_lookup_page_loc(doc, needle) unchanged.
    '''
    return mupdf.pdf_lookup_page_loc(doc, needle)
19876
19877
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    Currently disabled: the leading `assert 0` fails on every call (unless
    assertions are stripped, e.g. with `python -O`); its message suggests
    mupdf.pdf_debug_obj() as a possible replacement.

    Args:
        o: the object to describe.
        prefix: text put in front of each dict entry; it is extended for
            each nesting level of arrays and dicts.
    Returns:
        str with one newline-terminated fragment per visited object.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    ret = ''
    if mupdf.pdf_is_array(o):
        l = mupdf.pdf_array_len(o)
        ret += f'array {l}\n'
        for i in range(l):
            oo = mupdf.pdf_array_get(o, i)
            ret += pdfobj_string(oo, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_bool(o):
        # NOTE(review): the other branches use mupdf.pdf_*() functions;
        # confirm that o.array_get_bool() exists and is the intended call.
        ret += f'bool: {o.array_get_bool()}\n'
    elif mupdf.pdf_is_dict(o):
        l = mupdf.pdf_dict_len(o)
        ret += f'dict {l}\n'
        for i in range(l):
            key = mupdf.pdf_dict_get_key(o, i)
            value = mupdf.pdf_dict_get( o, key)
            ret += f'{prefix} {key}: '
            ret += pdfobj_string( value, prefix + ' ')
            ret += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        # NOTE(review): confirm that o.embedded_file_name() exists.
        ret += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        ret += f'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        ret += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        ret += f'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        ret += f'name: {mupdf.pdf_to_name(o)}\n'
    # NOTE(review): `o.pdf_is_null` is an attribute test, not a call. If it
    # is a method it is always truthy and the branches below can never be
    # reached - presumably mupdf.pdf_is_null(o) was meant; confirm.
    elif o.pdf_is_null:
        ret += f'null\n'
    #elif o.pdf_is_number:
    #    ret += f'number\n'
    # NOTE(review): same uncalled-attribute pattern as `pdf_is_null` above.
    elif o.pdf_is_real:
        ret += f'real: {o.pdf_to_real()}\n'
    elif mupdf.pdf_is_stream(o):
        ret += f'stream\n'
    elif mupdf.pdf_is_string(o):
        ret += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        ret += '<>\n'

    return ret
19926
19927
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility sets one
        common character width on every matching font object of the page.
        The width is the advance of 0x20 (space) times 1000, rounded to
        an integer. A failure to set it is only logged.

    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object. Nothing happens unless its "mono" flag
            is set and the page uses a font with the same name.
    """
    if not font.flags["mono"]:  # font not flagged as monospaced
        return None
    doc = page.parent
    # xrefs of the page's fonts that have our font's name, a reference name
    # starting with "F" and an "Identity..." encoding; a set drops repeats
    xrefs = {
        entry[0]
        for entry in page.get_fonts()
        if (
            entry[3] == font.name
            and entry[4].startswith("F")
            and entry[5].startswith("Identity")
        )
    }
    if not xrefs:  # our font does not occur
        return
    width = int(round((font.glyph_advance(32) * 1000)))
    for xref in xrefs:
        if TOOLS.set_font_width(doc, xref, width):
            continue
        log("Cannot set width for '%s' in xref %i" % (font.name, xref))
19958
19959
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), each color in range(256).
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 1.
    """
    red, green, blue = sRGB_to_rgb(srgb)
    return red / 255.0, green / 255.0, blue / 255.0
19972
19973
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) SSRRGGBB (red, green, blue), each color in range(256).
            Everything above the lowest 24 bits is ignored.
            With MuPDF < 1.26, `s` is always 0.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    rgb24 = srgb & 0xffffff
    red = rgb24 >> 16
    green = (rgb24 >> 8) & 0xff
    blue = rgb24 & 0xff
    return (red, green, blue)
19990
19991
def string_in_names_list(p, names_list):
    '''
    Returns 1 if the text string of <p> equals the text string of any item
    at an even index of array <names_list>, else 0. A false <names_list>
    counts as an empty array.
    '''
    n = mupdf.pdf_array_len( names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string( p)
    found = any(
        mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, i)) == wanted
        for i in range(0, n, 2)
    )
    return 1 if found else 0
19999
20000
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Walks the chain of sibling outline items starting at <outlines> and
    removes the destination of every item for which dest_is_valid() fails.
    Such an item is unlinked from the chain if it has no children left.

    Returns (count, first, prev).
        count: number of items kept with a valid destination.
        first: the first such item, or None if there is none.
        prev: the last such item, or None if there is none.
    '''
    first = None
    count = 0
    current = outlines
    prev = None
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)

        if dest_is_valid(current, page_count, page_object_nums, names_list):
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1
        elif nc == 0:
            # Outline with invalid dest and no children. Drop it by
            # pulling the next one in here.
            next_item = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            # `prev` is still None until the first item has been kept, so
            # it must not be dereferenced unconditionally.
            have_prev = prev is not None and bool(prev.m_internal)
            if not next_item.m_internal:
                # There is no next one to pull in
                if have_prev:
                    mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
            elif have_prev:
                mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next_item)
                mupdf.pdf_dict_put(next_item, PDF_NAME('Prev'), prev)
            else:
                mupdf.pdf_dict_del(next_item, PDF_NAME('Prev'))
            current = next_item
        else:
            # Outline with invalid dest, but children. Just drop the dest.
            mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
            mupdf.pdf_dict_del(current, PDF_NAME('A'))
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))

    return count, first, prev
20043
20044
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Strips the children of outline node `outlines` using `strip_outline()`
    and rewrites the node's /First, /Last and /Count to match.

    Returns the count reported by `strip_outline()` for the children, or 0 if
    `outlines` is a null object or has no /First.
    '''
    if not outlines.m_internal:
        return 0

    kept = 0
    head = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if head.m_internal:
        kept, head, tail = strip_outline(doc, head, page_count, page_object_nums, names_list)

    if kept == 0:
        # Nothing survived: remove all child bookkeeping.
        for key in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key))
        return kept

    # Keep the sign of the previous /Count: positive stays positive, anything
    # else becomes negative.
    previous = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
    mupdf.pdf_dict_put(outlines, PDF_NAME('First'), head)
    mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), tail)
    signed = kept if previous > 0 else -kept
    mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(signed))
    return kept
20065
20066
# Integer codes naming the four path operations (fill, stroke, clip,
# clip-stroke).
# NOTE(review): the code that consumes these values is outside this section -
# presumably the trace device; confirm before renumbering.
trace_device_FILL_PATH = 1
trace_device_STROKE_PATH = 2
trace_device_CLIP_PATH = 3
trace_device_CLIP_STROKE_PATH = 4
20071
20072
def unicode_to_glyph_name(ch: int) -> str:
    """
    Return the `unicodedata` name of code point `ch`.

    Returns ".notdef" if `ch` is not a valid code point or has no name.
    """
    # Imported here rather than at module level to keep startup fast.
    import unicodedata
    try:
        return unicodedata.name(chr(ch))
    except ValueError:
        return ".notdef"
20083
20084
def vdist(dir, a, b):
    '''
    Returns `fz_abs(dx * dir.y + dy * dir.x)` where (dx, dy) = b - a.
    '''
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    mixed = delta_x * dir.y + delta_y * dir.x
    return mupdf.fz_abs(mixed)
20089
20090
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is called
            once in the current process before any page is processed.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.
    '''
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        # The document is deliberately left open: results returned by
        # `pagefn` may still refer to it.
        document = Document(path)
        if pages is None:
            pages = range(len(document))
        for pno in pages:
            page = document[pno]
            # Pass the page function its own keyword args (previously
            # `initfn_kwargs` was passed here by mistake, so `pagefn_kwargs`
            # was ignored with method='single').
            r = pagefn(page, *pagefn_args, **pagefn_kwargs)
            ret.append(r)

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            ret = _apply_pages._multiprocessing(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        elif method == 'fork':
            ret = _apply_pages._fork(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        else:
            assert 0, f'Unrecognised {method=}.'

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
20219
20220
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Runs `Page.get_text()` on pages of the document at `path` via
    `apply_pages()` and returns the list of results, optionally using
    concurrency for speed.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Forwarded unchanged as keyword args to each `Page.get_text()`
            call.
        _stats:
            Forwarded to `apply_pages()`.
    '''
    text_kwargs = {
            'option': option,
            'clip': clip,
            'flags': flags,
            'textpage': textpage,
            'sort': sort,
            'delimiters': delimiters,
            }
    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results
20283
20284
class TOOLS:
    '''
    Collection of helper functions, used as a namespace.

    We use @staticmethod to avoid the need to create an instance of this class.
    '''

    @staticmethod
    def _derotate_matrix(page):
        '''
        Returns the derotation matrix of `page` as a Python sequence; the
        default `mupdf.FzMatrix()` is used if `page` is not a `mupdf.PdfPage`.
        '''
        if isinstance(page, mupdf.PdfPage):
            return JM_py_from_matrix(JM_derotate_page_matrix(page))
        else:
            return JM_py_from_matrix(mupdf.FzMatrix())

    @staticmethod
    def _fill_widget(annot, widget):
        '''
        Populates `widget` from `annot` and returns the value of
        `JM_get_widget_properties()`. Script attributes that are false are
        normalised to None.
        '''
        val = JM_get_widget_properties(annot, widget)

        widget.rect = Rect(annot.rect)
        widget.xref = annot.xref
        widget.parent = annot.parent
        widget._annot = annot  # backpointer to annot object
        for name in (
                'script',
                'script_stroke',
                'script_format',
                'script_change',
                'script_calc',
                'script_blur',
                'script_focus',
                ):
            if not getattr(widget, name):
                setattr(widget, name, None)
        return val

    @staticmethod
    def _get_all_contents(page):
        '''Returns the contents of `page` as read by `JM_read_contents()`, as binary data.'''
        page = _as_pdf_page(page.this)
        res = JM_read_contents(page.obj())
        result = JM_BinFromBuffer( res)
        return result

    @staticmethod
    def _insert_contents(page, newcont, overlay=1):
        """Add bytes as a new /Contents object for a page, and return its xref."""
        pdfpage = _as_pdf_page(page, required=1)
        contbuf = JM_BufferFromBytes(newcont)
        xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay)
        #fixme: pdfpage->doc->dirty = 1;
        return xref

    @staticmethod
    def _le_annot_parms(annot, p1, p2, fill_color):
        """Get common parameters for making annot line end symbols.

        Args:
            annot: annotation; its border width, colors and opacity are read.
            p1, p2: start and end point of the line.
            fill_color: fill color to use; if false, the annotation's own fill
                color is used, falling back to white.

        Returns:
            m: matrix that maps p1, p2 to points L, R on the x-axis
            im: its inverse
            L, R: transformed p1, p2
            w: line width
            scol: stroke color string
            fcol: fill color string
            opacity: opacity string (gs command)
        """
        w = annot.border["width"]  # line width
        sc = annot.colors["stroke"]  # stroke color
        if not sc:  # black if missing
            sc = (0,0,0)
        scol = " ".join(map(str, sc)) + " RG\n"
        if fill_color:
            fc = fill_color
        else:
            fc = annot.colors["fill"]  # fill color
        if not fc:
            fc = (1,1,1)  # white if missing
        fcol = " ".join(map(str, fc)) + " rg\n"
        # nr = annot.rect
        np1 = p1  # point coord relative to annot rect
        np2 = p2  # point coord relative to annot rect
        m = Matrix(util_hor_matrix(np1, np2))  # matrix makes the line horizontal
        im = ~m  # inverted matrix
        L = np1 * m  # converted start (left) point
        R = np2 * m  # converted end (right) point
        if 0 <= annot.opacity < 1:
            opacity = "/H gs\n"
        else:
            opacity = ""
        return m, im, L, R, w, scol, fcol, opacity

    @staticmethod
    def _le_butt(annot, p1, p2, lr, fill_color):
        """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 3
        d = shift * max(1, w)
        M = R if lr else L
        top = (M + (0, -d/2.)) * im
        bot = (M + (0, d/2.)) * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_circle(annot, p1, p2, lr, fill_color):
        """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_closedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_diamond(annot, p1, p2, lr, fill_color):
        """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        # the square makes line longer by (2*shift - 1)*width
        # The diamond's corners are the midpoints of the square's sides.
        p = (r.tl + (r.bl - r.tl) * 0.5) * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = (r.tl + (r.tr - r.tl) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = (r.tr + (r.br - r.tr) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = (r.br + (r.bl - r.br) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_openarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (2*d, 0) if lr else L + (2*d, 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_ropenarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (d/3., 0) if lr else L + (d/3., 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_slash(annot, p1, p2, lr, fill_color):
        """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        rw = 1.1547 * max(1, w) * 1.0  # makes rect diagonal a 30 deg inclination
        M = R if lr else L
        r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w)
        top = r.tl * im
        bot = r.br * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_square(annot, p1, p2, lr, fill_color):
        """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        # the square makes line longer by (2*shift - 1)*width
        p = r.tl * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = r.tr * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = r.br * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = r.bl * im
        ap += "%f %f l\n" % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _oval_string(p1, p2, p3, p4):
        """Return /AP string defining an oval within a 4-polygon provided as points
        """
        def bezier(p, q, r):
            f = "%f %f %f %f %f %f c\n"
            return f % (p.x, p.y, q.x, q.y, r.x, r.y)

        kappa = 0.55228474983  # magic number
        ml = p1 + (p4 - p1) * 0.5  # middle points ...
        mo = p1 + (p2 - p1) * 0.5  # for each ...
        mr = p2 + (p3 - p2) * 0.5  # polygon ...
        mu = p4 + (p3 - p4) * 0.5  # side
        ol1 = ml + (p1 - ml) * kappa  # the 8 bezier
        ol2 = mo + (p1 - mo) * kappa  # helper points
        or1 = mo + (p2 - mo) * kappa
        or2 = mr + (p2 - mr) * kappa
        ur1 = mr + (p3 - mr) * kappa
        ur2 = mu + (p3 - mu) * kappa
        ul1 = mu + (p4 - mu) * kappa
        ul2 = ml + (p4 - ml) * kappa
        # now draw, starting from middle point of left side
        ap = "%f %f m\n" % (ml.x, ml.y)
        ap += bezier(ol1, ol2, mo)
        ap += bezier(or1, or2, mr)
        ap += bezier(ur1, ur2, mu)
        ap += bezier(ul1, ul2, ml)
        return ap

    @staticmethod
    def _parse_da(annot):
        '''
        Parses the /DA (default appearance) string of `annot`, falling back to
        the document's AcroForm /DA.

        Returns (color, fontname, fontsize). If no /DA string is found,
        returns ((0,), "", 0). Missing items default to font "Helv", size 12
        and color (0, 0, 0).
        '''
        if g_use_extra:
            val = extra.Tools_parse_da( annot.this)
        else:
            def Tools__parse_da(annot):
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                this_annot_obj = mupdf.pdf_annot_obj( this_annot)
                pdf = mupdf.pdf_get_bound_document( this_annot_obj)
                try:
                    da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
                    if not da.m_internal:
                        trailer = mupdf.pdf_trailer(pdf)
                        da = mupdf.pdf_dict_getl(trailer,
                                PDF_NAME('Root'),
                                PDF_NAME('AcroForm'),
                                PDF_NAME('DA'),
                                )
                    da_str = mupdf.pdf_to_text_string(da)
                except Exception:
                    if g_exceptions_verbose: exception_info()
                    return
                return da_str
            val = Tools__parse_da(annot)

        if not val:
            return ((0,), "", 0)
        font = "Helv"
        fsize = 12
        col = (0, 0, 0)
        dat = val.split()  # split on any whitespace
        # Each operator's operands are the tokens immediately before it.
        for i, item in enumerate(dat):
            if item == "Tf":
                font = dat[i - 2][1:]  # drop the leading '/' of the font name
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":  # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
            if item == "k":  # CMYK colored text
                col = [float(f) for f in dat[i - 4:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = ""
                continue

        val = (col, font, fsize)
        return val

    @staticmethod
    def _reset_widget(annot):
        '''Resets the form field of `annot` via `mupdf.pdf_field_reset()`.'''
        this_annot = annot
        this_annot_obj = mupdf.pdf_annot_obj(this_annot)
        pdf = mupdf.pdf_get_bound_document(this_annot_obj)
        mupdf.pdf_field_reset(pdf, this_annot_obj)

    @staticmethod
    def _rotate_matrix(page):
        '''
        Returns the rotation matrix of `page` as a Python sequence; the
        default `mupdf.FzMatrix()` is used if `page` is not a PDF page.
        '''
        pdfpage = page._pdf_page(required=False)
        if not pdfpage.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage))

    @staticmethod
    def _save_widget(annot, widget):
        '''Writes the properties of `widget` to `annot`.'''
        JM_set_widget_properties(annot, widget)

    @staticmethod
    def _update_da(annot, da_str):
        '''
        Sets the /DA string of `annot` to `da_str` and removes /DS and /RC.
        In the pure-Python path, exceptions are swallowed (and reported if
        `g_exceptions_verbose` is true).
        '''
        if g_use_extra:
            extra.Tools_update_da( annot.this, da_str)
        else:
            try:
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS'))  # /* not supported */
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC'))  # /* not supported */
            except Exception:
                if g_exceptions_verbose: exception_info()
                return
            return

    @staticmethod
    def gen_id():
        '''Increments the global counter `TOOLS_JM_UNIQUE_ID` and returns its new value.'''
        global TOOLS_JM_UNIQUE_ID
        TOOLS_JM_UNIQUE_ID += 1
        return TOOLS_JM_UNIQUE_ID

    @staticmethod
    def glyph_cache_empty():
        '''
        Empty the glyph cache.
        '''
        mupdf.fz_purge_glyph_cache()

    @staticmethod
    def image_profile(stream, keep_image=0):
        '''
        Metadata of an image binary stream.
        '''
        return JM_image_profile(stream, keep_image)

    @staticmethod
    def mupdf_display_errors(on=None):
        '''
        Set MuPDF error display to True or False.

        If `on` is None the setting is left unchanged. Returns the current
        setting.
        '''
        global JM_mupdf_show_errors
        if on is not None:
            JM_mupdf_show_errors = bool(on)
        return JM_mupdf_show_errors

    @staticmethod
    def mupdf_display_warnings(on=None):
        '''
        Set MuPDF warnings display to True or False.

        If `on` is None the setting is left unchanged. Returns the current
        setting.
        '''
        global JM_mupdf_show_warnings
        if on is not None:
            JM_mupdf_show_warnings = bool(on)
        return JM_mupdf_show_warnings

    @staticmethod
    def mupdf_version():
        '''Get version of MuPDF binary build.'''
        return mupdf.FZ_VERSION

    @staticmethod
    def mupdf_warnings(reset=1):
        '''
        Get the MuPDF warnings/errors with optional reset (default).

        Returns the stored messages joined by newlines.
        '''
        # Get any trailing `... repeated <N> times...` message.
        mupdf.fz_flush_warnings()
        ret = '\n'.join( JM_mupdf_warnings_store)
        if reset:
            TOOLS.reset_mupdf_warnings()
        return ret

    @staticmethod
    def reset_mupdf_warnings():
        '''Discards all stored MuPDF warnings/errors.'''
        global JM_mupdf_warnings_store
        JM_mupdf_warnings_store = list()

    @staticmethod
    def set_aa_level(level):
        '''
        Set anti-aliasing level.
        '''
        mupdf.fz_set_aa_level(level)

    @staticmethod
    def set_annot_stem( stem=None):
        '''
        Get / set the global annotation id stem `JM_annot_id_stem`.

        If `stem` is None the stem is left unchanged; otherwise it is set to
        the first 50 characters of `stem`. Returns the current stem.
        '''
        global JM_annot_id_stem
        if stem is None:
            return JM_annot_id_stem
        JM_annot_id_stem = stem[:50]
        return JM_annot_id_stem

    @staticmethod
    def set_font_width(doc, xref, width):
        '''
        Sets /W of every descendant font of the font at `xref` to
        [0 65535 width].

        Returns False if `doc` is not a PDF, otherwise True (also if the font
        has no /DescendantFonts array).
        '''
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return False
        font = mupdf.pdf_load_object(pdf, xref)
        dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
        if mupdf.pdf_is_array(dfonts):
            n = mupdf.pdf_array_len(dfonts)
            for i in range(n):
                dfont = mupdf.pdf_array_get(dfonts, i)
                warray = mupdf.pdf_new_array(pdf, 3)
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width))
                mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
        return True

    @staticmethod
    def set_graphics_min_line_width(min_line_width):
        '''
        Set the graphics minimum line width.
        '''
        mupdf.fz_set_graphics_min_line_width(min_line_width)

    @staticmethod
    def set_icc( on=0):
        """Set ICC color handling on or off."""
        if on:
            if mupdf.FZ_ENABLE_ICC:
                mupdf.fz_enable_icc()
            else:
                RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
        elif mupdf.FZ_ENABLE_ICC:
            mupdf.fz_disable_icc()

    @staticmethod
    def set_low_memory( on=None):
        """Set / unset MuPDF device caching."""
        if on is not None:
            _globals.no_device_caching = bool(on)
        return _globals.no_device_caching

    @staticmethod
    def set_small_glyph_heights(on=None):
        """Set / unset small glyph heights."""
        if on is not None:
            _globals.small_glyph_heights = bool(on)
            if g_use_extra:
                extra.set_small_glyph_heights(_globals.small_glyph_heights)
        return _globals.small_glyph_heights

    @staticmethod
    def set_subset_fontnames(on=None):
        '''
        Set / unset returning fontnames with their subset prefix.
        '''
        if on is not None:
            _globals.subset_fontnames = bool(on)
            if g_use_extra:
                extra.set_subset_fontnames(_globals.subset_fontnames)
        return _globals.subset_fontnames

    @staticmethod
    def show_aa_level():
        '''
        Show anti-aliasing values.
        '''
        return dict(
                graphics = mupdf.fz_graphics_aa_level(),
                text = mupdf.fz_text_aa_level(),
                graphics_min_line_width = mupdf.fz_graphics_min_line_width(),
                )

    @staticmethod
    def store_maxsize():
        '''
        MuPDF store size limit.

        Not implemented: currently always returns None.
        '''
        # fixme: return gctx->store->max.
        return None

    @staticmethod
    def store_shrink(percent):
        '''
        Free 'percent' of current store size.

        Returns 0 if the store was emptied (percent >= 100), otherwise None.
        '''
        if percent >= 100:
            mupdf.fz_empty_store()
            return 0
        if percent > 0:
            mupdf.fz_shrink_store( 100 - percent)
        # fixme: return gctx->store->size.

    @staticmethod
    def store_size():
        '''
        MuPDF current store size.

        Not implemented: currently always returns None.
        '''
        # fixme: return gctx->store->size.
        return None

    @staticmethod
    def unset_quad_corrections(on=None):
        '''
        Set ascender / descender corrections on or off.
        '''
        if on is not None:
            _globals.skip_quad_corrections = bool(on)
            if g_use_extra:
                extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
        return _globals.skip_quad_corrections

    # fixme: also defined at top-level.
    JM_annot_id_stem = 'fitz'

    fitz_config = JM_fitz_config()
20858
20859
# Install JM_mupdf_warning() / JM_mupdf_error() as MuPDF's warning and error
# callbacks. Callbacks not yet supported with cppyy, so skip this there.
if not mupdf_cppyy:
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
    mupdf.fz_set_error_callback(JM_mupdf_error)
20864
20865
20866 # If there are pending warnings when we exit, we end up in this sequence:
20867 #
20868 # atexit()
20869 # -> mupdf::internal_thread_state::~internal_thread_state()
20870 # -> fz_drop_context()
20871 # -> fz_flush_warnings()
20872 # -> SWIG Director code
20873 # -> Python calling JM_mupdf_warning().
20874 #
20875 # Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
20876 # already been torn down.
20877 #
20878 # So we use a Python atexit handler to explicitly call fz_flush_warnings();
20879 # this appears to happen early enough for the Director machinery to still
20880 # work. So in the sequence above, fz_flush_warnings() will find that there are
20881 # no pending warnings and will not attempt to call JM_mupdf_warning().
20882 #
def _atexit():
    '''
    Exit handler: flushes any pending MuPDF warnings while our Python
    callbacks are still installed, then removes the warning and error
    callbacks.
    '''
    #log( 'PyMuPDF/src/__init__.py:_atexit() called')
    mupdf.fz_flush_warnings()
    mupdf.fz_set_warning_callback(None)
    mupdf.fz_set_error_callback(None)
    #log( '_atexit() returning')
atexit.register( _atexit)
20890
20891
# List of (name, red, green, blue) where:
#   name: upper-case name.
#   red, green, blue: integer in range 0..255.
#
# Note: after the import, the name `_wxcolors` is rebound from the submodule
# to the list it contains.
#
from . import _wxcolors
_wxcolors = _wxcolors._wxcolors


# Dict mapping from name to (red, green, blue).
#   name: lower-case name.
#   red, green, blue: float in range 0..1.
#
# Note: the loop variables `name`, `r`, `g`, `b` remain defined at module level
# after the loop.
#
pdfcolor = dict()
for name, r, g, b in _wxcolors:
    pdfcolor[name.lower()] = (r/255, g/255, b/255)
20907
20908
def colors_pdf_dict():
    '''
    Returns the module-level `pdfcolor` dict (the dict itself, not a copy).
        key: lower-case color name.
        value: (red, green, blue), each a float in range 0..1.
    '''
    return pdfcolor
20916
20917
def colors_wx_list():
    '''
    Returns the module-level `_wxcolors` list (the list itself, not a copy).
        Each item is a (name, red, green, blue) tuple:
        name: upper-case color name.
        red, green, blue: integers in range 0..255.
    '''
    return _wxcolors
20925
20926
# We cannot import utils earlier because it imports this .py file itself and
# uses some pymupdf.* types in function typing.
#
from . import utils


# Use utils.*() fns for some class methods.
#
# Module-level re-exports of utils functions.
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# The assignments below attach utils functions to classes as methods, so the
# instance is passed as the function's first argument.
Annot.get_text = utils.get_text
Annot.get_textbox = utils.get_textbox

Document._do_links = utils.do_links
Document._do_widgets = utils.do_widgets
Document.del_toc_item = utils.del_toc_item
Document.get_char_widths = utils.get_char_widths
Document.get_oc = utils.get_oc
Document.get_ocmd = utils.get_ocmd
Document.get_page_labels = utils.get_page_labels
Document.get_page_numbers = utils.get_page_numbers
Document.get_page_pixmap = utils.get_page_pixmap
Document.get_page_text = utils.get_page_text
Document.get_toc = utils.get_toc
Document.has_annots = utils.has_annots
Document.has_links = utils.has_links
Document.insert_page = utils.insert_page
Document.new_page = utils.new_page
Document.scrub = utils.scrub
Document.search_page_for = utils.search_page_for
Document.set_metadata = utils.set_metadata
Document.set_oc = utils.set_oc
Document.set_ocmd = utils.set_ocmd
Document.set_page_labels = utils.set_page_labels
Document.set_toc = utils.set_toc
Document.set_toc_item = utils.set_toc_item
Document.subset_fonts = utils.subset_fonts
# `tobytes` is an alias of the existing `Document.write`, not a utils function.
Document.tobytes = Document.write
Document.xref_copy = utils.xref_copy

IRect.get_area = utils.get_area

Page.apply_redactions = utils.apply_redactions
Page.delete_image = utils.delete_image
Page.delete_widget = utils.delete_widget
Page.draw_bezier = utils.draw_bezier
Page.draw_circle = utils.draw_circle
Page.draw_curve = utils.draw_curve
Page.draw_line = utils.draw_line
Page.draw_oval = utils.draw_oval
Page.draw_polyline = utils.draw_polyline
Page.draw_quad = utils.draw_quad
Page.draw_rect = utils.draw_rect
Page.draw_sector = utils.draw_sector
Page.draw_squiggle = utils.draw_squiggle
Page.draw_zigzag = utils.draw_zigzag
Page.get_image_info = utils.get_image_info
Page.get_image_rects = utils.get_image_rects
Page.get_label = utils.get_label
Page.get_links = utils.get_links
Page.get_pixmap = utils.get_pixmap
Page.get_text = utils.get_text
Page.get_text_blocks = utils.get_text_blocks
Page.get_text_selection = utils.get_text_selection
Page.get_text_words = utils.get_text_words
Page.get_textbox = utils.get_textbox
Page.get_textpage_ocr = utils.get_textpage_ocr
Page.insert_image = utils.insert_image
Page.insert_link = utils.insert_link
Page.insert_text = utils.insert_text
Page.insert_textbox = utils.insert_textbox
Page.insert_htmlbox = utils.insert_htmlbox
# `Shape` is a class, so wrap it to get a method that passes the page on.
Page.new_shape = lambda x: utils.Shape(x)
Page.replace_image = utils.replace_image
Page.search_for = utils.search_for
Page.show_pdf_page = utils.show_pdf_page
Page.update_link = utils.update_link
Page.write_text = utils.write_text
Shape = utils.Shape
from .table import find_tables

Page.find_tables = find_tables

Rect.get_area = utils.get_area

TextWriter.fill_textbox = utils.fill_textbox
21017
21018
class FitzDeprecation(DeprecationWarning):
    '''
    Warning category for use of legacy names.

    `restore_aliases()` installs a "once" filter for this category, and the
    deprecated alias wrappers it creates emit warnings of this category.
    '''
21021
def restore_aliases():
    '''
    Re-creates the legacy names (removed after v1.19.0) as deprecated aliases
    of the current snake_case names.

    Side effects:
        * Installs a "once" warnings filter for category `FitzDeprecation`.
        * Replaces `warnings.showwarning` for the whole process, so that
          every warning (not only `FitzDeprecation`) is written via `log()`.
        * Adds the legacy attributes to the affected classes and to this
          module.

    Not designed to be called twice: `_alias()` asserts that a legacy name
    does not exist yet, so a second call fails with AssertionError (unless
    assertions are disabled).
    '''
    warnings.filterwarnings( "once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        # Replacement for `warnings.showwarning`. `file` is accepted for
        # signature compatibility but is not used; output always goes to
        # `log()`.
        text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        s = text.find("FitzDeprecation")
        if s < 0:
            # Not one of our deprecation warnings: log the text unchanged.
            log(text)
            return
        # Keep only the first line, starting at the category name, and drop
        # the leading "Fitz" (4 characters) so the line starts with
        # "Deprecation".
        text = text[s:].splitlines()[0][4:]
        log(text)

    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds a legacy alias for the existing class or module item called
        <class_>.<new_name>.

        class_:
            Class/module to modify; use None for the current module.
        new_name:
            String name of existing item, e.g. name of method.
        legacy_name:
            Name of legacy object to create in <class_>. If None (or
            empty), we generate it from <new_name> by removing underscores
            and capitalising the next letter.

        Callable items are wrapped in a function that emits a
        `FitzDeprecation` warning before delegating; any other item (e.g. a
        property object) is bound to the legacy name as-is.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            # snake_case -> camelCase, e.g. 'get_text_words' -> 'getTextWords'.
            legacy_name = ''
            capitalise_next = False
            for c in new_name:
                if c == '_':
                    capitalise_next = True
                elif capitalise_next:
                    legacy_name += c.upper()
                    capitalise_next = False
                else:
                    legacy_name += c
        new_object = getattr( class_, new_name)
        assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if callable( new_object):
            def deprecated_function( *args, **kwargs):
                # The message shows the plain legacy name (previously the
                # f-string debug form `{legacy_name=}` leaked
                # "legacy_name='...'" into the text). `stacklevel=2`
                # attributes the warning to the code that called the legacy
                # name rather than to this wrapper.
                warnings.warn(
                        f'"{legacy_name}" removed from {class_} after v1.19.0 - use "{new_name}".',
                        category=FitzDeprecation,
                        stacklevel=2,
                        )
                return new_object( *args, **kwargs)
            setattr( class_, legacy_name, deprecated_function)
            deprecated_function.__doc__ = (
                    f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
                    f'{new_object.__doc__}'
                    )
        else:
            setattr( class_, legacy_name, new_object)

    _alias( Annot, 'get_file', 'fileGet')
    _alias( Annot, 'get_pixmap')
    _alias( Annot, 'get_sound', 'soundGet')
    _alias( Annot, 'get_text')
    _alias( Annot, 'get_textbox')
    _alias( Annot, 'get_textpage', 'getTextPage')
    _alias( Annot, 'line_ends')
    _alias( Annot, 'set_blendmode', 'setBlendMode')
    _alias( Annot, 'set_border')
    _alias( Annot, 'set_colors')
    _alias( Annot, 'set_flags')
    _alias( Annot, 'set_info')
    _alias( Annot, 'set_line_ends')
    _alias( Annot, 'set_name')
    _alias( Annot, 'set_oc', 'setOC')
    _alias( Annot, 'set_opacity')
    _alias( Annot, 'set_rect')
    _alias( Annot, 'update_file', 'fileUpd')
    _alias( DisplayList, 'get_pixmap')
    _alias( DisplayList, 'get_textpage', 'getTextPage')
    _alias( Document, 'chapter_count')
    _alias( Document, 'chapter_page_count')
    _alias( Document, 'convert_to_pdf', 'convertToPDF')
    _alias( Document, 'copy_page')
    _alias( Document, 'delete_page')
    _alias( Document, 'delete_pages', 'deletePageRange')
    _alias( Document, 'embfile_add', 'embeddedFileAdd')
    _alias( Document, 'embfile_count', 'embeddedFileCount')
    _alias( Document, 'embfile_del', 'embeddedFileDel')
    _alias( Document, 'embfile_get', 'embeddedFileGet')
    _alias( Document, 'embfile_info', 'embeddedFileInfo')
    _alias( Document, 'embfile_names', 'embeddedFileNames')
    _alias( Document, 'embfile_upd', 'embeddedFileUpd')
    _alias( Document, 'extract_font')
    _alias( Document, 'extract_image')
    _alias( Document, 'find_bookmark')
    _alias( Document, 'fullcopy_page')
    _alias( Document, 'get_char_widths')
    _alias( Document, 'get_ocgs', 'getOCGs')
    _alias( Document, 'get_page_fonts', 'getPageFontList')
    _alias( Document, 'get_page_images', 'getPageImageList')
    _alias( Document, 'get_page_pixmap')
    _alias( Document, 'get_page_text')
    _alias( Document, 'get_page_xobjects', 'getPageXObjectList')
    _alias( Document, 'get_sigflags', 'getSigFlags')
    _alias( Document, 'get_toc', 'getToC')
    _alias( Document, 'get_xml_metadata')
    _alias( Document, 'insert_page')
    _alias( Document, 'insert_pdf', 'insertPDF')
    _alias( Document, 'is_dirty')
    _alias( Document, 'is_form_pdf', 'isFormPDF')
    _alias( Document, 'is_pdf', 'isPDF')
    _alias( Document, 'is_reflowable')
    _alias( Document, 'is_repaired')
    _alias( Document, 'last_location')
    _alias( Document, 'load_page')
    _alias( Document, 'make_bookmark')
    _alias( Document, 'move_page')
    _alias( Document, 'needs_pass')
    _alias( Document, 'new_page')
    _alias( Document, 'next_location')
    _alias( Document, 'page_count')
    _alias( Document, 'page_cropbox', 'pageCropBox')
    _alias( Document, 'page_xref')
    _alias( Document, 'pdf_catalog', 'PDFCatalog')
    _alias( Document, 'pdf_trailer', 'PDFTrailer')
    _alias( Document, 'prev_location', 'previousLocation')
    _alias( Document, 'resolve_link')
    _alias( Document, 'search_page_for')
    _alias( Document, 'set_language')
    _alias( Document, 'set_metadata')
    _alias( Document, 'set_toc', 'setToC')
    _alias( Document, 'set_xml_metadata')
    _alias( Document, 'update_object')
    _alias( Document, 'update_stream')
    _alias( Document, 'xref_is_stream', 'isStream')
    _alias( Document, 'xref_length')
    _alias( Document, 'xref_object')
    _alias( Document, 'xref_stream')
    _alias( Document, 'xref_stream_raw')
    _alias( Document, 'xref_xml_metadata', 'metadataXML')
    _alias( IRect, 'get_area')
    _alias( IRect, 'get_area', 'getRectArea')
    _alias( IRect, 'include_point')
    _alias( IRect, 'include_rect')
    _alias( IRect, 'is_empty')
    _alias( IRect, 'is_infinite')
    _alias( Link, 'is_external')
    _alias( Link, 'set_border')
    _alias( Link, 'set_colors')
    _alias( Matrix, 'is_rectilinear')
    _alias( Matrix, 'prerotate', 'preRotate')
    _alias( Matrix, 'prescale', 'preScale')
    _alias( Matrix, 'preshear', 'preShear')
    _alias( Matrix, 'pretranslate', 'preTranslate')
    _alias( None, 'get_pdf_now', 'getPDFnow')
    _alias( None, 'get_pdf_str', 'getPDFstr')
    _alias( None, 'get_text_length')
    _alias( None, 'get_text_length', 'getTextlength')
    _alias( None, 'image_profile', 'ImageProperties')
    _alias( None, 'paper_rect', 'PaperRect')
    _alias( None, 'paper_size', 'PaperSize')
    _alias( None, 'paper_sizes')
    _alias( None, 'planish_line')
    _alias( Outline, 'is_external')
    _alias( Outline, 'is_open')
    _alias( Page, 'add_caret_annot')
    _alias( Page, 'add_circle_annot')
    _alias( Page, 'add_file_annot')
    _alias( Page, 'add_freetext_annot')
    _alias( Page, 'add_highlight_annot')
    _alias( Page, 'add_ink_annot')
    _alias( Page, 'add_line_annot')
    _alias( Page, 'add_polygon_annot')
    _alias( Page, 'add_polyline_annot')
    _alias( Page, 'add_rect_annot')
    _alias( Page, 'add_redact_annot')
    _alias( Page, 'add_squiggly_annot')
    _alias( Page, 'add_stamp_annot')
    _alias( Page, 'add_strikeout_annot')
    _alias( Page, 'add_text_annot')
    _alias( Page, 'add_underline_annot')
    _alias( Page, 'add_widget')
    _alias( Page, 'clean_contents')
    _alias( Page, 'cropbox', 'CropBox')
    _alias( Page, 'cropbox_position', 'CropBoxPosition')
    _alias( Page, 'delete_annot')
    _alias( Page, 'delete_link')
    _alias( Page, 'delete_widget')
    _alias( Page, 'derotation_matrix')
    _alias( Page, 'draw_bezier')
    _alias( Page, 'draw_circle')
    _alias( Page, 'draw_curve')
    _alias( Page, 'draw_line')
    _alias( Page, 'draw_oval')
    _alias( Page, 'draw_polyline')
    _alias( Page, 'draw_quad')
    _alias( Page, 'draw_rect')
    _alias( Page, 'draw_sector')
    _alias( Page, 'draw_squiggle')
    _alias( Page, 'draw_zigzag')
    _alias( Page, 'first_annot')
    _alias( Page, 'first_link')
    _alias( Page, 'first_widget')
    _alias( Page, 'get_contents')
    _alias( Page, 'get_displaylist', 'getDisplayList')
    _alias( Page, 'get_drawings')
    _alias( Page, 'get_fonts', 'getFontList')
    _alias( Page, 'get_image_bbox')
    _alias( Page, 'get_images', 'getImageList')
    _alias( Page, 'get_links')
    _alias( Page, 'get_pixmap')
    _alias( Page, 'get_svg_image', 'getSVGimage')
    _alias( Page, 'get_text')
    _alias( Page, 'get_text_blocks')
    _alias( Page, 'get_text_words')
    _alias( Page, 'get_textbox')
    _alias( Page, 'get_textpage', 'getTextPage')
    _alias( Page, 'insert_font')
    _alias( Page, 'insert_image')
    _alias( Page, 'insert_link')
    _alias( Page, 'insert_text')
    _alias( Page, 'insert_textbox')
    _alias( Page, 'is_wrapped', '_isWrapped')
    _alias( Page, 'load_annot')
    _alias( Page, 'load_links')
    _alias( Page, 'mediabox', 'MediaBox')
    _alias( Page, 'mediabox_size', 'MediaBoxSize')
    _alias( Page, 'new_shape')
    _alias( Page, 'read_contents')
    _alias( Page, 'rotation_matrix')
    _alias( Page, 'search_for')
    _alias( Page, 'set_cropbox', 'setCropBox')
    _alias( Page, 'set_mediabox', 'setMediaBox')
    _alias( Page, 'set_rotation')
    _alias( Page, 'show_pdf_page', 'showPDFpage')
    _alias( Page, 'transformation_matrix')
    _alias( Page, 'update_link')
    _alias( Page, 'wrap_contents')
    _alias( Page, 'write_text')
    _alias( Pixmap, 'clear_with')
    _alias( Pixmap, 'copy', 'copyPixmap')
    _alias( Pixmap, 'gamma_with')
    _alias( Pixmap, 'invert_irect', 'invertIRect')
    _alias( Pixmap, 'pil_save', 'pillowWrite')
    _alias( Pixmap, 'pil_tobytes', 'pillowData')
    _alias( Pixmap, 'save', 'writeImage')
    _alias( Pixmap, 'save', 'writePNG')
    _alias( Pixmap, 'set_alpha')
    _alias( Pixmap, 'set_dpi', 'setResolution')
    _alias( Pixmap, 'set_origin')
    _alias( Pixmap, 'set_pixel')
    _alias( Pixmap, 'set_rect')
    _alias( Pixmap, 'tint_with')
    _alias( Pixmap, 'tobytes', 'getImageData')
    _alias( Pixmap, 'tobytes', 'getPNGData')
    _alias( Pixmap, 'tobytes', 'getPNGdata')
    _alias( Quad, 'is_convex')
    _alias( Quad, 'is_empty')
    _alias( Quad, 'is_rectangular')
    _alias( Rect, 'get_area')
    _alias( Rect, 'get_area', 'getRectArea')
    _alias( Rect, 'include_point')
    _alias( Rect, 'include_rect')
    _alias( Rect, 'is_empty')
    _alias( Rect, 'is_infinite')
    _alias( TextWriter, 'fill_textbox')
    _alias( TextWriter, 'write_text')
    _alias( utils.Shape, 'draw_bezier')
    _alias( utils.Shape, 'draw_circle')
    _alias( utils.Shape, 'draw_curve')
    _alias( utils.Shape, 'draw_line')
    _alias( utils.Shape, 'draw_oval')
    _alias( utils.Shape, 'draw_polyline')
    _alias( utils.Shape, 'draw_quad')
    _alias( utils.Shape, 'draw_rect')
    _alias( utils.Shape, 'draw_sector')
    _alias( utils.Shape, 'draw_squiggle')
    _alias( utils.Shape, 'draw_zigzag')
    _alias( utils.Shape, 'insert_text')
    _alias( utils.Shape, 'insert_textbox')
21300
# The legacy aliases are not installed at import time: this call is
# deliberately disabled (`if 0` never executes). `restore_aliases()` remains
# available for callers that want the old names.
if 0:
    restore_aliases()

# Package version, plus a module docstring built at import time from the
# PyMuPDF and MuPDF version strings, the running Python's major.minor version,
# the platform name, and 64/32-bit as derived from `sys.maxsize`.
__version__ = VersionBind
__doc__ = (
        f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
        f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
        )