Mercurial > hgrepos > Python2 > PyMuPDF
comparison src/__init__.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | d77477b4e151 a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 ''' | |
| 2 PyMuPDF implemented on top of MuPDF Python bindings. | |
| 3 | |
| 4 License: | |
| 5 | |
| 6 SPDX-License-Identifier: GPL-3.0-only | |
| 7 ''' | |
| 8 | |
| 9 # To reduce startup times, we don't import everything we require here. | |
| 10 # | |
| 11 import atexit | |
| 12 import binascii | |
| 13 import collections | |
| 14 import inspect | |
| 15 import io | |
| 16 import math | |
| 17 import os | |
| 18 import pathlib | |
| 19 import glob | |
| 20 import re | |
| 21 import string | |
| 22 import sys | |
| 23 import tarfile | |
| 24 import time | |
| 25 import typing | |
| 26 import warnings | |
| 27 import weakref | |
| 28 import zipfile | |
| 29 | |
| 30 from . import extra | |
| 31 | |
| 32 | |
| 33 # Set up g_out_log and g_out_message from environment variables. | |
| 34 # | |
| 35 # PYMUPDF_MESSAGE controls the destination of user messages (from function | |
| 36 # `pymupdf.message()`). | |
| 37 # | |
| 38 # PYMUPDF_LOG controls the destination of internal development logging (from | |
| 39 # function `pymupdf.log()`). | |
| 40 # | |
| 41 # For syntax, see _make_output()'s `text` arg. | |
| 42 # | |
| 43 | |
def _make_output(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        default=None,
        ):
    '''
    Returns a stream that writes to a specified destination, which can be a
    file descriptor, a file, an existing stream or Python's `logging` system.

    Args:
        text: text specification of destination.
            fd:<int> - write to file descriptor.
            path:<str> - write to file.
            path+:<str> - append to file.
            logging:<items> - write to Python `logging` module.
                items: comma-separated <name=value> pairs.
                    level=<int>
                    name=<str>.
                    Other names are ignored.

        fd: an int file descriptor.
        stream: something with methods .write(text) and .flush().
            If specified we simply return <stream>.
        path: a file path.
            If specified we return a stream that writes to this file.
        path_append: a file path.
            If specified we return a stream that appends to this file.
        pylogging*:
            if any of these args is not None, we return a stream that writes to
            Python's `logging` module.

        pylogging:
            Unused other than to activate use of logging module.
        pylogging_logger:
            A logging.Logger; If None, set from <pylogging_name>.
        pylogging_level:
            An int log level, if None we use
            pylogging_logger.getEffectiveLevel().
        pylogging_name:
            Only used if <pylogging_logger> is None:
                If <pylogging_name> is None, we set it to 'pymupdf'.
                Then we do: pylogging_logger = logging.getLogger(pylogging_name)
        default:
            Returned unchanged if no destination was specified.

    Destinations are checked in this order: fd, stream, path, path_append,
    pylogging*; the first one that is set wins.
    '''
    if text is not None:
        # Textual specification, for example from an environment variable.
        if text.startswith('fd:'):
            fd = int(text[3:])
        elif text.startswith('path:'):
            path = text[5:]
        elif text.startswith('path+'):
            # The documented prefix is `path+:`, which is six characters long.
            # Slicing at 5 alone would leave the ':' as the first character of
            # the file name, so we drop it here. A colon-less `path+<str>` is
            # still accepted, as before.
            path_append = text[5:]
            if path_append.startswith(':'):
                path_append = path_append[1:]
        elif text.startswith('logging:'):
            pylogging = True
            items_d = dict()
            for item in text[8:].split(','):
                if not item:
                    continue
                nv = item.split('=', 1)
                assert len(nv) == 2, f'Need `=` in {item=}.'
                n, v = nv
                items_d[n] = v
            pylogging_level = items_d.get('level')
            if pylogging_level is not None:
                pylogging_level = int(pylogging_level)
            pylogging_name = items_d.get('name', 'pymupdf')
        else:
            assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'

    if fd is not None:
        # closefd=False: the descriptor belongs to the caller.
        ret = open(fd, mode='w', closefd=False)
    elif stream is not None:
        assert hasattr(stream, 'write')
        assert hasattr(stream, 'flush')
        ret = stream
    elif path is not None:
        ret = open(path, 'w')
    elif path_append is not None:
        ret = open(path_append, 'a')
    elif (0
            or pylogging is not None
            or pylogging_logger is not None
            or pylogging_level is not None
            or pylogging_name is not None
            ):
        import logging
        if pylogging_logger is None:
            if pylogging_name is None:
                pylogging_name = 'pymupdf'
            pylogging_logger = logging.getLogger(pylogging_name)
        assert isinstance(pylogging_logger, logging.Logger)
        if pylogging_level is None:
            pylogging_level = pylogging_logger.getEffectiveLevel()
        class Out:
            def write(self, text):
                # `logging` module appends newlines, but so does the `print()`
                # functions in our caller message() and log() fns, so we need to
                # remove them here.
                text = text.rstrip('\n')
                if text:
                    pylogging_logger.log(pylogging_level, text)
            def flush(self):
                pass
        ret = Out()
    else:
        ret = default
    return ret
| 160 | |
| 161 # Set stream used by PyMuPDF messaging. | |
| 162 _g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout) | |
| 163 | |
| 164 # Set stream used by PyMuPDF development/debugging logging. | |
| 165 _g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout) | |
| 166 | |
# Things for testing logging.
#
# While _g_log_items_active is true, log() appends each formatted line to
# _g_log_items in addition to writing it out.
_g_log_items = []
_g_log_items_active = False


def _log_items():
    '''
    Returns the list of captured log lines (the list itself, not a copy).
    '''
    return _g_log_items


def _log_items_active(active):
    '''
    Switches capturing of log lines on or off.
    '''
    global _g_log_items_active
    _g_log_items_active = active


def _log_items_clear():
    '''
    Empties the list of captured log lines in place.
    '''
    _g_log_items.clear()
| 180 | |
| 181 | |
def set_messages(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF messages. See _make_output() for details.

    All args are forwarded unchanged to _make_output(); the current
    destination is passed as its `default`.
    '''
    global _g_out_message
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_message = _make_output(default=_g_out_message, **destination)
| 210 | |
def set_log(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF development/debugging logging. See
    _make_output() for details.

    All args are forwarded unchanged to _make_output(); the current
    destination is passed as its `default`.
    '''
    global _g_out_log
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_log = _make_output(default=_g_out_log, **destination)
| 240 | |
def log( text='', caller=1):
    '''
    For development/debugging diagnostics.

    Prefixes <text> with `<filename>:<line>:<function>(): ` taken from the
    stack frame <caller> levels above this function (1 is our direct caller),
    then writes it to the log destination.
    '''
    try:
        frames = inspect.stack(context=0)
    except StopIteration:
        # No stack information; emit <text> without a location prefix.
        pass
    else:
        info = frames[caller]
        try:
            where = os.path.relpath(info.filename)
        except Exception:   # Can fail on windows.
            where = info.filename
        text = f'{where}:{info.lineno}:{info.function}(): {text}'
    if _g_log_items_active:
        # Capture for tests, see _log_items().
        _g_log_items.append(text)
    if _g_out_log:
        print(text, file=_g_out_log, flush=1)
| 262 | |
| 263 | |
def message(text=''):
    '''
    For user messages.

    Writes <text> to the current message destination and flushes it. Does
    nothing if that destination is falsy.
    '''
    # It looks like `print()` does nothing if sys.stdout is None (without
    # raising an exception), but we don't rely on this.
    out = _g_out_message
    if not out:
        return
    print(text, file=out, flush=1)
| 272 | |
| 273 | |
def exception_info():
    '''
    Writes a header line followed by traceback.format_exc() to log().
    '''
    from traceback import format_exc
    log('exception_info:')
    log(format_exc())
| 278 | |
| 279 | |
# PDF names must not contain these characters: any whitespace, the delimiters
# below, and the NUL character.
INVALID_NAME_CHARS = set(string.whitespace) | set("()<>[]{}/%") | {chr(0)}
| 282 | |
def get_env_bool( name, default):
    '''
    Returns `True`, `False` or `default` depending on whether $<name> is '1',
    '0' or unset. Otherwise assert-fails.

    A result that differs from `default` is reported with log().
    '''
    v = os.environ.get( name)
    if v is None:
        ret = default
    else:
        assert v in ('0', '1'), f'Unrecognised value for {name}: {v!r}'
        ret = (v == '1')
    if ret != default:
        log(f'Using non-default setting from {name}: {v!r}')
    return ret
| 300 | |
def get_env_int( name, default):
    '''
    Returns $<name> converted with `int()`, or `default` if $<name> is unset.

    Raises ValueError (from `int()`) if $<name> is set to something that is
    not an integer. A result that differs from `default` is reported with
    log().
    '''
    v = os.environ.get( name)
    if v is None:
        ret = default
    else:
        ret = int(v)
    if ret != default:
        log(f'Using non-default setting from {name}: {v}')
    return ret
| 314 | |
| 315 # All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is | |
| 316 # true. | |
| 317 g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1) | |
| 318 | |
| 319 # $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`. | |
| 320 # | |
| 321 g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True) | |
| 322 | |
| 323 | |
# Global switches
#

class _Globals:
    '''
    Holder for module-wide switches; every switch starts as 0.
    '''
    def __init__(self):
        for switch in (
                'no_device_caching',
                'small_glyph_heights',
                'subset_fontnames',
                'skip_quad_corrections',
                ):
            setattr(self, switch, 0)

_globals = _Globals()
| 335 | |
| 336 | |
# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
# as of 2023-01-20 11:51:40
#
# $MUPDF_CPPYY unset: use the SWIG bindings.
# $MUPDF_CPPYY empty: `import mupdf_cppyy` from the normal import path.
# $MUPDF_CPPYY otherwise: treated as the path of the mupdf_cppyy source file.
#
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # $PYTHONPATH may be unset, so use .get(); indexing os.environ would raise
    # KeyError before we even try the import.
    log( f'{__file__}: $PYTHONPATH={os.environ.get("PYTHONPATH")}')
    if mupdf_cppyy == '':
        import mupdf_cppyy
    else:
        # Import the submodule explicitly; `import importlib` alone does not
        # guarantee that `importlib.machinery` is available as an attribute.
        import importlib.machinery
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    if hasattr(mupdf, 'internal_check_ndebug'):
        mupdf.internal_check_ndebug()
    mupdf.reinit_singlethreaded()
| 366 | |
def _int_rc(text):
    '''
    Converts string to int, ignoring trailing 'rc...'.

    Everything from the first occurrence of 'rc' onwards is dropped before
    conversion, e.g. '4rc1' gives 4.
    '''
    digits, _, _ = text.partition('rc')
    return int(digits)
| 375 | |
| 376 # Basic version information. | |
| 377 # | |
| 378 # (We use `noqa F401` to avoid flake8 errors such as `F401 | |
| 379 # '._build.mupdf_location' imported but unused`. | |
| 380 # | |
| 381 from ._build import mupdf_location # noqa F401 | |
| 382 from ._build import pymupdf_git_branch # noqa F401 | |
| 383 from ._build import pymupdf_git_diff # noqa F401 | |
| 384 from ._build import pymupdf_git_sha # noqa F401 | |
| 385 from ._build import pymupdf_version # noqa F401 | |
| 386 from ._build import swig_version # noqa F401 | |
| 387 from ._build import swig_version_tuple # noqa F401 | |
| 388 | |
mupdf_version = mupdf.FZ_VERSION

# Removed in PyMuPDF-1.26.1.
pymupdf_date = None

# Versions as tuples; useful when comparing versions.
#
# Each dot-separated component is converted with _int_rc().
#
pymupdf_version_tuple = tuple(map(_int_rc, pymupdf_version.split('.')))
mupdf_version_tuple = tuple(map(_int_rc, mupdf_version.split('.')))

# The version string and the numeric version constants must agree.
assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
        f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'

# Legacy version information.
#
VersionFitz = mupdf_version
VersionBind = pymupdf_version
VersionDate = None
version = (VersionBind, VersionFitz, VersionDate)
| 408 | |
| 409 | |
# String formatting.

def _format_g(value, *, fmt='%g'):
    '''
    Returns `value` formatted with mupdf.fz_format_double().

    If `value` is a list or tuple, we return a space-separated string of
    formatted values; nested lists/tuples are formatted recursively.
    '''
    if not isinstance(value, (list, tuple)):
        return mupdf.fz_format_double(fmt, value)
    out = ''
    for item in value:
        piece = _format_g(item, fmt=fmt)
        # Only insert a separator once something has been written.
        out = f'{out} {piece}' if out else piece
    return out

format_g = _format_g
| 431 | |
# ByteString is gone from typing in 3.14.
# collections.abc.Buffer available from 3.12 only
try:
    ByteString = typing.ByteString
except AttributeError:
    # Fall back to a union of the concrete bytes-like types.
    ByteString = bytes | bytearray | memoryview

# Names required by class method typing annotations.
# Each is "<type> or None".
OptBytes = typing.Union[ByteString, None]
OptDict = typing.Union[dict, None]
OptFloat = typing.Union[float, None]
OptInt = typing.Optional[int]
OptSeq = typing.Union[typing.Sequence, None]
OptStr = typing.Union[str, None]
| 446 | |
| 447 Page = 'Page_forward_decl' | |
| 448 Point = 'Point_forward_decl' | |
| 449 | |
| 450 matrix_like = 'matrix_like' | |
| 451 point_like = 'point_like' | |
| 452 quad_like = 'quad_like' | |
| 453 rect_like = 'rect_like' | |
| 454 | |
| 455 | |
def _as_fz_document(document):
    '''
    Returns document as a mupdf.FzDocument, upcasting as required. Raises
    'document closed' exception if closed.

    Accepts a Document (unwrapped via `.this`), a mupdf.FzDocument or a
    mupdf.PdfDocument; anything else assert-fails.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
| 473 | |
def _as_pdf_document(document, required=True):
    '''
    Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e.
    `document` is not actually a `PdfDocument`) then we assert-fail if `required`
    is true (the default) else return a `mupdf.PdfDocument` with `.m_internal`
    false.

    Raises ValueError('document closed') for a closed Document.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        pdf = mupdf.PdfDocument(document)
        if required:
            assert pdf.m_internal
        return pdf
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
| 496 | |
def _as_fz_page(page):
    '''
    Returns page as a mupdf.FzPage, upcasting as required.

    Accepts a Page (unwrapped via `.this`), a mupdf.PdfPage or a
    mupdf.FzPage; anything else assert-fails.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
| 511 | |
def _as_pdf_page(page, required=True):
    '''
    Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page`
    is not actually a `PdfPage`) then we assert-fail if `required` is true (the
    default) else return a `mupdf.PdfPage` with `.m_internal` false.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        pdf_page = mupdf.pdf_page_from_fz_page(page)
        if required:
            assert pdf_page.m_internal
        return pdf_page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
| 531 | |
| 532 | |
def _pdf_annot_page(annot):
    '''
    Wrapper for mupdf.pdf_annot_page() which raises an exception if <annot>
    is not bound to a page instead of returning a mupdf.PdfPage with
    `.m_internal=None`.

    [Some other MuPDF functions such as pdf_update_annot()` already raise a
    similar exception if a pdf_annot's .page field is null.]
    '''
    page = mupdf.pdf_annot_page(annot)
    if page.m_internal:
        return page
    raise RuntimeError('Annot is not bound to a page')
| 546 | |
| 547 | |
| 548 # Fixme: we don't support JM_MEMORY=1. | |
| 549 JM_MEMORY = 0 | |
| 550 | |
| 551 # Classes | |
| 552 # | |
| 553 | |
| 554 class Annot: | |
| 555 | |
def __init__(self, annot):
    '''
    Wraps <annot>, which must be a mupdf.PdfAnnot; it is kept in `self.this`.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    self.this = annot
| 559 | |
| 560 def __repr__(self): | |
| 561 parent = getattr(self, 'parent', '<>') | |
| 562 return "'%s' annotation on %s" % (self.type[1], str(parent)) | |
| 563 | |
| 564 def __str__(self): | |
| 565 return self.__repr__() | |
| 566 | |
| 567 def _erase(self): | |
| 568 if getattr(self, "thisown", False): | |
| 569 self.thisown = False | |
| 570 | |
def _get_redact_values(self):
    '''
    Returns a dict describing a redaction annotation, or None if this is
    not a redaction annotation or reading its PDF object failed.

    The dict holds the overlay text, the alignment (/Q, default 0), the
    xref of /RO if that key is present (it is otherwise ignored, with a
    warning), plus "rect", "text_color", "fontname", "fontsize" and "fill".
    '''
    annot = self.this
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
        return None

    val = {}
    try:
        ro = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO")
        if ro.m_internal:
            message_warning("Ignoring redaction key '/RO'.")
            val[dictkey_xref] = mupdf.pdf_to_num(ro)

        overlay = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText")
        if overlay.m_internal:
            val[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            val[dictkey_text] = ''

        quadding = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'))
        val[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return None

    if not val:
        return val
    val["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    val["text_color"] = text_color
    val["fontname"] = fontname
    val["fontsize"] = fontsize
    val["fill"] = self.colors["fill"]
    return val
| 609 | |
def _getAP(self):
    '''
    Returns the content of the normal appearance stream (/AP/N).

    With g_use_extra this is delegated to extra.Annot_getAP(), which must
    return bytes. Otherwise returns None when /AP/N is not a stream or
    loading it gave no buffer.
    '''
    if g_use_extra:
        assert isinstance( self.this, mupdf.PdfAnnot)
        ret = extra.Annot_getAP(self.this)
        assert isinstance( ret, bytes)
        return ret

    annot = self.this
    assert isinstance( annot, mupdf.PdfAnnot)
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj( annot),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not mupdf.pdf_is_stream( ap):
        return None
    res = mupdf.pdf_load_stream( ap)
    if res and res.m_internal:
        return JM_BinFromBuffer(res)
    return None
| 628 | |
def _setAP(self, buffer_, rect=0):
    '''
    Replaces the content of the normal appearance stream (/AP/N) with
    <buffer_>. If <rect> is true, also copies the annotation's /Rect into
    the stream's /BBox.

    Failures are not propagated: any exception is reported via
    exception_info() (when g_exceptions_verbose is set) and then dropped.
    '''
    try:
        annot = self.this
        annot_obj = mupdf.pdf_annot_obj( annot)
        page = _pdf_annot_page(annot)
        apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        # /AP/N must exist and be a stream.
        if not apobj.m_internal or not mupdf.pdf_is_stream( apobj):
            raise RuntimeError( MSG_BAD_APN)
        res = JM_BufferFromBytes( buffer_)
        if not res.m_internal:
            raise ValueError( MSG_BAD_BUFFER)
        JM_update_stream( page.doc(), apobj, res, 1)
        if rect:
            bbox = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect'))
            mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), bbox)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
| 648 | |
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
    '''
    Updates fill color and rotation, lets MuPDF regenerate the appearance,
    then optionally records opacity and blend mode.

    Args:
        opacity: only applied if 0 <= opacity < 1.
        blend_mode: name of the blend mode, or a false value for none.
        fill_color: passed through JM_color_FromSequence().
        rotate: only applied if >= 0 and the annotation type is in the
            list of rotatable types below.

    Returns True. Exceptions are reported with message() and re-raised.
    '''
    annot = self.this
    assert annot.m_internal
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    pdf = page.doc()
    type_ = mupdf.pdf_annot_type( annot)
    nfcol, fcol = JM_color_FromSequence(fill_color)

    try:
        # Annotation types that keep an interior (fill) color.
        fillable = (
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                )
        # Annotation types for which /Rotate is written.
        rotatable = (
                mupdf.PDF_ANNOT_CARET,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_FREE_TEXT,
                mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                mupdf.PDF_ANNOT_INK,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_STAMP,
                mupdf.PDF_ANNOT_TEXT,
                )

        # remove fill color from unsupported annots
        # or if so requested
        if nfcol == 0 or type_ not in fillable:
            mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
        elif nfcol > 0:
            mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])

        if rotate >= 0 and type_ in rotatable:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

        # insert fill color
        if nfcol > 0:
            if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
                mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
            else:
                col = mupdf.pdf_new_array(page.doc(), nfcol)
                for i in range( nfcol):
                    mupdf.pdf_array_push_real(col, fcol[i])
                mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
        mupdf.pdf_dirty_annot(annot)
        mupdf.pdf_update_annot(annot)  # let MuPDF update
        pdf.resynth_required = 0
    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot update annot: {e}')
        raise

    opacity_unset = opacity < 0 or opacity >= 1
    if opacity_unset and not blend_mode:    # no opacity, no blend_mode
        return True

    try:    # create or update /ExtGState
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                PDF_NAME('AP'),
                PDF_NAME('N')
                )
        if not ap.m_internal:   # should never happen
            raise RuntimeError( MSG_BAD_APN)

        resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
        if not resources.m_internal:    # no Resources yet: make one
            resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

        # Graphics state dict carrying the alpha values and blend mode.
        alp0 = mupdf.pdf_new_dict( page.doc(), 3)
        if 0 <= opacity < 1:
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
            mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)

        if blend_mode:
            mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)

        extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
        if not extg.m_internal: # no ExtGState yet: make one
            extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)

        mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)

    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot set opacity or blend mode\n: {e}')
        raise

    return True
| 747 | |
@property
def apn_bbox(self):
    """annotation appearance bbox

    The /BBox of /AP/N (or the infinite rectangle if there is no /AP/N),
    multiplied by the parent's transformation and derotation matrices.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if ap.m_internal:
        rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
    else:
        rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    bbox = Rect(JM_py_from_rect(rect)) * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
| 764 | |
@property
def apn_matrix(self):
    """annotation appearance matrix

    A Matrix built from the /Matrix entry of /AP/N. Without /AP/N the
    result of JM_py_from_matrix() for a default mupdf.FzMatrix() is
    returned as is.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if not ap.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        mat = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
        return Matrix(JM_py_from_matrix(mat))
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
| 788 | |
@property
def blendmode(self):
    """annotation BlendMode

    Taken from the annotation's own /BM entry if present, else from the
    first /BM key found in the /AP/N/Resources/ExtGState dictionaries.
    None if neither exists.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    direct = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if direct.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(direct))

    # loop through the /AP/N/Resources/ExtGState objects
    extgstate = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(extgstate):
        return None
    for i in range(mupdf.pdf_dict_len(extgstate)):
        gstate = mupdf.pdf_dict_get_val(extgstate, i)
        if not mupdf.pdf_is_dict(gstate):
            continue
        for j in range(mupdf.pdf_dict_len(gstate)):
            key = mupdf.pdf_dict_get_key(gstate, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                name = mupdf.pdf_to_name(mupdf.pdf_dict_get_val(gstate, j))
                return JM_UnicodeFromStr(name)
    return None
| 820 | |
@property
def border(self):
    """Border information.

    An empty dict for annotation types other than those listed below,
    otherwise the result of JM_annot_border().
    """
    CheckParent(self)
    with_border = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if self.type[0] in with_border:
        return JM_annot_border(mupdf.pdf_annot_obj(self.this))
    return dict()
| 839 | |
def clean_contents(self, sanitize=1):
    """Clean appearance contents stream.

    Runs mupdf.pdf_filter_annot_contents() on this annotation with
    recurse=1, instance_forms=0, ascii=0 and the given `sanitize`.
    """
    CheckParent(self)
    annot = self.this
    options = _make_PdfFilterOptions(
            recurse=1,
            instance_forms=0,
            ascii=0,
            sanitize=sanitize,
            )
    pdf = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
    mupdf.pdf_filter_annot_contents(pdf, annot, options)
| 847 | |
@property
def colors(self):
    """Color definitions.

    The result of JM_annot_colors() for this annotation's PDF object.
    Exceptions are reported via exception_info() and re-raised.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
| 859 | |
def delete_responses(self):
    """Delete 'Popup' and responding annotations.

    Deletes every annotation found by JM_find_annot_irt(), removes this
    annotation's /Popup entry, and drops all entries of the page's /Annots
    array whose /Parent is this annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # Delete responding annotations until none is left.
    while irt_annot := JM_find_annot_irt(annot):
        mupdf.pdf_delete_annot(page, irt_annot)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))

    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    changed = False
    # Walk backwards so deletions do not shift the indices still to visit.
    for i in reversed(range(mupdf.pdf_array_len(annots))):
        entry = mupdf.pdf_array_get(annots, i)
        parent = mupdf.pdf_dict_get(entry, PDF_NAME('Parent'))
        if not entry.m_internal:
            continue
        if not mupdf.pdf_objcmp(parent, annot_obj):
            mupdf.pdf_array_delete(annots, i)
            changed = True
    if changed:
        mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
| 886 | |
@property
def file_info(self):
    """Attached file information.

    Returns a dict with the file name (/FS/UF, else /FS/F), the
    description (/FS/Desc), the stream's /Length and its /Params/Size.
    Length and size are -1 when the entry is missing.

    Raises TypeError if this is not a file attachment annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # Defaults for entries that are missing from the PDF. `filename` needs a
    # default too: it used to be unbound (UnboundLocalError below) when the
    # file spec has neither /UF nor /F.
    # NOTE(review): assumes JM_EscapeStrFromStr() accepts an empty str like
    # any other str - confirm.
    filename = ''
    desc = None
    length = -1
    size = -1

    # /UF takes precedence over /F.
    for key in ('UF', 'F'):
        o = mupdf.pdf_dict_get(fs, PDF_NAME(key))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)
            break

    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)

    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)

    res = dict()
    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_descr] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
| 936 | |
@property
def flags(self):
    """Annotation flags as reported by `mupdf.pdf_annot_flags()`."""
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
| 943 | |
def get_file(self):
    """Return the content of the attached file.

    The /FS /EF /F stream of the annotation is loaded and converted with
    `JM_BinFromBuffer()`.

    Raises:
        TypeError: the annotation is not a file attachment.
        Whatever `RAISEPY(..., JM_Exc_FileDataError)` raises when the
        stream entry is missing.
    """
    CheckParent(self)
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    annot_type = mupdf.pdf_annot_type(pdf_annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    embedded = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not embedded.m_internal:
        RAISEPY("bad PDF: file entry not found", JM_Exc_FileDataError)
    return JM_BinFromBuffer(mupdf.pdf_load_stream(embedded))
| 958 | |
def get_oc(self):
    """Return the object number of the annotation's /OC entry, or 0 if absent."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    oc_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
    if not oc_obj.m_internal:
        return 0
    return mupdf.pdf_to_num(oc_obj)
| 969 | |
| 970 # PyMuPDF doesn't seem to have this .parent member, but removing it breaks | |
| 971 # 11 tests...? | |
| 972 #@property | |
def get_parent(self):
    """Return the owning page, creating and caching it on first use.

    If the instance already has a `parent` attribute it is returned as is.
    Otherwise a `Page` is built from the annotation's PDF page (with a
    `Document` wrapper when the page is valid, else `None`), stored in
    `self.parent` as a strong reference, and returned.
    """
    try:
        return self.parent
    except AttributeError:
        pdf_page = _pdf_annot_page(self.this)
        assert isinstance(pdf_page, mupdf.PdfPage)
        if pdf_page.m_internal:
            document = Document(pdf_page.doc())
        else:
            document = None
        owner = Page(pdf_page, document)
        # Deliberately a strong reference, not a weakref proxy.
        self.parent = owner
        return owner
| 986 | |
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """Render the annotation to a Pixmap.

    Args:
        matrix: transformation to apply; replaced by a uniform
            `dpi / 72` scaling when `dpi` is given.
        dpi: if truthy, overrides `matrix` and is also recorded in the
            resulting pixmap via `set_dpi()`.
        colorspace: a colorspace object, or one of the strings "gray",
            "rgb", "cmyk" (case-insensitive). Unknown strings and falsy
            values fall back to device RGB.
        alpha: passed through to MuPDF.
    """
    CheckParent(self)
    if type(colorspace) is str:
        by_name = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
        colorspace = by_name.get(colorspace.lower(), None)
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    cs = colorspace if colorspace else mupdf.fz_device_rgb()

    rendered = mupdf.pdf_new_pixmap_from_annot(
            self.this,
            ctm,
            cs,
            mupdf.FzSeparations(0),
            alpha,
            )
    result = Pixmap(rendered)
    if dpi:
        result.set_dpi(dpi, dpi)
    return result
| 1006 | |
def get_sound(self):
    """Return the sound stream and its properties as a dict.

    Optional keys 'rate', 'channels', 'bps', 'encoding' and 'compression'
    are present only when the /Sound dictionary has the matching entry
    (/R, /C, /B, /E, /CO). Key 'stream' always holds the stream content.

    Raises:
        TypeError: not a sound annotation, or no /Sound entry.
        Whatever `RAISEPY(..., JM_Exc_FileDataError)` raises when the
        sound dictionary has an /F entry.
    """
    CheckParent(self)
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    annot_type = mupdf.pdf_annot_type(pdf_annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if not (annot_type == mupdf.PDF_ANNOT_SOUND and sound.m_internal):
        raise TypeError(MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY("unsupported sound stream", JM_Exc_FileDataError)

    res = dict()
    # (PDF key, result key, converter) for entries looked up by PDF name.
    named_entries = (
            ('R', 'rate', mupdf.pdf_to_real),
            ('C', 'channels', mupdf.pdf_to_int),
            ('B', 'bps', mupdf.pdf_to_int),
            ('E', 'encoding', mupdf.pdf_to_name),
            )
    for pdf_key, out_key, convert in named_entries:
        entry = mupdf.pdf_dict_get(sound, PDF_NAME(pdf_key))
        if entry.m_internal:
            res[out_key] = convert(entry)

    # /CO is looked up by string key rather than via PDF_NAME().
    entry = mupdf.pdf_dict_gets(sound, "CO")
    if entry.m_internal:
        res['compression'] = mupdf.pdf_to_name(entry)

    res['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return res
| 1038 | |
def get_textpage(self, clip=None, flags=0):
    """Create a TextPage for the annotation.

    Args:
        clip: optional rectangle; when truthy it is stored in the stext
            options and the FZ_STEXT_CLIP_RECT flag is switched on.
        flags: initial stext option flags.

    The returned TextPage gets a weak proxy to the owning page as its
    `parent`.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions(flags)
    if clip:
        assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
        options.clip = JM_rect_from_py(clip).internal()
        options.flags |= mupdf.FZ_STEXT_CLIP_RECT
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    owner = self.get_parent()
    # Never wrap an existing proxy a second time.
    if not isinstance(owner, weakref.ProxyType):
        owner = weakref.proxy(owner)
    textpage.parent = owner
    return textpage
| 1057 | |
@property
def has_popup(self):
    """True if the annotation object has a /Popup entry, else False."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    return bool(popup.m_internal)
| 1065 | |
@property
def info(self):
    """Return a dict of descriptive annotation properties.

    Entries, in insertion order: content, name (/Name), title (/T, the
    author), creation date (/CreationDate), modification date (/M),
    subject (/Subj) and id (/NM).
    """
    CheckParent(self)
    pdf_annot = self.this
    res = dict()

    res[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(pdf_annot))

    annot_obj = mupdf.pdf_annot_obj(pdf_annot)

    icon = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Name'))
    res[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(icon))

    # Title (= author)
    author = mupdf.pdf_dict_get(annot_obj, PDF_NAME('T'))
    res[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(author))

    created = mupdf.pdf_dict_gets(annot_obj, "CreationDate")
    res[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(created))

    modified = mupdf.pdf_dict_get(annot_obj, PDF_NAME('M'))
    res[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(modified))

    # The subject is stored as returned by MuPDF, without JM_UnicodeFromStr().
    subject = mupdf.pdf_dict_gets(annot_obj, "Subj")
    res[dictkey_subject] = mupdf.pdf_to_text_string(subject)

    # Identification (PDF key /NM)
    ident = mupdf.pdf_dict_gets(annot_obj, "NM")
    res[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(ident))

    return res
| 1099 | |
@property
def irt_xref(self):
    '''
    Object number of the annotation's /IRT entry, or 0 if there is none.
    '''
    annot_obj = mupdf.pdf_annot_obj(self.this)
    irt = mupdf.pdf_dict_get(annot_obj, PDF_NAME('IRT'))
    if irt.m_internal:
        return mupdf.pdf_to_num(irt)
    return 0
| 1111 | |
@property
def is_open(self):
    """'Open' status of the annotation or its Popup, as reported by MuPDF."""
    CheckParent(self)
    pdf_annot = self.this
    return mupdf.pdf_annot_is_open(pdf_annot)
| 1117 | |
@property
def language(self):
    """Annotation language as a string, or None if no language is set."""
    lang = mupdf.pdf_annot_language(self.this)
    if lang != mupdf.FZ_LANG_UNSET:
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        return mupdf.fz_string_from_text_language2(lang)
    return None
| 1127 | |
@property
def line_ends(self):
    """Line end codes as a (start, end) tuple.

    Returns None for annotations that MuPDF reports as not having line
    ending styles.
    """
    CheckParent(self)
    pdf_annot = self.this
    if mupdf.pdf_annot_has_line_ending_styles(pdf_annot):
        start_style = mupdf.pdf_annot_line_start_style(pdf_annot)
        end_style = mupdf.pdf_annot_line_end_style(pdf_annot)
        return start_style, end_style
    return None
| 1139 | |
@property
def next(self):
    """Return the following annotation (or widget), or None at the end.

    Widgets are followed via `pdf_next_widget()`, everything else via
    `pdf_next_annot()`. The successor is registered in the parent page's
    `_annot_refs`; if it is a widget, a filled `Widget` is returned
    instead of the `Annot`.
    """
    CheckParent(self)
    current = self.this
    assert isinstance(current, mupdf.PdfAnnot)
    assert current.m_internal
    if mupdf.pdf_annot_type(current) == mupdf.PDF_ANNOT_WIDGET:
        following = mupdf.pdf_next_widget(current)
    else:
        following = mupdf.pdf_next_annot(current)

    if not following.m_internal:
        return None
    val = Annot(following)
    if not val:
        return None
    val.thisown = True
    # The successor must live on the same page as this annotation.
    assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    val.parent._annot_refs[id(val)] = val

    if val.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return val
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
| 1165 | |
@property
def opacity(self):
    """Opacity (/CA) of the annotation, or -1 if /CA is not a number."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    ca = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_CA)
    if not mupdf.pdf_is_number(ca):
        return -1
    return mupdf.pdf_to_real(ca)
| 1176 | |
@property
def popup_rect(self):
    """Rectangle of the annotation's 'Popup'.

    Starts from the Popup's /Rect, or from an infinite rectangle when
    there is no /Popup entry, then applies the parent page's
    transformation matrix followed by its derotation matrix.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    if popup.m_internal:
        raw = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
    else:
        raw = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    result = Rect(JM_py_from_rect(raw)) * self.get_parent().transformation_matrix
    result *= self.get_parent().derotation_matrix
    return result
| 1195 | |
@property
def popup_xref(self):
    """Object number of the annotation's /Popup entry, or 0 if absent."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    return mupdf.pdf_to_num(popup) if popup.m_internal else 0
| 1207 | |
@property
def rect(self):
    """Annotation rectangle, multiplied by the parent page's derotation matrix.

    The raw bounds come from `extra.Annot_rect3()` when `g_use_extra` is
    set, otherwise from `mupdf.pdf_bound_annot()` wrapped in a `Rect`.
    """
    if g_use_extra:
        bounds = extra.Annot_rect3(self.this)
    else:
        bounds = Rect(mupdf.pdf_bound_annot(self.this))

    # get_parent() caches the page on the instance; per the original
    # timing note this cut 1000 calls from 0.07 to 0.04.
    page = self.get_parent()
    bounds *= page.derotation_matrix
    return bounds
| 1227 | |
@property
def rect_delta(self):
    '''
    Delta values (/RD) of the annotation rectangle.

    Returns a 4-tuple built from the /RD array with the third and fourth
    values negated, or None when /RD does not have exactly four items.
    '''
    annot_obj = mupdf.pdf_annot_obj(self.this)
    rd = mupdf.pdf_dict_get(annot_obj, PDF_NAME('RD'))
    if mupdf.pdf_array_len(rd) != 4:
        return None
    d0, d1, d2, d3 = [
            mupdf.pdf_to_real(mupdf.pdf_array_get(rd, i))
            for i in range(4)
            ]
    return (d0, d1, -d2, -d3)
| 1242 | |
@property
def rotation(self):
    """Annotation rotation (/Rotate) as an int, or -1 if the entry is absent."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    rotate_obj = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_Rotate)
    if rotate_obj.m_internal:
        return mupdf.pdf_to_int(rotate_obj)
    return -1
| 1252 | |
def set_apn_bbox(self, bbox):
    """
    Set the /BBox of the annotation's normal appearance stream.

    `bbox` is first multiplied by the page's rotation matrix times the
    inverse of its transformation matrix.

    Raises:
        RuntimeError: the annotation has no /AP /N entry.
    """
    CheckParent(self)
    page = self.get_parent()
    rotation = page.rotation_matrix
    transformation = page.transformation_matrix
    bbox *= rotation * ~transformation
    annot_obj = mupdf.pdf_annot_obj(self.this)
    appearance = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if not appearance.m_internal:
        raise RuntimeError(MSG_BAD_APN)
    mupdf.pdf_dict_put_rect(appearance, PDF_NAME('BBox'), JM_rect_from_py(bbox))
| 1269 | |
def set_apn_matrix(self, matrix):
    """Set the /Matrix of the annotation's normal appearance stream.

    Raises:
        RuntimeError: the annotation has no /AP /N entry.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    appearance = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
    if not appearance.m_internal:
        raise RuntimeError(MSG_BAD_APN)
    mupdf.pdf_dict_put_matrix(
            appearance,
            PDF_NAME('Matrix'),
            JM_matrix_from_py(matrix),
            )
| 1280 | |
def set_blendmode(self, blend_mode):
    """Store `blend_mode` as the annotation's /BM name entry."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
| 1287 | |
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either pass a dict as `border`, or use the direct arguments `width`,
    `style`, `dashes` and `clouds`. A dict that is passed in is completed
    in place with defaults for missing keys.

    Returns:
        None (after a message) if the annotation type cannot have a
        border, otherwise the result of `JM_annot_set_border()`.
    """
    CheckParent(self)
    atype, atname = self.type[:2]  # annotation type
    border_types = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    cloud_types = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if atype not in border_types:
        message(f"Cannot set border for '{atname}'.")
        return None
    if atype not in cloud_types and clouds > 0:
        message(f"Cannot set cloudy border for '{atname}'.")
        clouds = -1  # do not set border effect

    if type(border) is not dict:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}

    # Fill in missing keys, then map explicit None to the -1 sentinel.
    for key, fallback in (
            ("width", -1),
            ("style", None),
            ("dashes", None),
            ("clouds", -1),
            ):
        border.setdefault(key, fallback)
    for key in ("width", "clouds"):
        if border[key] is None:
            border[key] = -1

    # A dashes sequence is only kept if every item is an integer.
    if hasattr(border["dashes"], "__getitem__"):
        border["dashes"] = tuple(border["dashes"])
        if not all(isinstance(item, int) for item in border["dashes"]):
            border["dashes"] = None

    annot_obj = mupdf.pdf_annot_obj(self.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    return JM_annot_set_border(border, pdf, annot_obj)
| 1334 | |
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict with keys "stroke" / "fill" or the direct
    arguments. An empty list or tuple writes an empty color array, None
    leaves the respective color untouched. The stroke color goes to key
    /C, the fill color to key /IC.

    Raises:
        ValueError: called on a FreeText annotation.
    """
    if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
        raise ValueError("cannot be used for FreeText annotations")

    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")

    def store(pdf_key, color):
        # Write `color` under `pdf_key`; None means "leave as is".
        if color in ([], ()):
            doc.xref_set_key(self.xref, pdf_key, "[]")
            return
        if color is None:
            return
        if hasattr(color, "__float__"):
            color = [float(color)]
        CheckColor(color)
        assert len(color) in (1, 3, 4)
        doc.xref_set_key(self.xref, pdf_key, f"[{_format_g(color)}]")

    store("C", stroke)

    fill_annots = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_REDACT,
            )
    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return

    store("IC", fill)
| 1375 | |
def set_flags(self, flags):
    """Pass `flags` to MuPDF as the annotation's new flags value."""
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
| 1381 | |
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """Set various descriptive properties.

    When `info` is a dict, its entries "content", "title",
    "creationDate", "modDate" and "subject" replace the direct arguments.
    Only truthy values are written. Everything except the content is
    written only if MuPDF reports that the annotation can have an author.
    """
    CheckParent(self)
    if type(info) is dict:  # the dictionary overrides the direct arguments
        content, title, creationDate, modDate, subject = [
                info.get(key, None)
                for key in ("content", "title", "creationDate", "modDate", "subject")
                ]
    pdf_annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(pdf_annot)

    if content:
        mupdf.pdf_set_annot_contents(pdf_annot, content)
    if not is_markup:
        return

    if title:  # title (= author)
        mupdf.pdf_set_annot_author(pdf_annot, title)
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('CreationDate'), creationDate)
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('M'), modDate)
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(pdf_annot), "Subj", mupdf.pdf_new_text_string(subject))
| 1411 | |
def set_irt_xref(self, xref):
    '''
    Set the annotation's /IRT entry to an indirect reference to `xref`.

    Raises ValueError if `xref` is outside 1 .. xref length - 1, or if
    the /Subtype of the referenced object is not a known annotation type.
    '''
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    page = _pdf_annot_page(pdf_annot)
    if not (1 <= xref < mupdf.pdf_xref_len(page.doc())):
        raise ValueError(MSG_BAD_XREF)
    target = mupdf.pdf_new_indirect(page.doc(), xref, 0)
    subtype_name = mupdf.pdf_to_name(mupdf.pdf_dict_get(target, PDF_NAME('Subtype')))
    if mupdf.pdf_annot_type_from_string(subtype_name) < 0:
        raise ValueError(MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('IRT'), target)
| 1427 | |
def set_language(self, language=None):
    """Set the annotation language; a falsy value unsets it."""
    CheckParent(self)
    pdf_annot = self.this
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_annot_language(pdf_annot, lang)
| 1437 | |
def set_line_ends(self, start, end):
    """Set line end codes.

    Only a warning is issued for annotations that MuPDF reports as not
    having line ending styles.
    """
    CheckParent(self)
    pdf_annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(pdf_annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(pdf_annot, start, end)
| 1446 | |
def set_name(self, name):
    """Store `name` as the annotation's /Name (icon) entry."""
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('Name'), name)
| 1453 | |
def set_oc(self, oc=0):
    """Set the annotation's /OC reference, or remove it when `oc` is falsy."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    if oc:
        pdf = mupdf.pdf_get_bound_document(annot_obj)
        JM_add_oc_object(pdf, annot_obj, oc)
    else:
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
| 1463 | |
def set_opacity(self, opacity):
    """Set the annotation opacity.

    Values for which `_INRANGE(opacity, 0.0, 1.0)` is false reset the
    opacity to 1. An opacity below 1 also sets the `transparency`
    attribute of the annotation's PDF page object to 1.
    """
    CheckParent(self)
    pdf_annot = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(pdf_annot, opacity)
        if opacity < 1.0:
            page = _pdf_annot_page(pdf_annot)
            page.transparency = 1
    else:
        mupdf.pdf_set_annot_opacity(pdf_annot, 1)
| 1475 | |
def set_open(self, is_open):
    """Pass `is_open` to MuPDF as the 'open' status of the annotation or its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
| 1481 | |
def set_popup(self, rect):
    '''
    Create the annotation's 'Popup' or update its rectangle.

    `rect` is transformed with the page's rotation matrix before it is
    handed to MuPDF.
    '''
    CheckParent(self)
    pdf_annot = self.this
    page_rotation = JM_rotate_page_matrix(_pdf_annot_page(pdf_annot))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_rotation)
    mupdf.pdf_set_annot_popup(pdf_annot, popup_rect)
| 1492 | |
def set_rect(self, rect):
    """Set the annotation rectangle.

    `rect` is transformed with the page's rotation matrix first.

    Returns:
        False (after a message) if MuPDF refuses the rectangle,
        otherwise None.

    Raises:
        ValueError: the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    pdf_annot = self.this

    page_rotation = JM_rotate_page_matrix(_pdf_annot_page(pdf_annot))
    target = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_rotation)
    if mupdf.fz_is_empty_rect(target) or mupdf.fz_is_infinite_rect(target):
        raise ValueError(MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(pdf_annot, target)
    except Exception as e:
        # Best effort: report the problem instead of raising.
        message(f'cannot set rect: {e}')
        return False
| 1508 | |
def set_rotation(self, rotate=0):
    """Set the annotation rotation (/Rotate).

    Does nothing for annotation types outside the supported list. The
    angle is normalized into 0 <= angle < 360; FreeText annotations only
    accept multiples of 90, anything else becomes 0.
    """
    CheckParent(self)

    pdf_annot = self.this
    annot_type = mupdf.pdf_annot_type(pdf_annot)
    rotatable = (
            mupdf.PDF_ANNOT_CARET,
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_FILE_ATTACHMENT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_STAMP,
            mupdf.PDF_ANNOT_TEXT,
            )
    if annot_type not in rotatable:
        return

    angle = rotate
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360
    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
        angle = 0
    mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('Rotate'), angle)
| 1538 | |
@property
def type(self):
    """annotation type

    Returns:
        The string 'null' if the underlying annotation is invalid.
        Otherwise a tuple `(type number, type name)`, extended by a
        third item - the /IT name - when the annotation has an /IT
        entry that is a PDF name.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    c = mupdf.pdf_string_from_annot_type(type_)
    o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a present /IT entry that is a name contributes a third item.
    # The name test was previously inverted, so a valid /IT name was
    # dropped and only a non-name /IT reached the 3-tuple branch.
    if not o.m_internal or not mupdf.pdf_is_name(o):
        return (type_, c)
    it = mupdf.pdf_to_name(o)
    return (type_, c, it)
| 1552 | |
| 1553 def update(self, | |
| 1554 blend_mode: OptStr =None, | |
| 1555 opacity: OptFloat =None, | |
| 1556 fontsize: float =0, | |
| 1557 fontname: OptStr =None, | |
| 1558 text_color: OptSeq =None, | |
| 1559 border_color: OptSeq =None, | |
| 1560 fill_color: OptSeq =None, | |
| 1561 cross_out: bool =True, | |
| 1562 rotate: int =-1, | |
| 1563 ): | |
| 1564 """Update annot appearance. | |
| 1565 | |
| 1566 Notes: | |
| 1567 Depending on the annot type, some parameters make no sense, | |
| 1568 while others are only available in this method to achieve the | |
| 1569 desired result. This is especially true for 'FreeText' annots. | |
| 1570 Args: | |
| 1571 blend_mode: set the blend mode, all annotations. | |
| 1572 opacity: set the opacity, all annotations. | |
| 1573 fontsize: set fontsize, 'FreeText' only. | |
| 1574 fontname: set the font, 'FreeText' only. | |
| 1575 border_color: set border color, 'FreeText' only. | |
| 1576 text_color: set text color, 'FreeText' only. | |
| 1577 fill_color: set fill color, all annotations. | |
| 1578 cross_out: draw diagonal lines, 'Redact' only. | |
| 1579 rotate: set rotation, 'FreeText' and some others. | |
| 1580 """ | |
| 1581 annot_obj = mupdf.pdf_annot_obj(self.this) | |
| 1582 | |
| 1583 if border_color: | |
| 1584 is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC")) | |
| 1585 if not is_rich_text: | |
| 1586 raise ValueError("cannot set border_color if rich_text is False") | |
| 1587 Annot.update_timing_test() | |
| 1588 CheckParent(self) | |
| 1589 def color_string(cs, code): | |
| 1590 """Return valid PDF color operator for a given color sequence. | |
| 1591 """ | |
| 1592 cc = ColorCode(cs, code) | |
| 1593 if not cc: | |
| 1594 return b"" | |
| 1595 return (cc + "\n").encode() | |
| 1596 | |
| 1597 annot_type = self.type[0] # get the annot type | |
| 1598 | |
| 1599 dt = self.border.get("dashes", None) # get the dashes spec | |
| 1600 bwidth = self.border.get("width", -1) # get border line width | |
| 1601 stroke = self.colors["stroke"] # get the stroke color | |
| 1602 if fill_color is not None: | |
| 1603 fill = fill_color | |
| 1604 else: | |
| 1605 fill = self.colors["fill"] | |
| 1606 rect = None # self.rect # prevent MuPDF fiddling with it | |
| 1607 apnmat = self.apn_matrix # prevent MuPDF fiddling with it | |
| 1608 if rotate != -1: # sanitize rotation value | |
| 1609 while rotate < 0: | |
| 1610 rotate += 360 | |
| 1611 while rotate >= 360: | |
| 1612 rotate -= 360 | |
| 1613 if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0: | |
| 1614 rotate = 0 | |
| 1615 | |
| 1616 #------------------------------------------------------------------ | |
| 1617 # handle opacity and blend mode | |
| 1618 #------------------------------------------------------------------ | |
| 1619 if blend_mode is None: | |
| 1620 blend_mode = self.blendmode | |
| 1621 if not hasattr(opacity, "__float__"): | |
| 1622 opacity = self.opacity | |
| 1623 | |
| 1624 if 0 <= opacity < 1 or blend_mode: | |
| 1625 opa_code = "/H gs\n" # then we must reference this 'gs' | |
| 1626 else: | |
| 1627 opa_code = "" | |
| 1628 | |
| 1629 if annot_type == mupdf.PDF_ANNOT_FREE_TEXT: | |
| 1630 CheckColor(text_color) | |
| 1631 CheckColor(fill_color) | |
| 1632 tcol, fname, fsize = TOOLS._parse_da(self) | |
| 1633 | |
| 1634 # read and update default appearance as necessary | |
| 1635 if fsize <= 0: | |
| 1636 fsize = 12 | |
| 1637 if text_color: | |
| 1638 tcol = text_color | |
| 1639 if fontname: | |
| 1640 fname = fontname | |
| 1641 if fontsize > 0: | |
| 1642 fsize = fontsize | |
| 1643 JM_make_annot_DA(self, len(tcol), tcol, fname, fsize) | |
| 1644 blend_mode = None # not supported for free text annotations! | |
| 1645 | |
| 1646 #------------------------------------------------------------------ | |
| 1647 # now invoke MuPDF to update the annot appearance | |
| 1648 #------------------------------------------------------------------ | |
| 1649 val = self._update_appearance( | |
| 1650 opacity=opacity, | |
| 1651 blend_mode=blend_mode, | |
| 1652 fill_color=fill, | |
| 1653 rotate=rotate, | |
| 1654 ) | |
| 1655 if val is False: | |
| 1656 raise RuntimeError("Error updating annotation.") | |
| 1657 | |
| 1658 if annot_type == mupdf.PDF_ANNOT_FREE_TEXT: | |
| 1659 # in absence of previous opacity, we may need to modify the AP | |
| 1660 ap = self._getAP() | |
| 1661 if 0 <= opacity < 1 and not ap.startswith(b"/H gs"): | |
| 1662 self._setAP(b"/H gs\n" + ap) | |
| 1663 return | |
| 1664 | |
| 1665 bfill = color_string(fill, "f") | |
| 1666 bstroke = color_string(stroke, "c") | |
| 1667 | |
| 1668 p_ctm = self.get_parent().transformation_matrix | |
| 1669 imat = ~p_ctm # inverse page transf. matrix | |
| 1670 | |
| 1671 if dt: | |
| 1672 dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n" | |
| 1673 dashes = dashes.encode("utf-8") | |
| 1674 else: | |
| 1675 dashes = None | |
| 1676 | |
| 1677 if self.line_ends: | |
| 1678 line_end_le, line_end_ri = self.line_ends | |
| 1679 else: | |
| 1680 line_end_le, line_end_ri = 0, 0 # init line end codes | |
| 1681 | |
| 1682 # read contents as created by MuPDF | |
| 1683 ap = self._getAP() | |
| 1684 ap_tab = ap.splitlines() # split in single lines | |
| 1685 ap_updated = False # assume we did nothing | |
| 1686 | |
| 1687 if annot_type == mupdf.PDF_ANNOT_REDACT: | |
| 1688 if cross_out: # create crossed-out rect | |
| 1689 ap_updated = True | |
| 1690 ap_tab = ap_tab[:-1] | |
| 1691 _, LL, LR, UR, UL = ap_tab | |
| 1692 ap_tab.append(LR) | |
| 1693 ap_tab.append(LL) | |
| 1694 ap_tab.append(UR) | |
| 1695 ap_tab.append(LL) | |
| 1696 ap_tab.append(UL) | |
| 1697 ap_tab.append(b"S") | |
| 1698 | |
| 1699 if bwidth > 0 or bstroke != b"": | |
| 1700 ap_updated = True | |
| 1701 ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else [] | |
| 1702 for line in ap_tab: | |
| 1703 if line.endswith(b"w"): | |
| 1704 continue | |
| 1705 if line.endswith(b"RG") and bstroke != b"": | |
| 1706 line = bstroke[:-1] | |
| 1707 ntab.append(line) | |
| 1708 ap_tab = ntab | |
| 1709 | |
| 1710 ap = b"\n".join(ap_tab) | |
| 1711 | |
| 1712 if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE): | |
| 1713 ap = b"\n".join(ap_tab[:-1]) + b"\n" | |
| 1714 ap_updated = True | |
| 1715 if bfill != b"": | |
| 1716 if annot_type == mupdf.PDF_ANNOT_POLYGON: | |
| 1717 ap = ap + bfill + b"b" # close, fill, and stroke | |
| 1718 elif annot_type == mupdf.PDF_ANNOT_POLY_LINE: | |
| 1719 ap = ap + b"S" # stroke | |
| 1720 else: | |
| 1721 if annot_type == mupdf.PDF_ANNOT_POLYGON: | |
| 1722 ap = ap + b"s" # close and stroke | |
| 1723 elif annot_type == mupdf.PDF_ANNOT_POLY_LINE: | |
| 1724 ap = ap + b"S" # stroke | |
| 1725 | |
| 1726 if dashes is not None: # handle dashes | |
| 1727 ap = dashes + ap | |
| 1728 # reset dashing - only applies for LINE annots with line ends given | |
| 1729 ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1) | |
| 1730 ap_updated = True | |
| 1731 | |
| 1732 if opa_code: | |
| 1733 ap = opa_code.encode("utf-8") + ap | |
| 1734 ap_updated = True | |
| 1735 | |
| 1736 ap = b"q\n" + ap + b"\nQ\n" | |
| 1737 #---------------------------------------------------------------------- | |
| 1738 # the following handles line end symbols for 'Polygon' and 'Polyline' | |
| 1739 #---------------------------------------------------------------------- | |
| 1740 if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE): | |
| 1741 | |
| 1742 le_funcs = (None, TOOLS._le_square, TOOLS._le_circle, | |
| 1743 TOOLS._le_diamond, TOOLS._le_openarrow, | |
| 1744 TOOLS._le_closedarrow, TOOLS._le_butt, | |
| 1745 TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow, | |
| 1746 TOOLS._le_slash) | |
| 1747 le_funcs_range = range(1, len(le_funcs)) | |
| 1748 d = 2 * max(1, self.border["width"]) | |
| 1749 rect = self.rect + (-d, -d, d, d) | |
| 1750 ap_updated = True | |
| 1751 points = self.vertices | |
| 1752 if line_end_le in le_funcs_range: | |
| 1753 p1 = Point(points[0]) * imat | |
| 1754 p2 = Point(points[1]) * imat | |
| 1755 left = le_funcs[line_end_le](self, p1, p2, False, fill_color) | |
| 1756 ap += left.encode() | |
| 1757 if line_end_ri in le_funcs_range: | |
| 1758 p1 = Point(points[-2]) * imat | |
| 1759 p2 = Point(points[-1]) * imat | |
| 1760 left = le_funcs[line_end_ri](self, p1, p2, True, fill_color) | |
| 1761 ap += left.encode() | |
| 1762 | |
| 1763 if ap_updated: | |
| 1764 if rect: # rect modified here? | |
| 1765 self.set_rect(rect) | |
| 1766 self._setAP(ap, rect=1) | |
| 1767 else: | |
| 1768 self._setAP(ap, rect=0) | |
| 1769 | |
| 1770 #------------------------------- | |
| 1771 # handle annotation rotations | |
| 1772 #------------------------------- | |
| 1773 if annot_type not in ( # only these types are supported | |
| 1774 mupdf.PDF_ANNOT_CARET, | |
| 1775 mupdf.PDF_ANNOT_CIRCLE, | |
| 1776 mupdf.PDF_ANNOT_FILE_ATTACHMENT, | |
| 1777 mupdf.PDF_ANNOT_INK, | |
| 1778 mupdf.PDF_ANNOT_LINE, | |
| 1779 mupdf.PDF_ANNOT_POLY_LINE, | |
| 1780 mupdf.PDF_ANNOT_POLYGON, | |
| 1781 mupdf.PDF_ANNOT_SQUARE, | |
| 1782 mupdf.PDF_ANNOT_STAMP, | |
| 1783 mupdf.PDF_ANNOT_TEXT, | |
| 1784 ): | |
| 1785 return | |
| 1786 | |
| 1787 rot = self.rotation # get value from annot object | |
| 1788 if rot == -1: # nothing to change | |
| 1789 return | |
| 1790 | |
| 1791 M = (self.rect.tl + self.rect.br) / 2 # center of annot rect | |
| 1792 | |
| 1793 if rot == 0: # undo rotations | |
| 1794 if abs(apnmat - Matrix(1, 1)) < 1e-5: | |
| 1795 return # matrix already is a no-op | |
| 1796 quad = self.rect.morph(M, ~apnmat) # derotate rect | |
| 1797 self.setRect(quad.rect) | |
| 1798 self.set_apn_matrix(Matrix(1, 1)) # appearance matrix = no-op | |
| 1799 return | |
| 1800 | |
| 1801 mat = Matrix(rot) | |
| 1802 quad = self.rect.morph(M, mat) | |
| 1803 self.set_rect(quad.rect) | |
| 1804 self.set_apn_matrix(apnmat * mat) | |
| 1805 | |
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update attached file.

    Args:
        buffer_: new file content, converted via JM_BufferFromBytes().
        filename: new file name; written to /F and /UF of both the file
            stream and the /FS dictionary, and to the annot's /Contents.
        ufilename: new value for /UF only (stream and /FS).
        desc: new value for /Desc (stream and /FS).

    Raises:
        TypeError: the annotation is not a file attachment.
        ValueError: `buffer_` is truthy but produced no buffer.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
    annot_type = mupdf.pdf_annot_type(annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)

    # the object holding the file content
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not stream.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # file content given
    res = JM_BufferFromBytes(buffer_)
    if buffer_ and not res.m_internal:
        raise ValueError( MSG_BAD_BUFFER)
    if res:
        JM_update_stream(pdf, stream, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    if filename:
        # same write order as before: /F on stream and fs, then /UF on both
        for key in ('F', 'UF'):
            for target in (stream, fs):
                mupdf.pdf_dict_put_text_string(target, PDF_NAME(key), filename)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    if ufilename:
        for target in (stream, fs):
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('UF'), ufilename)

    if desc:
        for target in (stream, fs):
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('Desc'), desc)
| 1848 | |
| 1849 @staticmethod | |
| 1850 def update_timing_test(): | |
| 1851 total = 0 | |
| 1852 for i in range( 30*1000): | |
| 1853 total += i | |
| 1854 return total | |
| 1855 | |
@property
def vertices(self):
    """annotation vertex points

    Returns a list of (x, y) tuples taken from the first of /Vertices, /L,
    /QuadPoints or /CL that exists. Otherwise, if /InkList exists, returns
    a list of such lists (one per InkList item). Returns None if none of
    these keys is present.
    """
    CheckParent(self)
    annot = self.this
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    page_ctm = mupdf.FzMatrix()     # page transformation matrix
    dummy = mupdf.FzRect()          # Out-param for mupdf.pdf_page_transform().
    mupdf.pdf_page_transform(page, dummy, page_ctm)
    page_ctm = mupdf.fz_concat(page_ctm, JM_derotate_page_matrix(page))

    def transformed_points(array):
        # Every pair of floats is one point, which is separately
        # transformed with the page transformation matrix.
        points = []
        for k in range(0, mupdf.pdf_array_len(array), 2):
            x = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
            y = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k + 1))
            pt = mupdf.fz_transform_point(mupdf.FzPoint(x, y), page_ctm)
            points.append( (pt.x, pt.y))
        return points

    # These keys occur in different annotation types; take the first present.
    flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices'))
    for key in ('L', 'QuadPoints'):
        if flat.m_internal:
            break
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME(key))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_gets(annot_obj, 'CL')

    if flat.m_internal:
        # list with 1-level depth
        return transformed_points(flat)

    inklist = mupdf.pdf_dict_gets(annot_obj, 'InkList')
    if inklist.m_internal:
        # InkList has 2-level lists
        return [
            transformed_points(mupdf.pdf_array_get(inklist, i))
            for i in range(mupdf.pdf_array_len(inklist))
        ]
| 1909 | |
@property
def xref(self):
    """annotation xref number"""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    return mupdf.pdf_to_num(annot_obj)
| 1916 | |
| 1917 | |
class Archive:
    """A multi-archive into which folders, files, memory items, zip/tar
    archives and other archives can be mounted as sub-archives.

    `self.this` is the underlying MuPDF multi archive; `self._subarchives`
    is a list of dicts (keys `fmt`, `entries`, `path`) recording what has
    been added.
    """

    def __init__( self, *args):
        '''
        Archive(dirname [, path]) - from folder
        Archive(file [, path]) - from file name or object
        Archive(data, name) - from memory item
        Archive() - empty archive
        Archive(archive [, path]) - from archive
        '''
        self._subarchives = list()
        self.this = mupdf.fz_new_multi_archive()
        if args:
            self.add( *args)

    def __repr__( self):
        return f'Archive, sub-archives: {len(self._subarchives)}'

    def _add_arch( self, subarch, path=None):
        """Mount another archive under `path`."""
        mupdf.fz_mount_multi_archive( self.this, subarch, path)

    def _add_dir( self, folder, path=None):
        """Mount a file system folder under `path`."""
        sub = mupdf.fz_open_directory( folder)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_treeitem( self, memory, name, path=None):
        """Mount one in-memory item called `name` under `path`."""
        buff = JM_BufferFromBytes( memory)
        sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
        mupdf.fz_tree_archive_add_buffer( sub, name, buff)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarfile( self, filepath, type_, path=None):
        """Mount a zip (`type_ == 1`) or tar (otherwise) file under `path`."""
        if type_ == 1:
            sub = mupdf.fz_open_zip_archive( filepath)
        else:
            sub = mupdf.fz_open_tar_archive( filepath)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarmemory( self, memory, type_, path=None):
        """Mount an in-memory zip (`type_ == 1`) or tar (otherwise) under `path`."""
        buff = JM_BufferFromBytes( memory)
        stream = mupdf.fz_open_buffer( buff)
        if type_==1:
            sub = mupdf.fz_open_zip_archive_with_stream( stream)
        else:
            sub = mupdf.fz_open_tar_archive_with_stream( stream)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def add( self, content, path=None):
        '''
        Add a sub-archive.

        Args:
            content:
                The content to be added. May be one of:
                    `str` - must be path of directory or file.
                    `bytes`, `bytearray`, `io.BytesIO` - raw data.
                    `zipfile.ZipFile`.
                    `tarfile.TarFile`.
                    `pymupdf.Archive`.
                    A two-item tuple `(data, name)`.
                    List or tuple (but not tuple with length 2) of the above.
            path: (str) a "virtual" path name, under which the elements
                of content can be retrieved. Use it to e.g. cope with
                duplicate element names.

        Raises:
            AssertionError: a file or raw data was given without a
                non-empty `str` name in `path`.
            ValueError: `content` is a `str` that is neither a file nor
                a directory.
            TypeError: `content` has an unsupported type.
        '''
        def is_binary_data(x):
            return isinstance(x, (bytes, bytearray, io.BytesIO))

        def make_subarch(entries, mount, fmt):
            # Record what was added. Consecutive 'tree' items with the same
            # mount path are merged into one entry.
            subarch = dict(fmt=fmt, entries=entries, path=mount)
            last = self._subarchives[-1] if self._subarchives else None
            if (
                    fmt == "tree"
                    and last is not None
                    and last["fmt"] == "tree"
                    and last["path"] == subarch["path"]
                    ):
                last["entries"].extend(subarch["entries"])
            else:
                self._subarchives.append(subarch)

        if isinstance(content, pathlib.Path):
            content = str(content)

        if isinstance(content, str):
            if os.path.isdir(content):
                self._add_dir(content, path)
                return make_subarch(os.listdir(content), path, 'dir')
            elif os.path.isfile(content):
                assert isinstance(path, str) and path != '', \
                        f'Need name for binary content, but {path=}.'
                # Read as bytes: the content is stored as raw data, exactly
                # like in the `(data, name)` tuple case below.
                with open(content, 'rb') as f:
                    ff = f.read()
                self._add_treeitem(ff, path)
                return make_subarch([path], None, 'tree')
            else:
                raise ValueError(f'Not a file or directory: {content!r}')

        elif is_binary_data(content):
            # The comma before the message matters: without it the message
            # is concatenated onto '' and becomes part of the comparison.
            assert isinstance(path, str) and path != '', \
                    f'Need name for binary content, but {path=}.'
            self._add_treeitem(content, path)
            return make_subarch([path], None, 'tree')

        elif isinstance(content, zipfile.ZipFile):
            filename = getattr(content, "filename", None)
            if filename is None:
                fp = content.fp.getvalue()
                self._add_ziptarmemory(fp, 1, path)
            else:
                self._add_ziptarfile(filename, 1, path)
            return make_subarch(content.namelist(), path, 'zip')

        elif isinstance(content, tarfile.TarFile):
            filename = getattr(content.fileobj, "name", None)
            if filename is None:
                fp = content.fileobj
                if not isinstance(fp, io.BytesIO):
                    fp = fp.fileobj
                self._add_ziptarmemory(fp.getvalue(), 0, path)
            else:
                self._add_ziptarfile(filename, 0, path)
            return make_subarch(content.getnames(), path, 'tar')

        elif isinstance(content, Archive):
            self._add_arch(content, path)
            return make_subarch([], path, 'multi')

        if isinstance(content, tuple) and len(content) == 2:
            # covers the tree item plus path
            data, name = content
            assert isinstance(name, str), f'Unexpected {type(name)=}'
            if is_binary_data(data):
                self._add_treeitem(data, name, path=path)
            elif isinstance(data, str):
                if os.path.isfile(data):
                    with open(data, 'rb') as f:
                        ff = f.read()
                    self._add_treeitem(ff, name, path=path)
            else:
                assert 0, f'Unexpected {type(data)=}.'
            return make_subarch([name], path, 'tree')

        elif hasattr(content, '__getitem__'):
            # Deal with sequence of disparate items.
            for item in content:
                self.add(item, path)
            return

        else:
            raise TypeError(f'Unrecognised type {type(content)}.')

    @property
    def entry_list( self):
        '''
        List of sub archives.
        '''
        return self._subarchives

    def has_entry( self, name):
        """Whether the archive has an entry called `name`."""
        return mupdf.fz_has_archive_entry( self.this, name)

    def read_entry( self, name):
        """Return the content of entry `name`, converted by JM_BinFromBuffer()."""
        buff = mupdf.fz_read_archive_entry( self.this, name)
        return JM_BinFromBuffer( buff)
| 2082 | |
| 2083 | |
| 2084 class Xml: | |
| 2085 | |
| 2086 def __enter__(self): | |
| 2087 return self | |
| 2088 | |
| 2089 def __exit__(self, *args): | |
| 2090 pass | |
| 2091 | |
def __init__(self, rhs):
    """Wrap a `mupdf.FzXml` node, or parse a `str` as HTML5.

    Any other type fails an assertion.
    """
    if isinstance(rhs, mupdf.FzXml):
        self.this = rhs
        return
    if isinstance(rhs, str):
        html_buffer = mupdf.fz_new_buffer_from_copied_data(rhs)
        self.this = mupdf.fz_parse_xml_from_html5(html_buffer)
        return
    assert 0, f'Unsupported type for rhs: {type(rhs)}'
| 2100 | |
| 2101 def _get_node_tree( self): | |
| 2102 def show_node(node, items, shift): | |
| 2103 while node is not None: | |
| 2104 if node.is_text: | |
| 2105 items.append((shift, f'"{node.text}"')) | |
| 2106 node = node.next | |
| 2107 continue | |
| 2108 items.append((shift, f"({node.tagname}")) | |
| 2109 for k, v in node.get_attributes().items(): | |
| 2110 items.append((shift, f"={k} '{v}'")) | |
| 2111 child = node.first_child | |
| 2112 if child: | |
| 2113 items = show_node(child, items, shift + 1) | |
| 2114 items.append((shift, f"){node.tagname}")) | |
| 2115 node = node.next | |
| 2116 return items | |
| 2117 | |
| 2118 shift = 0 | |
| 2119 items = [] | |
| 2120 items = show_node(self, items, shift) | |
| 2121 return items | |
| 2122 | |
def add_bullet_list(self):
    """Append a new bulleted list ("ul" tag) to this node and return it."""
    bullet_list = self.create_element("ul")
    self.append_child(bullet_list)
    return bullet_list
| 2128 | |
def add_class(self, text):
    """Add CSS class name(s) to this node's "class" attribute.

    Existing class names are kept. A name counts as present only if it
    equals one of the whitespace-separated names already there, so
    adding "big" to a node of class "bigger" does add it.

    Args:
        text: (str) one class name, or several separated by whitespace.

    Returns:
        self, to allow call chaining.
    """
    cls = self.get_attribute_value("class")
    if cls is None:
        cls = text
    else:
        present = cls.split()
        missing = []
        for name in text.split():
            if name not in present and name not in missing:
                missing.append(name)
        if not missing:     # nothing new to add
            return self
        cls += " " + " ".join(missing)
    self.remove_attribute("class")
    self.set_attribute("class", cls)
    return self
| 2141 | |
def add_code(self, text=None):
    """Add a "code" tag below the innermost span (or self); return self.

    If `text` is a `str`, it becomes the text content of the new tag.
    """
    code = self.create_element("code")
    if type(text) is str:
        code.append_child(self.create_text_node(text))
    bottom = self.span_bottom()
    target = self if bottom is None else bottom
    target.append_child(code)
    return self
| 2152 | |
def add_codeblock(self):
    """Append a new block for monospaced lines ("pre" tag) and return it."""
    pre = self.create_element("pre")
    self.append_child(pre)
    return pre
| 2158 | |
def add_description_list(self):
    """Append a new description list ("dl" tag) and return it."""
    description_list = self.create_element("dl")
    self.append_child(description_list)
    return description_list
| 2164 | |
def add_division(self):
    """Append a new "div" tag and return it."""
    division = self.create_element("div")
    self.append_child(division)
    return division
| 2170 | |
def add_header(self, level=1):
    """Add a header tag "h<level>" and return it.

    If this node is itself a header or a paragraph, the new header is
    appended to this node's parent, otherwise to this node.

    Raises:
        ValueError: `level` is not in 1..6.
    """
    if level not in range(1, 7):
        raise ValueError("Header level must be in [1, 6]")
    own_tag = self.tagname
    header = self.create_element(f"h{level}")
    if own_tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
        host = self.parent
    else:
        host = self
    host.append_child(header)
    return header
| 2183 | |
def add_horizontal_line(self):
    """Append a new horizontal line ("hr" tag) and return it."""
    rule = self.create_element("hr")
    self.append_child(rule)
    return rule
| 2189 | |
def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
    """Add image node (tag "img") and return it.

    `name` becomes the "src" attribute. `width`, `height` and `align` are
    set as attributes when given; `imgfloat` is set as a "float" style.
    """
    img = self.create_element("img")
    for key, value in (("width", width), ("height", height)):
        if value is not None:
            img.set_attribute(key, f"{value}")
    if imgfloat is not None:
        img.set_attribute("style", f"float: {imgfloat}")
    if align is not None:
        img.set_attribute("align", f"{align}")
    img.set_attribute("src", f"{name}")
    self.append_child(img)
    return img
| 2204 | |
def add_link(self, href, text=None):
    """Add a hyperlink ("a" tag) below the innermost span (or self); return self.

    If `text` is not a `str`, `href` is used as the link text.
    """
    anchor = self.create_element("a")
    label = text if isinstance(text, str) else href
    anchor.set_attribute("href", href)
    anchor.append_child(self.create_text_node(label))
    bottom = self.span_bottom()
    target = self if bottom is None else bottom
    target.append_child(anchor)
    return self
| 2217 | |
def add_list_item(self):
    """Add item ("li" tag) under a (numbered or bulleted) list.

    Returns:
        The new "li" node.

    Raises:
        ValueError: this node is neither an "ol" nor a "ul" tag.
    """
    if self.tagname not in ("ol", "ul"):
        # One formatted message; passing two arguments would make the
        # exception text render as a tuple.
        raise ValueError(f"cannot add list item to {self.tagname}")
    item = self.create_element("li")
    self.append_child(item)
    return item
| 2225 | |
def add_number_list(self, start=1, numtype=None):
    """Append a new numbered list ("ol" tag) and return it.

    A "start" attribute is set only if `start` > 1; a "type" attribute
    only if `numtype` is given.
    """
    ordered = self.create_element("ol")
    extras = []
    if start > 1:
        extras.append(("start", str(start)))
    if numtype is not None:
        extras.append(("type", numtype))
    for key, value in extras:
        ordered.set_attribute(key, value)
    self.append_child(ordered)
    return ordered
| 2235 | |
def add_paragraph(self):
    """Add a "p" tag and return it.

    If this node is itself a "p" tag, the new paragraph is appended to
    this node's parent, otherwise to this node.
    """
    paragraph = self.create_element("p")
    host = self.parent if self.tagname == "p" else self
    host.append_child(paragraph)
    return paragraph
| 2244 | |
def add_span(self):
    """Append a new "span" tag and return it."""
    span = self.create_element("span")
    self.append_child(span)
    return span
| 2249 | |
def add_style(self, text):
    """Add CSS declaration(s) to this node's "style" attribute.

    The existing style is kept and `text` is appended after a ";".
    Nothing changes if every declaration in `text` is already present.
    Declarations are compared whole (split at ";", surrounding blanks
    ignored), so "color: red" is not mistaken for being part of
    "background-color: red".

    Args:
        text: (str) one CSS declaration, or several separated by ";".

    Returns:
        self, to allow call chaining.
    """
    style = self.get_attribute_value("style")
    if style is not None:
        present = {decl.strip() for decl in style.split(";")}
        wanted = [decl.strip() for decl in text.split(";") if decl.strip()]
        if all(decl in present for decl in wanted):
            return self
    self.remove_attribute("style")
    if style is None:
        style = text
    else:
        style += ";" + text
    self.set_attribute("style", style)
    return self
| 2262 | |
def add_subscript(self, text=None):
    """Add a subscript ("sub" tag) below the innermost span (or self); return self.

    If `text` is a `str`, it becomes the text content of the new tag.
    """
    sub = self.create_element("sub")
    if type(text) is str:
        sub.append_child(self.create_text_node(text))
    bottom = self.span_bottom()
    target = self if bottom is None else bottom
    target.append_child(sub)
    return self
| 2273 | |
def add_superscript(self, text=None):
    """Add a superscript ("sup" tag) below the innermost span (or self); return self.

    If `text` is a `str`, it becomes the text content of the new tag.
    """
    sup = self.create_element("sup")
    if type(text) is str:
        sup.append_child(self.create_text_node(text))
    bottom = self.span_bottom()
    target = self if bottom is None else bottom
    target.append_child(sup)
    return self
| 2284 | |
def add_text(self, text):
    """Add text below the innermost span (or self); return self.

    Line breaks are honored: a "br" tag is put between consecutive lines.
    """
    lines = text.splitlines()
    bottom = self.span_bottom()
    target = self if bottom is None else bottom

    for index, line in enumerate(lines):
        if index:   # separate from the previous line
            target.append_child(self.create_element("br"))
        target.append_child(self.create_text_node(line))
    return self
| 2298 | |
def append_child( self, child):
    """Append node `child` as a child of this node."""
    parent_node, child_node = self.this, child.this
    mupdf.fz_dom_append_child( parent_node, child_node)
| 2301 | |
def append_styled_span(self, style):
    """Create a "span" carrying `style` and append it below the innermost
    span (or self).

    Returns the node the new span was appended to.
    """
    styled = self.create_element("span")
    styled.add_style(style)
    bottom = self.span_bottom()
    target = self if bottom is None else bottom
    target.append_child(styled)
    return target
| 2310 | |
def bodytag( self):
    """Return the node given by `mupdf.fz_dom_body()`, wrapped as `Xml`."""
    body = mupdf.fz_dom_body( self.this)
    return Xml( body)
| 2313 | |
def clone( self):
    """Return the result of `mupdf.fz_dom_clone()` for this node, wrapped as `Xml`."""
    return Xml( mupdf.fz_dom_clone( self.this))
| 2317 | |
| 2318 @staticmethod | |
| 2319 def color_text(color): | |
| 2320 if type(color) is str: | |
| 2321 return color | |
| 2322 if type(color) is int: | |
| 2323 return f"rgb({sRGB_to_rgb(color)})" | |
| 2324 if type(color) in (tuple, list): | |
| 2325 return f"rgb{tuple(color)}" | |
| 2326 return color | |
| 2327 | |
def create_element( self, tag):
    """Create a new element node with tag name `tag`, wrapped as `Xml`."""
    element = mupdf.fz_dom_create_element( self.this, tag)
    return Xml( element)
| 2330 | |
def create_text_node( self, text):
    """Create a new text node holding `text`, wrapped as `Xml`."""
    text_node = mupdf.fz_dom_create_text_node( self.this, text)
    return Xml( text_node)
| 2333 | |
def debug(self):
    """Print a list of the node tree below self.

    One line per tree item, indented by its depth; line breaks inside
    an item are shown as a literal backslash-n.
    """
    for entry in self._get_node_tree():
        indent = " " * entry[0]
        message(indent + entry[1].replace("\n", "\\n"))
| 2339 | |
def find( self, tag, att, match):
    """Return the `mupdf.fz_dom_find()` result for (tag, att, match) as
    `Xml`, or None if nothing was found."""
    hit = mupdf.fz_dom_find( self.this, tag, att, match)
    return Xml( hit) if hit.m_internal else None
| 2344 | |
def find_next( self, tag, att, match):
    """Return the `mupdf.fz_dom_find_next()` result for (tag, att, match)
    as `Xml`, or None if nothing was found."""
    hit = mupdf.fz_dom_find_next( self.this, tag, att, match)
    return Xml( hit) if hit.m_internal else None
| 2349 | |
@property
def first_child( self):
    """First child node as `Xml`; None for text nodes or if there is no child."""
    if mupdf.fz_xml_text( self.this):
        # text node, has no child.
        return None
    child = mupdf.fz_dom_first_child( self)
    return Xml( child) if child.m_internal else None
| 2358 | |
def get_attribute_value( self, key):
    """Return the value of attribute `key` as given by `mupdf.fz_dom_attribute()`.

    `key` must be truthy.
    """
    assert key
    value = mupdf.fz_dom_attribute( self.this, key)
    return value
| 2362 | |
def get_attributes( self):
    """Return this node's attributes as a dict; None for text nodes.

    Attributes are read by index until an empty key or value comes back.
    """
    if mupdf.fz_xml_text( self.this):
        # text node, has no attributes.
        return None
    attributes = {}
    index = 0
    while True:
        val, key = mupdf.fz_dom_get_attribute( self.this, index)
        if not (val and key):
            return attributes
        attributes[ key] = val
        index += 1
| 2376 | |
def insert_after( self, node):
    """Call `mupdf.fz_dom_insert_after()` with this node and `node`."""
    own, other = self.this, node.this
    mupdf.fz_dom_insert_after( own, other)
| 2379 | |
def insert_before( self, node):
    """Call `mupdf.fz_dom_insert_before()` with this node and `node`."""
    own, other = self.this, node.this
    mupdf.fz_dom_insert_before( own, other)
| 2382 | |
def insert_text(self, text):
    """Append the lines of `text` directly to this node; return self.

    A "br" tag is put between consecutive lines.
    """
    for index, line in enumerate(text.splitlines()):
        if index:   # separate from the previous line
            self.append_child(self.create_element("br"))
        self.append_child(self.create_text_node(line))
    return self
| 2391 | |
@property
def is_text(self):
    """Check if this is a text node, i.e. `self.text` is not None."""
    content = self.text
    return content is not None
| 2396 | |
@property
def last_child(self):
    """Return last child node, or None if there is no child."""
    current = self.first_child
    if current is None:
        return None
    # walk along the siblings until there is no further one
    following = current.next
    while following:
        current = following
        following = current.next
    return current
| 2408 | |
@property
def next( self):
    """The `mupdf.fz_dom_next()` node as `Xml`, or None if there is none."""
    sibling = mupdf.fz_dom_next( self.this)
    return Xml( sibling) if sibling.m_internal else None
| 2414 | |
@property
def parent( self):
    """The `mupdf.fz_dom_parent()` node as `Xml`, or None if there is none."""
    up = mupdf.fz_dom_parent( self.this)
    return Xml( up) if up.m_internal else None
| 2420 | |
@property
def previous( self):
    """The `mupdf.fz_dom_previous()` node as `Xml`, or None if there is none."""
    sibling = mupdf.fz_dom_previous( self.this)
    return Xml( sibling) if sibling.m_internal else None
| 2426 | |
def remove( self):
    """Call `mupdf.fz_dom_remove()` on this node."""
    node = self.this
    mupdf.fz_dom_remove( node)
| 2429 | |
def remove_attribute( self, key):
    """Remove attribute `key` from this node; `key` must be truthy."""
    assert key
    node = self.this
    mupdf.fz_dom_remove_attribute( node, key)
| 2433 | |
@property
def root( self):
    """The `mupdf.fz_xml_root()` node for this node, wrapped as `Xml`."""
    top = mupdf.fz_xml_root( self.this)
    return Xml( top)
| 2437 | |
def set_align(self, align):
    """Set text alignment via CSS style; return self.

    `align` is either a `str` used as is, or one of the TEXT_ALIGN_*
    constants.

    Raises:
        ValueError: `align` is neither a `str` nor a known constant.
    """
    if isinstance( align, str):
        keyword = align
    else:
        known = (
                (TEXT_ALIGN_LEFT, "left"),
                (TEXT_ALIGN_CENTER, "center"),
                (TEXT_ALIGN_RIGHT, "right"),
                (TEXT_ALIGN_JUSTIFY, "justify"),
                )
        for code, keyword in known:
            if align == code:
                break
        else:
            raise ValueError(f"Unrecognised {align=}")
    self.add_style("text-align: %s" % keyword)
    return self
| 2456 | |
def set_attribute( self, key, value):
    """Add attribute `key` with `value` to this node; `key` must be truthy."""
    assert key
    node = self.this
    mupdf.fz_dom_add_attribute( node, key, value)
| 2460 | |
def set_bgcolor(self, color):
    """Set background color via CSS style; return self."""
    css_color = self.color_text(color)
    self.add_style("background-color: %s" % css_color)  # does not work on span level
    return self
| 2466 | |
def set_bold(self, val=True):
    """Set bold on / off via CSS style; return self."""
    weight = "bold" if val else "normal"
    self.append_styled_span("font-weight: %s" % weight)
    return self
| 2476 | |
def set_color(self, color):
    """Set text color via CSS style; return self."""
    css_color = self.color_text(color)
    self.append_styled_span("color: %s" % css_color)
    return self
| 2482 | |
def set_columns(self, cols):
    """Set number of text columns via CSS style; return self."""
    self.append_styled_span(f"columns: {cols}")
    return self
| 2488 | |
def set_font(self, font):
    """Set font-family name via CSS style; return self."""
    self.append_styled_span("font-family: %s" % font)
    return self
| 2494 | |
def set_fontsize(self, fontsize):
    """Set font size via CSS style; return self.

    A `str` is used as given; any other value gets the unit "px" appended.
    """
    unit = "" if type(fontsize) is str else "px"
    self.append_styled_span(f"font-size: {fontsize}{unit}")
    return self
| 2504 | |
def set_id(self, unique):
    """Set a unique id; return self.

    Raises:
        ValueError: the root node finds an "id" attribute with this value.
    """
    # check uniqueness
    clash = self.root.find(None, "id", unique)
    if clash:
        raise ValueError(f"id '{unique}' already exists")
    self.set_attribute("id", unique)
    return self
| 2513 | |
def set_italic(self, val=True):
    """Set italic on / off via CSS style; return self."""
    slant = "italic" if val else "normal"
    self.append_styled_span("font-style: %s" % slant)
    return self
| 2523 | |
def set_leading(self, leading):
    """Set inter-line spacing value via CSS style - block-level only."""
    self.add_style(f"-mupdf-leading: {leading}")
    return self
| 2529 | |
def set_letter_spacing(self, spacing):
    """Set inter-letter spacing value via CSS style; return self."""
    self.append_styled_span(f"letter-spacing: {spacing}")
    return self
| 2535 | |
def set_lineheight(self, lineheight):
    """Set line height via CSS style - block-level only."""
    self.add_style(f"line-height: {lineheight}")
    return self
| 2541 | |
def set_margins(self, val):
    """Set margin values via CSS style; return self."""
    # NOTE(review): the standard CSS property is spelled "margin" - confirm
    # that "margins" is what the consumer of this style expects.
    self.append_styled_span("margins: %s" % val)
    return self
| 2547 | |
def set_opacity(self, opacity):
    """Set opacity via CSS style; return self."""
    self.append_styled_span(f"opacity: {opacity}")
    return self
| 2553 | |
def set_pagebreak_after(self):
    """Insert a page break after this node; return self."""
    self.add_style("page-break-after: always")
    return self
| 2559 | |
def set_pagebreak_before(self):
    """Insert a page break before this node; return self."""
    self.add_style("page-break-before: always")
    return self
| 2565 | |
def set_properties(
        self,
        align=None,
        bgcolor=None,
        bold=None,
        color=None,
        columns=None,
        font=None,
        fontsize=None,
        indent=None,
        italic=None,
        leading=None,
        letter_spacing=None,
        lineheight=None,
        margins=None,
        pagebreak_after=None,
        pagebreak_before=None,
        word_spacing=None,
        unqid=None,
        cls=None,
        ):
    """Set any or all properties of a node.

    To be used for existing nodes preferably.

    Every argument that is not None is applied through the matching
    `set_*()` method to a temporary "div" added to the root node. The
    styles collected there are joined with ";" and stored as this node's
    "style" attribute, then the temporary node is removed. `unqid` and
    `cls` are applied to this node directly. Returns self.
    """
    scratch = self.root.add_division()

    # (value, setter name) in the order in which they are applied
    leading_group = (
            (align, "set_align"),
            (bgcolor, "set_bgcolor"),
            (bold, "set_bold"),
            (color, "set_color"),
            (columns, "set_columns"),
            (font, "set_font"),
            (fontsize, "set_fontsize"),
            (indent, "set_text_indent"),
            (italic, "set_italic"),
            (leading, "set_leading"),
            (letter_spacing, "set_letter_spacing"),
            (lineheight, "set_lineheight"),
            (margins, "set_margins"),
            )
    for value, setter in leading_group:
        if value is not None:
            getattr(scratch, setter)(value)

    # the page break setters take no argument
    if pagebreak_after is not None:
        scratch.set_pagebreak_after()
    if pagebreak_before is not None:
        scratch.set_pagebreak_before()

    if word_spacing is not None:
        scratch.set_word_spacing(word_spacing)
    if unqid is not None:
        self.set_id(unqid)
    if cls is not None:
        self.add_class(cls)

    # collect the style of the temporary node and of the chain of first
    # children below it
    collected = []
    own_style = scratch.get_attribute_value("style")
    if own_style is not None:
        collected.append(own_style)
    node = scratch.first_child
    while node:
        collected.append(node.get_attribute_value("style"))
        node = node.first_child
    self.set_attribute("style", ";".join(collected))
    scratch.remove()
    return self
| 2641 | |
def set_text_indent(self, indent):
    """Set text indentation via CSS style - block-level only.

    Adds ``text-indent: <indent>`` to the node's own style and returns the
    node.
    """
    self.add_style(f"text-indent: {indent}")
    return self
| 2647 | |
def set_underline(self, val="underline"):
    """Set the text decoration via CSS style.

    Appends a styled span carrying ``text-decoration: <val>`` and returns
    this node.
    """
    self.append_styled_span("text-decoration: %s" % val)
    return self
| 2652 | |
def set_word_spacing(self, spacing):
    """Set inter-word spacing value via CSS style.

    Appends a styled span carrying ``word-spacing: <spacing>`` and returns
    this node.
    """
    self.append_styled_span(f"word-spacing: {spacing}")
    return self
| 2658 | |
def span_bottom(self):
    """Find deepest level in stacked spans.

    Starting at the last non-text child of this node, returns None unless
    that child is a "span".  Otherwise descends through nested spans
    (stepping over text nodes and "a", "sub", "sup", "body" siblings) and
    returns the innermost span reached.
    """
    node = self.last_child
    # Skip trailing text nodes to find the last element child.
    while node is not None and node.is_text:
        node = node.previous
    if node is None or node.tagname != "span":
        return None

    deepest = self
    while node is not None:
        if node.tagname in ("a", "sub", "sup", "body") or node.is_text:
            # Not a span level: look at the next sibling instead.
            node = node.next
        elif node.tagname == "span":
            deepest = node
            node = node.first_child
        else:
            break
    return deepest
| 2683 | |
@property
def tagname(self):
    """Tag of this node, as reported by ``mupdf.fz_xml_tag``."""
    return mupdf.fz_xml_tag(self.this)
| 2687 | |
@property
def text(self):
    """Text of this node, as reported by ``mupdf.fz_xml_text``."""
    return mupdf.fz_xml_text(self.this)
| 2691 | |
# These three names are plain aliases bound to the same function as add_code.
add_var = add_code
add_samp = add_code
add_kbd = add_code
| 2695 | |
| 2696 | |
class Colorspace:
    """Thin wrapper around a ``mupdf.FzColorspace`` stored in ``self.this``."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        ``type_`` is either an existing ``mupdf.FzColorspace`` (wrapped as
        is) or one of the constants CS_GRAY, CS_CMYK, CS_RGB.  Any other
        value falls back to RGB.
        """
        if isinstance(type_, mupdf.FzColorspace):
            self.this = type_
            return
        if type_ == CS_GRAY:
            fixed = mupdf.FzColorspace.Fixed_GRAY
        elif type_ == CS_CMYK:
            fixed = mupdf.FzColorspace.Fixed_CMYK
        else:
            # CS_RGB and every unrecognised value map to RGB.
            fixed = mupdf.FzColorspace.Fixed_RGB
        self.this = mupdf.FzColorspace(fixed)

    def __repr__(self):
        """Return ``Colorspace(CS_<kind>) - <name>``.

        ``<kind>`` is derived from the component count ``n`` (1: GRAY,
        3: RGB, 4: CMYK) and is empty for any other count, so wrapped
        colorspaces with more than four components no longer raise
        IndexError here.
        """
        kind = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (kind, self.name)

    def _name(self):
        """Return the name reported by ``mupdf.fz_colorspace_name``."""
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
| 2728 | |
| 2729 | |
class DeviceWrapper:
    """Holds a MuPDF device in ``self.this``.

    The device is either passed in directly or created from the positional
    arguments, which must match one of these forms (checked with
    ``args_match``):

    * ``(mupdf.FzDevice)`` - wrap the given device.
    * ``(Pixmap, clip)`` - draw device for the pixmap, restricted to
      ``clip`` unless that converts to an infinite irect.
    * ``(mupdf.FzDisplayList)`` - list device for the display list.
    * ``(mupdf.FzStextPage, flags)`` - structured-text device.
    """

    def __init__(self, *args):
        if args_match(args, mupdf.FzDevice):
            (self.this,) = args
            return

        if args_match(args, Pixmap, None):
            pixmap, clip = args
            clip_irect = JM_irect_from_py(clip)
            if mupdf.fz_is_infinite_irect(clip_irect):
                self.this = mupdf.fz_new_draw_device(mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox(mupdf.FzMatrix(), pixmap, clip_irect)
            return

        if args_match(args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device(display_list)
            return

        if args_match(args, mupdf.FzStextPage, None):
            text_page, flags = args
            options = mupdf.FzStextOptions(flags)
            self.this = mupdf.fz_new_stext_device(text_page, options)
            return

        raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
| 2751 | |
| 2752 | |
class DisplayList:
    """Wrapper around a ``mupdf.FzDisplayList`` stored in ``self.this``."""

    def __del__(self):
        # Only act for exact DisplayList instances, not subclasses.
        if type(self) is not DisplayList:
            return
        self.thisown = False

    def __init__(self, *args):
        """Create from a single ``mupdf.FzRect`` or wrap a single
        ``mupdf.FzDisplayList``.
        """
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            assert 0, f'Unrecognised {args=}'

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Render the display list via ``JM_pixmap_from_display_list``.

        ``colorspace`` is used only if it is a ``Colorspace`` instance;
        anything else (including None) selects RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Bounds of the display list as a ``Rect``."""
        val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
        val = Rect(val)
        return val

    def run(self, dw, m, area):
        """Run the display list through the device held by ``dw``.

        ``dw`` is normally a ``DeviceWrapper``, which keeps its device in
        ``.this``.  An object exposing a ``.device`` attribute is still
        accepted for backward compatibility.  ``m`` and ``area`` are
        converted with ``JM_matrix_from_py`` / ``JM_rect_from_py``.
        """
        # DeviceWrapper never defines `.device`, so fall back to `.this`
        # instead of failing with AttributeError.
        device = getattr(dw, "device", None)
        if device is None:
            device = dw.this
        mupdf.fz_run_display_list(
            self.this,
            device,
            JM_matrix_from_py(m),
            JM_rect_from_py(area),
            mupdf.FzCookie(),
        )
| 2797 | |
# Bind the `extra` module's insert_pdf implementation to a module-level name,
# but only when use of the `extra` module is enabled.
if g_use_extra:
    extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
| 2800 | |
| 2801 | |
| 2802 class Document: | |
| 2803 | |
| 2804 def __contains__(self, loc) -> bool: | |
| 2805 if type(loc) is int: | |
| 2806 if loc < self.page_count: | |
| 2807 return True | |
| 2808 return False | |
| 2809 if type(loc) not in (tuple, list) or len(loc) != 2: | |
| 2810 return False | |
| 2811 chapter, pno = loc | |
| 2812 if (0 | |
| 2813 or not isinstance(chapter, int) | |
| 2814 or chapter < 0 | |
| 2815 or chapter >= self.chapter_count | |
| 2816 ): | |
| 2817 return False | |
| 2818 if (0 | |
| 2819 or not isinstance(pno, int) | |
| 2820 or pno < 0 | |
| 2821 or pno >= self.chapter_page_count(chapter) | |
| 2822 ): | |
| 2823 return False | |
| 2824 return True | |
| 2825 | |
| 2826 def __delitem__(self, i)->None: | |
| 2827 if not self.is_pdf: | |
| 2828 raise ValueError("is no PDF") | |
| 2829 if type(i) is int: | |
| 2830 return self.delete_page(i) | |
| 2831 if type(i) in (list, tuple, range): | |
| 2832 return self.delete_pages(i) | |
| 2833 if type(i) is not slice: | |
| 2834 raise ValueError("bad argument type") | |
| 2835 pc = self.page_count | |
| 2836 start = i.start if i.start else 0 | |
| 2837 stop = i.stop if i.stop else pc | |
| 2838 step = i.step if i.step else 1 | |
| 2839 while start < 0: | |
| 2840 start += pc | |
| 2841 if start >= pc: | |
| 2842 raise ValueError("bad page number(s)") | |
| 2843 while stop < 0: | |
| 2844 stop += pc | |
| 2845 if stop > pc: | |
| 2846 raise ValueError("bad page number(s)") | |
| 2847 return self.delete_pages(range(start, stop, step)) | |
| 2848 | |
| 2849 def __enter__(self): | |
| 2850 return self | |
| 2851 | |
| 2852 def __exit__(self, *args): | |
| 2853 self.close() | |
| 2854 | |
@typing.overload
def __getitem__(self, i: int = 0) -> Page:
    ...

# The builtin-generic annotations below need Python 3.9 or later.
if sys.version_info >= (3, 9):
    @typing.overload
    def __getitem__(self, i: slice) -> list[Page]:
        ...

    @typing.overload
    def __getitem__(self, i: tuple[int, int]) -> Page:
        ...

def __getitem__(self, i=0):
    """Load page(s): ``doc[i]``.

    A slice returns a list of the pages it selects.  Otherwise ``i`` must
    be an int or a 2-tuple of ints and must be contained in the document
    (see ``__contains__``); the page is then loaded with ``load_page``.

    Raises:
        IndexError: if ``i`` is not in the document.
    """
    if isinstance(i, slice):
        return [self[j] for j in range(*i.indices(len(self)))]
    is_page_number = isinstance(i, int)
    is_location = (
        isinstance(i, tuple)
        and len(i) == 2
        and all(isinstance(x, int) for x in i)
    )
    assert is_page_number or is_location, \
        f'Invalid item number: {i=}.'
    if i not in self:
        raise IndexError(f"page {i} not in document")
    return self.load_page(i)
| 2876 | |
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
    """Creates a document. Use 'open' as a synonym.

    Notes:
        Basic usages:
        open() - new PDF document
        open(filename) - string or pathlib.Path, must have supported
                file extension.
        open(type, buffer) - type: valid extension, buffer: bytes object.
        open(stream=buffer, filetype=type) - keyword version of previous.
        open(filename, filetype=type) - filename with unrecognized extension.
        rect, width, height, fontsize: layout reflowable document
        on open (e.g. EPUB). Ignored if n/a.
    """
    # MuPDF error display is switched off while constructing; the previous
    # setting is restored in the `finally:` block below.
    global JM_mupdf_show_errors
    saved_show_errors = JM_mupdf_show_errors
    JM_mupdf_show_errors = 0

    try:
        self.is_closed = False
        self.is_encrypted = False
        self.metadata = None
        self.FontInfos = []
        self.Graftmaps = {}
        self.ShownPages = {}
        self.InsertedImages = {}
        self._page_refs = weakref.WeakValueDictionary()

        # Wrapping an already existing PDF document: nothing to open.
        if isinstance(filename, mupdf.PdfDocument):
            self.this = filename
            self.this_is_pdf = True
            return

        # Layout size: an explicit (finite) rect wins over width / height.
        layout_w = width
        layout_h = height
        layout_rect = JM_rect_from_py(rect)
        if not mupdf.fz_is_infinite_rect(layout_rect):
            layout_w = layout_rect.x1 - layout_rect.x0
            layout_h = layout_rect.y1 - layout_rect.y0

        self._name = filename
        self.stream = stream

        if stream is not None:
            if filename is not None and filetype is None:
                # 2025-05-06: Use <filename> as the filetype. This is
                # reversing precedence - we used to use <filename> if both
                # were set.
                filetype = filename
            # Normalise the stream to bytes / memoryview.
            if isinstance(stream, bytearray):
                stream = bytes(stream)
            elif isinstance(stream, io.BytesIO):
                stream = stream.getvalue()
            elif not isinstance(stream, (bytes, memoryview)):
                raise TypeError(f"bad stream: {type(stream)=}.")
            self.stream = stream

            assert isinstance(stream, (bytes, memoryview))
            if len(stream) == 0:
                # MuPDF raise an exception for this but also generates
                # warnings, which is not very helpful for us. So instead we
                # raise a specific exception.
                raise EmptyFileError('Cannot open empty stream.')

            memory_stream = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
            try:
                doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', memory_stream)
            except Exception as e:
                if g_exceptions_verbose > 1:
                    exception_info()
                raise FileDataError('Failed to open stream') from e

        elif filename:
            assert not stream
            # Accept str, path-like objects (have `absolute`) and
            # file-like objects (have `name`).
            if not isinstance(filename, str):
                if hasattr(filename, "absolute"):
                    filename = str(filename)
                elif hasattr(filename, "name"):
                    filename = filename.name
                else:
                    raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
            self._name = filename

            # Generate our own specific exceptions. This avoids MuPDF
            # generating warnings etc.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"no such file: '{filename}'")
            elif not os.path.isfile(filename):
                raise FileDataError(f"'{filename}' is no file")
            elif os.path.getsize(filename) == 0:
                raise EmptyFileError(f'Cannot open empty file: {filename=}.')

            if filetype:
                # Override the type implied by <filename>. MuPDF does not
                # have a way to do this directly so we open via a stream.
                try:
                    file_stream = mupdf.fz_open_file(filename)
                    doc = mupdf.fz_open_document_with_stream(filetype, file_stream)
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
            else:
                try:
                    doc = mupdf.fz_open_document(filename)
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError(f'Failed to open file {filename!r}.') from e

        else:
            # Neither stream nor filename: create a new PDF.
            pdf = mupdf.PdfDocument()
            doc = mupdf.FzDocument(pdf)

        if layout_w > 0 and layout_h > 0:
            mupdf.fz_layout_document(doc, layout_w, layout_h, fontsize)
        elif mupdf.fz_is_document_reflowable(doc):
            mupdf.fz_layout_document(doc, 400, 600, 11)

        self.this = doc

        # fixme: not sure where self.thisown gets initialised in PyMuPDF.
        #
        self.thisown = True

        if self.thisown:
            self._graft_id = TOOLS.gen_id()
            if self.needs_pass:
                self.is_encrypted = True
            else:   # we won't init until doc is decrypted
                self.init_doc()
            # the following hack detects invalid/empty SVG files, which else may lead
            # to interpreter crashes
            name_is_svg = filename and filename.lower().endswith("svg")
            if name_is_svg or (filetype and "svg" in filetype.lower()):
                try:
                    _ = self.convert_to_pdf()   # this seems to always work
                except Exception as e:
                    if g_exceptions_verbose > 1:
                        exception_info()
                    raise FileDataError("cannot open broken document") from e

        if g_use_extra:
            self.this_is_pdf = isinstance(self.this, mupdf.PdfDocument)
            if self.this_is_pdf:
                self.page_count2 = extra.page_count_pdf
            else:
                self.page_count2 = extra.page_count_fz
    finally:
        JM_mupdf_show_errors = saved_show_errors
| 3029 | |
| 3030 def __len__(self) -> int: | |
| 3031 return self.page_count | |
| 3032 | |
| 3033 def __repr__(self) -> str: | |
| 3034 m = "closed " if self.is_closed else "" | |
| 3035 if self.stream is None: | |
| 3036 if self.name == "": | |
| 3037 return m + "Document(<new PDF, doc# %i>)" % self._graft_id | |
| 3038 return m + "Document('%s')" % (self.name,) | |
| 3039 return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id) | |
| 3040 | |
def _addFormFont(self, name, font):
    """Add new form font.

    Stores ``font`` (converted with ``JM_pdf_obj_from_str``) under the
    key ``name`` in the dictionary at Root/AcroForm/DR/Font.  Returns
    None without doing anything if the document is not a PDF.

    Raises:
        ValueError: document closed or encrypted.
        RuntimeError: the PDF has no form font dictionary.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    font_dict = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        raise RuntimeError("PDF has no form fonts yet")
    key = mupdf.pdf_new_name(name)
    value = JM_pdf_obj_from_str(pdf, font)
    mupdf.pdf_dict_put(font_dict, key, value)
| 3060 | |
def _delToC(self):
    """Delete the TOC.

    Removes the outline root and every outline item object and returns
    the list of deleted xrefs (outline root last).  Returns an empty list
    if the document is not a PDF or has no /Outlines entry.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    xrefs = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs    # not a pdf

    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outline_root.m_internal:
        return xrefs    # no outlines or some problem

    first_item = mupdf.pdf_dict_get(outline_root, PDF_NAME('First'))
    xrefs = JM_outline_xrefs(first_item, xrefs)
    item_count = len(xrefs)

    # Delete the outline root object and its catalog entry first ...
    outline_root_xref = mupdf.pdf_to_num(outline_root)
    mupdf.pdf_delete_object(pdf, outline_root_xref)
    mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))

    # ... then every outline item.
    for idx in range(item_count):
        _, item_xref = JM_INT_ITEM(xrefs, idx)
        mupdf.pdf_delete_object(pdf, item_xref)

    xrefs.append(outline_root_xref)
    self.init_doc()
    return xrefs
| 3092 | |
def _delete_page(self, pno):
    """Delete page ``pno`` from the PDF.

    Afterwards the page tree is dropped if the document has a reverse
    page map.
    """
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page(pdf, pno)
    has_rev_page_map = pdf.m_internal.rev_page_map
    if has_rev_page_map:
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
| 3098 | |
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: ``xref`` fails the ``_INRANGE`` check against
            1 .. xref length - 1.
    """
    pdf = _as_pdf_document(self)
    highest_xref = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest_xref):
        raise ValueError(MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
| 3105 | |
def _embeddedFileGet(self, idx):
    """Return the content of the embedded file with index ``idx``.

    The EmbeddedFiles name array holds (name, filespec) pairs, so the
    entry for ``idx`` sits at array position ``2*idx+1``.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(name_array, 2*idx+1)
    file_stream = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(file_stream))
| 3120 | |
| 3121 def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int: | |
| 3122 filenames = self.embfile_names() | |
| 3123 msg = "'%s' not in EmbeddedFiles array." % str(item) | |
| 3124 if item in filenames: | |
| 3125 idx = filenames.index(item) | |
| 3126 elif item in range(len(filenames)): | |
| 3127 idx = item | |
| 3128 else: | |
| 3129 raise ValueError(msg) | |
| 3130 return idx | |
| 3131 | |
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Embed a new file and return the xref of its stream object.

    ``buffer_`` is converted with ``JM_BufferFromBytes``.  The
    EmbeddedFiles name array is created if it does not exist yet; ``name``
    and the new file entry are then appended to it.

    Raises:
        TypeError: ``buffer_`` could not be converted to a buffer.
    """
    pdf = _as_pdf_document(self)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError(MSG_BAD_BUFFER)

    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        # No name array yet: create one below the catalog.
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        name_array = mupdf.pdf_new_array(pdf, 6)    # an even number!
        mupdf.pdf_dict_putl(
            catalog,
            name_array,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
        )

    file_entry = JM_embed_file(pdf, content, filename, ufilename, desc, 1)
    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)
    mupdf.pdf_array_push(name_array, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(name_array, file_entry)
    return xref
| 3162 | |
def _embfile_del(self, idx):
    """Delete two consecutive items, ``idx`` and ``idx + 1``, from the
    EmbeddedFiles name array.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # Higher position first, so that the lower one does not shift.
    for position in (idx + 1, idx):
        mupdf.pdf_array_delete(name_array, position)
| 3174 | |
def _embfile_info(self, idx, infodict):
    """Fill ``infodict`` with information on embedded file ``idx``.

    Stores the collection item xref (0 if there is none), file name,
    unicode file name, description, size and stream length in
    ``infodict`` and returns the xref of the file's stream object.  Size
    and length are -1 when the PDF does not provide them.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(name_array, 2*idx+1)

    collection_item = mupdf.pdf_dict_get(entry, PDF_NAME('CI'))
    infodict["collection"] = (
        mupdf.pdf_to_num(collection_item) if collection_item.m_internal else 0
    )

    raw = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('F')))
    infodict[dictkey_filename] = JM_EscapeStrFromStr(raw)

    raw = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('UF')))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(raw)

    raw = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(entry, PDF_NAME('Desc')))
    infodict[dictkey_descr] = JM_UnicodeFromStr(raw)

    stream_obj = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)

    length = -1
    length_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('Length'))
    if length_obj.m_internal:
        length = mupdf.pdf_to_int(length_obj)

    # Size: prefer /DL, otherwise fall back to /Params/Size.
    size = -1
    size_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(stream_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    if size_obj.m_internal:
        size = mupdf.pdf_to_int(size_obj)

    infodict[dictkey_size] = size
    infodict[dictkey_length] = length
    return xref
| 3221 | |
def _embfile_names(self, namelist):
    """Get list of embedded file names.

    Appends the names to ``namelist``; nothing is returned.  The name
    array holds (name, filespec) pairs, hence every second item is read.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        return
    for position in range(0, mupdf.pdf_array_len(name_array), 2):
        name_obj = mupdf.pdf_array_get(name_array, position)
        text = mupdf.pdf_to_text_string(name_obj)
        namelist.append(JM_EscapeStrFromStr(text))
| 3241 | |
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update embedded file ``idx`` and return the xref of its stream.

    Args:
        idx: index of the embedded file.
        buffer_: new file content; skipped if empty or None.  It is
            converted with ``JM_BufferFromBytes``.
        filename, ufilename, desc: new values for the /F, /UF and /Desc
            entries; each is only written when truthy.

    Raises:
        TypeError: ``buffer_`` was given but could not be converted.
        JM_Exc_FileDataError (via RAISEPY): the entry has no /EF/F object.
    """
    pdf = _as_pdf_document(self)
    xref = 0
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)

    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY("bad PDF: no /EF object", JM_Exc_FileDataError)

    res = JM_BufferFromBytes(buffer_)
    # `buffer_` is the caller's Python object (e.g. bytes) and has no
    # `m_internal`; only the converted MuPDF buffer `res` is checked.
    if buffer_:
        if not res.m_internal:
            raise TypeError(MSG_BAD_BUFFER)
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    xref = mupdf.pdf_to_num(filespec)

    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)

    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)

    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
| 3277 | |
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    For each TOC item the dictionary at ``item[3]`` is extended with the
    outline item's xref and - where present in the PDF - italic / bold
    flags, collapse state, color and zoom.  Delegates to the ``extra``
    module when that is enabled.  Returns None without changes if the PDF
    has no outline items.

    Raises:
        ValueError: document closed, or an item's fourth element is not
            a dict.
        IndexError: number of outline xrefs differs from ``len(items)``.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.Document_extend_toc_items(self.this, items)
    pdf = _as_pdf_document(self)

    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outline_root.m_internal:
        return
    first_item = mupdf.pdf_dict_get(outline_root, PDF_NAME('First'))
    if not first_item.m_internal:
        return

    xrefs = JM_outline_xrefs(first_item, [])
    if not xrefs:
        return
    if len(xrefs) != len(items):
        raise IndexError("internal error finding outline xrefs")

    # update all TOC item dictionaries
    for i, raw_xref in enumerate(xrefs):
        xref = int(raw_xref)
        item = items[i]
        itemdict = item[3]
        if not isinstance(itemdict, dict):
            raise ValueError("need non-simple TOC format")
        itemdict[dictkey_xref] = raw_xref
        bookmark = mupdf.pdf_load_object(pdf, xref)

        # /F flags: 1 = italic, 2 = bold, 3 = both.
        flags = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('F')))
        if flags in (1, 3):
            itemdict["italic"] = True
        if flags in (2, 3):
            itemdict["bold"] = True

        # The sign of /Count decides the collapse state; 0 leaves it unset.
        count = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')))
        if count != 0:
            itemdict["collapse"] = count < 0

        col = mupdf.pdf_dict_get(bookmark, PDF_NAME('C'))
        if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3:
            itemdict[dictkey_color] = (
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)),
            )

        # Zoom is the fifth element of a 5-item destination array, taken
        # from /Dest or, failing that, from /A/D.
        zoom_value = 0
        dest = mupdf.pdf_dict_get(bookmark, PDF_NAME('Dest'))
        if not dest.m_internal or not mupdf.pdf_is_array(dest):
            dest = mupdf.pdf_dict_getl(bookmark, PDF_NAME('A'), PDF_NAME('D'))
        if mupdf.pdf_is_array(dest) and mupdf.pdf_array_len(dest) == 5:
            zoom_value = mupdf.pdf_to_real(mupdf.pdf_array_get(dest, 4))
        itemdict["zoom"] = float(zoom_value)

        item[3] = itemdict
        items[i] = item
| 3347 | |
def _forget_page(self, page: Page):
    """Remove a page from document page dict.

    Pages are keyed by ``id(page)`` in ``self._page_refs``; an unknown
    page is ignored.
    """
    key = id(page)
    if key not in self._page_refs:
        return
    del self._page_refs[key]
| 3354 | |
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of ``(glyph, width)`` pairs for character codes
    0 .. max(limit, 256) - 1.

    The font is a CJK font if ``ordering >= 0``, else the Base-14 font
    named ``bfname`` if MuPDF knows it, else the font stored at ``xref``.
    With a CJK font the character code itself is reported as glyph.
    Width is 0.0 where the glyph number is not positive.  ``ext`` is not
    used here.

    Raises:
        Exception: no font buffer could be obtained for ``xref``.
    """
    pdf = _as_pdf_document(self)
    char_count = max(limit, 256)

    if ordering >= 0:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            fontbuffer = JM_get_fontbuffer(pdf, xref)
            if not fontbuffer.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, fontbuffer, idx, 0)

    widths = []
    for code in range(char_count):
        glyph = mupdf.fz_encode_character(font, code)
        advance = mupdf.fz_advance_glyph(font, glyph, 0)
        if ordering >= 0:
            glyph = code
        widths.append((glyph, advance if glyph > 0 else 0.0))
    return widths
| 3384 | |
def _get_page_labels(self):
    """Return the page label definitions of the PDF as a list.

    The list is filled by ``JM_get_page_labels`` from the /Nums array(s)
    found below Root/PageLabels: directly, via /Kids/Nums, or from every
    entry of a /Kids array.  It is empty if nothing is found.
    """
    pdf = _as_pdf_document(self)
    labels = []
    pagelabels_key = mupdf.pdf_new_name("PageLabels")
    label_root = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), PDF_NAME('Root'), pagelabels_key)
    if not label_root.m_internal:
        return labels

    # simple case: direct /Nums object
    nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(label_root, PDF_NAME('Nums')))
    if nums.m_internal:
        JM_get_page_labels(labels, nums)
        return labels

    # case: /Kids/Nums
    nums = mupdf.pdf_resolve_indirect(
        mupdf.pdf_dict_getl(label_root, PDF_NAME('Kids'), PDF_NAME('Nums'))
    )
    if nums.m_internal:
        JM_get_page_labels(labels, nums)
        return labels

    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(label_root, PDF_NAME('Kids')))
    if not kids.m_internal or not mupdf.pdf_is_array(kids):
        return labels
    for position in range(mupdf.pdf_array_len(kids)):
        kid = mupdf.pdf_array_get(kids, position)
        nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
        JM_get_page_labels(labels, nums)
    return labels
| 3416 | |
def _getMetadata(self, key):
    """Get metadata.

    Returns the value ``mupdf.fz_lookup_metadata2`` reports for ``key``,
    or an empty string if that lookup raises.
    """
    try:
        value = mupdf.fz_lookup_metadata2(self.this, key)
    except Exception:
        if g_exceptions_verbose > 2:
            exception_info()
        return ''
    return value
| 3424 | |
def _getOLRootNumber(self):
    """Get xref of Outline Root, create it if missing.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if outline_root.m_internal:
        return mupdf.pdf_to_num(outline_root)

    # No outline root yet: add a new /Outlines dictionary as an object,
    # reference it from the catalog and read it back.
    new_root = mupdf.pdf_new_dict(pdf, 4)
    mupdf.pdf_dict_put(new_root, PDF_NAME('Type'), PDF_NAME('Outlines'))
    reference = mupdf.pdf_add_object(pdf, new_root)
    mupdf.pdf_dict_put(catalog, PDF_NAME('Outlines'), reference)
    outline_root = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    return mupdf.pdf_to_num(outline_root)
| 3441 | |
def _getPDFfileid(self):
    """Get PDF file id.

    Returns:
        None if the document is not a PDF. Otherwise a list with one
        hexlified entry (as produced by `binascii.hexlify`) per item of the
        trailer's /ID array; the list is empty if there is no /ID.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    idlist = []
    identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if identity.m_internal:
        for i in range(mupdf.pdf_array_len(identity)):
            text = mupdf.pdf_to_text_string(mupdf.pdf_array_get(identity, i))
            if isinstance(text, str):
                # binascii.hexlify() only accepts bytes-like input; passing
                # the str through unchanged raises TypeError.
                # NOTE(review): 'surrogateescape' assumes the binding decoded
                # the C string that way - confirm against the MuPDF bindings.
                text = text.encode('utf-8', 'surrogateescape')
            idlist.append(binascii.hexlify(text))
    return idlist
| 3457 | |
def _getPageInfo(self, pno, what):
    """List fonts, images, XObjects used on a page.

    Args:
        pno: page number; negative values count from the end.
        what: selector passed through unchanged to JM_scan_resources().
    Returns:
        The list filled by JM_scan_resources(); empty if the page has no
        (inherited) /Resources.
    Raises:
        ValueError: if the document is closed or encrypted, or if `pno` does
            not designate an existing page.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if isinstance(doc, mupdf.PdfDocument):
        pageCount = mupdf.pdf_count_pages(doc)
    else:
        pageCount = mupdf.fz_count_pages(doc)
    n = pno  # pno < 0 is allowed
    if n < 0:
        if pageCount <= 0:
            # Without this guard the normalisation loop below never ends
            # for a document that has no pages.
            raise ValueError( MSG_BAD_PAGENO)
        while n < 0:
            n += pageCount  # make it non-negative
    if n >= pageCount:
        raise ValueError( MSG_BAD_PAGENO)
    pdf = _as_pdf_document(self)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
    liste = []
    tracer = []
    if rsrc.m_internal:
        JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
    return liste
| 3477 | |
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert a font given either as a file or as binary data.

    Raises ValueError( MSG_FILE_OR_BUFFER) when neither source is supplied.
    Returns whatever JM_insert_font() returns.
    '''
    pdf = _as_pdf_document(self)
    have_source = bool(fontfile) or bool(fontbuffer)
    if not have_source:
        raise ValueError( MSG_FILE_OR_BUFFER)
    return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
| 3487 | |
def _loadOutline(self):
    """Load the document outline and wrap it in an Outline object.

    Returns None if loading the outline raises.
    """
    fz_doc = self.this
    assert isinstance( fz_doc, mupdf.FzDocument)
    try:
        raw_outline = mupdf.fz_load_outline( fz_doc)
    except Exception:
        # Failure to load is tolerated; report it only when verbose.
        if g_exceptions_verbose > 1:
            exception_info()
        return
    else:
        return Outline( raw_outline)
| 3498 | |
def _make_page_map(self):
    """Make an array page number -> page object.

    Not implemented: after the closed-document check this always trips an
    assertion.
    """
    if self.is_closed:
        raise ValueError("document closed")
    assert 0, '_make_page_map() is no-op'
| 3504 | |
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference.

    Args:
        pno: number of the source page.
        nb: number of the page that marks the target position.
        before: truthy to place the source in front of page `nb`, falsy to
            place it behind.
        copy: truthy to add a second reference to the page (the original
            stays where it is), falsy to move it.

    The work is done directly on the /Kids arrays of the page tree, and the
    /Count entries of all affected ancestors are adjusted. Afterwards all
    Python page objects of this document are invalidated.
    """
    pdf = _as_pdf_document(self)
    same = 0
    # get the two page objects -----------------------------------
    # locate the /Kids arrays and indices in each

    page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno)

    kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids'))

    page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb)
    kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids'))
    if before:  # calc index of source page in target /Kids
        pos = i2
    else:
        pos = i2 + 1

    # same /Kids array? ------------------------------------------
    # (the code below treats a result of 0 as "identical")
    same = mupdf.pdf_objcmp( kids1, kids2)

    # put source page in target /Kids array ----------------------
    if not copy and same != 0:  # update parent in page object
        mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2)
    mupdf.pdf_array_insert( kids2, page1, pos)

    if same != 0:  # different /Kids arrays ----------------------
        parent = parent2
        while parent.m_internal:  # increase /Count objects in parents
            count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
            parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        if not copy:  # delete original item
            mupdf.pdf_array_delete( kids1, i1)
            parent = parent1
            while parent.m_internal:  # decrease /Count objects in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count - 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
    else:  # same /Kids array
        if copy:  # source page is copied
            parent = parent2
            while parent.m_internal:  # increase /Count object in parents
                count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count'))
                mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1)
                parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent'))
        else:
            # A move inside one array: the page was already inserted at
            # `pos`, so the old entry sits at i1 if the insertion happened
            # behind it, otherwise it has been shifted to i1 + 1.
            if i1 < pos:
                mupdf.pdf_array_delete( kids1, i1)
            else:
                mupdf.pdf_array_delete( kids1, i1 + 1)
    if pdf.m_internal.rev_page_map:  # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
| 3560 | |
def _newPage(self, pno=-1, width=595, height=842):
    """Create a new PDF page and return it as `self[pno]`.

    Args:
        pno: insertion position handed to MuPDF (or to the `extra` module).
        width: page width, used as the right edge of the media box.
        height: page height, used as the bottom edge of the media box.
    Raises:
        ValueError: if the document is closed or encrypted; in the pure
            Python path also if `pno` is smaller than -1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not g_use_extra:
        pdf = _as_pdf_document(self)
        # Start from the unit rectangle and stretch it to the page size.
        box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        box.x1 = width
        box.y1 = height
        empty_contents = mupdf.FzBuffer()
        if pno < -1:
            raise ValueError( MSG_BAD_PAGENO)
        # create /Resources and /Contents objects
        res_dict = mupdf.pdf_add_new_dict(pdf, 1)
        new_page = mupdf.pdf_add_page( pdf, box, 0, res_dict, empty_contents)
        mupdf.pdf_insert_page( pdf, pno, new_page)
        # fixme: pdf->dirty = 1;
    else:
        extra._newPage( self.this, pno, width, height)

    self._reset_page_refs()
    return self[pno]
| 3584 | |
def _remove_links_to(self, numbers):
    """Pass this PDF and the page `numbers` on to _remove_dest_range()."""
    _remove_dest_range(_as_pdf_document(self), numbers)
| 3588 | |
def _remove_toc_item(self, xref):
    """ "Remove" the bookmark `xref` by letting it point to nowhere.

    The item stays in the outline: its /Dest and /A entries are deleted and
    its color (/C) is set to (0.8, 0.8, 0.8).
    """
    pdf = _as_pdf_document(self)
    bookmark = mupdf.pdf_new_indirect(pdf, xref, 0)
    for key in ('Dest', 'A'):
        mupdf.pdf_dict_del( bookmark, PDF_NAME(key))
    gray = mupdf.pdf_new_array( pdf, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real( gray, 0.8)
    mupdf.pdf_dict_put( bookmark, PDF_NAME('C'), gray)
| 3599 | |
def _reset_page_refs(self):
    """Invalidate all page objects registered with this document.

    Does nothing if the document is closed (or has no `is_closed`
    attribute yet).
    """
    if getattr(self, "is_closed", True):
        return
    # Work on a snapshot of the values, then empty the registry itself.
    for page in list(self._page_refs.values()):
        if page:
            page._erase()
    self._page_refs.clear()
| 3610 | |
def _set_page_labels(self, labels):
    """Replace the catalog's /PageLabels entry with one built from `labels`.

    `labels` is inserted textually between the brackets of the /Nums array.
    """
    pdf = _as_pdf_document(self)
    key = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    # Drop any existing entry, then create /PageLabels with an empty /Nums.
    mupdf.pdf_dict_del(catalog, key)
    empty_nums = mupdf.pdf_new_array(pdf, 0)
    mupdf.pdf_dict_putl(catalog, empty_nums, key, PDF_NAME('Nums'))

    # Fill the array by editing the catalog's object source text.
    catalog_xref = self.pdf_catalog()
    source = self.xref_object(catalog_xref, compressed=True)
    self.update_object(
        catalog_xref,
        source.replace("/Nums[]", "/Nums[%s]" % labels),
    )
| 3622 | |
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Update properties of the bookmark (outline item) with number `xref`.

    title: if truthy, stored as the new /Title.
    action: if truthy, /Dest is deleted and the object built from this
        string becomes /A.
    flags: always written as /F.
    color: a truthy value supplies three components for /C; a falsy value
        other than None deletes /C; None leaves it alone.
    collapse: True / False flips the sign of an existing /Count where
        needed; None leaves it alone.
    '''
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect( pdf, xref, 0)
    if title:
        mupdf.pdf_dict_put_text_string( item, PDF_NAME('Title'), title)
    if action:
        mupdf.pdf_dict_del( item, PDF_NAME('Dest'))
        mupdf.pdf_dict_put( item, PDF_NAME('A'), JM_pdf_obj_from_str( pdf, action))
    mupdf.pdf_dict_put_int( item, PDF_NAME('F'), flags)

    if color:
        rgb = mupdf.pdf_new_array( pdf, 3)
        for idx in (0, 1, 2):
            mupdf.pdf_array_push_real( rgb, color[idx])
        mupdf.pdf_dict_put( item, PDF_NAME('C'), rgb)
    elif color is not None:
        mupdf.pdf_dict_del( item, PDF_NAME('C'))

    if collapse is None:
        return
    if not mupdf.pdf_dict_get( item, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int( item, PDF_NAME('Count'))
    needs_flip = (count < 0 and collapse is False) or (count > 0 and collapse is True)
    if needs_flip:
        mupdf.pdf_dict_put_int( item, PDF_NAME('Count'), count * (-1))
| 3650 | |
@property
def FormFonts(self):
    """Names of the font resources found under /Root/AcroForm/DR/Font.

    Returns None if the document is not a PDF, otherwise a (possibly
    empty) list of names.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    fonts = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('DR'),
            PDF_NAME('Font'),
            )
    if not (fonts.m_internal and mupdf.pdf_is_dict(fonts)):
        return list()
    return [
        JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(fonts, idx)))
        for idx in range(mupdf.pdf_dict_len(fonts))
    ]
| 3671 | |
def add_layer(self, name, creator=None, on=None):
    """Add a new OC layer.

    The arguments are passed unchanged to JM_add_layer_config();
    afterwards MuPDF re-reads the optional content information.
    """
    pdf_doc = _as_pdf_document(self)
    JM_add_layer_config( pdf_doc, name, creator, on)
    mupdf.ll_pdf_read_ocg( pdf_doc.m_internal)
| 3677 | |
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: text stored as the OCG's /Name.
        config: index into /Configs of the layer configuration that receives
            the OCG; values below 0 select the default configuration /D.
        on: truthy to list the OCG in the configuration's /ON array,
            otherwise in its /OFF array.
        intent: a name (str) or a sequence of names for /Intent. A falsy
            value selects "View".
        usage: name for /Usage/CreatorInfo/Subtype. A falsy value selects
            "Artwork".
    Returns:
        xref of the new OCG.
    Raises:
        ValueError: if `config` does not designate an existing entry of
            /Configs.
    """
    pdf = _as_pdf_document(self)

    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # A sequence of intent names: push every non-empty item. This
        # restores what the former C implementation did for this case.
        for item in intent:
            if item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    if usage:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage)
    else:
        mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)

    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)

    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError( MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError( MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

    # Register the OCG in /Order, creating the array if necessary.
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)

    # Register the initial visibility in either /ON or /OFF.
    state_key = 'ON' if on else 'OFF'
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME(state_key))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME(state_key), 1)
    mupdf.pdf_array_push(obj, indocg)

    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)

    return mupdf.pdf_to_num(indocg)
| 3747 | |
def authenticate(self, password):
    """Decrypt document.

    Args:
        password: the password to try.
    Returns:
        The result of mupdf.fz_authenticate_password(); a truthy value
        means success.
    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    val = mupdf.fz_authenticate_password(self.this, password)
    if val:  # the doc is decrypted successfully and we init the outline
        self.is_encrypted = False
        self.init_doc()
        self.thisown = True
    return val
| 3759 | |
def can_save_incrementally(self):
    """Tell whether this document can be saved incrementally.

    Always False for documents that are not PDFs.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return mupdf.pdf_can_be_saved_incrementally(pdf)
    return False
| 3766 | |
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Turn annotations and / or form fields into permanent page content.

    Notes:
        The selected annotations or widgets are converted to regular
        page content such as text and vector graphics. Pages keep their
        appearance, but afterwards have no annotations, respectively
        no fields. When widgets are selected, the PDF will no longer
        be a Form PDF.

    Args:
        annots: convert annotations
        widgets: convert form fields

    """
    bake_annots = int(annots)
    bake_widgets = int(widgets)
    mupdf.pdf_bake_document(_as_pdf_document(self), bake_annots, bake_widgets)
| 3784 | |
@property
def chapter_count(self):
    """Number of chapters, as counted by MuPDF.

    Raises ValueError if the document is closed.
    """
    if not self.is_closed:
        return mupdf.fz_count_chapters( self.this)
    raise ValueError("document closed")
| 3791 | |
def chapter_page_count(self, chapter):
    """Number of pages in the given chapter.

    Raises:
        ValueError: if the document is closed or `chapter` is not a valid
            chapter number.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total = mupdf.fz_count_chapters( self.this)
    if not (0 <= chapter < total):
        raise ValueError( "bad chapter number")
    return mupdf.fz_count_chapter_pages( self.this, chapter)
| 3801 | |
def close(self):
    """Close the document.

    Raises ValueError if it is already closed (or has no `is_closed`
    attribute).
    """
    if getattr(self, "is_closed", True):
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    # Must happen while is_closed is still False: _reset_page_refs()
    # returns early for closed documents.
    self._reset_page_refs()
    self.is_closed = True
    self.Graftmaps = {}  # Fixes test_3140().
    self.this = None
| 3819 | |
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert a page range of the document to PDF and return the result.

    Args:
        from_page: first page; clamped into the valid range.
        to_page: last page; a negative or too large value selects the
            last page.
        rotate: rotation, passed through to JM_convert_to_pdf().
    Returns:
        Whatever JM_convert_to_pdf() returns.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last = mupdf.fz_count_pages(fz_doc) - 1

    first = 0 if from_page < 0 else from_page
    if first > last:
        first = last
    final = last if to_page < 0 else to_page
    if final > last:
        final = last

    # Remember how many warnings existed before, so that only the ones
    # added during the conversion are shown.
    seen_before = len(JM_mupdf_warnings_store)
    doc = JM_convert_to_pdf(fz_doc, first, final, rotate)
    seen_after = len(JM_mupdf_warnings_store)
    for idx in range(seen_before, seen_after):
        message(f'{JM_mupdf_warnings_store[idx]}')
    return doc
| 3842 | |
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    Only another reference to the same page object is created.

    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: if the document is closed or a page number is out of
            range.
    """
    if self.is_closed:
        raise ValueError("document closed")

    page_count = len(self)
    if not (pno in range(page_count) and to in range(-1, page_count)):
        raise ValueError("bad page number(s)")

    if to == -1:
        # Append: place the copy *behind* the current last page.
        return self._move_copy_page(pno, page_count - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
| 3867 | |
def del_xml_metadata(self):
    """Remove the /Metadata entry from the PDF catalog, if there is one.

    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del( catalog, PDF_NAME('Metadata'))
| 3876 | |
def delete_page(self, pno: int =-1):
    """Delete a single page from a PDF by delegating to delete_pages()."""
    result = self.delete_pages(pno)
    return result
| 3881 | |
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a list/tuple/range object, which can contain arbitrary
        page numbers.
        Or a single integer page number.
    Raises:
        ValueError: if the document is no PDF or is closed, if keyword and
            positional arguments are mixed, if the number or type of the
            positional arguments is wrong, or if a page number is out of
            range.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if self.is_closed:
        raise ValueError("document closed")

    page_count = self.page_count  # page count of document

    def normalize(pno):
        # Map a negative page number to its non-negative equivalent.
        if pno < 0 and page_count <= 0:
            # Adding a page count of 0 would never end the loop below.
            raise ValueError("bad page number(s)")
        while pno < 0:
            pno += page_count
        return pno

    if kw:  # check if keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = normalize(kw.get("from_page", -1))  # first page to delete
        t = normalize(kw.get("to_page", -1))  # last page to delete
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    else:
        # `args` is a tuple, so test it for emptiness instead of comparing
        # it with a list (which is never equal).
        if not args or len(args) > 2:
            raise ValueError("need 1 or 2 positional arguments")
        if len(args) == 2:
            f, t = args
            if not (type(f) is int and type(t) is int):
                raise ValueError("both arguments must be int")
            if f > t:
                f, t = t, f
            if not f <= t < page_count:
                raise ValueError("bad page number(s)")
            numbers = tuple(range(f, t + 1))
        elif isinstance(args[0], int):
            numbers = (normalize(args[0]),)
        else:
            numbers = tuple(args[0])

    # Convert first and de-duplicate afterwards, so that values which only
    # become equal through int() cannot make a page get deleted twice.
    numbers = sorted({int(n) for n in numbers})
    if numbers == []:
        message("nothing to delete")
        return
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")
    frozen_numbers = frozenset(numbers)
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        # TOC page numbers are 1-based.
        if toc[i][2] - 1 in frozen_numbers:
            self._remove_toc_item(xref)  # remove target in PDF object

    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
| 3950 | |
def embfile_add(self,
        name: str,
        buffer_: ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> None:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description, default: the name
    Returns:
        The xref delivered by the internal _embfile_add().
    Raises:
        ValueError: if an item called `name` already exists.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    # Fill in the documented fallbacks for omitted values.
    filename = name if filename is None else filename
    ufilename = filename if ufilename is None else ufilename
    desc = name if desc is None else desc

    xref = self._embfile_add(
            name,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    # Creation and modification date both get the same "now" timestamp.
    stamp = get_pdf_now()
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    for key in ("Params/CreationDate", "Params/ModDate"):
        self.xref_set_key(xref, key, get_pdf_str(stamp))
    return xref
| 3990 | |
def embfile_count(self) -> int:
    """Return how many names embfile_names() delivers."""
    names = self.embfile_names()
    return len(names)
| 3994 | |
def embfile_del(self, item: typing.Union[int, str]):
    """Delete an entry from EmbeddedFiles.

    Notes:
        `item` must be the name or the index of an EmbeddedFiles item.
        The data is physically removed only when saving to a new file
        with an appropriate garbage option.
    Args:
        item: name or number of item.
    Returns:
        The result of the internal _embfile_del().
    """
    return self._embfile_del(self._embeddedFileIndex(item))
| 4009 | |
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Get the content of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        (bytes) The file content.
    """
    return self._embeddedFileGet(self._embeddedFileIndex(item))
| 4020 | |
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Get information of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        Information dictionary. Besides "name" and whatever the internal
        _embfile_info() stores, it contains "creationDate", "modDate" and
        "checksum" (hex string) for those values that are present.
    """
    idx = self._embeddedFileIndex(item)
    infodict = {"name": self.embfile_names()[idx]}
    xref = self._embfile_info(idx, infodict)

    # Copy the two date entries, skipping those that are absent ("null").
    for info_key, pdf_key in (
            ("creationDate", "Params/CreationDate"),
            ("modDate", "Params/ModDate"),
            ):
        kind, value = self.xref_get_key(xref, pdf_key)
        if kind != "null":
            infodict[info_key] = value

    kind, md5 = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        infodict["checksum"] = binascii.hexlify(md5.encode()).decode()
    return infodict
| 4042 | |
def embfile_names(self) -> list:
    """Get list of names of EmbeddedFiles.

    The list is filled in place by the internal _embfile_names().
    """
    collected = []
    self._embfile_names(collected)
    return collected
| 4048 | |
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> None:
    """Change an item of the EmbeddedFiles array.

    Notes:
        Only provided parameters are changed. The modification date is
        set to the current time in any case.
    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.
    Returns:
        The xref delivered by the internal _embfile_upd().
    """
    xref = self._embfile_upd(
            self._embeddedFileIndex(item),
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(get_pdf_now()))
    return xref
| 4079 | |
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Get a font by xref.

    Returns (name, extension, subtype, content) as a tuple, or - if `named`
    is truthy - as a dictionary. All items are empty if the object is no
    font or a 'CIDFontType*' font. The content is empty if `info_only` is
    truthy or the font extension is 'n/a'.
    '''
    pdf = _as_pdf_document(self)
    obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
    extractable = (
        mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
        and not mupdf.pdf_to_name( subtype).startswith('CIDFontType')
    )
    if not extractable:
        if named:
            return {
                dictkey_name: '',
                dictkey_ext: '',
                dictkey_type: '',
                dictkey_content: b'',
            }
        return '', '', '', b''

    # Prefer /BaseFont and fall back to /Name if it is missing or null.
    basefont = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont'))
    if basefont.m_internal and not mupdf.pdf_is_null(basefont):
        bname = basefont
    else:
        bname = mupdf.pdf_dict_get(obj, PDF_NAME('Name'))

    ext = JM_get_fontextension(pdf, xref)
    if ext == 'n/a' or info_only:
        content = b''
    else:
        content = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))

    font_name = JM_EscapeStrFromStr(mupdf.pdf_to_name(bname))
    font_ext = JM_UnicodeFromStr(ext)
    font_type = JM_UnicodeFromStr(mupdf.pdf_to_name(subtype))
    if named:
        return {
            dictkey_name: font_name,
            dictkey_ext: font_ext,
            dictkey_type: font_type,
            dictkey_content: content,
        }
    return (font_name, font_ext, font_type, content)
| 4128 | |
def extract_image(self, xref):
    """Get the image with the given xref as a dictionary.

    Raises:
        ValueError: if the document is closed or encrypted, if `xref` is
            out of range, or if the object is not an image.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    pdf = _as_pdf_document(self)
    if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
        raise ValueError( MSG_BAD_XREF)

    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_name_eq(mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')), PDF_NAME('Image')):
        raise ValueError( "not an image")

    # Object number of the mask (/SMask, alternatively /Mask), 0 if none.
    mask = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
    smask = mupdf.pdf_to_num(mask) if mask.m_internal else 0

    # load the image
    img = mupdf.pdf_load_image(pdf, obj)
    result = dict()
    _make_image_dict(img, result)
    result[dictkey_smask] = smask
    result[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace())
    return result
| 4158 | |
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Save the PDF via save(), using the defaults of this signature.

    Every argument is forwarded under the same name.
    '''
    options = {
        "garbage": garbage,
        "clean": clean,
        "deflate": deflate,
        "deflate_images": deflate_images,
        "deflate_fonts": deflate_fonts,
        "incremental": incremental,
        "ascii": ascii,
        "expand": expand,
        "linear": linear,
        "pretty": pretty,
        "encryption": encryption,
        "permissions": permissions,
        "owner_pw": owner_pw,
        "user_pw": user_pw,
        "no_new_id": no_new_id,
        "preserve_metadata": preserve_metadata,
        "use_objstms": use_objstms,
        "compression_effort": compression_effort,
    }
    return self.save(filename, **options)
| 4205 | |
def find_bookmark(self, bm):
    """Look up bookmark `bm` and return its location as (chapter, page).

    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    loc = mupdf.fz_lookup_bookmark2( self.this, bm)
    chapter, page = loc.chapter, loc.page
    return chapter, page
| 4212 | |
def fullcopy_page(self, pno, to=-1):
    """Make a full page duplicate.

    Unlike a plain page reference copy, the page object is deep-copied and
    receives its own annotation objects and its own /Contents stream.

    Args:
        pno: number of the page to duplicate.
        to: position handed to pdf_insert_page(); must lie between -1 and
            the last page number.
    Raises:
        ValueError: if `pno` or `to` is out of range.
    """
    pdf = _as_pdf_document(self)
    page_count = mupdf.pdf_count_pages( pdf)
    try:
        if (not _INRANGE(pno, 0, page_count - 1)
                or not _INRANGE(to, -1, page_count - 1)
                ):
            raise ValueError( MSG_BAD_PAGENO)

        page1 = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno))

        page2 = mupdf.pdf_deep_copy_obj( page1)
        old_annots = mupdf.pdf_dict_get( page2, PDF_NAME('Annots'))

        # copy annotations, but remove Popup and IRT types
        if old_annots.m_internal:
            n = mupdf.pdf_array_len( old_annots)
            new_annots = mupdf.pdf_new_array( pdf, n)
            for i in range(n):
                o = mupdf.pdf_array_get( old_annots, i)
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                # Store the copy as a new object and keep working with an
                # indirect reference to it.
                copy_o = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( o))
                xref = mupdf.pdf_create_object( pdf)
                mupdf.pdf_update_object( pdf, xref, copy_o)
                copy_o = mupdf.pdf_new_indirect( pdf, xref, 0)
                # The copy must not keep its /Popup and /P entries.
                mupdf.pdf_dict_del( copy_o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( copy_o, PDF_NAME('P'))
                mupdf.pdf_array_push( new_annots, copy_o)
            mupdf.pdf_dict_put( page2, PDF_NAME('Annots'), new_annots)

        # copy the old contents stream(s)
        res = JM_read_contents( page1)

        # create new /Contents object for page2
        if res and res.m_internal:
            #contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" ", 1), NULL, 0)
            # The stream is created with placeholder data first and then
            # updated with the copied contents.
            contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0)
            JM_update_stream( pdf, contents, res, 1)
            mupdf.pdf_dict_put( page2, PDF_NAME('Contents'), contents)

        # now insert target page, making sure it is an indirect object
        xref = mupdf.pdf_create_object( pdf)  # get new xref
        mupdf.pdf_update_object( pdf, xref, page2)  # store new page

        page2 = mupdf.pdf_new_indirect( pdf, xref, 0)  # reread object
        mupdf.pdf_insert_page( pdf, to, page2)  # and store the page
    finally:
        # Executed on success and on failure (including the range check).
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
| 4268 | |
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    Args:
        config: -1 selects the /D entry of /OCProperties; any other value
            is used as an index into the /Configs array.

    Returns:
        The result of ``JM_get_ocg_arrays()`` for the selected configuration,
        or None if the PDF has no /OCProperties.

    Raises:
        ValueError: if the selected configuration does not exist.
    """
    pdf = _as_pdf_document(self)
    oc_props = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not oc_props.m_internal:
        return None
    if config == -1:
        layer = mupdf.pdf_dict_get(oc_props, PDF_NAME('D'))
    else:
        all_configs = mupdf.pdf_dict_get(oc_props, PDF_NAME('Configs'))
        layer = mupdf.pdf_array_get(all_configs, config)
    if not layer.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays(layer)
| 4290 | |
def get_layers(self):
    """Show optional OC layers.

    Returns:
        A list of dicts with keys "number", "name" and "creator", one per
        layer configuration reported by MuPDF.
    """
    pdf = _as_pdf_document(self)
    count = mupdf.pdf_count_layer_configs(pdf)
    if count == 1:
        # A count of 1 is only taken at face value when the catalog really
        # has a /Configs array; otherwise nothing is listed.
        configs = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                PDF_NAME('Root'),
                PDF_NAME('OCProperties'),
                PDF_NAME('Configs'),
                )
        if not mupdf.pdf_is_array(configs):
            count = 0
    layers = []
    info = mupdf.PdfLayerConfig()  # reused, refilled on every iteration
    for number in range(count):
        mupdf.pdf_layer_config_info(pdf, number, info)
        layers.append({
                "number": number,
                "name": info.name,
                "creator": info.creator,
                })
    return layers
| 4315 | |
def get_new_xref(self):
    """Make new xref.

    Returns:
        The number returned by ``pdf_create_object()``.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
| 4325 | |
def get_ocgs(self):
    """Show existing optional content groups.

    Returns:
        A dict keyed by the xref of each entry of /OCProperties/OCGs. Each
        value is a dict with keys "name", "intent" (list of names), "on"
        (negated result of ``pdf_is_ocg_hidden``) and "usage" (the
        /Usage/CreatorInfo/Subtype name, or None). Empty if there is no
        /OCGs array.
    """
    creator_info = mupdf.pdf_new_name("CreatorInfo")
    pdf = _as_pdf_document(self)
    ocgs = mupdf.pdf_dict_getl(
            mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')),
            PDF_NAME('OCProperties'),
            PDF_NAME('OCGs'),
            )
    result = {}
    if not mupdf.pdf_is_array(ocgs):
        return result
    for idx in range(mupdf.pdf_array_len(ocgs)):
        ocg = mupdf.pdf_array_get(ocgs, idx)
        xref = mupdf.pdf_to_num(ocg)
        name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))

        subtype = mupdf.pdf_dict_getl(ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype'))
        usage = mupdf.pdf_to_name(subtype) if subtype.m_internal else None

        # /Intent may be a single name or an array of names.
        intents = []
        intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name(intent):
                intents.append(mupdf.pdf_to_name(intent))
            elif mupdf.pdf_is_array(intent):
                for j in range(mupdf.pdf_array_len(intent)):
                    entry = mupdf.pdf_array_get(intent, j)
                    if mupdf.pdf_is_name(entry):
                        intents.append(mupdf.pdf_to_name(entry))

        hidden = mupdf.pdf_is_ocg_hidden(pdf, mupdf.PdfObj(), usage, ocg)
        result[xref] = {
                "name": name,
                "intent": intents,
                "on": not hidden,
                "usage": usage,
                }
    return result
| 4368 | |
def get_outline_xrefs(self):
    """Get list of outline xref numbers.

    Returns:
        The list produced by ``JM_outline_xrefs()`` starting at
        /Root/Outlines/First, or an empty list if the document is not a PDF
        or any object on that path is missing.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return []
    # Walk trailer -> Root -> Outlines -> First, giving up at the first
    # missing link.
    node = mupdf.pdf_trailer(pdf)
    for key in ('Root', 'Outlines', 'First'):
        node = mupdf.pdf_dict_get(node, PDF_NAME(key))
        if not node.m_internal:
            return []
    return JM_outline_xrefs(node, [])
| 4386 | |
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of fonts used on a page.

    Args:
        pno: page number, or an object with a ``number`` attribute.
        full: if false, the last element of every entry is dropped.

    Returns:
        The entries from ``_getPageInfo(pno, 1)``; an empty tuple for
        non-PDF documents.

    Raises:
        ValueError: if the document is closed or encrypted, or ``pno`` is
            neither an int nor has a ``number`` attribute.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        # Anything that is not exactly an int is expected to be page-like.
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    fonts = self._getPageInfo(pno, 1)
    return fonts if full else [entry[:-1] for entry in fonts]
| 4404 | |
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of images used on a page.

    Args:
        pno: page number, handed on to ``_getPageInfo``.
        full: if false, the last element of every entry is dropped.

    Returns:
        The entries from ``_getPageInfo(pno, 2)``; an empty tuple for
        non-PDF documents.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    images = self._getPageInfo(pno, 2)
    return images if full else [entry[:-1] for entry in images]
| 4416 | |
def get_page_xobjects(self, pno: int) -> list:
    """Retrieve a list of XObjects used on a page.

    Returns:
        The result of ``_getPageInfo(pno, 3)``; an empty tuple for non-PDF
        documents.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return self._getPageInfo(pno, 3) if self.is_pdf else ()
| 4426 | |
def get_sigflags(self):
    """Get the /SigFlags value.

    Returns:
        The integer stored at /Root/AcroForm/SigFlags, or -1 if the document
        is not a PDF or the key is absent.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return -1  # not a PDF
    flags = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('SigFlags'),
            )
    return mupdf.pdf_to_int(flags) if flags.m_internal else -1
| 4442 | |
def get_xml_metadata(self):
    """Get document XML metadata.

    Returns:
        The /Root/Metadata stream converted by ``JM_UnicodeFromBuffer()``,
        or an empty string if the document is not a PDF or has no such
        object.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return ''
    meta = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('Metadata'),
            )
    if not meta.m_internal:
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(meta))
| 4459 | |
def init_doc(self):
    """Load the outline and the metadata dictionary of the document.

    Sets ``self._outline`` from ``_loadOutline()`` and ``self.metadata``
    from ``_getMetadata()`` lookups. The 'encryption' entry is None when
    the lookup yields the string 'None'.

    Raises:
        ValueError: if the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()
    # metadata key -> lookup key understood by _getMetadata()
    lookup_keys = {
            'format': 'format',
            'title': 'info:Title',
            'author': 'info:Author',
            'subject': 'info:Subject',
            'keywords': 'info:Keywords',
            'creator': 'info:Creator',
            'producer': 'info:Producer',
            'creationDate': 'info:CreationDate',
            'modDate': 'info:ModDate',
            'trapped': 'info:Trapped',
            }
    self.metadata = {k: self._getMetadata(v) for k, v in lookup_keys.items()}
    # Query the encryption entry once and reuse the answer.
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
| 4481 | |
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document to an existing PDF.

    The infile may be given as a filename, a Document or a Pixmap. Other
    parameters - where applicable - equal those of insert_pdf().

    A Pixmap is turned into a PNG-based Document first (converted to RGB
    when its colorspace has more than 3 components). A non-PDF source is
    converted to PDF before insert_pdf() is called.

    Raises:
        ValueError: if the resulting source document is falsy.
    '''
    if isinstance(infile, Pixmap):
        pixmap = infile
        if pixmap.colorspace.n > 3:
            pixmap = Pixmap(csRGB, pixmap)
        source = Document("png", pixmap.tobytes())
    elif isinstance(infile, Document):
        source = infile
    else:
        source = Document(infile)
    if not source:
        raise ValueError("bad infile parameter")
    if not source.is_pdf:
        source = Document("pdf", source.convert_to_pdf())
    return self.insert_pdf(
            source,
            from_page=from_page,
            to_page=to_page,
            start_at=start_at,
            rotate=rotate,
            links=links,
            annots=annots,
            show_progress=show_progress,
            final=final,
            )
| 4524 | |
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only; the passed value is replaced by the
            Graftmap kept for this source document.

    Raises:
        ValueError: if the document is closed or encrypted, or source and
            target have the same graft id.

    Copy sequence reversed if from_page > to_page."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")

    target_count = self.page_count
    source_count = docsrc.page_count

    # Normalized copies of the page numbers. They are needed afterwards for
    # rebuilding links and widgets, which must know the real insertion
    # point even if start_at was given as -1.
    first = min(max(from_page, 0), source_count - 1)        # -1 = first page
    if to_page < 0:
        last = source_count - 1                             # -1 = last page
    else:
        last = min(to_page, source_count - 1)
    if start_at < 0:
        insert_at = target_count                            # -1 = behind last page
    else:
        insert_at = min(start_at, target_count)

    if len(docsrc) > show_progress > 0:
        src_label = os.path.basename(docsrc.name) or "memory PDF"
        dst_label = os.path.basename(self.name) or "memory PDF"
        message("Inserting '%s' at '%s'" % (src_label, dst_label))

    # Retrieve / make a Graftmap to avoid duplicate objects.
    graft_key = docsrc._graft_id
    _gmap = self.Graftmaps.get(graft_key, None)
    if _gmap is None:
        _gmap = Graftmap(self)
        self.Graftmaps[graft_key] = _gmap

    if g_use_extra:
        # The extra module receives the caller's page numbers as given.
        extra_FzDocument_insert_pdf(
                self.this,
                docsrc.this,
                from_page,
                to_page,
                start_at,
                rotate,
                links,
                annots,
                show_progress,
                final,
                _gmap,
                )
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)
        if not (pdfout.m_internal and pdfsrc.m_internal):
            raise TypeError( "source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, first, last, insert_at, rotate, links, annots, show_progress, _gmap)

    self._reset_page_refs()
    if links:
        self._do_links(docsrc, from_page=first, to_page=last, start_at=insert_at)
    if widgets:
        self._do_widgets(
                docsrc,
                _gmap,
                from_page=first,
                to_page=last,
                start_at=insert_at,
                join_duplicates=join_duplicates,
                )
    if final == 1:
        # Last insertion from this source: drop its graft map.
        self.Graftmaps[graft_key] = None
| 4643 | |
@property
def is_dirty(self):
    """True if ``pdf_has_unsaved_changes()`` reports changes; False otherwise
    and for non-PDF documents."""
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False
    return bool(mupdf.pdf_has_unsaved_changes(pdf))
| 4651 | |
@property
def is_fast_webaccess(self):
    '''
    Check whether we have a linearized PDF.

    Returns the value of ``pdf_doc_was_linearized()`` for a PDF, and False
    for any other document type.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False    # gracefully handle non-PDF
    return mupdf.pdf_doc_was_linearized(pdf)
| 4661 | |
@property
def is_form_pdf(self):
    """Either False or PDF field count.

    The count is the length of the /Root/AcroForm/Fields array (which may
    be 0). False is returned for non-PDF documents, when that array does
    not exist, or when reading it raises an exception.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return False
    try:
        fields = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                mupdf.PDF_ENUM_NAME_Root,
                mupdf.PDF_ENUM_NAME_AcroForm,
                mupdf.PDF_ENUM_NAME_Fields,
                )
        n_fields = mupdf.pdf_array_len(fields) if mupdf.pdf_is_array(fields) else -1
    except Exception:
        if g_exceptions_verbose: exception_info()
        return False
    return n_fields if n_fields >= 0 else False
| 4684 | |
@property
def is_pdf(self):
    """Check for PDF."""
    if isinstance(self.this, mupdf.PdfDocument):
        return True
    # Use the low-level ll_pdf_specifics() on the raw pointer. The
    # class-aware mupdf.pdf_specifics() would end up creating a new
    # PdfDocument, which is ok but a little unnecessary.
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
| 4699 | |
@property
def is_reflowable(self):
    """Check if document is layoutable.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    # Query the underlying MuPDF document (self.this), exactly as layout()
    # does, rather than handing this wrapper object to the binding.
    return bool(mupdf.fz_is_document_reflowable(self.this))
| 4706 | |
@property
def is_repaired(self):
    """Check whether PDF was repaired.

    Always False for non-PDF documents.
    """
    pdf = _as_pdf_document(self, required=0)
    return bool(mupdf.pdf_was_repaired(pdf)) if pdf.m_internal else False
| 4717 | |
def journal_can_do(self):
    """Show if undo and / or redo are possible.

    Returns:
        A dict with boolean entries 'undo' and 'redo'.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    return {
            'undo': bool(mupdf.pdf_can_undo(pdf)),
            'redo': bool(mupdf.pdf_can_redo(pdf)),
            }
| 4728 | |
def journal_enable(self):
    """Activate document journalling.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_enable_journal(_as_pdf_document(self))
| 4735 | |
def journal_is_enabled(self):
    """Check if journalling is enabled.

    Returns:
        True if the underlying PDF document has a journal, else False.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    # Coerce to a real bool so that callers do not receive the low-level
    # journal object (or None) that the plain `and` expression evaluates to.
    return bool(pdf.m_internal and pdf.m_internal.journal)
| 4743 | |
def journal_load(self, filename):
    """Load a journal from a file.

    Args:
        filename: a str is passed to ``pdf_load_journal()`` as a file name;
            anything else is converted with ``JM_BufferFromBytes()`` and
            deserialised from memory.

    Raises:
        ValueError: if the document is closed or encrypted.
        JM_Exc_FileDataError (via RAISEPY): if the document has no journal
            after loading.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        stream = mupdf.fz_open_buffer(JM_BufferFromBytes(filename))
        mupdf.pdf_deserialise_journal(pdf, stream)
    else:
        mupdf.pdf_load_journal(pdf, filename)
    if not pdf.m_internal.journal:
        RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
| 4757 | |
def journal_op_name(self, step):
    """Show operation name for given step.

    Returns:
        The value of ``pdf_undoredo_step(pdf, step)``.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return mupdf.pdf_undoredo_step(_as_pdf_document(self), step)
| 4765 | |
def journal_position(self):
    """Show journalling state.

    Returns:
        The two values delivered by ``pdf_undoredo_state()`` as a tuple
        (presumably current position and total number of steps -- confirm
        against MuPDF).

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    position, count = mupdf.pdf_undoredo_state(_as_pdf_document(self))
    return position, count
| 4774 | |
def journal_redo(self):
    """Move forward in the journal.

    Returns:
        True, after ``pdf_redo()`` returned.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_redo(_as_pdf_document(self))
    return True
| 4782 | |
def journal_save(self, filename):
    """Save journal to a file.

    Args:
        filename: a str is passed to ``pdf_save_journal()`` as a file name;
            anything else is wrapped with ``JM_new_output_fileptr()`` and
            written to, then that output is closed.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not isinstance(filename, str):
        output = JM_new_output_fileptr(filename)
        mupdf.pdf_write_journal(pdf, output)
        output.fz_close_output()
    else:
        mupdf.pdf_save_journal(pdf, filename)
| 4794 | |
def journal_start_op(self, name=None):
    """Begin a journalling operation.

    Args:
        name: operation name; if falsy, an implicit operation is started.

    Raises:
        ValueError: if the document is closed or encrypted.
        RuntimeError: if the document has no journal.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf.m_internal.journal:
        raise RuntimeError( "Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf)
    else:
        mupdf.pdf_begin_operation(pdf, name)
| 4806 | |
def journal_stop_op(self):
    """End a journalling operation.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_end_operation(_as_pdf_document(self))
| 4813 | |
def journal_undo(self):
    """Move backwards in the journal.

    Returns:
        True, after ``pdf_undo()`` returned.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_undo(_as_pdf_document(self))
    return True
| 4821 | |
@property
def language(self):
    """Document language.

    Returns None for non-PDF documents and when the language is unset;
    otherwise the string from ``fz_string_from_text_language2()``.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return None
    lang = mupdf.pdf_document_language(pdf)
    return None if lang == mupdf.FZ_LANG_UNSET else mupdf.fz_string_from_text_language2(lang)
| 4832 | |
@property
def last_location(self):
    """Id (chapter, page) of last page.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    location = mupdf.fz_last_page(self.this)
    return (location.chapter, location.page)
| 4840 | |
def layer_ui_configs(self):
    """Show OC visibility status modifiable by user.

    Returns:
        A list of dicts with keys "number", "text", "depth", "type", "on"
        and "locked". "type" is "checkbox" for type code 1, "radiobox" for
        type code 2 and "label" for anything else.
    """
    type_names = {1: "checkbox", 2: "radiobox"}
    pdf = _as_pdf_document(self)
    info = mupdf.PdfLayerConfigUi()     # reused, refilled on every iteration
    entries = []
    for number in range(mupdf.pdf_count_layer_config_ui(pdf)):
        mupdf.pdf_layer_config_ui_info(pdf, number, info)
        entries.append({
                "number": number,
                "text": info.text,
                "depth": info.depth,
                "type": type_names.get(info.type, "label"),
                "on": info.selected,
                "locked": info.locked,
                })
    return entries
| 4865 | |
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    Does nothing if the document is not reflowable.

    Args:
        rect: if it converts to a non-infinite rectangle, its width and
            height take precedence over ``width`` / ``height``.
        width: page width, used when ``rect`` yields an infinite rectangle.
        height: page height, used when ``rect`` yields an infinite rectangle.
        fontsize: font size passed to ``fz_layout_document()``.

    Raises:
        ValueError: if the document is closed or encrypted, or the
            resulting width or height is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    if not mupdf.fz_is_document_reflowable(doc):
        return
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box):
        page_w, page_h = width, height
    else:
        page_w = box.x1 - box.x0
        page_h = box.y1 - box.y0
    if page_w <= 0.0 or page_h <= 0.0:
        raise ValueError( "bad page size")
    mupdf.fz_layout_document(doc, page_w, page_h, fontsize)

    # Page references and document-level information are refreshed after a
    # re-layout.
    self._reset_page_refs()
    self.init_doc()
| 4885 | |
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter. None is
    treated as 0; a negative int is increased by the page count until it is
    non-negative.

    Raises:
        ValueError: if the document is closed or encrypted, or 'page_id'
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        total = self.page_count
        while page_id < 0:
            page_id += total
    if not isinstance(page_id, int):
        chapter, pagenum = page_id
        fz_page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    else:
        fz_page = mupdf.fz_load_page(self.this, page_id)

    page = Page(fz_page, self)
    page.thisown = True
    page.parent = self
    # Register the page with this document.
    self._page_refs[id(page)] = page
    page._annot_refs = weakref.WeakValueDictionary()
    page.number = page_id
    return page
| 4915 | |
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    Args:
        pno: page number; a negative value is increased by the page count
            until it is non-negative.

    Raises:
        ValueError: if the document is closed, or ``pno`` does not denote
            an existing page (which includes any ``pno`` for a document
            without pages).
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages(this_doc)
    if pno < 0 and page_count < 1:
        # Nothing to wrap around to: adding a page count of 0 would never
        # make pno non-negative.
        raise ValueError(MSG_BAD_PAGENO)
    while pno < 0:
        pno += page_count
    if pno >= page_count:
        raise ValueError(MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(this_doc, pno)
    return loc.chapter, loc.page
| 4929 | |
def make_bookmark(self, loc):
    """Make a page pointer before layouting document.

    Args:
        loc: sequence that is unpacked into ``mupdf.FzLocation(...)``.

    Returns:
        The value of ``ll_fz_make_bookmark2()`` for that location.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2(self.this.m_internal, location.internal())
| 4937 | |
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    Returns:
        None if ``pdf_catalog()`` yields 0. An empty dict if the catalog
        has no usable /MarkInfo dictionary. Otherwise a dict with keys
        "Marked", "UserProperties" and "Suspects" (all False by default);
        every key found with the value "true" is set to True. Parsing stops
        at the first entry that is not exactly one key and one value.
    """
    xref = self.pdf_catalog()
    if xref == 0:
        return None
    rc = self.xref_get_key(xref, "MarkInfo")
    kind = rc[0]
    if kind == "null":
        return {}
    if kind == "xref":
        # Indirect reference: fetch the source of the referenced object.
        source = self.xref_object(int(rc[1].split()[0]), compressed=True)
    elif kind == "dict":
        source = rc[1]
    else:
        source = None
    if source is None or not (source[:2] == "<<" and source[-2:] == ">>"):
        return {}
    flags = {"Marked": False, "UserProperties": False, "Suspects": False}
    # Strip the "<<" / ">>" brackets and split at the name delimiters; the
    # text before the first "/" is skipped.
    for chunk in source[2:-2].split("/")[1:]:
        try:
            key, value = chunk.split()
        except Exception:
            if g_exceptions_verbose > 1: exception_info()
            return flags
        if value == "true":
            flags[key] = True
    return flags
| 4967 | |
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.

    Returns:
        The result of ``_move_copy_page()``.

    Raises:
        ValueError: if the document is closed or a page number is invalid.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total = len(self)
    if not (pno in range(total) and to in range(-1, total)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # "After the last page": target the last page, insert behind it.
        target, before = total - 1, 0
    else:
        target, before = to, 1
    return self._move_copy_page(pno, target, before, 0)
| 4987 | |
@property
def name(self):
    """Value of the internal ``_name`` attribute."""
    return self._name
| 4991 | |
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None inquires the current value without modifying the
            document. Any other value sets /NeedAppearances to true if it
            is truthy and to false otherwise.

    Returns:
        None if the document is not a Form PDF. For an inquiry: the
        boolean stored under /Root/AcroForm/NeedAppearances, or None if no
        boolean is stored there. When setting: ``value`` itself.
    """
    if not self.is_form_pdf:
        return None

    pdf = _as_pdf_document(self)
    appkey = "NeedAppearances"
    form = mupdf.pdf_dict_getp(
            mupdf.pdf_trailer(pdf),
            "Root/AcroForm",
            )
    if value is None:
        # Pure inquiry: report what is stored and leave the PDF untouched.
        app = mupdf.pdf_dict_gets(form, appkey)
        if mupdf.pdf_is_bool(app):
            return bool(mupdf.pdf_to_bool(app))
        return None

    if value:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE)
    else:
        mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE)
    return value
| 5015 | |
@property
def needs_pass(self):
    """Indicate password required.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if isinstance(self.this, mupdf.FzDocument):
        fz_doc = self.this
    else:
        # Not an FzDocument: use what its super() method returns.
        fz_doc = self.this.super()
    return mupdf.fz_needs_password(fz_doc)
| 5024 | |
def next_location(self, page_id):
    """Get (chapter, page) of next page.

    Args:
        page_id: a (chapter, page) pair, or an int which is taken as a page
            number in chapter 0.

    Returns:
        The (chapter, page) tuple following 'page_id', or an empty tuple if
        'page_id' is the last location.

    Raises:
        ValueError: if the document is closed or encrypted, or 'page_id'
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        return ()
    fz_doc = _as_fz_document(self)
    chapter = page_id[0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[1]
    following = mupdf.fz_next_page(fz_doc, mupdf.fz_make_location(chapter, pno))
    return following.chapter, following.page
| 5045 | |
def page_annot_xrefs(self, n):
    """Get the annotation xref list of page number n.

    Delegates to ``extra.page_annot_xrefs()`` when the extra module is in
    use. Otherwise a negative ``n`` is increased by the page count until it
    is non-negative, and the list from ``JM_get_annot_xref_list()`` for
    that page object is returned.

    Raises:
        ValueError: (non-extra path) if ``n`` does not denote an existing
            page.
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)

    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    if n < 0 and page_count < 1:
        # Adding a page count of 0 would never make n non-negative.
        raise ValueError(MSG_BAD_PAGENO)
    while n < 0:
        n += page_count
    # Valid page numbers are 0 .. page_count - 1, so page_count itself is
    # already out of range.
    if n >= page_count:
        raise ValueError(MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    return JM_get_annot_xref_list(page_obj)
| 5063 | |
@property
def page_count(self):
    """Number of pages.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        return self.page_count2(self)
    if not isinstance(self.this, mupdf.FzDocument):
        return mupdf.pdf_count_pages(self.this)
    return mupdf.fz_count_pages(self.this)
| 5075 | |
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    Args:
        pno: page number; a negative value is increased by the page count
            until it is non-negative.

    Returns:
        A ``Rect`` built from the result of ``JM_cropbox()`` for the page
        object.

    Raises:
        ValueError: if the document is closed, or ``pno`` does not denote
            an existing page.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    if n < 0 and page_count < 1:
        # Adding a page count of 0 would never make n non-negative.
        raise ValueError(MSG_BAD_PAGENO)
    while n < 0:
        n += page_count
    pdf = _as_pdf_document(self)
    if n >= page_count:
        raise ValueError(MSG_BAD_PAGENO)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    return Rect(JM_py_from_rect(JM_cropbox(pageref)))
| 5095 | |
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: a (chapter, pno) pair, or an int which is taken as a page
            number in chapter 0. A negative int is increased by the page
            count until it is non-negative.

    Raises:
        ValueError: if 'page_id' is not contained in the document.
    """
    if type(page_id) is int:
        np = self.page_count
        if page_id < 0 and np < 1:
            # Adding a page count of 0 would never make page_id
            # non-negative; such a page cannot exist anyway.
            raise ValueError("page id not in document")
        while page_id < 0:
            page_id += np
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location(chapter, pno)
    return mupdf.fz_page_number_from_location(self.this, loc)
| 5109 | |
def page_xref(self, pno):
    """Get xref of page number.

    Delegates to ``extra.page_xref()`` when the extra module is in use.
    Otherwise a negative ``pno`` is increased by the page count until it is
    non-negative, and the object number of that page object is returned.

    Raises:
        ValueError: (non-extra path) if the document is closed, or ``pno``
            does not denote an existing page.
    """
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    if n < 0 and page_count < 1:
        # Adding a page count of 0 would never make n non-negative.
        raise ValueError(MSG_BAD_PAGENO)
    while n < 0:
        n += page_count
    pdf = _as_pdf_document(self)
    if n >= page_count:
        raise ValueError(MSG_BAD_PAGENO)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
| 5126 | |
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    Returns None if ``pdf_catalog()`` yields 0. If the catalog's
    /PageLayout entry is a name, that name is returned without its first
    character; in every other case the result is "SinglePage".
    """
    xref = self.pdf_catalog()
    if xref == 0:
        return None
    rc = self.xref_get_key(xref, "PageLayout")
    return rc[1][1:] if rc[0] == "name" else "SinglePage"
| 5140 | |
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    Yields None when there is no PDF catalog. Otherwise the name stored
    under the catalog key "PageMode" (without its leading slash), or
    "UseNone" if the key holds anything other than a name.
    """
    catalog = self.pdf_catalog()
    if catalog == 0:
        return None
    entry = self.xref_get_key(catalog, "PageMode")
    # entry is (type, value); a name value looks like "/UseOutlines"
    return entry[1][1:] if entry[0] == "name" else "UseNone"
| 5154 | |
# Return annotation used by `pages()`.
#
# The decision depends on the Python *language* version, so we look at
# `sys.version_info`. `sys.implementation.version` is the version of the
# interpreter implementation, which is a different number on non-CPython
# interpreters.
if sys.version_info < (3, 9):
    # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
    _pages_ret = collections.abc.Iterable
else:
    _pages_ret = collections.abc.Iterable[Page]
| 5160 | |
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    The arguments are interpreted like those of the range() built-in.
    A negative `start` counts from the end of the document; a missing or
    too large `stop` means "up to the last page". If `step` is omitted,
    it is -1 when `start` lies behind `stop`, else 1.
    """
    total = self.page_count
    if not total:
        return

    # first page of the range
    first = start or 0
    while first < 0:
        first += total
    if first not in range(total):
        raise ValueError("bad start page number")

    # end of the range (exclusive)
    last = total if stop is None or stop > total else stop

    # direction / stride
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if first > last else 1

    for pno in range(first, last, step):
        yield self.load_page(pno)
| 5189 | |
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns 0 if there is no underlying PDF document.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    # the catalog is the object referenced by the trailer's /Root entry
    trailer = mupdf.pdf_trailer(pdf)
    return mupdf.pdf_to_num(mupdf.pdf_dict_get(trailer, PDF_NAME('Root')))
| 5199 | |
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to `xref_object()` using -1 as the xref; `compressed` and
    `ascii` are passed through unchanged.
    """
    trailer_xref = -1
    return self.xref_object(trailer_xref, compressed=compressed, ascii=ascii)
| 5203 | |
@property
def permissions(self):
    """Document permissions.

    Returns 0 while the document is encrypted. For a PDF, the value of
    `pdf_document_permissions()` is returned. For other document types a
    PDF-like value is simulated from the generic permission checks.
    """
    if self.is_encrypted:
        return 0
    doc = self.this
    pdf = mupdf.pdf_document_from_fz_document(doc)
    if pdf.m_internal:
        # PDF: use the standard function
        return mupdf.pdf_document_permissions(pdf)

    # Non-PDF: start with everything granted, then switch off the bit of
    # every permission the document does not have.
    perm = 0xFFFFFFFC
    flag_pairs = (
        (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
        (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
        (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
        (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
    )
    for fz_flag, pdf_flag in flag_pairs:
        if not mupdf.fz_has_permission(doc, fz_flag):
            perm ^= pdf_flag
    return perm
| 5228 | |
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    An int `page_id` is taken as a page number in chapter 0. For the
    very first page, (0, 0), an empty tuple is returned.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = (0, page_id) if type(page_id) is int else page_id
    if location not in self:
        raise ValueError("page id not in document")
    if location == (0, 0):
        # nothing precedes the first page
        return ()
    chapter, pno = location
    previous = mupdf.fz_previous_page(
            self.this,
            mupdf.fz_make_location(chapter, pno),
            )
    return previous.chapter, previous.page
| 5244 | |
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The given page object is erased and the page with the same number is
    loaded again. Annotation references of the old page are carried over
    to the new one, which is returned.
    """
    pno = page.number
    # keep the annot references alive while the page is being replaced
    old_annots = {k: v for k, v in page._annot_refs.items()}

    # `self.load_page()` ends up in fz_load_chapter_page(), which returns
    # any matching page from the document's list of non-ref-counted loaded
    # pages instead of really reloading it. We want a genuine reload.
    #
    # Dropping our reference first decrements .refs. If .refs was 1, the
    # `fz_page*` is guaranteed to be freed and removed from that list, so
    # whatever we get back is a real reload - even if it happens to reuse
    # the same block of memory.
    #
    # If .refs was greater than 1, other references exist and the old
    # `fz_page` is not freed. A new `fz_page` then cannot reuse its
    # memory, so we verify the reload by comparing the pointers.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)

    # hand the saved annot references to the new page
    for k, annot in old_annots.items():
        page._annot_refs[k] = annot

    if refs_old != 1:
        # The old page was not freed: the new pointer must differ.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
| 5303 | |
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format
    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        If `uri` is empty or cannot be resolved, page_id is -1
        (respectively (-1, -1)) and x, y are 0.
    """
    # result used whenever there is nothing to resolve
    not_found = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
    if not uri:
        return not_found
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        if g_exceptions_verbose: exception_info()
        return not_found
    if chapters:
        page_id = (loc.chapter, loc.page)
    else:
        page_id = mupdf.fz_page_number_from_location(self.this, loc)
    return page_id, xp, yp
| 5329 | |
def rewrite_images(
        self,
        dpi_threshold=None,
        dpi_target=0,
        quality=0,
        lossy=True,
        lossless=True,
        bitonal=True,
        color=True,
        gray=True,
        set_to_gray=False,
        options=None,
        ):
    """Rewrite images in a PDF document.

    The typical use case is to reduce the size of the PDF by recompressing
    images. Default parameters will convert all images to JPEG where
    possible, using the specified resolutions and quality. Exclude
    undesired images by setting parameters to False.
    Args:
        dpi_threshold: look at images with a larger DPI only. If omitted
            (or zero), no subsampling is requested and dpi_target is
            ignored.
        dpi_target: change eligible images to this DPI. Must be less than
            dpi_threshold.
        quality: Quality of the recompressed images (0-100).
        lossy: process lossy image types (e.g. JPEG).
        lossless: process lossless image types (e.g. PNG).
        bitonal: process black-and-white images (e.g. FAX)
        color: process colored images.
        gray: process gray images.
        set_to_gray: whether to change the PDF to gray at process start.
        options: (PdfImageRewriterOptions) Custom options for image
            rewriting (optional). Expert use only. If provided, other
            parameters are ignored, except set_to_gray.
    Raises:
        ValueError: if dpi_target is not less than dpi_threshold, or if
            the options object carries attributes unknown to
            PdfImageRewriterOptions.
    """
    quality_str = str(quality)
    if not dpi_threshold:
        dpi_threshold = dpi_target = 0
    if dpi_target > 0 and dpi_target >= dpi_threshold:
        raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
    template_opts = mupdf.PdfImageRewriterOptions()
    dir1 = set(dir(template_opts))  # for checking that only existing options are set
    if not options:
        opts = mupdf.PdfImageRewriterOptions()

        # Collect (attribute prefix, recompression method) for every image
        # class that was selected. Bitonal images are recompressed as FAX,
        # all color and gray images as JPEG.
        selected = []
        if bitonal:
            selected.append(("bitonal", mupdf.FZ_RECOMPRESS_FAX))
        for wanted, colorspace in ((color, "color"), (gray, "gray")):
            if not wanted:
                continue
            if lossless:
                selected.append((f"{colorspace}_lossless", mupdf.FZ_RECOMPRESS_JPEG))
            if lossy:
                selected.append((f"{colorspace}_lossy", mupdf.FZ_RECOMPRESS_JPEG))

        # Each image class has the same five settings.
        for prefix, method in selected:
            setattr(opts, f"{prefix}_image_recompress_method", method)
            setattr(opts, f"{prefix}_image_subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
            setattr(opts, f"{prefix}_image_subsample_to", dpi_target)
            setattr(opts, f"{prefix}_image_subsample_threshold", dpi_threshold)
            setattr(opts, f"{prefix}_image_recompress_quality", quality_str)
    else:
        opts = options

    dir2 = set(dir(opts))  # checking that only possible options were used
    invalid_options = dir2 - dir1
    if invalid_options:
        raise ValueError(f"Invalid options: {invalid_options}")

    if set_to_gray:
        self.recolor(1)
    pdf = _as_pdf_document(self)
    mupdf.pdf_rewrite_images(pdf, opts)
| 5416 | |
def recolor(self, components=1):
    """Change the color component count on all pages.

    Args:
        components: (int) desired color component count, one of 1, 3, 4.

    Loads every page in turn and calls its same-named method.
    Raises ValueError if the document is not a PDF.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    for pno in range(self.page_count):
        page = self.load_page(pno)
        page.recolor(components)
| 5429 | |
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    Takes no arguments besides the document itself.
    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included.

    The result is computed once and stored in `self._resolved_names`;
    subsequent calls return that stored dictionary.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
        Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
        '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # this is a backward listing of page xref to page number
    page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        # print the object into a memory buffer and return the buffer's text
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary.

        `val` is the PDF object stored for a name. The destination array is
        taken either from `val` itself (if it is an array) or from its "D"
        entry (if it is a dictionary), and is then parsed in its string form.
        """
        templ_dict = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            array = obj_string(val)
        elif val.pdf_is_dict():
            array = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return templ_dict

        # replace PDF "null" by zero, omit the square brackets
        array = array.replace("null", "0")[1:-1]

        # find stuff before first "/"
        idx = array.find("/")
        if idx < 1:  # this has no target page spec
            templ_dict["dest"] = array  # return the orig. string
            return templ_dict

        subval = array[:idx].strip()  # stuff before "/"
        array = array[idx:]  # stuff from "/" onwards
        templ_dict["dest"] = array
        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if array.startswith("/XYZ"):
            del templ_dict["dest"]  # don't return orig string in this case

            # make a list of the 3 tokens following "/XYZ"
            array_list = array.split()[1:4]  # omit "/XYZ"

            # fill up missing tokens with "0" strings
            while len(array_list) < 3:  # fill up if too short
                array_list.append("0")  # add missing values

            # make list of 3 floats: x, y and zoom
            t = list(map(float, array_list))  # the resulting x, y, z values
            templ_dict["to"] = (t[0], t[1])
            templ_dict["zoom"] = t[2]

        # extract page number
        # NOTE(review): only references ending in "0 R" are treated as a
        # page xref; anything else is passed to int() as is - confirm that
        # other forms cannot occur here.
        if subval.endswith("0 R"):  # page xref given?
            templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
        else:  # naked page number given
            templ_dict["page"] = int(subval)
        return templ_dict

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests".
        Entries are added to `dest_dict` in place; items whose key is not
        a PDF name are reported via message() and skipped.
        """
        # length of the PDF dictionary
        name_count = mupdf.pdf_dict_len(pdf_dict)

        # extract key-val of each dict item
        for i in range(name_count):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if key.pdf_is_name():  # this should always be true!
                dict_key = key.pdf_to_name()
            else:
                message(f"key {i} is no /Name")
                dict_key = None

            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    # NOTE(review): other methods of this class obtain the PDF document via
    # `_as_pdf_document(self)`; here `self` is passed directly - confirm
    # that this is intended.
    pdf = mupdf.pdf_document_from_fz_document(self)

    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

    dest_dict = {}

    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")

    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)

    # extract destinations new style (PDF 1.2+)
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)

    self._resolved_names = dest_dict  # store result or reuse
    return dest_dict
| 5560 | |
def save(
        self,
        filename,
        garbage=0,
        clean=0,
        deflate=0,
        deflate_images=0,
        deflate_fonts=0,
        incremental=0,
        ascii=0,
        expand=0,
        linear=0,
        no_new_id=0,
        appearance=0,
        pretty=0,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    """Save PDF to file, pathlib.Path or file pointer.

    `filename` may be a str, an object with an `open` attribute (assumed
    to be a pathlib.Path), an object with a `name` attribute (assumed to
    be a file object, saved under that name), or an object with a `seek`
    attribute, which is written to directly.

    Raises ValueError for a closed or encrypted document, an unsupported
    `filename`, a non-incremental save to the original file, combining
    `linear` with `use_objstms`, a document without pages, an incremental
    save to anything but the original file, or a password longer than 40.
    """
    # From %pythonprepend save
    #
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    # reduce paths and named file objects to a plain file name
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        elif not hasattr(filename, "seek"):  # assume file object
            raise ValueError("filename must be str, Path or file object")

    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if linear and use_objstms:
        raise ValueError("'linear' and 'use_objstms' cannot both be requested")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental and (self.name != filename or self.stream):
        raise ValueError("incremental needs original file")
    for password in (user_pw, owner_pw):
        if password and len(password) > 40:
            raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)

    # transfer the arguments to the write options
    opts = mupdf.PdfWriteOptions()
    settings = (
        ("do_incremental", incremental),
        ("do_ascii", ascii),
        ("do_compress", deflate),
        ("do_compress_images", deflate_images),
        ("do_compress_fonts", deflate_fonts),
        ("do_decompress", expand),
        ("do_garbage", garbage),
        ("do_pretty", pretty),
        ("do_linear", linear),
        ("do_clean", clean),
        ("do_sanitize", clean),
        ("dont_regenerate_id", no_new_id),
        ("do_appearance", appearance),
        ("do_encrypt", encryption),
        ("permissions", permissions),
        ("do_preserve_metadata", preserve_metadata),
        ("do_use_objstms", use_objstms),
        ("compression_effort", compression_effort),
    )
    for attr, value in settings:
        setattr(opts, attr, value)

    # without an owner password, the user password serves as both
    effective_owner_pw = owner_pw if owner_pw is not None else user_pw
    if effective_owner_pw is not None:
        opts.opwd_utf8_set_value(effective_owner_pw)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)

    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)

    if isinstance(filename, str):
        mupdf.pdf_save_document(pdf, filename, opts)
        return
    # not a name: write to the file object
    out = JM_new_output_fileptr(filename)
    mupdf.pdf_write_document(pdf, out, opts)
    out.fz_close_output()
| 5649 | |
def save_snapshot(self, filename):
    """Save a file snapshot suitable for journalling.

    `filename` may be a str, an object with an `open` attribute (assumed
    to be a pathlib.Path) or an object with a `name` attribute (assumed
    to be a file object). It must differ from the document's own name.
    """
    if self.is_closed:
        raise ValueError("doc is closed")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        else:
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name:
        raise ValueError("cannot snapshot to original")
    pdf = _as_pdf_document(self)
    mupdf.pdf_save_snapshot(pdf, filename)
| 5666 | |
def saveIncr(self):
    """Save PDF incrementally.

    Shortcut for `save()` to the document's own name with
    `incremental=True` and the encryption method kept as is.
    """
    return self.save(
            self.name,
            incremental=True,
            encryption=mupdf.PDF_ENCRYPT_KEEP,
            )
| 5670 | |
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    `pyliste` must be a non-empty sequence of page numbers, all of them
    within range(len(self)). Raises ValueError otherwise, and also if
    the document is closed, encrypted or not a PDF.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")

    valid_range = range(len(self))
    page_numbers_ok = (
        len(pyliste) != 0
        and min(pyliste) in valid_range
        and max(pyliste) in valid_range
    )
    if not page_numbers_ok:
        raise ValueError("bad page number(s)")

    # get underlying pdf document,
    pdf = _as_pdf_document(self)
    # create page sub-pdf via pdf_rearrange_pages2().
    #
    # From MuPDF 1.25.3 on we pass PDF_CLEAN_STRUCTURE_KEEP, otherwise we
    # lose the structure tree which, for example, breaks test_3705.
    if mupdf_version_tuple >= (1, 25, 3):
        extra_args = (mupdf.PDF_CLEAN_STRUCTURE_KEEP,)
    else:
        extra_args = ()
    mupdf.pdf_rearrange_pages2(pdf, pyliste, *extra_args)

    # remove any existing pages with their kids
    self._reset_page_refs()
| 5700 | |
def set_language(self, language=None):
    """Set the document language of the PDF.

    A false `language` (None, empty string) unsets the language, anything
    else is converted via `fz_text_language_from_string()`. Returns True.
    """
    pdf = _as_pdf_document(self)
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_document_language(pdf, lang)
    return True
| 5709 | |
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    `on`, `off` and `locked` must be lists or tuples of OCG xrefs known
    to the document, `rbgroups` a list or tuple of such sequences.
    `basestate` is matched case-insensitively against ON, OFF and
    Unchanged. `config` selects an entry of /Configs, or the default
    configuration /D if it is -1.
    """
    if self.is_closed:
        raise ValueError("document closed")
    ocgs = set(self.get_ocgs().keys())
    if not ocgs:
        raise ValueError("document has no optional content")

    # the three plain OCG lists are validated alike
    checks = (
        (on, "bad type: 'on'", "bad OCGs in 'on': %s"),
        (off, "bad type: 'off'", "bad OCGs in 'off': %s"),
        (locked, "bad type: 'locked'", "bad OCGs in 'locked': %s"),
    )
    for xrefs, type_msg, ocg_msg in checks:
        if not xrefs:
            continue
        if type(xrefs) not in (list, tuple):
            raise ValueError(type_msg)
        unknown = set(xrefs).difference(ocgs)
        if unknown:
            raise ValueError(ocg_msg % unknown)

    if rbgroups:
        if type(rbgroups) not in (list, tuple):
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in (list, tuple):
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group).difference(ocgs)
            if unknown:
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        basestate = str(basestate).upper()
        if basestate == "UNCHANGED":
            basestate = "Unchanged"
        if basestate not in ("ON", "OFF", "Unchanged"):
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        # no optional content properties: nothing to set
        return
    if config == -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    else:
        obj = mupdf.pdf_array_get(
                mupdf.pdf_dict_get(ocp, PDF_NAME('Configs')),
                config,
                )
    if not obj.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)
    JM_set_ocg_arrays(obj, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
| 5774 | |
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    `number` is the sequence number of the UI configuration item, or its
    text. `action` 1 toggles the item, 2 deselects it, any other value
    selects it.
    """
    if isinstance(number, str):
        # A name was given: use the number of the first item with that text.
        matches = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number]
        if not matches:
            raise ValueError(f"bad OCG '{number}'.")
        number = matches[0]
    pdf = _as_pdf_document(self)
    if action == 1:
        handler = mupdf.pdf_toggle_layer_config_ui
    elif action == 2:
        handler = mupdf.pdf_deselect_layer_config_ui
    else:
        handler = mupdf.pdf_select_layer_config_ui
    handler(pdf, number)
| 5791 | |
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    `markinfo` is a dict with keys out of "Marked", "UserProperties" and
    "Suspects" and boolean values; missing keys are written as false.
    Returns False (writing nothing) if `markinfo` is empty or no dict,
    else True. Raises ValueError for a non-PDF, unknown keys, or values
    that are not true / false.
    """
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not (markinfo and isinstance(markinfo, dict)):
        return False

    settings = {"Marked": False, "UserProperties": False, "Suspects": False}
    unknown = set(markinfo.keys()).difference(settings.keys())
    if unknown:
        raise ValueError(f"bad MarkInfo key(s): {unknown}")
    settings.update(markinfo)

    # build the PDF dictionary source, e.g. "<</Marked true ...>>"
    parts = []
    for key, value in settings.items():
        text = str(value).lower()
        if text not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{text}'")
        parts.append(f"/{key} {text}")
    self.xref_set_key(xref, "MarkInfo", "<<" + "".join(parts) + ">>")
    return True
| 5814 | |
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    The value is matched case-insensitively against the supported names;
    one leading "/" is ignored. Returns True after storing the name in
    the PDF catalog, raises ValueError for a non-PDF or an unknown value.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")
    if pagelayout[0] == "/":
        pagelayout = pagelayout[1:]
    wanted = pagelayout.lower()
    # look up the properly spelled name
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(xref, "PageLayout", f"/{canonical}")
    return True
| 5830 | |
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    The value is matched case-insensitively against the supported names;
    one leading "/" is ignored. Returns True after storing the name in
    the PDF catalog, raises ValueError for a non-PDF or an unknown value.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")
    if pagemode[0] == "/":
        pagemode = pagemode[1:]
    wanted = pagemode.lower()
    # look up the properly spelled name
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(xref, "PageMode", f"/{canonical}")
    return True
| 5846 | |
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    `metadata` is a str; it is stored UTF-8 encoded. An existing
    /Metadata stream of the catalog is updated, otherwise a new stream
    is added and referenced from the catalog.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    res = mupdf.fz_new_buffer_from_copied_data(metadata.encode('utf-8'))
    xml = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    if xml.m_internal:
        # replace the content of the existing stream
        JM_update_stream(pdf, xml, res, 0)
        return
    # no metadata yet: create the stream object and hook it into the catalog
    xml = mupdf.pdf_add_stream(pdf, res, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(xml, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put(xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put(root, PDF_NAME('Metadata'), xml)
| 5864 | |
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    `config` is the index of the layer configuration to select. If
    `as_default` is true, the selected configuration is also made the
    default.
    """
    pdf = _as_pdf_document(self)
    cfgs = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            PDF_NAME('Configs'),
            )
    has_configs = mupdf.pdf_is_array(cfgs) and mupdf.pdf_array_len(cfgs)
    if not has_configs:
        # without a /Configs array only numbers below 1 are tolerated
        if config < 1:
            return
        raise ValueError(MSG_BAD_OC_LAYER)
    if config < 0:
        return
    mupdf.pdf_select_layer_config(pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default(pdf)
        mupdf.ll_pdf_read_ocg(pdf.m_internal)
| 5884 | |
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: (int) number of the object to replace, from 1 up to the
            xref table length minus 1.
        text: (str) the new object source.
        page: if given, the links of this page are refreshed afterwards.
    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            out of range.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1):
        # same error as update_stream() raises for a bad xref
        raise ValueError(MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links(_as_pdf_page(page))
| 5899 | |
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: number of the object whose stream is replaced; must be
            between 1 and the xref table length minus 1.
        stream: new stream content, converted via JM_BufferFromBytes.
        new: accepted for compatibility; not used by this method.
        compress: passed through to JM_update_stream.

    Raises:
        ValueError: if the document is closed or encrypted, `xref` is out
            of range, or the object is not a dictionary.
        TypeError: if `stream` cannot be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # Valid numbers are 1 .. xreflen-1, as in the other xref methods.
    if xref < 1 or xref >= xreflen:
        raise ValueError(MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError(MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res.m_internal:
        raise TypeError(MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    pdf.dirty = 1
| 5917 | |
@property
def version_count(self):
    '''
    Count versions of PDF document.

    Returns 0 when the document has no underlying PDF object.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
| 5927 | |
def write(
    self,
    garbage=False,
    clean=False,
    deflate=False,
    deflate_images=False,
    deflate_fonts=False,
    incremental=False,
    ascii=False,
    expand=False,
    linear=False,
    no_new_id=False,
    appearance=False,
    pretty=False,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    """Save the document into memory and return the result as bytes.

    Every argument is forwarded unchanged, by keyword, to `self.save()`,
    which writes into an in-memory binary buffer.
    """
    from io import BytesIO

    save_options = {
        "garbage": garbage,
        "clean": clean,
        "no_new_id": no_new_id,
        "appearance": appearance,
        "deflate": deflate,
        "deflate_images": deflate_images,
        "deflate_fonts": deflate_fonts,
        "incremental": incremental,
        "ascii": ascii,
        "expand": expand,
        "linear": linear,
        "pretty": pretty,
        "encryption": encryption,
        "permissions": permissions,
        "owner_pw": owner_pw,
        "user_pw": user_pw,
        "preserve_metadata": preserve_metadata,
        "use_objstms": use_objstms,
        "compression_effort": compression_effort,
    }
    sink = BytesIO()
    self.save(sink, **save_options)
    return sink.getvalue()
| 5975 | |
@property
def xref(self):
    """PDF xref number of page."""
    # NOTE(review): this reads `self.parent` and `self.number`, which is
    # what a page-like object would provide -- confirm it is intended here.
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
| 5981 | |
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Args:
        xref: object number (1 .. xref length - 1), or -1 for the trailer.
        key: key path, looked up with pdf_dict_getp.

    Returns:
        A tuple (type, text). ("null", "null") is returned when the object
        or the key does not exist.

    Raises:
        ValueError: if `xref` is out of range.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    if not obj.m_internal:
        return ("null", "null")
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return ("null", "null")

    # `value` stays None for kinds whose text is the serialized object.
    value = None
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        value = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        value = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        value = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        value = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        value = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        value = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"

    if value is None:
        value = JM_UnicodeFromBuffer(JM_object_to_buffer(subobj, 1, 0))
    return (kind, value)
| 6031 | |
def xref_get_keys(self, xref):
    """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

    Returns:
        List of key names; empty when the object has no dictionary entries.

    Raises:
        ValueError: if `xref` is out of range.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    return [
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(obj, i))
        for i in range(mupdf.pdf_dict_len(obj))
    ]
| 6050 | |
def xref_is_font(self, xref):
    """Check if xref is a font object.

    True when the object's "Type" key has the text "/Font".
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, text = self.xref_get_key(xref, "Type")[:2]
    return text == "/Font"
| 6058 | |
def xref_is_image(self, xref):
    """Check if xref is an image object.

    True when the object's "Subtype" key has the text "/Image".
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, text = self.xref_get_key(xref, "Subtype")[:2]
    return text == "/Image"
| 6066 | |
def xref_is_stream(self, xref=0):
    """Check if xref is a stream object.

    Returns False when the document has no underlying PDF object.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False  # not a PDF
| 6073 | |
def xref_is_xobject(self, xref):
    """Check if xref is a form xobject.

    True when the object's "Subtype" key has the text "/Form".
    Raises ValueError if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, text = self.xref_get_key(xref, "Subtype")[:2]
    return text == "/Form"
| 6081 | |
def xref_length(self):
    """Get length of xref table.

    Returns 0 when the document has no underlying PDF object.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_xref_len(pdf)
| 6089 | |
def xref_object(self, xref, compressed=0, ascii=0):
    """Get xref object source as a string.

    Args:
        xref: object number (1 .. xref length - 1), or -1 for the trailer.
        compressed, ascii: passed through to the object serializer.

    Raises:
        ValueError: if the document is closed or `xref` is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # Delegate to the implementation in the `extra` module.
        return extra.xref_object(self.this, xref, compressed, ascii)
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    buffer_ = JM_object_to_buffer(mupdf.pdf_resolve_indirect(obj), compressed, ascii)
    return JM_EscapeStrFromBuffer(buffer_)
| 6108 | |
def xref_set_key(self, xref, key, value):
    """Set the value of a PDF dictionary key.

    Args:
        xref: object number (1 .. xref length - 1), or -1 for the trailer.
        key: non-empty string; apart from "/" it must not contain any
            character from INVALID_NAME_CHARS.
        value: non-empty string; if it starts with "/", the remainder must
            not contain any character from INVALID_NAME_CHARS.

    Raises:
        ValueError: if the document is closed, or `key`, `value` or `xref`
            is invalid.
    """
    if self.is_closed:
        raise ValueError("document closed")

    key_ok = (
        key
        and isinstance(key, str)
        and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
    )
    if not key_ok:
        raise ValueError("bad 'key'")
    value_ok = (
        isinstance(value, str)
        and value
        and (value[0] != "/" or INVALID_NAME_CHARS.intersection(value[1:]) == set())
    )
    if not value_ok:
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    is_trailer = xref == -1
    if not is_trailer and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    new_obj = JM_set_object_value(obj, key, value)
    if not new_obj.m_internal:
        return  # did not work: skip update
    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, new_obj)
        return
    # Trailer: copy every entry of the new dictionary into the existing one.
    for i in range(mupdf.pdf_dict_len(new_obj)):
        mupdf.pdf_dict_put(
            obj,
            mupdf.pdf_dict_get_key(new_obj, i),
            mupdf.pdf_dict_get_val(new_obj, i),
        )
| 6146 | |
def xref_stream(self, xref):
    """Get decompressed xref stream.

    Returns:
        The stream content, or None if the object is not a stream.

    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            neither -1 nor within 1 .. xref length - 1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
| 6164 | |
def xref_stream_raw(self, xref):
    """Get xref stream without decompression.

    Returns:
        The raw stream content, or None if the object is not a stream.

    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            neither -1 nor within 1 .. xref length - 1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if xref != -1 and not _INRANGE(xref, 1, xreflen-1):
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
| 6182 | |
def xref_xml_metadata(self):
    """Get xref of document XML metadata.

    Returns 0 when the catalog has no /Metadata entry.
    """
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    meta_obj = mupdf.pdf_dict_get(catalog, PDF_NAME('Metadata'))
    if not meta_obj.m_internal:
        return 0
    return mupdf.pdf_to_num(meta_obj)
| 6194 | |
# Declared instance attributes. '__dict__' is part of the list, so instances
# can still take additional, dynamically assigned attributes.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')

# Read-only property exposing the private `_outline` attribute.
outline = property(lambda self: self._outline)
# Additional names bound to methods defined earlier in this class body.
tobytes = write
is_stream = xref_is_stream
| 6200 | |
# Module-level alias: calling `open(...)` from this module constructs a
# Document. Note that this rebinds the name `open` in this module's namespace.
open = Document
| 6202 | |
| 6203 | |
class DocumentWriter:
    """Writes pages to a PDF destination through a MuPDF document writer.

    Usable as a context manager; leaving the `with` block calls `close()`.
    """

    def __init__(self, path, options=''):
        """Create the writer.

        Args:
            path: a string, an object with an `absolute` attribute (it is
                converted with `str()`), an object with a `name` attribute
                (its `name` is used), or any other object, which is then
                handed to JM_new_output_fileptr.
            options: option string passed to the MuPDF document writer.
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(
                path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # Need to keep the Python JM_new_output_fileptr_Output instance
        # alive for the lifetime of this DocumentWriter, otherwise calls
        # to virtual methods implemented in Python fail. So we make it a
        # member of this DocumentWriter.
        #
        # Unrelated to this, mupdf.FzDocumentWriter will set
        # self._out.m_internal to null because ownership is passed in.
        #
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(
            out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def begin_page(self, mediabox):
        """Start a new page of size `mediabox` and return its device wrapper."""
        device = mupdf.fz_begin_page(self.this, JM_rect_from_py(mediabox))
        return DeviceWrapper(device)

    def close(self):
        """Close the underlying MuPDF document writer."""
        mupdf.fz_close_document_writer(self.this)

    def end_page(self):
        """Finish the page started by `begin_page()`."""
        mupdf.fz_end_page(self.this)
| 6246 | |
| 6247 | |
class Font:
    """Python-level handle for a MuPDF font, stored in `self.this`."""

    # (result key, flags field name, bit width), in the order used to decode
    # the packed word under cppyy.
    # NOTE(review): assumes the bit layout of MuPDF's fz_font_flags_t
    # (`cjk` 1 bit followed by `cjk_lang` 2 bits) -- confirm against the
    # MuPDF version in use.
    _FLAG_FIELDS = (
        ("mono", "is_mono", 1),
        ("serif", "is_serif", 1),
        ("bold", "is_bold", 1),
        ("italic", "is_italic", 1),
        ("substitute", "ft_substitute", 1),
        ("stretch", "ft_stretch", 1),
        ("fake-bold", "fake_bold", 1),
        ("fake-italic", "fake_italic", 1),
        ("opentype", "has_opentype", 1),
        ("invalid-bbox", "invalid_bbox", 1),
        ("cjk", "cjk", 1),
        ("cjk-lang", "cjk_lang", 2),
        ("embed", "embed", 1),
        ("never-embed", "never_embed", 1),
    )

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
            self,
            fontname=None,
            fontfile=None,
            fontbuffer=None,
            script=0,
            language=None,
            ordering=-1,
            is_bold=0,
            is_italic=0,
            is_serif=0,
            embed=1,
            ):
        """Create a font from a name, a file or a memory buffer.

        Args:
            fontname: font name. Some names select a CJK ordering, names
                found in `fitz_fontdescriptors` are loaded from the optional
                `pymupdf_fonts` package, other names are translated via
                `Base14_fontdict` when no ordering was given.
            fontfile: passed through to JM_get_font.
            fontbuffer: font data as bytes, bytearray, or an object with a
                `getvalue()` method.
            script, language, ordering, is_bold, is_italic, is_serif, embed:
                passed through to JM_get_font (`language` after conversion
                by fz_text_language_from_string).

        Raises:
            ValueError: if `fontbuffer` is given but is not bytes-like.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors:
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        font = JM_get_font(fontname, fontfile,
                    fontbuffer, script, lang, ordering,
                    is_bold, is_italic, is_serif, embed)
        self.this = font

    def __repr__(self):
        return "Font('%s')" % self.name

    def _glyph_for(self, codepoint, lang, script, small_caps):
        """Return (glyph id, font) to use for `codepoint`.

        With `small_caps` the glyph is looked up in this font only;
        otherwise the fallback-aware encoder decides which font is used.
        """
        if small_caps:
            # The font is always bound here, also for a negative glyph id.
            return mupdf.fz_encode_character_sc(self.this, codepoint), self.this
        return mupdf.fz_encode_character_with_fallback(self.this, codepoint, script, lang)

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Font bounding box as returned by fz_font_bbox()."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Copy of the font's data buffer."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return list of char lengths of unicode 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            gid, font = self._glyph_for(ord(ch), lang, script, small_caps)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Return the font flags as a dict, or None if no flags are available."""
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        if not mupdf_cppyy:
            return {key: getattr(f, field) for key, field, _ in self._FLAG_FIELDS}
        # cppyy includes remaining higher bits in the first bitfield, so the
        # individual fields are peeled off the packed word one by one.
        word = f.is_mono
        result = {}
        for key, _, width in self._FLAG_FIELDS:
            result[key] = word & ((1 << width) - 1)
            word >>= width
        return result

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._glyph_for(chr_, lang, script, small_caps)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._glyph_for(char, lang, script, small_caps)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Number of glyphs, read from the underlying font structure."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Check whether font has a glyph for this unicode.

        Returns the glyph id. With `fallback`, the fallback-aware encoder is
        used and `small_caps` is ignored.
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        elif small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr)
        else:
            gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True; see pymupdf commit ef4056ee4da2."""
        return True

    @property
    def name(self):
        """Font name as reported by fz_font_name()."""
        return mupdf.fz_font_name(self.this)

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises:
            TypeError: if `text` is not a str.
        """
        lang = mupdf.fz_text_language_from_string(language)
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        rc = 0
        for ch in text:
            gid, font = self._glyph_for(ord(ch), lang, script, small_caps)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        Returns sorted list of valid unicodes of a fz_font.
        '''
        ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
        return sorted({i.ucs for i in ucs_gids})
| 6501 | |
| 6502 | |
class Graftmap:
    """Holds a MuPDF graft map created for a target PDF document."""

    def __init__(self, doc):
        """Create a new graft map for the PDF document behind `doc`."""
        self.this = mupdf.pdf_new_graft_map(_as_pdf_document(doc))
        self.thisown = True

    def __del__(self):
        # Only exact Graftmap instances (not subclasses) reset the flag.
        if type(self) is Graftmap:
            self.thisown = False
| 6515 | |
| 6516 | |
class Link:
    """Wrapper around a MuPDF link object (`self.this`).

    The attributes `parent`, `xref` and `id` are read by several methods but
    are not set by `__init__`; `next` sets them on the link it returns.
    """

    def __del__(self):
        self._erase()

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    # str() and repr() produce the same text.
    __str__ = __repr__

    def _border(self, doc, xref):
        """Return border info of the link object, or None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return color info of the link object (None if `doc` is no PDF).

        Raises:
            ValueError: if no object could be created for `xref`.
        """
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _erase(self):
        """Detach the link from its parent and drop ownership."""
        self.parent = None
        self.thisown = False

    def _setBorder(self, border, doc, xref):
        """Apply `border` to the link object; None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        if self.parent.parent.is_closed or self.parent.parent.is_encrypted:
            raise ValueError("document closed or encrypted")
        doc = self.parent.parent

        if self.is_external or self.uri.startswith("#"):
            uri = None
        else:
            uri = doc.resolve_link(self.uri)

        return linkDest(self, uri, doc)

    @property
    def flags(self) -> int:
        """Value of the link's "F" key as int; 0 if absent or not a PDF."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        f = doc.xref_get_key(self.xref, "F")
        if f[1] != "null":
            return int(f[1])
        return 0

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        this_link = self.this
        if not this_link.m_internal or not this_link.m_internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(this_link.m_internal.uri))

    @property
    def next(self):
        """Next link.

        Returns None when there is no following link. Otherwise the new
        Link gets this link's parent, is registered in the parent's
        `_annot_refs`, and receives `xref` / `id` of the entry following
        this link among the parent's link annotations (0 / "" if this link
        has no xref).
        """
        if not self.this.m_internal:
            return None
        CheckParent(self)
        val = self.this.next()
        if not val.m_internal:
            return None
        val = Link(val)
        val.thisown = True
        val.parent = self.parent  # copy owning page from prev link
        val.parent._annot_refs[id(val)] = val
        if self.xref > 0:  # prev link has an xref
            # Collect the link annotations once and reuse them for both lookups.
            links = [x for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            idx = [x[0] for x in links].index(self.xref)
            val.xref = links[idx + 1][0]
            val.id = links[idx + 1][2]
        else:
            val.xref = 0
            val.id = ""
        return val

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to expect exceptions from us, so we
        # ensure that we raise on error.
        if self.this is None or not self.this.m_internal:
            raise Exception('self.this.m_internal not available')
        val = JM_py_from_rect(self.this.rect())
        return Rect(val)

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the link border.

        If `border` is not a dict, one is built from `width`, `style` and
        `dashes`.
        """
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors.

        If `colors` is not a dict, one is built from `stroke` and `fill`.
        A fill color only triggers a warning message. An empty stroke
        sequence stores an empty array under key "C".
        """
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke in ([], ()):
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        s = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", s)

    def set_flags(self, flags):
        """Store `flags` under the link's "F" key.

        Raises:
            ValueError: if the document is no PDF or `flags` is not an int.
        """
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if type(flags) is not int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        this_link = self.this
        return this_link.m_internal.uri if this_link.m_internal else ''

    page = -1
| 6698 | |
| 6699 | |
| 6700 class Matrix: | |
| 6701 | |
| 6702 def __abs__(self): | |
| 6703 return math.sqrt(sum([c*c for c in self])) | |
| 6704 | |
| 6705 def __add__(self, m): | |
| 6706 if hasattr(m, "__float__"): | |
| 6707 return Matrix(self.a + m, self.b + m, self.c + m, | |
| 6708 self.d + m, self.e + m, self.f + m) | |
| 6709 if len(m) != 6: | |
| 6710 raise ValueError("Matrix: bad seq len") | |
| 6711 return Matrix(self.a + m[0], self.b + m[1], self.c + m[2], | |
| 6712 self.d + m[3], self.e + m[4], self.f + m[5]) | |
| 6713 | |
| 6714 def __bool__(self): | |
| 6715 return not (max(self) == min(self) == 0) | |
| 6716 | |
| 6717 def __eq__(self, mat): | |
| 6718 if not hasattr(mat, "__len__"): | |
| 6719 return False | |
| 6720 return len(mat) == 6 and not (self - mat) | |
| 6721 | |
| 6722 def __getitem__(self, i): | |
| 6723 return (self.a, self.b, self.c, self.d, self.e, self.f)[i] | |
| 6724 | |
| 6725 def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None): | |
| 6726 """ | |
| 6727 Matrix() - all zeros | |
| 6728 Matrix(a, b, c, d, e, f) | |
| 6729 Matrix(zoom-x, zoom-y) - zoom | |
| 6730 Matrix(shear-x, shear-y, 1) - shear | |
| 6731 Matrix(degree) - rotate | |
| 6732 Matrix(Matrix) - new copy | |
| 6733 Matrix(sequence) - from 'sequence' | |
| 6734 Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix. | |
| 6735 | |
| 6736 Explicit keyword args a, b, c, d, e, f override any earlier settings if | |
| 6737 not None. | |
| 6738 """ | |
| 6739 if not args: | |
| 6740 self.a = self.b = self.c = self.d = self.e = self.f = 0.0 | |
| 6741 elif len(args) > 6: | |
| 6742 raise ValueError("Matrix: bad seq len") | |
| 6743 elif len(args) == 6: # 6 numbers | |
| 6744 self.a, self.b, self.c, self.d, self.e, self.f = map(float, args) | |
| 6745 elif len(args) == 1: # either an angle or a sequ | |
| 6746 if isinstance(args[0], mupdf.FzMatrix): | |
| 6747 self.a = args[0].a | |
| 6748 self.b = args[0].b | |
| 6749 self.c = args[0].c | |
| 6750 self.d = args[0].d | |
| 6751 self.e = args[0].e | |
| 6752 self.f = args[0].f | |
| 6753 elif hasattr(args[0], "__float__"): | |
| 6754 theta = math.radians(args[0]) | |
| 6755 c_ = round(math.cos(theta), 8) | |
| 6756 s_ = round(math.sin(theta), 8) | |
| 6757 self.a = self.d = c_ | |
| 6758 self.b = s_ | |
| 6759 self.c = -s_ | |
| 6760 self.e = self.f = 0.0 | |
| 6761 else: | |
| 6762 self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0]) | |
| 6763 elif len(args) == 2 or len(args) == 3 and args[2] == 0: | |
| 6764 self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \ | |
| 6765 0.0, 0.0, float(args[1]), 0.0, 0.0 | |
| 6766 elif len(args) == 3 and args[2] == 1: | |
| 6767 self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \ | |
| 6768 float(args[1]), float(args[0]), 1.0, 0.0, 0.0 | |
| 6769 else: | |
| 6770 raise ValueError("Matrix: bad args") | |
| 6771 | |
| 6772 # Override with explicit args if specified. | |
| 6773 if a is not None: self.a = a | |
| 6774 if b is not None: self.b = b | |
| 6775 if c is not None: self.c = c | |
| 6776 if d is not None: self.d = d | |
| 6777 if e is not None: self.e = e | |
| 6778 if f is not None: self.f = f | |
| 6779 | |
def __invert__(self):
    """Return the inverse of this matrix as a new Matrix (operator `~`).

    The result starts as Matrix() and is filled by its invert() method;
    the status returned by invert() is not checked here.
    """
    inverse = Matrix()
    inverse.invert(self)
    return inverse
| 6785 | |
| 6786 def __len__(self): | |
| 6787 return 6 | |
| 6788 | |
def __mul__(self, m):
    """Multiply by a number or by another matrix.

    A number (anything offering `__float__`) scales all six components.
    Any other operand is combined with self via concat(self, m) on a
    fresh matrix, which is returned.
    """
    if not hasattr(m, "__float__"):
        return Matrix(1, 1).concat(self, m)
    own = (self.a, self.b, self.c, self.d, self.e, self.f)
    return Matrix(*[value * m for value in own])
| 6795 | |
def __neg__(self):
    """Return a new Matrix with every component negated."""
    own = (self.a, self.b, self.c, self.d, self.e, self.f)
    return Matrix(*[-value for value in own])
| 6798 | |
| 6799 def __nonzero__(self): | |
| 6800 return not (max(self) == min(self) == 0) | |
| 6801 | |
def __pos__(self):
    """Return a new Matrix built from this one (unary `+`)."""
    duplicate = Matrix(self)
    return duplicate
| 6804 | |
| 6805 def __repr__(self): | |
| 6806 return "Matrix" + str(tuple(self)) | |
| 6807 | |
| 6808 def __setitem__(self, i, v): | |
| 6809 v = float(v) | |
| 6810 if i == 0: self.a = v | |
| 6811 elif i == 1: self.b = v | |
| 6812 elif i == 2: self.c = v | |
| 6813 elif i == 3: self.d = v | |
| 6814 elif i == 4: self.e = v | |
| 6815 elif i == 5: self.f = v | |
| 6816 else: | |
| 6817 raise IndexError("index out of range") | |
| 6818 return | |
| 6819 | |
def __sub__(self, m):
    """Return a new Matrix holding the element-wise difference self - `m`.

    `m` is either a number (anything offering `__float__`), which is
    subtracted from each of the six components, or a sequence of length 6
    whose items are subtracted position by position.

    Raises ValueError if `m` is a sequence whose length is not 6.
    """
    own = (self.a, self.b, self.c, self.d, self.e, self.f)
    if hasattr(m, "__float__"):
        return Matrix(*[value - m for value in own])
    if len(m) != 6:
        raise ValueError("Matrix: bad seq len")
    return Matrix(*[value - m[pos] for pos, value in enumerate(own)])
| 6828 | |
def __truediv__(self, m):
    """Divide by a number or by another matrix.

    A number (anything offering `__float__`) divides all six components.
    Any other operand is inverted with util_invert_matrix() and self is
    then combined with that inverse via concat().

    Raises ZeroDivisionError("matrix not invertible") if the inversion
    result is falsy.
    """
    if hasattr(m, "__float__"):
        own = (self.a, self.b, self.c, self.d, self.e, self.f)
        # Multiply by 1.0 first so the division is done on floats.
        return Matrix(*[value * 1.0 / m for value in own])
    inverse = util_invert_matrix(m)[1]
    if inverse:
        return Matrix(1, 1).concat(self, inverse)
    raise ZeroDivisionError("matrix not invertible")
| 6838 | |
def concat(self, one, two):
    """Multiply two matrices and replace current one.

    Both operands must have length 6, else ValueError is raised. The
    product comes from util_concat_matrix(one, two). Returns self.
    """
    len_one, len_two = len(one), len(two)
    if len_one != 6 or len_two != 6:
        raise ValueError("Matrix: bad seq len")
    product = util_concat_matrix(one, two)
    self.a, self.b, self.c, self.d, self.e, self.f = product
    return self
| 6845 | |
def invert(self, src=None):
    """Calculate the inverted matrix. Return 0 if successful and replace
    current one. Else return 1 and do nothing.

    The matrix to invert is `src`, or self when `src` is None.
    """
    source = self if src is None else src
    outcome = util_invert_matrix(source)
    if outcome[0] == 1:
        # Status 1: leave the current values untouched.
        return 1
    self.a, self.b, self.c, self.d, self.e, self.f = outcome[1]
    return 0
| 6858 | |
@property
def is_rectilinear(self):
    """True if rectangles are mapped to rectangles.

    That is the case when either b and c, or a and d, are all smaller
    than EPSILON in absolute value.
    """
    def negligible(value):
        return abs(value) < EPSILON

    if negligible(self.b) and negligible(self.c):
        return True
    return negligible(self.a) and negligible(self.d)
| 6864 | |
def prerotate(self, theta):
    """Calculate pre rotation and replace current matrix.

    `theta` is an angle in degrees. It is first reduced to the range
    0 <= theta < 360. Angles within EPSILON of 0, 90, 180 or 270 are
    handled by swapping / negating components instead of going through
    sine and cosine. Components e and f are never changed. Returns self.
    """
    theta = float(theta)
    while theta < 0:
        theta += 360
    while theta >= 360:
        theta -= 360

    def is_close_to(angle):
        return abs(angle - theta) < EPSILON

    if is_close_to(0):
        return self  # rotating by (almost) nothing changes nothing

    # Snapshot the linear part before any component is overwritten.
    a, b, c, d = self.a, self.b, self.c, self.d

    if is_close_to(90.0):
        self.a, self.b, self.c, self.d = c, d, -a, -b
    elif is_close_to(180.0):
        self.a, self.b, self.c, self.d = -a, -b, -c, -d
    elif is_close_to(270.0):
        self.a, self.b, self.c, self.d = -c, -d, a, b
    else:
        rad = math.radians(theta)
        sine = math.sin(rad)
        cosine = math.cos(rad)
        self.a = cosine * a + sine * c
        self.b = cosine * b + sine * d
        self.c = -sine * a + cosine * c
        self.d = -sine * b + cosine * d

    return self
| 6907 | |
def prescale(self, sx, sy):
    """Calculate pre scaling and replace current matrix.

    Components a and b are multiplied by float(sx), c and d by float(sy);
    e and f are left alone. Returns self.
    """
    factor_x, factor_y = float(sx), float(sy)
    for name, factor in (
            ("a", factor_x), ("b", factor_x),
            ("c", factor_y), ("d", factor_y)):
        setattr(self, name, getattr(self, name) * factor)
    return self
| 6917 | |
def preshear(self, h, v):
    """Calculate pre shearing and replace current matrix.

    With the values before the call: a += v*c, b += v*d, c += h*a and
    d += h*b. Components e and f are left alone. Returns self.
    """
    h, v = float(h), float(v)
    # Snapshot all four inputs so every formula uses the old values.
    a, b, c, d = self.a, self.b, self.c, self.d
    self.a = a + v * c
    self.b = b + v * d
    self.c = c + h * a
    self.d = d + h * b
    return self
| 6928 | |
def pretranslate(self, tx, ty):
    """Calculate pre translation and replace current matrix.

    Only e and f change: e += tx*a + ty*c and f += tx*b + ty*d.
    Returns self.
    """
    shift_x, shift_y = float(tx), float(ty)
    delta_e = shift_x * self.a + shift_y * self.c
    delta_f = shift_x * self.b + shift_y * self.d
    self.e += delta_e
    self.f += delta_f
    return self
| 6936 | |
# Additional names for methods defined earlier in this class body.
norm = __abs__          # m.norm() runs the same code as abs(m)
__div__ = __truediv__   # Python 2 name of the "/" operator hook
__inv__ = __invert__    # second name for the "~" implementation
| 6940 | |
| 6941 | |
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]"""

    def __hash__(self):
        """Return the hash of the constant tuple (1, 0, 0, 1, 0, 0)."""
        return hash((1,0,0,1,0,0))

    def __init__(self):
        """Initialise via the two-argument (zoom) form of Matrix."""
        Matrix.__init__(self, 1.0, 1.0)

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        """Pin the six components; store any other attribute normally.

        Whatever `value` is given, 'a' and 'd' are stored as 1.0 and
        'b', 'c', 'e', 'f' as 0.0.
        """
        # Compare against whole names. A substring test such as
        # `name in "bcef"` would also match "", "bc", "ef", ... and
        # silently replace the value of such unrelated attributes.
        if name in ("a", "d"):
            self.__dict__[name] = 1.0
        elif name in ("b", "c", "e", "f"):
            self.__dict__[name] = 0.0
        else:
            self.__dict__[name] = value

    def checkargs(*args):
        """Raise NotImplementedError for any arguments (incl. self)."""
        raise NotImplementedError("Identity is readonly")

Identity = IdentityMatrix()
| 6966 | |
| 6967 | |
class linkDest:
    """link or outline destination details"""

    def __init__(self, obj, rlink, document=None):
        """Derive destination details from a link or outline item.

        Args:
            obj: object providing `is_external`, `page` and `uri`.
            rlink: optional resolved link. If truthy and the uri does not
                start with '#', the uri is replaced by
                '#page=<rlink[0]+1>&zoom=0,<rlink[1]>,<rlink[2]>'.
            document: optional object providing `resolve_names()`; used
                for uris of the form '#nameddest=...'.

        Sets the attributes dest, file_spec, flags, is_map, is_uri, kind,
        lt, named, new_window, page, rb and uri.
        """
        isExt = obj.is_external
        isInt = not isExt
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            """Split 'k1=v1&k2&...' into a dict; keys without '=' map to None."""
            # Parse exactly the text handed in. Callers strip a leading
            # '#' themselves, so nothing must be cut off here (otherwise a
            # uri without leading '#' would lose its first character).
            ret = dict()
            for item in uri.split('&'):
                eq = item.find('=')
                if eq >= 0:
                    ret[item[:eq]] = item[eq+1:]
                else:
                    ret[item] = None
            return ret

        def unescape(name):
            """Unescape '%AB' substrings to chr(0xAB)."""
            split = name.replace("%%", "%25")  # take care of escaped '%'
            split = split.split("%")
            newname = split[0]
            for item in split[1:]:
                piece = item[:2]
                newname += chr(int(piece, base=16))
                newname += item[2:]
            return newname

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if obj.is_external:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE
        if isInt and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    # Page number plus target point.
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float((m.group(3))), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        # Page number only.
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        # NOTE(review): this assert makes the `else`
                        # branch below reachable only with a document
                        # (or when running under -O) - confirm intent.
                        assert document
                        if document and m:
                            named = unescape(m.group(1))
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)
        if obj.is_external:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                # 'file:[//]<spec>[#page=N...]' becomes LAUNCH or GOTOR.
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
| 7070 | |
class Widget:
    '''
    Class describing a PDF form field ("widget")
    '''

    def __init__(self):
        """Create a widget description with default property values."""
        self.border_color = None
        self.border_style = "S"
        self.border_width = 0
        self.border_dashes = None
        self.choice_values = None  # choice fields only
        self.rb_parent = None  # radio buttons only: xref of owning parent

        self.field_name = None  # field name
        self.field_label = None  # field label
        self.field_value = None
        self.field_flags = 0
        self.field_display = 0
        self.field_type = 0  # valid range 1 through 7
        self.field_type_string = None  # field type as string

        self.fill_color = None
        self.button_caption = None  # button caption
        self.is_signed = None  # True / False if signature
        self.text_color = (0, 0, 0)
        self.text_font = "Helv"
        self.text_fontsize = 0
        self.text_maxlen = 0  # text fields only
        self.text_format = 0  # text fields only
        self._text_da = ""  # /DA = default appearance

        self.script = None  # JavaScript (/A)
        self.script_stroke = None  # JavaScript (/AA/K)
        self.script_format = None  # JavaScript (/AA/F)
        self.script_change = None  # JavaScript (/AA/V)
        self.script_calc = None  # JavaScript (/AA/C)
        self.script_blur = None  # JavaScript (/AA/Bl)
        self.script_focus = None  # JavaScript (/AA/Fo) codespell:ignore

        self.rect = None  # annot value
        self.xref = 0  # annot value

    def __repr__(self):
        """Return a short description with field type string and script."""
        return f'Widget:(field_type={self.field_type_string} script={self.script})'

    def _adjust_font(self):
        """Ensure text_font is from our list and correctly spelled.

        The comparison ignores case; an empty or unknown name becomes
        "Helv".
        """
        if not self.text_font:
            self.text_font = "Helv"
            return
        wanted = self.text_font.lower()
        for f in ("Cour", "TiRo", "Helv", "ZaDb"):
            if wanted == f.lower():
                self.text_font = f
                return
        self.text_font = "Helv"

    def _checker(self):
        """Any widget type checks.

        Raises ValueError if field_type is not in 1..7.
        """
        if self.field_type not in range(1, 8):
            raise ValueError("bad field type")

        # if setting a radio button to ON, first set Off all buttons
        # in the group - this is not done by MuPDF:
        if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
            # so we are about setting this button to ON/True
            # check other buttons in same group and set them to 'Off'
            doc = self.parent.parent
            kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
            if kids_type == "array":
                # strip the brackets and the "0 R" parts, keep the numbers
                xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
                for xref in xrefs:
                    if xref != self.xref:
                        doc.xref_set_key(xref, "AS", "/Off")
        # the calling method will now set the intended button to on and
        # will find everything prepared for correct functioning.

    def _parse_da(self):
        """Extract font name, size and color from default appearance string (/DA object).

        Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.

        Does nothing if the stored /DA string is empty. Otherwise sets
        text_font, text_fontsize and text_color and clears the string.
        """
        if not self._text_da:
            return
        font = "Helv"
        fsize = 0
        col = (0, 0, 0)
        dat = self._text_da.split()  # split on any whitespace
        for i, item in enumerate(dat):
            if item == "Tf":  # preceded by "/<font>" and "<size>"
                font = dat[i - 2][1:]
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":  # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
        self.text_font = font
        self.text_fontsize = fsize
        self.text_color = col
        self._text_da = ""

    def _validate(self):
        """Validate the class entries.

        Normalises several properties in place and raises ValueError for
        an infinite or empty rect, a missing field name, script values
        that are not strings, or a bad field type (via _checker()).
        """
        if (self.rect.is_infinite
            or self.rect.is_empty
        ):
            raise ValueError("bad rect")

        if not self.field_name:
            raise ValueError("field name missing")

        if self.field_label == "Unnamed":
            self.field_label = None
        CheckColor(self.border_color)
        CheckColor(self.fill_color)
        if not self.text_color:
            self.text_color = (0, 0, 0)
        CheckColor(self.text_color)

        if not self.border_width:
            self.border_width = 0

        if not self.text_fontsize:
            self.text_fontsize = 0

        self.border_style = self.border_style.upper()[0:1]

        # standardize content of JavaScript entries
        btn_type = self.field_type in (
            mupdf.PDF_WIDGET_TYPE_BUTTON,
            mupdf.PDF_WIDGET_TYPE_CHECKBOX,
            mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
        )
        if not self.script:
            self.script = None
        elif type(self.script) is not str:
            raise ValueError("script content must be a string")

        # buttons cannot have the following script actions
        for attr in (
                "script_calc", "script_change", "script_format",
                "script_stroke", "script_blur", "script_focus"):
            content = getattr(self, attr)
            if btn_type or not content:
                setattr(self, attr, None)
            elif type(content) is not str:
                raise ValueError(f"{attr} content must be a string")

        self._checker()  # any field_type specific checks

    def _sync_flags(self):
        """Propagate the field flags.

        If this widget has a "/Parent", set its field flags and that of all
        its /Kids widgets to the value of the current widget.
        Only possible for widgets existing in the PDF.

        Returns True or False.
        """
        if not self.xref:
            return False  # no xref: widget not in the PDF
        doc = self.parent.parent  # the owning document
        assert doc
        pdf = _as_pdf_document(doc)
        # load underlying PDF object
        pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
        Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
        if not Parent.pdf_is_dict():
            return False  # no /Parent: nothing to do

        # put the field flags value into the parent field flags:
        Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        # also put that value into all kids of the Parent
        kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            message("warning: malformed PDF, Parent has no Kids array")
            return False  # no /Kids: should never happen!

        for i in range(kids.pdf_array_len()):  # walk through all kids
            # access kid widget, and do some precautionary checks
            kid = kids.pdf_array_get(i)
            if not kid.pdf_is_dict():
                continue
            xref = kid.pdf_to_num()  # get xref of the kid
            if xref == self.xref:  # skip self widget
                continue
            subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
            if not subtype.pdf_to_name() == "Widget":
                continue
            # put the field flags value into the kid field flags:
            kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        return True  # all done

    def button_states(self):
        """Return the on/off state names for button widgets.

        A button may have 'normal' or 'pressed down' appearances. While the 'Off'
        state is usually called like this, the 'On' state is often given a name
        relating to the functional context.

        Returns None if field_type is not 2 or 5, or if the widget has no
        `parent` attribute. Otherwise returns a dict with keys "normal"
        and "down"; each value is a list of state names, or None if the
        respective appearance entry is neither a dict nor an xref.
        """
        if self.field_type not in (2, 5):
            return None  # no button type
        if not hasattr(self, "parent"):
            return None  # field does not yet exist on a page
        doc = self.parent.parent
        xref = self.xref
        states = {"normal": None, "down": None}
        for state, key in (("normal", "AP/N"), ("down", "AP/D")):
            kind, value = doc.xref_get_key(xref, key)
            # Exactly one branch per entry: the type tag must never be
            # re-tested on the already extracted text (an empty "<<>>"
            # would otherwise raise IndexError).
            if kind == "dict":
                source = value[2:-2]  # drop the enclosing "<<" and ">>"
            elif kind == "xref":
                source = doc.xref_object(int(value.split(" ")[0]))
            else:
                continue
            # every "/Name ..." piece contributes its name
            states[state] = [x.split()[0] for x in source.split("/")[1:]]
        return states

    @property
    def next(self):
        """The `next` attribute of the underlying annotation."""
        return self._annot.next

    def on_state(self):
        """Return the "On" value for button widgets.

        This is useful for radio buttons mainly. The result is the first
        state name unequal to "Off" as returned by method button_states().
        If no such name exists (e.g. the button is new / being created),
        a warning is shown and True is returned.
        Returns None if field_type is not 2 or 5.
        """
        if self.field_type not in (2, 5):
            return None  # no checkbox or radio button
        bstate = self.button_states() or {}
        for names in bstate.values():
            # an appearance without state names is stored as None
            for v in names or ():
                if v != "Off":
                    return v
        message("warning: radio button has no 'On' value.")
        return True

    def reset(self):
        """Reset the field value to its default.
        """
        TOOLS._reset_widget(self._annot)

    def update(self, sync_flags=False):
        """Reflect Python object in the PDF.

        Args:
            sync_flags: if true, also call _sync_flags() after saving.
        """
        self._validate()

        self._adjust_font()  # ensure valid text_font name

        # now create the /DA string; the operator depends on the number
        # of color components
        # NOTE(review): other lengths leave `fmt` undefined - presumably
        # CheckColor() in _validate() rejects them; verify.
        if len(self.text_color) == 3:
            fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
        elif len(self.text_color) == 1:
            fmt = "{:g} g /{f:s} {s:g} Tf"
        elif len(self.text_color) == 4:
            fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf"
        self._text_da = fmt.format(*self.text_color, f=self.text_font,
                                    s=self.text_fontsize)

        # if widget has a '/AA/C' script, make sure it is in the '/CO'
        # array of the '/AcroForm' dictionary.
        if self.script_calc:  # there is a "calculation" script:
            # make sure we are in the /CO array
            util_ensure_widget_calc(self._annot)

        # finally update the widget
        TOOLS._save_widget(self._annot, self)
        self._text_da = ""
        if sync_flags:
            self._sync_flags()  # propagate field flags to parent and kids
| 7409 | |
| 7410 | |
| 7411 from . import _extra | |
| 7412 | |
| 7413 | |
class Outline:
    """Wrapper around an outline item object stored in `this`.

    Most properties read directly from `this.m_internal`.
    """

    __slots__ = [ 'this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Like `dest` property but uses `document` to resolve destinations for
        kind=LINK_NAMED.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        """Outline wrapping `this.down()`, or None if that has no m_internal."""
        child = self.this.down()
        return Outline(child) if child.m_internal else None

    @property
    def is_external(self):
        """Whether the item's uri is an external link.

        False if there is no m_internal or no uri.
        """
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            #
            return _extra.Outline_is_external( self.this)
        internal = self.this.m_internal
        if not internal:
            return False
        uri = internal.uri
        if uri is None:
            return False
        return mupdf.fz_is_external_link(uri)

    @property
    def is_open(self):
        """Value of `m_internal.is_open`."""
        return self.this.m_internal.is_open

    @property
    def next(self):
        """Outline wrapping `this.next()`, or None if that has no m_internal."""
        sibling = self.this.next()
        return Outline(sibling) if sibling.m_internal else None

    @property
    def page(self):
        """Value of `m_internal.page.page`."""
        return self.this.m_internal.page.page

    @property
    def title(self):
        """Value of `m_internal.title`."""
        return self.this.m_internal.title

    @property
    def uri(self):
        """Value of `m_internal.uri`, or None if there is no m_internal."""
        internal = self.this.m_internal
        return internal.uri if internal else None

    @property
    def x(self):
        """Value of `m_internal.x`."""
        return self.this.m_internal.x

    @property
    def y(self):
        """Value of `m_internal.y`."""
        return self.this.m_internal.y
| 7494 | |
| 7495 | |
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    recurse, instance_forms, ascii and no_update are copied to the
    attributes of the same name. If `sanitize` is true, a sanitize filter
    factory is added which uses `sopts` (a mupdf.PdfSanitizeFilterOptions,
    created here if not given).
    '''
    result = mupdf.PdfFilterOptions()
    result.recurse = recurse
    result.instance_forms = instance_forms
    result.ascii = ascii
    result.no_update = no_update
    if not sanitize:
        return result

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if sopts:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
    else:
        sopts = mupdf.PdfSanitizeFilterOptions()

    class Factory(mupdf.PdfFilterFactory2):
        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                # debugging aid, disabled
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    self.sopts.internal(),
                    )

    factory = Factory()
    result.add_factory(factory.internal())
    # Keep a reference on the returned object - presumably so that the
    # Python-side factory stays alive as long as the options do.
    result._factory = factory
    return result
| 7556 | |
| 7557 | |
| 7558 class Page: | |
| 7559 | |
def __init__(self, page, document):
    """Wrap a MuPDF page object.

    Args:
        page: a mupdf.FzPage or mupdf.PdfPage (asserted).
        document: stored as `parent`.

    `number` is read from the underlying page structure, or is None if
    `page.m_internal` is falsy.
    """
    assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
    self.this = page
    self.thisown = True
    self.last_point = None
    self.draw_cont = ''
    self._annot_refs = dict()
    self.parent = document
    internal = page.m_internal
    if not internal:
        self.number = None
    elif isinstance(page, mupdf.PdfPage):
        # for PDF pages the number is taken from the `super` member
        self.number = internal.super.number
    else:
        self.number = internal.number
| 7575 | |
| 7576 def __repr__(self): | |
| 7577 return self.__str__() | |
| 7578 CheckParent(self) | |
| 7579 x = self.parent.name | |
| 7580 if self.parent.stream is not None: | |
| 7581 x = "<memory, doc# %i>" % (self.parent._graft_id,) | |
| 7582 if x == "": | |
| 7583 x = "<new PDF, doc# %i>" % self.parent._graft_id | |
| 7584 return "page %s of %s" % (self.number, x) | |
| 7585 | |
def __str__(self):
    """Return 'page <number>', extended by ' of <document>' if there is
    a truthy `parent`.

    The document part is the parent's name, '<memory, doc# N>' if the
    parent has a stream, or '<new PDF, doc# N>' if the name is empty,
    with N being the parent's `_graft_id`.
    """
    #CheckParent(self)
    internal = self.this.m_internal
    if isinstance(internal, mupdf.pdf_page):
        number = internal.super.number
    else:
        number = internal.number
    text = f'page {number}'

    parent = getattr(self, 'parent', None)
    if not parent:
        return text

    name = parent.name
    if parent.stream is not None:
        name = "<memory, doc# %i>" % (parent._graft_id,)
    if name == "":
        name = "<new PDF, doc# %i>" % parent._graft_id
    return text + f' of {name}'
| 7602 | |
def _add_caret_annot(self, point):
    """Create a caret annotation and return the MuPDF annotation object.

    If `point` is truthy, the annotation rectangle is moved so that its
    top-left corner is at `point`, keeping width and height.
    """
    if g_use_extra:
        return extra._add_caret_annot( self.this, JM_point_from_py(point))

    pdf_page = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_CARET)
    if point:
        corner = JM_point_from_py(point)
        default = mupdf.pdf_annot_rect(annot)
        moved = mupdf.FzRect(
                corner.x,
                corner.y,
                corner.x + default.x1 - default.x0,
                corner.y + default.y1 - default.y0,
                )
        mupdf.pdf_set_annot_rect(annot, moved)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return annot
| 7617 | |
def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
    """Create a file attachment annotation and return it as an Annot.

    Args:
        point: position of the annotation's top-left corner.
        buffer_: content of the file to embed.
        filename: file name; also stored as the annotation's /Contents.
        ufilename: passed to JM_embed_file(); `filename` if falsy.
        desc: passed to JM_embed_file(); `filename` if falsy.
        icon: optional icon name.

    Raises TypeError if no buffer can be made from `buffer_`.
    """
    pdf_page = self._pdf_page()
    unicode_name = ufilename or filename
    description = desc or filename
    corner = JM_point_from_py(point)
    filebuf = JM_BufferFromBytes(buffer_)
    if not filebuf.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
    # Move the default rectangle to `point`, keeping its size.
    default = mupdf.pdf_annot_rect(annot)
    rect = mupdf.fz_make_rect(
            corner.x,
            corner.y,
            corner.x + default.x1 - default.x0,
            corner.y + default.y1 - default.y0,
            )
    mupdf.pdf_set_annot_rect(annot, rect)
    flags = mupdf.PDF_ANNOT_IS_PRINT
    mupdf.pdf_set_annot_flags(annot, flags)

    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)

    filespec = JM_embed_file(pdf_page.doc(), filebuf, filename, unicode_name, description, 1)
    mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), filespec)
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
    mupdf.pdf_update_annot(annot)
    # Rectangle and flags are applied a second time after the update.
    mupdf.pdf_set_annot_rect(annot, rect)
    mupdf.pdf_set_annot_flags(annot, flags)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
| 7644 | |
def _add_freetext_annot(
        self, rect,
        text,
        fontsize=11,
        fontname=None,
        text_color=None,
        fill_color=None,
        border_color=None,
        border_width=0,
        dashes=None,
        callout=None,
        line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity=1,
        align=0,
        rotate=0,
        richtext=False,
        style=None,
        ):
    """Create a 'FreeText' annotation.

    Args:
        rect: rect-like annotation rectangle; must be finite and non-empty.
        text: plain text (stored as contents) or, with `richtext`, the
            markup placed inside the rich-text <body> element (key /RC).
        fontsize, fontname: used for the default appearance string
            (plain text only).
        text_color: text color sequence; defaults to `border_color` when
            that is given.
        fill_color: fill color sequence.
        border_color: only allowed together with `richtext`.
        border_width: border line width.
        dashes: optional sequence of dash lengths for the border.
        callout: optional sequence of points for a callout line.
        line_end: line ending style of the callout line.
        opacity: annotation opacity.
        align: text alignment ("quadding").
        rotate: rotation in degrees; normalised into [0, 360).
        richtext: store `text` as rich text (/RC) instead of plain contents.
        style: optional default style string (/DS); rich text only.

    Returns:
        The new annotation wrapped in `Annot`.

    Raises:
        ValueError: `border_color` without `richtext`, or a bad `rect`.
    """
    page = self._pdf_page()
    if border_color and not richtext:
        raise ValueError("cannot set border_color if rich_text is False")
    if border_color and not text_color:
        text_color = border_color
    nfcol, fcol = JM_color_FromSequence(fill_color)
    ntcol, tcol = JM_color_FromSequence(text_color)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError(MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
    annot_obj = mupdf.pdf_annot_obj(annot)

    # Insert text as 'contents' or 'RC' depending on 'richtext'.
    if not richtext:
        mupdf.pdf_set_annot_contents(annot, text)
    else:
        # Rich-text envelope, only built when actually needed. The <body>
        # element uses the XHTML namespace ("xhtml", not "xtml").
        rc = (
            '<?xml version="1.0"?>\n'
            '<body xmlns="http://www.w3.org/1999/xhtml"\n'
            'xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"\n'
            'xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">\n'
            f'{text}'
        )
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME("RC"), rc)
        if style:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME("DS"), style)

    mupdf.pdf_set_annot_rect(annot, r)

    # Normalise the rotation into [0, 360); zero is not stored.
    rotate %= 360
    if rotate != 0:
        mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

    mupdf.pdf_set_annot_quadding(annot, align)

    if nfcol > 0:
        mupdf.pdf_set_annot_color(annot, fcol[:nfcol])

    mupdf.pdf_set_annot_border_width(annot, border_width)
    mupdf.pdf_set_annot_opacity(annot, opacity)
    if dashes:
        for d in dashes:
            mupdf.pdf_add_annot_border_dash_item(annot, float(d))

    # Insert callout information.
    if callout:
        mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
        mupdf.pdf_set_annot_callout_style(annot, line_end)
        point_count = len(callout)
        extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)

    # Insert the default appearance string (plain text only).
    if not richtext:
        JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
| 7724 | |
def _add_ink_annot(self, list):
    """Create an 'Ink' annotation from a sequence of strokes.

    Args:
        list: sequence of strokes, each stroke being a sequence of
            point-likes (pairs). The parameter name shadows the builtin
            but is kept for interface compatibility.

    Returns:
        The new annotation wrapped in `Annot`.

    Raises:
        ValueError: `list` is no sequence, or a point is not a pair.
    """
    pdfpage = _as_pdf_page(self.this)
    if not PySequence_Check(list):
        raise ValueError(MSG_BAD_ARG_INK_ANNOT)

    # Points are transformed with the inverse of the page matrix before
    # being stored in the /InkList.
    page_ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(pdfpage, mupdf.FzRect(0), page_ctm)
    inverse_ctm = mupdf.fz_invert_matrix(page_ctm)

    annot = mupdf.pdf_create_annot(pdfpage, mupdf.PDF_ANNOT_INK)
    annot_obj = mupdf.pdf_annot_obj(annot)
    stroke_count = len(list)
    all_strokes = mupdf.pdf_new_array(pdfpage.doc(), stroke_count)

    for stroke_no in range(stroke_count):
        points = list[stroke_no]
        point_count = len(points)
        coords = mupdf.pdf_new_array(pdfpage.doc(), 2 * point_count)
        for point_no in range(point_count):
            item = points[point_no]
            if not PySequence_Check(item) or PySequence_Size(item) != 2:
                raise ValueError(MSG_BAD_ARG_INK_ANNOT)
            transformed = mupdf.fz_transform_point(JM_point_from_py(item), inverse_ctm)
            mupdf.pdf_array_push_real(coords, transformed.x)
            mupdf.pdf_array_push_real(coords, transformed.y)
        mupdf.pdf_array_push(all_strokes, coords)

    mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), all_strokes)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
| 7756 | |
def _add_line_annot(self, p1, p2):
    """Create a 'Line' annotation from `p1` to `p2` (point-likes).

    Returns:
        The new annotation wrapped in `Annot`.
    """
    line = mupdf.pdf_create_annot(self._pdf_page(), mupdf.PDF_ANNOT_LINE)
    start = JM_point_from_py(p1)
    end = JM_point_from_py(p2)
    mupdf.pdf_set_annot_line(line, start, end)
    mupdf.pdf_update_annot(line)
    JM_add_annot_id(line, "A")
    assert line.m_internal
    return Annot(line)
| 7767 | |
def _add_multiline(self, points, annot_type):
    """Create a multi-vertex annotation of type `annot_type`.

    Args:
        points: sequence of at least two point-likes (pairs).
        annot_type: MuPDF annotation type constant.

    Returns:
        The new annotation wrapped in `Annot`.

    Raises:
        ValueError: fewer than two points, or an item that is not a pair.
    """
    pdfpage = self._pdf_page()
    if len(points) < 2:
        raise ValueError(MSG_BAD_ARG_POINTS)
    annot = mupdf.pdf_create_annot(pdfpage, annot_type)
    for item in points:
        if PySequence_Size(item) != 2:
            raise ValueError(MSG_BAD_ARG_POINTS)
        mupdf.pdf_add_annot_vertex(annot, JM_point_from_py(item))

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
| 7782 | |
def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
    """Create a 'Redact' annotation covering `quad`.

    Args:
        quad: quad-like area; the annotation rectangle is its bounding
            rectangle.
        text: optional overlay text.
        da_str: default appearance string; must be truthy if `text` is.
        align: alignment of the overlay text (stored as /Q).
        fill: optional fill color sequence (stored as /IC).
        text_color: not used by this method.

    Returns:
        The new annotation wrapped in `Annot`.
    """
    pdfpage = self._pdf_page()
    annot = mupdf.pdf_create_annot(pdfpage, mupdf.PDF_ANNOT_REDACT)
    bbox = mupdf.fz_rect_from_quad(JM_quad_from_py(quad))
    # TODO calculate de-rotated rect
    mupdf.pdf_set_annot_rect(annot, bbox)

    if fill:
        component_count, components = JM_color_FromSequence(fill)
        interior = mupdf.pdf_new_array(pdfpage.doc(), component_count)
        for idx in range(component_count):
            mupdf.pdf_array_push_real(interior, components[idx])
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), interior)

    if text:
        assert da_str
        mupdf.pdf_dict_puts(
                mupdf.pdf_annot_obj(annot),
                "OverlayText",
                mupdf.pdf_new_text_string(text),
                )
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    # Take an extra reference on the low-level annot and re-wrap it before
    # handing it to `Annot`.
    kept = mupdf.ll_pdf_keep_annot(annot.m_internal)
    return Annot(mupdf.PdfAnnot(kept))
| 7812 | |
def _add_square_or_circle(self, rect, annot_type):
    """Create a rectangle-based annotation ('Square' or 'Circle').

    Args:
        rect: rect-like; must be finite and non-empty.
        annot_type: MuPDF annotation type constant.

    Returns:
        The new annotation wrapped in `Annot`.

    Raises:
        ValueError: `rect` is infinite or empty.
    """
    pdfpage = self._pdf_page()
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError(MSG_BAD_RECT)
    shape = mupdf.pdf_create_annot(pdfpage, annot_type)
    mupdf.pdf_set_annot_rect(shape, box)
    mupdf.pdf_update_annot(shape)
    JM_add_annot_id(shape, "A")
    assert shape.m_internal
    return Annot(shape)
| 7824 | |
def _add_stamp_annot(self, rect, stamp=0):
    """Create a 'Stamp' annotation showing a standard text or an image.

    Args:
        rect: rect-like target area; must be finite and non-empty.
        stamp: index into the list of standard stamp names, or an image
            given as Pixmap, file path (str), bytes / bytearray or
            io.BytesIO. Any other value selects the first standard name.

    Returns:
        The new annotation wrapped in `Annot`.

    Raises:
        ValueError: `rect` is infinite or empty.
    """
    rect = Rect(rect)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError(MSG_BAD_RECT)
    pdfpage = self._pdf_page()
    standard_names = [
            "Approved",
            "AsIs",
            "Confidential",
            "Departmental",
            "Experimental",
            "Expired",
            "Final",
            "ForComment",
            "ForPublicRelease",
            "NotApproved",
            "NotForPublicRelease",
            "Sold",
            "TopSecret",
            "Draft",
            ]

    # Decide between a text stamp (`name`) and an image stamp (`image_data`).
    image_data = None
    name = None
    if stamp in range(len(standard_names)):
        name = standard_names[stamp]
    elif isinstance(stamp, Pixmap):
        image_data = stamp.tobytes()
    elif isinstance(stamp, str):
        image_data = pathlib.Path(stamp).read_bytes()
    elif isinstance(stamp, (bytes, bytearray)):
        image_data = stamp
    elif isinstance(stamp, io.BytesIO):
        image_data = stamp.getvalue()
    else:
        name = standard_names[0]

    annot = mupdf.pdf_create_annot(pdfpage, mupdf.PDF_ANNOT_STAMP)
    if image_data:  # image stamp
        img = mupdf.fz_new_image_from_buffer(
                mupdf.fz_new_buffer_from_copied_data(image_data)
                )

        # Scale the image to fit into `rect` while keeping its aspect ratio.
        w, h = img.w(), img.h()
        scale = min(rect.width / w, rect.height / h)
        width = w * scale
        height = h * scale

        # Center the scaled image inside `rect`.
        center = (rect.tl + rect.br) / 2
        x0 = center.x - width / 2
        y0 = center.y - height / 2
        x1 = x0 + width
        y1 = y0 + height
        r = mupdf.fz_make_rect(x0, y0, x1, y1)
        mupdf.pdf_set_annot_rect(annot, r)
        mupdf.pdf_set_annot_stamp_image(annot, img)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
        mupdf.pdf_set_annot_contents(annot, "Image Stamp")
    else:  # text stamp
        mupdf.pdf_set_annot_rect(annot, r)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
        mupdf.pdf_set_annot_contents(annot, name)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
| 7892 | |
def _add_text_annot(self, point, text, icon=None):
    """Create a 'Text' (sticky note) annotation.

    Args:
        point: point-like top-left corner of the note icon.
        text: annotation contents.
        icon: optional icon name.

    Returns:
        The new annotation wrapped in `Annot`.
    """
    pdfpage = self._pdf_page()
    anchor = JM_point_from_py(point)
    note = mupdf.pdf_create_annot(pdfpage, mupdf.PDF_ANNOT_TEXT)
    # Default-sized rectangle, moved so that its top-left corner is `point`.
    box = mupdf.pdf_annot_rect(note)
    box = mupdf.fz_make_rect(
        anchor.x,
        anchor.y,
        anchor.x + box.x1 - box.x0,
        anchor.y + box.y1 - box.y0,
    )
    mupdf.pdf_set_annot_rect(note, box)
    mupdf.pdf_set_annot_contents(note, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(note, icon)
    mupdf.pdf_update_annot(note)
    JM_add_annot_id(note, "A")
    return Annot(note)
| 7906 | |
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation and register it with this page.

    Delegates to Page__add_text_marker(); a truthy result gets a weak
    back-reference to this page and is recorded in `self._annot_refs`.

    Returns:
        The annotation, or None if the helper returned a falsy value.

    Raises:
        ValueError: the parent document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")

    marker = Page__add_text_marker(self, quads, annot_type)
    if marker:
        marker.parent = weakref.proxy(self)
        self._annot_refs[id(marker)] = marker
        return marker
    return None
| 7920 | |
def _addAnnot_FromString(self, linklist):
    """Add links from list of object sources.

    Args:
        linklist: tuple of strings, each the PDF source of one link /
            annotation object. An empty sequence is a no-op.

    Items that are empty or cannot be turned into a PDF object are
    skipped with a message; they do not abort the loop.

    Raises:
        ValueError: `linklist` is non-empty but not a tuple.
    """
    CheckParent(self)
    if g_use_extra:
        # Replace the class attribute by the `extra` implementation, then
        # handle the current call via `extra` as well.
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString(self.this, linklist)
    page = _as_pdf_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return

    # insert links from the provided sources
    if not isinstance(linklist, tuple):
        raise ValueError("bad 'linklist' argument")
    # Make sure the page has an /Annots array to append to.
    if not mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array(page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i in range(lcount):
        txtpy = linklist[i]
        text = JM_StrAsChar(txtpy)
        if not text:
            # Format the index into the text, as the `except` branch below
            # does, instead of passing it as a separate argument.
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object(page.doc(), JM_pdf_obj_from_str(page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect(page.doc(), mupdf.pdf_to_num(annot), 0)
            mupdf.pdf_array_push(annots, ind_obj)
        except Exception:
            # Deliberately best-effort: report and go on with the next item.
            if g_exceptions_verbose:
                exception_info()
            message("skipping bad link / annot item %i.\n" % i)
| 7954 | |
def _addWidget(self, field_type, field_name):
    """Create a form field (widget) of the given type and name.

    Returns:
        The new widget annotation wrapped in `Annot`.

    Raises:
        RuntimeError: the widget could not be created.
    """
    pdfpage = self._pdf_page()
    widget = JM_create_widget(pdfpage.doc(), pdfpage, field_type, field_name)
    if not widget.m_internal:
        raise RuntimeError("cannot create widget")
    JM_add_annot_id(widget, "W")
    return Annot(widget)
| 7963 | |
def _apply_redactions(self, text, images, graphics):
    """Apply the page's redactions via mupdf.pdf_redact_page().

    Args:
        text: how to treat text (stored in the options' `text` field).
        images: how to treat images (`image_method`).
        graphics: how to treat vector graphics (`line_art`).

    Returns:
        The result of mupdf.pdf_redact_page().
    """
    pdfpage = self._pdf_page()
    options = mupdf.PdfRedactOptions()
    options.black_boxes = 0  # no black boxes
    options.text = text
    options.image_method = images
    options.line_art = graphics
    return mupdf.pdf_redact_page(pdfpage.doc(), pdfpage, options)
| 7973 | |
| 7974 def _erase(self): | |
| 7975 self._reset_annot_refs() | |
| 7976 try: | |
| 7977 self.parent._forget_page(self) | |
| 7978 except Exception: | |
| 7979 exception_info() | |
| 7980 pass | |
| 7981 self.parent = None | |
| 7982 self.thisown = False | |
| 7983 self.number = None | |
| 7984 self.this = None | |
| 7985 | |
def _count_q_balance(self):
    """Count missing graphics state pushes and pops.

    Returns:
        A pair of integers (push, pop). Push is the number of missing
        PDF "q" commands, pop is the number of "Q" commands.
        A balanced graphics state for the page will be reached if its
        /Contents is prepended with 'push' copies of the string "q"
        plus line break, and appended with 'pop' copies of a line break
        plus "Q".
    """
    pdfpage = _as_pdf_page(self)  # underlying PDF page
    # The MuPDF function needs the page's /Resources and /Contents.
    resources = mupdf.pdf_dict_get(pdfpage.obj(), mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(pdfpage.obj(), mupdf.PDF_ENUM_NAME_Contents)
    pdf = _as_pdf_document(self.parent)  # underlying PDF document
    balance = mupdf.pdf_count_q_balance_outparams_fn(pdf, resources, contents)
    return balance
| 8009 | |
| 8010 def _get_optional_content(self, oc: OptInt) -> OptStr: | |
| 8011 if oc is None or oc == 0: | |
| 8012 return None | |
| 8013 doc = self.parent | |
| 8014 check = doc.xref_object(oc, compressed=True) | |
| 8015 if not ("/Type/OCG" in check or "/Type/OCMD" in check): | |
| 8016 #log( 'raising "bad optional content"') | |
| 8017 raise ValueError("bad optional content: 'oc'") | |
| 8018 #log( 'Looking at self._get_resource_properties()') | |
| 8019 props = {} | |
| 8020 for p, x in self._get_resource_properties(): | |
| 8021 props[x] = p | |
| 8022 if oc in props.keys(): | |
| 8023 return props[oc] | |
| 8024 i = 0 | |
| 8025 mc = "MC%i" % i | |
| 8026 while mc in props.values(): | |
| 8027 i += 1 | |
| 8028 mc = "MC%i" % i | |
| 8029 self._set_resource_property(mc, oc) | |
| 8030 #log( 'returning {mc=}') | |
| 8031 return mc | |
| 8032 | |
def _get_resource_properties(self):
    '''
    Return what JM_get_resource_properties() reports for this page's
    object, i.e. the page's Resources/Properties entries.
    '''
    return JM_get_resource_properties(self._pdf_page().obj())
| 8040 | |
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Create a MuPDF structured text page for this page.

    Args:
        clip: rect-like area to extract from; None means the page's
            bounding rectangle.
        flags: option bits passed to mupdf.FzStextOptions().
        matrix: matrix-like transformation used when running the page.

    Returns:
        A mupdf.FzStextPage.
    """
    if g_use_extra:
        ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
        return mupdf.FzStextPage(ll_tpage)
    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    # (`clip` is converted exactly once; the former extra conversion
    # before this statement had its result overwritten here.)
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints(dev, mupdf.FZ_NO_CACHE)
    # fz_run_page() is given an FzPage; a PdfPage is passed via its
    # super() object.
    if isinstance(page, mupdf.FzPage):
        pass
    elif isinstance(page, mupdf.PdfPage):
        page = page.super()
    else:
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
| 8065 | |
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Insert an image into the page and return its xref.

    The image source is chosen in this order: an existing image `xref`,
    `stream` (image data in memory), `filename`, else `pixmap`.

    The function emulates a C-style "goto" flow with the `do_*` flags:
    each stage runs only while its flag is still 1, and an earlier stage
    clears the flags of the stages it wants to skip.

    Args:
        filename: image file path (used if `stream` is falsy).
        pixmap: Pixmap (used if neither `stream` nor `filename` is given).
        stream: image data in memory.
        imask: optional mask image data; only used together with `stream`
            or `filename`.
        clip, rotate, keep_proportion: passed to calc_image_matrix().
        overlay: passed to JM_insert_contents().
        oc: optional content xref; applied to a newly added image only.
        width, height: initial image dimensions; overwritten by the
            actual dimensions of whatever image source is used.
        xref: xref of an existing image to re-use.
        alpha: not used in this method.
        _imgname: resource name under which the image is referenced.
        digests: dict mapping image MD5 digests to xrefs; looked up to
            re-use images and updated when a new image is added. It must
            be a dict whenever `xref` is not positive.

    Returns:
        (img_xref, digests) if a new image object was added to the PDF,
        else (img_xref, None).

    Raises:
        ValueError: `xref` is no image, or an uncompressed image was
            combined with a mask.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0   # becomes 1 when `digests` got a new entry

    # Stage flags; `do_have_xref` is never cleared, so the final stage
    # always runs.
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # Re-use an existing image object: only its dimensions are needed.
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0

    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same pixmap was inserted before: re-use its xref.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # Pixmap has an alpha channel: build a separate mask image
                # from it and attach that mask to the main image.
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # The digest covers the image data and, if present, the mask data.
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same data was inserted before: re-use its xref and read the
            # dimensions from the existing image object.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # Re-create the image from its compressed data, now with the mask.
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1,  # interpolate
                0,  # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # A new image: add it to the PDF and remember its digest.
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # Reference the image in /Resources/XObject (creating the
        # dictionaries if missing) and add the drawing commands to the
        # page's contents.
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
| 8237 | |
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it in this page's resources.

    The font is created via JM_insert_font() and then stored under the
    name `fontname` in the page's /Resources/Font dictionary, which is
    created if missing.

    Returns:
        The value returned by JM_insert_font().

    Raises:
        RuntimeError: JM_insert_font() delivered no xref.
    """
    pdfpage = self._pdf_page()
    pdf = pdfpage.doc()

    value = JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)

    # Locate (or create) /Resources and /Resources/Font.
    resources = mupdf.pdf_dict_get_inheritable(pdfpage.obj(), PDF_NAME('Resources'))
    if not resources.pdf_is_dict():
        resources = mupdf.pdf_dict_put_dict(pdfpage.obj(), PDF_NAME("Resources"), 5)
    font_dict = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
    if not font_dict.m_internal:
        # page has no fonts yet
        font_dict = mupdf.pdf_new_dict(pdf, 5)
        mupdf.pdf_dict_putl(pdfpage.obj(), font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))

    # Store a named reference to the font object in the font dictionary.
    _, font_xref = JM_INT_ITEM(value, 0)
    if not font_xref:
        raise RuntimeError("cannot insert font")
    mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf, font_xref, 0))
    return value
| 8258 | |
def _load_annot(self, name, xref):
    """Load one annotation of this page, by xref or by name.

    Args:
        name: annotation name; only used when `xref` is 0.
        xref: annotation xref; 0 selects the lookup by name.

    Returns:
        The annotation wrapped in `Annot`, or None if the lookup
        delivered no annotation.
    """
    pdfpage = self._pdf_page()
    found = (
        JM_get_annot_by_name(pdfpage, name)
        if xref == 0
        else JM_get_annot_by_xref(pdfpage, xref)
    )
    if not found.m_internal:
        return None
    return Annot(found)
| 8267 | |
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Render this page via JM_pixmap_from_page() and wrap it in `Pixmap`.

    All arguments are passed through to JM_pixmap_from_page() unchanged.
    """
    return Pixmap(JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip))
| 8271 | |
def _other_box(self, boxtype):
    """Return the page box stored under key `boxtype`, if there is one.

    Returns:
        The box converted by JM_py_from_rect(), or None when the page is
        no PDF page, the key holds no array, or the rectangle is infinite.
    """
    # Start with an infinite rectangle, meaning "nothing found".
    box = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    pdfpage = _as_pdf_page(self.this, required=False)
    if pdfpage.m_internal:
        entry = mupdf.pdf_dict_gets(pdfpage.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    if not mupdf.fz_is_infinite_rect(box):
        return JM_py_from_rect(box)
    return None
| 8282 | |
def _pdf_page(self, required=True):
    """Return `self.this` as a PDF page via _as_pdf_page().

    `required` is passed through to _as_pdf_page() unchanged.
    """
    pdfpage = _as_pdf_page(self.this, required=required)
    return pdfpage
| 8285 | |
| 8286 def _reset_annot_refs(self): | |
| 8287 """Invalidate / delete all annots of this page.""" | |
| 8288 self._annot_refs.clear() | |
| 8289 | |
| 8290 def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None): | |
| 8291 | |
| 8292 if CA >= 1 and ca >= 1 and blendmode is None: | |
| 8293 return | |
| 8294 tCA = int(round(max(CA , 0) * 100)) | |
| 8295 if tCA >= 100: | |
| 8296 tCA = 99 | |
| 8297 tca = int(round(max(ca, 0) * 100)) | |
| 8298 if tca >= 100: | |
| 8299 tca = 99 | |
| 8300 gstate = "fitzca%02i%02i" % (tCA, tca) | |
| 8301 | |
| 8302 if not gstate: | |
| 8303 return | |
| 8304 page = _as_pdf_page(self.this) | |
| 8305 resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources')) | |
| 8306 if not resources.m_internal: | |
| 8307 resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2) | |
| 8308 extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState')) | |
| 8309 if not extg.m_internal: | |
| 8310 extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2) | |
| 8311 n = mupdf.pdf_dict_len(extg) | |
| 8312 for i in range(n): | |
| 8313 o1 = mupdf.pdf_dict_get_key(extg, i) | |
| 8314 name = mupdf.pdf_to_name(o1) | |
| 8315 if name == gstate: | |
| 8316 return gstate | |
| 8317 opa = mupdf.pdf_new_dict(page.doc(), 3) | |
| 8318 mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA) | |
| 8319 mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca) | |
| 8320 mupdf.pdf_dict_puts(extg, gstate, opa) | |
| 8321 return gstate | |
| 8322 | |
| 8323 def _set_pagebox(self, boxtype, rect): | |
| 8324 doc = self.parent | |
| 8325 if doc is None: | |
| 8326 raise ValueError("orphaned object: parent is None") | |
| 8327 | |
| 8328 if not doc.is_pdf: | |
| 8329 raise ValueError("is no PDF") | |
| 8330 | |
| 8331 valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox") | |
| 8332 | |
| 8333 if boxtype not in valid_boxes: | |
| 8334 raise ValueError("bad boxtype") | |
| 8335 | |
| 8336 rect = Rect(rect) | |
| 8337 mb = self.mediabox | |
| 8338 rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) | |
| 8339 if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1): | |
| 8340 raise ValueError(f"{boxtype} not in MediaBox") | |
| 8341 | |
| 8342 doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]") | |
| 8343 | |
def _set_resource_property(self, name, xref):
    """Register `xref` under `name` via JM_set_resource_property().

    The helper is called with this page's PDF object.
    """
    page_obj = self._pdf_page().obj()
    JM_set_resource_property(page_obj, name, xref)
| 8347 | |
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Display a page of another PDF on this page as a Form XObject.

    Args:
        fz_srcpage: the source page.
        overlay: passed to JM_insert_contents().
        matrix: matrix-like used for the referencing XObject.
        xref: xref passed to JM_xobject_from_page(); if 0, the xref of
            the XObject returned by that function is reported back.
        oc: optional content xref for the referencing XObject (if > 0).
        clip: rect-like bounding box of the referencing XObject.
        graftmap: object whose `.this` is passed to JM_xobject_from_page().
        _imgname: resource name under which the XObject is referenced.

    Returns:
        `xref` if non-zero, else the xref of the source page's XObject.
    """
    bbox = JM_rect_from_py(clip)
    display_matrix = JM_matrix_from_py(matrix)
    result_xref = xref
    target_page = _as_pdf_page(self.this)
    target_obj = target_page.obj()
    pdfout = target_page.doc()  # target PDF
    ENSURE_OPERATION(pdfout)

    # ---- Step 1: convert the source page to a Form XObject ----
    source_xobj = JM_xobject_from_page(pdfout, fz_srcpage, xref, graftmap.this)
    if not result_xref:
        result_xref = mupdf.pdf_to_num(source_xobj)

    # ---- Step 2: create the referencing XObject, which controls the
    # display on the target page. Its /Resources refer to the source
    # XObject under the name "fullpage".
    inner_xobjects = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_puts(inner_xobjects, "fullpage", source_xobj)
    inner_resources = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_put(inner_resources, PDF_NAME('XObject'), inner_xobjects)

    inner_stream = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(inner_stream, "/fullpage Do")

    display_xobj = mupdf.pdf_new_xobject(pdfout, bbox, display_matrix, inner_resources, inner_stream)
    if oc > 0:
        JM_add_oc_object(pdfout, mupdf.pdf_resolve_indirect(display_xobj), oc)

    # ---- Step 3a: insert the referencing XObject into the target
    # page's /Resources/XObject (creating the dictionaries if missing).
    resources = mupdf.pdf_dict_get_inheritable(target_obj, PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(target_obj, PDF_NAME('Resources'), 5)
    page_xobjects = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
    if not page_xobjects.m_internal:
        page_xobjects = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)

    mupdf.pdf_dict_puts(page_xobjects, _imgname, display_xobj)

    # ---- Step 3b: make and insert a new contents object invoking it.
    do_command = mupdf.fz_new_buffer(50)
    mupdf.fz_append_string(do_command, " q /")
    mupdf.fz_append_string(do_command, _imgname)
    mupdf.fz_append_string(do_command, " Do Q ")

    JM_insert_contents(pdfout, target_obj, do_command, overlay)
    return result_xref
| 8404 | |
def add_caret_annot(self, point: point_like) -> Annot:
    """Add a 'Caret' annotation.

    Args:
        point: point-like position of the caret.

    Returns:
        The new annotation, post-processed by annot_postprocess().
    """
    saved_rotation = annot_preprocess(self)
    try:
        raw_annot = self._add_caret_annot(point)
    finally:
        # Restore the rotation reported by annot_preprocess(), also when
        # the creation failed.
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot(raw_annot)
    annot_postprocess(self, caret)
    assert hasattr(caret, 'parent')
    return caret
| 8417 | |
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Add a 'Circle' (ellipse, oval) annotation.

    Args:
        rect: rect-like area to put the ellipse in.

    Returns:
        The new annotation, post-processed by annot_postprocess().
    """
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        # Restore the rotation reported by annot_preprocess(), also when
        # the creation failed.
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
| 8428 | |
def add_file_annot(
    self,
    point: point_like,
    buffer_: ByteString,
    filename: str,
    ufilename: OptStr =None,
    desc: OptStr =None,
    icon: OptStr =None
) -> Annot:
    """Create a 'FileAttachment' annotation and return it.

    All arguments are forwarded unchanged to ``_add_file_annot()``;
    ``ufilename``, ``desc`` and ``icon`` are passed by keyword.
    """
    optional = dict(ufilename=ufilename, desc=desc, icon=icon)
    saved_rotation = annot_preprocess(self)
    try:
        attachment = self._add_file_annot(point, buffer_, filename, **optional)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
| 8453 | |
def add_freetext_annot(
    self,
    rect: rect_like,
    text: str,
    *,
    fontsize: float =11,
    fontname: OptStr =None,
    text_color: OptSeq =None,
    fill_color: OptSeq =None,
    border_color: OptSeq =None,
    border_width: float =0,
    dashes: OptSeq =None,
    callout: OptSeq =None,
    line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
    opacity: float =1,
    align: int =0,
    rotate: int =0,
    richtext=False,
    style=None,
) -> Annot:
    """Create a 'FreeText' annotation and return it.

    ``rect`` and ``text`` are positional; every other option is
    keyword-only and forwarded as-is to ``_add_freetext_annot()``.
    """
    # Collect the keyword-only options once; they are passed through untouched.
    options = dict(
        fontsize=fontsize,
        fontname=fontname,
        text_color=text_color,
        fill_color=fill_color,
        border_color=border_color,
        border_width=border_width,
        dashes=dashes,
        callout=callout,
        line_end=line_end,
        opacity=opacity,
        align=align,
        rotate=rotate,
        richtext=richtext,
        style=style,
    )
    saved_rotation = annot_preprocess(self)
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
| 8501 | |
def add_highlight_annot(self, quads=None, start=None,
                        stop=None, clip=None) -> Annot:
    """Create a 'Highlight' annotation.

    When ``quads`` is None the marked area comes from
    ``get_highlight_selection()`` using ``start``, ``stop`` and ``clip``;
    otherwise ``quads`` is passed through ``CheckMarkerArg()``.
    """
    marked = (
        CheckMarkerArg(quads)
        if quads is not None
        else get_highlight_selection(self, start=start, stop=stop, clip=clip)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_HIGHLIGHT)
| 8511 | |
def add_ink_annot(self, handwriting: list) -> Annot:
    """Create an 'Ink' ('handwriting') annotation.

    ``handwriting`` must be a list of lists of point_likes; it is handed
    unchanged to ``_add_ink_annot()``.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
| 8525 | |
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Create a 'Line' annotation between ``p1`` and ``p2``."""
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
| 8536 | |
def add_polygon_annot(self, points: list) -> Annot:
    """Create a 'Polygon' annotation from ``points``.

    Delegates to ``_add_multiline()`` with the polygon subtype.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
| 8547 | |
def add_polyline_annot(self, points: list) -> Annot:
    """Create a 'PolyLine' annotation from ``points``.

    Delegates to ``_add_multiline()`` with the poly-line subtype.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
| 8558 | |
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Create a 'Square' (rectangle) annotation for ``rect``.

    Delegates to ``_add_square_or_circle()`` with the square subtype.
    """
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
| 8569 | |
def add_redact_annot(
    self,
    quad,
    text: OptStr =None,
    fontname: OptStr =None,
    fontsize: float =11,
    align: int =0,
    fill: OptSeq =None,
    text_color: OptSeq =None,
    cross_out: bool =True,
) -> Annot:
    """Add a 'Redact' annotation.

    Args:
        quad: area to redact; passed unchanged to ``_add_redact_annot()``.
        text: optional replacement text. Empty or whitespace-only text is
            treated as "no text" and replaced by None.
        fontname: font for the replacement text; falsy values become "Helv".
        fontsize: font size for the replacement text; falsy values become 11.
        align: passed unchanged to ``_add_redact_annot()``.
        fill: fill color. Only normalised when replacement text is given:
            None becomes (1, 1, 1), a single number becomes a gray triple,
            longer sequences are cut to three components.
        text_color: color of the replacement text, normalised like ``fill``
            with (0, 0, 0) as the fallback for falsy values.
        cross_out: if true, the appearance stream is extended so that the
            rectangle is shown crossed out.

    Returns:
        The new annotation.
    """
    da_str = None
    # Only build a default-appearance string when there is visible text.
    if text and not set(string.whitespace).issuperset(text):
        CheckColor(fill)
        CheckColor(text_color)
        if not fontname:
            fontname = "Helv"
        if not fontsize:
            fontsize = 11
        if not text_color:
            text_color = (0, 0, 0)
        # A single number is expanded to three equal components.
        if hasattr(text_color, "__float__"):
            text_color = (text_color, text_color, text_color)
        if len(text_color) > 3:
            text_color = text_color[:3]
        fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
        da_str = fmt.format(*text_color, f=fontname, s=fontsize)
        if fill is None:
            fill = (1, 1, 1)
        # A falsy fill (e.g. False) is passed on without normalisation.
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill, fill, fill)
            if len(fill) > 3:
                fill = fill[:3]
    else:
        text = None

    old_rotation = annot_preprocess(self)
    try:
        annot = self._add_redact_annot(quad, text=text, da_str=da_str,
                   align=align, fill=fill)
    finally:
        # Re-apply the rotation even if creating the annotation failed.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    annot_postprocess(self, annot)
    #-------------------------------------------------------------
    # change appearance to show a crossed-out rectangle
    #-------------------------------------------------------------
    if cross_out:
        ap_tab = annot._getAP().splitlines()[:-1]  # get the 4 commands only
        # The unpacking requires exactly five remaining lines; the names
        # suggest the four rectangle corners (LL, LR, UR, UL) -- TODO confirm.
        _, LL, LR, UR, UL = ap_tab
        # Re-append corner commands so that both diagonals get drawn.
        ap_tab.append(LR)
        ap_tab.append(LL)
        ap_tab.append(UR)
        ap_tab.append(LL)
        ap_tab.append(UL)
        ap_tab.append(b"S")
        ap = b"\n".join(ap_tab)
        annot._setAP(ap, 0)
    return annot
| 8631 | |
def add_squiggly_annot(
    self,
    quads=None,
    start=None,
    stop=None,
    clip=None,
) -> Annot:
    """Create a 'Squiggly' annotation.

    When ``quads`` is None the marked area comes from
    ``get_highlight_selection()``; otherwise ``quads`` is passed through
    ``CheckMarkerArg()``.
    """
    marked = (
        CheckMarkerArg(quads)
        if quads is not None
        else get_highlight_selection(self, start=start, stop=stop, clip=clip)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_SQUIGGLY)
| 8645 | |
def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
    """Create a ('rubber') 'Stamp' annotation in ``rect``.

    ``stamp`` is forwarded unchanged to ``_add_stamp_annot()``.
    """
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
| 8656 | |
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create a 'StrikeOut' annotation.

    When ``quads`` is None the marked area comes from
    ``get_highlight_selection()``; otherwise ``quads`` is passed through
    ``CheckMarkerArg()``.
    """
    marked = (
        CheckMarkerArg(quads)
        if quads is not None
        else get_highlight_selection(self, start=start, stop=stop, clip=clip)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_STRIKE_OUT)
| 8664 | |
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Create a 'Text' (sticky note) annotation at ``point``.

    ``icon`` is forwarded by keyword to ``_add_text_annot()``.
    """
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
| 8675 | |
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create an 'Underline' annotation.

    When ``quads`` is None the marked area comes from
    ``get_highlight_selection()``; otherwise ``quads`` is passed through
    ``CheckMarkerArg()``.
    """
    marked = (
        CheckMarkerArg(quads)
        if quads is not None
        else get_highlight_selection(self, start=start, stop=stop, clip=clip)
    )
    return self._add_text_marker(marked, mupdf.PDF_ANNOT_UNDERLINE)
| 8683 | |
def add_widget(self, widget: Widget) -> Annot:
    """Add a 'Widget' (form field) described by ``widget``.

    Raises ValueError when the owning document is not a PDF. Returns None
    when ``_addWidget()`` yields a falsy result, otherwise the new
    annotation, which is also linked to ``widget``.
    """
    CheckParent(self)
    owner_doc = self.parent
    if not owner_doc.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    field = self._addWidget(widget.field_type, widget.field_name)
    if not field:
        return None
    field.thisown = True
    # The annotation refers back to its page through a weak proxy.
    field.parent = weakref.proxy(self)
    self._annot_refs[id(field)] = field
    # Connect the widget with its annotation, then let it update itself.
    widget.parent = field.parent
    widget._annot = field
    widget.update()
    return field
| 8701 | |
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns an empty list when the page is not backed by a PDF page
    object; otherwise the result of ``JM_get_annot_id_list()``.
    """
    CheckParent(self)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return []
    return JM_get_annot_id_list(page)
| 8712 | |
def annot_xrefs(self):
    """Return the xref numbers of annotations, fields and links.

    Thin wrapper around ``JM_get_annot_xref_list2()``.
    """
    xref_entries = JM_get_annot_xref_list2(self)
    return xref_entries
| 8718 | |
def annots(self, types=None):
    """Generate the annotations of this page.

    Args:
        types: (list) annotation types to restrict the output to, e.g.
            ``types=[PDF_ANNOT_LINE]`` yields line annotations only.
            Any value without ``__getitem__`` (like None) selects all.

    Link, popup and widget entries are never yielded.
    """
    excluded = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    restrict = hasattr(types, "__getitem__")
    # Collect all matching xrefs first, then load the annotations lazily.
    selected = []
    for entry in self.annot_xrefs():
        kind = entry[1]
        if restrict:
            wanted = kind in types and kind not in excluded
        else:
            wanted = kind not in excluded
        if wanted:
            selected.append(entry[0])
    for xref in selected:
        found = self.load_annot(xref)
        found._yielded=True
        yield found
| 8736 | |
def recolor(self, components=1):
    """Convert the colorspaces of objects on the page.

    Args:
        components: number of color components; must be 1, 3 or 4.

    Raises:
        ValueError: for any other ``components`` value.
    """
    allowed = (1, 3, 4)
    if components not in allowed:
        raise ValueError("components must be one of 1, 3, 4")
    pdf_document = _as_pdf_document(self.parent)
    raw_options = mupdf.pdf_recolor_options()
    raw_options.num_comp = components
    # pdf_recolor_page() is called with the wrapper class instance.
    wrapped_options = mupdf.PdfRecolorOptions(raw_options)
    mupdf.pdf_recolor_page(pdf_document, self.number, wrapped_options)
| 8749 | |
def clip_to_rect(self, rect):
    """Clip away page content outside ``rect``.

    Raises:
        ValueError: if the rectangle is infinite or its intersection with
            the page rectangle is empty.
    """
    area = Rect(rect)
    unusable = area.is_infinite or (area & self.rect).is_empty
    if unusable:
        raise ValueError("rect must not be infinite or empty")
    # Apply the page's transformation matrix before handing over to MuPDF.
    area *= self.transformation_matrix
    pdf_page = _as_pdf_page(self)
    mupdf.pdf_clip_page(pdf_page, JM_rect_from_py(area))
| 8759 | |
@property
def artbox(self):
    """The ArtBox.

    Falls back to the CropBox when ``_other_box("ArtBox")`` yields None.
    Otherwise the y-values are mirrored against ``mediabox.y1``.
    """
    box = self._other_box("ArtBox")
    if box is None:
        return self.cropbox
    mb_y1 = self.mediabox.y1
    return Rect(box[0], mb_y1 - box[3], box[2], mb_y1 - box[1])
| 8768 | |
@property
def bleedbox(self):
    """The BleedBox.

    Falls back to the CropBox when ``_other_box("BleedBox")`` yields None.
    Otherwise the y-values are mirrored against ``mediabox.y1``.
    """
    box = self._other_box("BleedBox")
    if box is None:
        return self.cropbox
    mb_y1 = self.mediabox.y1
    return Rect(box[0], mb_y1 - box[3], box[2], mb_y1 - box[1])
| 8777 | |
def bound(self):
    """Get page rectangle.

    Normally this is the MuPDF page bound converted to a ``Rect``. If
    that rectangle is infinite and the document is a PDF, a rectangle
    built from the CropBox dimensions is returned instead (width and
    height swapped unless the rotation is 0 or 180), and the most recent
    MuPDF warning, if there is one, is emitted via ``message()``.
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = mupdf.fz_bound_page(page)
    val = Rect(val)

    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # The warnings text may be empty; indexing [-1] unconditionally
        # would then raise IndexError and mask the usable fallback value.
        warning_lines = TOOLS.mupdf_warnings(reset=False).splitlines()
        if warning_lines:
            message(warning_lines[-1])

    return val
| 8795 | |
def clean_contents(self, sanitize=1):
    """Run MuPDF's content filter over the page's contents.

    With a falsy ``sanitize`` and a page that is not yet wrapped,
    ``wrap_contents()`` is called first. Does nothing beyond that when
    the page is not a PDF page.
    """
    if not (sanitize or self.is_wrapped):
        self.wrap_contents()
    pdf_page = _as_pdf_page( self.this, required=False)
    if pdf_page.m_internal:
        options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
        mupdf.pdf_filter_page_contents( pdf_page.doc(), pdf_page, options)
| 8804 | |
@property
def cropbox(self):
    """The CropBox.

    For PDF pages the value comes from ``JM_cropbox()``; for other
    document types the MuPDF page bound is used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        raw_box = JM_cropbox(pdf_page.obj())
    else:
        raw_box = mupdf.fz_bound_page(self.this)
    return Rect(raw_box)
| 8817 | |
@property
def cropbox_position(self):
    """The ``tl`` point of the CropBox rectangle."""
    box = self.cropbox
    return box.tl
| 8821 | |
def delete_annot(self, annot):
    """Delete annot and return next one.

    Annotations found by ``JM_find_annot_irt()`` for ``annot`` (the
    '/IRT' ones) are deleted first, then ``annot`` itself. The successor
    of ``annot`` is looked up before the deletion and returned wrapped
    in an ``Annot``; ``annot`` is erased at the end.
    """
    CheckParent(self)
    CheckParent(annot)

    page = self._pdf_page()
    while 1:
        # first loop through all /IRT annots and remove them
        irt_annot = JM_find_annot_irt(annot.this)
        if not irt_annot:    # no more there
            break
        mupdf.pdf_delete_annot(page, irt_annot.this)
    # The successor must be fetched while 'annot' still exists.
    nextannot = mupdf.pdf_next_annot(annot.this)   # store next
    mupdf.pdf_delete_annot(page, annot.this)
    val = Annot(nextannot)

    # NOTE(review): relies on Annot's truthiness to tell whether a
    # successor exists -- confirm against the Annot class.
    if val:
        val.thisown = True
        val.parent = weakref.proxy(self) # owning page object
        val.parent._annot_refs[id(val)] = val
    annot._erase()
    return val
| 8844 | |
def delete_link(self, linkdict):
    """Delete a Link.

    ``linkdict`` must be a dictionary carrying the link's xref; anything
    else is ignored silently. The link is removed from the page's
    'Annots' array and its PDF object is deleted. On every exit path
    after the type check, ``finished()`` tries to erase the Python-side
    link object registered under ``linkdict["id"]``.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return  # have no dictionary

    def finished():
        # Best-effort cleanup of the Python object referenced by the link.
        if linkdict["xref"] == 0: return
        try:
            linkid = linkdict["id"]
            linkobj = self._annot_refs[linkid]
            linkobj._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1:    exception_info()
            pass

    page = _as_pdf_page(self.this, required=False)
    if not page.m_internal:
        return finished()   # have no PDF
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return finished()   # invalid xref
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return finished()   # have no annotations
    len_ = mupdf.pdf_array_len( annots)
    if len_ == 0:
        return finished()
    oxref = 0
    # After the loop, 'i' is the index of the matching entry (if any).
    for i in range( len_):
        oxref = mupdf.pdf_to_num( mupdf.pdf_array_get( annots, i))
        if xref == oxref:
            break   # found xref in annotations

    if xref != oxref:
        return finished()   # xref not in annotations
    mupdf.pdf_array_delete( annots, i) # delete entry in annotations
    mupdf.pdf_delete_object( page.doc(), xref) # delete link object
    mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( page)

    return finished()
| 8888 | |
@property
def derotation_matrix(self) -> Matrix:
    """Matrix reflecting the page's de-rotation.

    Uses the ``extra`` module when ``g_use_extra`` is set. Otherwise a
    PDF page yields ``JM_derotate_page_matrix()``, any other page the
    matrix built from ``mupdf.FzRect.UNIT``.
    """
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Matrix(JM_derotate_page_matrix(pdf_page))
    return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
| 8898 | |
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page through a device writing into ``tpage``.

    Args:
        tpage: object whose ``this`` attribute is a ``mupdf.FzStextPage``.
        flags: value stored in the text extraction options.
        matrix: converted with ``JM_matrix_from_py()`` and used as the
            transformation when running the page.
    """
    fz_page = self.this
    stext_page = tpage.this
    assert isinstance( stext_page, mupdf.FzStextPage)
    stext_options = mupdf.FzStextOptions()
    stext_options.flags = flags
    transform = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(stext_page, stext_options)
    mupdf.fz_run_page( fz_page, device, transform, mupdf.FzCookie())
    mupdf.fz_close_device( device)
| 8909 | |
@property
def first_annot(self):
    """First annotation of the page.

    None is returned for non-PDF pages and for pages without
    annotations.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_annot = mupdf.pdf_first_annot(pdf_page)
    if not raw_annot.m_internal:
        return None
    first = Annot(raw_annot)
    first.thisown = True
    first.parent = weakref.proxy(self)  # the page owning the annotation
    self._annot_refs[id(first)] = first
    return first
| 8925 | |
@property
def first_link(self):
    """First link on the page, as delivered by ``load_links()``."""
    link = self.load_links()
    return link
| 8932 | |
@property
def first_widget(self):
    """First widget/field of the page.

    None is returned for non-PDF pages and for pages without widgets.
    Otherwise a ``Widget`` populated by ``TOOLS._fill_widget()``.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw_widget = mupdf.pdf_first_widget(pdf_page)
    if not raw_widget.m_internal:
        return None
    widget_annot = Annot(raw_widget)
    widget_annot.thisown = True
    widget_annot.parent = weakref.proxy(self)  # the page owning the annotation
    self._annot_refs[id(widget_annot)] = widget_annot
    # Hand back a Widget filled from the annotation, not the annotation itself.
    result = Widget()
    TOOLS._fill_widget(widget_annot, result)
    return result
| 8952 | |
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return what it collected.

    Args:
        layers: if truthy, the device is created with layer information
            switched on.

    Returns:
        The list filled by the device created via ``JM_new_bbox_device()``.

    The page rotation is set to 0 for the duration of the run and is
    restored afterwards, also when running the page raises.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        inc_layers = bool(layers)
        dev = JM_new_bbox_device( rc, inc_layers)
        mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device( dev)
    finally:
        # Never leave the page with a modified rotation behind.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
| 8968 | |
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    Args:
        extended: if truthy, the line-art device is created with clips
            switched on.
        callback: if callable, it is handed to the line-art device in
            place of a result list.
        method: passed through to the line-art device.

    Returns:
        None when ``callback`` is callable or ``method`` is not None,
        otherwise the list of extracted paths.

    The page rotation is set to 0 during extraction and is restored
    afterwards, also when the extraction raises.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = bool(extended)
        prect = mupdf.fz_bound_page(page)
        if g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = list()
            if callable(callback) or method is not None:
                dev = JM_new_lineart_device_Device(callback, clips, method)
            else:
                dev = JM_new_lineart_device_Device(rc, clips, method)
            # Matrix built from the page height (mirrors the y-axis).
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # Never leave the page with a modified rotation behind.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if callable(callback) or method is not None:
        return None
    return rc
| 8999 | |
def get_contents(self):
    """Return the xrefs of the page's /Contents objects as a list.

    An array-valued /Contents yields one xref per array entry, a single
    object yields a one-element list, a missing entry an empty list.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        count = mupdf.pdf_array_len(contents)
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, idx))
            for idx in range(count)
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
| 9017 | |
def get_displaylist(self, annots=1):
    """Make a DisplayList from the page for Pixmap generation.

    With a truthy ``annots`` (the default) the list is built from the
    whole page, otherwise from the page contents only.
    """
    CheckParent(self)
    build = (
        mupdf.fz_new_display_list_from_page
        if annots
        else mupdf.fz_new_display_list_from_page_contents
    )
    return DisplayList(build(self.this))
| 9030 | |
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics. The extended version includes clips.

    Note:
        The path dictionaries delivered by get_cdrawings() are updated in
        place: point-likes, rect-likes and quad-likes become Point / Rect /
        Quad objects, and paths of type 'f' or 's' get every key of the
        standard key set (missing ones are set to None).
    """
    standard_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )

    def convert(item):
        # Turn one raw path item into its object-based equivalent.
        cmd = item[0]
        if cmd == "re":
            return ("re", Rect(item[1]).normalize(), item[2])
        if cmd == "qu":
            return ("qu", Quad(item[1]))
        return (cmd, *(Point(p) for p in item[1:]))

    paths = self.get_cdrawings(extended=extended)
    for path in paths:
        if path["type"].startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if path["type"] != "group":
            path["items"] = [convert(item) for item in path["items"]]
        if path['type'] in ('f', 's'):
            for key in standard_keys:
                path[key] = path.get(key)
    return paths
| 9078 | |
class Drawpath(object):
    """Attribute-style view of one path dictionary from get_cdrawings()."""

    def __init__(self, **args):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(args)
| 9083 | |
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths themselves, counters per path type are maintained
    by append().
    """

    def __getitem__(self, item):
        return self.paths[item]

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return len(self.paths)

    def append(self, path):
        """Append a path and update the counter matching its type."""
        self.paths.append(path)
        self.path_count += 1
        kind = path.type
        if kind == "clip":
            self.clip_count += 1
        elif kind == "group":
            self.group_count += 1
        elif kind == "f":
            self.fill_count += 1
        elif kind == "s":
            self.stroke_count += 1
        elif kind == "fs":
            self.fillstroke_count += 1

    def _parents(self, i, kind):
        """Return the enclosing paths of type ``kind`` for path ``i``.

        Walks backwards from path ``i`` and keeps every path of the
        requested type whose level is smaller than the level of the
        previously kept one (starting with the level of path ``i``).
        """
        count = self.path_count
        # An empty list has no valid index. Without this check a negative
        # index would never become non-negative and loop forever.
        if count == 0 or i >= count:
            raise IndexError("bad path index")
        if i < 0:
            # Same result as repeatedly adding the path count.
            i %= count
        limit = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= limit:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: if ``i`` is too large or the list is empty.
        """
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: if ``i`` is too large or the list is empty.
        """
        return self._parents(i, "group")
| 9174 | |
def get_lineart(self) -> object:
    """Get page drawings paths.

    Note:
        Works on the extended get_cdrawings() output. Point-like, rect-like
        and quad-like tuples are converted to Point / Rect / Quad objects,
        and fill-only paths get their stroke-related attributes set to None.
        In contrast to get_drawings(), the result is a Drawpathlist object
        holding Drawpath objects.
    """

    def convert(item):
        # Turn one raw path item into its object-based equivalent.
        cmd = item[0]
        if cmd == "re":
            return ("re", Rect(item[1]).normalize(), item[2])
        if cmd == "qu":
            return ("qu", Quad(item[1]))
        return (cmd, *(Point(p) for p in item[1:]))

    stroke_only = (
        "stroke_opacity", "dashes", "line_join", "line_cap", "color", "width",
    )
    raw_paths = self.get_cdrawings(extended=True)
    result = self.Drawpathlist()
    for raw in raw_paths:
        entry = self.Drawpath(**raw)
        if entry.type == "clip":
            entry.scissor = Rect(raw["scissor"])
        else:
            entry.rect = Rect(raw["rect"])
        if entry.type != "group":
            entry.items = [convert(item) for item in raw["items"]]
        if entry.type == "f":
            for name in stroke_only:
                setattr(entry, name, None)
        result.append(entry)
    return result
| 9220 | |
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    Returns:
        ``Identity`` if the page is not rotated. Otherwise the inverse of
        the matrix that was prepended to the page contents; the same
        matrix is applied to the rectangles of annotations, links and
        widgets.
    """
    rot = self.rotation  # normalized rotation value
    if rot == 0:
        return Identity  # nothing to do

    # need to derotate the page's content
    mb = self.mediabox  # current mediabox

    if rot == 90:
        # before derotation, shift content horizontally
        mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
    elif rot == 270:
        # before derotation, shift content vertically
        mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
    else:  # rot = 180
        mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)

    # prefix with derotation matrix
    mat = mat0 * self.derotation_matrix
    cmd = _format_g(tuple(mat)) + ' cm '
    cmd = cmd.encode('utf8')
    _ = TOOLS._insert_contents(self, cmd, False)  # prepend to page contents

    # swap x- and y-coordinates
    if rot in (90, 270):
        x0, y0, x1, y1 = mb
        mb.x0 = y0
        mb.y0 = x0
        mb.x1 = y1
        mb.y1 = x1
        self.set_mediabox(mb)

    self.set_rotation(0)
    # From here on 'rot' no longer holds the angle but a matrix.
    rot = ~mat  # inverse of the derotation matrix

    for annot in self.annots():  # modify rectangles of annotations
        r = annot.rect * rot
        # TODO: only try to set rectangle for applicable annot types
        annot.set_rect(r)
    for link in self.get_links():  # modify 'from' rectangles of links
        r = link["from"] * rot
        # A link is moved by deleting it and inserting it again.
        self.delete_link(link)
        link["from"] = r
        try:  # invalid links remain deleted
            self.insert_link(link)
        except Exception:
            pass
    for widget in self.widgets():  # modify field rectangles
        r = widget.rect * rot
        widget.rect = r
        widget.update()
    return rot  # the inverse of the generated derotation matrix
| 9274 | |
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
    final_filter: bool = True,
) -> list:
    """Group neighboring vector graphics into joint bounding rectangles.

    Args:
        clip: optional rect-like. Only drawings whose rectangle lies fully
            inside it are considered. Defaults to the page rectangle.
        drawings: (optional) result of an earlier "get_drawings()" call. If
            omitted, the drawings are extracted from the page here.
        x_tolerance: largest horizontal gap for two items to be neighbors.
        y_tolerance: largest vertical gap for two items to be neighbors.
        final_filter: if true (default), drop result rectangles whose width
            or height does not exceed the respective tolerance value.

    Notes:
        Line-art usually consists of many independent items (lines, curves,
        rectangles) that together form table grids, charts and the like.
        This method computes rectangles wrapping such groups of items.

    Returns:
        A list of rectangles, sorted by (y1, x0). Each one is the union of
        drawing rectangles that are (transitively) neighbors.
    """
    CheckParent(self)
    area = self.rect  # default clipping area is the whole page
    if clip is not None:
        area = Rect(clip)
    if drawings is None:  # no previous output to re-use
        drawings = self.get_drawings()

    def ordered(a, b):
        """Return the two coordinates in ascending order."""
        return (a, b) if b > a else (b, a)

    def sort_key(rect):
        """Order rectangles by bottom edge, then by left edge."""
        return (rect.y1, rect.x0)

    def inside_area(rect):
        """Tell whether 'rect' is contained in the clipping area."""
        return (
            rect.x0 >= area.x0
            and rect.x1 <= area.x1
            and rect.y0 >= area.y0
            and rect.y1 <= area.y1
        )

    def are_neighbors(r1, r2):
        """Tell whether r1 and r2 are at most the tolerances apart.

        Both arguments are rectangles which may be invalid (i.e. have
        swapped corners), so their coordinates are put in order first.
        """
        a_x0, a_x1 = ordered(r1.x0, r1.x1)
        a_y0, a_y1 = ordered(r1.y0, r1.y1)
        b_x0, b_x1 = ordered(r2.x0, r2.x1)
        b_y0, b_y1 = ordered(r2.y0, r2.y1)
        too_far_apart = (
            a_x1 < b_x0 - x_tolerance
            or a_x0 > b_x1 + x_tolerance
            or a_y1 < b_y0 - y_tolerance
            or a_y0 > b_y1 + y_tolerance
        )
        return not too_far_apart

    # rectangles of all graphics that are contained in the clip
    pending = sorted(
        (p["rect"] for p in drawings if inside_area(p["rect"])),
        key=sort_key,
    )

    joined = []  # the resulting joined rectangles

    # Repeatedly take the first pending rectangle and let it absorb every
    # neighbor until it stops growing. This empties the pending list.
    while pending:
        current = +pending[0]  # work on a copy of the first rectangle
        grew = True
        while grew:
            grew = False
            # walk from back to front so deletions keep indices valid
            for i in range(len(pending) - 1, 0, -1):
                candidate = pending[i]
                if are_neighbors(candidate, current):
                    current |= candidate.tl
                    current |= candidate.br
                    del pending[i]
                    grew = True

        joined.append(current)
        del pending[0]
        pending = sorted(set(pending), key=sort_key)

    joined = sorted(set(joined), key=sort_key)
    if not final_filter:
        return joined
    return [r for r in joined if r.width > x_tolerance and r.height > y_tolerance]
| 9373 | |
def get_fonts(self, full=False):
    """Return the fonts defined in the page object.

    Delegates to the owning document's get_page_fonts() for this page
    number, passing 'full' through unchanged.
    """
    CheckParent(self)
    doc = self.parent
    return doc.get_page_fonts(self.number, full=full)
| 9378 | |
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    Args:
        name: either an item of the page's full image list (a list or
            tuple whose last entry is an int), or the referencing name
            string - elem[7] of the resp. item.
        transform: if true, also return the image transformation matrix.

    Returns:
        The image rectangle, or the tuple (rectangle, matrix) if
        'transform' is set. If the image cannot be located, the rectangle
        is Rect(1, 1, -1, -1) and the matrix is a default Matrix().

    Raises:
        ValueError: document closed or encrypted, 'name' is a sequence
            that is no full image list item, or the name string matches
            no image or more than one image.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    # "not found" result, already in the shape the caller asked for
    if transform:
        rc = (inf_rect, null_mat)
    else:
        rc = inf_rect

    if type(name) in (list, tuple):
        if type(name[-1]) is not int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)
    xref = item[-1]
    if xref != 0 or transform:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # Fix: return the default in the requested shape. Previously a
            # bare rectangle was returned even when transform was set.
            return rc
    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)

    if not bool(val):
        return rc

    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break

        # derive the matrix from the image quad's orientation and size
        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break

    return rc
| 9442 | |
def get_images(self, full=False):
    """Return the images defined in the page object.

    Delegates to the owning document's get_page_images() for this page
    number, passing 'full' through unchanged.
    """
    CheckParent(self)
    doc = self.parent
    return doc.get_page_images(self.number, full=full)
| 9447 | |
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Each resource property of the page is classified by looking for
    "/Type/OCG" or "/Type/OCMD" in the compressed source of its object.
    Properties matching neither are left out.

    Returns:
        List of items (name, xref, type), where type is one of "ocg" / "ocmd",
        and name is the property name.
    """
    # checked in this order, first match wins
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    items = []
    for pname, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        octype = next((kind for marker, kind in markers if marker in source), None)
        if octype is not None:
            items.append((pname, xref, octype))
    return items
| 9466 | |
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: matrix-like transformation applied when running the page.
        text_as_path: if equal to 1, text is emitted as paths, otherwise
            as text.

    Returns:
        The SVG source as a string.
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    ctm = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_option = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
    # the SVG canvas has the size of the transformed page rectangle
    bounds = mupdf.fz_transform_rect(page_bounds, ctm)
    width = bounds.x1 - bounds.x0
    height = bounds.y1 - bounds.y0

    buffer = mupdf.fz_new_buffer(1024)
    out = mupdf.FzOutput(buffer)
    # NOTE(review): the trailing 1 is presumably MuPDF's "reuse images" flag - confirm
    dev = mupdf.fz_new_svg_device(out, width, height, text_option, 1)
    mupdf.fz_run_page(self.this, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    out.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer)
| 9490 | |
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage=None,  #: TextPage = None,
) -> str:
    """Extract the text contained in a rectangle.

    Args:
        page: the page (this method's instance).
        rect: rect-like, handed to the text page's extractTextbox().
        textpage: optional text page to re-use. Its "parent" must be this
            page. If omitted, a temporary text page is created.

    Returns:
        The result of extractTextbox(rect).

    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    if textpage is None:
        # no text page supplied: make a temporary one and discard it again
        temp_tp = page.get_textpage()
        text = temp_tp.extractTextbox(rect)
        del temp_tp
        return text
    if getattr(textpage, "parent") != page:
        raise ValueError("not a textpage of this page")
    return textpage.extractTextbox(rect)
| 9505 | |
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for this page.

    The page rotation is temporarily set to 0 while the text page is
    built and is restored afterwards, also if building fails.

    Args:
        clip: rect-like, passed on to the text page creation.
        flags: passed on to the text page creation.
        matrix: matrix-like; Matrix(1, 1) is used if omitted.

    Returns:
        A TextPage whose "parent" is a weak proxy of this page.
    """
    CheckParent(self)
    if matrix is None:
        matrix = Matrix(1, 1)
    saved_rotation = self.rotation
    is_rotated = saved_rotation != 0
    if is_rotated:
        self.set_rotation(0)
    try:
        raw_textpage = self._get_textpage(clip, flags=flags, matrix=matrix)
    finally:
        if is_rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw_textpage)
    result.parent = weakref.proxy(self)
    return result
| 9521 | |
def get_texttrace(self):
    """Run the page through a text trace device and return what it collected.

    The page rotation is temporarily set to 0 for the run. It is restored
    afterwards, also when the run raises an exception (the same protection
    get_textpage() uses), so a failure no longer leaves the page derotated.

    Returns:
        The list that was filled by the text trace device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        # flip the y-axis relative to the page's bottom edge
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
| 9542 | |
def get_xobjects(self):
    """Return the xobjects defined in the page object.

    Delegates to the owning document's get_page_xobjects() for this page
    number.
    """
    CheckParent(self)
    doc = self.parent
    return doc.get_page_xobjects(self.number)
| 9547 | |
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
                set_simple=False, wmode=0, encoding=0):
    """Make a font available to this page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is dropped.
        fontfile: font file as a str, an object with an "absolute"
            attribute (converted with str()), or an object with a "name"
            attribute.
        fontbuffer: font file content in memory. Replaced by the result of
            pymupdf_fonts.myfont() if the lower-cased name is a key of
            fitz_fontdescriptors.
        set_simple, wmode, encoding: passed on to the font installation.

    Returns:
        The xref of the font. If the installation yields a false value,
        that value is returned instead.

    Raises:
        ValueError: page has no parent, the font name contains invalid
            characters, or 'fontfile' has an unsupported type.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars != set():
        raise ValueError(f"bad fontname chars {inv_chars}")

    known_font = CheckFont(self, fontname)
    if known_font is not None:  # the page already lists this font
        xref = known_font[0]
        if not CheckFontInfo(doc, xref):
            # not yet in the document's font info: get_char_widths builds it
            doc.get_char_widths(xref)
        return xref

    #--------------------------------------------------------------------------
    # the font is not present for this page
    #--------------------------------------------------------------------------

    bfname = Base14_fontdict.get(fontname.lower(), None)  # BaseFont if Base-14 font

    # Look the name up among the CJK names: sans-serif names first, then
    # the serif ones. The position in the matching list is the CJK number.
    CJK_list_n = ["china-t", "china-s", "japan", "korea"]
    CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]
    serif = 0
    CJK_number = -1
    for serif_flag, cjk_names in ((0, CJK_list_n), (1, CJK_list_s)):
        try:
            CJK_number = cjk_names.index(fontname)
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1:
                exception_info()
        else:
            serif = serif_flag
            break

    if fontname.lower() in fitz_fontdescriptors:
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # install the font for the page
    if fontfile is None:
        fontfile_str = None
    elif type(fontfile) is str:
        fontfile_str = fontfile
    elif hasattr(fontfile, "absolute"):
        fontfile_str = str(fontfile)
    elif hasattr(fontfile, "name"):
        fontfile_str = fontfile.name
    else:
        raise ValueError("bad fontfile")
    val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
                           wmode, serif, encoding, CJK_number)

    if not val:  # did not work, error return
        return val

    xref, fontdict = val[0], val[1]  # xref of installed font, its font dict

    if not CheckFontInfo(doc, xref):
        # the document does not know this font yet: create its font info
        doc.get_char_widths(xref, fontdict=fontdict)
    return xref
| 9630 | |
@property
def is_wrapped(self):
    """Check if /Contents is in a balanced graphics state.

    True if _count_q_balance() reports (0, 0).
    """
    balance = self._count_q_balance()
    return balance == (0, 0)
| 9635 | |
@property
def language(self):
    """Page language.

    Returns:
        The value of the (inheritable) /Lang entry of the page object, or
        None if this is no PDF page or no such entry exists.
    """
    pdfpage = _as_pdf_page(self.this, required=False)
    if pdfpage.m_internal:
        lang = mupdf.pdf_dict_get_inheritable(pdfpage.obj(), PDF_NAME('Lang'))
        if lang.m_internal:
            return mupdf.pdf_to_str_buf(lang)
    return None
| 9646 | |
def links(self, kinds=None):
    """ Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If None,
               every link is yielded. E.g. kinds=[LINK_URI]
               will only yield URI links.

    Yields:
        The link dictionaries returned by get_links() whose "kind" value
        is contained in 'kinds'.
    """
    for link in self.get_links():
        if kinds is not None and link["kind"] not in kinds:
            continue
        yield link
| 9659 | |
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int).

    Returns:
        The loaded annotation, registered with this page, or the false
        value delivered by the lookup if nothing was found.

    Raises:
        ValueError: 'ident' is neither a str nor an int.
    """
    CheckParent(self)
    ident_type = type(ident)
    if ident_type is str:
        name, xref = ident, 0
    elif ident_type is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    annot = self._load_annot(name, xref)
    if annot:
        annot.thisown = True
        annot.parent = weakref.proxy(self)
        # keep track of the annotation in the page's reference dictionary
        self._annot_refs[id(annot)] = annot
    return annot
| 9682 | |
def load_links(self):
    """Get first Link.

    Returns:
        The first Link of the page, registered with this page, or None if
        there are no links. For PDF documents its "xref" and "id" are taken
        from the first link entry of annot_xrefs(); otherwise they are 0
        and "".
    """
    CheckParent(self)
    raw_link = mupdf.fz_load_links(self.this)
    if not raw_link.m_internal:
        return None
    link = Link(raw_link)
    link.thisown = True
    link.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(link)] = link
    # defaults, kept unless a PDF link annotation is found below
    link.xref = 0
    link.id = ""
    if self.parent.is_pdf:
        link_entries = [x for x in self.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
        if link_entries:
            first_entry = link_entries[0]
            link.xref = first_entry[0]
            link.id = first_entry[2]
    return link
| 9706 | |
| 9707 #---------------------------------------------------------------- | |
| 9708 # page load widget by xref | |
| 9709 #---------------------------------------------------------------- | |
def load_widget(self, xref):
    """Load a widget by its xref.

    Returns:
        A Widget filled from the annotation found for 'xref', or the false
        value delivered by the lookup if there is none.
    """
    CheckParent(self)

    pdf_page = _as_pdf_page(self.this)
    annot = JM_get_widget_by_xref(pdf_page, xref)
    if not annot:
        return annot
    annot.thisown = True
    annot.parent = weakref.proxy(self)
    # keep track of the annotation in the page's reference dictionary
    self._annot_refs[id(annot)] = annot
    widget = Widget()
    TOOLS._fill_widget(annot, widget)
    return widget
| 9727 | |
@property
def mediabox(self):
    """The MediaBox.

    For PDF pages it is derived from the page object, otherwise the
    bounds of the page are used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        raw_rect = JM_mediabox(pdf_page.obj())
    else:
        raw_rect = mupdf.fz_bound_page(self.this)
    return Rect(raw_rect)
| 9738 | |
@property
def mediabox_size(self):
    """Point made of the x1 and y1 coordinates of the MediaBox."""
    box = self.mediabox
    return Point(box.x1, box.y1)
| 9742 | |
| 9743 #@property | |
| 9744 #def parent( self): | |
| 9745 # assert self._parent | |
| 9746 # if self._parent: | |
| 9747 # return self._parent | |
| 9748 # return Document( self.this.document()) | |
| 9749 | |
def read_contents(self):
    """All /Contents streams concatenated to one bytes object."""
    contents = TOOLS._get_all_contents(self)
    return contents
| 9753 | |
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Asks the owning document to reload this page and stores the result
    as the new underlying page object.
    """
    CheckParent(self)
    # fixme this looks wrong.
    self.this = self.parent.reload_page(self)
| 9761 | |
@property
def rotation(self):
    """Page rotation.

    Always 0 if the page is no PDF page.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this, required=0)
    if pdf_page.m_internal:
        return JM_page_rotation(pdf_page)
    return 0
| 9770 | |
@property
def rotation_matrix(self) -> Matrix:
    """Reflects page rotation.

    Matrix built from the values computed by TOOLS._rotate_matrix().
    """
    raw_matrix = TOOLS._rotate_matrix(self)
    return Matrix(raw_matrix)
| 9775 | |
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its "device" attribute receives the page.
        m: matrix-like transformation to apply.
    """
    CheckParent(self)
    page, device = self.this, dw.device
    ctm = JM_matrix_from_py(m)
    mupdf.fz_run_page(page, device, ctm, mupdf.FzCookie())
| 9782 | |
def set_artbox(self, rect):
    """Set the ArtBox.

    Returns whatever _set_pagebox() returns for the "ArtBox" key.
    """
    box_name = "ArtBox"
    return self._set_pagebox(box_name, rect)
| 9786 | |
def set_bleedbox(self, rect):
    """Set the BleedBox.

    Returns whatever _set_pagebox() returns for the "BleedBox" key.
    """
    box_name = "BleedBox"
    return self._set_pagebox(box_name, rect)
| 9790 | |
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: the document is closed or no PDF, 'xref' is outside
            range(1, xref_length()), or the object is no stream.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, doc.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    if not doc.xref_is_stream(xref):
        raise ValueError("xref is no stream")
    # store an indirect reference to the stream under the /Contents key
    reference = "%i 0 R" % xref
    doc.xref_set_key(self.xref, "Contents", reference)
| 9804 | |
def set_cropbox(self, rect):
    """Set the CropBox. Will also change Page.rect.

    Returns whatever _set_pagebox() returns for the "CropBox" key.
    """
    box_name = "CropBox"
    return self._set_pagebox(box_name, rect)
| 9808 | |
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string to store under the page's /Lang key.
            If false (None, empty string), the /Lang entry is removed.
    """
    CheckParent(self)
    pdfpage = _as_pdf_page(self.this)
    # Fix: obj() must be called. The original passed the bound method
    # itself (pdfpage.obj) when storing a language.
    page_obj = pdfpage.obj()
    if not language:
        mupdf.pdf_dict_del(page_obj, PDF_NAME('Lang'))
    else:
        lang = mupdf.fz_text_language_from_string(language)
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        mupdf.pdf_dict_put_text_string(
            page_obj,
            PDF_NAME('Lang'),
            mupdf.fz_string_from_text_language2(lang)
        )
| 9823 | |
def set_mediabox(self, rect):
    """Set the MediaBox.

    The entries CropBox, ArtBox, BleedBox and TrimBox are removed from
    the page object.

    Raises:
        ValueError: 'rect' is empty or infinite.
    """
    CheckParent(self)
    page = self._pdf_page()
    mediabox = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
        raise ValueError(MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect(page.obj(), PDF_NAME('MediaBox'), mediabox)
    # the other page boxes are dropped
    for box_name in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del(page.obj(), PDF_NAME(box_name))
| 9838 | |
def set_rotation(self, rotation):
    """Set page rotation.

    The value is normalized by JM_norm_rotation() and stored under the
    /Rotate key of the page object.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int(pdf_page.obj(), PDF_NAME('Rotate'), normalized)
| 9845 | |
def set_trimbox(self, rect):
    """Set the TrimBox.

    Returns whatever _set_pagebox() returns for the "TrimBox" key.
    """
    box_name = "TrimBox"
    return self._set_pagebox(box_name, rect)
| 9849 | |
@property
def transformation_matrix(self):
    """Page transformation matrix.

    Returns:
        For non-PDF pages the Python form of a default FzMatrix. For PDF
        pages with a rotation that is a multiple of 360 the matrix reported
        by MuPDF's page transform, otherwise a matrix flipping the y-axis
        relative to the CropBox height.
    """
    CheckParent(self)

    ctm = mupdf.FzMatrix()
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return JM_py_from_matrix(ctm)
    mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(page, mediabox, ctm)
    page_ctm = JM_py_from_matrix(ctm)

    if self.rotation % 360 != 0:
        return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
    return Matrix(page_ctm)
| 9868 | |
@property
def trimbox(self):
    """The TrimBox

    Falls back to the CropBox if the page has no TrimBox. Otherwise the
    y-coordinates of the stored box are flipped relative to the bottom
    edge (y1) of the MediaBox.
    """
    raw_box = self._other_box("TrimBox")
    if raw_box is None:
        return self.cropbox
    bottom = self.mediabox.y1
    return Rect(raw_box[0], bottom - raw_box[3], raw_box[2], bottom - raw_box[1])
| 9877 | |
def widgets(self, types=None):
    """ Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If None,
                every field is yielded. E.g. types=[PDF_WIDGET_TYPE_TEXT]
                will only yield text fields.

    Yields:
        The result of load_widget() for each widget entry of
        annot_xrefs() whose "field_type" is contained in 'types'.
    """
    # collect all widget xrefs first, then load them one by one
    xrefs = [
        entry[0]
        for entry in self.annot_xrefs()
        if entry[1] == mupdf.PDF_ANNOT_WIDGET
    ]
    for xref in xrefs:
        widget = self.load_widget(xref)
        if types is not None and widget.field_type not in types:
            continue
        yield widget
| 9894 | |
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Inserts as many "q" commands before and "Q" commands after the page's
    contents as _count_q_balance() reports to be missing.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:  # prepend the required push commands
        TOOLS._insert_contents(self, b"q\n" * missing_push, False)
    if missing_pop > 0:  # append the required pop commands
        TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
| 9904 | |
@property
def xref(self):
    """PDF xref number of page.

    Looked up via the owning document's page_xref() for this page number.
    """
    CheckParent(self)
    doc = self.parent
    return doc.page_xref(self.number)
| 9910 | |
# Read-only attribute "rect": evaluates bound() (defined earlier in the
# class, outside this excerpt) on access. No setter or deleter is given.
rect = property(bound, doc="page rectangle")
| 9912 | |
| 9913 | |
| 9914 class Pixmap: | |
| 9915 | |
| 9916 def __init__(self, *args): | |
| 9917 """ | |
| 9918 Pixmap(colorspace, irect, alpha) - empty pixmap. | |
| 9919 Pixmap(colorspace, src) - copy changing colorspace. | |
| 9920 Pixmap(src, width, height,[clip]) - scaled copy, float dimensions. | |
| 9921 Pixmap(src, alpha=1) - copy and add or drop alpha channel. | |
| 9922 Pixmap(filename) - from an image in a file. | |
| 9923 Pixmap(image) - from an image in memory (bytes). | |
| 9924 Pixmap(colorspace, width, height, samples, alpha) - from samples data. | |
| 9925 Pixmap(PDFdoc, xref) - from an image at xref in a PDF document. | |
| 9926 """ | |
| 9927 # Cache for property `self.samples_mv`. Set here so __del_() sees it if | |
| 9928 # we raise. | |
| 9929 # | |
| 9930 self._samples_mv = None | |
| 9931 | |
| 9932 # 2024-01-16: Experimental support for a memory-view of the underlying | |
| 9933 # data. Doesn't seem to make much difference to Pixmap.set_pixel() so | |
| 9934 # not currently used. | |
| 9935 self._memory_view = None | |
| 9936 | |
| 9937 if 0: | |
| 9938 pass | |
| 9939 | |
| 9940 elif args_match(args, | |
| 9941 (Colorspace, mupdf.FzColorspace), | |
| 9942 (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple) | |
| 9943 ): | |
| 9944 # create empty pixmap with colorspace and IRect | |
| 9945 cs, rect = args | |
| 9946 alpha = 0 | |
| 9947 pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha) | |
| 9948 self.this = pm | |
| 9949 | |
| 9950 elif args_match(args, | |
| 9951 (Colorspace, mupdf.FzColorspace), | |
| 9952 (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple), | |
| 9953 (int, bool) | |
| 9954 ): | |
| 9955 # create empty pixmap with colorspace and IRect | |
| 9956 cs, rect, alpha = args | |
| 9957 pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha) | |
| 9958 self.this = pm | |
| 9959 | |
| 9960 elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)): | |
| 9961 # copy pixmap, converting colorspace | |
| 9962 cs, spix = args | |
| 9963 if isinstance(cs, Colorspace): | |
| 9964 cs = cs.this | |
| 9965 elif cs is None: | |
| 9966 cs = mupdf.FzColorspace(None) | |
| 9967 if isinstance(spix, Pixmap): | |
| 9968 spix = spix.this | |
| 9969 if not mupdf.fz_pixmap_colorspace(spix).m_internal: | |
| 9970 raise ValueError( "source colorspace must not be None") | |
| 9971 | |
| 9972 if cs.m_internal: | |
| 9973 self.this = mupdf.fz_convert_pixmap( | |
| 9974 spix, | |
| 9975 cs, | |
| 9976 mupdf.FzColorspace(), | |
| 9977 mupdf.FzDefaultColorspaces(None), | |
| 9978 mupdf.FzColorParams(), | |
| 9979 1 | |
| 9980 ) | |
| 9981 else: | |
| 9982 self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix) | |
| 9983 if not self.this.m_internal: | |
| 9984 raise RuntimeError( MSG_PIX_NOALPHA) | |
| 9985 | |
| 9986 elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)): | |
| 9987 # add mask to a pixmap w/o alpha channel | |
| 9988 spix, mpix = args | |
| 9989 if isinstance(spix, Pixmap): | |
| 9990 spix = spix.this | |
| 9991 if isinstance(mpix, Pixmap): | |
| 9992 mpix = mpix.this | |
| 9993 spm = spix | |
| 9994 mpm = mpix | |
| 9995 if not spix.m_internal: # intercept NULL for spix: make alpha only pix | |
| 9996 dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm) | |
| 9997 if not dst.m_internal: | |
| 9998 raise RuntimeError( MSG_PIX_NOALPHA) | |
| 9999 else: | |
| 10000 dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm) | |
| 10001 self.this = dst | |
| 10002 | |
| 10003 elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or | |
| 10004 args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))): | |
| 10005 # create pixmap as scaled copy of another one | |
| 10006 if len(args) == 3: | |
| 10007 spix, w, h = args | |
| 10008 bbox = mupdf.FzIrect(mupdf.fz_infinite_irect) | |
| 10009 else: | |
| 10010 spix, w, h, clip = args | |
| 10011 bbox = JM_irect_from_py(clip) | |
| 10012 | |
| 10013 src_pix = spix.this if isinstance(spix, Pixmap) else spix | |
| 10014 if not mupdf.fz_is_infinite_irect(bbox): | |
| 10015 pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox) | |
| 10016 else: | |
| 10017 pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect)) | |
| 10018 self.this = pm | |
| 10019 | |
| 10020 elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw': | |
| 10021 # Special raw construction where we set .this directly. | |
| 10022 _, pm = args | |
| 10023 if isinstance(pm, Pixmap): | |
| 10024 pm = pm.this | |
| 10025 self.this = pm | |
| 10026 | |
| 10027 elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)): | |
| 10028 # Pixmap(struct Pixmap *spix, int alpha=1) | |
| 10029 # copy pixmap & add / drop the alpha channel | |
| 10030 spix = args[0] | |
| 10031 alpha = args[1] if len(args) == 2 else 1 | |
| 10032 src_pix = spix.this if isinstance(spix, Pixmap) else spix | |
| 10033 if not _INRANGE(alpha, 0, 1): | |
| 10034 raise ValueError( "bad alpha value") | |
| 10035 cs = mupdf.fz_pixmap_colorspace(src_pix) | |
| 10036 if not cs.m_internal and not alpha: | |
| 10037 raise ValueError( "cannot drop alpha for 'NULL' colorspace") | |
| 10038 seps = mupdf.FzSeparations() | |
| 10039 n = mupdf.fz_pixmap_colorants(src_pix) | |
| 10040 w = mupdf.fz_pixmap_width(src_pix) | |
| 10041 h = mupdf.fz_pixmap_height(src_pix) | |
| 10042 pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha) | |
| 10043 pm.m_internal.x = src_pix.m_internal.x | |
| 10044 pm.m_internal.y = src_pix.m_internal.y | |
| 10045 pm.m_internal.xres = src_pix.m_internal.xres | |
| 10046 pm.m_internal.yres = src_pix.m_internal.yres | |
| 10047 | |
| 10048 # copy samples data ------------------------------------------ | |
| 10049 if 1: | |
| 10050 # We use our pixmap_copy() to get best performance. | |
| 10051 # test_pixmap.py:test_setalpha(): 3.9s t=0.0062 | |
| 10052 extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n) | |
| 10053 elif 1: | |
| 10054 # Use memoryview. | |
| 10055 # test_pixmap.py:test_setalpha(): 4.6 t=0.51 | |
| 10056 src_view = mupdf.fz_pixmap_samples_memoryview( src_pix) | |
| 10057 pm_view = mupdf.fz_pixmap_samples_memoryview( pm) | |
| 10058 if src_pix.alpha() == pm.alpha(): # identical samples | |
| 10059 #memcpy(tptr, sptr, w * h * (n + alpha)); | |
| 10060 size = w * h * (n + alpha) | |
| 10061 pm_view[ 0 : size] = src_view[ 0 : size] | |
| 10062 else: | |
| 10063 tptr = 0 | |
| 10064 sptr = 0 | |
| 10065 # This is a little faster than calling | |
| 10066 # pm.fz_samples_set(), but still quite slow. E.g. reduces | |
| 10067 # test_pixmap.py:test_setalpha() from 6.7s to 4.5s. | |
| 10068 # | |
| 10069 # t=0.53 | |
| 10070 pm_stride = pm.stride() | |
| 10071 pm_n = pm.n() | |
| 10072 pm_alpha = pm.alpha() | |
| 10073 src_stride = src_pix.stride() | |
| 10074 src_n = src_pix.n() | |
| 10075 #log( '{=pm_stride pm_n src_stride src_n}') | |
| 10076 for y in range( h): | |
| 10077 for x in range( w): | |
| 10078 pm_i = pm_stride * y + pm_n * x | |
| 10079 src_i = src_stride * y + src_n * x | |
| 10080 pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n] | |
| 10081 if pm_alpha: | |
| 10082 pm_view[ pm_i + n] = 255 | |
| 10083 else: | |
| 10084 # Copy individual bytes from Python. Very slow. | |
| 10085 # test_pixmap.py:test_setalpha(): 6.89 t=2.601 | |
| 10086 if src_pix.alpha() == pm.alpha(): # identical samples | |
| 10087 #memcpy(tptr, sptr, w * h * (n + alpha)); | |
| 10088 for i in range(w * h * (n + alpha)): | |
| 10089 mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i)) | |
| 10090 else: | |
| 10091 # t=2.56 | |
| 10092 tptr = 0 | |
| 10093 sptr = 0 | |
| 10094 src_pix_alpha = src_pix.alpha() | |
| 10095 for i in range(w * h): | |
| 10096 #memcpy(tptr, sptr, n); | |
| 10097 for j in range(n): | |
| 10098 mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j)) | |
| 10099 tptr += n | |
| 10100 if pm.alpha(): | |
| 10101 mupdf.fz_samples_set(pm, tptr, 255) | |
| 10102 tptr += 1 | |
| 10103 sptr += n + src_pix_alpha | |
| 10104 self.this = pm | |
| 10105 | |
| 10106 elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)): | |
| 10107 # create pixmap from samples data | |
| 10108 cs, w, h, samples, alpha = args | |
| 10109 if isinstance(cs, Colorspace): | |
| 10110 cs = cs.this | |
| 10111 assert isinstance(cs, mupdf.FzColorspace) | |
| 10112 n = mupdf.fz_colorspace_n(cs) | |
| 10113 stride = (n + alpha) * w | |
| 10114 seps = mupdf.FzSeparations() | |
| 10115 pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha) | |
| 10116 | |
| 10117 if isinstance( samples, (bytes, bytearray)): | |
| 10118 #log('using mupdf.python_buffer_data()') | |
| 10119 samples2 = mupdf.python_buffer_data(samples) | |
| 10120 size = len(samples) | |
| 10121 else: | |
| 10122 res = JM_BufferFromBytes(samples) | |
| 10123 if not res.m_internal: | |
| 10124 raise ValueError( "bad samples data") | |
| 10125 size, c = mupdf.fz_buffer_storage(res) | |
| 10126 samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`. | |
| 10127 if stride * h != size: | |
| 10128 raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}") | |
| 10129 mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2) | |
| 10130 self.this = pm | |
| 10131 | |
| 10132 elif args_match(args, None): | |
| 10133 # create pixmap from filename, file object, pathlib.Path or memory | |
| 10134 imagedata, = args | |
| 10135 name = 'name' | |
| 10136 if hasattr(imagedata, "resolve"): | |
| 10137 fname = imagedata.__str__() | |
| 10138 if fname: | |
| 10139 img = mupdf.fz_new_image_from_file(fname) | |
| 10140 elif hasattr(imagedata, name): | |
| 10141 fname = imagedata.name | |
| 10142 if fname: | |
| 10143 img = mupdf.fz_new_image_from_file(fname) | |
| 10144 elif isinstance(imagedata, str): | |
| 10145 img = mupdf.fz_new_image_from_file(imagedata) | |
| 10146 else: | |
| 10147 res = JM_BufferFromBytes(imagedata) | |
| 10148 if not res.m_internal or not res.m_internal.len: | |
| 10149 raise ValueError( "bad image data") | |
| 10150 img = mupdf.fz_new_image_from_buffer(res) | |
| 10151 | |
| 10152 # Original code passed null for subarea and ctm, but that's not | |
| 10153 # possible with MuPDF's python bindings. The equivalent is an | |
| 10154 # infinite rect and identify matrix scaled by img.w() and img.h(). | |
| 10155 pm, w, h = mupdf.fz_get_pixmap_from_image( | |
| 10156 img, | |
| 10157 mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT), | |
| 10158 mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0), | |
| 10159 ) | |
| 10160 xres, yres = mupdf.fz_image_resolution(img) | |
| 10161 pm.m_internal.xres = xres | |
| 10162 pm.m_internal.yres = yres | |
| 10163 self.this = pm | |
| 10164 | |
| 10165 elif args_match(args, (Document, mupdf.FzDocument), int): | |
| 10166 # Create pixmap from PDF image identified by XREF number | |
| 10167 doc, xref = args | |
| 10168 pdf = _as_pdf_document(doc) | |
| 10169 xreflen = mupdf.pdf_xref_len(pdf) | |
| 10170 if not _INRANGE(xref, 1, xreflen-1): | |
| 10171 raise ValueError( MSG_BAD_XREF) | |
| 10172 ref = mupdf.pdf_new_indirect(pdf, xref, 0) | |
| 10173 type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype')) | |
| 10174 if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image')) | |
| 10175 and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha')) | |
| 10176 and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity')) | |
| 10177 ): | |
| 10178 raise ValueError( MSG_IS_NO_IMAGE) | |
| 10179 img = mupdf.pdf_load_image(pdf, ref) | |
| 10180 # Original code passed null for subarea and ctm, but that's not | |
| 10181 # possible with MuPDF's python bindings. The equivalent is an | |
| 10182 # infinite rect and identify matrix scaled by img.w() and img.h(). | |
| 10183 pix, w, h = mupdf.fz_get_pixmap_from_image( | |
| 10184 img, | |
| 10185 mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT), | |
| 10186 mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0), | |
| 10187 ) | |
| 10188 self.this = pix | |
| 10189 | |
| 10190 else: | |
| 10191 text = 'Unrecognised args for constructing Pixmap:\n' | |
| 10192 for arg in args: | |
| 10193 text += f' {type(arg)}: {arg}\n' | |
| 10194 raise Exception( text) | |
| 10195 | |
def __len__(self):
    """Return the value of the ``size`` property."""
    total = self.size
    return total
| 10198 | |
def __repr__(self):
    """Return ``Pixmap(<colorspace name>, <irect>, <alpha>)``.

    Objects whose exact type is not ``Pixmap`` get the default ``object``
    representation. ``__repr__`` must always return a string: the previous
    bare ``return`` produced ``None``, which makes ``repr()`` raise
    ``TypeError``.
    """
    if type(self) is not Pixmap:
        return object.__repr__(self)
    cs = self.colorspace
    # A pixmap without colorspace is shown with the literal name 'None'.
    cs_name = cs.this.m_internal.name if cs else 'None'
    return "Pixmap(%s, %s, %s)" % (cs_name, self.irect, self.alpha)
| 10205 | |
def _tobytes(self, format_, jpg_quality):
    '''
    Write the pixmap into a memory buffer in the requested image format.

    format_ selects the writer: 1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG.
    Any other value is written as PNG. jpg_quality is only passed to the
    JPEG writer. Returns the result of JM_BinFromBuffer() on the buffer.
    '''
    pixmap = self.this
    # Size the buffer from the raw sample data (stride * height).
    capacity = mupdf.fz_pixmap_stride(pixmap) * pixmap.h()
    buffer_ = mupdf.fz_new_buffer(capacity)
    output = mupdf.FzOutput(buffer_)
    if format_ == 7:
        mupdf.fz_write_pixmap_as_jpeg(output, pixmap, jpg_quality, 0)
    else:
        writers = (
            (1, 'fz_write_pixmap_as_png'),
            (2, 'fz_write_pixmap_as_pnm'),
            (3, 'fz_write_pixmap_as_pam'),
            (5, 'fz_write_pixmap_as_psd'),
            (6, 'fz_write_pixmap_as_ps'),
        )
        writer_name = next(
            (name for code, name in writers if format_ == code),
            'fz_write_pixmap_as_png',
        )
        getattr(mupdf, writer_name)(output, pixmap)
    output.fz_close_output()
    return JM_BinFromBuffer(buffer_)
| 10226 | |
def _writeIMG(self, filename, format_, jpg_quality):
    """Save the pixmap to `filename` in the format selected by `format_`.

    Codes: 1 PNG, 2 PNM, 3 PAM, 5 PSD, 6 PS, 7 JPEG; anything else is
    saved as PNG. `jpg_quality` is only passed to the JPEG writer.
    """
    pixmap = self.this
    if format_ == 7:
        mupdf.fz_save_pixmap_as_jpeg(pixmap, filename, jpg_quality)
        return
    savers = (
        (1, 'fz_save_pixmap_as_png'),
        (2, 'fz_save_pixmap_as_pnm'),
        (3, 'fz_save_pixmap_as_pam'),
        (5, 'fz_save_pixmap_as_psd'),
        (6, 'fz_save_pixmap_as_ps'),
    )
    saver_name = next(
        (name for code, name in savers if format_ == code),
        'fz_save_pixmap_as_png',
    )
    getattr(mupdf, saver_name)(pixmap, filename)
| 10236 | |
@property
def alpha(self):
    """Alpha indicator of the pixmap, as returned by fz_pixmap_alpha()."""
    has_alpha = mupdf.fz_pixmap_alpha(self.this)
    return has_alpha
| 10241 | |
def clear_with(self, value=None, bbox=None):
    """Clear the pixmap, optionally with a value and restricted to a bbox.

    Without `value` the whole pixmap is cleared via fz_clear_pixmap() and
    `bbox` is ignored. With `value` only, the whole pixmap is cleared with
    that value. With both, only the rectangle `bbox` is cleared.
    """
    pixmap = self.this
    if value is None:
        mupdf.fz_clear_pixmap(pixmap)
        return
    if bbox is None:
        mupdf.fz_clear_pixmap_with_value(pixmap, value)
        return
    JM_clear_pixmap_rect_with_value(pixmap, value, JM_irect_from_py(bbox))
| 10250 | |
def color_count(self, colors=0, clip=None):
    '''
    Count the colors of the pixmap (optionally inside `clip`).

    Returns the result of JM_color_count() when `colors` is true,
    otherwise only the number of entries in that result.
    '''
    histogram = JM_color_count(self.this, clip)
    return histogram if colors else len(histogram)
| 10260 | |
def color_topusage(self, clip=None):
    """Return (usage ratio, pixel) of the most frequent color.

    If no pixels are counted, (1, <n bytes of 255>) is returned.
    """
    # A clip that contains the whole pixmap is replaced by the pixmap's own rect.
    if clip is not None and self.irect in Rect(clip):
        clip = self.irect
    histogram = self.color_count(colors=True, clip=clip)
    total = sum(histogram.values())
    if not total:
        return (1, bytes([255] * self.n))
    # max() returns the first entry holding the highest count.
    top_pixel, top_count = max(histogram.items(), key=lambda item: item[1])
    return (top_count / total, top_pixel)
| 10275 | |
@property
def colorspace(self):
    """Colorspace of the pixmap, or None if its name is "None"."""
    wrapped = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
    return None if wrapped.name == "None" else wrapped
| 10283 | |
def copy(self, src, bbox):
    """Copy the area `bbox` from another Pixmap into this one.

    Args:
        src: source Pixmap.
        bbox: rect-like area to copy.
    Raises:
        ValueError: if the source has no colorspace, or if source and
            target differ in their alpha setting.
    """
    pm = self.this
    src_pix = src.this
    # Test the wrapped pointer, as invert_irect() does: the wrapper object
    # itself exists even when the underlying colorspace is NULL.
    if not mupdf.fz_pixmap_colorspace(src_pix).m_internal:
        raise ValueError("cannot copy pixmap with NULL colorspace")
    if pm.alpha() != src_pix.alpha():
        raise ValueError("source and target alpha must be equal")
    mupdf.fz_copy_pixmap_rect(pm, src_pix, JM_irect_from_py(bbox), mupdf.FzDefaultColorspaces(None))
| 10293 | |
@property
def digest(self):
    """MD5 digest of the pixmap as a bytes object."""
    return bytes(mupdf.fz_md5_pixmap2(self.this))
| 10299 | |
def gamma_with(self, gamma):
    """Apply gamma correction with some float.

    gamma=1 is a no-op. Pixmaps without a colorspace are left untouched
    and a warning message is issued instead.
    """
    # Test the wrapped pointer, as invert_irect() does: the wrapper object
    # itself exists even when the underlying colorspace is NULL.
    if not mupdf.fz_pixmap_colorspace(self.this).m_internal:
        message_warning("colorspace invalid for function")
        return
    mupdf.fz_gamma_pixmap(self.this, gamma)
| 10307 | |
@property
def h(self):
    """Pixmap height, as returned by fz_pixmap_height()."""
    height = mupdf.fz_pixmap_height(self.this)
    return height
| 10312 | |
def invert_irect(self, bbox=None):
    """Invert the colors of the pixmap, or only those inside `bbox`.

    Returns False (after a warning) for a pixmap without colorspace,
    True otherwise.
    """
    pixmap = self.this
    colorspace = mupdf.fz_pixmap_colorspace(pixmap)
    if not colorspace.m_internal:
        message_warning("ignored for stencil pixmap")
        return False
    rect = JM_irect_from_py(bbox)
    # An infinite rectangle means: invert the complete pixmap.
    if mupdf.fz_is_infinite_irect(rect):
        mupdf.fz_invert_pixmap(pixmap)
    else:
        mupdf.fz_invert_pixmap_rect(pixmap, rect)
    return True
| 10325 | |
@property
def irect(self):
    """Bounding box of the pixmap, converted by JM_py_from_irect()."""
    return JM_py_from_irect(mupdf.fz_pixmap_bbox(self.this))
| 10331 | |
@property
def is_monochrome(self):
    """Whether MuPDF reports the pixmap as monochrome."""
    monochrome = mupdf.fz_is_pixmap_monochrome(self.this)
    return monochrome
| 10336 | |
@property
def is_unicolor(self):
    '''
    True if every pixel has the same component values as the first one.
    '''
    pixmap = self.this
    n = pixmap.n()
    total = pixmap.w() * pixmap.h() * n

    def read_pixel(start):
        # Component values of the pixel whose first sample is at `start`.
        return [mupdf.fz_samples_get(pixmap, start + k) for k in range(n)]

    reference = None
    for start in range(0, total, n):
        current = read_pixel(start)
        if reference is None:
            # First pixel: everything else is compared against it.
            reference = current
        elif current != reference:
            return False
    return True
| 10358 | |
@property
def n(self):
    """The size of one pixel.

    Without `g_use_extra` this is the value of fz_pixmap_components().
    """
    if g_use_extra:
        # Setting self.__class__.n gives a small reduction in overhead of
        # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
        #return extra.pixmap_n(self.this)
        #
        # On the first access the class attribute `n` is replaced by a
        # property that calls extra.pixmap_n() directly, so later accesses
        # (on any instance of this class) skip the `g_use_extra` test.
        def n2(self):
            return extra.pixmap_n(self.this)
        self.__class__.n = property(n2)
        # `self.n` now resolves to the replacement property.
        return self.n
    return mupdf.fz_pixmap_components(self.this)
| 10371 | |
def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
    '''
    Save pixmap as an OCR-ed PDF page.

    `filename` is either a str path or an object accepted by
    JM_new_output_fileptr(). `language` and `tessdata` are only set on
    the OCR options when truthy.
    '''
    tessdata = get_tessdata(tessdata)
    options = mupdf.FzPdfocrOptions()
    options.compress = compress
    if language:
        options.language_set2(language)
    if tessdata:
        options.datadir_set2(tessdata)
    pixmap = self.this
    if not isinstance(filename, str):
        output = JM_new_output_fileptr(filename)
        try:
            mupdf.fz_write_pixmap_as_pdfocr(output, pixmap, options)
        finally:
            output.fz_close_output()    # Avoid MuPDF warning.
        return
    mupdf.fz_save_pixmap_as_pdfocr(pixmap, filename, 0, options)
| 10392 | |
def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
    """Return the pixmap as an OCR-ed PDF page in memory.

    Args:
        compress: (bool) compress, default 1 (True).
        language: (str) language(s) occurring on page, default "eng" (English),
            multiples like "eng+ger" for English and German.
        tessdata: (str) folder name of Tesseract's language support. If None
            we use environment variable TESSDATA_PREFIX or search for
            Tesseract installation.
    Notes:
        On failure, make sure Tesseract is installed and you have set
        <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
        containing your Tesseract's language support data.
    """
    tessdata = get_tessdata(tessdata)
    # Let pdfocr_save() write into a memory stream and hand back its content.
    stream = io.BytesIO()
    self.pdfocr_save(stream, compress=compress, language=language, tessdata=tessdata)
    return stream.getvalue()
| 10413 | |
def pil_image(self):
    """Build a Pillow Image from the pixmap's samples.

    Mode selection: no colorspace -> "L"; 1 component -> "L"/"LA";
    3 components -> "RGB"/"RGBA" (depending on alpha); otherwise "CMYK".
    A missing Pillow installation is reported and the ImportError re-raised.
    """
    try:
        from PIL import Image
    except ImportError:
        message("PIL/Pillow not installed")
        raise

    cspace = self.colorspace
    if cspace and cspace.n == 3:
        mode = "RGBA" if self.alpha else "RGB"
    elif cspace and cspace.n == 1:
        mode = "LA" if self.alpha else "L"
    elif cspace:
        mode = "CMYK"
    else:
        mode = "L"

    dimensions = (self.width, self.height)
    return Image.frombytes(mode, dimensions, self.samples)
| 10434 | |
def pil_save(self, *args, **kwargs):
    """Write to an image file via Pillow.

    The pixmap is converted with pil_image() and all positional and
    keyword arguments are forwarded to the image's "save" method (see the
    Pillow documentation for their meaning). If the caller gives no "dpi",
    the pixmap's (xres, yres) is passed.
    """
    image = self.pil_image()

    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)

    image.save(*args, **kwargs)
| 10449 | |
def pil_tobytes(self, *args, **kwargs):
    """Return the image as bytes, produced via Pillow.

    The pixmap is converted with pil_image() and saved into a memory
    stream; positional and keyword arguments are forwarded to the image's
    "save" method (see the Pillow documentation for their meaning). If the
    caller gives no "dpi", the pixmap's (xres, yres) is passed.
    """
    stream = io.BytesIO()
    image = self.pil_image()

    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)

    image.save(stream, *args, **kwargs)
    return stream.getvalue()
| 10466 | |
def pixel(self, x, y):
    """Get color tuple of pixel (x, y).

    Last item is the alpha if Pixmap.alpha is true.

    Raises:
        ValueError: if (x, y) lies outside the pixmap (pure-Python path).
    """
    if g_use_extra:
        return extra.pixmap_pixel(self.this.m_internal, x, y)
    pm = self.this.m_internal
    if x < 0 or x >= pm.w or y < 0 or y >= pm.h:
        # Raise ValueError directly, like set_pixel() does for the same
        # condition, instead of going through the RAISEPY() helper.
        raise ValueError(MSG_PIXEL_OUTSIDE)
    n = pm.n
    # Offset of the pixel's first sample within the samples area.
    i = pm.stride * y + n * x
    return tuple(self.samples_mv[i: i+n])
| 10484 | |
@property
def samples(self)->bytes:
    """Copy of the pixmap samples as a bytes object."""
    return bytes(self.samples_mv)
| 10489 | |
@property
def samples_mv(self):
    '''
    Memoryview of the pixmap samples, created once and then reused.
    '''
    # The view is stored on the instance so that `__del__()` can release
    # it; otherwise accessing it after we have been destructed will fail,
    # possibly crashing Python; this is #4155.
    #
    view = self._samples_mv
    if view is None:
        view = mupdf.fz_pixmap_samples_memoryview(self.this)
        self._samples_mv = view
    return view
| 10502 | |
def _samples_mv_release(self):
    """Release the cached samples memoryview and forget it.

    The cache slot is reset to None so that the `samples_mv` property
    creates a fresh view on next access instead of returning the released
    one. The `is not None` test is deliberate: truth-testing a released
    memoryview raises ValueError, which made a second call fail.
    """
    view = self._samples_mv
    if view is not None:
        view.release()
        self._samples_mv = None
| 10506 | |
@property
def samples_ptr(self):
    """Value of fz_pixmap_samples_int() for this pixmap."""
    pointer = mupdf.fz_pixmap_samples_int(self.this)
    return pointer
| 10510 | |
def save(self, filename, output=None, jpg_quality=95):
    """Save the pixmap as an image file.

    Args:
        filename: str, or an object with an "absolute" attribute (converted
            with str()), or an object with a "name" attribute (its name
            is used).
        output: (str) image format; if None, the filename extension is used.
            One of PNG, PNM, PGM, PPM, PBM, PAM, PSD, PS, JPG, JPEG
            (case-insensitive).
        jpg_quality: quality passed on for JPEG output.
    Raises:
        ValueError: for an unknown format, for alpha with PNM-type / PS /
            JPEG output, or for a colorspace with more than 3 components
            with PNG / PNM-type output.
    """
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "psd": 5,
        "ps": 6,
        "jpg": 7,
        "jpeg": 7,
    }

    # Reduce path-like and file-like arguments to a plain file name.
    if type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
    if output is None:
        extension = os.path.splitext(filename)[1]
        output = extension[1:]

    idx = valid_formats.get(output.lower())
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        raise ValueError("'%s' cannot have alpha" % output)
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    return self._writeIMG(filename, idx, jpg_quality)
| 10551 | |
def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
    """Set alpha channel to values contained in a byte array.
    If omitted, set alphas to 255.

    Args:
        alphavalues: (bytes) with length (width * height) or 'None'.
        premultiply: (bool, True) premultiply colors with alpha values.
        opaque: (tuple, length colorspace.n) this color receives opacity 0.
        matte: (tuple, length colorspace.n) preblending background color.
    Raises:
        ValueError: if the pixmap has no alpha channel, or if fewer than
            width * height alpha values are given.
    """
    pix = self.this
    alpha = 0
    m = 0
    if pix.alpha() == 0:
        raise ValueError( MSG_PIX_NOALPHA)
    n = mupdf.fz_pixmap_colorants(pix)
    w = mupdf.fz_pixmap_width(pix)
    h = mupdf.fz_pixmap_height(pix)
    # Total number of samples: n colorants plus one alpha value per pixel.
    balen = w * h * (n+1)
    colors = [0, 0, 0, 0]   # make this color opaque
    bgcolor = [0, 0, 0, 0]  # preblending background color
    zero_out = 0
    bground = 0
    # `opaque` / `matte` are only honoured when they are a list or tuple
    # with exactly n items; otherwise they are silently ignored.
    if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
        for i in range(n):
            colors[i] = opaque[i]
        zero_out = 1
    if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
        for i in range(n):
            bgcolor[i] = matte[i]
        bground = 1
    data = bytes()
    data_len = 0
    if alphavalues:
        #res = JM_BufferFromBytes(alphavalues)
        #data_len, data = mupdf.fz_buffer_storage(res)
        #if data_len < w * h:
        #    THROWMSG("bad alpha values")
        # fixme: don't seem to need to create an fz_buffer - can
        # use <alphavalues> directly?
        if isinstance(alphavalues, (bytes, bytearray)):
            data = alphavalues
            data_len = len(alphavalues)
        else:
            assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
        # One alpha value per pixel is required.
        if data_len < w * h:
            raise ValueError( "bad alpha values")
    if 1:
        # Use C implementation for speed.
        mupdf.Pixmap_set_alpha_helper(
                balen,
                n,
                data_len,
                zero_out,
                mupdf.python_buffer_data( data),
                pix.m_internal,
                premultiply,
                bground,
                colors,
                bgcolor,
                )
    else:
        # Disabled pure-Python variant (never executed because of `if 1:`).
        i = k = j = 0
        data_fix = 255
        while i < balen:
            alpha = data[k]
            if zero_out:
                # data_fix becomes 0 only if all colorants match `colors`.
                for j in range(i, i+n):
                    if mupdf.fz_samples_get(pix, j) != colors[j - i]:
                        data_fix = 255
                        break
                    else:
                        data_fix = 0
            if data_len:
                def fz_mul255( a, b):
                    x = a * b + 128
                    x += x // 256
                    return x // 256

                if data_fix == 0:
                    mupdf.fz_samples_set(pix, i+n, 0)
                else:
                    mupdf.fz_samples_set(pix, i+n, alpha)
                if premultiply and not bground:
                    for j in range(i, i+n):
                        mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j), alpha))
                elif bground:
                    for j in range( i, i+n):
                        m = bgcolor[j - i]
                        mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j) - m, alpha))
            else:
                mupdf.fz_samples_set(pix, i+n, data_fix)
            # Advance to the next pixel (n colorants + alpha) and alpha value.
            i += n+1
            k += 1
| 10646 | |
def tobytes(self, output="png", jpg_quality=95):
    '''
    Convert to binary image stream of desired type.

    Args:
        output: (str) image format, case-insensitive. One of png, pnm, pgm,
            ppm, pbm, pam, tga, tpic, psd, ps, jpg, jpeg.
        jpg_quality: quality passed on for JPEG output.
    Raises:
        ValueError: for an unknown format, for alpha with PNM-type / PS /
            JPEG output, or for a colorspace with more than 3 components
            with PNG / PNM-type / TGA output.
    '''
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "tga": 4,
        "tpic": 4,
        "psd": 5,
        "ps": 6,
        'jpg': 7,
        'jpeg': 7,
    }
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        # The message was missing its f-string prefix and showed the
        # literal text '{output}' instead of the format name.
        raise ValueError(f"'{output}' cannot have alpha")
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        self.set_dpi(self.xres, self.yres)
    # NOTE(review): index 4 (tga/tpic) has no dedicated branch in
    # _tobytes() and is written via its PNG fallback - confirm intent.
    barray = self._tobytes(idx, jpg_quality)
    return barray
| 10676 | |
def set_dpi(self, xres, yres):
    """Store `xres` and `yres` as the pixmap's resolution values."""
    internal = self.this.m_internal
    internal.xres = xres
    internal.yres = yres
| 10682 | |
def set_origin(self, x, y):
    """Store `x` and `y` as the pixmap's top-left coordinates."""
    internal = self.this.m_internal
    internal.x = x
    internal.y = y
| 10688 | |
def set_pixel(self, x, y, color):
    """Set the color of pixel (x, y).

    `color` must provide one value in 0..255 for each of the pixmap's n
    components. Raises ValueError for a position outside the pixmap or a
    component value out of range.
    """
    if g_use_extra:
        return extra.set_pixel(self.this.m_internal, x, y, color)
    pixmap = self.this
    inside = _INRANGE(x, 0, pixmap.w() - 1) and _INRANGE(y, 0, pixmap.h() - 1)
    if not inside:
        raise ValueError(MSG_PIXEL_OUTSIDE)
    n = pixmap.n()
    # Validate all components before the first sample is modified.
    for k in range(n):
        if not _INRANGE(color[k], 0, 255):
            raise ValueError(MSG_BAD_COLOR_SEQ)
    # Offset of the pixel's first sample.
    base = mupdf.fz_pixmap_stride(pixmap) * y + n * x
    for k in range(n):
        pixmap.fz_samples_set(base + k, color[k])
| 10713 | |
def set_rect(self, bbox, color):
    """Fill all pixels inside `bbox` with `color`.

    `color` must provide one value in 0..255 for each of the pixmap's n
    components, else ValueError is raised. Returns the boolean value of
    the result of JM_fill_pixmap_rect_with_color().
    """
    pixmap = self.this
    components = []
    for k in range(pixmap.n()):
        value = color[k]
        if not _INRANGE(value, 0, 255):
            raise ValueError(MSG_BAD_COLOR_SEQ)
        components.append(value)
    rect = JM_irect_from_py(bbox)
    return bool(JM_fill_pixmap_rect_with_color(pixmap, components, rect))
| 10728 | |
def shrink(self, factor):
    """Subsample the pixmap in place: width and height are divided by 2**factor.

    E.g. factor=1 shrinks to 25% of original size. Factors below 1 are
    ignored with a warning.
    """
    if not factor < 1:
        mupdf.fz_subsample_pixmap(self.this, factor)
        # Pixmap has changed so clear our memory view.
        self._memory_view = None
        self._samples_mv_release()
        return
    message_warning("ignoring shrink factor < 1")
| 10739 | |
@property
def size(self):
    """Pixmap size as reported by fz_pixmap_size()."""
    nbytes = mupdf.fz_pixmap_size(self.this)
    return nbytes
| 10744 | |
@property
def stride(self):
    """Length of one image line (width * n)."""
    pixmap = self.this
    return pixmap.stride()
| 10749 | |
def tint_with(self, black, white):
    """Tint the pixmap with modifiers for black and white.

    Only done for pixmaps with a colorspace of at most 3 components;
    otherwise a message is issued and None is returned.
    """
    cs = self.colorspace
    if cs and not cs.n > 3:
        return mupdf.fz_tint_pixmap(self.this, black, white)
    message("warning: colorspace invalid for function")
    return
| 10756 | |
@property
def w(self):
    """Pixmap width, as returned by fz_pixmap_width()."""
    width = mupdf.fz_pixmap_width(self.this)
    return width
| 10761 | |
def warp(self, quad, width, height):
    """Return a new Pixmap of the given size made from a warped quad.

    Raises ValueError if `quad` is not convex.
    """
    if not quad.is_convex:
        raise ValueError("quad must be convex")
    fz_quad = JM_quad_from_py(quad)
    # Corner order passed to MuPDF: ul, ur, lr, ll.
    corners = [fz_quad.ul, fz_quad.ur, fz_quad.lr, fz_quad.ll]
    warped = mupdf.fz_warp_pixmap(self.this, corners, width, height)
    return Pixmap(warped)
| 10769 | |
@property
def x(self):
    """x coordinate of the pixmap origin, as returned by fz_pixmap_x()."""
    origin_x = mupdf.fz_pixmap_x(self.this)
    return origin_x
| 10774 | |
@property
def xres(self):
    """Horizontal resolution of the pixmap."""
    pixmap = self.this
    return pixmap.xres()
| 10779 | |
@property
def y(self):
    """y coordinate of the pixmap origin, as returned by fz_pixmap_y()."""
    origin_y = mupdf.fz_pixmap_y(self.this)
    return origin_y
| 10784 | |
@property
def yres(self):
    """Vertical resolution of the pixmap."""
    pixmap = self.this
    return pixmap.yres()
| 10789 | |
# Aliases: `width` and `height` are bound to the same property objects
# as `w` and `h`.
width = w
height = h
| 10792 | |
def __del__(self):
    """Release the cached samples memoryview, if any.

    Uses getattr() because the attribute may be missing on a partially
    constructed object, and tests `is not None` because truth-testing an
    already released memoryview raises ValueError. Calling release() on a
    released view is harmless.
    """
    view = getattr(self, "_samples_mv", None)
    if view is not None:
        view.release()
        self._samples_mv = None
| 10796 | |
| 10797 | |
# Remove the existing module-level binding of `Point` before the class
# statement below defines it (presumably an earlier placeholder
# definition - TODO confirm against the top of the file).
del Point
class Point:
    """Point in the plane with float coordinates `x` and `y`.

    Behaves like a sequence of length 2 and supports arithmetic with
    numbers, point-like sequences and matrix-like sequences.
    """

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __add__(self, p):
        """Add a number to both coordinates, or add a point-like."""
        if hasattr(p, "__float__"):
            return Point(self.x + p, self.y + p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x + p[0], self.y + p[1])

    def __bool__(self):
        """False only if both coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __eq__(self, p):
        """Compare with any 2-item sequence; everything else is unequal."""
        if not hasattr(p, "__len__"):
            return False
        if len(p) != 2:
            return False
        try:
            return not (self - p)
        except (TypeError, ValueError, LookupError):
            # Items that cannot be subtracted (e.g. a 2-character string)
            # mean "not equal"; an equality test should not raise.
            return False

    def __getitem__(self, i):
        return (self.x, self.y)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'

        Explicit keyword args x, y override earlier settings if not None.
        '''
        if not args:
            self.x = 0.0
            self.y = 0.0
        elif len(args) > 2:
            raise ValueError("Point: bad seq len")
        elif len(args) == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        elif len(args) == 1:
            l = args[0]
            if isinstance(l, (mupdf.FzPoint, mupdf.fz_point)):
                self.x = l.x
                self.y = l.y
            else:
                if not hasattr(l, "__getitem__"):
                    raise ValueError("Point: bad args")
                if len(l) != 2:
                    raise ValueError("Point: bad seq len")
                self.x = float(l[0])
                self.y = float(l[1])
        else:
            raise ValueError("Point: bad seq len")
        # Keyword overrides are converted to float like positional values.
        if x is not None: self.x = float(x)
        if y is not None: self.y = float(y)

    def __len__(self):
        return 2

    def __mul__(self, m):
        """Scale by a number, dot-multiply with a 2-sequence, or transform
        a copy with a matrix-like."""
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        if hasattr(m, "__getitem__") and len(m) == 2:
            # dot product
            return self.x * m[0] + self.y * m[1]
        p = Point(self)
        return p.transform(m)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __pos__(self):
        """Return a copy."""
        return Point(self)

    def __repr__(self):
        return "Point" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate 0 (x) or 1 (y) to float(v)."""
        v = float(v)
        if   i == 0: self.x = v
        elif i == 1: self.y = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from both coordinates, or subtract a point-like."""
        if hasattr(p, "__float__"):
            return Point(self.x - p, self.y - p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x - p[0], self.y - p[1])

    def __truediv__(self, m):
        """Divide by a number, or transform a copy with the inverse of a
        matrix-like (ZeroDivisionError if it is not invertible)."""
        if hasattr(m, "__float__"):
            return Point(self.x * 1./m, self.y * 1./m)
        m1 = util_invert_matrix(m)[1]
        if not m1:
            raise ZeroDivisionError("matrix not invertible")
        p = Point(self)
        return p.transform(m1)

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0,0)
        s = math.sqrt(s)
        return Point(abs(self.x) / s, abs(self.y) / s)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        args[0] is point-like or rect-like; the optional args[1] is the
        unit of the result: "px" (default), "in", "cm" or "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        x = args[0]
        if len(x) == 2:
            x = Point(x)
        elif len(x) == 4:
            x = Rect(x)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        if len(args) > 1:
            unit = args[1]
        else:
            unit = "px"
        # Conversion factor for each unit is u[unit][0] / u[unit][1].
        u = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.),
             "mm": (25.4, 72.)}
        f = u[unit][0] / u[unit][1]

        if type(x) is Point:
            return abs(self - x) * f

        # from here on, x is a rectangle
        # as a safeguard, make a finite copy of it
        r = Rect(x.top_left, x.top_left)
        r = r | x.bottom_right
        if self in r:
            return 0.0
        if self.x > r.x1:
            if self.y >= r.y1:
                return self.distance_to(r.bottom_right, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_right, unit)
            else:
                return (self.x - r.x1) * f
        elif r.x0 <= self.x <= r.x1:
            if self.y >= r.y1:
                return (self.y - r.y1) * f
            else:
                return (r.y0 - self.y) * f
        else:
            if self.y >= r.y1:
                return self.distance_to(r.bottom_left, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_left, unit)
            else:
                return (r.x0 - self.x) * f

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    @property
    def unit(self):
        """Unit vector of the point."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0,0)
        s = math.sqrt(s)
        return Point(self.x / s, self.y / s)

    # Legacy (Python 2) names kept as aliases of the current methods.
    __nonzero__ = __bool__
    __div__ = __truediv__
    norm = __abs__
| 10984 | |
| 10985 | |
| 10986 class Quad: | |
| 10987 | |
| 10988 def __abs__(self): | |
| 10989 if self.is_empty: | |
| 10990 return 0.0 | |
| 10991 return abs(self.ul - self.ur) * abs(self.ul - self.ll) | |
| 10992 | |
| 10993 def __add__(self, q): | |
| 10994 if hasattr(q, "__float__"): | |
| 10995 return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q) | |
| 10996 if len(q) != 4: | |
| 10997 raise ValueError("Quad: bad seq len") | |
| 10998 return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3]) | |
| 10999 | |
| 11000 def __bool__(self): | |
| 11001 return not self.is_empty | |
| 11002 | |
| 11003 def __contains__(self, x): | |
| 11004 try: | |
| 11005 l = x.__len__() | |
| 11006 except Exception: | |
| 11007 if g_exceptions_verbose > 1: exception_info() | |
| 11008 return False | |
| 11009 if l == 2: | |
| 11010 return util_point_in_quad(x, self) | |
| 11011 if l != 4: | |
| 11012 return False | |
| 11013 if CheckRect(x): | |
| 11014 if Rect(x).is_empty: | |
| 11015 return True | |
| 11016 return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self) | |
| 11017 if CheckQuad(x): | |
| 11018 for i in range(4): | |
| 11019 if not util_point_in_quad(x[i], self): | |
| 11020 return False | |
| 11021 return True | |
| 11022 return False | |
| 11023 | |
| 11024 def __eq__(self, quad): | |
| 11025 if not hasattr(quad, "__len__"): | |
| 11026 return False | |
| 11027 return len(quad) == 4 and ( | |
| 11028 self.ul == quad[0] and | |
| 11029 self.ur == quad[1] and | |
| 11030 self.ll == quad[2] and | |
| 11031 self.lr == quad[3] | |
| 11032 ) | |
| 11033 | |
| 11034 def __getitem__(self, i): | |
| 11035 return (self.ul, self.ur, self.ll, self.lr)[i] | |
| 11036 | |
| 11037 def __hash__(self): | |
| 11038 return hash(tuple(self)) | |
| 11039 | |
| 11040 def __init__(self, *args, ul=None, ur=None, ll=None, lr=None): | |
| 11041 ''' | |
| 11042 Quad() - all zero points | |
| 11043 Quad(ul, ur, ll, lr) | |
| 11044 Quad(quad) - new copy | |
| 11045 Quad(sequence) - from 'sequence' | |
| 11046 | |
| 11047 Explicit keyword args ul, ur, ll, lr override earlier settings if not | |
| 11048 None. | |
| 11049 | |
| 11050 ''' | |
| 11051 if not args: | |
| 11052 self.ul = self.ur = self.ll = self.lr = Point() | |
| 11053 elif len(args) > 4: | |
| 11054 raise ValueError("Quad: bad seq len") | |
| 11055 elif len(args) == 4: | |
| 11056 self.ul, self.ur, self.ll, self.lr = map(Point, args) | |
| 11057 elif len(args) == 1: | |
| 11058 l = args[0] | |
| 11059 if isinstance(l, mupdf.FzQuad): | |
| 11060 self.this = l | |
| 11061 self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr) | |
| 11062 elif not hasattr(l, "__getitem__"): | |
| 11063 raise ValueError("Quad: bad args") | |
| 11064 elif len(l) != 4: | |
| 11065 raise ValueError("Quad: bad seq len") | |
| 11066 else: | |
| 11067 self.ul, self.ur, self.ll, self.lr = map(Point, l) | |
| 11068 else: | |
| 11069 raise ValueError("Quad: bad args") | |
| 11070 if ul is not None: self.ul = Point(ul) | |
| 11071 if ur is not None: self.ur = Point(ur) | |
| 11072 if ll is not None: self.ll = Point(ll) | |
| 11073 if lr is not None: self.lr = Point(lr) | |
| 11074 | |
| 11075 def __len__(self): | |
| 11076 return 4 | |
| 11077 | |
| 11078 def __mul__(self, m): | |
| 11079 q = Quad(self) | |
| 11080 q = q.transform(m) | |
| 11081 return q | |
| 11082 | |
| 11083 def __neg__(self): | |
| 11084 return Quad(-self.ul, -self.ur, -self.ll, -self.lr) | |
| 11085 | |
| 11086 def __nonzero__(self): | |
| 11087 return not self.is_empty | |
| 11088 | |
| 11089 def __pos__(self): | |
| 11090 return Quad(self) | |
| 11091 | |
| 11092 def __repr__(self): | |
| 11093 return "Quad" + str(tuple(self)) | |
| 11094 | |
| 11095 def __setitem__(self, i, v): | |
| 11096 if i == 0: self.ul = Point(v) | |
| 11097 elif i == 1: self.ur = Point(v) | |
| 11098 elif i == 2: self.ll = Point(v) | |
| 11099 elif i == 3: self.lr = Point(v) | |
| 11100 else: | |
| 11101 raise IndexError("index out of range") | |
| 11102 return None | |
| 11103 | |
| 11104 def __sub__(self, q): | |
| 11105 if hasattr(q, "__float__"): | |
| 11106 return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q) | |
| 11107 if len(q) != 4: | |
| 11108 raise ValueError("Quad: bad seq len") | |
| 11109 return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3]) | |
| 11110 | |
| 11111 def __truediv__(self, m): | |
| 11112 if hasattr(m, "__float__"): | |
| 11113 im = 1. / m | |
| 11114 else: | |
| 11115 im = util_invert_matrix(m)[1] | |
| 11116 if not im: | |
| 11117 raise ZeroDivisionError("Matrix not invertible") | |
| 11118 q = Quad(self) | |
| 11119 q = q.transform(im) | |
| 11120 return q | |
| 11121 | |
| 11122 @property | |
| 11123 def is_convex(self): | |
| 11124 """Check if quad is convex and not degenerate. | |
| 11125 | |
| 11126 Notes: | |
| 11127 Check that for the two diagonals, the other two corners are not | |
| 11128 on the same side of the diagonal. | |
| 11129 Returns: | |
| 11130 True or False. | |
| 11131 """ | |
| 11132 m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis | |
| 11133 p1 = self.ll * m # transform the | |
| 11134 p2 = self.ur * m # other two points | |
| 11135 if p1.y * p2.y > 0: | |
| 11136 return False | |
| 11137 m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis | |
| 11138 p1 = self.lr * m # transform the | |
| 11139 p2 = self.ul * m # remaining points | |
| 11140 if p1.y * p2.y > 0: | |
| 11141 return False | |
| 11142 return True | |
| 11143 | |
| 11144 @property | |
| 11145 def is_empty(self): | |
| 11146 """Check whether all quad corners are on the same line. | |
| 11147 | |
| 11148 This is the case if width or height is zero. | |
| 11149 """ | |
| 11150 return self.width < EPSILON or self.height < EPSILON | |
| 11151 | |
| 11152 @property | |
| 11153 def is_infinite(self): | |
| 11154 """Check whether this is the infinite quad.""" | |
| 11155 return self.rect.is_infinite | |
| 11156 | |
| 11157 @property | |
| 11158 def is_rectangular(self): | |
| 11159 """Check if quad is rectangular. | |
| 11160 | |
| 11161 Notes: | |
| 11162 Some rotation matrix can thus transform it into a rectangle. | |
| 11163 This is equivalent to three corners enclose 90 degrees. | |
| 11164 Returns: | |
| 11165 True or False. | |
| 11166 """ | |
| 11167 | |
| 11168 sine = util_sine_between(self.ul, self.ur, self.lr) | |
| 11169 if abs(sine - 1) > EPSILON: # the sine of the angle | |
| 11170 return False | |
| 11171 | |
| 11172 sine = util_sine_between(self.ur, self.lr, self.ll) | |
| 11173 if abs(sine - 1) > EPSILON: | |
| 11174 return False | |
| 11175 | |
| 11176 sine = util_sine_between(self.lr, self.ll, self.ul) | |
| 11177 if abs(sine - 1) > EPSILON: | |
| 11178 return False | |
| 11179 | |
| 11180 return True | |
| 11181 | |
| 11182 def morph(self, p, m): | |
| 11183 """Morph the quad with matrix-like 'm' and point-like 'p'. | |
| 11184 | |
| 11185 Return a new quad.""" | |
| 11186 if self.is_infinite: | |
| 11187 return INFINITE_QUAD() | |
| 11188 delta = Matrix(1, 1).pretranslate(p.x, p.y) | |
| 11189 q = self * ~delta * m * delta | |
| 11190 return q | |
| 11191 | |
| 11192 @property | |
| 11193 def rect(self): | |
| 11194 r = Rect() | |
| 11195 r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x) | |
| 11196 r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y) | |
| 11197 r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x) | |
| 11198 r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y) | |
| 11199 return r | |
| 11200 | |
| 11201 def transform(self, m): | |
| 11202 """Replace quad by its transformation with matrix m.""" | |
| 11203 if hasattr(m, "__float__"): | |
| 11204 pass | |
| 11205 elif len(m) != 6: | |
| 11206 raise ValueError("Matrix: bad seq len") | |
| 11207 self.ul *= m | |
| 11208 self.ur *= m | |
| 11209 self.ll *= m | |
| 11210 self.lr *= m | |
| 11211 return self | |
| 11212 | |
| 11213 __div__ = __truediv__ | |
| 11214 width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr))) | |
| 11215 height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr))) | |
| 11216 | |
| 11217 | |
class Rect:
    """Rectangle with float coordinates `x0`, `y0`, `x1`, `y1`.

    The sequence protocol (`len`, indexing, iteration) exposes the
    coordinates in exactly that order.
    """

    def __abs__(self):
        """Return the area; 0.0 for empty or infinite rectangles."""
        if self.is_empty or self.is_infinite:
            return 0.0
        return (self.x1 - self.x0) * (self.y1 - self.y0)

    def __add__(self, p):
        """Add a number to every coordinate, or a 4-sequence item by item.

        Raises:
            ValueError: if `p` is a sequence whose length is not 4.
        """
        if hasattr(p, "__float__"):
            return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])

    def __and__(self, x):
        """Return the intersection with rect-like `x` as a new Rect."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")

        r1 = Rect(x)
        r = Rect(self)
        return r.intersect(r1)

    def __bool__(self):
        """False only when all four coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __contains__(self, x):
        """Check whether number, point-like, rect-like or quad-like `x` is inside.

        A number is "contained" if it equals one of the four coordinates.
        Operands without a length (e.g. `None`) are never contained, which
        matches how `Quad.__contains__` treats them.
        """
        if hasattr(x, "__float__"):
            return x in tuple(self)
        try:
            length = len(x)
        except TypeError:
            return False
        if length == 2:
            return util_is_point_in_rect(x, self)
        if length == 4:
            try:
                r = Rect(x)
            except Exception:
                # Not rect-like: fall back to the rectangle of a quad.
                if g_exceptions_verbose > 1: exception_info()
                r = Quad(x).rect
            return (self.x0 <= r.x0 <= r.x1 <= self.x1 and
                    self.y0 <= r.y0 <= r.y1 <= self.y1)
        return False

    def __eq__(self, rect):
        """Equal to any 4-item sequence whose difference to `self` is all zero."""
        if not hasattr(rect, "__len__"):
            return False
        return len(rect) == 4 and not (self - rect)

    def __getitem__(self, i):
        """Return coordinate `i` in the order x0, y0, x1, y1."""
        return (self.x0, self.y0, self.x1, self.y1)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        x0, y0, x1, y1 = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0 = float( x0)
        self.y0 = float( y0)
        self.x1 = float( x1)
        self.y1 = float( y1)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Scale by a number, or return a copy transformed by matrix-like `m`."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        r = Rect(self)
        r = r.transform(m)
        return r

    def __neg__(self):
        return Rect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __nonzero__(self):
        return not (max(self) == min(self) == 0)

    def __or__(self, x):
        """Return a new Rect extended to include point-like or rect-like `x`.

        Raises:
            ValueError: if `x` has no length or a length other than 2 or 4.
        """
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        r = Rect(self)
        if len(x) == 2:
            return r.include_point(x)
        if len(x) == 4:
            return r.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        """Return a copy."""
        return Rect(self)

    def __repr__(self):
        return "Rect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate `i` (0..3) to `float(v)`.

        Raises:
            IndexError: for any other index.
        """
        v = float(v)
        if i == 0: self.x0 = v
        elif i == 1: self.y0 = v
        elif i == 2: self.x1 = v
        elif i == 3: self.y1 = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from every coordinate, or a 4-sequence item by item.

        Raises:
            ValueError: if `p` is a sequence whose length is not 4.
        """
        if hasattr(p, "__float__"):
            return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p)
        if len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])

    def __truediv__(self, m):
        """Divide by a number, or transform a copy by the inverse of matrix-like `m`.

        Raises:
            ZeroDivisionError: if `m` is zero or a non-invertible matrix.
        """
        if hasattr(m, "__float__"):
            return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
        im = util_invert_matrix(m)[1]
        if not im:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        r = Rect(self)
        r = r.transform(im)
        return r

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    def contains(self, x):
        """Check if containing point-like or rect-like x."""
        return self.__contains__(x)

    @property
    def height(self):
        """`y1 - y0`, but never negative."""
        return max(0, self.y1 - self.y0)

    def include_point(self, p):
        """Extend to include point-like p."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
        return self

    def include_rect(self, r):
        """Extend to include rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        r = Rect(r)
        if r.is_infinite or self.is_infinite:
            # The union with an infinite rectangle is infinite.
            self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT
        elif r.is_empty:
            return self
        elif self.is_empty:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        else:
            self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, r)
        return self

    def intersect(self, r):
        """Restrict to common rect with rect-like r."""
        if not len(r) == 4:
            raise ValueError("Rect: bad seq len")
        r = Rect(r)
        if r.is_infinite:
            return self
        elif self.is_infinite:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        elif r.is_empty:
            self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1
        elif self.is_empty:
            return self
        else:
            self.x0, self.y0, self.x1, self.y1 = util_intersect_rect(self, r)
        return self

    def intersects(self, x):
        """Check if intersection with rectangle x is not empty."""
        rect2 = Rect(x)
        return (
            not self.is_empty
            and not self.is_infinite
            and not rect2.is_empty
            and not rect2.is_infinite
            and self.x0 < rect2.x1
            and rect2.x0 < self.x1
            and self.y0 < rect2.y1
            and rect2.y0 < self.y1
        )

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if this is the infinite rectangle."""
        return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Return the Euclidean norm of the four coordinates."""
        return math.sqrt(sum([c*c for c in self]))

    def normalize(self):
        """Swap coordinates in place so that x0 <= x1 and y0 <= y1; return self."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def round(self):
        """Return the IRect."""
        return IRect(util_round_rect(self))

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect.

        Raises:
            ValueError: if either rectangle is infinite or empty.
        """
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move own top-left to the origin, scale to the target size, then
        # move to the target's top-left.
        return (
            Matrix(1, 0, 0, 1, -self.x0, -self.y0)
            * Matrix(r.width / self.width, r.height / self.height)
            * Matrix(1, 0, 0, 1, r.x0, r.y0)
        )

    def transform(self, m):
        """Replace with the transformation by matrix-like m."""
        if not len(m) == 6:
            raise ValueError("Matrix: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
        return self

    @property
    def width(self):
        """`x1 - x0`, but never negative."""
        return max(0, self.x1 - self.x0)

    __div__ = __truediv__

    # Short aliases for the corner properties and the rounded IRect.
    bl = bottom_left
    br = bottom_right
    irect = property(round)
    tl = top_left
    tr = top_right
| 11503 | |
| 11504 | |
| 11505 class Story: | |
| 11506 | |
def __init__(self, html='', user_css=None, em=12, archive=None):
    """Create a story from HTML text.

    Args:
        html: HTML source; it is UTF-8 encoded and copied into a MuPDF buffer.
        user_css: passed through unchanged to the MuPDF story constructor.
        em: passed through unchanged to the MuPDF story constructor.
        archive: an `Archive`, or something `Archive(...)` accepts; a
            falsy value results in `mupdf.FzArchive(None)` being used.
    """
    html_buffer = mupdf.fz_new_buffer_from_copied_data(html.encode('utf-8'))
    if archive and not isinstance(archive, Archive):
        archive = Archive(archive)
    if archive:
        fz_archive = archive.this
    else:
        fz_archive = mupdf.FzArchive(None)
    # Use `FzStoryS` when the bindings provide it, `FzStory` otherwise.
    story_type = mupdf.FzStoryS if hasattr(mupdf, 'FzStoryS') else mupdf.FzStory
    self.this = story_type(html_buffer, user_css, em, fz_archive)
| 11516 | |
def add_header_ids(self):
    '''
    Walk the nodes found from `self.body` and give every `<h1>`..`<h6>`
    node that has no `id` attribute value a generated one of the form
    `h_id_<n>`, where `<n>` counts the ids generated so far.
    '''
    node = self.body.find(None, None, None)
    generated = 0
    while node:
        tag = node.tagname
        is_heading = len(tag) == 2 and tag[0] == "h" and tag[1] in "123456"
        # The attribute is only looked up for heading tags.
        if is_heading and not node.get_attribute_value("id"):
            node.set_attribute("id", f"h_id_{generated}")
            generated += 1
        node = node.find_next(None, None, None)
| 11535 | |
@staticmethod
def add_pdf_links(document_or_stream, positions):
    """
    Adds links to PDF document.
    Args:
        document_or_stream:
            A PDF `Document` or raw PDF content, for example an
            `io.BytesIO` instance.
        positions:
            List of `ElementPosition`'s for `document_or_stream`,
            typically from Story.element_positions(). Only positions with
            bit 0 of `open_close` set are considered. If two or more
            such positions have the same id, the first one is used and
            the others are ignored.
    Returns:
        `document_or_stream` if a `Document` instance, otherwise a
        new `Document` instance.
    Raises:
        RuntimeError: if an `href` in `positions` refers to an internal
            position `#<name>` but no item in `positions` has `id =
            name`.
    """
    if isinstance(document_or_stream, Document):
        document = document_or_stream
    else:
        document = Document("pdf", document_or_stream)

    # Create dict from id to position, which we will use to find
    # link destinations. `setdefault` keeps the first position seen
    # for an id, so later duplicates are silently ignored.
    #
    id_to_position = {}
    for position in positions:
        if (position.open_close & 1) and position.id:
            id_to_position.setdefault(position.id, position)

    # Insert links for all positions that have an `href`.
    #
    for position_from in positions:
        if not ((position_from.open_close & 1) and position_from.href):
            continue

        href = position_from.href
        link = {'from': Rect(position_from.rect)}

        if href.startswith("#"):
            # `<a href="#...">...</a>` internal link.
            target_id = href[1:]
            try:
                position_to = id_to_position[target_id]
            except KeyError as e:
                if g_exceptions_verbose > 1: exception_info()
                raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
            # Make link from `position_from`'s rect to top-left of
            # `position_to`'s rect.
            link["kind"] = LINK_GOTO
            x0, y0, x1, y1 = position_to.rect
            # This appears to work well with viewers which scroll
            # to make destination point top-left of window.
            link["to"] = Point(x0, y0)
            link["page"] = position_to.page_num - 1
        elif href.startswith('name:'):
            # `<a href="name:...">...</a>` named link; strip the prefix.
            link['kind'] = LINK_NAMED
            link['name'] = href[5:]
        else:
            # `<a href="...">...</a>` external link.
            link['kind'] = LINK_URI
            link['uri'] = href

        # `page_num` is 1-based here; document pages are indexed from 0.
        document[position_from.page_num - 1].insert_link(link)

    return document
| 11620 | |
@property
def body(self):
    """Return `bodytag()` of the object returned by `self.document()`."""
    return self.document().bodytag()
| 11625 | |
def document(self):
    """Return the story's DOM, from `mupdf.fz_story_document()`, wrapped in `Xml`."""
    return Xml(mupdf.fz_story_document(self.this))
| 11629 | |
def draw(self, device, matrix=None):
    """Draw the story via `mupdf.fz_draw_story()`.

    Args:
        device: object whose `.this` is the target device; a falsy value
            results in `mupdf.FzDevice(None)` being used.
        matrix: converted with `JM_matrix_from_py()` before drawing.
    """
    ctm = JM_matrix_from_py(matrix)
    if device:
        fz_device = device.this
    else:
        fz_device = mupdf.FzDevice(None)
    mupdf.fz_draw_story(self.this, fz_device, ctm)
| 11634 | |
def element_positions(self, function, args=None):
    '''
    Trigger a callback function to record where items have been placed.

    Args:
        function:
            Callable that can be called with exactly one positional
            argument. It receives an object with attributes `depth`,
            `heading`, `id`, `rect`, `text`, `open_close`, `rect_num` and
            `href`, plus one attribute per item of `args`.
        args:
            Optional dict whose keys must be strings that are valid
            identifiers. Anything that is not a dict is treated as empty.
    Raises:
        ValueError: for an invalid key in `args`, or if `function` is not
            callable with exactly one positional argument.
    '''
    if type(args) is dict:
        for key in args:
            if not (type(key) is str and key.isidentifier()):
                raise ValueError(f"invalid key '{key}'")
    else:
        args = {}

    bad_callback = "callback 'function' must be a callable with exactly one argument"
    if not callable(function):
        raise ValueError(bad_callback)
    # Check the signature instead of `function.__code__`, which does not
    # exist on callable objects or `functools.partial` instances and counts
    # `self` for bound methods.
    try:
        signature = inspect.signature(function)
    except (TypeError, ValueError):
        # No introspectable signature (e.g. some builtins): accept it.
        signature = None
    if signature is not None:
        try:
            signature.bind(None)
        except TypeError:
            raise ValueError(bad_callback) from None

    class Position2:
        pass

    def function2(position):
        # Copy the fields into a plain Python object for the user callback.
        position2 = Position2()
        position2.depth = position.depth
        position2.heading = position.heading
        position2.id = position.id
        position2.rect = JM_py_from_rect(position.rect)
        position2.text = position.text
        position2.open_close = position.open_close
        position2.rect_num = position.rectangle_num
        position2.href = position.href
        for k, v in args.items():
            setattr(position2, k, v)
        function(position2)

    mupdf.fz_story_positions(self.this, function2)
| 11665 | |
def place(self, where):
    """Call `mupdf.fz_place_story()` for rect-like `where`.

    Returns:
        `(more, filled)`: the value returned by `fz_place_story()` and the
        rectangle it filled in, converted with `JM_py_from_rect()`.
    """
    filled = mupdf.FzRect()
    more = mupdf.fz_place_story(self.this, JM_rect_from_py(where), filled)
    return more, JM_py_from_rect(filled)
| 11671 | |
def reset(self):
    """Call `mupdf.fz_reset_story()` on the underlying story."""
    story = self.this
    mupdf.fz_reset_story(story)
| 11674 | |
def write(self, writer, rectfn, positionfn=None, pagefn=None):
    """Place the story into successive rectangles and optionally draw it.

    Args:
        writer: object providing `begin_page(mediabox)` and `end_page()`;
            if falsy, the story is drawn with a `None` device and no pages
            are started or ended.
        rectfn: called as `rectfn(rect_num, filled)`; must return
            `(mediabox, rect, ctm)`. A truthy `mediabox` starts a new page.
        positionfn: optional; called via `self.element_positions()` after
            each placement, with `.page_num` added to the position.
        pagefn: optional; called as `pagefn(page_num, mediabox, dev, after)`
            with `after=0` after a page is begun and `after=1` before a
            page is ended.
    """
    # NOTE(review): when a new page starts, the `after=1` call for the
    # previous page receives the already incremented page number and the
    # new mediabox - confirm this is intended.
    device = None
    page_num = 0
    rect_num = 0
    filled = Rect(0, 0, 0, 0)
    more = True
    while more:
        mediabox, rect, ctm = rectfn(rect_num, filled)
        rect_num += 1
        if mediabox:
            # new page.
            page_num += 1
        more, filled = self.place(rect)

        if positionfn:
            def tagged_positionfn(position):
                # We add a `.page_num` member to the
                # `ElementPosition` instance.
                position.page_num = page_num
                positionfn(position)
            self.element_positions(tagged_positionfn)

        if not writer:
            self.draw(None, ctm)
            continue

        if mediabox:
            # new page: finish the previous one first, if any.
            if device:
                if pagefn:
                    pagefn(page_num, mediabox, device, 1)
                writer.end_page()
            device = writer.begin_page(mediabox)
            if pagefn:
                pagefn(page_num, mediabox, device, 0)
        self.draw(device, ctm)
        if not more:
            # Nothing left to place: finish the last page.
            if pagefn:
                pagefn(page_num, mediabox, device, 1)
            writer.end_page()
| 11713 | |
@staticmethod
def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    """Repeatedly lay out generated content until it stops changing.

    `contentfn(positions)` is called with the positions collected in the
    previous pass (an empty list at first). Once it returns the same
    content as in the previous pass, that pass is written to `writer` and
    `positionfn`, if given, is called for each of its positions. Earlier
    passes are laid out with no writer.
    """
    positions = []
    previous_content = None
    stable = False
    # Iterate until stable.
    while not stable:
        content = contentfn(positions)
        stable = True if content == previous_content else False
        previous_content = content
        story = Story(content, user_css, em, archive)

        if add_header_ids:
            story.add_header_ids()

        # Start a fresh list; the callback below appends to it.
        positions = []

        def collect(position):
            positions.append(position)
            if stable and positionfn:
                positionfn(position)

        target = writer if stable else None
        story.write(target, rectfn, collect, pagefn)
| 11745 | |
@staticmethod
def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    """Run `Story.write_stabilized()` into memory, then add PDF links.

    The output is written to an in-memory stream through a
    `DocumentWriter`; the positions reported during writing are passed to
    `Story.add_pdf_links()`, whose result is returned.
    """
    pdf_stream = io.BytesIO()
    writer = DocumentWriter(pdf_stream)
    collected = []

    def collect(position):
        collected.append(position)
        if positionfn:
            positionfn(position)

    Story.write_stabilized(writer, contentfn, rectfn, user_css, em, collect, pagefn, archive, add_header_ids)
    writer.close()
    # Rewind so that the stream is read from its beginning.
    pdf_stream.seek(0)
    return Story.add_pdf_links(pdf_stream, collected)
| 11761 | |
def write_with_links(self, rectfn, positionfn=None, pagefn=None):
    """Run `self.write()` into memory, then add PDF links.

    The output is written to an in-memory stream through a
    `DocumentWriter`; the positions reported during writing are passed to
    `Story.add_pdf_links()`, whose result is returned.
    """
    pdf_stream = io.BytesIO()
    writer = DocumentWriter(pdf_stream)
    collected = []

    def collect(position):
        collected.append(position)
        if positionfn:
            positionfn(position)

    self.write(writer, rectfn, positionfn=collect, pagefn=pagefn)
    writer.close()
    # Rewind so that the stream is read from its beginning.
    pdf_stream.seek(0)
    return Story.add_pdf_links(pdf_stream, collected)
| 11776 | |
class FitResult:
    '''
    The result from a `Story.fit*()` method.

    Members:

    `big_enough`:
        `True` if the fit succeeded.
    `filled`:
        From the last call to `Story.place()`.
    `more`:
        `False` if the fit succeeded.
    `numcalls`:
        Number of calls made to `self.place()`.
    `parameter`:
        The successful parameter value, or the largest failing value.
    `rect`:
        The rect created from `parameter`.
    '''
    def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
        self.big_enough, self.filled, self.more = big_enough, filled, more
        self.numcalls, self.parameter, self.rect = numcalls, parameter, rect

    def __repr__(self):
        # One ` name=value` piece per member, in declaration order.
        members = ('big_enough', 'filled', 'more', 'numcalls', 'parameter', 'rect')
        return ''.join(f' {name}={getattr(self, name)}' for name in members)
| 11813 | |
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
    '''
    Finds optimal rect that contains the story `self`.

    Returns a `Story.FitResult` instance.

    On success, the last call to `self.place()` will have been with the
    returned rectangle, so `self.draw()` can be used directly.

    Args:
        :arg fn:
            A callable taking a floating point `parameter` and returning a
            `pymupdf.Rect()`. If the rect is empty, we assume the story will
            not fit and do not call `self.place()`.

            Must guarantee that `self.place()` behaves monotonically when
            given rect `fn(parameter)` as `parameter` increases. This
            usually means that both width and height increase or stay
            unchanged as `parameter` increases.
        :arg pmin:
            Minimum parameter to consider; `None` for -infinity.
        :arg pmax:
            Maximum parameter to consider; `None` for +infinity.
        :arg delta:
            Maximum error in returned `parameter`. If `delta` is smaller
            than the floating point resolution of the search interval, the
            search stops as soon as the interval cannot be split further.
        :arg verbose:
            If true we output diagnostics.
    '''
    def log(text):
        assert verbose
        message(f'fit(): {text}')

    assert isinstance(pmin, (int, float)) or pmin is None
    assert isinstance(pmax, (int, float)) or pmax is None

    class State:
        # Mutable search state shared by the nested helpers below.
        def __init__(self):
            self.pmin = pmin
            self.pmax = pmax
            self.pmin_result = None
            self.pmax_result = None
            self.result = None
            self.numcalls = 0
            # Parameter used by the most recent update() call.
            self.last_p = None
            if verbose:
                self.pmin0 = pmin
                self.pmax0 = pmax
    state = State()

    if verbose:
        log(f'starting. {state.pmin=} {state.pmax=}.')

    self.reset()

    def ret():
        '''
        Returns the final result. If a solution exists (`state.pmax` is
        set) we make sure that the last `self.place()` call was made with
        the rect for `state.pmax`.
        '''
        if state.pmax is not None:
            if state.last_p != state.pmax:
                if verbose:
                    log('Calling update() with pmax, because was overwritten by later calls.')
                big_enough = update(state.pmax)
                assert big_enough
            result = state.pmax_result
        else:
            result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
        if verbose:
            log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
        return result

    def update(parameter):
        '''
        Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
        false, then `rect` is big enough to contain `self` and we
        set `state.pmax=parameter` and return True. Otherwise we set
        `state.pmin=parameter` and return False.
        '''
        rect = fn(parameter)
        assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
        if rect.is_empty:
            big_enough = False
            result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
            if verbose:
                log('update(): not calling self.place() because rect is empty.')
        else:
            more, filled = self.place(rect)
            state.numcalls += 1
            big_enough = not more
            result = Story.FitResult(
                    filled=filled,
                    more=more,
                    numcalls=state.numcalls,
                    parameter=parameter,
                    rect=rect,
                    big_enough=big_enough,
                    )
            if verbose:
                log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
        if big_enough:
            state.pmax = parameter
            state.pmax_result = result
        else:
            state.pmin = parameter
            state.pmin_result = result
        state.last_p = parameter
        return big_enough

    def opposite(p, direction):
        '''
        Returns same sign as `direction`, larger or smaller than `p` if
        direction is positive or negative respectively.
        '''
        if p is None or p == 0:
            return direction
        if direction * p > 0:
            return 2 * p
        return -p

    if state.pmin is None:
        # Find an initial finite pmin value.
        if verbose:
            log('finding pmin.')
        parameter = opposite(state.pmax, -1)
        while 1:
            if not update(parameter):
                break
            parameter *= 2
    else:
        if update(state.pmin):
            if verbose:
                log(f'{state.pmin=} is big enough.')
            return ret()

    if state.pmax is None:
        # Find an initial finite pmax value.
        if verbose:
            log('finding pmax.')
        parameter = opposite(state.pmin, +1)
        while 1:
            if update(parameter):
                break
            parameter *= 2
    else:
        if not update(state.pmax):
            # No solution possible.
            state.pmax = None
            if verbose:
                log(f'No solution possible {state.pmax=}.')
            return ret()

    # Do binary search in pmin..pmax.
    if verbose:
        log(f'doing binary search with {state.pmin=} {state.pmax=}.')
    while 1:
        if state.pmax - state.pmin < delta:
            return ret()
        parameter = (state.pmin + state.pmax) / 2
        if parameter == state.pmin or parameter == state.pmax:
            # The interval can no longer be split (delta is below the
            # floating point resolution here); without this check the loop
            # would re-evaluate the same parameter forever.
            if verbose:
                log(f'cannot split interval further: {state.pmin=} {state.pmax=}.')
            return ret()
        update(parameter)
| 11964 | |
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
    '''
    Searches for the smallest `scale` in `scale_min..scale_max` for which
    `rect`, with its width and height multiplied by `scale` and its
    top-left corner kept fixed, is large enough to contain the story
    `self`.

    Returns a `Story.FitResult` instance.

    :arg rect:
        Rect-like sequence `(x0, y0, x1, y1)` that is scaled.
    :arg scale_min:
        Minimum scale to consider; must be >= 0.
    :arg scale_max:
        Maximum scale to consider, must be >= scale_min or `None` for
        infinite.
    :arg delta:
        Maximum error in returned scale.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top, right, bottom = rect
    base_width = right - left
    base_height = bottom - top

    def scaled_rect(scale):
        # Top-left stays put; only the bottom-right corner moves.
        return Rect(left, top, left + scale*base_width, top + scale*base_height)

    return self.fit(scaled_rect, scale_min, scale_max, delta, verbose)
| 11992 | |
def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Searches for the smallest height in `height_min..height_max` for which
    a rect of size `(width, height)` placed at `origin` is large enough to
    contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg width:
        width of rect.
    :arg height_min:
        Minimum height to consider; must be >= 0.
    :arg height_max:
        Maximum height to consider, must be >= height_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned height.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top = origin
    right = left + width

    def rect_for_height(height):
        # Fixed width; only the bottom edge depends on the parameter.
        return Rect(left, top, right, top+height)

    return self.fit(rect_for_height, height_min, height_max, delta, verbose)
| 12020 | |
def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Searches for the smallest width in `width_min..width_max` for which a
    rect of size `(width, height)` placed at `origin` is large enough to
    contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg height:
        height of rect.
    :arg width_min:
        Minimum width to consider; must be >= 0.
    :arg width_max:
        Maximum width to consider, must be >= width_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned width.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top = origin
    bottom = top + height

    def rect_for_width(width):
        # Fixed height; only the right edge depends on the parameter.
        return Rect(left, top, left+width, bottom)

    return self.fit(rect_for_width, width_min, width_max, delta, verbose)
| 12048 | |
| 12049 | |
class TextPage:
    """Wrapper around a `mupdf.FzStextPage` offering text/image extraction
    in several formats and text search."""

    def __init__(self, *args):
        """Create from a `mupdf.FzRect` (new empty stext page with that
        mediabox) or wrap an existing `mupdf.FzStextPage`.

        Raises:
            Exception: if `args` matches neither form.
        """
        if args_match(args, mupdf.FzRect):
            mediabox = args[0]
            self.this = mupdf.FzStextPage( mediabox)
        elif args_match(args, mupdf.FzStextPage):
            self.this = args[0]
        else:
            raise Exception(f'Unrecognised args: {args}')
        self.thisown = True
        self.parent = None

    @staticmethod
    def _apply_cb_and_sort(val, cb, sort):
        """Shared post-processing for the dict/JSON extractors.

        If `cb` is given, its `width` / `height` replace the ones in `val`.
        If `sort` is true, `val["blocks"]` is sorted in place by
        `(bbox[3], bbox[0])`, i.e. bottom coordinate, then left coordinate.
        Returns `val`.
        """
        if cb is not None:
            val["width"] = cb.width
            val["height"] = cb.height
        if sort:
            val["blocks"].sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
        return val

    @staticmethod
    def _dict_to_json(val):
        """Serialize a page dict to JSON; bytes-like values are emitted as
        base64 strings."""
        # Imported here (not at module level) to keep package import fast.
        import base64
        import json

        class b64encode(json.JSONEncoder):
            def default(self, s):
                if isinstance(s, (bytes, bytearray)):
                    return base64.b64encode(s).decode()
                # Any other non-serializable object becomes JSON null, as
                # it always did in this encoder.
                return None

        return json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1)

    def _extractText(self, format_):
        """Return the page as a string; `format_` selects the output:
        1 = HTML, 3 = XML, 4 = XHTML, anything else = plain text."""
        this_tpage = self.this
        res = mupdf.fz_new_buffer(1024)
        out = mupdf.FzOutput( res)
        # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
        # because there is no .refs member visible and no fz_keep_output() fn,
        # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
        # doesn't convert the returned fz_output* into a mupdf.FzOutput.
        #out = mupdf.FzOutput(out)
        if format_ == 1:
            mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
        elif format_ == 3:
            mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0)
        elif format_ == 4:
            mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0)
        else:
            JM_print_stext_page_as_text(res, this_tpage)
        out.fz_close_output()
        text = JM_EscapeStrFromBuffer(res)
        return text

    def _getNewBlockList(self, page_dict, raw):
        """Let `JM_make_textpage_dict` fill `page_dict` from this page."""
        JM_make_textpage_dict(self.this, page_dict, raw)

    def _textpage_dict(self, raw=False):
        """Build the page dict: width/height from `self.rect` plus whatever
        `_getNewBlockList()` adds."""
        page_dict = {"width": self.rect.width, "height": self.rect.height}
        self._getNewBlockList(page_dict, raw)
        return page_dict

    def extractBLOCKS(self):
        """Return a list with text block information.

        Each item is a tuple `(x0, y0, x1, y1, text, block_no, block_type)`.
        """
        if g_use_extra:
            return extra.extractBLOCKS(self.this)
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
        res = mupdf.fz_new_buffer(1024)
        lines = []
        for block_n, block in enumerate(this_tpage):
            blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                mupdf.fz_clear_buffer(res)  # set text buffer to empty
                last_char = 0
                for line in block:
                    linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                    for ch in line:
                        cbbox = JM_char_bbox(line, ch)
                        # Skip characters outside a finite page rectangle.
                        if (not JM_rects_overlap(tp_rect, cbbox)
                                and not mupdf.fz_is_infinite_rect(tp_rect)
                                ):
                            continue
                        JM_append_rune(res, ch.m_internal.c)
                        last_char = ch.m_internal.c
                        linerect = mupdf.fz_union_rect(linerect, cbbox)
                    # Terminate the line with "\n" (10) unless already done.
                    if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
                        mupdf.fz_append_byte(res, 10)
                    blockrect = mupdf.fz_union_rect(blockrect, linerect)
                text = JM_EscapeStrFromBuffer(res)
            elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
                    or mupdf.fz_is_infinite_rect(tp_rect)
                    ):
                img = block.i_image()
                cs = img.colorspace()
                text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                        mupdf.fz_colorspace_name(cs),
                        img.w(), img.h(), img.bpc()
                        )
                blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
            # An empty blockrect means nothing of this block was accepted.
            if not mupdf.fz_is_empty_rect(blockrect):
                litem = (
                        blockrect.x0,
                        blockrect.y0,
                        blockrect.x1,
                        blockrect.y1,
                        text,
                        block_n,
                        block.m_internal.type,
                        )
                lines.append(litem)
        return lines

    def extractDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text spans."""
        val = self._textpage_dict(raw=False)
        return self._apply_cb_and_sort(val, cb, sort)

    def extractHTML(self) -> str:
        """Return page content as a HTML string."""
        return self._extractText(1)

    def extractIMGINFO(self, hashes=0):
        """Return a list with image meta information.

        One dict per non-text block. If `hashes` is true, each dict also
        has a "digest" entry computed via `mupdf.fz_md5_pixmap2()`.
        """
        this_tpage = self.this
        rc = []
        for block_n, block in enumerate(this_tpage):
            if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            img = block.i_image()
            img_size = 0
            mask = img.mask()
            has_mask = bool(mask.m_internal)
            compr_buff = mupdf.fz_compressed_image_buffer(img)
            if compr_buff.m_internal:
                img_size = compr_buff.fz_compressed_buffer_size()
                compr_buff = None
            if hashes:
                r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
                assert mupdf.fz_is_infinite_irect(r)
                m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
                pix, w, h = mupdf.fz_get_pixmap_from_image(img, r, m)
                digest = mupdf.fz_md5_pixmap2(pix)
                digest = bytes(digest)
            if img_size == 0:
                # No compressed buffer size available: use w * h * n.
                img_size = img.w() * img.h() * img.n()
            cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
            block_dict = dict()
            block_dict[dictkey_number] = block_n
            block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
            block_dict[dictkey_width] = img.w()
            block_dict[dictkey_height] = img.h()
            block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
            block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
            block_dict[dictkey_xres] = img.xres()
            block_dict[dictkey_yres] = img.yres()
            block_dict[dictkey_bpc] = img.bpc()
            block_dict[dictkey_size] = img_size
            if hashes:
                block_dict["digest"] = digest
            block_dict["has-mask"] = has_mask
            rc.append(block_dict)
        return rc

    def extractJSON(self, cb=None, sort=False) -> str:
        """Return 'extractDICT' converted to JSON format."""
        val = self._textpage_dict(raw=False)
        return self._dict_to_json(self._apply_cb_and_sort(val, cb, sort))

    def extractRAWDICT(self, cb=None, sort=False) -> dict:
        """Return page content as a Python dict of images and text characters."""
        val = self._textpage_dict(raw=True)
        return self._apply_cb_and_sort(val, cb, sort)

    def extractRAWJSON(self, cb=None, sort=False) -> str:
        """Return 'extractRAWDICT' converted to JSON format."""
        val = self._textpage_dict(raw=True)
        return self._dict_to_json(self._apply_cb_and_sort(val, cb, sort))

    def extractSelection(self, pointa, pointb):
        """Return the result of `mupdf.fz_copy_selection()` for the
        selection spanned by the two point-like arguments."""
        a = JM_point_from_py(pointa)
        b = JM_point_from_py(pointb)
        found = mupdf.fz_copy_selection(self.this, a, b, 0)
        return found

    def extractText(self, sort=False) -> str:
        """Return simple, bare text on the page.

        With `sort` true, the texts of `extractBLOCKS()` are concatenated
        in `(y1, x0)` order instead.
        """
        if not sort:
            return self._extractText(0)
        blocks = sorted(self.extractBLOCKS(), key=lambda b: (b[3], b[0]))
        return "".join(b[4] for b in blocks)

    def extractTextbox(self, rect):
        """Return the text copied from rectangle `rect` of the page."""
        this_tpage = self.this
        assert isinstance(this_tpage, mupdf.FzStextPage)
        area = JM_rect_from_py(rect)
        found = JM_copy_rectangle(this_tpage, area)
        rc = PyUnicode_DecodeRawUnicodeEscape(found)
        return rc

    def extractWORDS(self, delimiters=None):
        """Return a list with text word information."""
        if g_use_extra:
            return extra.extractWORDS(self.this, delimiters)
        buflen = 0
        last_char_rtl = 0
        wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)  # word bbox
        this_tpage = self.this
        tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)

        buff = mupdf.fz_new_buffer(64)
        lines = []
        for block_n, block in enumerate(this_tpage):
            if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
                continue
            for line_n, line in enumerate(block):
                word_n = 0                  # word counter per line
                mupdf.fz_clear_buffer(buff) # reset word buffer
                buflen = 0                  # reset char counter
                for ch in line:
                    cbbox = JM_char_bbox(line, ch)
                    # Skip characters outside a finite page rectangle.
                    if (not JM_rects_overlap(tp_rect, cbbox)
                            and not mupdf.fz_is_infinite_rect(tp_rect)
                            ):
                        continue
                    word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                    this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                    # A word ends at a delimiter or where the writing
                    # direction changes.
                    if word_delimiter or this_char_rtl != last_char_rtl:
                        if buflen == 0 and word_delimiter:
                            continue    # skip delimiters at line start
                        if not mupdf.fz_is_empty_rect(wbbox):
                            word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                        mupdf.fz_clear_buffer(buff)
                        buflen = 0  # reset char counter
                        if word_delimiter:
                            continue
                    # append one unicode character to the word
                    JM_append_rune(buff, ch.m_internal.c)
                    last_char_rtl = this_char_rtl
                    buflen += 1
                    # enlarge word bbox
                    wbbox = mupdf.fz_union_rect(wbbox, cbbox)
                # Flush the word still pending at the end of the line.
                if buflen and not mupdf.fz_is_empty_rect(wbbox):
                    word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
                    buflen = 0
        return lines

    def extractXHTML(self) -> str:
        """Return page content as a XHTML string."""
        return self._extractText(4)

    def extractXML(self) -> str:
        """Return page content as a XML string."""
        return self._extractText(3)

    def poolsize(self):
        """TextPage current poolsize."""
        tpage = self.this
        pool = mupdf.Pool(tpage.m_internal.pool)
        size = mupdf.fz_pool_size( pool)
        pool.m_internal = None  # Ensure that pool's destructor does not free the pool.
        return size

    @property
    def rect(self):
        """Page rectangle."""
        mediabox = self.this.m_internal.mediabox
        return Rect(JM_py_from_rect(mediabox))

    def search(self, needle, hit_max=0, quads=1):
        """Locate 'needle' returning rects or quads.

        `hit_max` is accepted but not used by this implementation.
        """
        val = JM_search_stext_page(self.this, needle)
        if not val:
            return val
        items = len(val)
        for i in range(items):  # change entries to quads or rects
            q = Quad(val[i])
            val[i] = q if quads else q.rect
        if quads:
            return val
        i = 0  # join overlapping rects on the same line
        while i < items - 1:
            v1 = val[i]
            v2 = val[i + 1]
            if v1.y1 != v2.y1 or (v1 & v2).is_empty:
                i += 1
                continue  # no overlap on same line
            val[i] = v1 | v2  # join rectangles
            del val[i + 1]  # remove v2
            items -= 1  # reduce item count
        return val

    extractTEXT = extractText
| 12393 | |
| 12394 | |
class TextWriter:
    """Collects text spans in a MuPDF text object for later output on PDF
    pages with a compatible page rectangle."""

    def __init__(self, page_rect, opacity=1, color=None):
        """Stores text spans for later output on compatible PDF pages."""
        self.this = mupdf.fz_new_text()

        self.opacity = opacity
        self.color = color
        self.rect = Rect(page_rect)
        # Flips the y-axis within the page height; ictm is its inverse.
        self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height)
        self.ictm = ~self.ctm
        self.last_point = Point()
        self.last_point.__doc__ = "Position following last text insertion."
        self.text_rect = Rect()

        self.text_rect.__doc__ = "Accumulated area of text spans."
        self.used_fonts = set()
        self.thisown = True

    @property
    def _bbox(self):
        """Bounding box of the stored text as reported by
        `mupdf.fz_bound_text()` with an identity matrix."""
        val = JM_py_from_rect( mupdf.fz_bound_text( self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix()))
        val = Rect(val)
        return val

    def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
        """Store 'text' at point 'pos' using 'font' and 'fontsize'.

        Returns `(text_rect, last_point)`.

        Raises:
            ValueError: if `font` is not writable.
        """
        pos = Point(pos) * self.ictm
        #log( '{font=}')
        if font is None:
            font = Font("helv")
        if not font.is_writable:
            if 0:
                # Disabled diagnostics, kept for debugging.
                log( f'{font.this.m_internal.name=}')
                log( f'{font.this.m_internal.t3matrix=}')
                log( f'{font.this.m_internal.bbox=}')
                log( f'{font.this.m_internal.glyph_count=}')
                log( f'{font.this.m_internal.use_glyph_bbox=}')
                log( f'{font.this.m_internal.width_count=}')
                log( f'{font.this.m_internal.width_default=}')
                log( f'{font.this.m_internal.has_digest=}')
                log( f'Unsupported font {font.name=}')
                if mupdf_cppyy:
                    import cppyy
                    log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
            raise ValueError("Unsupported font '%s'." % font.name)
        if right_to_left:
            # Reverse the text ourselves and then store it as
            # left-to-right text.
            text = self.clean_rtl(text)
            text = "".join(reversed(text))
            right_to_left = 0

        lang = mupdf.fz_text_language_from_string(language)
        p = JM_point_from_py(pos)
        trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
        markup_dir = 0
        wmode = 0
        if small_caps == 0:
            trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        else:
            trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
        val = JM_py_from_matrix(trm)

        # The last two matrix entries give the position after the text.
        self.last_point = Point(val[-2:]) * self.ctm
        self.text_rect = self._bbox * self.ctm
        val = self.text_rect, self.last_point
        if font.flags["mono"] == 1:
            # Remembered so write_text() can call repair_mono_font().
            self.used_fonts.add(font)
        return val

    def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
        """Store 'text' one character per line, starting at 'pos' and
        advancing downwards by 1.2 * fontsize per character.

        Returns `(text_rect, last_point)`.
        """
        # Work on a private copy: the caller's point must not be modified,
        # and plain sequences such as tuples must be accepted as well.
        pos = Point(pos)
        lheight = fontsize * 1.2
        for c in text:
            self.append(pos, c, font=font, fontsize=fontsize,
                    language=language, small_caps=small_caps)
            pos.y += lheight
        return self.text_rect, self.last_point

    def clean_rtl(self, text):
        """Revert the sequence of Latin text parts.

        Text with right-to-left writing direction (Arabic, Hebrew) often
        contains Latin parts, which are written in left-to-right: numbers, names,
        etc. For output as PDF text we need *everything* in right-to-left.
        E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
        converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
        parts remain untouched.

        A word counts as Latin if it has at least two characters and all
        of them have code points <= 255.

        Args:
            text: str
        Returns:
            Massaged string.
        """
        if not text:
            return text
        # split into words at space boundaries
        words = text.split(" ")

        def is_latin(word):
            return len(word) >= 2 and max(map(ord, word)) <= 255

        run_start = None  # index of first word of the current Latin run
        # One extra iteration (i == len(words)) acts as a sentinel so that
        # a Latin run at the very end of the text is flushed, too.
        for i in range(len(words) + 1):
            if i < len(words) and is_latin(words[i]):
                # revert character sequence for Latin only words
                words[i] = words[i][::-1]
                if run_start is None:
                    run_start = i
            elif run_start is not None:
                # adjacent Latin words must revert their sequence, too
                if i - run_start > 1:
                    words[run_start:i] = words[run_start:i][::-1]
                run_start = None

        return " ".join(words)

    def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
        """Write the text to a PDF page having the TextWriter's page size.

        Args:
            page: a PDF page having same size.
            color: override text color.
            opacity: override transparency.
            overlay: put in foreground or background.
            morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
            matrix: Matrix to be used instead of 'morph' argument.
            render_mode: (int) PDF render mode operator 'Tr'.
            oc: passed to `page._get_optional_content()`; if that returns
                a name, the output is wrapped in '/OC /<name> BDC' ... 'EMC'.

        Raises:
            ValueError: incompatible page rect, malformed 'morph', or both
                'morph' and 'matrix' given.
        """
        CheckParent(page)
        if abs(self.rect - page.rect) > 1e-3:
            raise ValueError("incompatible page rect")
        if morph is not None:
            if (type(morph) not in (tuple, list)
                    or type(morph[0]) is not Point
                    or type(morph[1]) is not Matrix
                    ):
                raise ValueError("morph must be (Point, Matrix) or None")
        if matrix is not None and morph is not None:
            raise ValueError("only one of matrix, morph is allowed")
        # Non-numeric or -1 means: use the writer's own opacity.
        if getattr(opacity, "__float__", None) is None or opacity == -1:
            opacity = self.opacity
        if color is None:
            color = self.color

        pdfpage = page._pdf_page()
        alpha = 1
        if 0 <= opacity < 1:
            alpha = opacity
        ncol = 1
        dev_color = [0, 0, 0, 0]
        if color:
            ncol, dev_color = JM_color_FromSequence(color)
        if ncol == 3:
            colorspace = mupdf.fz_device_rgb()
        elif ncol == 4:
            colorspace = mupdf.fz_device_cmyk()
        else:
            colorspace = mupdf.fz_device_gray()

        # Render the text through a PDF device into a scratch resources
        # dict and contents buffer.
        resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
        contents = mupdf.fz_new_buffer(1024)
        dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
        #log( '=== {dev_color!r=}')
        mupdf.fz_fill_text(
                dev,
                self.this,
                mupdf.FzMatrix(),
                colorspace,
                dev_color,
                alpha,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                )
        mupdf.fz_close_device( dev)

        # copy generated resources into the one of the page
        max_alp, max_font = JM_merge_resources( pdfpage, resources)
        old_cont_lines = JM_EscapeStrFromBuffer( contents).splitlines()

        optcont = page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC" % optcont
            emc = "EMC"
        else:
            bdc = emc = ""

        new_cont_lines = ["q"]
        if bdc:
            new_cont_lines.append(bdc)

        cb = page.cropbox_position
        if page.rotation in (90, 270):
            delta = page.rect.height - page.rect.width
        else:
            delta = 0
        mb = page.mediabox
        if bool(cb) or mb.y0 != 0 or delta != 0:
            new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")

        if morph:
            # Apply morph[1] with morph[0] as its fixpoint.
            p = morph[0] * self.ictm
            shift = Matrix(1, 1).pretranslate(p.x, p.y)
            matrix = ~shift * morph[1] * shift
        if morph or matrix:
            new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")

        for line in old_cont_lines:
            if line.endswith(" cm"):
                # Drop the device's own 'cm' lines.
                continue
            if line == "BT":
                new_cont_lines.append(line)
                new_cont_lines.append("%i Tr" % render_mode)
                continue
            if line.endswith(" gs"):
                # Renumber by the offset returned from JM_merge_resources().
                alp = int(line.split()[0][4:]) + max_alp
                line = "/Alp%i gs" % alp
            elif line.endswith(" Tf"):
                temp = line.split()
                fsize = float(temp[1])
                if render_mode != 0:
                    w = fsize * 0.05
                else:
                    w = 1
                new_cont_lines.append(_format_g(w) + " w")
                # Renumber by the offset returned from JM_merge_resources().
                font_no = int(temp[0][2:]) + max_font
                line = " ".join(["/F%i" % font_no] + temp[1:])
            elif line.endswith(" rg"):
                # Emit the uppercase variant of the color operator as well.
                new_cont_lines.append(line.replace("rg", "RG"))
            elif line.endswith(" g"):
                new_cont_lines.append(line.replace(" g", " G"))
            elif line.endswith(" k"):
                new_cont_lines.append(line.replace(" k", " K"))
            new_cont_lines.append(line)
        if emc:
            new_cont_lines.append(emc)
        new_cont_lines.append("Q\n")
        content = "\n".join(new_cont_lines).encode("utf-8")
        TOOLS._insert_contents(page, content, overlay=overlay)
        for used_font in self.used_fonts:
            repair_mono_font(page, used_font)
        return None
| 12651 | |
| 12652 | |
class IRect:
    """
    Rectangle given by the four coordinates x0, y0, x1, y1.

    IRect() - all zeros
    IRect(x0, y0, x1, y1) - 4 coordinates
    IRect(top-left, x1, y1) - point and 2 coordinates
    IRect(x0, y0, bottom-right) - 2 coordinates and point
    IRect(top-left, bottom-right) - 2 points
    IRect(sequ) - new from sequence or rect-like

    The object behaves like a sequence of length 4. Most operators and
    set-like methods delegate to the corresponding Rect method and call
    .round() on its result.
    """

    def __add__(self, p):
        """Return Rect.__add__(self, p), rounded."""
        return Rect.__add__(self, p).round()

    def __and__(self, x):
        """Return Rect.__and__(self, x), rounded."""
        return Rect.__and__(self, x).round()

    def __contains__(self, x):
        """Containment test, delegated to Rect.__contains__."""
        return Rect.__contains__(self, x)

    def __eq__(self, r):
        """Compare coordinate-wise with a 4-item sequence.

        Returns False for objects without __len__, for objects whose length
        is not 4, and for objects that advertise __len__ but cannot be
        measured or indexed with 0..3 (for example sets, dicts or classes).
        """
        if not hasattr(r, "__len__"):
            return False
        try:
            return (
                len(r) == 4
                and self.x0 == r[0]
                and self.y0 == r[1]
                and self.x1 == r[2]
                and self.y1 == r[3]
            )
        except (TypeError, KeyError, IndexError):
            # Having __len__ does not guarantee integer indexing; treat such
            # objects as "not equal" instead of letting '==' raise.
            return False

    def __getitem__(self, i):
        """Return item i of the tuple (x0, y0, x1, y1)."""
        return (self.x0, self.y0, self.x1, self.y1)[i]

    def __hash__(self):
        """Hash of the coordinate tuple."""
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """Set x0, y0, x1, y1 from the four values returned by util_make_irect()."""
        self.x0, self.y0, self.x1, self.y1 = util_make_irect(
            *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1
        )

    def __len__(self):
        """Always 4."""
        return 4

    def __mul__(self, m):
        """Return Rect.__mul__(self, m), rounded."""
        return Rect.__mul__(self, m).round()

    def __neg__(self):
        """Return a new IRect with all four coordinates negated."""
        return IRect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __or__(self, x):
        """Return Rect.__or__(self, x), rounded."""
        return Rect.__or__(self, x).round()

    def __pos__(self):
        """Return a new IRect constructed from this one."""
        return IRect(self)

    def __repr__(self):
        """Return 'IRect' followed by the coordinate tuple."""
        return "IRect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate i (0..3) to int(v).

        Raises:
            IndexError: if i is not 0, 1, 2 or 3.
        """
        # The value is converted before the index is checked, so conversion
        # errors take precedence over IndexError.
        v = int(v)
        if i == 0:
            self.x0 = v
        elif i == 1:
            self.y0 = v
        elif i == 2:
            self.x1 = v
        elif i == 3:
            self.y1 = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Return Rect.__sub__(self, p), rounded."""
        return Rect.__sub__(self, p).round()

    def __truediv__(self, m):
        """Return Rect.__truediv__(self, m), rounded."""
        return Rect.__truediv__(self, m).round()

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    @property
    def height(self):
        """y1 - y0, but never less than 0."""
        return max(0, self.y1 - self.y0)

    def contains(self, x):
        """Check if x is in the rectangle."""
        return self.__contains__(x)

    def include_point(self, p):
        """Extend rectangle to include point p.

        Works on the Rect obtained from self.rect and returns the .irect of
        the result; no coordinate of this object is assigned here.
        """
        rect = self.rect.include_point(p)
        return rect.irect

    def include_rect(self, r):
        """Extend rectangle to include rectangle r.

        Works on the Rect obtained from self.rect and returns the .irect of
        the result; no coordinate of this object is assigned here.
        """
        rect = self.rect.include_rect(r)
        return rect.irect

    def intersect(self, r):
        """Restrict rectangle to intersection with rectangle r.

        Delegates to Rect.intersect with this object passed as 'self' and
        rounds the result.
        """
        return Rect.intersect(self, r).round()

    def intersects(self, x):
        """Intersection test, delegated to Rect.intersects."""
        return Rect.intersects(self, x)

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if rectangle is infinite."""
        return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Return the square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Replace rectangle with its valid version."""
        # Swap the coordinates of an axis whose end lies before its start.
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    @property
    def rect(self):
        """Return Rect(self)."""
        return Rect(self)

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect.

        Raises:
            ValueError: if this rectangle or Rect(r) is infinite or empty.
        """
        r = Rect(r)
        if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Shift the top-left corner to the origin, scale to the target
        # size, then shift to the target's top-left corner.
        return (
            Matrix(1, 0, 0, 1, -self.x0, -self.y0)
            * Matrix(r.width / self.width, r.height / self.height)
            * Matrix(1, 0, 0, 1, r.x0, r.y0)
        )

    def transform(self, m):
        """Return Rect.transform(self, m), rounded."""
        return Rect.transform(self, m).round()

    @property
    def width(self):
        """x1 - x0, but never less than 0."""
        return max(0, self.x1 - self.x0)

    # Short aliases for the corner properties.
    br = bottom_right
    bl = bottom_left
    tl = top_left
    tr = top_right
| 12830 | |
| 12831 | |
| 12832 # Data | |
| 12833 # | |
| 12834 | |
# Copy MuPDF's simple enum values into this module's namespace, so that
# names starting with 'PDF_' or 'UCDN_SCRIPT_' are attributes of this module.
#
# mupdf.__dict__ is iterated directly; the alternative
# inspect.getmembers(mupdf) was measured as slower (e.g. 0.019 instead of
# 0.004).
_self = sys.modules[__name__]
for _name, _value in mupdf.__dict__.items():
    if not _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
        continue
    if _name.startswith('PDF_ENUM_NAME_'):
        # Not a simple enum.
        continue
    setattr(_self, _name, _value)

# This is a macro so not preserved in mupdf C++/Python bindings.
PDF_SIGNATURE_DEFAULT_APPEARANCE = (
    0
    | mupdf.PDF_SIGNATURE_SHOW_LABELS
    | mupdf.PDF_SIGNATURE_SHOW_DN
    | mupdf.PDF_SIGNATURE_SHOW_DATE
    | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
    | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
    | mupdf.PDF_SIGNATURE_SHOW_LOGO
)

assert mupdf.UCDN_EAST_ASIAN_H == 1

# Sanity-check the dynamic copy above. Flake8 incorrectly fails the next two
# lines because the names were added to this module dynamically.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE  # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM  # noqa: F821

# Remove the temporary helper names from the module namespace.
del _self, _name, _value
| 12882 | |
AnyType = typing.Any

# Names of the 14 fonts listed here, in their canonical spelling.
Base14_fontnames = (
    "Courier",
    "Courier-Oblique",
    "Courier-Bold",
    "Courier-BoldOblique",
    "Helvetica",
    "Helvetica-Oblique",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    "Times-Roman",
    "Times-Italic",
    "Times-Bold",
    "Times-BoldItalic",
    "Symbol",
    "ZapfDingbats",
)

# Lookup table from a lower-case key to the canonical font name: first the
# lower-cased full names, then the four-letter abbreviations.
Base14_fontdict = {}
for f in Base14_fontnames:
    Base14_fontdict[f.lower()] = f
Base14_fontdict.update(
    {
        "helv": "Helvetica",
        "heit": "Helvetica-Oblique",
        "hebo": "Helvetica-Bold",
        "hebi": "Helvetica-BoldOblique",
        "cour": "Courier",
        "coit": "Courier-Oblique",
        "cobo": "Courier-Bold",
        "cobi": "Courier-BoldOblique",
        "tiro": "Times-Roman",
        "tibo": "Times-Bold",
        "tiit": "Times-Italic",
        "tibi": "Times-BoldItalic",
        "symb": "Symbol",
        "zadb": "ZapfDingbats",
    }
)
| 12919 | |
# Both tolerances share the same value.
EPSILON = FLT_EPSILON = 1e-5

# largest 32bit integers surviving C float conversion roundtrips
# used by MuPDF to define infinite rectangles
FZ_MIN_INF_RECT = -(1 << 31)       # -0x80000000
FZ_MAX_INF_RECT = (1 << 31) - 128  # 0x7fffff80

JM_annot_id_stem = "fitz"
JM_mupdf_warnings_store = list()
JM_mupdf_show_errors = 1
JM_mupdf_show_warnings = 0
| 12932 | |
| 12933 | |
# ------------------------------------------------------------------------------
# Image recompression constants
# ------------------------------------------------------------------------------
# Each name is a module-level alias of the identically named attribute of the
# mupdf bindings module.
FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
| 12945 | |
# ------------------------------------------------------------------------------
# Various PDF Optional Content Flags
# ------------------------------------------------------------------------------
PDF_OC_ON, PDF_OC_TOGGLE, PDF_OC_OFF = range(3)

# ------------------------------------------------------------------------------
# link kinds and link flags
# ------------------------------------------------------------------------------
# Link kinds are numbered consecutively from 0.
(
    LINK_NONE,
    LINK_GOTO,
    LINK_URI,
    LINK_LAUNCH,
    LINK_NAMED,
    LINK_GOTOR,
) = range(6)

# Link flags occupy one bit each.
LINK_FLAG_L_VALID = 1 << 0
LINK_FLAG_T_VALID = 1 << 1
LINK_FLAG_R_VALID = 1 << 2
LINK_FLAG_B_VALID = 1 << 3
LINK_FLAG_FIT_H = 1 << 4
LINK_FLAG_FIT_V = 1 << 5
LINK_FLAG_R_IS_ZOOM = 1 << 6

SigFlag_SignaturesExist = 1 << 0
SigFlag_AppendOnly = 1 << 1

# Stamp identifiers are numbered consecutively from 0.
(
    STAMP_Approved,
    STAMP_AsIs,
    STAMP_Confidential,
    STAMP_Departmental,
    STAMP_Experimental,
    STAMP_Expired,
    STAMP_Final,
    STAMP_ForComment,
    STAMP_ForPublicRelease,
    STAMP_NotApproved,
    STAMP_NotForPublicRelease,
    STAMP_Sold,
    STAMP_TopSecret,
    STAMP_Draft,
) = range(14)

(
    TEXT_ALIGN_LEFT,
    TEXT_ALIGN_CENTER,
    TEXT_ALIGN_RIGHT,
    TEXT_ALIGN_JUSTIFY,
) = range(4)

# Font property flags occupy one bit each.
TEXT_FONT_SUPERSCRIPT = 1 << 0
TEXT_FONT_ITALIC = 1 << 1
TEXT_FONT_SERIFED = 1 << 2
TEXT_FONT_MONOSPACED = 1 << 3
TEXT_FONT_BOLD = 1 << 4

(
    TEXT_OUTPUT_TEXT,
    TEXT_OUTPUT_HTML,
    TEXT_OUTPUT_JSON,
    TEXT_OUTPUT_XML,
    TEXT_OUTPUT_XHTML,
) = range(5)
| 13004 | |
# Text extraction flags: each TEXT_* name is an alias of the corresponding
# FZ_STEXT_* attribute of the mupdf bindings module.
TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT

# These aliases are only created when the MuPDF version is at least 1.26.
# NOTE(review): the extent of this 'if' body is reconstructed as the whole
# contiguous run of assignments below it - confirm against the repository.
if mupdf_version_tuple >= (1, 26):
    TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
    TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
    TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
    TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
    TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
    TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
    TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS

# 2025-05-07: Non-standard names preserved for backwards compatibility.
TEXT_STEXT_SEGMENT = TEXT_SEGMENT
TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
| 13031 | |
# Flag combinations, one per TEXTFLAGS_* name. Several names use exactly
# the same set of flags; those are written as a reference to the first such
# definition.
# NOTE(review): sharing values this way assumes the TEXT_* flags are plain
# ints combined with '|' - confirm.

# Ligatures + whitespace + mediabox clipping + CID for unknown unicode.
TEXTFLAGS_WORDS = (
    0
    | TEXT_PRESERVE_LIGATURES
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_BLOCKS = TEXTFLAGS_WORDS

# Same as TEXTFLAGS_WORDS, plus images.
TEXTFLAGS_DICT = TEXTFLAGS_WORDS | TEXT_PRESERVE_IMAGES

TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT

# No ligature flag here, but dehyphenation.
TEXTFLAGS_SEARCH = (
    0
    | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP
    | TEXT_DEHYPHENATE
    | TEXT_USE_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_HTML = TEXTFLAGS_DICT

TEXTFLAGS_XHTML = TEXTFLAGS_DICT

TEXTFLAGS_XML = TEXTFLAGS_WORDS

TEXTFLAGS_TEXT = TEXTFLAGS_WORDS
| 13092 | |
# Simple text encoding options
TEXT_ENCODING_LATIN, TEXT_ENCODING_GREEK, TEXT_ENCODING_CYRILLIC = range(3)

TOOLS_JM_UNIQUE_ID = 0

# colorspace identifiers (numbering starts at 1)
CS_RGB, CS_GRAY, CS_CMYK = range(1, 4)
| 13104 | |
# PDF Blend Modes
#
# Each constant holds the name of a PDF blend mode as a string. PDF names
# are case-sensitive, so every value must match the spelling used by the PDF
# specification exactly.
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
# The specification spells this mode with a capital 'L' ("SoftLight").
PDF_BM_SoftLight = "SoftLight"
| 13122 | |
| 13123 | |
# Builders for the PDF source text of link annotation objects. Every builder
# returns a dictionary string containing an action (/A), the /Rect contents
# given by its last argument, a zero-width border (/BS<</W 0>>) and
# /Subtype/Link.

def _annot_skel_goto1(a, b, c, d, e):
    # 'a' is written in front of "0 R"; b, c, d are formatted by _format_g()
    # after /XYZ.
    return f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_goto2(a, b):
    # 'a' is inserted directly after /D.
    return f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_gotor1(a, b, c, d, e, f, g):
    # e and f become the /F and /UF strings of the file specification.
    return f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_gotor2(a, b, c):
    # 'a' is inserted directly after /D, 'b' becomes the /F string.
    return f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_launch(a, b, c):
    # a and b become the /F and /UF strings of the file specification.
    return f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_uri(a, b):
    # 'a' becomes the /URI string.
    return f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"


def _annot_skel_named(a, b):
    # 'a' becomes the /D string of a GoTo action.
    return f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>"


annot_skel = {
    "goto1": _annot_skel_goto1,
    "goto2": _annot_skel_goto2,
    "gotor1": _annot_skel_gotor1,
    "gotor2": _annot_skel_gotor2,
    "launch": _annot_skel_launch,
    "uri": _annot_skel_uri,
    "named": _annot_skel_named,
}
| 13133 | |
class FileDataError(RuntimeError):
    """Raised for documents with file structure issues."""


# NOTE: this class derives from RuntimeError and has the same name as the
# builtin FileNotFoundError exception.
class FileNotFoundError(RuntimeError):
    """Raised if file does not exist."""


class EmptyFileError(FileDataError):
    """Raised when creating documents from zero-length data."""
| 13145 | |
| 13146 # propagate exception class to C-level code | |
| 13147 #_set_FileDataError(FileDataError) | |
| 13148 | |
# One Colorspace object per colorspace identifier, created in the order
# RGB, GRAY, CMYK.
csRGB, csGRAY, csCMYK = map(Colorspace, (CS_RGB, CS_GRAY, CS_CMYK))
| 13152 | |
| 13153 # These don't appear to be visible in classic, but are used | |
| 13154 # internally. | |
| 13155 # | |
# Dictionary key strings, listed alphabetically by constant name. For most
# entries the value equals the part of the name after 'dictkey_'; the
# exceptions carry a trailing comment.
dictkey_align = "align"
dictkey_asc = "ascender"  # value differs from name
dictkey_bbox = "bbox"
dictkey_bidi = "bidi"
dictkey_blocks = "blocks"
dictkey_bpc = "bpc"
dictkey_c = "c"
dictkey_char_flags = "char_flags"
dictkey_chars = "chars"
dictkey_color = "color"
dictkey_colorspace = "colorspace"
dictkey_content = "content"
dictkey_creationDate = "creationDate"
dictkey_cs_name = "cs-name"  # hyphen, not underscore
dictkey_da = "da"
dictkey_dashes = "dashes"
dictkey_desc = "descender"  # value differs from name
dictkey_descr = "description"  # value differs from name
dictkey_dir = "dir"
dictkey_effect = "effect"
dictkey_ext = "ext"
dictkey_filename = "filename"
dictkey_fill = "fill"
dictkey_flags = "flags"
dictkey_font = "font"
dictkey_glyph = "glyph"
dictkey_height = "height"
dictkey_id = "id"
dictkey_image = "image"
dictkey_items = "items"
dictkey_length = "length"
dictkey_lines = "lines"
dictkey_matrix = "transform"  # value differs from name
dictkey_modDate = "modDate"
dictkey_name = "name"
dictkey_number = "number"
dictkey_origin = "origin"
dictkey_rect = "rect"
dictkey_size = "size"
dictkey_smask = "smask"
dictkey_spans = "spans"
dictkey_stroke = "stroke"
dictkey_style = "style"
dictkey_subject = "subject"
dictkey_text = "text"
dictkey_title = "title"
dictkey_type = "type"
dictkey_ufilename = "ufilename"
dictkey_width = "width"
dictkey_wmode = "wmode"
dictkey_xref = "xref"
dictkey_xres = "xres"
dictkey_yres = "yres"
| 13209 | |
| 13210 | |
# Optional font data from the 'pymupdf_fonts' package. If the import fails,
# fitz_fontdescriptors is an empty dict.
try:
    from pymupdf_fonts import fontdescriptors, fontbuffers

    # Shallow copy of the descriptor mapping; every descriptor is given a
    # "loader" entry taken from the same key in fontbuffers.
    fitz_fontdescriptors = fontdescriptors.copy()
    for k in fitz_fontdescriptors:
        fitz_fontdescriptors[k]["loader"] = fontbuffers[k]

    # The imported names are no longer needed at module level.
    del fontdescriptors, fontbuffers
except ImportError:
    fitz_fontdescriptors = {}
| 13220 | |
| 13221 symbol_glyphs = ( # Glyph list for the built-in font 'Symbol' | |
| 13222 (183, 0.46), | |
| 13223 (183, 0.46), | |
| 13224 (183, 0.46), | |
| 13225 (183, 0.46), | |
| 13226 (183, 0.46), | |
| 13227 (183, 0.46), | |
| 13228 (183, 0.46), | |
| 13229 (183, 0.46), | |
| 13230 (183, 0.46), | |
| 13231 (183, 0.46), | |
| 13232 (183, 0.46), | |
| 13233 (183, 0.46), | |
| 13234 (183, 0.46), | |
| 13235 (183, 0.46), | |
| 13236 (183, 0.46), | |
| 13237 (183, 0.46), | |
| 13238 (183, 0.46), | |
| 13239 (183, 0.46), | |
| 13240 (183, 0.46), | |
| 13241 (183, 0.46), | |
| 13242 (183, 0.46), | |
| 13243 (183, 0.46), | |
| 13244 (183, 0.46), | |
| 13245 (183, 0.46), | |
| 13246 (183, 0.46), | |
| 13247 (183, 0.46), | |
| 13248 (183, 0.46), | |
| 13249 (183, 0.46), | |
| 13250 (183, 0.46), | |
| 13251 (183, 0.46), | |
| 13252 (183, 0.46), | |
| 13253 (183, 0.46), | |
| 13254 (32, 0.25), | |
| 13255 (33, 0.333), | |
| 13256 (34, 0.713), | |
| 13257 (35, 0.5), | |
| 13258 (36, 0.549), | |
| 13259 (37, 0.833), | |
| 13260 (38, 0.778), | |
| 13261 (39, 0.439), | |
| 13262 (40, 0.333), | |
| 13263 (41, 0.333), | |
| 13264 (42, 0.5), | |
| 13265 (43, 0.549), | |
| 13266 (44, 0.25), | |
| 13267 (45, 0.549), | |
| 13268 (46, 0.25), | |
| 13269 (47, 0.278), | |
| 13270 (48, 0.5), | |
| 13271 (49, 0.5), | |
| 13272 (50, 0.5), | |
| 13273 (51, 0.5), | |
| 13274 (52, 0.5), | |
| 13275 (53, 0.5), | |
| 13276 (54, 0.5), | |
| 13277 (55, 0.5), | |
| 13278 (56, 0.5), | |
| 13279 (57, 0.5), | |
| 13280 (58, 0.278), | |
| 13281 (59, 0.278), | |
| 13282 (60, 0.549), | |
| 13283 (61, 0.549), | |
| 13284 (62, 0.549), | |
| 13285 (63, 0.444), | |
| 13286 (64, 0.549), | |
| 13287 (65, 0.722), | |
| 13288 (66, 0.667), | |
| 13289 (67, 0.722), | |
| 13290 (68, 0.612), | |
| 13291 (69, 0.611), | |
| 13292 (70, 0.763), | |
| 13293 (71, 0.603), | |
| 13294 (72, 0.722), | |
| 13295 (73, 0.333), | |
| 13296 (74, 0.631), | |
| 13297 (75, 0.722), | |
| 13298 (76, 0.686), | |
| 13299 (77, 0.889), | |
| 13300 (78, 0.722), | |
| 13301 (79, 0.722), | |
| 13302 (80, 0.768), | |
| 13303 (81, 0.741), | |
| 13304 (82, 0.556), | |
| 13305 (83, 0.592), | |
| 13306 (84, 0.611), | |
| 13307 (85, 0.69), | |
| 13308 (86, 0.439), | |
| 13309 (87, 0.768), | |
| 13310 (88, 0.645), | |
| 13311 (89, 0.795), | |
| 13312 (90, 0.611), | |
| 13313 (91, 0.333), | |
| 13314 (92, 0.863), | |
| 13315 (93, 0.333), | |
| 13316 (94, 0.658), | |
| 13317 (95, 0.5), | |
| 13318 (96, 0.5), | |
| 13319 (97, 0.631), | |
| 13320 (98, 0.549), | |
| 13321 (99, 0.549), | |
| 13322 (100, 0.494), | |
| 13323 (101, 0.439), | |
| 13324 (102, 0.521), | |
| 13325 (103, 0.411), | |
| 13326 (104, 0.603), | |
| 13327 (105, 0.329), | |
| 13328 (106, 0.603), | |
| 13329 (107, 0.549), | |
| 13330 (108, 0.549), | |
| 13331 (109, 0.576), | |
| 13332 (110, 0.521), | |
| 13333 (111, 0.549), | |
| 13334 (112, 0.549), | |
| 13335 (113, 0.521), | |
| 13336 (114, 0.549), | |
| 13337 (115, 0.603), | |
| 13338 (116, 0.439), | |
| 13339 (117, 0.576), | |
| 13340 (118, 0.713), | |
| 13341 (119, 0.686), | |
| 13342 (120, 0.493), | |
| 13343 (121, 0.686), | |
| 13344 (122, 0.494), | |
| 13345 (123, 0.48), | |
| 13346 (124, 0.2), | |
| 13347 (125, 0.48), | |
| 13348 (126, 0.549), | |
| 13349 (183, 0.46), | |
| 13350 (183, 0.46), | |
| 13351 (183, 0.46), | |
| 13352 (183, 0.46), | |
| 13353 (183, 0.46), | |
| 13354 (183, 0.46), | |
| 13355 (183, 0.46), | |
| 13356 (183, 0.46), | |
| 13357 (183, 0.46), | |
| 13358 (183, 0.46), | |
| 13359 (183, 0.46), | |
| 13360 (183, 0.46), | |
| 13361 (183, 0.46), | |
| 13362 (183, 0.46), | |
| 13363 (183, 0.46), | |
| 13364 (183, 0.46), | |
| 13365 (183, 0.46), | |
| 13366 (183, 0.46), | |
| 13367 (183, 0.46), | |
| 13368 (183, 0.46), | |
| 13369 (183, 0.46), | |
| 13370 (183, 0.46), | |
| 13371 (183, 0.46), | |
| 13372 (183, 0.46), | |
| 13373 (183, 0.46), | |
| 13374 (183, 0.46), | |
| 13375 (183, 0.46), | |
| 13376 (183, 0.46), | |
| 13377 (183, 0.46), | |
| 13378 (183, 0.46), | |
| 13379 (183, 0.46), | |
| 13380 (183, 0.46), | |
| 13381 (183, 0.46), | |
| 13382 (160, 0.25), | |
| 13383 (161, 0.62), | |
| 13384 (162, 0.247), | |
| 13385 (163, 0.549), | |
| 13386 (164, 0.167), | |
| 13387 (165, 0.713), | |
| 13388 (166, 0.5), | |
| 13389 (167, 0.753), | |
| 13390 (168, 0.753), | |
| 13391 (169, 0.753), | |
| 13392 (170, 0.753), | |
| 13393 (171, 1.042), | |
| 13394 (172, 0.713), | |
| 13395 (173, 0.603), | |
| 13396 (174, 0.987), | |
| 13397 (175, 0.603), | |
| 13398 (176, 0.4), | |
| 13399 (177, 0.549), | |
| 13400 (178, 0.411), | |
| 13401 (179, 0.549), | |
| 13402 (180, 0.549), | |
| 13403 (181, 0.576), | |
| 13404 (182, 0.494), | |
| 13405 (183, 0.46), | |
| 13406 (184, 0.549), | |
| 13407 (185, 0.549), | |
| 13408 (186, 0.549), | |
| 13409 (187, 0.549), | |
| 13410 (188, 1), | |
| 13411 (189, 0.603), | |
| 13412 (190, 1), | |
| 13413 (191, 0.658), | |
| 13414 (192, 0.823), | |
| 13415 (193, 0.686), | |
| 13416 (194, 0.795), | |
| 13417 (195, 0.987), | |
| 13418 (196, 0.768), | |
| 13419 (197, 0.768), | |
| 13420 (198, 0.823), | |
| 13421 (199, 0.768), | |
| 13422 (200, 0.768), | |
| 13423 (201, 0.713), | |
| 13424 (202, 0.713), | |
| 13425 (203, 0.713), | |
| 13426 (204, 0.713), | |
| 13427 (205, 0.713), | |
| 13428 (206, 0.713), | |
| 13429 (207, 0.713), | |
| 13430 (208, 0.768), | |
| 13431 (209, 0.713), | |
| 13432 (210, 0.79), | |
| 13433 (211, 0.79), | |
| 13434 (212, 0.89), | |
| 13435 (213, 0.823), | |
| 13436 (214, 0.549), | |
| 13437 (215, 0.549), | |
| 13438 (216, 0.713), | |
| 13439 (217, 0.603), | |
| 13440 (218, 0.603), | |
| 13441 (219, 1.042), | |
| 13442 (220, 0.987), | |
| 13443 (221, 0.603), | |
| 13444 (222, 0.987), | |
| 13445 (223, 0.603), | |
| 13446 (224, 0.494), | |
| 13447 (225, 0.329), | |
| 13448 (226, 0.79), | |
| 13449 (227, 0.79), | |
| 13450 (228, 0.786), | |
| 13451 (229, 0.713), | |
| 13452 (230, 0.384), | |
| 13453 (231, 0.384), | |
| 13454 (232, 0.384), | |
| 13455 (233, 0.384), | |
| 13456 (234, 0.384), | |
| 13457 (235, 0.384), | |
| 13458 (236, 0.494), | |
| 13459 (237, 0.494), | |
| 13460 (238, 0.494), | |
| 13461 (239, 0.494), | |
| 13462 (183, 0.46), | |
| 13463 (241, 0.329), | |
| 13464 (242, 0.274), | |
| 13465 (243, 0.686), | |
| 13466 (244, 0.686), | |
| 13467 (245, 0.686), | |
| 13468 (246, 0.384), | |
| 13469 (247, 0.549), | |
| 13470 (248, 0.384), | |
| 13471 (249, 0.384), | |
| 13472 (250, 0.384), | |
| 13473 (251, 0.384), | |
| 13474 (252, 0.494), | |
| 13475 (253, 0.494), | |
| 13476 (254, 0.494), | |
| 13477 (183, 0.46), | |
| 13478 ) | |
| 13479 | |
| 13480 | |
| 13481 zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats' | |
| 13482 (183, 0.788), | |
| 13483 (183, 0.788), | |
| 13484 (183, 0.788), | |
| 13485 (183, 0.788), | |
| 13486 (183, 0.788), | |
| 13487 (183, 0.788), | |
| 13488 (183, 0.788), | |
| 13489 (183, 0.788), | |
| 13490 (183, 0.788), | |
| 13491 (183, 0.788), | |
| 13492 (183, 0.788), | |
| 13493 (183, 0.788), | |
| 13494 (183, 0.788), | |
| 13495 (183, 0.788), | |
| 13496 (183, 0.788), | |
| 13497 (183, 0.788), | |
| 13498 (183, 0.788), | |
| 13499 (183, 0.788), | |
| 13500 (183, 0.788), | |
| 13501 (183, 0.788), | |
| 13502 (183, 0.788), | |
| 13503 (183, 0.788), | |
| 13504 (183, 0.788), | |
| 13505 (183, 0.788), | |
| 13506 (183, 0.788), | |
| 13507 (183, 0.788), | |
| 13508 (183, 0.788), | |
| 13509 (183, 0.788), | |
| 13510 (183, 0.788), | |
| 13511 (183, 0.788), | |
| 13512 (183, 0.788), | |
| 13513 (183, 0.788), | |
| 13514 (32, 0.278), | |
| 13515 (33, 0.974), | |
| 13516 (34, 0.961), | |
| 13517 (35, 0.974), | |
| 13518 (36, 0.98), | |
| 13519 (37, 0.719), | |
| 13520 (38, 0.789), | |
| 13521 (39, 0.79), | |
| 13522 (40, 0.791), | |
| 13523 (41, 0.69), | |
| 13524 (42, 0.96), | |
| 13525 (43, 0.939), | |
| 13526 (44, 0.549), | |
| 13527 (45, 0.855), | |
| 13528 (46, 0.911), | |
| 13529 (47, 0.933), | |
| 13530 (48, 0.911), | |
| 13531 (49, 0.945), | |
| 13532 (50, 0.974), | |
| 13533 (51, 0.755), | |
| 13534 (52, 0.846), | |
| 13535 (53, 0.762), | |
| 13536 (54, 0.761), | |
| 13537 (55, 0.571), | |
| 13538 (56, 0.677), | |
| 13539 (57, 0.763), | |
| 13540 (58, 0.76), | |
| 13541 (59, 0.759), | |
| 13542 (60, 0.754), | |
| 13543 (61, 0.494), | |
| 13544 (62, 0.552), | |
| 13545 (63, 0.537), | |
| 13546 (64, 0.577), | |
| 13547 (65, 0.692), | |
| 13548 (66, 0.786), | |
| 13549 (67, 0.788), | |
| 13550 (68, 0.788), | |
| 13551 (69, 0.79), | |
| 13552 (70, 0.793), | |
| 13553 (71, 0.794), | |
| 13554 (72, 0.816), | |
| 13555 (73, 0.823), | |
| 13556 (74, 0.789), | |
| 13557 (75, 0.841), | |
| 13558 (76, 0.823), | |
| 13559 (77, 0.833), | |
| 13560 (78, 0.816), | |
| 13561 (79, 0.831), | |
| 13562 (80, 0.923), | |
| 13563 (81, 0.744), | |
| 13564 (82, 0.723), | |
| 13565 (83, 0.749), | |
| 13566 (84, 0.79), | |
| 13567 (85, 0.792), | |
| 13568 (86, 0.695), | |
| 13569 (87, 0.776), | |
| 13570 (88, 0.768), | |
| 13571 (89, 0.792), | |
| 13572 (90, 0.759), | |
| 13573 (91, 0.707), | |
| 13574 (92, 0.708), | |
| 13575 (93, 0.682), | |
| 13576 (94, 0.701), | |
| 13577 (95, 0.826), | |
| 13578 (96, 0.815), | |
| 13579 (97, 0.789), | |
| 13580 (98, 0.789), | |
| 13581 (99, 0.707), | |
| 13582 (100, 0.687), | |
| 13583 (101, 0.696), | |
| 13584 (102, 0.689), | |
| 13585 (103, 0.786), | |
| 13586 (104, 0.787), | |
| 13587 (105, 0.713), | |
| 13588 (106, 0.791), | |
| 13589 (107, 0.785), | |
| 13590 (108, 0.791), | |
| 13591 (109, 0.873), | |
| 13592 (110, 0.761), | |
| 13593 (111, 0.762), | |
| 13594 (112, 0.762), | |
| 13595 (113, 0.759), | |
| 13596 (114, 0.759), | |
| 13597 (115, 0.892), | |
| 13598 (116, 0.892), | |
| 13599 (117, 0.788), | |
| 13600 (118, 0.784), | |
| 13601 (119, 0.438), | |
| 13602 (120, 0.138), | |
| 13603 (121, 0.277), | |
| 13604 (122, 0.415), | |
| 13605 (123, 0.392), | |
| 13606 (124, 0.392), | |
| 13607 (125, 0.668), | |
| 13608 (126, 0.668), | |
| 13609 (183, 0.788), | |
| 13610 (183, 0.788), | |
| 13611 (183, 0.788), | |
| 13612 (183, 0.788), | |
| 13613 (183, 0.788), | |
| 13614 (183, 0.788), | |
| 13615 (183, 0.788), | |
| 13616 (183, 0.788), | |
| 13617 (183, 0.788), | |
| 13618 (183, 0.788), | |
| 13619 (183, 0.788), | |
| 13620 (183, 0.788), | |
| 13621 (183, 0.788), | |
| 13622 (183, 0.788), | |
| 13623 (183, 0.788), | |
| 13624 (183, 0.788), | |
| 13625 (183, 0.788), | |
| 13626 (183, 0.788), | |
| 13627 (183, 0.788), | |
| 13628 (183, 0.788), | |
| 13629 (183, 0.788), | |
| 13630 (183, 0.788), | |
| 13631 (183, 0.788), | |
| 13632 (183, 0.788), | |
| 13633 (183, 0.788), | |
| 13634 (183, 0.788), | |
| 13635 (183, 0.788), | |
| 13636 (183, 0.788), | |
| 13637 (183, 0.788), | |
| 13638 (183, 0.788), | |
| 13639 (183, 0.788), | |
| 13640 (183, 0.788), | |
| 13641 (183, 0.788), | |
| 13642 (183, 0.788), | |
| 13643 (161, 0.732), | |
| 13644 (162, 0.544), | |
| 13645 (163, 0.544), | |
| 13646 (164, 0.91), | |
| 13647 (165, 0.667), | |
| 13648 (166, 0.76), | |
| 13649 (167, 0.76), | |
| 13650 (168, 0.776), | |
| 13651 (169, 0.595), | |
| 13652 (170, 0.694), | |
| 13653 (171, 0.626), | |
| 13654 (172, 0.788), | |
| 13655 (173, 0.788), | |
| 13656 (174, 0.788), | |
| 13657 (175, 0.788), | |
| 13658 (176, 0.788), | |
| 13659 (177, 0.788), | |
| 13660 (178, 0.788), | |
| 13661 (179, 0.788), | |
| 13662 (180, 0.788), | |
| 13663 (181, 0.788), | |
| 13664 (182, 0.788), | |
| 13665 (183, 0.788), | |
| 13666 (184, 0.788), | |
| 13667 (185, 0.788), | |
| 13668 (186, 0.788), | |
| 13669 (187, 0.788), | |
| 13670 (188, 0.788), | |
| 13671 (189, 0.788), | |
| 13672 (190, 0.788), | |
| 13673 (191, 0.788), | |
| 13674 (192, 0.788), | |
| 13675 (193, 0.788), | |
| 13676 (194, 0.788), | |
| 13677 (195, 0.788), | |
| 13678 (196, 0.788), | |
| 13679 (197, 0.788), | |
| 13680 (198, 0.788), | |
| 13681 (199, 0.788), | |
| 13682 (200, 0.788), | |
| 13683 (201, 0.788), | |
| 13684 (202, 0.788), | |
| 13685 (203, 0.788), | |
| 13686 (204, 0.788), | |
| 13687 (205, 0.788), | |
| 13688 (206, 0.788), | |
| 13689 (207, 0.788), | |
| 13690 (208, 0.788), | |
| 13691 (209, 0.788), | |
| 13692 (210, 0.788), | |
| 13693 (211, 0.788), | |
| 13694 (212, 0.894), | |
| 13695 (213, 0.838), | |
| 13696 (214, 1.016), | |
| 13697 (215, 0.458), | |
| 13698 (216, 0.748), | |
| 13699 (217, 0.924), | |
| 13700 (218, 0.748), | |
| 13701 (219, 0.918), | |
| 13702 (220, 0.927), | |
| 13703 (221, 0.928), | |
| 13704 (222, 0.928), | |
| 13705 (223, 0.834), | |
| 13706 (224, 0.873), | |
| 13707 (225, 0.828), | |
| 13708 (226, 0.924), | |
| 13709 (227, 0.924), | |
| 13710 (228, 0.917), | |
| 13711 (229, 0.93), | |
| 13712 (230, 0.931), | |
| 13713 (231, 0.463), | |
| 13714 (232, 0.883), | |
| 13715 (233, 0.836), | |
| 13716 (234, 0.836), | |
| 13717 (235, 0.867), | |
| 13718 (236, 0.867), | |
| 13719 (237, 0.696), | |
| 13720 (238, 0.696), | |
| 13721 (239, 0.874), | |
| 13722 (183, 0.788), | |
| 13723 (241, 0.874), | |
| 13724 (242, 0.76), | |
| 13725 (243, 0.946), | |
| 13726 (244, 0.771), | |
| 13727 (245, 0.865), | |
| 13728 (246, 0.771), | |
| 13729 (247, 0.888), | |
| 13730 (248, 0.967), | |
| 13731 (249, 0.888), | |
| 13732 (250, 0.831), | |
| 13733 (251, 0.873), | |
| 13734 (252, 0.927), | |
| 13735 (253, 0.97), | |
| 13736 (183, 0.788), | |
| 13737 (183, 0.788), | |
| 13738 ) | |
| 13739 | |
| 13740 | |
| 13741 # Functions | |
| 13742 # | |
| 13743 | |
| 13744 def _read_samples( pixmap, offset, n): | |
| 13745 # fixme: need to be able to get a sample in one call, as a Python | |
| 13746 # bytes or similar. | |
| 13747 ret = [] | |
| 13748 if not pixmap.samples(): | |
| 13749 # mupdf.fz_samples_get() gives a segv if pixmap->samples is null. | |
| 13750 return ret | |
| 13751 for i in range( n): | |
| 13752 ret.append( mupdf.fz_samples_get( pixmap, offset + i)) | |
| 13753 return bytes( ret) | |
| 13754 | |
| 13755 | |
| 13756 def _INRANGE(v, low, high): | |
| 13757 return low <= v and v <= high | |
| 13758 | |
| 13759 | |
def _remove_dest_range(pdf, numbers):
    '''
    Remove 'Link' annotations whose target page number is in `numbers`.

    Pages whose own number is in `numbers` are skipped. On every other page,
    each /Annots entry of subtype 'Link' is inspected: its destination is
    taken from /Dest, or from the /D entry of a 'GoTo' action in /A. If the
    destination resolves to a page number contained in `numbers`, the
    annotation is deleted from the page's /Annots array.
    '''
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue

        page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue

        # Walk backwards so deleting an entry does not shift pending indices.
        for j in reversed(range(mupdf.pdf_array_len(annots))):
            annot = mupdf.pdf_array_get(annots, j)
            subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue

            action = mupdf.pdf_dict_get(annot, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(annot, PDF_NAME('Dest'))
            if action.m_internal:
                action_kind = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_kind, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))

            if mupdf.pdf_is_array(dest):
                target = mupdf.pdf_array_get(dest, 0)
                target_no = mupdf.pdf_lookup_page_number(pdf, target)
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(pdf.super(), mupdf.pdf_to_text_string(dest))
                target_no = location.page
            else:
                target_no = -1

            # A negative number means the page number lookup did not work.
            if target_no >= 0 and target_no in numbers:
                mupdf.pdf_array_delete(annots, j)
| 13794 | |
| 13795 | |
def ASSERT_PDF(cond):
    '''
    Raise `Exception(MSG_IS_NO_PDF)` if `cond` has no underlying object.

    `cond` must be a `mupdf.PdfPage` or `mupdf.PdfDocument`; this is checked
    with an assertion.
    '''
    pdf_types = (mupdf.PdfPage, mupdf.PdfDocument)
    assert isinstance(cond, pdf_types), f'{type(cond)=} {cond=}'
    if cond.m_internal:
        return
    raise Exception(MSG_IS_NO_PDF)
| 13800 | |
| 13801 | |
def EMPTY_IRECT():
    '''
    Return a new IRect whose first two coordinates are FZ_MAX_INF_RECT and
    whose last two coordinates are FZ_MIN_INF_RECT.
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return IRect(hi, hi, lo, lo)
| 13804 | |
| 13805 | |
def EMPTY_QUAD():
    '''Return the `.quad` of a fresh EMPTY_RECT().'''
    rect = EMPTY_RECT()
    return rect.quad
| 13808 | |
| 13809 | |
def EMPTY_RECT():
    '''
    Return a new Rect whose first two coordinates are FZ_MAX_INF_RECT and
    whose last two coordinates are FZ_MIN_INF_RECT.
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return Rect(hi, hi, lo, lo)
| 13812 | |
| 13813 | |
def ENSURE_OPERATION(pdf):
    '''Raise an Exception unless JM_have_operation(pdf) is true.'''
    if JM_have_operation(pdf):
        return
    raise Exception("No journalling operation started")
| 13817 | |
| 13818 | |
def INFINITE_IRECT():
    '''
    Return a new IRect whose first two coordinates are FZ_MIN_INF_RECT and
    whose last two coordinates are FZ_MAX_INF_RECT.
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return IRect(lo, lo, hi, hi)
| 13821 | |
| 13822 | |
def INFINITE_QUAD():
    '''Return the `.quad` of a fresh INFINITE_RECT().'''
    rect = INFINITE_RECT()
    return rect.quad
| 13825 | |
| 13826 | |
def INFINITE_RECT():
    '''
    Return a new Rect whose first two coordinates are FZ_MIN_INF_RECT and
    whose last two coordinates are FZ_MAX_INF_RECT.
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return Rect(lo, lo, hi, hi)
| 13829 | |
| 13830 | |
def JM_BinFromBuffer(buffer_):
    '''
    Return the content of an `mupdf.FzBuffer`, as delivered by
    `mupdf.fz_buffer_extract_copy()`.
    '''
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
| 13838 | |
| 13839 | |
def JM_EscapeStrFromStr(c):
    '''
    Return a `str` holding one character per utf8 byte of `c`.

    `None` gives the empty string. Any other value must be a `str`.
    '''
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`.  This gives us a Python `str`
    # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
    # digits for an invalid byte in the original `const char*`.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
    # to recognise \0xdcXY sequences, then map every byte to the character
    # with the same code point. Decoding as latin-1 does exactly that (it is
    # equivalent to `chr()` on each byte) in a single pass, instead of
    # building the result by repeated string concatenation.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    return c.encode('utf8', 'surrogateescape').decode('latin-1')
| 13867 | |
| 13868 | |
def JM_BufferFromBytes(stream):
    '''
    Make an fz_buffer from a bytes, bytearray, or any object offering a
    `getvalue()` method (for example io.BytesIO or io.StringIO). If
    `getvalue()` returns text, it is encoded as utf-8 first.

    Any other kind of argument yields an empty `mupdf.FzBuffer()`. An
    Exception is raised if `getvalue()` returns neither text nor bytes.
    '''
    if isinstance(stream, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()

    data = stream.getvalue()
    if isinstance(data, str):
        data = data.encode('utf-8')
    if isinstance(data, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(data)
    raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
| 13885 | |
| 13886 | |
def JM_FLOAT_ITEM(obj, idx):
    '''
    Return `obj[idx]` converted to float, or None if `obj` does not pass
    PySequence_Check().
    '''
    return float(obj[idx]) if PySequence_Check(obj) else None
| 13891 | |
def JM_INT_ITEM(obj, idx):
    '''
    Fetch a numeric item from a sequence.

    Returns `(0, obj[idx])` if `idx` is below `len(obj)` and the item is an
    int or a float, otherwise `(1, None)`.
    '''
    if idx >= len(obj):
        return 1, None
    item = obj[idx]
    if not isinstance(item, (int, float)):
        return 1, None
    return 0, item
| 13898 | |
| 13899 | |
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Create a pixmap of a page by running it directly on a draw device, so
    that separations can be supported.

    The pixmap covers the page bounds, intersected with `clip` and
    transformed by `ctm`. The document's output intent replaces colorspace
    `cs` if it has the same number of components. If `annots` is true the
    whole page is run, otherwise only the page contents.
    '''
    SPOTS_NONE, SPOTS_OVERPRINT_SIM, SPOTS_FULL = 0, 1, 2

    # fixme: this is a build-time setting in MuPDF's config.h.
    FZ_ENABLE_SPOT_RENDERING = True
    spots = SPOTS_OVERPRINT_SIM if FZ_ENABLE_SPOT_RENDERING else SPOTS_NONE

    matrix = JM_matrix_from_py(ctm)
    page_rect = mupdf.fz_intersect_rect(
        mupdf.fz_bound_page(page),
        JM_rect_from_py(clip),  # no-op if clip is not given
    )
    bbox = mupdf.fz_round_rect(mupdf.fz_transform_rect(page_rect, matrix))

    # Output intent of the document: if present and compatible, use it
    # instead of the colorspace parameter.
    colorspace = cs
    oi = mupdf.fz_document_output_intent(doc)
    if oi.m_internal and mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
        colorspace = mupdf.fz_keep_colorspace(oi)

    # Check if spots rendering is available and if so use separations.
    seps = None
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            want_full = spots == SPOTS_FULL
            for i in range(mupdf.fz_count_separations(seps)):
                mupdf.fz_set_separation_behavior(
                    seps,
                    i,
                    mupdf.FZ_SEPARATION_SPOT if want_full else mupdf.FZ_SEPARATION_COMPOSITE,
                )
        elif (
            # The page uses overprint: an empty separations object forces
            # the overprint simulation on.
            mupdf.fz_page_uses_overprint(page)
            # Or we have an output intent that is incompatible with the
            # colorspace our device needs. Forcing the overprint simulation
            # on ensures that we 'simulate' the output intent too.
            or (oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace))
        ):
            seps = mupdf.fz_new_separations(0)

    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    dev = mupdf.fz_new_draw_device(matrix, pix)
    run = mupdf.fz_run_page if annots else mupdf.fz_run_page_contents
    run(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
| 13968 | |
| 13969 | |
def JM_StrAsChar(x):
    '''Return `x` unchanged.'''
    # fixme: should encode (`x.encode('utf8')`), but swig doesn't pass bytes
    # to C as const char*.
    return x
| 13974 | |
| 13975 | |
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimals. Items
    whose absolute value is not at least 1e-4 are replaced by 0.
    '''
    return tuple(round(item, 5) if abs(item) >= 1e-4 else 0 for item in o)
| 13978 | |
| 13979 | |
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimals. Items
    whose absolute value is not at least 1e-3 are replaced by 0.
    '''
    return tuple(round(item, 3) if abs(item) >= 1e-3 else 0 for item in o)
| 13982 | |
| 13983 | |
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`: None becomes the empty string and bytes are
    decoded as utf8. Anything else must already be a `str`.
    '''
    if s is None:
        return ''
    text = s.decode('utf8') if isinstance(s, bytes) else s
    # If this fails, `s` was not bytes, so it is still the original value.
    assert isinstance(text, str), f'{type(s)=} {s=}'
    return text
| 13991 | |
| 13992 | |
def JM_add_annot_id(annot, stem):
    '''
    Add a unique /NM key to an annotation or widget.

    The name has the form '<JM_annot_id_stem>-<stem><number>', using the
    smallest number (counting up from 0) for which the name is not yet in
    the list returned by JM_get_annot_id_list() for the annotation's page.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = _pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    taken = JM_get_annot_id_list(page)

    number = 0
    stem_id = f'{JM_annot_id_stem}-{stem}{number}'
    while stem_id in taken:
        number += 1
        stem_id = f'{JM_annot_id_stem}-{stem}{number}'

    response = JM_StrAsChar(stem_id)
    mupdf.pdf_dict_puts(annot_obj, "NM", mupdf.pdf_new_string(response, len(response)))
    page.doc().m_internal.resynth_required = 0
| 14012 | |
| 14013 | |
def JM_add_oc_object(pdf, ref, xref):
    '''
    Store an indirect reference to object `xref` under key /OC of
    dictionary `ref`.

    The referenced object must be a dictionary whose /Type is 'OCG' or
    'OCMD'; otherwise RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError) is invoked.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    is_oc_type = any(
        mupdf.pdf_objcmp(type_, PDF_NAME(name)) == 0
        for name in ('OCG', 'OCMD')
    )
    if is_oc_type:
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
    else:
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
| 14028 | |
| 14029 | |
def JM_annot_border(annot_obj):
    '''
    Return the border properties of an annotation object as a dict with
    keys `dictkey_width`, `dictkey_dashes`, `dictkey_style` and 'clouds'.

    Values are read from /Border, then /BS (whose width overrides the one
    from /Border and whose dashes are appended), then /BE. Defaults are
    width -1, no dashes, style None and clouds -1.
    '''
    dashes = []
    style = None
    width = -1
    clouds = -1

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            dash_arr = mupdf.pdf_array_get(border, 3)
            dashes.extend(
                mupdf.pdf_to_int(mupdf.pdf_array_get(dash_arr, i))
                for i in range(mupdf.pdf_array_len(dash_arr))
            )

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        style_name = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S')))
        style = None if style_name == '' else style_name
        bs_dashes = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dashes.m_internal:
            dashes.extend(
                mupdf.pdf_to_int(mupdf.pdf_array_get(bs_dashes, i))
                for i in range(mupdf.pdf_array_len(bs_dashes))
            )

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
        dictkey_width: width,
        dictkey_dashes: tuple(dashes),
        dictkey_style: style,
        'clouds': clouds,
    }
| 14068 | |
| 14069 | |
def JM_annot_colors(annot_obj):
    '''
    Return the colors of an annotation object as a dict: `dictkey_stroke`
    holds the numbers of array /C and `dictkey_fill` those of array /IC.
    An entry which is not an array gives an empty list.
    '''
    def reals(arr):
        # Items of a PDF array as a list of floats, [] if not an array.
        if not mupdf.pdf_is_array(arr):
            return []
        return [
            mupdf.pdf_to_real(mupdf.pdf_array_get(arr, i))
            for i in range(mupdf.pdf_array_len(arr))
        ]

    stroke = reals(mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C))
    fill = reals(mupdf.pdf_dict_gets(annot_obj, "IC"))
    return {dictkey_stroke: stroke, dictkey_fill: fill}
| 14091 | |
| 14092 | |
def JM_annot_set_border(border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    `border` is a dict which may contain keys `dictkey_width`,
    `dictkey_dashes`, `dictkey_style` and 'clouds'. A property which is
    omitted or None (or negative, for width and clouds) keeps the value
    currently stored in the annotation, as reported by JM_annot_border().

    Existing /BS, /BE and /Border entries are deleted, then /BS is rebuilt
    (and /BE too, if the clouds value is positive).
    '''
    assert isinstance(border, dict)
    dashlen = 0
    nwidth = border.get(dictkey_width)      # new width
    ndashes = border.get(dictkey_dashes)    # new dashes
    nstyle = border.get(dictkey_style)      # new style
    nclouds = border.get('clouds', -1)      # new clouds value

    # get old border properties
    oborder = JM_annot_border(annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Border'))

    # populate border items: keep old values for any omitted new ones.
    # None must be tested first, because comparing None with a number raises
    # a TypeError.
    if nwidth is None or nwidth < 0:
        nwidth = oborder.get(dictkey_width)     # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get(dictkey_dashes)   # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get(dictkey_style)     # no new style: keep old
    if nclouds is None or nclouds < 0:
        nclouds = oborder.get("clouds", -1)     # no new clouds: keep old

    if isinstance(ndashes, tuple) and len(ndashes) > 0:
        dashlen = len(ndashes)
        darr = mupdf.pdf_new_array(doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int(darr, d)
        mupdf.pdf_dict_putl(annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # Dashes, if any, dictate style 'D'; otherwise use the requested style.
    if dashlen == 0:
        obj = JM_get_border_style(nstyle)
    else:
        obj = PDF_NAME('D')
    mupdf.pdf_dict_putl(annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict(annot_obj, PDF_NAME('BE'), 2)
        obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put(obj, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int(obj, PDF_NAME('I'), nclouds)
| 14145 | |
| 14146 | |
def make_escape(ch):
    '''
    Return the text representing code point `ch` (an int).

    The backslash (92) becomes the 6 characters \\u005c. Code points 32
    to 127 and the line feed (10) are returned as the character itself.
    Surrogates (0xd800 to 0xdfff) become the escape text for U+FFFD. All
    other values are written as a lowercase \\uXXXX or \\UXXXXXXXX escape.
    '''
    if ch == 92:
        return "\\u005c"
    if ch == 10 or 32 <= ch <= 127:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    template = "\\u%04x" if ch <= 0xffff else "\\U%08x"
    return template % ch
| 14158 | |
| 14159 | |
def JM_append_rune(buff, ch):
    """
    Append code point `ch` to fz_buffer `buff`, in the form produced by
    make_escape().
    """
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
| 14165 | |
| 14166 | |
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Append one item of wordlist output to `lines`.

    The item is the tuple (x0, y0, x1, y1, text, block_n, line_n, word_n),
    with coordinates taken from `wbbox` and the text from `buff`. Returns
    the next word number and a new empty `mupdf.FzRect`.
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append(
        (wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n)
    )
    next_word_n = word_n + 1  # word counter
    return next_word_n, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
| 14184 | |
| 14185 | |
def JM_add_layer_config(pdf, name, creator, ON):
    '''
    Add an OC configuration to the PDF catalog.

    A new dictionary with /Name, optional /Creator, /BaseState 'OFF' and an
    /ON array is appended to the /Configs array of the OC properties (the
    array is created if missing). Numeric items of `ON` are taken as xrefs;
    each one which is contained in the /OCGs array is added to /ON.
    '''
    ocp = JM_ensure_ocproperties(pdf)
    configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array(configs):
        configs = mupdf.pdf_dict_put_array(ocp, PDF_NAME('Configs'), 1)

    D = mupdf.pdf_new_dict(pdf, 5)
    mupdf.pdf_dict_put_text_string(D, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string(D, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put(D, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    onarray = mupdf.pdf_dict_put_array(D, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
        for i in range(len(ON)):
            failed, xref = JM_INT_ITEM(ON, i)
            if failed == 1:
                continue  # not a number
            ind = mupdf.pdf_new_indirect(pdf, xref, 0)
            if mupdf.pdf_array_contains(ocgs, ind):
                mupdf.pdf_array_push(onarray, ind)

    mupdf.pdf_array_push(configs, D)
| 14214 | |
| 14215 | |
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of the char quad.

    If the line's `wmode` is set and the rectangle is less high than the
    character's size, `y0` is moved up so that the height equals the size.
    '''
    rect = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode and rect.y1 < rect.y0 + ch.m_internal.size:
        rect.y0 = rect.y1 - ch.m_internal.size
    return rect
| 14227 | |
| 14228 | |
def JM_char_font_flags(font, line, ch):
    '''
    Return the sum of the TEXT_FONT_* values for which `font` reports the
    property (italic, serifed, monospaced, bold), plus the result of
    detect_super_script() if both `line` and `ch` are given.
    '''
    flags = 0
    if line and ch:
        flags += detect_super_script(line, ch)
    for has_property, value in (
            (mupdf.fz_font_is_italic, TEXT_FONT_ITALIC),
            (mupdf.fz_font_is_serif, TEXT_FONT_SERIFED),
            (mupdf.fz_font_is_monospaced, TEXT_FONT_MONOSPACED),
            (mupdf.fz_font_is_bold, TEXT_FONT_BOLD),
            ):
        flags += has_property(font) * value
    return flags
| 14238 | |
| 14239 | |
def JM_char_quad(line, ch):
    '''
    re-compute char quad if ascender/descender values make no sense

    `line` and `ch` are the `mupdf.FzStextLine` and `mupdf.FzStextChar`
    wrappers of the character. If `g_use_extra` is set, the work is delegated
    to `extra.JM_char_quad()`. Otherwise the quad is returned as is when quad
    corrections are switched off, the line has vertical write mode, or the
    font's ascender / descender need no adjustment.
    '''
    if 1 and g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # NOTE(review): the next two returns use `ch.quad`, while the code further
    # down uses `mupdf.FzQuad(ch.m_internal.quad)` - confirm both are valid.
    if _globals.skip_quad_corrections:   # no special handling
        return ch.quad
    if line.m_internal.wmode:  # never touch vertical write mode
        return ch.quad
    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0:   # no problem
        return mupdf.FzQuad(ch.m_internal.quad)

    # Re-compute quad with adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    fsize = ch.m_internal.size
    bbox = mupdf.fz_font_bbox(font)
    # Default width; replaced by the glyph advance below if the quad has
    # (almost) zero width.
    fwidth = bbox.x1 - bbox.x0
    if asc < 1e-3:  # probably Tesseract glyphless font
        dsc = -0.1
        asc = 0.9
        asc_dsc = 1.0

    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    # Scale ascender and descender such that their difference is the font size.
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc

    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    c = line.m_internal.dir.x  # cosine
    s = line.m_internal.dir.y  # sine
    trm1 = mupdf.fz_make_matrix(c, -s, s, c, 0, 0) # derotate
    trm2 = mupdf.fz_make_matrix(c, s, -s, c, 0, 0) # rotate
    if (c == -1):   # left-right flip
        trm1.d = 1
        trm2.d = 1
    xlate1 = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    xlate2 = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)

    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), xlate1)    # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, trm1) # de-rotate corners

    # adjust vertical coordinates
    if c == 1 and quad.ul.y > 0:    # up-down flip
        quad.ul.y = asc
        quad.ur.y = asc
        quad.ll.y = dsc
        quad.lr.y = dsc
    else:
        quad.ul.y = -asc
        quad.ur.y = -asc
        quad.ll.y = -dsc
        quad.lr.y = -dsc

    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0

    cwidth = quad.lr.x - quad.ll.x
    if cwidth < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x

    quad = mupdf.fz_transform_quad(quad, trm2) # rotate back
    quad = mupdf.fz_transform_quad(quad, xlate2)   # translate back
    return quad
| 14327 | |
| 14328 | |
def JM_choice_options(annot):
    '''
    Return the list of choices for list or combo boxes.

    Each item of the /Opt array which is itself an array of length 2 gives
    a tuple of two strings, any other item gives a single string. Returns
    None if the widget has no options (wrong widget type).
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)

    count = len(mupdf.pdf_choice_widget_options2(annot, 0))
    if count == 0:
        return  # wrong widget type

    optarr = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt'))
    choices = []
    for i in range(count):
        entry = mupdf.pdf_array_get(optarr, i)
        if mupdf.pdf_array_len(entry) == 2:
            choice = (
                mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 0)),
                mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 1)),
            )
        else:
            choice = mupdf.pdf_to_text_string(entry)
        choices.append(choice)
    return choices
| 14355 | |
| 14356 | |
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - my version also supports non-alpha pixmaps

    Sets the samples of pixmap `dest` inside irect `b` (clipped to the
    pixmap's bbox). If the colorspace has 4 components, the first three are
    set to 0 and the fourth to `255 - value`; otherwise all color components
    are set to `value`. An alpha component, if present, is set to 255.

    Returns 0 if the clipped rectangle is empty, else 1.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    w = b.x1 - b.x0
    h = b.y1 - b.y0
    if w <= 0 or h <= 0:
        return 0

    destspan = dest.stride()
    destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    has_alpha = dest.alpha()

    # CMYK needs special handling (and potentially any other subtractive colorspaces)
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        value = 255 - value
        # Exactly `h` rows: processing one more would write below the
        # rectangle, and possibly beyond the end of the sample data.
        for _ in range(h):
            s = destp
            for _ in range(w):
                mupdf.fz_samples_set(dest, s, 0)
                mupdf.fz_samples_set(dest, s + 1, 0)
                mupdf.fz_samples_set(dest, s + 2, 0)
                mupdf.fz_samples_set(dest, s + 3, value)
                s += 4
                if has_alpha:
                    mupdf.fz_samples_set(dest, s, 255)
                    s += 1
            destp += destspan
        return 1

    # All but the last component of a pixel always receive `value`; the last
    # one is either the alpha (set to 255) or one more color component.
    leading = dest.n() - 1
    last = 255 if has_alpha else value
    for _ in range(h):
        s = destp
        for _ in range(w):
            for _ in range(leading):
                mupdf.fz_samples_set(dest, s, value)
                s += 1
            mupdf.fz_samples_set(dest, s, last)
            s += 1
        destp += destspan
    return 1
| 14410 | |
| 14411 | |
def JM_color_FromSequence(color):
    '''
    Validate a color and return `(n, components)`.

    `color` may be a single number, or a list or tuple of 0, 1, 3 or 4
    numbers. Components below 0 or above 1 are replaced by 1. The components
    are returned as a new sequence: a tuple if `color` is a tuple, else a
    list. `n` is the number of components.

    For anything else, `(-1, [])` is returned.
    '''
    if isinstance(color, (int, float)):     # maybe just a single float
        color = [color]

    if not isinstance(color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    # Build a new list instead of assigning into a copy of `color`: item
    # assignment fails if `color` is a tuple.
    ret = [1 if (c < 0 or c > 1) else c for c in color]
    if isinstance(color, tuple):
        ret = tuple(ret)
    return len(ret), ret
| 14428 | |
| 14429 | |
def JM_color_count(pm, clip):
    '''
    Count the pixels of each color in pixmap `pm`, restricted to `clip`.

    Returns a dict which maps each pixel value (the `bytes` of its samples)
    to the number of pixels having that value. The dict is empty if `clip`
    and the pixmap's bbox do not intersect.
    '''
    if g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)

    rc = dict()
    irect = mupdf.fz_pixmap_bbox(pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    # Check this before any sample is read: with an empty rectangle the
    # offset computed below is meaningless.
    if mupdf.fz_is_empty_irect(irect):
        return rc

    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n

    # Count runs of equal pixels, adding the length of each completed run to
    # the total of its color.
    oldpix = _read_samples(pm, s, n)
    cnt = 0
    for _ in range(height):
        for j in range(0, substride, n):
            newpix = _read_samples(pm, s + j, n)
            if newpix == oldpix:
                cnt += 1
                continue
            rc[oldpix] = rc.get(oldpix, 0) + cnt
            cnt = 1
            oldpix = newpix
        s += stride
    # The last run is still open.
    rc[oldpix] = rc.get(oldpix, 0) + cnt
    return rc
| 14468 | |
| 14469 | |
def JM_compress_buffer(inbuffer):
    '''
    Deflate the content of `inbuffer` (with FZ_DEFLATE_BEST) into a new
    buffer. Returns None if no compressed data was produced.
    '''
    data, compressed_length = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    if data and compressed_length != 0:
        compressed = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length))
        mupdf.fz_resize_buffer(compressed, compressed_length)
        return compressed
    return None
| 14484 | |
| 14485 | |
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters in text blocks of `page` whose bbox
    overlaps `area`.

    Characters are written in the form given by make_escape(). A line feed
    separates characters coming from different lines.
    '''
    pieces = []
    newline_pending = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            line_had_text = False
            for ch in line:
                if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                    continue
                line_had_text = True
                if newline_pending:
                    pieces.append("\n")
                    newline_pending = False
                pieces.append(make_escape(ch.m_internal.c))
            if line_had_text:
                newline_pending = True
    return ''.join(pieces)
| 14507 | |
| 14508 | |
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert any MuPDF document to a PDF.

    Pages `fp` to `tp` (inclusive) are copied, counting backwards if `fp` is
    larger than `tp`. `rotate` is normalized with JM_norm_rotation() and
    used as rotation of the new pages.

    Returns a bytes object containing the PDF.
    '''
    pdfout = mupdf.PdfDocument()
    if fp > tp:
        step, first, last = -1, tp, fp  # count backwards
    else:
        step, first, last = 1, fp, tp
    rot = JM_norm_rotation(rotate)

    # interpret & write document pages as PDF pages
    pno = fp
    while _INRANGE(pno, first, last):
        page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(page)
        dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None
        page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
        mupdf.pdf_insert_page(pdfout, -1, page_obj)
        pno += step

    # PDF created - now write it to a buffer.
    # Prepare the write options structure: switched on options first ...
    opts = mupdf.PdfWriteOptions()
    opts.do_garbage = 4
    opts.do_compress = 1
    opts.do_compress_images = 1
    opts.do_compress_fonts = 1
    opts.do_sanitize = 1
    opts.do_clean = 1
    # ... then the ones switched off.
    opts.do_incremental = 0
    opts.do_ascii = 0
    opts.do_decompress = 0
    opts.do_linear = 0
    opts.do_pretty = 0

    res = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(res)
    mupdf.pdf_write_document(pdfout, out, opts)
    out.fz_close_output()
    pdf_bytes = mupdf.fz_buffer_extract_copy(res)
    assert isinstance(pdf_bytes, bytes)
    return pdf_bytes
| 14558 | |
| 14559 | |
| 14560 # Copied from MuPDF v1.14 | |
| 14561 # Create widget | |
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a widget annotation of field type `type` named `fieldname` on
    `page` and append it to the document's Root/AcroForm/Fields array.

    For signature widgets, Root/AcroForm/SigFlags gets the
    SignaturesExist and AppendOnly bits added. If anything fails, the new
    annotation is deleted, the previous SigFlags value is written back
    (signature widgets only) and the exception is re-raised.
    '''
    def store_sigflags(value):
        # write Root/AcroForm/SigFlags in the trailer
        mupdf.pdf_dict_putl(
                mupdf.pdf_trailer(doc),
                mupdf.pdf_new_int(value),
                PDF_NAME('Root'),
                PDF_NAME('AcroForm'),
                PDF_NAME('SigFlags'),
                )

    sigflags_before = mupdf.pdf_to_int(
            mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags")
            )
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)

        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            store_sigflags(
                    sigflags_before | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
                    )

        # pdf_create_annot will have linked the new widget into the page's
        # annot array. It must also be linked into the document's form.
        fields = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not fields.m_internal:
            fields = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    fields,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        # Cleanup relies on this statement being last
        mupdf.pdf_array_push(fields, annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        mupdf.pdf_delete_annot(page, annot)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            store_sigflags(sigflags_before)
        raise
    return annot
| 14610 | |
| 14611 | |
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    Falls back to the MediaBox if the (inheritable) /CropBox is infinite or
    empty. The y-coordinates of the result are flipped relative to the
    MediaBox's y1.
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    cropbox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
            )
    unusable = mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox)
    if unusable:
        cropbox = mediabox
    # Both new values are computed before either one is stored, because
    # cropbox may be the very same object as mediabox here.
    cropbox.y0, cropbox.y1 = (
            mediabox.y1 - cropbox.y1,
            mediabox.y1 - cropbox.y0,
            )
    return cropbox
| 14630 | |
| 14631 | |
def JM_cropbox_size(page_obj):
    '''
    Return width and height of a page's CropBox as a point (w, h).
    '''
    box = JM_cropbox(page_obj)
    return mupdf.fz_make_point(
            abs(box.x1 - box.x0),
            abs(box.y1 - box.y0),
            )
| 14638 | |
| 14639 | |
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix delivered by JM_rotate_page_matrix().
    '''
    return mupdf.fz_invert_matrix(JM_rotate_page_matrix(page))
| 14646 | |
| 14647 | |
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Builds a /Filespec dictionary with /F, /UF and /Desc, attaches a stream
    filled from `buf` (via JM_update_stream(), honoring `compress`) and
    records the buffer size as /DL, /Length and /Params/Size.
    Returns the file specification dictionary.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    mupdf.pdf_dict_put_text_string(filespec, PDF_NAME('F'), filename)
    mupdf.pdf_dict_put_text_string(filespec, PDF_NAME('UF'), ufilename)
    mupdf.pdf_dict_put_text_string(filespec, PDF_NAME('Desc'), desc)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # create the stream object with dummy content, then put the real data
    stream = mupdf.pdf_add_stream(
            pdf,
            mupdf.fz_new_buffer_from_copied_data(b'  '),
            mupdf.PdfObj(),
            0,
            )
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('DL'), size)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('Length'), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
| 14683 | |
| 14684 | |
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning around /EmbeddedFiles:
    (1) remove an empty /Collection entry from the catalog
    (2) set /PageMode to /UseAttachments if the catalog has
        /Names/EmbeddedFiles/Names
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))

    collection = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
    is_empty_collection = (
            collection.m_internal and mupdf.pdf_dict_len(collection) == 0
            )
    if is_empty_collection:
        mupdf.pdf_dict_del(root, PDF_NAME('Collection'))

    embedded = mupdf.pdf_dict_getl(
            root,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
            )
    if embedded.m_internal:
        mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
| 14707 | |
| 14708 | |
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the buffer content decoded via
    PyUnicode_DecodeRawUnicodeEscape() (errors='replace'), or '' for a
    null buffer.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
| 14715 | |
| 14716 | |
def JM_ensure_identity(pdf):
    '''
    Make sure the PDF trailer has an /ID entry.

    If /ID is missing, an array containing the same 16-byte random string
    twice is stored. An existing /ID is left alone.
    '''
    current = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if current.m_internal:
        return
    # mupdf.pdf_new_string() wants a str, so map each raw byte value to
    # the character with that code point.
    rnd = ''.join(chr(value) for value in mupdf.fz_memrnd2(16))
    id_array = mupdf.pdf_dict_put_array(mupdf.pdf_trailer(pdf), PDF_NAME('ID'), 2)
    for _ in range(2):
        mupdf.pdf_array_push(id_array, mupdf.pdf_new_string(rnd, len(rnd)))
| 14732 | |
def JM_ensure_ocproperties(pdf):
    '''
    Return the catalog's /OCProperties dictionary, creating it if missing.

    A new dictionary receives an empty /OCGs array and a /D dictionary with
    empty /ON, /OFF, /Order and /RBGroups arrays.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    existing = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if existing.m_internal:
        return existing

    ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
    mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
    default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
    for key in ('ON', 'OFF', 'Order', 'RBGroups'):
        mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
| 14749 | |
| 14750 | |
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv".

    Only the first two characters are looked at (first letter in upper or
    lower case). Empty or unrecognized names give "Helv".
    '''
    if name:
        for prefixes, abbreviation in (
                (("Co", "co"), "Cour"),
                (("Ti", "ti"), "TiRo"),
                (("Sy", "sy"), "Symb"),
                (("Za", "za"), "ZaDb"),
                ):
            if name.startswith(prefixes):
                return abbreviation
    return "Helv"
| 14765 | |
| 14766 | |
def JM_field_type_text(wtype):
    '''
    Return the name string of a widget type, "unknown" if not recognized.
    '''
    for known_type, text in (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, "Button"),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, "CheckBox"),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, "RadioButton"),
            (mupdf.PDF_WIDGET_TYPE_TEXT, "Text"),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, "ListBox"),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, "ComboBox"),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, "Signature"),
            ):
        if wtype == known_type:
            return text
    return "unknown"
| 14786 | |
| 14787 | |
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill the part of irect `b` that lies inside pixmap `dest` with the
    color tuple `col` (one entry per pixmap component).

    Returns 0 if the intersection is empty, else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0
    stride = dest.stride()
    ncomp = dest.n()
    # sample offset of the top-left pixel of the area to fill
    row_start = stride * (b.y0 - dest.y()) + ncomp * (b.x0 - dest.x())
    for _row in range(height):
        pos = row_start
        for _column in range(width):
            for i in range(ncomp):
                mupdf.fz_samples_set(dest, pos, col[i])
                pos += 1
        row_start += stride
    return 1
| 14809 | |
| 14810 | |
def JM_find_annot_irt(annot):
    '''
    Return the first annotation on the same page whose /IRT key
    ("In Response To") refers to `annot`, or None if there is none.
    Used to remove the response chain of a given annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    # walk through MuPDF's internal annots of the page
    candidate = mupdf.pdf_first_annot(page)
    while 1:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        # pdf_objcmp() delivers a false value for equal objects
        if irt.m_internal and not mupdf.pdf_objcmp(irt, annot_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
| 14836 | |
| 14837 | |
def JM_font_ascender(font):
    '''
    Own version of the font ascender: fixed 0.8 if
    `_globals.skip_quad_corrections` is set, else MuPDF's value.
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
| 14846 | |
| 14847 | |
def JM_font_descender(font):
    '''
    Own version of the font descender: fixed -0.2 if
    `_globals.skip_quad_corrections` is set, else MuPDF's value.
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
| 14857 | |
| 14858 | |
def JM_is_word_delimiter(ch, delimiters):
    """Check if code point ch delimits words.

    Code points up to 32, 160 and the range 0x202a..0x202e always do. Any
    other code point does if its character equals an item of delimiters.
    """
    # 0x202a..0x202e are the unicodes switching between right-to-left and
    # left-to-right languages
    if ch <= 32 or ch == 160 or 0x202a <= ch <= 0x202e:
        return True
    if not delimiters:  # no extra delimiters provided
        return False
    char = chr(ch)
    return any(delimiter == char for delimiter in delimiters)
| 14877 | |
| 14878 | |
def JM_is_rtl_char(ch):
    '''
    True if code point `ch` lies in the range 0x590..0x900 (inclusive).
    '''
    return 0x590 <= ch <= 0x900
| 14883 | |
| 14884 | |
def JM_font_name(font):
    '''
    Return the name of a font.

    If the first '+' is at index 6 (i.e. preceded by exactly 6 characters),
    everything up to and including it is dropped - unless
    `_globals.subset_fontnames` is set.
    '''
    assert isinstance(font, mupdf.FzFont)
    name = mupdf.fz_font_name(font)
    plus = name.find('+')
    # plus != 6 also covers "no '+' at all" (-1)
    if _globals.subset_fontnames or plus != 6:
        return name
    return name[plus + 1:]
| 14892 | |
| 14893 | |
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Store info of the fonts in `dict_` in Python list `fontlist`.

    Each font dictionary contributes the tuple
    (xref, ext, subtype, name, refname, encoding, stream_xref). Values that
    are not dictionaries are skipped with a MuPDF warning. Always returns 1.
    '''
    for i in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, i)
        fontdict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        # prefer /BaseFont, fall back to /Name
        if basefont.m_internal and not mupdf.pdf_is_null(basefont):
            name = basefont
        else:
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
        xref = mupdf.pdf_to_num(fontdict)
        ext = JM_get_fontextension(pdf, xref) if xref else "n/a"
        fontlist.append(
                (
                    xref,
                    ext,
                    mupdf.pdf_to_name(subtype),
                    JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                    mupdf.pdf_to_name(refname),
                    mupdf.pdf_to_name(encoding),
                    stream_xref,
                )
                )
    return 1
| 14929 | |
| 14930 | |
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the /Form xobjects in `dict_` in Python list `imagelist`.

    Each Form xobject contributes the tuple (xref, refname, stream_xref,
    bbox), where bbox is the /BBox transformed by /Matrix, or the infinite
    rectangle if there is no /BBox. Always returns 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    for i in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, i)
        formdict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(formdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(formdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(formdict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('Matrix'))
        mat = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect(mupdf.pdf_to_rect(bbox_obj), mat)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        xref = mupdf.pdf_to_num(formdict)

        imagelist.append(
                (
                    xref,
                    mupdf.pdf_to_name(refname),
                    stream_xref,
                    JM_py_from_rect(bbox),
                )
                )
    return 1
| 14969 | |
| 14970 | |
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of the images in `dict_` in Python list `imagelist`.

    Each /Image xobject contributes the tuple
    (xref, smask xref, width, height, bpc, colorspace, alt. colorspace,
    refname, filter, stream_xref). Always returns 1.
    '''
    def get_either(obj, key, abbrev):
        # value of `key` or of its abbreviated spelling
        return mupdf.pdf_dict_geta(obj, PDF_NAME(key), PDF_NAME(abbrev))

    for i in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, i)
        imagedict = mupdf.pdf_dict_get_val(dict_, i)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)
        smask = get_either(imagedict, 'SMask', 'Mask')
        smask_xref = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        filter_ = get_either(imagedict, 'Filter', 'F')
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)

        altcs = mupdf.PdfObj(0)
        cs = get_either(imagedict, 'ColorSpace', 'CS')
        if mupdf.pdf_is_array(cs):
            cs_array = cs
            cs = mupdf.pdf_array_get(cs_array, 0)
            needs_alternate = (
                    mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
                    or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
                    )
            if needs_alternate:
                altcs = mupdf.pdf_array_get(cs_array, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)
        width = get_either(imagedict, 'Width', 'W')
        height = get_either(imagedict, 'Height', 'H')
        bpc = get_either(imagedict, 'BitsPerComponent', 'BPC')

        imagelist.append(
                (
                    xref,
                    smask_xref,
                    mupdf.pdf_to_int(width),
                    mupdf.pdf_to_int(height),
                    mupdf.pdf_to_int(bpc),
                    JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
                    JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
                    JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
                    JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
                    stream_xref,
                )
                )
    return 1
| 15027 | |
| 15028 | |
def JM_get_annot_by_xref(page, xref):
    '''
    Retrieve an annot of `page` by its xref.

    Raises an Exception if no annot of the page has this xref.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # walk through MuPDF's internal annots of the page
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("xref %d is not an annot of this page" % xref)
| 15047 | |
| 15048 | |
def JM_get_annot_by_name(page, name):
    '''
    Retrieve an annot of `page` by its name (/NM key).

    Returns None for an empty `name`. Raises an Exception if no annot of
    the page has this name.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return
    # walk through MuPDF's internal annots of the page
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        response, _length = mupdf.pdf_to_string(nm_obj)
        if name == response:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
| 15071 | |
| 15072 | |
def JM_get_annot_id_list(page):
    '''
    Return the list of /NM values found in the page's /Annots array.
    Entries without /NM are skipped.
    '''
    ids = []
    annots = mupdf.pdf_dict_get(page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if annots.m_internal:
        for i in range(mupdf.pdf_array_len(annots)):
            nm = mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, i), "NM")
            if nm.m_internal:
                ids.append(mupdf.pdf_to_text_string(nm))
    return ids
| 15086 | |
def JM_get_annot_xref_list(page_obj):
    '''
    Return the xrefs, types and /NM ids of the entries in a page's /Annots
    array as a list of tuples (xref, type, id).

    Entries without /Subtype or with an unknown annotation type are left
    out.
    '''
    if g_use_extra:
        return extra.JM_get_annot_xref_list(page_obj)

    result = []
    annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
    for i in range(mupdf.pdf_array_len(annots)):
        annot_obj = mupdf.pdf_array_get(annots, i)
        xref = mupdf.pdf_to_num(annot_obj)
        subtype = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Subtype'))
        if not subtype.m_internal:
            continue  # subtype is required
        annot_type = mupdf.pdf_annot_type_from_string(mupdf.pdf_to_name(subtype))
        if annot_type == mupdf.PDF_ANNOT_UNKNOWN:
            continue  # only accept valid annot types
        nm = mupdf.pdf_dict_gets(annot_obj, "NM")
        result.append((xref, annot_type, mupdf.pdf_to_text_string(nm)))
    return result
| 15110 | |
| 15111 | |
def JM_get_annot_xref_list2(page):
    '''
    Like JM_get_annot_xref_list(), but takes a page object providing
    `_pdf_page()`. Returns an empty list if that delivers no PDF page.
    '''
    pdf_page = page._pdf_page(required=False)
    if pdf_page.m_internal:
        return JM_get_annot_xref_list(pdf_page.obj())
    return []
| 15117 | |
| 15118 | |
def JM_get_border_style(style):
    '''
    Return the pdf_obj "border style" name for a Python str.

    Only the first character counts (case-insensitive): b, d, i, u or s.
    None and anything else give the /S name.
    '''
    if style is None:
        return mupdf.PDF_ENUM_NAME_S
    for initials, border_name in (
            (("b", "B"), mupdf.PDF_ENUM_NAME_B),
            (("d", "D"), mupdf.PDF_ENUM_NAME_D),
            (("i", "I"), mupdf.PDF_ENUM_NAME_I),
            (("u", "U"), mupdf.PDF_ENUM_NAME_U),
            (("s", "S"), mupdf.PDF_ENUM_NAME_S),
            ):
        if style.startswith(initials):
            return border_name
    return mupdf.PDF_ENUM_NAME_S
| 15133 | |
| 15134 | |
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font from a number of parameters.

    The first applicable source wins, in this order:
    1. `fontfile` - load from a file
    2. `fontbuffer` - load from memory
    3. `ordering` > -1 - CJK font
    4. `fontname` - Base-14 font, else a MuPDF builtin font
    5. Noto font for `script` / `lang`, else MuPDF's fallback font

    Raises RuntimeError(MSG_FONT_FAILED) if the chosen source delivers a
    null font. `embed` is applied to the font unless its `never_embed`
    flag is set.
    '''
    def finish(font):
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        return finish(mupdf.fz_new_font_from_file(None, fontfile, index, 0))

    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        return finish(mupdf.fz_new_font_from_buffer(None, res, index, 0))

    if ordering > -1:
        return finish(mupdf.fz_new_cjk_font(ordering))

    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return finish(font)
        return finish(mupdf.fz_new_builtin_font(fontname, is_bold, is_italic))

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font(script, lang)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        if font.m_internal:
            return finish(font)
    # No usable Noto font (no data at all, or a null font): use the
    # fallback. Previously a missing Noto font left `font` as None and the
    # `.m_internal` access failed with AttributeError.
    return finish(
            mupdf.fz_load_fallback_font(script, lang, is_serif, is_bold, is_italic)
            )
| 15195 | |
| 15196 | |
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    The font descriptor is taken from the first /DescendantFonts entry if
    present, else from the font object itself. Of /FontFile, /FontFile2
    and /FontFile3 the last one present is used.

    Returns the loaded stream, or None if `xref` < 1, the /FontDescriptor
    is missing, the /FontFile3 subtype is not a name, or there is no font
    file entry at all (a message is issued in the latter three cases).
    '''
    if xref < 1:
        return
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))

    if not obj.m_internal:
        message("invalid font - FontDescriptor missing")
        return

    descriptor = obj
    stream = None

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj  # ext = "pfa"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj  # ext = "ttf"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj

        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return

        known_subtype = (
                mupdf.pdf_name_eq(obj, PDF_NAME('Type1C'))  # "cff"
                or mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C'))  # "cid"
                or mupdf.pdf_name_eq(obj, PDF_NAME('OpenType'))  # "otf"
                )
        if not known_subtype:
            # Was a plain string naming a nonexistent pdf_to_name(ctx, obj),
            # so the placeholder text itself was printed.
            message(f'warning: unhandled font type {mupdf.pdf_to_name(obj)!r}')

    if not stream:
        message('warning: unhandled font type')
        return

    return mupdf.pdf_load_stream(stream)
| 15250 | |
| 15251 | |
def JM_get_resource_properties(ref):
    '''
    Return the items of Resources/Properties (used for Marked Content).

    Argument may be e.g. a page object or a Form XObject. The result is a
    list of tuples (name, xref), or an empty tuple if there are no
    properties.
    '''
    properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not properties.m_internal:
        return ()
    count = mupdf.pdf_dict_len(properties)
    if count < 1:
        return ()
    return [
            (
                mupdf.pdf_to_name(mupdf.pdf_dict_get_key(properties, i)),
                mupdf.pdf_to_num(mupdf.pdf_dict_get_val(properties, i)),
            )
            for i in range(count)
            ]
| 15272 | |
| 15273 | |
def JM_get_widget_by_xref(page, xref):
    '''
    Retrieve a widget of `page` by its xref, wrapped as `Annot`.

    Raises an Exception if no widget of the page has this xref.
    '''
    widget = mupdf.pdf_first_widget(page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(widget)):
            return Annot(widget)
        widget = mupdf.pdf_next_widget(widget)
    raise Exception( f"xref {xref} is not a widget of this page")
| 15289 | |
| 15290 | |
| 15291 def JM_get_widget_properties(annot, Widget): | |
| 15292 ''' | |
| 15293 Populate a Python Widget object with the values from a PDF form field. | |
| 15294 Called by "Page.first_widget" and "Widget.next". | |
| 15295 ''' | |
| 15296 #log( '{type(annot)=}') | |
| 15297 annot_obj = mupdf.pdf_annot_obj(annot.this) | |
| 15298 #log( 'Have called mupdf.pdf_annot_obj()') | |
| 15299 page = _pdf_annot_page(annot.this) | |
| 15300 pdf = page.doc() | |
| 15301 tw = annot | |
| 15302 | |
| 15303 def SETATTR(key, value): | |
| 15304 setattr(Widget, key, value) | |
| 15305 | |
| 15306 def SETATTR_DROP(mod, key, value): | |
| 15307 # Original C code for this function deletes if PyObject* is NULL. We | |
| 15308 # don't have a representation for that in Python - e.g. None is not | |
| 15309 # represented by NULL. | |
| 15310 setattr(mod, key, value) | |
| 15311 | |
| 15312 #log( '=== + mupdf.pdf_widget_type(tw)') | |
| 15313 field_type = mupdf.pdf_widget_type(tw.this) | |
| 15314 #log( '=== - mupdf.pdf_widget_type(tw)') | |
| 15315 Widget.field_type = field_type | |
| 15316 if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE: | |
| 15317 if mupdf.pdf_signature_is_signed(pdf, annot_obj): | |
| 15318 SETATTR("is_signed", True) | |
| 15319 else: | |
| 15320 SETATTR("is_signed",False) | |
| 15321 else: | |
| 15322 SETATTR("is_signed", None) | |
| 15323 SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj))) | |
| 15324 SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type))) | |
| 15325 | |
| 15326 field_name = mupdf.pdf_load_field_name(annot_obj) | |
| 15327 SETATTR_DROP(Widget, "field_name", field_name) | |
| 15328 | |
| 15329 def pdf_dict_get_inheritable_nonempty_label(node, key): | |
| 15330 ''' | |
| 15331 This is a modified version of MuPDF's pdf_dict_get_inheritable(), with | |
| 15332 some changes: | |
| 15333 * Returns string from pdf_to_text_string() or None if not found. | |
| 15334 * Recurses to parent if current node exists but with empty string | |
| 15335 value. | |
| 15336 ''' | |
| 15337 slow = node | |
| 15338 halfbeat = 11 # Don't start moving slow pointer for a while. | |
| 15339 while 1: | |
| 15340 if not node.m_internal: | |
| 15341 return | |
| 15342 val = mupdf.pdf_dict_get(node, key) | |
| 15343 if val.m_internal: | |
| 15344 label = mupdf.pdf_to_text_string(val) | |
| 15345 if label: | |
| 15346 return label | |
| 15347 node = mupdf.pdf_dict_get(node, PDF_NAME('Parent')) | |
| 15348 if node.m_internal == slow.m_internal: | |
| 15349 raise Exception("cycle in resources") | |
| 15350 halfbeat -= 1 | |
| 15351 if halfbeat == 0: | |
| 15352 slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent')) | |
| 15353 halfbeat = 2 | |
| 15354 | |
| 15355 # In order to address #3950, we use our modified pdf_dict_get_inheritable() | |
| 15356 # to ignore empty-string child values. | |
| 15357 label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU')) | |
| 15358 if label is not None: | |
| 15359 SETATTR_DROP(Widget, "field_label", label) | |
| 15360 | |
| 15361 fvalue = None | |
| 15362 if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: | |
| 15363 obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent')) # owning RB group | |
| 15364 if obj.m_internal: | |
| 15365 SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj)) | |
| 15366 obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS')) | |
| 15367 if obj.m_internal: | |
| 15368 fvalue = mupdf.pdf_to_name(obj) | |
| 15369 if not fvalue: | |
| 15370 fvalue = mupdf.pdf_field_value(annot_obj) | |
| 15371 SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue)) | |
| 15372 | |
| 15373 SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj)) | |
| 15374 | |
| 15375 border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W'))) | |
| 15376 if border_width == 0: | |
| 15377 border_width = 1 | |
| 15378 SETATTR_DROP(Widget, "border_width", border_width) | |
| 15379 | |
| 15380 obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D')) | |
| 15381 if mupdf.pdf_is_array(obj): | |
| 15382 n = mupdf.pdf_array_len(obj) | |
| 15383 d = [0] * n | |
| 15384 for i in range(n): | |
| 15385 d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i)) | |
| 15386 SETATTR_DROP(Widget, "border_dashes", d) | |
| 15387 | |
| 15388 SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this)) | |
| 15389 | |
| 15390 SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this)) | |
| 15391 | |
| 15392 obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG')) | |
| 15393 if mupdf.pdf_is_array(obj): | |
| 15394 n = mupdf.pdf_array_len(obj) | |
| 15395 col = [0] * n | |
| 15396 for i in range(n): | |
| 15397 col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i)) | |
| 15398 SETATTR_DROP(Widget, "fill_color", col) | |
| 15399 | |
| 15400 obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC')) | |
| 15401 if mupdf.pdf_is_array(obj): | |
| 15402 n = mupdf.pdf_array_len(obj) | |
| 15403 col = [0] * n | |
| 15404 for i in range(n): | |
| 15405 col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i)) | |
| 15406 SETATTR_DROP(Widget, "border_color", col) | |
| 15407 | |
| 15408 SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot)) | |
| 15409 | |
| 15410 da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA'))) | |
| 15411 SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da)) | |
| 15412 | |
| 15413 obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA')) | |
| 15414 if obj.m_internal: | |
| 15415 SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj))) | |
| 15416 | |
| 15417 SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj)) | |
| 15418 | |
| 15419 # call Py method to reconstruct text color, font name, size | |
| 15420 Widget._parse_da() | |
| 15421 | |
| 15422 # extract JavaScript action texts | |
| 15423 s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A')) | |
| 15424 ss = JM_get_script(s) | |
| 15425 SETATTR_DROP(Widget, "script", ss) | |
| 15426 | |
| 15427 SETATTR_DROP(Widget, "script_stroke", | |
| 15428 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K'))) | |
| 15429 ) | |
| 15430 | |
| 15431 SETATTR_DROP(Widget, "script_format", | |
| 15432 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F'))) | |
| 15433 ) | |
| 15434 | |
| 15435 SETATTR_DROP(Widget, "script_change", | |
| 15436 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V'))) | |
| 15437 ) | |
| 15438 | |
| 15439 SETATTR_DROP(Widget, "script_calc", | |
| 15440 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C'))) | |
| 15441 ) | |
| 15442 | |
| 15443 SETATTR_DROP(Widget, "script_blur", | |
| 15444 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'))) | |
| 15445 ) | |
| 15446 | |
| 15447 SETATTR_DROP(Widget, "script_focus", | |
| 15448 JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'))) | |
| 15449 ) | |
| 15450 | |
| 15451 | |
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref.

    Args:
        doc: the PDF document that owns the font object.
        xref: object number of the font; values below 1 give "n/a".

    Returns:
        One of "pfa", "ttf", "cff", "cid", "otf", or "n/a" when the font
        has no font descriptor, no embedded font file, or an
        unsupported / invalid font file subtype.
    '''
    if xref < 1:
        return "n/a"
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        # Use the font descriptor of the first descendant font.
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        return "n/a"    # this is a base-14 font

    o = obj # we have the FontDescriptor

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        return "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        return "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        # The extension depends on the /Subtype of the font file stream.
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return "n/a"
        if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
            return "cff"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
            return "cid"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
            return "otf"
        else:
            # Format the text here: message() is given one ready-made
            # string, like the other message() call in this function.
            message(f"unhandled font type '{mupdf.pdf_to_name(obj)}'")

    return "n/a"
| 15494 | |
| 15495 | |
def JM_get_ocg_arrays_imp(arr):
    '''
    Return the object numbers of the entries of a PDF array.

    Args:
        arr: a PDF object. Anything that is not a PDF array gives an
            empty list.

    Returns:
        List of the numbers delivered by pdf_to_num() for the array
        entries, in array order and without duplicates.
    '''
    numbers = []
    if mupdf.pdf_is_array(arr):
        seen = set()    # constant-time duplicate detection
        for i in range(mupdf.pdf_array_len(arr)):
            num = mupdf.pdf_to_num(mupdf.pdf_array_get(arr, i))
            if num not in seen:
                seen.add(num)
                numbers.append(num)
    return numbers
| 15510 | |
| 15511 | |
def JM_get_ocg_arrays(conf):
    '''
    Get OCG arrays from an optional content configuration dictionary.

    Returns a dict which may contain these keys (empty items are omitted):
        "on", "off", "locked": lists of object numbers
        "rbgroups": list of lists of object numbers
        "basestate": name string of /BaseState
    '''
    result = {}

    # Plain arrays of OCG references: (PDF key, result key).
    for pdf_key, dict_key in (('ON', "on"), ('OFF', "off"), ('Locked', 'locked')):
        numbers = JM_get_ocg_arrays_imp(mupdf.pdf_dict_get(conf, PDF_NAME(pdf_key)))
        if numbers:
            result[dict_key] = numbers

    # /RBGroups is an array of arrays.
    groups = []
    rbgroups = mupdf.pdf_dict_get(conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array(rbgroups):
        groups = [
            JM_get_ocg_arrays_imp(mupdf.pdf_array_get(rbgroups, i))
            for i in range(mupdf.pdf_array_len(rbgroups))
        ]
    if groups:
        result["rbgroups"] = groups

    base_state = mupdf.pdf_dict_get(conf, PDF_NAME('BaseState'))
    if base_state.m_internal:
        result["basestate"] = mupdf.pdf_to_name(base_state)
    return result
| 15543 | |
| 15544 | |
def JM_get_page_labels(liste, nums):
    '''
    Append one (page number, label definition) tuple to `liste` for each
    key / value pair of the PDF array `nums`.

    The array is read as alternating entries: an integer key followed by
    its value object. The value is stored as the UTF-8 decoded output of
    JM_object_to_buffer().
    '''
    count = mupdf.pdf_array_len(nums)
    for pos in range(0, count, 2):
        page_key = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos))
        pno = mupdf.pdf_to_int(page_key)
        rule = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos + 1))
        raw = mupdf.fz_buffer_extract(JM_object_to_buffer(rule, 1, 0))
        assert isinstance(raw, bytes)
        liste.append((pno, raw.decode('utf-8')))
| 15556 | |
| 15557 | |
def JM_get_script(key):
    '''
    Extract the JavaScript source of a PDF action dictionary.

    `key` must be an action dictionary whose /S entry is the name
    'JavaScript'. Its /JS entry may be a string or a stream.

    Returns the script text, or None if `key` is a null object, is not a
    JavaScript action, has no usable /JS entry, or the script is empty.
    '''
    if not key.m_internal:
        return None

    action_type = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if action_type != "JavaScript":
        return None

    js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not js.m_internal:
        return None

    if mupdf.pdf_is_string(js):
        script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
    elif mupdf.pdf_is_stream(js):
        script = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(js))
    else:
        return None

    # do not return an empty script
    return script if script else None
| 15587 | |
| 15588 | |
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state.

    Returns 1 if the document has no journal, or if
    pdf_undoredo_step(pdf, 0) gives a true value; otherwise 0.
    '''
    if not pdf.m_internal.journal:
        return 1
    if mupdf.pdf_undoredo_step(pdf, 0):
        return 1
    return 0
| 15596 | |
| 15597 | |
def JM_image_extension(type_):
    '''
    Return the file extension for a MuPDF image type, "n/a" if the type
    is none of the handled ones.
    '''
    # (name of the mupdf constant, extension) - checked in this order.
    # The constants are looked up one at a time, only as far as needed.
    known_types = (
        ('FZ_IMAGE_FAX', "fax"),
        ('FZ_IMAGE_RAW', "raw"),
        ('FZ_IMAGE_FLATE', "flate"),
        ('FZ_IMAGE_LZW', "lzw"),
        ('FZ_IMAGE_RLD', "rld"),
        ('FZ_IMAGE_BMP', "bmp"),
        ('FZ_IMAGE_GIF', "gif"),
        ('FZ_IMAGE_JBIG2', "jb2"),
        ('FZ_IMAGE_JPEG', "jpeg"),
        ('FZ_IMAGE_JPX', "jpx"),
        ('FZ_IMAGE_JXR', "jxr"),
        ('FZ_IMAGE_PNG', "png"),
        ('FZ_IMAGE_PNM', "pnm"),
        ('FZ_IMAGE_TIFF', "tiff"),
        # FZ_IMAGE_PSD ("psd") is deliberately not handled.
    )
    for constant, extension in known_types:
        if type_ == getattr(mupdf, constant):
            return extension
    return "n/a"
| 15618 | |
| 15619 | |
# fixme: need to avoid using a global for this.
# JM_image_filter() appends (name, quad) tuples to this list;
# JM_image_reporter() rebinds it to a new empty list before and after
# filtering a page.
g_img_info = None
| 15622 | |
| 15623 | |
def JM_image_filter(opaque, ctm, name, image):
    '''
    Record one image occurrence in the global list `g_img_info`.

    The unit rectangle is transformed by `ctm` and then by the global
    `g_img_info_matrix`; the resulting quad is appended together with
    `name`. `opaque` and `image` are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_rect = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    quad = mupdf.fz_quad_from_rect(unit_rect)
    quad = mupdf.fz_transform_quad(quad, ctm)
    quad = mupdf.fz_transform_quad(quad, g_img_info_matrix)
    g_img_info.append((name, JM_py_from_quad(quad)))
| 15631 | |
| 15632 | |
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image given as bytes or bytearray.

    Returns None for empty input, for fewer than 8 bytes of data (after
    emitting a message) and for an unrecognized image format. Otherwise
    returns a dict with width, height, orientation, matrix, resolution,
    colorspace component count, bits per component, extension and
    colorspace name. If `keep_image` is true, the created image object is
    included in the dict as well.
    '''
    if not imagedata:
        return None # nothing given

    size = len(imagedata)
    if size < 8:
        message( "bad image data")
        return None

    img_type = mupdf.fz_recognize_image_format(imagedata)
    if img_type == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    # The data is copied only if the image is handed back to the caller.
    if keep_image:
        res = mupdf.fz_new_buffer_from_copied_data(imagedata, size)
    else:
        res = mupdf.fz_new_buffer_from_shared_data(imagedata, size)
    image = mupdf.fz_new_image_from_buffer(res)

    ctm = mupdf.fz_image_orientation_matrix(image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation(image)
    cs_name = mupdf.fz_colorspace_name(image.colorspace())

    result = {
        dictkey_width: image.w(),
        dictkey_height: image.h(),
        "orientation": orientation,
        dictkey_matrix: JM_py_from_matrix(ctm),
        dictkey_xres: xres,
        dictkey_yres: yres,
        dictkey_colorspace: image.n(),
        dictkey_bpc: image.bpc(),
        dictkey_ext: JM_image_extension(img_type),
        dictkey_cs_name: cs_name,
    }
    if keep_image:
        result[dictkey_image] = image
    return result
| 15675 | |
| 15676 | |
def JM_image_reporter(page):
    '''
    Return a tuple of (name, quad) items, one for each image reported
    while filtering the contents of `page`.

    The page contents are run through pdf_filter_page_contents() with a
    sanitize filter whose image callback forwards to JM_image_filter().
    Results are collected via the globals `g_img_info` and
    `g_img_info_matrix`.
    '''
    global g_img_info, g_img_info_matrix

    doc = page.doc()
    g_img_info_matrix = mupdf.FzMatrix()
    mediabox = mupdf.FzRect()
    mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

    class ImageCollector(mupdf.PdfSanitizeFilterOptions2):
        # Sanitize options whose virtual image filter records each image.
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()

        def image_filter(self, ctx, ctm, name, image, scissor):
            JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

    collector = ImageCollector()
    options = _make_PdfFilterOptions(
            instance_forms=1,
            ascii=1,
            no_update=1,
            sanitize=1,
            sopts=collector,
            )

    g_img_info = []
    mupdf.pdf_filter_page_contents(doc, page, options)

    found = tuple(g_img_info)
    g_img_info = []
    return found
| 15709 | |
| 15710 | |
def JM_fitz_config():
    '''
    Return a dict describing the configuration of the MuPDF build.

    The FZ_ENABLE_* / FZ_PLOTTERS_* entries and "py-memory" are the bool
    values of the respective constants. Each tofu-related entry is True
    when the `mupdf` module does NOT have an attribute of the
    corresponding TOFU* name.
    '''
    def undefined(name):
        return not hasattr(mupdf, name)

    return {
        "base14": undefined('TOFU_BASE14'),
        "cbz": bool(mupdf.FZ_ENABLE_CBZ),
        "epub": bool(mupdf.FZ_ENABLE_EPUB),
        "html": bool(mupdf.FZ_ENABLE_HTML),
        "icc": bool(mupdf.FZ_ENABLE_ICC),
        "img": bool(mupdf.FZ_ENABLE_IMG),
        "jpx": bool(mupdf.FZ_ENABLE_JPX),
        "js": bool(mupdf.FZ_ENABLE_JS),
        "pdf": bool(mupdf.FZ_ENABLE_PDF),
        "plotter-cmyk": bool(mupdf.FZ_PLOTTERS_CMYK),
        "plotter-g": bool(mupdf.FZ_PLOTTERS_G),
        "plotter-n": bool(mupdf.FZ_PLOTTERS_N),
        "plotter-rgb": bool(mupdf.FZ_PLOTTERS_RGB),
        "py-memory": bool(JM_MEMORY),
        "svg": bool(mupdf.FZ_ENABLE_SVG),
        "tofu": undefined('TOFU'),
        "tofu-cjk": undefined('TOFU_CJK'),
        "tofu-cjk-ext": undefined('TOFU_CJK_EXT'),
        "tofu-cjk-lang": undefined('TOFU_CJK_LANG'),
        "tofu-emoji": undefined('TOFU_EMOJI'),
        "tofu-historic": undefined('TOFU_HISTORIC'),
        "tofu-sil": undefined('TOFU_SIL'),
        "tofu-symbol": undefined('TOFU_SYMBOL'),
        "xps": bool(mupdf.FZ_ENABLE_XPS),
    }
| 15748 | |
| 15749 | |
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Insert a buffer as a new separate /Contents object of a page.

    A new stream object is created from buffer `newcont`. If /Contents
    already is an array, the stream is appended (`overlay` true) or
    prepended to it. Otherwise a new array is created holding the old
    /Contents object (if any) and the new stream, in the order given by
    `overlay`, and stored as the page's /Contents.

    Returns the xref of the new stream object.
    '''
    old = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    stream = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(stream)

    if mupdf.pdf_is_array(old):
        if overlay:
            mupdf.pdf_array_push(old, stream)       # append new object
        else:
            mupdf.pdf_array_insert(old, stream, 0)  # prepend new object
        return xref

    # /Contents is a single object or missing: build a new array.
    if not old.m_internal:
        parts = [stream]
    elif overlay:
        parts = [old, stream]
    else:
        parts = [stream, old]
    new_array = mupdf.pdf_new_array(pdf, 5)
    for part in parts:
        mupdf.pdf_array_push(new_array, part)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), new_array)
    return xref
| 15778 | |
| 15779 | |
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF.

    The font source is chosen in this order:
    1. a CJK font, if `ordering` > -1 and the lookup delivers data,
    2. a PDF Base-14 font, if `bfname` is given and the lookup delivers
       data,
    3. a font from `fontfile`, or else from `fontbuffer`, added as a
       simple font if `set_simple` is true and as a CID font otherwise.

    Returns a list [xref, info] where `info` is a dict with keys "name",
    "type", "ext", "simple", "ordering", "ascender" and "descender".
    '''
    ENSURE_OPERATION(pdf)

    # check for CJK font
    data = None
    if ordering > -1:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    is_cjk = bool(data)

    # check for PDF Base-14 font - only if no CJK font was found
    if not is_cjk and bfname:
        data, size = mupdf.fz_lookup_base14_font(bfname)
    is_base14 = not is_cjk and bool(data)

    if is_cjk:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        exto = "n/a"
        simple = 0
    elif is_base14:
        font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
        exto = "n/a"
        simple = 1
    else:
        # font comes from a file or a memory buffer
        exto = None
        if fontfile:
            font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
        else:
            res = JM_BufferFromBytes(fontbuffer)
            if not res.m_internal:
                RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
            font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)

        if set_simple:
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            simple = 2
        else:
            font_obj = mupdf.pdf_add_cid_font(pdf, font)
            simple = 0

    ixref = mupdf.pdf_to_num(font_obj)
    base_font = mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))
    name = JM_EscapeStrFromStr(mupdf.pdf_to_name(base_font))

    subtype = mupdf.pdf_dict_get(font_obj, PDF_NAME('Subtype'))
    subt = JM_UnicodeFromStr(mupdf.pdf_to_name(subtype))

    if not exto:
        exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))

    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    info = {
        "name": name,               # base font name
        "type": subt,               # subtype
        "ext": exto,                # file extension
        "simple": bool(simple),     # simple font?
        "ordering": ordering,       # CJK font?
        "ascender": asc,
        "descender": dsc,
    }
    return [ixref, info]
| 15856 | |
def JM_irect_from_py(r):
    '''
    Convert `r` to a mupdf.FzIrect. Default: infinite irect.

    Accepts mupdf.FzIrect (returned unchanged), IRect, Rect, mupdf.FzRect
    or a sequence of 4 numbers. Sequence items are limited to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. Anything else, or a sequence
    containing None, gives the infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect(r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        # FzIrect(FzRect) uses fz_irect_from_rect().
        return mupdf.FzIrect(mupdf.FzRect(r.x0, r.y0, r.x1, r.y1))
    if isinstance(r, mupdf.FzRect):
        return mupdf.FzIrect(r)     # Uses fz_irect_from_rect().

    if not (r and PySequence_Check(r) and PySequence_Size(r) == 4):
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    coords = []
    for i in range(4):
        value = r[i]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_irect(coords[0], coords[1], coords[2], coords[3])
| 15885 | |
def JM_listbox_value( annot):
    '''
    Retrieve the value of a ListBox.

    Returns a string if /V of the annotation object is a single string.
    Otherwise /V is treated as an array (possibly of length 0) and a list
    of strings is returned; for an entry that is itself an array, its
    second item is taken.
    '''
    value = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('V'))
    if mupdf.pdf_is_string(value):  # a single string
        return mupdf.pdf_to_text_string(value)

    def entry_text(i):
        entry = mupdf.pdf_array_get(value, i)
        if mupdf.pdf_is_array(entry):
            entry = mupdf.pdf_array_get(entry, 1)
        return JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    return [entry_text(i) for i in range(mupdf.pdf_array_len(value))]
| 15908 | |
| 15909 | |
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Build a default appearance string and store it under /DA of the
    annotation object.

    The string consists of a color operator chosen by the component
    count `ncol` ('g' for less than 2, 'rg' for 3, 'k' otherwise; 2 is
    not supported), followed by the font name and size with 'Tf'.
    '''
    # The string is built directly in Python rather than in a fz_buffer,
    # because pdf_dict_put_text_string() needs a string anyway.
    color_op = ''
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    da = f'{color_op}/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, da)
| 15928 | |
| 15929 | |
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Build the list of span dicts of one text line and store it in
    `line_dict[dictkey_spans]`.

    Consecutive characters with equal size, flags, char flags (MuPDF
    1.25.2 and later), color, font name and bidi value form one span.

    Args:
        line_dict: dict that receives the span list.
        line: the text line; iterating it delivers its characters.
        raw: if true, each span gets a list of per-character dicts under
            `dictkey_chars`, otherwise its text under `dictkey_text`.
        buff: buffer used to accumulate the text of the current span. It
            is cleared by this function.
        tp_rect: characters whose bbox does not overlap this rectangle
            are skipped, unless the rectangle is infinite.

    Returns:
        The union of the rectangles of all spans put in the list. If
        `g_use_extra` is set, the result of extra.JM_make_spanlist().
    '''
    if g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    class char_style:
        # Style properties of a character. A span ends when one of the
        # compared properties changes.
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                # size -1 marks "no character seen yet".
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0
        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # The color is stored in attribute `argb`.
            ret += f' {self.font} {self.argb} {self.asc} {self.desc}'
            return ret

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()

        # One wrapper of the character's font serves all queries below.
        font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
        flags = JM_char_font_flags(font, line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(font)
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(font)
        style.desc = JM_font_descender(font)
        style.bidi = ch.m_internal.bidi

        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
                    mupdf.fz_clear_buffer(buff)

                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None

            # start a new span
            span = dict()
            asc = style.asc
            desc = style.desc
            if style.asc < 1e-3:
                # substitute values if the font has no usable ascender
                asc = 0.9
                desc = -0.1

            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)

            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:   # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)

        # The last span is only kept if its rectangle is not empty.
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None

    # The span list is stored even if the line rectangle is empty.
    line_dict[dictkey_spans] = span_list
    return line_rect
| 16087 | |
def _make_image_dict(img, img_dict):
    """Fill `img_dict` with information extracted from image `img`.

    Used by 'Document.extract_image' and by 'JM_make_image_block'.
    Both of these functions will add some more specific information.

    The image data is the PNG conversion if there is no compressed
    buffer or the image type is not one of the directly usable ones, a
    re-encoded JPEG for 4-component JPEGs, and otherwise the compressed
    buffer itself.
    """
    img_type = img.fz_compressed_image_type()
    ext = JM_image_extension(img_type)

    # compressed image buffer if present, else None
    ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    must_convert = (
            not ll_cbuf
            or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
            or img_type < mupdf.FZ_IMAGE_BMP
            )
    if must_convert:
        # not an image with a compressed buffer: convert to PNG
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        res = mupdf.fz_new_buffer_from_image_as_png(img, color_params)
        ext = "png"
    elif ext == "jpeg" and img.n() == 4:
        # JPEG with CMYK: invert colors
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        res = mupdf.fz_new_buffer_from_image_as_jpeg(img, color_params, 95, 1)
    else:
        # copy the compressed buffer
        res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))

    data = JM_BinFromBuffer(res)
    img_dict[dictkey_width] = img.w()
    img_dict[dictkey_height] = img.h()
    img_dict[dictkey_ext] = ext
    img_dict[dictkey_colorspace] = img.n()
    img_dict[dictkey_xres] = img.xres()
    img_dict[dictkey_yres] = img.yres()
    img_dict[dictkey_bpc] = img.bpc()
    img_dict[dictkey_size] = len(data)
    img_dict[dictkey_image] = data
| 16129 | |
def JM_make_image_block(block, block_dict):
    '''
    Fill `block_dict` with the data of an image block.

    In addition to the items set by _make_image_dict(), key "mask" is
    set to the image's mask as PNG data (None if there is no mask), and
    the block's transformation matrix is stored.
    '''
    image = block.i_image()
    _make_image_dict(image, block_dict)

    # if the image has a mask, store it as a PNG buffer
    mask = image.mask()
    mask_png = None
    if mask.m_internal:
        color_params = mupdf.FzColorParams(mupdf.fz_default_color_params)
        mask_png = mask.fz_new_buffer_from_image_as_png(color_params).fz_buffer_extract()
    block_dict["mask"] = mask_png
    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
| 16141 | |
| 16142 | |
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill 'block_dict' with the bbox and the list of line dictionaries of a
    text block.

    Lines whose bbox does not intersect 'tp_rect' are skipped, unless
    'tp_rect' is the infinite rectangle. The block bbox is the union of the
    rectangles returned by JM_make_spanlist for the accepted lines.
    '''
    if g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)

    lines = []
    bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        line_bbox = mupdf.FzRect(line.m_internal.bbox)
        outside = mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, line_bbox))
        if outside and not mupdf.fz_is_infinite_rect(tp_rect):
            continue    # line lies completely outside the clip
        entry = {}
        span_rect = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        bbox = mupdf.fz_union_rect(bbox, span_rect)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(span_rect)
        lines.append(entry)

    block_dict[dictkey_bbox] = JM_py_from_rect(bbox)
    block_dict[dictkey_lines] = lines
| 16164 | |
| 16165 | |
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dictionaries of text page 'tp' in 'page_dict'.

    Unless the text page's mediabox is infinite, image blocks not fully
    contained in it and any blocks not intersecting it are skipped. Block
    numbers count all blocks of the page, including skipped ones.
    '''
    if g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)

    text_buffer = mupdf.fz_new_buffer(128)
    clip = mupdf.FzRect(tp.m_internal.mediabox)
    blocks = []
    for block_no, block in enumerate(tp):
        is_image = block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE
        if not mupdf.fz_is_infinite_rect(clip):
            block_bbox = mupdf.FzRect(block.m_internal.bbox)
            if is_image and not mupdf.fz_contains_rect(clip, block_bbox):
                continue    # images must lie completely inside the clip
            if mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(clip, block_bbox)):
                continue    # nothing of this block is inside the clip

        entry = {}
        entry[dictkey_number] = block_no
        entry[dictkey_type] = block.m_internal.type
        if is_image:
            entry[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            JM_make_image_block(block, entry)
        else:
            JM_make_text_block(block, entry, raw, text_buffer, clip)
        blocks.append(entry)

    page_dict[dictkey_blocks] = blocks
| 16197 | |
| 16198 | |
def JM_matrix_from_py(m):
    '''
    Convert a matrix-like Python object to a mupdf.FzMatrix.

    Accepts a mupdf.FzMatrix (returned unchanged), a Matrix, or a sequence
    of six items convertible by JM_FLOAT_ITEM. Any other input yields a
    default-constructed mupdf.FzMatrix.
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    values = []
    for i in range(6):
        value = JM_FLOAT_ITEM(m, i)
        if value is None:
            # Fall back to the same default as for other invalid input.
            # (Previously a mupdf.FzRect was returned here, i.e. an object
            # of the wrong type for callers expecting a matrix.)
            return mupdf.FzMatrix()
        values.append(value)
    return mupdf.FzMatrix(*values)
| 16212 | |
| 16213 | |
def JM_mediabox(page_obj):
    '''
    Return the (inheritable) /MediaBox of a PDF page object as mupdf.FzRect.

    An empty or infinite MediaBox is replaced by (0, 0, 612, 792). The
    coordinates are normalized such that x0 <= x1 and y0 <= y1. If the
    resulting width or height is less than 1, the unit rectangle is returned.
    '''
    box = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(box) or mupdf.fz_is_infinite_rect(box):
        box.x0 = 0
        box.y0 = 0
        box.x1 = 612
        box.y1 = 792

    left = mupdf.fz_min(box.x0, box.x1)
    top = mupdf.fz_min(box.y0, box.y1)
    right = mupdf.fz_max(box.x0, box.x1)
    bottom = mupdf.fz_max(box.y0, box.y1)
    normalized = mupdf.FzRect(left, top, right, bottom)

    width = normalized.x1 - normalized.x0
    height = normalized.y1 - normalized.y0
    if width < 1 or height < 1:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalized
| 16241 | |
| 16242 | |
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy source pages spage..epage (both inclusive) of 'doc_src' to 'doc_des',
    passing target positions apage, apage+1, ... to page_merge.
    The source pages are walked upwards if spage < epage, else downwards.
    If show_progress > 0, a message is issued after every 'show_progress'
    copied pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )

    total = mupdf.fz_absi(epage - spage) + 1    # total pages to copy
    step = 1 if spage < epage else -1           # direction of source walk
    source_pages = range(spage, epage + step, step)
    for copied, src_pno in enumerate(source_pages, start=1):
        target_pno = apage + copied - 1
        page_merge(doc_des, doc_src, src_pno, target_pno, rotate, links, annots, graft_map)
        if show_progress > 0 and copied % show_progress == 0:
            message(f"Inserted {copied} of {total} pages.")
| 16295 | |
| 16296 | |
def JM_merge_resources( page, temp_res):
    '''
    Copy the /ExtGState and /Font entries of 'temp_res' into the page's
    /Resources, renumbering them so they do not collide with existing page
    entries named /Alp<n> and /F<n>.

    Missing /Resources, /Resources/ExtGState (only if 'temp_res' has an
    /ExtGState dictionary) and /Resources/Font are created.
    Returns the tuple (a, f) of the offsets that were added to the numbers of
    the copied /Alp and /F names (highest existing number plus 1).
    '''
    # page objects /Resources, /Resources/ExtGState, /Resources/Font
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
    page_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    page_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))

    # objects to merge: /ExtGState, /Font
    new_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    new_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))

    max_alp = -1
    max_fonts = -1

    # /Alp objects - only if there are any to merge
    if mupdf.pdf_is_dict(new_extg):
        new_count = mupdf.pdf_dict_len(new_extg)
        if mupdf.pdf_is_dict(page_extg):
            # find the highest number among existing /Alp<n> names
            for idx in range(mupdf.pdf_dict_len(page_extg)):
                name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(page_extg, idx))
                if name.startswith('Alp'):
                    max_alp = max(max_alp, mupdf.fz_atoi(name[3:]))
        else:
            # page has no /ExtGState yet
            page_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), new_count)

        max_alp += 1
        for idx in range(new_count):
            name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( new_extg, idx))
            new_name = f'Alp{mupdf.fz_atoi(name[3:]) + max_alp}'
            mupdf.pdf_dict_puts(page_extg, new_name, mupdf.pdf_dict_get_val( new_extg, idx))

    # fonts
    if mupdf.pdf_is_dict(page_fonts):
        # find the highest number among existing /F<n> names
        for idx in range(mupdf.pdf_dict_len(page_fonts)):
            name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( page_fonts, idx))
            if name.startswith("F"):
                max_fonts = max(max_fonts, mupdf.fz_atoi(name[1:]))
    else:
        # page has no /Resources/Font yet
        page_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)

    max_fonts += 1
    for idx in range(mupdf.pdf_dict_len(new_fonts)):
        name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( new_fonts, idx))
        new_name = f'F{mupdf.fz_atoi(name[1:]) + max_fonts}'
        mupdf.pdf_dict_puts(page_fonts, new_name, mupdf.pdf_dict_get_val(new_fonts, idx))

    return (max_alp, max_fonts)
| 16361 | |
| 16362 | |
def JM_mupdf_warning( text):
    '''
    Receiver for MuPDF warnings: always record 'text' in the warnings store
    and show it via message() if showing warnings is switched on.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
| 16370 | |
| 16371 | |
def JM_mupdf_error( text):
    '''
    Receiver for MuPDF errors: always record 'text' in the (shared) warnings
    store and show it via message() if showing errors is switched on.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
| 16376 | |
| 16377 | |
def JM_new_bbox_device(rc, inc_layers):
    '''
    Return a new JM_new_bbox_device_Device for the list 'rc'.
    Both arguments are passed through unchanged; 'rc' must be a Python list.
    '''
    assert isinstance(rc, list)
    return JM_new_bbox_device_Device( rc, inc_layers)
| 16381 | |
| 16382 | |
def JM_new_buffer_from_stext_page(page):
    '''
    Return a new buffer with the text of an stext page.

    Only text blocks are considered. Characters whose bbox does not overlap
    the page's mediabox are omitted (unless the mediabox is infinite).
    A line-feed is appended after every line and once more after every
    text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    clip = mupdf.FzRect(page.m_internal.mediabox)
    out = mupdf.fz_new_buffer(256)
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                wanted = (JM_rects_overlap(clip, JM_char_bbox(line, ch))
                        or mupdf.fz_is_infinite_rect(clip)
                        )
                if wanted:
                    mupdf.fz_append_rune(out, ch.m_internal.c)
            mupdf.fz_append_byte(out, ord('\n'))    # end of line
        mupdf.fz_append_byte(out, ord('\n'))        # end of block
    return out
| 16402 | |
| 16403 | |
def JM_new_javascript(pdf, value):
    '''
    Create a new PDF action object of type /JavaScript.

    'pdf' is the PDF document, 'value' the script source as a Python string.
    The source is stored UTF-8 encoded in a new stream object referenced by
    key /JS of the action. Returns the action object, or None if 'value' is
    None or JM_StrAsChar() cannot convert it.
    '''
    if value is None:
        return None     # nothing to do
    script_text = JM_StrAsChar(value)
    if script_text is None:
        return None     # not convertible

    script_buffer = mupdf.fz_new_buffer_from_copied_data(script_text.encode('utf8'))
    script_stream = mupdf.pdf_add_stream(pdf, script_buffer, mupdf.PdfObj(), 0)
    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), script_stream)
    return action
| 16424 | |
| 16425 | |
def JM_new_output_fileptr(bio):
    '''
    Return a new JM_new_output_fileptr_Output wrapping 'bio'.
    NOTE(review): 'bio' is presumably a Python file-like object - confirm
    against JM_new_output_fileptr_Output.
    '''
    return JM_new_output_fileptr_Output( bio)
| 16428 | |
| 16429 | |
def JM_norm_rotation(rotate):
    '''
    Return a normalized /Rotate value: one of 0, 90, 180, 270.

    'rotate' is first reduced to the range 0 <= rotate < 360. If the result
    is not a multiple of 90, 0 is returned.
    '''
    # Modulo replaces repeated addition / subtraction of 360, which needed
    # one iteration per full turn and did not terminate for huge floats.
    rotate %= 360
    if rotate >= 360:
        # float rounding of a tiny negative value can yield exactly 360.0
        rotate -= 360
    if rotate % 90 != 0:
        return 0
    return rotate
| 16441 | |
| 16442 | |
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object 'what' into a new buffer and return the buffer.
    'compress' and 'ascii' are passed on to mupdf.pdf_print_obj().
    '''
    target = mupdf.fz_new_buffer(512)
    sink = mupdf.FzOutput(target)
    mupdf.pdf_print_obj(sink, what, compress, ascii)
    sink.fz_close_output()
    mupdf.fz_terminate_buffer(target)
    return target
| 16450 | |
| 16451 | |
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list

    The xref numbers are appended to 'xrefs', which is also the return
    value. Items reachable via /First are visited (recursively) before
    following /Next.
    '''
    if not obj.m_internal:
        return xrefs
    thisobj = obj
    while thisobj.m_internal:
        newxref = mupdf.pdf_to_num( thisobj)
        # an xref seen before means a cycle; an object with a /Type key is
        # treated as the top of the chain
        if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal:
            # circular ref or top of chain: terminate
            break
        xrefs.append( newxref)
        first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First'))    # try go down
        if mupdf.pdf_is_dict( first):
            xrefs = JM_outline_xrefs( first, xrefs)
        thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next'))    # try go next
        # note: 'thisobj' already is the /Next item here, so this reads the
        # /Parent entry of that item
        parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent'))   # get parent
        if not mupdf.pdf_is_dict( thisobj):
            thisobj = parent
    return xrefs
| 16475 | |
| 16476 | |
def JM_page_rotation(page):
    '''
    Return the normalized (inheritable) /Rotate value of a PDF page, i.e.
    the result of JM_norm_rotation() applied to the page's rotation integer.
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
| 16487 | |
| 16488 | |
def JM_pdf_obj_from_str(doc, src):
    '''
    Create a PDF object by parsing the object source given in string 'src'.
    The string is UTF-8 encoded, copied to a buffer and parsed from a stream
    opened on that buffer.
    '''
    # fixme: the bytes object is copied once more when making the buffer -
    # seems inefficient, but no other way?
    encoded = bytes(src, 'utf8')
    source_buffer = mupdf.fz_new_buffer_from_copied_data(encoded)
    source_stream = mupdf.fz_open_buffer(source_buffer)
    lexer_buffer = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, source_stream, lexer_buffer)
| 16501 | |
| 16502 | |
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Render a display list to a new Pixmap, optionally restricted to the
    'clip' part of the display list rectangle.

    The pixmap covers the (clipped) display list bounds transformed by 'ctm'
    and rounded to integers. It is cleared to transparent if 'alpha' is
    true, else to byte value 0xFF before rendering.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    # intersecting is a no-op if clip is not given
    bounds = mupdf.fz_intersect_rect(mupdf.fz_bound_display_list(list_), rclip)
    irect = mupdf.fz_round_rect(mupdf.fz_transform_rect(bounds, matrix))

    assert isinstance( cs, mupdf.FzColorspace)

    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    if mupdf.fz_is_infinite_rect(rclip):
        # no clip: draw everything
        dev = mupdf.fz_new_draw_device(matrix, pix)
        scissor = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        scissor = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), scissor, mupdf.FzCookie())

    mupdf.fz_close_device(dev)
    # Use special raw Pixmap constructor so we don't set alpha to true.
    return Pixmap( 'raw', pix)
| 16546 | |
| 16547 | |
def JM_point_from_py(p):
    '''
    Convert a point-like Python object to a mupdf.FzPoint.

    A mupdf.FzPoint is returned unchanged, a Point is converted directly.
    Otherwise items 0 and 1 of 'p' are read via JM_FLOAT_ITEM and limited to
    the interval [FZ_MIN_INF_RECT, FZ_MAX_INF_RECT]. If one of them cannot
    be read, this (pure Python) code path returns point (0, 0).
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    fallback = mupdf.FzPoint(0, 0)
    x = JM_FLOAT_ITEM(p, 0)
    y = JM_FLOAT_ITEM(p, 1)
    if x is None or y is None:
        return fallback
    # clamp coordinates to the supported value range
    x = min( max( x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    y = min( max( y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    return mupdf.FzPoint(x, y)
| 16569 | |
| 16570 | |
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output of an stext page, appended to buffer 'res'.

    Only text blocks are considered. Characters whose bbox does not overlap
    the page's mediabox are omitted (unless the mediabox is infinite).
    After each line a line-feed is appended, except if no character of the
    line was output or the last one output already was a line-feed.
    '''
    if g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)

    # Note: a former preliminary pass which only counted blocks, lines and
    # characters was removed - its results were never used and it could
    # fail with UnboundLocalError on an empty line or block.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0   # last character output for this line
            for ch in line:
                chbbox = JM_char_bbox(line, ch)
                if (mupdf.fz_is_infinite_rect(rect)
                        or JM_rects_overlap(rect, chbbox)
                        ):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            # terminate the line unless empty or already ending with '\n'
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
| 16612 | |
| 16613 | |
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create, replace or delete a JavaScript PDF action.

    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be specified,
    so JavaScript actions can be stored for '/A' and '/AA/?' keys.

    annot_obj: the PDF object owning the action.
    key1: first level key.
    key2: optional second level key; None or a null PDF object means
        "not given".
    value: the new script source. If empty / None, the corresponding key is
        deleted instead.
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # owning PDF

    # Evaluate once whether a usable second key was given. The delete
    # branch always accepted key2=None; the same test is now used
    # everywhere, so None no longer fails with AttributeError below.
    has_key2 = key2 is not None and bool(key2.m_internal)

    # if no new script given, just delete corresponding key
    if not value:
        if not has_key2:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script as a string
    if not has_key2 or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if not has_key2:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
| 16645 | |
| 16646 | |
def JM_py_from_irect(r):
    '''
    Return attributes x0, y0, x1, y1 of 'r' as a tuple.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
| 16649 | |
| 16650 | |
def JM_py_from_matrix(m):
    '''
    Return attributes a, b, c, d, e, f of 'm' as a tuple.
    '''
    coefficients = (m.a, m.b, m.c, m.d, m.e, m.f)
    return coefficients
| 16653 | |
| 16654 | |
def JM_py_from_point(p):
    '''
    Return attributes x, y of 'p' as a tuple.
    '''
    coordinates = (p.x, p.y)
    return coordinates
| 16657 | |
| 16658 | |
def JM_py_from_quad(q):
    '''
    Return the corners of 'q' as a tuple of four (x, y) tuples in the
    sequence ul, ur, ll, lr.
    '''
    corners = (q.ul, q.ur, q.ll, q.lr)
    return tuple((corner.x, corner.y) for corner in corners)
| 16669 | |
| 16670 | |
def JM_py_from_rect(r):
    '''
    Return attributes x0, y0, x1, y1 of 'r' as a tuple.
    '''
    corners = (r.x0, r.y0, r.x1, r.y1)
    return corners
| 16673 | |
| 16674 | |
def JM_quad_from_py(r):
    '''
    Convert a quad-like Python object to a MuPDF quad.

    Accepted are a mupdf.FzQuad (returned unchanged), a mupdf.FzRect, a
    Quad, a sequence of 4 float-like items (taken as a rectangle) or a
    tuple / list of four point-like 2-item sequences in the order ul, ur,
    ll, lr. Point coordinates are limited to the interval
    [FZ_MIN_INF_RECT, FZ_MAX_INF_RECT].
    Invalid input yields a quad with all coordinates zero.
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)  # result for invalid input
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q

    # NOTE(review): this test looks inverted relative to the 4-float check
    # above (it takes the rectangle route when item 0 is NOT a float) -
    # left unchanged, confirm against JM_FLOAT_ITEM.
    if JM_FLOAT_ITEM(r, 0) is None:
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect x, y of the four corners in the argument order of
    # fz_make_quad(). The former code stored them as attributes of integer
    # list items, which raised AttributeError.
    coords = []
    for corner in r:
        if not PySequence_Check(corner) or PySequence_Size(corner) != 2:
            return q    # invalid: cancel the rest
        x = JM_FLOAT_ITEM(corner, 0)
        y = JM_FLOAT_ITEM(corner, 1)
        if x is None or y is None:
            return q
        coords.append(min( max( x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
        coords.append(min( max( y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    return mupdf.fz_make_quad(*coords)
| 16718 | |
| 16719 | |
def JM_read_contents(pageref):
    '''
    Return a buffer with the content of a PDF page's /Contents.

    If /Contents is an array, the streams among its items are concatenated,
    with a space byte inserted before every item except the first. A single
    /Contents object is loaded directly. Without /Contents, an empty buffer
    is returned.
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)

    if not mupdf.pdf_is_array(contents):
        if contents.m_internal:
            return mupdf.pdf_load_stream(contents)
        return mupdf.FzBuffer(0)

    merged = mupdf.FzBuffer(1024)
    for idx in range(mupdf.pdf_array_len(contents)):
        if idx > 0:
            mupdf.fz_append_byte(merged, 32)    # separate items by a space
        item = mupdf.pdf_array_get(contents, idx)
        if mupdf.pdf_is_stream(item):
            mupdf.fz_append_buffer(merged, mupdf.pdf_load_stream(item))
    return merged
| 16740 | |
| 16741 | |
def JM_rect_from_py(r):
    '''
    Convert a rect-like Python object to a mupdf.FzRect.

    A mupdf.FzRect is returned unchanged; mupdf.FzIrect, Rect and IRect are
    converted directly. Otherwise 'r' must be a sequence of 4 items readable
    by JM_FLOAT_ITEM; the values are limited to the interval
    [FZ_MIN_INF_RECT, FZ_MAX_INF_RECT]. Any other input yields the infinite
    rectangle.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, (Rect, IRect)):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    coords = []
    for idx in range(4):
        value = JM_FLOAT_ITEM(r, idx)
        if value is None:
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        # clamp to the supported value range
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_rect(*coords)
| 16763 | |
| 16764 | |
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles 'a' and 'b' overlap, else 0.
    Rectangles which only share an edge or a corner do not overlap.
    '''
    disjoint = (
            a.x0 >= b.x1
            or a.y0 >= b.y1
            or a.x1 <= b.x0
            or a.y1 <= b.y0
            )
    return 0 if disjoint else 1
| 16774 | |
| 16775 | |
def JM_refresh_links( page):
    '''
    Reload the links of a PDF page from its /Annots array and store them in
    the page. Does nothing if 'page' is None / invalid or has no /Annots.
    '''
    if page is None or not page.m_internal:
        return
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return

    pdf = page.doc()
    page_number = mupdf.pdf_lookup_page_number( pdf, page.obj())
    # receivers for the values delivered by pdf_page_transform()
    mediabox = mupdf.FzRect()
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform( page, mediabox, ctm)
    links = mupdf.pdf_load_link_annots( pdf, page, annots, page_number, ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link( links.m_internal)
| 16791 | |
| 16792 | |
def JM_rotate_page_matrix(page):
    '''
    Return the matrix belonging to the page's rotation (0, 90, 180 or 270).
    For an invalid page or rotation 0, a default mupdf.FzMatrix is returned.
    The translation parts depend on the page's cropbox size.
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()     # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()     # no rotation

    size = JM_cropbox_size(page.obj())
    width = size.x
    height = size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    # remaining case
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
| 16815 | |
| 16816 | |
def JM_search_stext_page(page, needle):
    '''
    Search the text of stext page 'page' for string 'needle'.

    Returns None if 'needle' is empty, otherwise the list 'quads', which is
    handed to on_highlight_char() (as 'hits.quads') for every character
    belonging to a match - presumably on_highlight_char() stores the hit
    quads there; confirm against its definition.

    The page text is first put in a string (the "haystack") by
    JM_new_buffer_from_stext_page(). Then the page's characters are walked
    in the same sequence while 'haystack' tracks the corresponding string
    position. The 'while 1' loop emulates the goto-logic of the C original.
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)

    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    # simple attribute container passed to on_highlight_char()
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1

    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    haystack = 0    # current position in haystack_string
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads

    # find_string() searched a slice: make positions relative to full string
    begin += haystack
    end += haystack
    inside = 0      # 1 while the current position is inside a match
    i = 0           # counts visited characters (not used otherwise)
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        # such characters are not in the haystack either,
                        # so 'haystack' must not be advanced
                        #goto next_char;
                        continue
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # end of current match reached: look for next one
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # skip the line-feed terminating each line in the haystack
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # skip the line-feed terminating each text block in the haystack
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
| 16888 | |
| 16889 | |
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Step through a /Resources object and collect information in 'liste'.

    'what' selects the information: 1 = fonts, 2 = images, 3 = form
    xobjects; any other value does nothing. The entries of /XObject which
    have own /Resources are scanned recursively. 'tracer' is a list of the
    stream xrefs visited so far; together with marking 'rsrc' it is used to
    detect circular dependencies, which end the scan with a MuPDF warning.
    '''
    if mupdf.pdf_mark_obj(rsrc):
        mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
        return  # Circular dependencies!
    try:
        xobjects = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)

        if what == 1:       # fonts
            fonts = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, fonts, liste, stream_xref)
        elif what == 2:     # images
            JM_gather_images(pdf, xobjects, liste, stream_xref)
        elif what == 3:     # form xobjects
            JM_gather_forms(pdf, xobjects, liste, stream_xref)
        else:               # should never happen
            return

        # recurse into xobjects which have their own /Resources
        for idx in range(mupdf.pdf_dict_len(xobjects)):
            child = mupdf.pdf_dict_get_val(xobjects, idx)
            child_xref = mupdf.pdf_to_num(child) if mupdf.pdf_is_stream(child) else 0
            child_rsrc = mupdf.pdf_dict_get(child, mupdf.PDF_ENUM_NAME_Resources)
            if not child_rsrc.m_internal:
                continue
            if child_xref in tracer:
                mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
                return
            tracer.append(child_xref)
            JM_scan_resources( pdf, child_rsrc, liste, what, child_xref, tracer)
    finally:
        mupdf.pdf_unmark_obj(rsrc)
| 16929 | |
| 16930 | |
def JM_set_choice_options(annot, liste):
    '''
    Set the /Opt array (values of a ListBox / ComboBox) of an annotation.

    'liste' is a tuple or list; each item is either a string or a pair
    (tuple / list) of two non-empty values, which is stored as a sub-array.
    Nothing happens if 'liste' is empty or None.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, len( liste))
    for entry in liste:
        if isinstance(entry, str):
            mupdf.pdf_array_push_text_string( options, entry)
            continue
        assert isinstance( entry, (tuple, list)) and len( entry) == 2, 'bad choice field list'
        first, second = entry
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
| 16957 | |
| 16958 | |
def JM_set_field_type(doc, obj, type):
    '''
    Set the field type of PDF object 'obj'.

    For a known widget type, key /FT is set to the respective name and the
    field flags /Ff are adjusted by clearing / setting the bits belonging to
    that type. For an unknown type nothing is changed.
    '''
    # widget type, /FT name, bits to set, bits to clear
    push = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
    radio = mupdf.PDF_BTN_FIELD_IS_RADIO
    combo = mupdf.PDF_CH_FIELD_IS_COMBO
    specs = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn', push, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn', radio, push),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn', 0, push | radio),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch', 0, combo),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch', combo, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )

    typename = None
    setbits = 0
    clearbits = 0
    for widget_type, ft_name, to_set, to_clear in specs:
        if type == widget_type:     # first match wins
            typename = PDF_NAME(ft_name)
            setbits = to_set
            clearbits = to_clear
            break

    if typename is not None and typename.m_internal:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        flags = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        flags &= ~clearbits
        flags |= setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), flags)
| 16995 | |
| 16996 | |
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value.

    Args:
        obj: PDF dictionary object to modify.
        key: key path; sub-keys are separated by '/'.
        value: text inserted verbatim as the PDF source of the new value.

    Returns:
        A new PDF object built from the modified object source.

    Raises:
        Exception: if the key does not exist yet and one of its parent paths
            is an indirect reference, or if a placeholder could not be
            inserted at `key`.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)
    # Split the PDF key at path separators; the last part is the key proper,
    # what remains are its parent path components.
    parents = key.split('/')
    skey = parents.pop()

    testkey = mupdf.pdf_dict_getp(obj, key)    # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening if
        # indirect objects are referenced. So we check all higher level
        # sub-paths for indirect references, longest path first.
        while parents:
            t = '/'.join(parents)
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                # The format arguments used to be passed to Exception()
                # unformatted, which produced a tuple as the message.
                raise Exception("path to '%s' has indirects" % JM_StrAsChar(skey))
            parents.pop()

    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception("cannot insert value for '%s'" % key)
    if mupdf.pdf_to_text_string(testkey) != eyecatcher:
        raise Exception("cannot insert value for '%s'" % key)

    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace the first occurrence of the eyecatcher entry by the desired value
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    return JM_pdf_obj_from_str(pdf, newstr)
| 17043 | |
| 17044 | |
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Update the /BaseState, /ON, /OFF, /Locked and /RBGroups entries of `conf`.

    `basestate` is only written when truthy. For each of the other
    arguments: None leaves the entry alone; any other value deletes the
    existing entry, and a non-empty value then writes a new array of
    indirect references (for `rbgroups`: an array of such arrays).
    '''
    if basestate:
        mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)

    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del( conf, PDF_NAME(key))
        if xrefs:
            target = mupdf.pdf_dict_put_array( conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp( target, xrefs)

    if rbgroups is None:
        return
    mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
    if rbgroups:
        groups = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
        for group in rbgroups:
            sub_array = mupdf.pdf_array_push_array( groups, 1)
            JM_set_ocg_arrays_imp( sub_array, group)
| 17073 | |
| 17074 | |
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append one indirect reference to PDF array `arr` for every xref in
    `list_`. The references are created in the document `arr` is bound to.
    '''
    document = mupdf.pdf_get_bound_document(arr)
    for number in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(document, number, 0))
| 17084 | |
| 17085 | |
def JM_set_resource_property(ref, name, xref):
    '''
    Insert an item into Resources/Properties (used for Marked Content).

    Arguments:
        ref: the owning object, e.g. page object or Form XObject
        name: marked content name
        xref: xref of the referenced object (inserted as indirect reference)

    Missing /Resources or /Properties dictionaries are created.
    '''
    def sub_dict(parent, key):
        # Return parent[key], creating an empty dictionary there if absent.
        found = mupdf.pdf_dict_get(parent, PDF_NAME(key))
        if found.m_internal:
            return found
        return mupdf.pdf_dict_put_dict(parent, PDF_NAME(key), 1)

    document = mupdf.pdf_get_bound_document(ref)
    reference = mupdf.pdf_new_indirect(document, xref, 0)
    if not reference.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)
    properties = sub_dict(sub_dict(ref, 'Resources'), 'Properties')
    mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), reference)
| 17105 | |
| 17106 | |
def JM_set_widget_properties(annot, Widget):
    '''
    Copy the properties of a Python Widget object into its PDF form field.

    Called by "Page.add_widget" and "Annot.update_widget". `annot` may be an
    Annot or a mupdf.PdfAnnot. Attributes missing on `Widget` are read as
    None.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()

    def prop(attr):
        # Widget attribute, or None when the Widget does not have it.
        return getattr(Widget, attr, None)

    def real_array(numbers):
        # New PDF array holding the given numbers as reals.
        arr = mupdf.pdf_new_array(pdf, len(numbers))
        for number in numbers:
            mupdf.pdf_array_push_real(arr, number)
        return arr

    field_type = prop("field_type")

    # --- rectangle (transformed by the page rotation matrix) ---------------
    rect = JM_rect_from_py(prop("rect"))
    rect = mupdf.fz_transform_rect(rect, JM_rotate_page_matrix(page))
    mupdf.pdf_set_annot_rect(annot, rect)

    # --- fill color ---------------------------------------------------------
    fill = prop("fill_color")
    if fill and PySequence_Check(fill):
        mupdf.pdf_field_set_fill_color(annot_obj, real_array(fill))

    # --- dashes -------------------------------------------------------------
    dash_values = prop("border_dashes")
    if dash_values and PySequence_Check(dash_values):
        dash_arr = mupdf.pdf_new_array(pdf, len(dash_values))
        for dash in dash_values:
            mupdf.pdf_array_push_int(dash_arr, dash)
        mupdf.pdf_dict_putl(annot_obj, dash_arr, PDF_NAME('BS'), PDF_NAME('D'))

    # --- border color -------------------------------------------------------
    border = prop("border_color")
    if border and PySequence_Check(border):
        mupdf.pdf_dict_putl(annot_obj, real_array(border), PDF_NAME('MK'), PDF_NAME('BC'))

    # The "text_format" attribute is ignored for now - may be used later.

    # --- field label --------------------------------------------------------
    label = prop("field_label")
    if label is not None:
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), JM_StrAsChar(label))

    # --- field name (only written when it differs from the current one) ----
    wanted_name = prop("field_name")
    if wanted_name is not None:
        wanted_name = JM_StrAsChar(wanted_name)
        current_name = mupdf.pdf_load_field_name(annot_obj)
        if wanted_name != current_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), wanted_name)

    # --- max text len (text fields only) ------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        maxlen = prop("text_maxlen")
        if maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), maxlen)

    # --- display mode -------------------------------------------------------
    mupdf.pdf_field_set_display(annot_obj, prop("field_display"))

    # --- choice values ------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        JM_set_choice_options(annot, prop("choice_values"))

    # --- border style -------------------------------------------------------
    style = JM_get_border_style(prop("border_style"))
    mupdf.pdf_dict_putl(annot_obj, style, PDF_NAME('BS'), PDF_NAME('S'))

    # --- border width -------------------------------------------------------
    width = mupdf.pdf_new_real(prop("border_width"))
    mupdf.pdf_dict_putl(annot_obj, width, PDF_NAME('BS'), PDF_NAME('W'))

    # --- /DA string ---------------------------------------------------------
    appearance = JM_StrAsChar(prop("_text_da"))
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), appearance)
    for unsupported in ('DS', 'RC'):    # not supported by MuPDF
        mupdf.pdf_dict_del(annot_obj, PDF_NAME(unsupported))

    # --- field flags (the bit implied by the field type is always added) ---
    flags = prop("field_flags")
    if flags is not None:
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), flags)

    # --- button caption -----------------------------------------------------
    caption = JM_StrAsChar(prop("button_caption"))
    if caption:
        mupdf.pdf_field_set_button_caption(annot_obj, caption)

    # --- scripts: /A, then /AA/K, /AA/F, /AA/V, /AA/C, /AA/Bl, /AA/Fo ------
    scripts = (
        ("script", PDF_NAME('A'), mupdf.PdfObj()),
        ("script_stroke", PDF_NAME('AA'), PDF_NAME('K')),
        ("script_format", PDF_NAME('AA'), PDF_NAME('F')),
        ("script_change", PDF_NAME('AA'), PDF_NAME('V')),
        ("script_calc", PDF_NAME('AA'), PDF_NAME('C')),
        ("script_blur", PDF_NAME('AA'), mupdf.pdf_new_name('Bl')),
        ("script_focus", PDF_NAME('AA'), mupdf.pdf_new_name('Fo')),  # codespell:ignore
    )
    for attr, key1, key2 in scripts:
        JM_put_script(annot_obj, key1, key2, prop(attr))

    # --- field value --------------------------------------------------------
    value = prop("field_value")
    text = JM_StrAsChar(value)  # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if value:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
        else:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        onstate = mupdf.pdf_button_field_on_state(annot_obj)
        on = onstate.pdf_to_name()
        if value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    elif text:
        mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
        if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
            mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))

    # --- finally mark the annotation dirty and update it ---------------------
    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
| 17299 | |
| 17300 | |
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Add the characters of `s` to `text` glyph by glyph and return the
    matrix advanced past the last glyph.

    A character for which fz_encode_character_sc() yields glyph 0 in
    `user_font` is looked up via fz_encode_character_with_fallback(). After
    each glyph the matrix is pre-translated by the glyph advance: along x
    when `wmode` is 0, otherwise along negative y.
    '''
    pos = 0
    while pos < len(s):
        consumed, rune = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        font = user_font
        glyph = mupdf.fz_encode_character_sc(user_font, rune)
        if glyph == 0:
            glyph, font = mupdf.fz_encode_character_with_fallback(user_font, rune, 0, language)
        mupdf.fz_show_glyph(text, font, trm, glyph, rune, wmode, bidi_level, markup_dir, language)
        advance = mupdf.fz_advance_glyph(font, glyph, wmode)
        shift_x, shift_y = (advance, 0) if wmode == 0 else (0, -advance)
        trm = mupdf.fz_pre_translate(trm, shift_x, shift_y)
    return trm
| 17327 | |
| 17328 | |
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of `buff` as text, cut off before the first NUL
    character. Bytes that cannot be decoded are replaced, not raised on.
    '''
    raw = mupdf.fz_buffer_extract_copy(buff)
    decoded = raw.decode(errors='replace')
    head, _sep, _tail = decoded.partition(chr(0))
    return head
| 17336 | |
| 17337 | |
def message_warning(text):
    '''
    Pass `text`, prefixed with 'warning: ', to message().
    '''
    warning_line = f'warning: {text}'
    message(warning_line)
| 17343 | |
| 17344 | |
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Replace the stream of `obj` by the content of `buffer_`.

    With `compress` set, the data is stored compressed (and /Filter set to
    /FlateDecode) if the buffer is longer than 30 bytes and compression
    actually makes it smaller. In every other case it is stored as is.
    '''
    def smaller_version():
        # Compressed buffer if that is worth the effort, else None.
        size, _ = mupdf.fz_buffer_storage(buffer_)
        if not size > 30:   # ignore small stuff
            return None
        packed = JM_compress_buffer(buffer_)
        assert isinstance(packed, mupdf.FzBuffer)
        if not packed.m_internal:
            return None
        packed_size, _ = mupdf.fz_buffer_storage(packed)
        return packed if packed_size < size else None

    packed = smaller_version() if compress else None
    if packed is None:
        mupdf.pdf_update_stream(doc, obj, buffer_, 0)
        return
    mupdf.pdf_dict_put(
            obj,
            mupdf.PDF_ENUM_NAME_Filter,
            mupdf.PDF_ENUM_NAME_FlateDecode,
            )
    mupdf.pdf_update_stream(doc, obj, packed, 1)
| 17367 | |
| 17368 | |
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Return an XObject in `pdfout` that represents a PDF source page.

    For a positive `xref` it is assumed that this object can be used
    instead: an indirect reference to it is returned and the source page is
    not looked at. Otherwise a new XObject is made from the page's
    /MediaBox, its contents and a grafted copy of its /Resources.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    page_obj = _as_pdf_page(fsrcpage.this).obj()
    mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox')))

    # Deep-copy the resources object of the source page, through the graft
    # map when one is available.
    source_resources = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('Resources'))
    if gmap.m_internal:
        resources = mupdf.pdf_graft_mapped_object(gmap, source_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, source_resources)

    # source page contents
    contents = JM_read_contents(page_obj)

    # create the XObject representing the source page, then store the page
    # contents and the page resources in it
    xobj = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    JM_update_stream(pdfout, xobj, contents, 1)
    mupdf.pdf_dict_put(xobj, PDF_NAME('Resources'), resources)
    return xobj
| 17402 | |
| 17403 | |
def PySequence_Check(s):
    '''
    Return True if `s` is a tuple or a list (or an instance of a subclass).
    '''
    return isinstance(s, tuple) or isinstance(s, list)
| 17406 | |
| 17407 | |
def PySequence_Size(s):
    '''
    Return the number of items in `s`.
    '''
    size = len(s)
    return size
| 17410 | |
| 17411 | |
# constants: error messages. These are also in extra.i.
#
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"


# Exception type names, passed as the second argument of RAISEPY().
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
| 17445 | |
def RAISEPY( msg, exc):
    '''
    Raise an exception with message `msg`.

    Args:
        msg: the exception message.
        exc: an exception class, or the name of one (for example the value
            of PyExc_ValueError, 'ValueError').

    Raises:
        The builtin exception named by `exc` (or `exc` itself when it is an
        exception class). Any other value - this includes names that are not
        builtins, such as 'FileDataError' - falls back to plain `Exception`,
        which is what this function raised unconditionally before. All
        exceptions raised here are therefore still caught by
        `except Exception`.
    '''
    import builtins

    if isinstance(exc, type):
        exc_type = exc
    elif isinstance(exc, str):
        exc_type = getattr(builtins, exc, None)
    else:
        exc_type = None
    if not (isinstance(exc_type, type) and issubclass(exc_type, Exception)):
        exc_type = Exception
    raise exc_type( msg)
| 17450 | |
| 17451 | |
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    '''
    Decode `s` with the 'raw_unicode_escape' codec.

    Args:
        s: str, bytes, bytearray or memoryview. A str is first encoded to
            UTF-8. Any falsy value (None, '', b'') gives ''.
        errors: error handling scheme used for both encoding and decoding.

    Returns:
        The decoded str.

    Raises:
        TypeError: if `s` is none of the supported types (formerly this
            surfaced as an UnboundLocalError).
    '''
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        raw = bytes(s)
    else:
        raise TypeError(f"expected str or bytes-like object, not {type(s).__name__}")
    return raw.decode('raw_unicode_escape', errors=errors)
| 17462 | |
| 17463 | |
def CheckColor(c: OptSeq):
    """Validate a color specification.

    A falsy `c` is accepted without further checks. Anything else must be a
    list or tuple with 1, 3 or 4 components.

    Raises:
        ValueError: if `c` has another type or length, or if its smallest
            component is below 0 or its largest above 1.
    """
    if not c:
        return
    problem = ValueError("need 1, 3 or 4 color components in range 0 to 1")
    if type(c) not in (list, tuple):
        raise problem
    if len(c) not in (1, 3, 4):
        raise problem
    if min(c) < 0 or max(c) > 1:
        raise problem
| 17473 | |
| 17474 | |
def CheckFont(page: Page, fontname: str) -> tuple:
    """Return the first entry of page.get_fonts() whose item 4 equals
    `fontname`, or None if there is no such entry.
    """
    fonts = page.get_fonts()
    return next((entry for entry in fonts if entry[4] == fontname), None)
| 17481 | |
| 17482 | |
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Return the first entry of doc.FontInfos whose item 0 equals `xref`,
    or None if there is no such entry.
    """
    return next((entry for entry in doc.FontInfos if xref == entry[0]), None)
| 17489 | |
| 17490 | |
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the `quads` argument of the text marker functions.

    A single rect-like is returned as a 1-tuple holding its quad, a single
    quad-like as a 1-tuple holding the argument itself. Anything else is
    treated as an iterable of rect-likes / quad-likes and returned
    unchanged.

    Raises:
        ValueError: if an item of the iterable is neither rect-like nor
            quad-like.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if not all(CheckRect(item) or CheckQuad(item) for item in quads):
        raise ValueError("bad quads entry")
    return quads
| 17501 | |
| 17502 | |
def CheckMorph(o: typing.Any) -> bool:
    """Validate a morph parameter.

    Returns False for a falsy `o`, True for a valid one: a list / tuple of
    two items, the first of length 2 and the second of length 6 with its
    last two entries equal to 0.

    Raises:
        ValueError: for any other truthy `o`.
    """
    if not o:
        return False
    if type(o) not in (list, tuple) or len(o) != 2:
        raise ValueError("morph must be a sequence of length 2")
    fixpoint, matrix = o
    if len(fixpoint) != 2 or len(matrix) != 6:
        raise ValueError("invalid morph param 0")
    if not matrix[4] == matrix[5] == 0:
        raise ValueError("invalid morph param 1")
    return True
| 17513 | |
| 17514 | |
def CheckParent(o: typing.Any):
    """Do nothing: the orphaned-object check is disabled.

    The former check (raise ValueError when `o` has no `parent` attribute
    or its `parent` is None) sat behind an unconditional `return` and was
    never executed; that unreachable code has been removed.
    """
    return
| 17519 | |
| 17520 | |
def CheckQuad(q: typing.Any) -> bool:
    """Check whether an object is a convex quad-like.

    Returns False if Quad(q) cannot be constructed, otherwise the
    `is_convex` property of that quad.
    """
    try:
        quad = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1: exception_info()
        return False
    else:
        return quad.is_convex
| 17532 | |
| 17533 | |
def CheckRect(r: typing.Any) -> bool:
    """Check whether an object is a non-degenerate rect-like.

    Returns False if Rect(r) cannot be constructed, or if that rectangle is
    empty or infinite; True otherwise.
    """
    try:
        rect = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1: exception_info()
        return False
    else:
        return not rect.is_empty and not rect.is_infinite
| 17545 | |
| 17546 | |
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Return the PDF color operator string for color `c`.

    A falsy `c` gives "". A single number is treated as a 1-component
    color. The operator is chosen by the number of components (1: G / g,
    3: RG / rg, otherwise K / k); the upper-case form is used when `f` is
    "c", the lower-case form for any other `f`.

    Raises:
        ValueError: from CheckColor() for an invalid color.
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        c = (c,)
    CheckColor(c)
    operators = {1: ("G ", "g "), 3: ("RG ", "rg ")}
    upper, lower = operators.get(len(c), ("K ", "k "))
    operand = c[0] if len(c) == 1 else tuple(c)
    prefix = _format_g(operand) + " "
    if f == "c":
        return prefix + upper
    return prefix + lower
| 17563 | |
| 17564 | |
def Page__add_text_marker(self, quads, annot_type):
    '''
    Create a text marker annotation of type `annot_type` on this page.

    Args:
        quads: iterable of quad-likes; one quad point is added per item.
        annot_type: annotation type passed to mupdf.pdf_create_annot().

    Returns:
        The new Annot, or None if creating it failed (the error is only
        reported via exception_info() when g_exceptions_verbose is set).

    While the annotation is built, a non-zero page rotation is temporarily
    set to 0. It is restored in a `finally` clause, so this happens exactly
    once on both the success and the failure path.
    '''
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)
    try:
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            q = JM_quad_from_py(item)
            mupdf.pdf_add_annot_quad_point(annot, q)
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
    except Exception:
        # Deliberate best effort: failure is signalled by returning None.
        if g_exceptions_verbose: exception_info()
        return None
    finally:
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation)
    return Annot(annot)
| 17586 | |
| 17587 | |
def PDF_NAME(x):
    '''
    Return attribute `PDF_ENUM_NAME_<x>` of the mupdf module.

    `x` must be a str. An AttributeError results if mupdf has no such
    attribute.
    '''
    assert isinstance(x, str)
    # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
    # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
    # explicit, but this seems to be ignored by SWIG. If SWIG started to
    # generate code that respected `explicit`, we would need to return
    # `mupdf.PdfObj(...)` of the value instead.
    #
    # [Compare with extra.i, where we define our own PDF_NAME2() macro that
    # returns a mupdf::PdfObj.]
    return getattr(mupdf, f'PDF_ENUM_NAME_{x}')
| 17600 | |
| 17601 | |
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store `info` in doc.FontInfos.

    The first entry whose item 0 equals info[0] is replaced; if there is no
    such entry, `info` is appended.
    """
    xref = info[0]
    for position, entry in enumerate(doc.FontInfos):
        if entry[0] == xref:
            doc.FontInfos[position] = info
            return
    doc.FontInfos.append(info)
| 17613 | |
| 17614 | |
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types will
    match an item in <args>. `None` will match anything in <args>. `type(None)`
    will match an arg whose value is `None`.

    A tuple that contains `None` marks an optional trailing arg: it may be
    missing from <args>. If the arg is present it must match one of the
    tuple's other types (a tuple holding nothing but `None` matches
    anything).
    '''
    pos = 0
    for type_ in types:
        optional = isinstance(type_, tuple) and None in type_
        if pos >= len(args):
            if optional:
                # arg is missing but has default value.
                continue
            return False
        if optional:
            # isinstance() raises TypeError when it meets None inside a
            # tuple of types, so remove it before testing the arg.
            type_ = tuple(t for t in type_ if t is not None) or None
        if type_ is not None and not isinstance(args[pos], type_):
            return False
        pos += 1
    return pos == len(args)
| 17638 | |
| 17639 | |
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    Args:
        width, height: image dimensions.
        tr: rect-like target rectangle.
        rotate: rotation angle passed to mupdf.fz_rotate().
        keep: if true, the width / height proportion of the image is taken
            into account when the scaling factors are chosen; otherwise the
            image is scaled to the full target width and height.

    Returns:
        The matrix obtained by concatenating, in this order: a translation
        by (-0.5, -0.5), the rotation, the scaling and a translation to the
        center of `tr`.
    '''
    target = JM_rect_from_py(tr)
    rotation = mupdf.fz_rotate(rotate)
    box_w = target.x1 - target.x0
    box_h = target.y1 - target.y0

    # Relative image width / height: the longer side becomes 1.
    if keep:
        longest = max(width, height)
        fw, fh = width / longest, height / longest
    else:
        fw = fh = 1
    shorter = min(fw, fh)
    if rotate not in (0, 180):
        # any other rotation: exchange the roles of width and height
        fw, fh = fh, fw

    if not fw < 1 and fw == fh:
        scale_w, scale_h = box_w, box_h
    else:
        height_limited = box_w / fw > box_h / fh
        if fw < 1:
            if height_limited:
                scale_w, scale_h = box_h * shorter, box_h
            else:
                scale_w, scale_h = box_w, box_w / shorter
        else:
            if height_limited:
                scale_w, scale_h = box_h / shorter, box_h
            else:
                scale_w, scale_h = box_w, box_w * shorter

    center = mupdf.fz_make_point(
            (target.x0 + target.x1) / 2,
            (target.y0 + target.y1) / 2,
            )
    steps = (
            rotation,
            mupdf.fz_scale(scale_w, scale_h),
            mupdf.fz_translate(center.x, center.y),
            )
    matrix = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in steps:
        matrix = mupdf.fz_concat(matrix, step)
    return matrix
| 17687 | |
| 17688 | |
def detect_super_script(line, ch):
    '''
    Tell whether character `ch` sits above the start of `line`.

    Only lines with wmode 0 and direction (1, 0) are examined; for all
    others 0 is returned. Otherwise the result is True if the y origin of
    `ch` is smaller than the y origin of the line's first character by more
    than a tenth of the character size, else False.
    '''
    line_data = line.m_internal
    if line_data.wmode != 0 or line_data.dir.x != 1 or line_data.dir.y != 0:
        return 0
    char_data = ch.m_internal
    threshold = line_data.first_char.origin.y - char_data.size * 0.1
    return char_data.origin.y < threshold
| 17693 | |
| 17694 | |
def dir_str(x):
    '''
    Return a multi-line description of `x`: a header line with `x`, its
    type and the number of names in dir(x), then one line per name.
    '''
    header = f'{x} {type(x)} ({len(dir(x))}):\n'
    body = ''.join(f' {name}\n' for name in dir(x))
    return header + body
| 17700 | |
| 17701 | |
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
    operator.

    Notes:
        Every character of the input becomes either 2 or 4 hex digits. Text
        that already starts with "[<" and ends with ">]" is returned as is;
        empty text gives "[<>]".
    Args:
        simple: 2 hex digits per character. Without `glyphs` the character
            code is used, with `glyphs` (Symbol, ZapfDingbats) the glyph
            number glyphs[code][0]. Characters with a code above 255
            become "b7".
        not simple: 4 hex digits per character.
            ordering < 0: use the glyph number glyphs[code][0]
            ordering >= 0: a CJK font, use the character code
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text

    if not text:
        return "[<>]"

    def two_digits(char):
        code = ord(char)
        if not code < 256:
            return "b7"
        if glyphs is None:  # not Symbol, not ZapfDingbats: use char code
            return "%02x" % code
        return "%02x" % glyphs[code][0]

    def four_digits(char):
        code = ord(char)
        if ordering < 0:  # not a CJK font: use the glyphs
            return "%04x" % glyphs[code][0]
        return "%04x" % code  # CJK: use the char codes

    encode = two_digits if simple else four_digits
    return "[<" + "".join(map(encode, text)) + ">]"
| 17737 | |
| 17738 | |
def get_pdf_str(s: str) -> str:
    """Return `s` as a PDF string object, choosing the coding by content.

    Notes:
        * Empty input gives "()".
        * If any character has a code above 255, the result is the hex string
          "<feff...>": the UTF-16BE encoding of `s` preceded by a BOM.
        * Otherwise the result is "(...)" where printable ASCII is copied
          (with "(", ")" and backslash escaped by a backslash), codes above
          127 become 3-digit octal escapes, the control characters backspace,
          tab, line feed, form feed and carriage return become their
          two-character escapes, and every other character is replaced by the
          octal escape for 0xB7.
    """
    if not s:
        return "()"

    # A single character beyond the 8-bit range forces UTF-16BE for the whole
    # string, so decide this before building anything.
    if any(ord(ch) > 255 for ch in s):
        encoded = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
        return "<" + encoded.hex() + ">"  # brackets indicate hex

    control_escapes = {
        8: "\\b",   # backspace
        9: "\\t",   # tab
        10: "\\n",  # line feed
        12: "\\f",  # form feed
        13: "\\r",  # carriage return
    }

    pieces = []
    for ch in s:
        code = ord(ch)
        if 31 < code < 127:  # printable ASCII
            pieces.append("\\" + ch if ch in ("(", ")", "\\") else ch)
        elif code > 127:  # 8-bit, beyond ASCII
            pieces.append("\\%03o" % code)
        else:
            # Supported control characters, else replace by 0xB7.
            pieces.append(control_escapes.get(code, "\\267"))

    return "(" + "".join(pieces) + ")"
| 17790 | |
| 17791 | |
def get_tessdata(tessdata=None):
    """Detect Tesseract language support folder.

    This function is used to enable OCR via Tesseract even if the language
    support folder is not specified directly or in environment variable
    TESSDATA_PREFIX.

    * If <tessdata> is set we return it directly.

    * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.

    * Otherwise we search for a Tesseract installation and return its language
      support folder.

    * Otherwise we raise an exception.

    Args:
        tessdata: optional path of the language support folder.
    Returns:
        The path of the language support folder.
    Raises:
        RuntimeError: if no folder was given and none could be located.
    """
    if tessdata:
        return tessdata
    tessdata = os.getenv("TESSDATA_PREFIX")
    if tessdata:  # use environment variable if set
        return tessdata

    # Try to locate the tesseract-ocr installation.

    import subprocess

    def run(command):
        # All commands are fixed strings built in this function.
        return subprocess.run(command, shell=True, capture_output=True, check=False, text=True)

    # Tesseract itself reports its tessdata folder when listing languages.
    cp = run('tesseract --list-langs')
    if cp.returncode == 0:
        m = re.search('List of available languages in "(.+)"', cp.stdout)
        if m:
            return m.group(1)

    # Windows systems:
    if sys.platform == "win32":
        cp = run("where tesseract")
        response = cp.stdout.strip()
        if cp.returncode or not response:
            raise RuntimeError("No tessdata specified and Tesseract is not installed")
        # `where` prints one line per match; use the first executable found.
        executable = response.splitlines()[0].strip()
        dirname = os.path.dirname(executable)  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        # Should not happen. The message must be an f-string so that the
        # folder name is actually shown.
        raise RuntimeError(f"No tessdata specified and Tesseract installation has no {tessdata} folder")

    # Unix-like systems:
    attempts = []
    for name in 'tesseract-ocr', 'tesseract':
        cp = run(f'whereis {name}')
        if cp.returncode != 0:
            continue
        response = cp.stdout.strip().split()
        if len(response) != 2:
            continue
        # Search tessdata in the folder structure below the reported location.
        dirname = response[1]  # contains tesseract-ocr installation folder
        pattern = f"{dirname}/*/tessdata"
        attempts.append(pattern)
        tessdatas = sorted(glob.glob(pattern))
        if tessdatas:
            return tessdatas[-1]

    if attempts:
        text = 'No tessdata specified and no match for:\n'
        text += ''.join(f' {attempt}' for attempt in attempts)
        raise RuntimeError(text)
    raise RuntimeError('No tessdata specified and Tesseract is not installed')
| 17860 | |
| 17861 | |
def css_for_pymupdf_font(
        fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
        ) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Every key of `fitz_fontdescriptors` that starts with `fontcode` is
    selected. For each selected font, its buffer is loaded and added to
    `archive` under the font's key, and one @font-face rule is appended to the
    CSS source. All rules share one font-family name, so bold / italic
    variants are distinguished by the font-weight / font-style entries only.

    Note:
        The font naming convention in package pymupdf-fonts is "fontcode<sf>",
        where the suffix "sf" is either empty or one of "it", "bo" or "bi",
        for the regular, italic, bold or bold-italic variant. For example,
        font code "notos" refers to fonts
        "notos" - "Noto Sans Regular"
        "notosit" - "Noto Sans Italic"
        "notosbo" - "Noto Sans Bold"
        "notosbi" - "Noto Sans Bold Italic"

    Args:
        fontcode: (str) prefix selecting the font variants to include.
            At most 4 matching fonts are accepted.
        CSS: (str) CSS source to append to; None is treated as "".
        archive: (Archive, mandatory) receives the font buffers.
        name: (str) font-family name to use instead of 'fontcode'.
    Returns:
        The CSS source with the new @font-face rules appended.
    Raises:
        ValueError: if 'archive' is not an Archive, if no font matches
            'fontcode', or if more than 4 fonts match.
    """
    # @font-face template: family name, url, bold text, italic text
    template = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")
    if CSS is None:
        CSS = ""

    # font codes starting with the passed-in string
    matches = [key for key in fitz_fontdescriptors.keys() if key.startswith(fontcode)]
    if not matches:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(matches) > 4:
        raise ValueError("fontcode too short")
    family = fontcode if name is None else name

    for key in matches:
        descriptor = fitz_fontdescriptors[key]
        is_bold = descriptor["bold"]
        is_italic = descriptor["italic"]
        archive.add(descriptor["loader"](), key)  # load buffer, update archive
        weight = "font-weight: bold;" if is_bold else ""
        slant = "font-style: italic;" if is_italic else ""
        CSS += template % (family, key, weight, slant)
    return CSS
| 17929 | |
| 17930 | |
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        text: the string to measure.
        fontname: name of the font (compared in lower case).
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
    Raises:
        ValueError: if the font is neither in Base14_fontdict nor one of the
            listed CJK font names.
    """
    fontname = fontname.lower()
    basename = Base14_fontdict.get(fontname, None)

    # Symbol and ZapfDingbats have their own glyph width tables.
    if basename == "Symbol":
        width_table = symbol_glyphs
    else:
        width_table = None
    if basename == "ZapfDingbats":
        width_table = zapf_glyphs

    if width_table is not None:
        # Characters beyond code 255 are measured like code 183.
        total = sum(
                [
                    width_table[ord(ch)][1] if ord(ch) < 256 else width_table[183][1]
                    for ch in text
                ]
                )
        return total * fontsize

    if fontname in Base14_fontdict.keys():
        return util_measure_string(text, Base14_fontdict[fontname], fontsize, encoding)

    cjk_names = (
            "china-t",
            "china-s",
            "china-ts",
            "china-ss",
            "japan",
            "japan-s",
            "korea",
            "korea-s",
            )
    if fontname in cjk_names:
        # Each character counts as one full font size.
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % fontname)
| 17971 | |
| 17972 | |
def image_profile(img: ByteString) -> dict:
    """Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (any object with a `read` attribute).
    Returns:
        The result of `TOOLS.image_profile()` for the image data: a
        dictionary with keys width, height, colorspace.n, bpc, type, ext and
        size, where 'type' is the MuPDF image type (0 to 14) and 'ext' the
        suitable file extension.
    Raises:
        ValueError: if `img` is none of the supported kinds.
    """
    kind = type(img)
    if kind is io.BytesIO:
        data = img.getvalue()  # whole buffer
    elif hasattr(img, "read"):
        data = img.read()
    elif kind in (bytes, bytearray):
        data = img
    else:
        raise ValueError("bad argument 'img'")

    return TOOLS.image_profile(data)
| 17993 | |
| 17994 | |
def jm_append_merge(dev):
    '''
    Append the current path (`dev.pathdict`) to `dev.out`, or merge it into
    the last path of that list.

    * If `dev.method` is callable or otherwise truthy, the attribute of
      `dev.out` named by `dev.method` is called with the path dictionary,
      `dev.pathdict` is set to None and nothing else happens.
    * The path is appended (as a copy) if it is the first path, if it is not a
      stroke path ('s'), if the previous path is not a fill path ('f'), or if
      both paths have different item lists.
    * Otherwise the keys missing in the previous path are copied from the
      current path and the previous path's type becomes 'fs'.

    After appending or merging, `dev.pathdict` is cleared.
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:
        # Callback mode. `dev.method` cannot be None here, because None is
        # neither callable nor truthy.
        outcome = getattr(dev.out, dev.method)(dev.pathdict)
        if not outcome:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def flush():
        # Store a copy, then empty the working dictionary.
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    assert isinstance(dev.out, list)
    if not dev.out:  # always append first path
        return flush()

    if dev.pathdict[dictkey_type] != 's':  # only stroke paths can be merged
        return flush()

    previous = dev.out[-1]
    if previous[dictkey_type] != 'f':  # ... and only into a fill path
        return flush()

    # Last check: "f" and "s" must have the same list of items.
    if previous[dictkey_items] != dev.pathdict[dictkey_items]:
        return flush()

    # Merge without overriding keys that the previous path already has.
    merged = True
    try:
        for key, value in dev.pathdict.items():
            previous.setdefault(key, value)
    except Exception:
        if g_exceptions_verbose: exception_info()
        merged = False

    if merged:
        previous[dictkey_type] = 'fs'
        dev.pathdict.clear()
    else:
        message("could not merge stroke and fill path")
        flush()
| 18064 | |
| 18065 | |
def jm_bbox_add_rect(dev, ctx, rect, code):
    '''
    Appends a tuple `(code, rect)` to `dev.result`, with `rect` converted by
    `JM_py_from_rect()`. If `dev.layers` is true, `dev.layer_name` is added as
    third tuple item.
    '''
    entry = (code, JM_py_from_rect(rect))
    if dev.layers:
        entry += (dev.layer_name,)
    dev.result.append(entry)
| 18071 | |
| 18072 | |
def jm_bbox_fill_image(dev, ctx, image, ctm, alpha, color_params):
    '''
    Records the unit rectangle transformed by `ctm` with code "fill-image".
    '''
    unit = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    bbox = mupdf.ll_fz_transform_rect(unit.internal(), ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "fill-image")
| 18077 | |
| 18078 | |
def jm_bbox_fill_image_mask(dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    '''
    Records the unit rectangle transformed by `ctm` with code "fill-imgmask".
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18085 | |
| 18086 | |
def jm_bbox_fill_path(dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Records the bounds of `path` (no stroke state) under `ctm` with code
    "fill-path". Exceptions are re-raised, after optional diagnostics.
    '''
    even_odd = bool(even_odd)  # normalised, but not used any further
    try:
        bbox = mupdf.ll_fz_bound_path(path, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-path")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18094 | |
| 18095 | |
def jm_bbox_fill_shade(dev, ctx, shade, ctm, alpha, color_params):
    '''
    Records the bounds of `shade` under `ctm` with code "fill-shade".
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_shade(shade, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18102 | |
| 18103 | |
def jm_bbox_stroke_text(dev, ctx, text, stroke, ctm, *args):
    '''
    Records the bounds of `text` with stroke state `stroke` under `ctm` with
    code "stroke-text". Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18110 | |
| 18111 | |
def jm_bbox_fill_text(dev, ctx, text, ctm, *args):
    '''
    Records the bounds of `text` (no stroke state) under `ctm` with code
    "fill-text". Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18118 | |
| 18119 | |
def jm_bbox_ignore_text(dev, ctx, text, ctm):
    '''
    Records the bounds of `text` (no stroke state) under `ctm` with code
    "ignore-text".
    '''
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "ignore-text")
| 18122 | |
| 18123 | |
def jm_bbox_stroke_path(dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Records the bounds of `path` with stroke state `stroke` under `ctm` with
    code "stroke-path". Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_path(path, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18130 | |
| 18131 | |
def jm_checkquad(dev):
    '''
    Check whether the last 4 line items of the current path form a quad.

    The lines are taken to be a polyline already, i.e. each line starts where
    the previous one ended. So it is sufficient to check for a closed polygon:
    the end point of the last line must equal the start point of the first.

    Returns 0 if this is not the case. Otherwise the 4 line items are replaced
    by one item ("qu", quad), `dev.linecount` is reset and 1 is returned.
    '''
    items = dev.pathdict[dictkey_items]
    count = len(items)

    # Start points of the last 4 lines, in path order.
    corners = []
    for index in range(count - 4, count):
        segment = items[index]
        corners.append(JM_point_from_py(segment[1]))

    closing = JM_point_from_py(segment[2])  # end point of the last line
    if closing.x != corners[0].x or closing.y != corners[0].y:
        return 0  # not a polygon

    # We have detected a quad.
    dev.linecount = 0  # reset this

    # A quad item is ("qu", (ul, ur, ll, lr)). In path order, the corners are
    # ul, ll, lr, ur.
    ul, ll, lr, ur = corners
    quad = mupdf.fz_make_quad(ul.x, ul.y, ur.x, ur.y, ll.x, ll.y, lr.x, lr.y)

    items[count - 4] = ('qu', JM_py_from_quad(quad))  # replace item -4
    del items[count - 3 : count]  # delete remaining 3 items
    return 1
| 18169 | |
| 18170 | |
def jm_checkrect(dev):
    '''
    Check whether the last 3 path items represent a rectangle.

    Resets `dev.linecount` in any case. If a rectangle is found, the 3 items
    are replaced by one item ("re", rect, orientation) and 1 is returned,
    otherwise 0.
    '''
    dev.linecount = 0  # reset line count
    items = dev.pathdict[dictkey_items]
    count = len(items)

    first = items[count - 3]
    ll = JM_point_from_py(first[1])
    lr = JM_point_from_py(first[2])

    # The middle line is not needed.
    third = items[count - 1]
    ur = JM_point_from_py(third[1])
    ul = JM_point_from_py(third[2])

    # Assumption:
    # When decomposing rects, MuPDF always starts with a horizontal line,
    # followed by a vertical line, followed by a horizontal line.
    # First line: (ll, lr), third line: (ul, ur).
    axis_parallel = (
            ll.y == lr.y
            and ll.x == ul.x
            and ur.y == ul.y
            and ur.x == lr.x
            )
    if not axis_parallel:
        return 0  # not a rectangle

    # If the 1st line is below the 3rd line, record anti-clockwise (+1), else
    # clockwise (-1) orientation.
    if ul.y < lr.y:
        orientation = 1
        bounds = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
    else:
        orientation = -1
        bounds = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)

    items[count - 3] = ('re', JM_py_from_rect(bounds), orientation)  # replace item -3
    del items[count - 2 : count]  # delete remaining 2 items
    return 1
| 18218 | |
| 18219 | |
def jm_trace_text(dev, text, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Calls jm_trace_text_span() for every span in the linked list that starts
    at `text.head` and is chained via `.next`.
    '''
    current = text.head
    while current:
        jm_trace_text_span(dev, current, type_, ctm, colorspace, color, alpha, seqno)
        current = current.next
| 18227 | |
| 18228 | |
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Appends a dictionary describing one text span to `dev.out`.

    Python version of:
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)

    The dictionary contains font properties, writing direction, color, size,
    bbox and a tuple 'chars' with one item (ucs, gid, origin, bbox) per
    character of the span.
    '''
    assert isinstance(span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan(span)
    assert isinstance(ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix(ctm)

    font = span.font()
    wmode = span.m_internal.wmode
    fontname = JM_font_name(font)

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    direction = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat)  # writing direction
    fsize = math.sqrt(direction.x * direction.x + direction.y * direction.y)  # font size
    direction = mupdf.fz_normalize_vector(direction)

    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # effective ascender / descender
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)

    # font flags
    fflags = 0
    fflags += mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD

    space_adv = 0
    last_adv = 0

    span_bbox = mupdf.FzRect()
    rot = mupdf.fz_make_matrix(direction.x, direction.y, -direction.y, direction.x, 0, 0)
    if direction.x == -1:  # left-right flip
        rot.d = 1

    # Same condition for all characters: is the text flipped up-down?
    upside_down = (
            (mat.d > 0 and (direction.x == 1 or direction.x == -1))
            or
            (mat.b != 0 and mat.b == -mat.c)
            )

    # walk through characters of span
    chars = []
    for i in range(span.m_internal.len):
        item = span.items(i)
        adv = 0
        if item.gid >= 0:
            adv = mupdf.fz_advance_glyph(font, item.gid, wmode)
        adv *= fsize
        last_adv = adv
        if item.ucs == 32:
            space_adv = adv

        origin = mupdf.fz_transform_point(mupdf.fz_make_point(item.x, item.y), ctm)

        # Rotation `rot` around the character origin.
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, origin.x, origin.y))

        x0 = origin.x
        x1 = x0 + adv
        if upside_down:
            y0 = origin.y + dscsize
            y1 = origin.y + ascsize
        else:
            y0 = origin.y - ascsize
            y1 = origin.y - dscsize
        char_bbox = mupdf.fz_transform_rect(mupdf.fz_make_rect(x0, y0, x1, y1), m1)

        chars.append(
                (
                    item.ucs,
                    item.gid,
                    (origin.x, origin.y),
                    (char_bbox.x0, char_bbox.y0, char_bbox.x1, char_bbox.y1),
                )
                )
        if i == 0:
            span_bbox = char_bbox
        else:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
    chars = tuple(chars)

    if not space_adv:
        if fflags & TEXT_FONT_MONOSPACED:
            space_adv = last_adv  # for mono, any char width suffices
        else:
            # The fallback font returned here is not used any further.
            gid, _ = mupdf.fz_encode_character_with_fallback(font, 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(font, gid, wmode)
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv

    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]  # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    # width of character border; default: 5% of font size
    linewidth = dev.linewidth if dev.linewidth > 0 else fsize * 0.05

    # make the span dictionary
    span_dict = {
            'dir': JM_py_from_point(direction),
            'font': JM_EscapeStrFromStr(fontname),
            'wmode': wmode,
            'flags': fflags,
            "bidi_lvl": span.m_internal.bidi_level,
            "bidi_dir": span.m_internal.markup_dir,
            'ascender': asc,
            'descender': dsc,
            'colorspace': 3,
            'color': rgb,
            'size': fsize,
            "opacity": alpha,
            "linewidth": linewidth,
            "spacewidth": space_adv,
            'type': type_,
            'bbox': JM_py_from_rect(span_bbox),
            'layer': dev.layer_name,
            "seqno": seqno,
            'chars': chars,
            }
    dev.out.append(span_dict)
| 18379 | |
| 18380 | |
def jm_lineart_color(colorspace, color):
    '''
    Returns `color` converted from `colorspace` to RGB as the first 3 items of
    the conversion result, or an empty tuple if `colorspace` is false.
    Exceptions are re-raised, after optional diagnostics.
    '''
    if not colorspace:
        return ()
    try:
        # Need to be careful to use named Python objects to ensure that the
        # values we pass to mupdf.ll_fz_convert_color() are valid. E.g. doing:
        #
        #   rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
        #
        # - seems to end up with a corrupted `params`.
        #
        target = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        converted = mupdf.ll_fz_convert_color(
                colorspace,
                color,
                target.m_internal,
                None,
                params.internal(),
                )
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
    return converted[:3]
| 18407 | |
| 18408 | |
def jm_lineart_drop_device(dev, ctx):
    '''
    Resets the device: `dev.out` is replaced by an empty list if it currently
    is a list, and `dev.scissors` is always replaced by an empty list.
    '''
    out_is_list = isinstance(dev.out, list)
    if out_is_list:
        dev.out = []
    dev.scissors = []
| 18413 | |
| 18414 | |
def jm_lineart_fill_path(dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a filled path.

    Lets jm_lineart_path() build `dev.pathdict` for `path`, completes it with
    the fill properties (type "f", even_odd flag, opacity, color, rectangle,
    sequence number, layer and - if `dev.clips` is true - level), hands it to
    jm_append_merge() and increments `dev.seqno`. Nothing is recorded if
    `dev.pathdict` is None after jm_lineart_path().
    Exceptions are re-raised, after optional diagnostics.
    '''
    even_odd = bool(even_odd)
    try:
        assert isinstance(ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix(ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path(dev, ctx, path)

        pathdict = dev.pathdict
        if pathdict is None:
            return

        pathdict[dictkey_type] = "f"
        pathdict["even_odd"] = even_odd
        pathdict["fill_opacity"] = alpha
        pathdict["fill"] = jm_lineart_color(colorspace, color)
        pathdict[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        pathdict["seqno"] = dev.seqno
        pathdict['layer'] = dev.layer_name
        if dev.clips:
            pathdict['level'] = dev.depth

        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18447 | |
| 18448 | |
| 18449 # There are 3 text trace types: | |
| 18450 # 0 - fill text (PDF Tr 0) | |
| 18451 # 1 - stroke text (PDF Tr 1) | |
| 18452 # 3 - ignore text (PDF Tr 3) | |
| 18453 | |
def jm_lineart_fill_text(dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for filled text: traces `text` with text trace type 0
    using the current `dev.seqno`, then increments `dev.seqno`.
    '''
    fill_text_type = 0  # text trace type for fill text
    jm_trace_text(dev, text, fill_text_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
| 18466 | |
| 18467 | |
def jm_lineart_ignore_text(dev, text, ctm):
    '''
    Device callback for ignored text: traces `text` with text trace type 3,
    without colorspace and color and with opacity 1, using the current
    `dev.seqno`, then increments `dev.seqno`.
    '''
    ignore_text_type = 3  # text trace type for ignored text
    jm_trace_text(dev, text, ignore_text_type, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
| 18472 | |
| 18473 | |
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker that turns the segments of a MuPDF path into drawing items.

    An instance is handed to `mupdf.fz_walk_path()`. The callbacks transform
    incoming coordinates with `dev.ctm`, maintain `dev.pathrect`,
    `dev.lastpoint`, `dev.firstpoint`, `dev.havemove` and `dev.linecount`, and
    append item tuples to `dev.pathdict[dictkey_items]`.
    '''

    def __init__(self, dev):
        super().__init__()
        # Route the virtual callbacks to the Python methods below.
        self.use_virtual_moveto()
        self.use_virtual_lineto()
        self.use_virtual_curveto()
        self.use_virtual_closepath()
        self.dev = dev

    def closepath(self, ctx):  # trace_close().
        '''Close the current sub-path.'''
        dev = self.dev
        try:
            # Three consecutive lines followed by a close may be a rectangle.
            if dev.linecount == 3 and jm_checkrect(dev):
                return
            dev.linecount = 0  # reset number of consecutive lines

            if dev.havemove:
                # Add the closing line back to the start point explicitly.
                if dev.lastpoint != dev.firstpoint:
                    item = (
                        "l",
                        JM_py_from_point(dev.lastpoint),
                        JM_py_from_point(dev.firstpoint),
                    )
                    dev.pathdict[dictkey_items].append(item)
                    dev.lastpoint = dev.firstpoint
                dev.pathdict["closePath"] = False
            else:
                dev.pathdict["closePath"] = True

            dev.havemove = 0
        except Exception:
            if g_exceptions_verbose: exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):  # trace_curveto().
        '''Append a "c" (curve) item from the last point via two control points to (x3, y3).'''
        dev = self.dev
        try:
            dev.linecount = 0  # reset number of consecutive lines
            transformed = []
            for x, y in ((x1, y1), (x2, y2), (x3, y3)):
                pt = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
                dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, pt)
                transformed.append(pt)
            p1, p2, p3 = transformed

            item = (
                "c",
                JM_py_from_point(dev.lastpoint),
                JM_py_from_point(p1),
                JM_py_from_point(p2),
                JM_py_from_point(p3),
            )
            dev.lastpoint = p3
            dev.pathdict[dictkey_items].append(item)
        except Exception:
            if g_exceptions_verbose: exception_info()
            raise

    def lineto(self, ctx, x, y):  # trace_lineto().
        '''Append an "l" (line) item from the last point to (x, y).'''
        dev = self.dev
        try:
            target = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
            dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, target)
            item = (
                'l',
                JM_py_from_point(dev.lastpoint),
                JM_py_from_point(target),
            )
            dev.lastpoint = target
            dev.pathdict[dictkey_items].append(item)
            dev.linecount += 1  # counts consecutive lines
            if dev.linecount == 4 and dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(dev)
        except Exception:
            if g_exceptions_verbose: exception_info()
            raise

    def moveto(self, ctx, x, y):  # trace_moveto().
        '''Start a new sub-path at (x, y).'''
        dev = self.dev
        try:
            dev.lastpoint = mupdf.fz_transform_point(
                    mupdf.fz_make_point(x, y),
                    dev.ctm,
                    )
            # The first point of a path initialises the path rectangle.
            if mupdf.fz_is_infinite_rect(dev.pathrect):
                dev.pathrect = mupdf.fz_make_rect(
                        dev.lastpoint.x,
                        dev.lastpoint.y,
                        dev.lastpoint.x,
                        dev.lastpoint.y,
                        )
            dev.firstpoint = dev.lastpoint
            dev.havemove = 1
            dev.linecount = 0  # reset number of consecutive lines
        except Exception:
            if g_exceptions_verbose: exception_info()
            raise
| 18587 | |
| 18588 | |
def jm_lineart_path(dev, ctx, path):
    '''
    Build the "items" list of the path dictionary for `path`.

    * resets `dev.pathrect`, `dev.linecount` and `dev.lastpoint`
    * starts a fresh `dev.pathdict` with an empty items list
    * lets fz_walk_path() feed the segments to a `Walker`
    * sets `dev.pathdict` to None if no items resulted
    '''
    try:
        dev.pathrect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint(0, 0)
        dev.pathdict = {dictkey_items: []}

        # First time we create a Walker instance is slow, e.g. 0.3s, then later
        # times run in around 0.01ms. If Walker is defined locally instead of
        # globally, each time takes 0.3s.
        walker = Walker(dev)

        # Unlike fz_run_page(), fz_path_walker callbacks are not passed a
        # pointer to the struct, instead they get an arbitrary void*. The
        # underlying C++ Director callbacks use this void* to identify the
        # fz_path_walker instance so in turn we need to pass
        # arg=walker.m_internal.
        kept_path = mupdf.FzPath(mupdf.ll_fz_keep_path(path))
        mupdf.fz_walk_path(kept_path, walker, walker.m_internal)

        # Nothing collected: signal this to the caller.
        if not dev.pathdict[dictkey_items]:
            dev.pathdict = None
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18623 | |
| 18624 | |
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked paths: record the path as an 's' entry.

    Collects the path items via jm_lineart_path(), adds the stroke properties
    (opacity, color, width, caps, join, dashes), the path rectangle, layer and
    sequence number, and hands the dictionary to jm_append_merge().
    '''
    try:
        assert isinstance( ctm, mupdf.fz_matrix)

        # Scale factor applied to width and dashes; only derived from the
        # matrix when its relevant components have equal magnitude.
        if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
            factor = abs(ctm.a)
        elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
            factor = abs(ctm.b)
        else:
            factor = 1
        dev.pathfactor = factor
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path( dev, ctx, path)
        if dev.pathdict is None:
            return

        entry = dev.pathdict
        entry[dictkey_type] = 's'
        entry['stroke_opacity'] = alpha
        entry['color'] = jm_lineart_color( colorspace, color)
        entry[dictkey_width] = dev.pathfactor * stroke.linewidth
        entry['lineCap'] = (stroke.start_cap, stroke.dash_cap, stroke.end_cap)
        # NOTE(review): the join style is multiplied by the scale factor here;
        # kept as found - confirm whether this is intended.
        entry['lineJoin'] = dev.pathfactor * stroke.linejoin
        entry.setdefault('closePath', False)

        # output the "dashes" string
        if stroke.dash_len:
            # mupdf.floats_getitem() is the SWIG-generated accessor for
            # float *stroke.dash_list[].
            dash_values = ''.join(
                    f'{_format_g(dev.pathfactor * mupdf.floats_getitem( stroke.dash_list, i))} '
                    for i in range( stroke.dash_len)
                    )
            buff = mupdf.fz_new_buffer( 256)
            mupdf.fz_append_string(
                    buff,
                    "[ " + dash_values + f'] {_format_g(dev.pathfactor * stroke.dash_phase)}',
                    )
            entry['dashes'] = buff
        else:
            entry['dashes'] = '[] 0'

        entry[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        entry['layer'] = dev.layer_name
        entry['seqno'] = dev.seqno
        if dev.clips:
            entry['level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1

    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18678 | |
| 18679 | |
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Device callback for clip paths: record the path as a 'clip' entry.

    Does nothing unless `dev.clips` is set. Increases `dev.depth` after the
    entry was handed to jm_append_merge().
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)  # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return

    entry = dev.pathdict
    entry[dictkey_type] = 'clip'
    entry['even_odd'] = bool(even_odd)
    entry.setdefault('closePath', False)
    entry['scissor'] = JM_py_from_rect(compute_scissor(dev))
    entry['level'] = dev.depth
    entry['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
| 18699 | |
| 18700 | |
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Device callback for stroke-clip paths: record the path as a 'clip' entry.

    Does nothing unless `dev.clips` is set. The entry has 'even_odd' set to
    None. Increases `dev.depth` after the entry was handed to
    jm_append_merge().
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)  # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    # Use the dictkey_type constant (as jm_lineart_clip_path does), not the
    # literal string 'dictkey_type', so the entry gets a proper type key.
    dev.pathdict[dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
| 18719 | |
| 18720 | |
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''Device callback for stroke-text clips: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
| 18726 | |
| 18727 | |
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''Device callback for text clips: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
| 18733 | |
| 18734 | |
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''Device callback for image-mask clips: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
| 18740 | |
| 18741 | |
def jm_lineart_pop_clip(dev, ctx):
    '''
    Device callback ending a clip: drop the innermost scissor and go one
    level up. Does nothing if clips are not recorded or no scissor is stored.
    '''
    if dev.clips and dev.scissors:
        dev.scissors.pop()
        dev.depth -= 1
| 18750 | |
| 18751 | |
def jm_lineart_begin_layer(dev, ctx, name):
    '''Device callback starting a layer: remember its name ("" if none is given).'''
    dev.layer_name = name if name else ""
| 18757 | |
| 18758 | |
def jm_lineart_end_layer(dev, ctx):
    '''Device callback ending a layer: clear the remembered layer name.'''
    dev.layer_name = ""
| 18761 | |
| 18762 | |
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    Device callback starting a group: record a "group" entry and go one
    level deeper. Does nothing unless `dev.clips` is set.
    '''
    if not dev.clips:
        return
    # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}",
    dev.pathdict = dict(
            type="group",
            rect=JM_py_from_rect(bbox),
            isolated=bool(isolated),
            knockout=bool(knockout),
            blendmode=mupdf.fz_blendmode_name(blendmode),
            opacity=alpha,
            level=dev.depth,
            layer=dev.layer_name,
            )
    jm_append_merge(dev)
    dev.depth += 1
| 18779 | |
| 18780 | |
def jm_lineart_end_group(dev, ctx):
    '''Device callback ending a group: go one level up if clips are recorded.'''
    if dev.clips:
        dev.depth -= 1
| 18786 | |
| 18787 | |
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked text: trace the text, then advance the
    sequence number.
    '''
    # The literal 1 is passed through to jm_trace_text() unchanged
    # (presumably the text type code for "stroke" - verify there).
    jm_trace_text(
            dev,
            text,
            1,
            ctm,
            colorspace,
            color,
            alpha,
            dev.seqno,
            )
    dev.seqno += 1
| 18791 | |
| 18792 | |
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Device callback for stroked paths that only remembers the stroke's line
    width on the device and advances the sequence number.
    '''
    dev.linewidth = stroke.linewidth
    jm_increase_seqno(dev, ctx)
| 18796 | |
| 18797 | |
def jm_increase_seqno( dev, ctx, *vargs):
    '''
    Generic device callback that only advances `dev.seqno` by one.

    Extra positional arguments are accepted and ignored, so the function can
    serve for callbacks with differing signatures.
    '''
    try:
        dev.seqno = dev.seqno + 1
    except Exception:
        if g_exceptions_verbose: exception_info()
        raise
| 18804 | |
| 18805 | |
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute the matrix that maps the line from p1 to p2 onto the x-axis.

    The line keeps its length and p1 * matrix = Point(0, 0).

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at
        the same distance to Point(0,0). Will always combine a rotation and a
        translation.
    """
    start, end = Point(p1), Point(p2)
    return Matrix(util_hor_matrix(start, end))
| 18820 | |
| 18821 | |
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''Filter options whose image_filter callback forwards to JM_image_filter().'''

    def __init__(self):
        super().__init__()
        # Route the virtual image_filter callback to the method below.
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        '''Forward the image with its matrix (wrapped as FzMatrix) to JM_image_filter().'''
        assert isinstance(ctm, mupdf.fz_matrix)
        JM_image_filter(self, mupdf.FzMatrix(ctm), name, image)
        # cppyy doesn't appear to treat returned None as nullptr, resulting
        # in obscure 'python exception' exception - return 0 in that case.
        return 0 if mupdf_cppyy else None
| 18834 | |
| 18835 | |
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device whose fill/stroke/text/shade/image callbacks are the jm_bbox_*()
    functions; `result` and `layers` are stored for their use.
    '''

    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        self.layer_name = ""

        # Enable the virtual callbacks bound as class attributes below.
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

    # Layer tracking is shared with the lineart device.
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    # Callbacks implemented by the jm_bbox_*() functions.
    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
| 18865 | |
| 18866 | |
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    Output that forwards write/seek/tell/truncate to the Python file-like
    object `bio`.
    '''

    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        # Route the virtual callbacks to the methods below.
        self.use_virtual_write()
        self.use_virtual_seek()
        self.use_virtual_tell()
        self.use_virtual_truncate()

    def seek( self, ctx, offset, whence):
        '''Forward to `bio.seek(offset, whence)`.'''
        return self.bio.seek(offset, whence)

    def tell( self, ctx):
        '''Forward to `bio.tell()`.'''
        return self.bio.tell()

    def truncate( self, ctx):
        '''Forward to `bio.truncate()`.'''
        return self.bio.truncate()

    def write(self, ctx, data_raw, data_length):
        '''Convert the raw buffer to Python bytes and forward to `bio.write()`.'''
        return self.bio.write(mupdf.raw_to_python_bytes(data_raw, data_length))
| 18889 | |
| 18890 | |
def compute_scissor(dev):
    '''
    Compute the scissor for the current clip, push it on `dev.scissors` and
    return it.

    Every scissor of a clip is a sub rectangle of the preceding clip scissor
    if the clip level is larger: with scissors already stored, the new one is
    the intersection of the last stored scissor with `dev.pathrect`,
    otherwise it is `dev.pathrect` itself.
    '''
    if dev.scissors is None:
        dev.scissors = []
    if dev.scissors:
        enclosing = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(enclosing, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
| 18907 | |
| 18908 | |
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()
    '''
    def __init__(self, out, clips, method):
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()

        # Path and clip callbacks.
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # NOTE(review): the following three are attribute references only,
        # they are NOT called - so the text callbacks bound below are not
        # enabled as virtual functions here. Kept as found; confirm whether
        # the missing parentheses are intended.
        self.use_virtual_fill_text
        self.use_virtual_stroke_text
        self.use_virtual_ignore_text

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        # Arguments and counters.
        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = clips
        self.method = method

        self.scissors = None
        self.layer_name = ""  # optional content name

        # State maintained while tracing a path.
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    # Path and clip callbacks.
    fill_path = jm_lineart_fill_path
    stroke_path = jm_lineart_stroke_path
    clip_image_mask = jm_lineart_clip_image_mask
    clip_path = jm_lineart_clip_path
    clip_stroke_path = jm_lineart_clip_stroke_path
    clip_text = jm_lineart_clip_text
    clip_stroke_text = jm_lineart_clip_stroke_text

    # Everything else only advances the sequence number.
    fill_text = jm_increase_seqno
    stroke_text = jm_increase_seqno
    ignore_text = jm_increase_seqno

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    pop_clip = jm_lineart_pop_clip

    begin_group = jm_lineart_begin_group
    end_group = jm_lineart_end_group

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
| 18990 | |
| 18991 | |
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()
    '''

    def __init__(self, out):
        super().__init__()

        # Enable the virtual callbacks bound as class attributes below.
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        # Output target and counters.
        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        # Tracing state.
        self.pathdict = {}
        self.scissors = []
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    # Non-text callbacks only advance the sequence number; stroked paths
    # additionally remember the line width.
    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth
    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text
    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
| 19044 | |
| 19045 | |
def ConversionHeader(i: str, filename: OptStr ="unknown"):
    """Return the document header for text output format `i`.

    Args:
        i: output format, compared case-insensitively; one of "html",
            "json", "xml" or "xhtml".
        filename: document name inserted into the "xml" and "json" headers.
            It is inserted verbatim (no escaping).
    Returns:
        The header string, or "" for any other format.
    """
    import textwrap
    t = i.lower()

    html = textwrap.dedent("""
            <!DOCTYPE html>
            <html>
            <head>
            <style>
            body{background-color:gray}
            div{position:relative;background-color:white;margin:1em auto}
            p{position:absolute;margin:0}
            img{position:absolute}
            </style>
            </head>
            <body>
            """)

    # An XML declaration is only valid at the very start of a document, so
    # the newline that follows the opening quotes is removed for the two XML
    # based formats.
    xml = textwrap.dedent("""
            <?xml version="1.0"?>
            <document name="%s">
            """
            % filename
            ).lstrip("\n")

    xhtml = textwrap.dedent("""
            <?xml version="1.0"?>
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
            <style>
            body{background-color:gray}
            div{background-color:white;margin:1em;padding:1em}
            p{white-space:pre-wrap}
            </style>
            </head>
            <body>
            """).lstrip("\n")

    json = '{"document": "%s", "pages": [\n' % filename

    headers = {
            "html": html,
            "json": json,
            "xml": xml,
            "xhtml": xhtml,
            }
    # Plain text (and anything unknown) has no header.
    return headers.get(t, "")
| 19098 | |
| 19099 | |
def ConversionTrailer(i: str):
    """Return the document trailer for text output format `i`.

    The format name is compared case-insensitively; formats other than
    "html", "json", "xml" and "xhtml" have an empty trailer.
    """
    html_end = "</body>\n</html>\n"
    trailers = {
        "html": html_end,
        "json": "]\n}",
        "xml": "</document>\n",
        "xhtml": html_end,
    }
    return trailers.get(i.lower(), "")
| 19119 | |
| 19120 | |
def annot_preprocess(page: "Page") -> int:
    """Prepare the page for inserting an annotation.

    Raises:
        ValueError: if the page's document is not a PDF.
    Returns:
        Old page rotation value. Temporarily sets rotation to 0 when required.
    """
    CheckParent(page)
    if not page.parent.is_pdf:
        raise ValueError("is no PDF")
    rotation = page.rotation
    if rotation:
        # Caller is expected to restore the returned rotation afterwards.
        page.set_rotation(0)
    return rotation
| 19134 | |
| 19135 | |
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Finish the insertion of an annotation.

    Makes `page` the annotation's parent, registers the annotation in the
    page's annotation references (keyed by its id) and sets its ownership
    flag.
    """
    assert isinstance(page, Page)
    assert isinstance(annot, Annot)
    # A strong reference is used here, not weakref.proxy(page).
    annot.parent = page
    page._annot_refs[id(annot)] = annot
    annot.thisown = True
| 19147 | |
| 19148 | |
def canon(c):
    """Return the canonical form of code point `c` for text matching.

    No-break space, line/paragraph separators, CR, LF and TAB become a space;
    ASCII upper case letters become lower case; anything else is unchanged.
    """
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    if c in (0xA0, 0x2028, 0x2029, 0x0D, 0x0A, 0x09):
        return 0x20
    if 0x41 <= c <= 0x5A:   # 'A' .. 'Z'
        return c + 0x20
    return c
| 19160 | |
| 19161 | |
def chartocanon(s):
    """Decode the first rune of `s` with mupdf.fz_chartorune() and
    canonicalise it.

    Returns:
        (n, c): the first value returned by fz_chartorune() and the rune
        after canon().
    """
    assert isinstance(s, str)
    n, rune = mupdf.fz_chartorune(s)
    return n, canon(rune)
| 19167 | |
| 19168 | |
def dest_is_valid(o, page_count, page_object_nums, names_list):
    """Check whether the destination of PDF object `o` can be resolved.

    Looks at a 'GoTo' action under key 'A' and at key 'Dest': string
    destinations must be in `names_list`, other destinations must point to
    one of the objects in `page_object_nums`.

    Returns:
        A false value if a destination is present but invalid, else a true
        value.
    """
    action = mupdf.pdf_dict_get(o, PDF_NAME('A'))
    is_goto = mupdf.pdf_name_eq(
            mupdf.pdf_dict_get(action, PDF_NAME('S')),
            PDF_NAME('GoTo'),
            )
    if is_goto and not string_in_names_list(
            mupdf.pdf_dict_get(action, PDF_NAME('D')),
            names_list,
            ):
        return 0

    dest = mupdf.pdf_dict_get(o, PDF_NAME('Dest'))
    if not dest.m_internal:
        # No 'Dest' entry: nothing further to check.
        return 1
    if mupdf.pdf_is_string(dest):
        return string_in_names_list(dest, names_list)
    first = mupdf.pdf_array_get(dest, 0)
    if not dest_is_valid_page(first, page_object_nums, page_count):
        return 0
    return 1
| 19195 | |
| 19196 | |
def dest_is_valid_page(obj, page_object_nums, pagecount):
    """Return 1 if the object number of `obj` is one of the first
    `pagecount` entries of `page_object_nums`, else 0.

    An object number of 0 is never valid.
    """
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    known = any(page_object_nums[i] == num for i in range(pagecount))
    return 1 if known else 0
| 19206 | |
| 19207 | |
def find_string(s, needle):
    """Find the first position in `s` at which match_string() matches `needle`.

    Returns:
        (start, end): start index of the match and the value returned by
        match_string() offset by that start; (None, None) if nothing matches.
    """
    assert isinstance(s, str)
    for start in range(len(s)):
        matched = match_string(s[start:], needle)
        if matched is not None:
            return start, matched + start
    return None, None
| 19216 | |
| 19217 | |
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns:
        Local time as "D:YYYYMMDDHHMMSS", followed by the offset to UTC as
        "+HH'mm'" or "-HH'mm'". No offset is appended if it is zero.
    '''
    import time
    now = time.localtime()
    # time.timezone / time.altzone are seconds WEST of UTC. altzone only
    # applies while daylight saving time is in effect.
    offset = time.altzone if now.tm_isdst > 0 else time.timezone
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)
    if offset == 0:
        return tstamp
    # Split the absolute value: floor division of a negative offset would
    # round the hour away from zero (e.g. +05:30 would become +06'30').
    hours, rest = divmod(abs(offset), 3600)
    sign = "-" if offset > 0 else "+"
    return "%s%s%02d'%02d'" % (tstamp, sign, hours, rest // 60)
| 19235 | |
| 19236 | |
class ElementPosition:
    """Plain attribute container for element position information.

    The class defines no attributes itself; they are assigned on the
    instance after creation.
    """

    def __init__(self):
        """Create an empty instance."""
| 19242 | |
| 19243 | |
def make_story_elpos():
    """Return a new, empty ElementPosition object."""
    elpos = ElementPosition()
    return elpos
| 19246 | |
| 19247 | |
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    if clip is None:
        clip = page.rect
    clip = Rect(clip)
    # Convert to Point so that any point_like (e.g. a tuple) is accepted -
    # the code below reads the .x / .y attributes.
    start = clip.tl if start is None else Point(start)
    stop = clip.br if stop is None else Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract text of page, clip only, no images, expand ligatures
    blocks = page.get_text(
        "dict", flags=0, clip=clip,
    )["blocks"]

    lines = []  # will return this list of rectangles
    for b in blocks:
        bbox = Rect(b["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in b["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if not lines:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    bboxf = lines.pop(0)
    if bboxf.y0 - start.y <= 0.1 * bboxf.height:  # close enough?
        r = Rect(start.x, bboxf.y0, bboxf.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, bboxf)  # insert again

    if not lines:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    bboxl = lines.pop()
    if stop.y - bboxl.y1 <= 0.1 * bboxl.height:  # close enough?
        r = Rect(bboxl.tl, stop.x, bboxl.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(bboxl)  # append again

    return lines
| 19319 | |
| 19320 | |
def glyph_name_to_unicode(name: str) -> int:
    """Return the code point of the character named `name` in unicodedata.

    Returns 65533 (U+FFFD) if the lookup fails for any reason.
    """
    import unicodedata
    try:
        return ord(unicodedata.lookup(name))
    except Exception:
        return 65533
| 19329 | |
| 19330 | |
def hdist(dir, a, b):
    """Return the absolute value of the dot product of `dir` with the vector
    from `a` to `b`."""
    return mupdf.fz_abs((b.x - a.x) * dir.x + (b.y - a.y) * dir.y)
| 19335 | |
| 19336 | |
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Return a list of (rows x cols) equal sized rectangles.

    Notes:
        A utility to fill a given area with table cells of equal size.
    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Raises:
        ValueError: if rect is empty or infinite.
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")
    origin = rect.tl

    cell_height = rect.height / rows
    cell_width = rect.width / cols
    step_right = (cell_width, 0, cell_width, 0)    # shift to next cell in row
    step_down = (0, cell_height, 0, cell_height)   # shift to next cell below

    # Top row: start with the top-left cell and keep shifting it right.
    cell = Rect(origin, origin.x + cell_width, origin.y + cell_height)
    top_row = [cell]
    for _ in range(1, cols):
        cell += step_right
        top_row.append(cell)

    # Every further row is the previous row shifted down.
    table = [top_row]
    for _ in range(1, rows):
        table.append([above + step_down for above in table[-1]])

    return table
| 19378 | |
| 19379 | |
def util_ensure_widget_calc(annot):
    '''
    Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO

    Creates the CO array if it is missing and appends an indirect reference
    to the annotation unless its xref is already listed.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")  # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            )

    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)

    count = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    listed = any(
            mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, i)) == xref
            for i in range(count)
            )
    if not listed:
        mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
| 19406 | |
| 19407 | |
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    Computes (x0, y0, x1, y1) from the positional <args>; afterwards any of
    the keyword arguments p0, p1, x0, y0, x1, y1 that is not None replaces
    the corresponding value(s), in that order.

    Positional forms accepted:
        ()                          all zeros
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    top-left / bottom-right are 2-item lists or tuples, or Point /
    mupdf.FzPoint / mupdf.fz_point objects. A single argument may be a
    4-item tuple or list, a Rect / IRect / mupdf.FzRect / mupdf.fz_rect, or
    a 2- or 3-item sequence whose flattened items give four numbers.

    Raises Exception for 3 positional args where neither the first nor the
    last is point-like, and for more than 4 positional args.
    '''
    def point_xy(value):
        # Coordinates of a point-like value, or (None, None) if not point-like.
        if isinstance(value, (list, tuple)) and len(value) == 2:
            return value[0], value[1]
        if isinstance(value, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return value.x, value.y
        return None, None

    def flatten(value):
        # Turn a tuple/list/Point/rect/scalar into a sequence of numbers.
        if isinstance(value, tuple):
            return value
        if isinstance(value, Point):
            return value.x, value.y
        if isinstance(value, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return value.x0, value.y0, value.x1, value.y1
        if isinstance(value, list):
            return value
        return (value,)

    def from_positional():
        argc = len(args)
        if argc == 0:
            return 0, 0, 0, 0
        if argc == 1:
            arg = args[0]
            if isinstance(arg, (list, tuple)):
                if len(arg) == 2:
                    first, second = arg
                    ret = (*first, *second)
                    assert len(ret) == 4
                    return ret
                if len(arg) == 3:
                    parts = [flatten(item) for item in arg]
                    ret = (*parts[0], *parts[1], *parts[2])
                    assert len(ret) == 4
                    return ret
            ret = flatten(arg)
            assert len(ret) == 4, f'{arg=} {ret=}'
            return ret
        if argc == 2:
            ret = point_xy(args[0]) + point_xy(args[1])
            assert len(ret) == 4
            return ret
        if argc == 3:
            # Either the first or the last argument must be point-like.
            lead_x, lead_y = point_xy(args[0])
            if (lead_x, lead_y) != (None, None):
                return lead_x, lead_y, args[1], args[2]
            tail_x, tail_y = point_xy(args[2])
            if (tail_x, tail_y) != (None, None):
                return args[0], args[1], tail_x, tail_y
        elif argc == 4:
            return args[0], args[1], args[2], args[3]
        raise Exception( f'Unrecognised args: {args}')

    out_x0, out_y0, out_x1, out_y1 = from_positional()
    # Keyword overrides: points first, then individual coordinates.
    if p0 is not None:
        out_x0, out_y0 = point_xy(p0)
    if p1 is not None:
        out_x1, out_y1 = point_xy(p1)
    if x0 is not None:
        out_x0 = x0
    if y0 is not None:
        out_y0 = y0
    if x1 is not None:
        out_x1 = x1
    if y1 is not None:
        out_y1 = y1
    return out_x0, out_y0, out_x1, out_y1
| 19490 | |
| 19491 | |
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Integer variant of util_make_rect().

    Accepts the same arguments as util_make_rect() and returns
    (x0, y0, x1, y1) as ints: the top-left corner is rounded down with
    math.floor(), the bottom-right corner is rounded up with math.ceil().
    '''
    left, top, right, bottom = util_make_rect(
            *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1,
            )
    return (
            int(math.floor(left)),
            int(math.floor(top)),
            int(math.ceil(right)),
            int(math.ceil(bottom)),
            )
| 19504 | |
| 19505 | |
def util_round_rect( rect):
    '''
    Pass <rect> through mupdf.fz_round_rect() and return the result as
    converted by JM_py_from_irect().
    '''
    fz_rect = JM_rect_from_py(rect)
    rounded = mupdf.fz_round_rect(fz_rect)
    return JM_py_from_irect(rounded)
| 19508 | |
| 19509 | |
def util_transform_rect( rect, matrix):
    '''
    Apply <matrix> to <rect>.

    Delegates to extra.util_transform_rect() when g_use_extra is set;
    otherwise calls mupdf.fz_transform_rect() and converts the result with
    JM_py_from_rect().
    '''
    if not g_use_extra:
        fz_rect = JM_rect_from_py(rect)
        fz_matrix = JM_matrix_from_py(matrix)
        return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
    return extra.util_transform_rect( rect, matrix)
| 19514 | |
| 19515 | |
def util_intersect_rect( r1, r2):
    '''
    Return mupdf.fz_intersect_rect() of <r1> and <r2>, converted with
    JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
| 19523 | |
| 19524 | |
def util_is_point_in_rect( p, r):
    '''
    Return the result of mupdf.fz_is_point_inside_rect() for point <p> and
    rectangle <r>.
    '''
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
| 19530 | |
def util_include_point_in_rect( r, p):
    '''
    Return mupdf.fz_include_point_in_rect() of rectangle <r> and point <p>,
    converted with JM_py_from_rect().
    '''
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
| 19538 | |
| 19539 | |
def util_point_in_quad( P, Q):
    '''
    Return the result of mupdf.fz_is_point_inside_quad() for point <P> and
    quad <Q>.
    '''
    return mupdf.fz_is_point_inside_quad(
            JM_point_from_py(P),
            JM_quad_from_py(Q),
            )
| 19544 | |
| 19545 | |
def util_transform_point( point, matrix):
    '''
    Return mupdf.fz_transform_point() of <point> under <matrix>, converted
    with JM_py_from_point().
    '''
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
| 19553 | |
| 19554 | |
def util_union_rect( r1, r2):
    '''
    Return mupdf.fz_union_rect() of <r1> and <r2>, converted with
    JM_py_from_rect().
    '''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
| 19562 | |
| 19563 | |
def util_concat_matrix( m1, m2):
    '''
    Return mupdf.fz_concat() of <m1> and <m2>, converted with
    JM_py_from_matrix().
    '''
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
| 19571 | |
| 19572 | |
def util_invert_matrix(matrix):
    '''
    Invert a matrix in Python.

    Args:
        matrix: anything accepted by JM_matrix_from_py().
    Returns:
        (0, (a, b, c, d, e, f)) with the inverse's six components if the
        magnitude of the determinant exceeds sys.float_info.epsilon,
        otherwise (1, ()) to signal that inversion is not possible.
    '''
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    # A NaN determinant fails this test too and is reported as
    # non-invertible.
    if abs(det) > sys.float_info.epsilon:
        # Intermediate results are stored in an FzMatrix, exactly as before,
        # so that the numeric results stay unchanged.
        dst = mupdf.FzMatrix()
        rdet = 1 / det
        dst.a = src.d * rdet
        dst.b = -src.b * rdet
        dst.c = -src.c * rdet
        dst.d = a * rdet
        a = -src.e * dst.a - src.f * dst.c
        dst.f = -src.e * dst.b - src.f * dst.d
        dst.e = a
        return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)

    return 1, ()
| 19610 | |
| 19611 | |
def util_measure_string( text, fontname, fontsize, encoding):
    '''
    Return the width of <text> when set in a Base-14 font.

    Args:
        text: (str) the string to measure.
        fontname: name passed to mupdf.fz_new_base14_font().
        fontsize: factor applied to the summed glyph advances.
        encoding: mupdf.PDF_SIMPLE_ENCODING_GREEK selects ISO 8859-7,
            mupdf.PDF_SIMPLE_ENCODING_CYRILLIC selects Windows-1251, any
            other value selects Windows-1252.
    Returns:
        Sum of the glyph advances multiplied by <fontsize>.
    '''
    font = mupdf.fz_new_base14_font(fontname)
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        from_unicode = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        from_unicode = mupdf.fz_windows_1251_from_unicode
    else:
        from_unicode = mupdf.fz_windows_1252_from_unicode

    w = 0
    # Iterate over the code points of the Python string directly. Advancing
    # a str index by the UTF-8 byte count from fz_chartorune() skipped the
    # characters following any multi-byte character.
    for ch in text:
        c = from_unicode(ord(ch))
        if c < 0:
            c = 0xB7  # substitute for characters missing from the encoding
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
| 19632 | |
| 19633 | |
def util_sine_between(C, P, Q):
    '''
    For points C, P, Q compute the sine between lines CP and QP.

    C is shifted by -P, transformed with the matrix built from the
    normalized direction of Q - P, and normalized; the y component of the
    result is returned.
    '''
    target = JM_point_from_py(C)
    pivot = JM_point_from_py(P)
    other = JM_point_from_py(Q)

    direction = mupdf.fz_normalize_vector(
            mupdf.fz_make_point(other.x - pivot.x, other.y - pivot.y)
            )
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -pivot.x, -pivot.y)
    turn = mupdf.fz_make_matrix(direction.x, -direction.y, direction.y, direction.x, 0, 0)
    combined = mupdf.fz_concat(shift, turn)

    moved = mupdf.fz_transform_point(target, combined)
    unit = mupdf.fz_normalize_vector(moved)
    return unit.y
| 19646 | |
| 19647 | |
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps the two points C, P onto the x-axis: C goes
    to (0, 0) and the image of P keeps its distance from C.

    The matrix is the concatenation of a translation by -C and a matrix
    built from the normalized vector P - C.
    '''
    origin = JM_point_from_py(C)
    target = JM_point_from_py(P)

    # (cosine, sine) of the vector P - C
    unit = mupdf.fz_normalize_vector(
            mupdf.fz_make_point(target.x - origin.x, target.y - origin.y)
            )

    translate = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
    rotate = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    return JM_py_from_matrix(mupdf.fz_concat(translate, rotate))
| 19662 | |
| 19663 | |
def match_string(h0, n0):
    '''
    Compare the start of haystack <h0> with needle <n0>, character by
    character, using the canonical codes returned by chartocanon().

    After a matched space, the following run of spaces is skipped on that
    side. Returns the haystack offset recorded at the last matching step if
    the needle was consumed up to its terminating code 0, otherwise None.
    '''
    space = ord(' ')

    def advance(text, pos, code):
        # Read the next character; if the one just matched was a space,
        # keep reading while further spaces follow.
        collapse = code == space
        step, code = chartocanon(text[pos:])
        pos += step
        while collapse and code == space:
            step, code = chartocanon(text[pos:])
            pos += step
        return pos, code

    match_end = 0
    hay_pos, hay_code = chartocanon(h0[0:])
    needle_pos, needle_code = chartocanon(n0[0:])
    while hay_code == needle_code:
        match_end = hay_pos
        hay_pos, hay_code = advance(h0, hay_pos, hay_code)
        needle_pos, needle_code = advance(n0, needle_pos, needle_code)
    return match_end if needle_code == 0 else None
| 19693 | |
| 19694 | |
def on_highlight_char(hits, line, ch):
    '''
    Record the quad of character <ch> of text line <line> in <hits>.

    If <hits> already holds quads and the left edge of the character's quad
    lies within the fuzz tolerances of the right edge of the last one, the
    right edge of the quad obtained from JM_quad_from_py() is moved to the
    character's right edge. Otherwise the character's quad is appended and
    hits.len is incremented.
    '''
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    char_size = ch.m_internal.size
    vfuzz = char_size * hits.vfuzz
    hfuzz = char_size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)

    def adjacent(corner, other):
        # Both distances along the line direction must be below tolerance.
        direction = line.m_internal.dir
        return (
                hdist(direction, corner, other) < hfuzz
                and vdist(direction, corner, other) < vfuzz
                )

    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        last = hits.quads[hits.len - 1]
        end = JM_quad_from_py(last)
        if adjacent(end.lr, ch_quad.ll) and adjacent(end.ur, ch_quad.ul):
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            # NOTE(review): this only holds if `end` compares equal to the
            # stored quad after modification - confirm JM_quad_from_py().
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
| 19718 | |
| 19719 | |
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.
    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Args:
        doc_des: destination PDF document.
        doc_src: source PDF document.
        page_from: page number to look up in <doc_src>.
        page_to: position passed to mupdf.pdf_insert_page().
        rotate: value for /Rotate of the new page; -1 leaves it unchanged.
        links: not used by this Python implementation; only forwarded to
            extra.page_merge().
        copy_annots: if true, copy annotations (except those with an /IRT
            entry and subtypes Link, Popup, Widget).
        graft_map: object whose `.this` is passed to
            mupdf.pdf_graft_mapped_object().
    '''
    if g_use_extra:
        #log( 'Calling C++ extra.page_merge()')
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # page-level keys we want to copy
    copied_keys = tuple(
            PDF_NAME(key_name)
            for key_name in (
                'Contents',
                'Resources',
                'MediaBox',
                'CropBox',
                'BleedBox',
                'TrimBox',
                'ArtBox',
                'Rotate',
                'UserUnit',
                )
            )
    src_page = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # new page dictionary in the destination document
    new_page = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(new_page, PDF_NAME('Type'), PDF_NAME('Page'))

    # graft every (possibly inherited) value present on the source page
    for key in copied_keys:
        value = mupdf.pdf_dict_get_inheritable(src_page, key)
        if value.m_internal:
            mupdf.pdf_dict_put(new_page, key, mupdf.pdf_graft_mapped_object(graft_map.this, value))

    # Copy annotations, but skip Link, Popup, IRT, Widget types.
    # Keys P (parent) and Popup are removed before copying.
    if copy_annots:
        src_annots = mupdf.pdf_dict_get(src_page, PDF_NAME('Annots'))
        annot_count = mupdf.pdf_array_len(src_annots)
        if annot_count > 0:
            dst_annots = mupdf.pdf_dict_put_array(new_page, PDF_NAME('Annots'), annot_count)
            for index in range(annot_count):
                annot_obj = mupdf.pdf_array_get(src_annots, index)
                if not (annot_obj.m_internal and mupdf.pdf_is_dict(annot_obj)):
                    continue  # skip non-dict items
                if mupdf.pdf_dict_gets(annot_obj, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Subtype'))
                if any(
                        mupdf.pdf_name_eq(subtype, PDF_NAME(skipped))
                        for skipped in ('Link', 'Popup', 'Widget')
                        ):
                    continue
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('P'))
                grafted = mupdf.pdf_graft_mapped_object(graft_map.this, annot_obj)
                reference = mupdf.pdf_new_indirect(doc_des, mupdf.pdf_to_num(grafted), 0)
                mupdf.pdf_array_push(dst_annots, reference)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int(new_page, PDF_NAME('Rotate'), rotate)

    # add the page dictionary to the destination PDF and insert it
    page_ref = mupdf.pdf_add_object(doc_des, new_page)
    mupdf.pdf_insert_page(doc_des, page_to, page_ref)
| 19789 | |
| 19790 | |
def paper_rect(s: str) -> Rect:
    """Return a Rect for the paper size indicated in string 's'.

    's' is handed to paper_size(); the rectangle has its top-left corner at
    (0, 0) and its bottom-right corner at the returned (width, height).
    """
    dimensions = paper_size(s)
    width, height = dimensions
    return Rect(0.0, 0.0, width, height)
| 19796 | |
| 19797 | |
def paper_size(s: str) -> tuple:
    """Return a tuple (width, height) for a given paper format string.

    The lookup is case-insensitive and uses the table of paper_sizes().

    Notes:
        'A4-L' will return (842, 595), the values for A4 landscape.
        Suffix '-P' and no suffix return the portrait tuple.
        Unknown formats yield (-1, -1).
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    if name.endswith("-p"):
        name = name[:-2]
    dims = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (dims[1], dims[0])
    return dims
| 19816 | |
| 19817 | |
def paper_sizes():
    """Known paper formats @ 72 dpi as a dictionary. Key is the format string
    like "a4" for ISO-A4. Value is the tuple (width, height).

    A new dictionary is built on every call.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    # (format, width, height)
    table = (
        ("a0", 2384, 3370),
        ("a1", 1684, 2384),
        ("a10", 74, 105),
        ("a2", 1191, 1684),
        ("a3", 842, 1191),
        ("a4", 595, 842),
        ("a5", 420, 595),
        ("a6", 298, 420),
        ("a7", 210, 298),
        ("a8", 147, 210),
        ("a9", 105, 147),
        ("b0", 2835, 4008),
        ("b1", 2004, 2835),
        ("b10", 88, 125),
        ("b2", 1417, 2004),
        ("b3", 1001, 1417),
        ("b4", 709, 1001),
        ("b5", 499, 709),
        ("b6", 354, 499),
        ("b7", 249, 354),
        ("b8", 176, 249),
        ("b9", 125, 176),
        ("c0", 2599, 3677),
        ("c1", 1837, 2599),
        ("c10", 79, 113),
        ("c2", 1298, 1837),
        ("c3", 918, 1298),
        ("c4", 649, 918),
        ("c5", 459, 649),
        ("c6", 323, 459),
        ("c7", 230, 323),
        ("c8", 162, 230),
        ("c9", 113, 162),
        ("card-4x6", 288, 432),
        ("card-5x7", 360, 504),
        ("commercial", 297, 684),
        ("executive", 522, 756),
        ("invoice", 396, 612),
        ("ledger", 792, 1224),
        ("legal", 612, 1008),
        ("legal-13", 612, 936),
        ("letter", 612, 792),
        ("monarch", 279, 540),
        ("tabloid-extra", 864, 1296),
    )
    return {name: (width, height) for name, width, height in table}
| 19873 | |
def pdf_lookup_page_loc(doc, needle):
    '''
    Forward to mupdf.pdf_lookup_page_loc() and return its result unchanged.
    '''
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location
| 19876 | |
| 19877 | |
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    The function is disabled: the leading assertion always fails (unless
    assertions are stripped) and points to mupdf.pdf_debug_obj() instead.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    pieces = []
    if mupdf.pdf_is_array(o):
        length = mupdf.pdf_array_len(o)
        pieces.append(f'array {length}\n')
        for index in range(length):
            item = mupdf.pdf_array_get(o, index)
            pieces.append(pdfobj_string(item, prefix + ' '))
            pieces.append('\n')
    elif mupdf.pdf_is_bool(o):
        pieces.append(f'bool: {o.array_get_bool()}\n')
    elif mupdf.pdf_is_dict(o):
        length = mupdf.pdf_dict_len(o)
        pieces.append(f'dict {length}\n')
        for index in range(length):
            key = mupdf.pdf_dict_get_key(o, index)
            value = mupdf.pdf_dict_get( o, key)
            pieces.append(f'{prefix} {key}: ')
            pieces.append(pdfobj_string( value, prefix + ' '))
            pieces.append('\n')
    elif mupdf.pdf_is_embedded_file(o):
        pieces.append(f'embedded_file: {o.embedded_file_name()}\n')
    elif mupdf.pdf_is_indirect(o):
        pieces.append('indirect: ...\n')
    elif mupdf.pdf_is_int(o):
        pieces.append(f'int: {mupdf.pdf_to_int(o)}\n')
    elif mupdf.pdf_is_jpx_image(o):
        pieces.append('jpx_image:\n')
    elif mupdf.pdf_is_name(o):
        pieces.append(f'name: {mupdf.pdf_to_name(o)}\n')
    # NOTE(review): `o.pdf_is_null` / `o.pdf_is_real` are referenced but not
    # called; if they are methods these tests are always true - confirm.
    elif o.pdf_is_null:
        pieces.append('null\n')
    #elif o.pdf_is_number:
    #    ret += f'number\n'
    elif o.pdf_is_real:
        pieces.append(f'real: {o.pdf_to_real()}\n')
    elif mupdf.pdf_is_stream(o):
        pieces.append('stream\n')
    elif mupdf.pdf_is_string(o):
        pieces.append(f'string: {mupdf.pdf_to_string(o)}\n')
    else:
        pieces.append('<>\n')

    return ''.join(pieces)
| 19926 | |
| 19927 | |
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility adds an entry
        "/W[0 65535 w]" to the descendent font(s) of font. The float w is
        taken to be the width of 0x20 (space).
        This should enforce viewers to use 'w' as the character width.

        Nothing happens if the font is not flagged "mono" or does not occur
        on the page. A failure to set the width is only logged.

    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object.
    """
    if not font.flags["mono"]:  # font not flagged as monospaced
        return None
    doc = page.parent  # the document
    # Distinct xrefs of page fonts with our font's name, a reference name
    # starting with "F" and an encoding starting with "Identity".
    xrefs = {
        entry[0]
        for entry in page.get_fonts()
        if (
            entry[3] == font.name
            and entry[4].startswith("F")
            and entry[5].startswith("Identity")
        )
    }
    if not xrefs:  # our font does not occur
        return
    width = int(round((font.glyph_advance(32) * 1000)))
    for xref in xrefs:
        if TOOLS.set_font_width(doc, xref, width):
            continue
        log("Cannot set width for '%s' in xref %i" % (font.name, xref))
| 19958 | |
| 19959 | |
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), each color in range(256).
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 1.
    """
    red, green, blue = sRGB_to_rgb(srgb)
    return red / 255.0, green / 255.0, blue / 255.0
| 19972 | |
| 19973 | |
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) SSRRGGBB (red, green, blue), each color in range(256).
            Only the lowest 24 bits are used; the `SS` byte is discarded.
            With MuPDF < 1.26, `s` is always 0.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    rgb24 = srgb & 0xffffff
    red = (rgb24 >> 16) & 0xff
    green = (rgb24 >> 8) & 0xff
    blue = rgb24 & 0xff
    return (red, green, blue)
| 19990 | |
| 19991 | |
def string_in_names_list(p, names_list):
    '''
    Return 1 if the text string of <p> equals the text string of any item
    at an even index of array <names_list>, else 0. A false <names_list>
    is treated as an empty array.
    '''
    entry_count = mupdf.pdf_array_len(names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string(p)
    found = any(
            mupdf.pdf_to_text_string(mupdf.pdf_array_get(names_list, index)) == wanted
            for index in range(0, entry_count, 2)
            )
    return 1 if found else 0
| 19999 | |
| 20000 | |
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Walk the chain of sibling outline items starting at <outlines> and
    remove items whose destination is invalid.

    Children of every item are processed first via strip_outlines(). An
    item with an invalid destination is unlinked from the chain if it has
    no remaining children; if it has children, only its /Dest and /A
    entries are deleted.

    Returns (count, first, prev): the number of kept items, the first kept
    item and the last kept item. `first` and `prev` are None if no item was
    kept.
    '''
    first = None
    count = 0
    current = outlines
    prev = None
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)

        if not dest_is_valid(current, page_count, page_object_nums, names_list):
            if nc == 0:
                # Outline with invalid dest and no children. Drop it by
                # pulling the next one in here.
                next_item = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
                # `prev` is still None until the first item has been kept;
                # guard against that before looking at `.m_internal`.
                have_prev = prev is not None and bool(prev.m_internal)
                if not next_item.m_internal:
                    # There is no next one to pull in
                    if have_prev:
                        mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
                elif have_prev:
                    mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next_item)
                    mupdf.pdf_dict_put(next_item, PDF_NAME('Prev'), prev)
                else:
                    mupdf.pdf_dict_del(next_item, PDF_NAME('Prev'))
                current = next_item
            else:
                # Outline with invalid dest, but children. Just drop the dest.
                mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
                mupdf.pdf_dict_del(current, PDF_NAME('A'))
                current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
        else:
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1

    return count, first, prev
| 20043 | |
| 20044 | |
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Clean the children of outline node <outlines> with strip_outline() and
    update its /First, /Last and /Count entries accordingly.

    If no child remains, the three entries are deleted. Otherwise /Count is
    set to the number of remaining children, negated if the old /Count was
    not positive.

    Returns the count reported by strip_outline(), or 0 if <outlines> is
    empty or has no /First entry.
    '''
    if not outlines.m_internal:
        return 0

    first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    nc = 0
    if first.m_internal:
        nc, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)

    if nc != 0:
        old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
        signed_count = nc if old_count > 0 else -nc
        mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
        mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
        mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(signed_count))
    else:
        for key_name in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key_name))
    return nc
| 20065 | |
| 20066 | |
# Integer codes 1..4 naming the four path operations (fill, stroke, clip,
# clip-stroke).
(
    trace_device_FILL_PATH,
    trace_device_STROKE_PATH,
    trace_device_CLIP_PATH,
    trace_device_CLIP_STROKE_PATH,
) = range(1, 5)
| 20071 | |
| 20072 | |
def unicode_to_glyph_name(ch: int) -> str:
    """
    Convenience function accessing unicodedata.

    Returns the Unicode name of code point 'ch', or ".notdef" if the lookup
    raises ValueError (no name, or 'ch' outside the valid range).
    """
    import unicodedata
    try:
        return unicodedata.name(chr(ch))
    except ValueError:
        return ".notdef"
| 20083 | |
| 20084 | |
def vdist(dir, a, b):
    '''
    Return mupdf.fz_abs() of dx * dir.y + dy * dir.x, where (dx, dy) is the
    vector from point <a> to point <b>.
    '''
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    offset = delta_x * dir.y + delta_y * dir.x
    return mupdf.fz_abs(offset)
| 20089 | |
| 20090 | |
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is
            called once in the current process.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.
    '''
    # The dict() defaults are shared between calls; they are only read here,
    # never modified.
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        document = Document(path)
        if pages is None:
            pages = range(len(document))
        for pno in pages:
            page = document[pno]
            # Pass the page function its own keyword arguments (this used to
            # pass `initfn_kwargs` by mistake).
            r = pagefn(page, *pagefn_args, **pagefn_kwargs)
            ret.append(r)

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            ret = _apply_pages._multiprocessing(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        elif method == 'fork':
            ret = _apply_pages._fork(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        else:
            assert 0, f'Unrecognised {method=}.'

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
| 20219 | |
| 20220 | |
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Returns list of results from `Page.get_text()`, optionally using
    concurrency for speed. This is a wrapper around `apply_pages()` with
    `Page.get_text` as the page function.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Passed to internal calls to `Page.get_text()`.
    '''
    # Keyword arguments forwarded to every Page.get_text() call.
    page_kwargs = {
            'option': option,
            'clip': clip,
            'flags': flags,
            'textpage': textpage,
            'sort': sort,
            'delimiters': delimiters,
            }

    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=page_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results
| 20283 | |
| 20284 | |
class TOOLS:
    '''
    Collection of helper functions, grouped in a class that is used as a
    namespace only.

    We use @staticmethod to avoid the need to create an instance of this class.
    '''

    @staticmethod
    def _derotate_matrix(page):
        '''
        Returns `JM_derotate_page_matrix(page)` converted with
        `JM_py_from_matrix()` if `page` is a `mupdf.PdfPage`; otherwise
        returns the converted default-constructed `mupdf.FzMatrix()`.
        '''
        if isinstance(page, mupdf.PdfPage):
            return JM_py_from_matrix(JM_derotate_page_matrix(page))
        else:
            return JM_py_from_matrix(mupdf.FzMatrix())

    @staticmethod
    def _fill_widget(annot, widget):
        '''
        Populates `widget` from `annot` and returns the result of
        `JM_get_widget_properties()`.

        Empty (falsy) script attributes are normalised to None.
        '''
        val = JM_get_widget_properties(annot, widget)

        widget.rect = Rect(annot.rect)
        widget.xref = annot.xref
        widget.parent = annot.parent
        widget._annot = annot  # backpointer to annot object
        # Same attributes and same order as the original chain of `if`s.
        for script_attr in (
                'script',
                'script_stroke',
                'script_format',
                'script_change',
                'script_calc',
                'script_blur',
                'script_focus',
                ):
            if not getattr(widget, script_attr):
                setattr(widget, script_attr, None)
        return val

    @staticmethod
    def _get_all_contents(page):
        '''
        Returns the result of `JM_read_contents()` for the page's PDF object,
        converted with `JM_BinFromBuffer()`.
        '''
        page = _as_pdf_page(page.this)
        res = JM_read_contents(page.obj())
        result = JM_BinFromBuffer( res)
        return result

    @staticmethod
    def _insert_contents(page, newcont, overlay=1):
        """Add bytes as a new /Contents object for a page, and return its xref."""
        pdfpage = _as_pdf_page(page, required=1)
        contbuf = JM_BufferFromBytes(newcont)
        xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay)
        #fixme: pdfpage->doc->dirty = 1;
        return xref

    @staticmethod
    def _le_annot_parms(annot, p1, p2, fill_color):
        """Get common parameters for making annot line end symbols.

        Args:
            annot: annotation; its border width, colors and opacity are read.
            p1, p2: start and end point of the line.
            fill_color: fill color to use; if falsy, the annotation's own
                fill color is used (white if that is missing too).

        Returns:
            m: matrix that maps p1, p2 to points L, R on the x-axis
            im: its inverse
            L, R: transformed p1, p2
            w: line width
            scol: stroke color string
            fcol: fill color string
            opacity: opacity string (gs command)
        """
        w = annot.border["width"]  # line width
        sc = annot.colors["stroke"]  # stroke color
        if not sc:  # black if missing
            sc = (0,0,0)
        scol = " ".join(map(str, sc)) + " RG\n"
        if fill_color:
            fc = fill_color
        else:
            fc = annot.colors["fill"]  # fill color
        if not fc:
            fc = (1,1,1)  # white if missing
        fcol = " ".join(map(str, fc)) + " rg\n"
        m = Matrix(util_hor_matrix(p1, p2))  # matrix makes the line horizontal
        im = ~m  # inverted matrix
        L = p1 * m  # converted start (left) point
        R = p2 * m  # converted end (right) point
        if 0 <= annot.opacity < 1:
            opacity = "/H gs\n"
        else:
            opacity = ""
        return m, im, L, R, w, scol, fcol, opacity

    @staticmethod
    def _le_butt(annot, p1, p2, lr, fill_color):
        """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 3
        d = shift * max(1, w)
        M = R if lr else L
        top = (M + (0, -d/2.)) * im
        bot = (M + (0, d/2.)) * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_circle(annot, p1, p2, lr, fill_color):
        """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_closedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_diamond(annot, p1, p2, lr, fill_color):
        """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        # the square makes line longer by (2*shift - 1)*width
        # The diamond's corners are the mid-points of the square's sides.
        p = (r.tl + (r.bl - r.tl) * 0.5) * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = (r.tl + (r.tr - r.tl) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = (r.tr + (r.br - r.tr) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = (r.br + (r.bl - r.br) * 0.5) * im
        ap += "%f %f l\n" % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_openarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (2*d, 0) if lr else L + (2*d, 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_ropenarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (d/3., 0) if lr else L + (d/3., 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_slash(annot, p1, p2, lr, fill_color):
        """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        rw = 1.1547 * max(1, w) * 1.0  # makes rect diagonal a 30 deg inclination
        M = R if lr else L
        r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w)
        top = r.tl * im
        bot = r.br * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_square(annot, p1, p2, lr, fill_color):
        """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5  # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)  # the square
        # the square makes line longer by (2*shift - 1)*width
        p = r.tl * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = r.tr * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = r.br * im
        ap += "%f %f l\n" % (p.x, p.y)
        p = r.bl * im
        ap += "%f %f l\n" % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _oval_string(p1, p2, p3, p4):
        """Return /AP string defining an oval within a 4-polygon provided as points

        The oval is drawn as four Bezier curves ("c" commands) that start and
        end at the mid-points of the polygon's sides.
        """
        def bezier(p, q, r):
            f = "%f %f %f %f %f %f c\n"
            return f % (p.x, p.y, q.x, q.y, r.x, r.y)

        kappa = 0.55228474983  # magic number
        ml = p1 + (p4 - p1) * 0.5  # middle points ...
        mo = p1 + (p2 - p1) * 0.5  # for each ...
        mr = p2 + (p3 - p2) * 0.5  # polygon ...
        mu = p4 + (p3 - p4) * 0.5  # side
        ol1 = ml + (p1 - ml) * kappa  # the 8 bezier
        ol2 = mo + (p1 - mo) * kappa  # helper points
        or1 = mo + (p2 - mo) * kappa
        or2 = mr + (p2 - mr) * kappa
        ur1 = mr + (p3 - mr) * kappa
        ur2 = mu + (p3 - mu) * kappa
        ul1 = mu + (p4 - mu) * kappa
        ul2 = ml + (p4 - ml) * kappa
        # now draw, starting from middle point of left side
        ap = "%f %f m\n" % (ml.x, ml.y)
        ap += bezier(ol1, ol2, mo)
        ap += bezier(or1, or2, mr)
        ap += bezier(ur1, ur2, mu)
        ap += bezier(ul1, ul2, ml)
        return ap

    @staticmethod
    def _parse_da(annot):
        '''
        Reads the annotation's /DA string and returns `(col, font, fsize)`.

        The string is taken from the annotation (inheritable lookup) or, if
        absent there, from Root/AcroForm/DA of the PDF trailer. If no string
        can be obtained, `((0,), "", 0)` is returned.

        Otherwise the defaults are font "Helv", size 12 and color (0, 0, 0);
        they are overridden by any "Tf", "g", "rg" or "k" operator found.
        An operator that is not preceded by enough operands is ignored.
        '''
        if g_use_extra:
            val = extra.Tools_parse_da( annot.this)
        else:
            def Tools__parse_da(annot):
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                this_annot_obj = mupdf.pdf_annot_obj( this_annot)
                pdf = mupdf.pdf_get_bound_document( this_annot_obj)
                try:
                    da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
                    if not da.m_internal:
                        trailer = mupdf.pdf_trailer(pdf)
                        da = mupdf.pdf_dict_getl(trailer,
                                PDF_NAME('Root'),
                                PDF_NAME('AcroForm'),
                                PDF_NAME('DA'),
                                )
                    da_str = mupdf.pdf_to_text_string(da)
                except Exception:
                    if g_exceptions_verbose:    exception_info()
                    return
                return da_str
            val = Tools__parse_da(annot)

        if not val:
            return ((0,), "", 0)
        font = "Helv"
        fsize = 12
        col = (0, 0, 0)
        dat = val.split()  # split on any whitespace
        # Operators follow their operands. The index checks stop a truncated
        # string from reading operands via negative (wrap-around) indices.
        for i, item in enumerate(dat):
            if item == "Tf" and i >= 2:
                font = dat[i - 2][1:]  # drop the leading "/" of the name
                fsize = float(dat[i - 1])
            elif item == "g" and i >= 1:  # unicolor text
                col = [float(dat[i - 1])]
            elif item == "rg" and i >= 3:  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
            elif item == "k" and i >= 4:  # CMYK colored text
                col = [float(f) for f in dat[i - 4:i]]

        return (col, font, fsize)

    @staticmethod
    def _reset_widget(annot):
        '''
        Calls `mupdf.pdf_field_reset()` for the annotation's PDF object.
        '''
        this_annot = annot
        this_annot_obj = mupdf.pdf_annot_obj(this_annot)
        pdf = mupdf.pdf_get_bound_document(this_annot_obj)
        mupdf.pdf_field_reset(pdf, this_annot_obj)

    @staticmethod
    def _rotate_matrix(page):
        '''
        Returns `JM_rotate_page_matrix()` of the page converted with
        `JM_py_from_matrix()`; for a non-PDF page returns the converted
        default-constructed `mupdf.FzMatrix()`.
        '''
        pdfpage = page._pdf_page(required=False)
        if not pdfpage.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage))

    @staticmethod
    def _save_widget(annot, widget):
        '''
        Forwards to `JM_set_widget_properties(annot, widget)`.
        '''
        JM_set_widget_properties(annot, widget)

    @staticmethod
    def _update_da(annot, da_str):
        '''
        Stores `da_str` as the annotation's /DA entry and deletes its /DS and
        /RC entries. Exceptions on the pure-Python path are swallowed (and
        reported if `g_exceptions_verbose` is set). Returns None.
        '''
        if g_use_extra:
            extra.Tools_update_da( annot.this, da_str)
        else:
            try:
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS'))  # /* not supported */
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC'))  # /* not supported */
            except Exception:
                if g_exceptions_verbose:    exception_info()
                return
            return

    @staticmethod
    def gen_id():
        '''
        Increments the module-level counter `TOOLS_JM_UNIQUE_ID` and returns
        its new value.
        '''
        global TOOLS_JM_UNIQUE_ID
        TOOLS_JM_UNIQUE_ID += 1
        return TOOLS_JM_UNIQUE_ID

    @staticmethod
    def glyph_cache_empty():
        '''
        Empty the glyph cache.
        '''
        mupdf.fz_purge_glyph_cache()

    @staticmethod
    def image_profile(stream, keep_image=0):
        '''
        Metadata of an image binary stream.
        '''
        return JM_image_profile(stream, keep_image)

    @staticmethod
    def mupdf_display_errors(on=None):
        '''
        Set MuPDF error display to True or False.

        With `on=None` the setting is left unchanged. Returns the current
        setting.
        '''
        global JM_mupdf_show_errors
        if on is not None:
            JM_mupdf_show_errors = bool(on)
        return JM_mupdf_show_errors

    @staticmethod
    def mupdf_display_warnings(on=None):
        '''
        Set MuPDF warnings display to True or False.

        With `on=None` the setting is left unchanged. Returns the current
        setting.
        '''
        global JM_mupdf_show_warnings
        if on is not None:
            JM_mupdf_show_warnings = bool(on)
        return JM_mupdf_show_warnings

    @staticmethod
    def mupdf_version():
        '''Get version of MuPDF binary build.'''
        return mupdf.FZ_VERSION

    @staticmethod
    def mupdf_warnings(reset=1):
        '''
        Get the MuPDF warnings/errors with optional reset (default).

        Returns the stored messages joined by newlines.
        '''
        # Get any trailing `... repeated <N> times...` message.
        mupdf.fz_flush_warnings()
        ret = '\n'.join( JM_mupdf_warnings_store)
        if reset:
            TOOLS.reset_mupdf_warnings()
        return ret

    @staticmethod
    def reset_mupdf_warnings():
        '''
        Replaces the module-level warnings store with a new empty list.
        '''
        global JM_mupdf_warnings_store
        JM_mupdf_warnings_store = list()

    @staticmethod
    def set_aa_level(level):
        '''
        Set anti-aliasing level.
        '''
        mupdf.fz_set_aa_level(level)

    @staticmethod
    def set_annot_stem( stem=None):
        '''
        Get or set the module-level `JM_annot_id_stem`.

        With `stem=None` the current value is returned unchanged. Otherwise
        the value is set to the first 50 characters of `stem` and returned.
        '''
        global JM_annot_id_stem
        if stem is None:
            return JM_annot_id_stem
        JM_annot_id_stem = stem[:50]
        return JM_annot_id_stem

    @staticmethod
    def set_font_width(doc, xref, width):
        '''
        Sets the /W entry of every descendant font of the font object `xref`
        to the array `[0 65535 width]`.

        Returns False if `doc` is not a PDF, otherwise True (also when the
        font has no /DescendantFonts array).
        '''
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return False
        font = mupdf.pdf_load_object(pdf, xref)
        dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
        if mupdf.pdf_is_array(dfonts):
            n = mupdf.pdf_array_len(dfonts)
            for i in range(n):
                dfont = mupdf.pdf_array_get(dfonts, i)
                warray = mupdf.pdf_new_array(pdf, 3)
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width))
                mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
        return True

    @staticmethod
    def set_graphics_min_line_width(min_line_width):
        '''
        Set the graphics minimum line width.
        '''
        mupdf.fz_set_graphics_min_line_width(min_line_width)

    @staticmethod
    def set_icc( on=0):
        """Set ICC color handling on or off."""
        if on:
            if mupdf.FZ_ENABLE_ICC:
                mupdf.fz_enable_icc()
            else:
                RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
        elif mupdf.FZ_ENABLE_ICC:
            mupdf.fz_disable_icc()

    @staticmethod
    def set_low_memory( on=None):
        """Set / unset MuPDF device caching."""
        if on is not None:
            _globals.no_device_caching = bool(on)
        return _globals.no_device_caching

    @staticmethod
    def set_small_glyph_heights(on=None):
        """Set / unset small glyph heights."""
        if on is not None:
            _globals.small_glyph_heights = bool(on)
            if g_use_extra:
                extra.set_small_glyph_heights(_globals.small_glyph_heights)
        return _globals.small_glyph_heights

    @staticmethod
    def set_subset_fontnames(on=None):
        '''
        Set / unset returning fontnames with their subset prefix.
        '''
        if on is not None:
            _globals.subset_fontnames = bool(on)
            if g_use_extra:
                extra.set_subset_fontnames(_globals.subset_fontnames)
        return _globals.subset_fontnames

    @staticmethod
    def show_aa_level():
        '''
        Show anti-aliasing values.
        '''
        return dict(
                graphics = mupdf.fz_graphics_aa_level(),
                text = mupdf.fz_text_aa_level(),
                graphics_min_line_width = mupdf.fz_graphics_min_line_width(),
                )

    @staticmethod
    def store_maxsize():
        '''
        MuPDF store size limit.

        Currently always returns None.
        '''
        # fixme: return gctx->store->max.
        return None

    @staticmethod
    def store_shrink(percent):
        '''
        Free 'percent' of current store size.

        Returns 0 if the store was emptied (`percent >= 100`), otherwise
        None.
        '''
        if percent >= 100:
            mupdf.fz_empty_store()
            return 0
        if percent > 0:
            mupdf.fz_shrink_store( 100 - percent)
        # fixme: return gctx->store->size.

    @staticmethod
    def store_size():
        '''
        MuPDF current store size.

        Currently always returns None.
        '''
        # fixme: return gctx->store->size.
        return None

    @staticmethod
    def unset_quad_corrections(on=None):
        '''
        Set ascender / descender corrections on or off.
        '''
        if on is not None:
            _globals.skip_quad_corrections = bool(on)
            if g_use_extra:
                extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
        return _globals.skip_quad_corrections
| 20853 | |
| 20854 # fixme: also defined at top-level. | |
| 20855 JM_annot_id_stem = 'fitz' | |
| 20856 | |
| 20857 fitz_config = JM_fitz_config() | |
| 20858 | |
| 20859 | |
# Route MuPDF warnings and errors to our Python handlers. This is skipped
# for cppyy because callbacks are not yet supported there.
if not mupdf_cppyy:
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
    mupdf.fz_set_error_callback(JM_mupdf_error)
| 20864 | |
| 20865 | |
# If there are pending warnings when we exit, we end up in this sequence:
#
#   atexit()
#   -> mupdf::internal_thread_state::~internal_thread_state()
#   -> fz_drop_context()
#   -> fz_flush_warnings()
#   -> SWIG Director code
#   -> Python calling JM_mupdf_warning().
#
# Unfortunately this causes a SEGV, seemingly because the SWIG Director code
# has already been torn down.
#
# So we use a Python atexit handler to explicitly call fz_flush_warnings();
# this appears to happen early enough for the Director machinery to still
# work. So in the sequence above, fz_flush_warnings() will find that there
# are no pending warnings and will not attempt to call JM_mupdf_warning().
#
def _atexit():
    '''
    Exit handler: flushes pending MuPDF warnings while the Python callbacks
    are still installed, then removes the warning and error callbacks.
    '''
    # Flush first, so that pending warnings still reach the callback.
    mupdf.fz_flush_warnings()
    mupdf.fz_set_warning_callback(None)
    mupdf.fz_set_error_callback(None)


atexit.register(_atexit)
| 20890 | |
| 20891 | |
| 20892 # List of (name, red, green, blue) where: | |
| 20893 # name: upper-case name. | |
| 20894 # red, green, blue: integer in range 0..255. | |
| 20895 # | |
| 20896 from . import _wxcolors | |
| 20897 _wxcolors = _wxcolors._wxcolors | |
| 20898 | |
| 20899 | |
# Dict mapping from name to (red, green, blue), built from `_wxcolors`.
#   name: lower-case name.
#   red, green, blue: float in range 0..1.
#
# A plain loop is used so that the loop variables remain defined at module
# level afterwards, exactly as before.
pdfcolor = {}
for name, r, g, b in _wxcolors:
    pdfcolor[name.lower()] = (r/255, g/255, b/255)
| 20907 | |
| 20908 | |
def colors_pdf_dict():
    '''
    Returns the module-level `pdfcolor` dict (the dict itself, not a copy).

    It maps from name to (red, green, blue):
        name: lower-case name.
        red, green, blue: float in range 0..1.
    '''
    return pdfcolor
| 20916 | |
| 20917 | |
def colors_wx_list():
    '''
    Returns the module-level `_wxcolors` list (the list itself, not a copy).

    Each item is a (name, red, green, blue) tuple:
        name: upper-case name.
        red, green, blue: integers in range 0..255.
    '''
    return _wxcolors
| 20925 | |
| 20926 | |
| 20927 # We cannot import utils earlier because it imports this .py file itself and | |
| 20928 # uses some pymupdf.* types in function typing. | |
| 20929 # | |
| 20930 from . import utils | |
| 20931 | |
| 20932 | |
# Use utils.*() fns for some module-level functions and class methods.
#

# Module-level re-exports.
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# Annot methods.
Annot.get_text = utils.get_text
Annot.get_textbox = utils.get_textbox

# Document methods.
Document._do_links = utils.do_links
Document._do_widgets = utils.do_widgets
Document.del_toc_item = utils.del_toc_item
Document.get_char_widths = utils.get_char_widths
Document.get_oc = utils.get_oc
Document.get_ocmd = utils.get_ocmd
Document.get_page_labels = utils.get_page_labels
Document.get_page_numbers = utils.get_page_numbers
Document.get_page_pixmap = utils.get_page_pixmap
Document.get_page_text = utils.get_page_text
Document.get_toc = utils.get_toc
Document.has_annots = utils.has_annots
Document.has_links = utils.has_links
Document.insert_page = utils.insert_page
Document.new_page = utils.new_page
Document.scrub = utils.scrub
Document.search_page_for = utils.search_page_for
Document.set_metadata = utils.set_metadata
Document.set_oc = utils.set_oc
Document.set_ocmd = utils.set_ocmd
Document.set_page_labels = utils.set_page_labels
Document.set_toc = utils.set_toc
Document.set_toc_item = utils.set_toc_item
Document.subset_fonts = utils.subset_fonts
Document.tobytes = Document.write  # second name for the existing method
Document.xref_copy = utils.xref_copy

# IRect methods.
IRect.get_area = utils.get_area

# Page methods.
Page.apply_redactions = utils.apply_redactions
Page.delete_image = utils.delete_image
Page.delete_widget = utils.delete_widget
Page.draw_bezier = utils.draw_bezier
Page.draw_circle = utils.draw_circle
Page.draw_curve = utils.draw_curve
Page.draw_line = utils.draw_line
Page.draw_oval = utils.draw_oval
Page.draw_polyline = utils.draw_polyline
Page.draw_quad = utils.draw_quad
Page.draw_rect = utils.draw_rect
Page.draw_sector = utils.draw_sector
Page.draw_squiggle = utils.draw_squiggle
Page.draw_zigzag = utils.draw_zigzag
Page.get_image_info = utils.get_image_info
Page.get_image_rects = utils.get_image_rects
Page.get_label = utils.get_label
Page.get_links = utils.get_links
Page.get_pixmap = utils.get_pixmap
Page.get_text = utils.get_text
Page.get_text_blocks = utils.get_text_blocks
Page.get_text_selection = utils.get_text_selection
Page.get_text_words = utils.get_text_words
Page.get_textbox = utils.get_textbox
Page.get_textpage_ocr = utils.get_textpage_ocr
Page.insert_image = utils.insert_image
Page.insert_link = utils.insert_link
Page.insert_text = utils.insert_text
Page.insert_textbox = utils.insert_textbox
Page.insert_htmlbox = utils.insert_htmlbox
Page.new_shape = lambda x: utils.Shape(x)
Page.replace_image = utils.replace_image
Page.search_for = utils.search_for
Page.show_pdf_page = utils.show_pdf_page
Page.update_link = utils.update_link
Page.write_text = utils.write_text

# Module-level name for the Shape class.
Shape = utils.Shape

# Table detection lives in its own submodule.
from .table import find_tables

Page.find_tables = find_tables

# Rect methods.
Rect.get_area = utils.get_area

# TextWriter methods.
TextWriter.fill_textbox = utils.fill_textbox
| 21017 | |
| 21018 | |
class FitzDeprecation(DeprecationWarning):
    '''
    Warning category for PyMuPDF's own deprecation messages; adds nothing to
    `DeprecationWarning` except a distinct type to filter on.
    '''
| 21021 | |
def restore_aliases():
    '''
    Re-creates legacy names as aliases of the current snake_case items.

    Side effects:
        * Installs a "once" warnings filter for FitzDeprecation.
        * Replaces warnings.showwarning with a function that sends every
          warning to log().
        * Adds the legacy attributes listed below to their classes, or to
          this module where the class is given as None.
    '''
    warnings.filterwarnings( "once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        '''
        Replacement for warnings.showwarning() that writes to log().

        If the formatted warning contains 'FitzDeprecation', only the first
        line from that position onwards is logged, with the leading 'Fitz'
        removed. Any other warning is logged exactly as formatted by
        warnings.formatwarning(). <file> is ignored.
        '''
        text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        s = text.find("FitzDeprecation")
        if s >= 0:
            # Drop the location prefix and any source line, then strip the
            # 'Fitz' prefix of the category name.
            text = text[s:].splitlines()[0][len('Fitz'):]
        log(text)

    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds a legacy alias for the existing item <class_>.<new_name>.

        class_:
            Class/module to modify; use None for the current module.
        new_name:
            String name of existing item, e.g. name of method.
        legacy_name:
            Name of legacy object to create in <class_>. If None or empty, we
            generate from <new_name> by removing underscores and capitalising
            the next letter.

        Callable items are wrapped so that each call via the legacy name
        issues a FitzDeprecation warning before delegating to the current
        item; non-callable items are aliased directly.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            # 'get_text_blocks' -> 'getTextBlocks'.
            head, *tail = new_name.split('_')
            legacy_name = head + ''.join(part[:1].upper() + part[1:] for part in tail)
        new_object = getattr( class_, new_name)
        assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if callable( new_object):
            def deprecated_function( *args, **kwargs):
                # stacklevel=2 attributes the warning to the caller of the
                # legacy name rather than to this wrapper.
                warnings.warn(
                        f'"{legacy_name}" removed from {class_} after v1.19.0 - use "{new_name}".',
                        category=FitzDeprecation,
                        stacklevel=2,
                        )
                return new_object( *args, **kwargs)
            deprecated_function.__doc__ = (
                    f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
                    f'{new_object.__doc__}'
                    )
            setattr( class_, legacy_name, deprecated_function)
        else:
            setattr( class_, legacy_name, new_object)

    _alias( Annot, 'get_file', 'fileGet')
    _alias( Annot, 'get_pixmap')
    _alias( Annot, 'get_sound', 'soundGet')
    _alias( Annot, 'get_text')
    _alias( Annot, 'get_textbox')
    _alias( Annot, 'get_textpage', 'getTextPage')
    _alias( Annot, 'line_ends')
    _alias( Annot, 'set_blendmode', 'setBlendMode')
    _alias( Annot, 'set_border')
    _alias( Annot, 'set_colors')
    _alias( Annot, 'set_flags')
    _alias( Annot, 'set_info')
    _alias( Annot, 'set_line_ends')
    _alias( Annot, 'set_name')
    _alias( Annot, 'set_oc', 'setOC')
    _alias( Annot, 'set_opacity')
    _alias( Annot, 'set_rect')
    _alias( Annot, 'update_file', 'fileUpd')
    _alias( DisplayList, 'get_pixmap')
    _alias( DisplayList, 'get_textpage', 'getTextPage')
    _alias( Document, 'chapter_count')
    _alias( Document, 'chapter_page_count')
    _alias( Document, 'convert_to_pdf', 'convertToPDF')
    _alias( Document, 'copy_page')
    _alias( Document, 'delete_page')
    _alias( Document, 'delete_pages', 'deletePageRange')
    _alias( Document, 'embfile_add', 'embeddedFileAdd')
    _alias( Document, 'embfile_count', 'embeddedFileCount')
    _alias( Document, 'embfile_del', 'embeddedFileDel')
    _alias( Document, 'embfile_get', 'embeddedFileGet')
    _alias( Document, 'embfile_info', 'embeddedFileInfo')
    _alias( Document, 'embfile_names', 'embeddedFileNames')
    _alias( Document, 'embfile_upd', 'embeddedFileUpd')
    _alias( Document, 'extract_font')
    _alias( Document, 'extract_image')
    _alias( Document, 'find_bookmark')
    _alias( Document, 'fullcopy_page')
    _alias( Document, 'get_char_widths')
    _alias( Document, 'get_ocgs', 'getOCGs')
    _alias( Document, 'get_page_fonts', 'getPageFontList')
    _alias( Document, 'get_page_images', 'getPageImageList')
    _alias( Document, 'get_page_pixmap')
    _alias( Document, 'get_page_text')
    _alias( Document, 'get_page_xobjects', 'getPageXObjectList')
    _alias( Document, 'get_sigflags', 'getSigFlags')
    _alias( Document, 'get_toc', 'getToC')
    _alias( Document, 'get_xml_metadata')
    _alias( Document, 'insert_page')
    _alias( Document, 'insert_pdf', 'insertPDF')
    _alias( Document, 'is_dirty')
    _alias( Document, 'is_form_pdf', 'isFormPDF')
    _alias( Document, 'is_pdf', 'isPDF')
    _alias( Document, 'is_reflowable')
    _alias( Document, 'is_repaired')
    _alias( Document, 'last_location')
    _alias( Document, 'load_page')
    _alias( Document, 'make_bookmark')
    _alias( Document, 'move_page')
    _alias( Document, 'needs_pass')
    _alias( Document, 'new_page')
    _alias( Document, 'next_location')
    _alias( Document, 'page_count')
    _alias( Document, 'page_cropbox', 'pageCropBox')
    _alias( Document, 'page_xref')
    _alias( Document, 'pdf_catalog', 'PDFCatalog')
    _alias( Document, 'pdf_trailer', 'PDFTrailer')
    _alias( Document, 'prev_location', 'previousLocation')
    _alias( Document, 'resolve_link')
    _alias( Document, 'search_page_for')
    _alias( Document, 'set_language')
    _alias( Document, 'set_metadata')
    _alias( Document, 'set_toc', 'setToC')
    _alias( Document, 'set_xml_metadata')
    _alias( Document, 'update_object')
    _alias( Document, 'update_stream')
    _alias( Document, 'xref_is_stream', 'isStream')
    _alias( Document, 'xref_length')
    _alias( Document, 'xref_object')
    _alias( Document, 'xref_stream')
    _alias( Document, 'xref_stream_raw')
    _alias( Document, 'xref_xml_metadata', 'metadataXML')
    _alias( IRect, 'get_area')
    _alias( IRect, 'get_area', 'getRectArea')
    _alias( IRect, 'include_point')
    _alias( IRect, 'include_rect')
    _alias( IRect, 'is_empty')
    _alias( IRect, 'is_infinite')
    _alias( Link, 'is_external')
    _alias( Link, 'set_border')
    _alias( Link, 'set_colors')
    _alias( Matrix, 'is_rectilinear')
    _alias( Matrix, 'prerotate', 'preRotate')
    _alias( Matrix, 'prescale', 'preScale')
    _alias( Matrix, 'preshear', 'preShear')
    _alias( Matrix, 'pretranslate', 'preTranslate')
    _alias( None, 'get_pdf_now', 'getPDFnow')
    _alias( None, 'get_pdf_str', 'getPDFstr')
    _alias( None, 'get_text_length')
    _alias( None, 'get_text_length', 'getTextlength')
    _alias( None, 'image_profile', 'ImageProperties')
    _alias( None, 'paper_rect', 'PaperRect')
    _alias( None, 'paper_size', 'PaperSize')
    _alias( None, 'paper_sizes')
    _alias( None, 'planish_line')
    _alias( Outline, 'is_external')
    _alias( Outline, 'is_open')
    _alias( Page, 'add_caret_annot')
    _alias( Page, 'add_circle_annot')
    _alias( Page, 'add_file_annot')
    _alias( Page, 'add_freetext_annot')
    _alias( Page, 'add_highlight_annot')
    _alias( Page, 'add_ink_annot')
    _alias( Page, 'add_line_annot')
    _alias( Page, 'add_polygon_annot')
    _alias( Page, 'add_polyline_annot')
    _alias( Page, 'add_rect_annot')
    _alias( Page, 'add_redact_annot')
    _alias( Page, 'add_squiggly_annot')
    _alias( Page, 'add_stamp_annot')
    _alias( Page, 'add_strikeout_annot')
    _alias( Page, 'add_text_annot')
    _alias( Page, 'add_underline_annot')
    _alias( Page, 'add_widget')
    _alias( Page, 'clean_contents')
    _alias( Page, 'cropbox', 'CropBox')
    _alias( Page, 'cropbox_position', 'CropBoxPosition')
    _alias( Page, 'delete_annot')
    _alias( Page, 'delete_link')
    _alias( Page, 'delete_widget')
    _alias( Page, 'derotation_matrix')
    _alias( Page, 'draw_bezier')
    _alias( Page, 'draw_circle')
    _alias( Page, 'draw_curve')
    _alias( Page, 'draw_line')
    _alias( Page, 'draw_oval')
    _alias( Page, 'draw_polyline')
    _alias( Page, 'draw_quad')
    _alias( Page, 'draw_rect')
    _alias( Page, 'draw_sector')
    _alias( Page, 'draw_squiggle')
    _alias( Page, 'draw_zigzag')
    _alias( Page, 'first_annot')
    _alias( Page, 'first_link')
    _alias( Page, 'first_widget')
    _alias( Page, 'get_contents')
    _alias( Page, 'get_displaylist', 'getDisplayList')
    _alias( Page, 'get_drawings')
    _alias( Page, 'get_fonts', 'getFontList')
    _alias( Page, 'get_image_bbox')
    _alias( Page, 'get_images', 'getImageList')
    _alias( Page, 'get_links')
    _alias( Page, 'get_pixmap')
    _alias( Page, 'get_svg_image', 'getSVGimage')
    _alias( Page, 'get_text')
    _alias( Page, 'get_text_blocks')
    _alias( Page, 'get_text_words')
    _alias( Page, 'get_textbox')
    _alias( Page, 'get_textpage', 'getTextPage')
    _alias( Page, 'insert_font')
    _alias( Page, 'insert_image')
    _alias( Page, 'insert_link')
    _alias( Page, 'insert_text')
    _alias( Page, 'insert_textbox')
    _alias( Page, 'is_wrapped', '_isWrapped')
    _alias( Page, 'load_annot')
    _alias( Page, 'load_links')
    _alias( Page, 'mediabox', 'MediaBox')
    _alias( Page, 'mediabox_size', 'MediaBoxSize')
    _alias( Page, 'new_shape')
    _alias( Page, 'read_contents')
    _alias( Page, 'rotation_matrix')
    _alias( Page, 'search_for')
    _alias( Page, 'set_cropbox', 'setCropBox')
    _alias( Page, 'set_mediabox', 'setMediaBox')
    _alias( Page, 'set_rotation')
    _alias( Page, 'show_pdf_page', 'showPDFpage')
    _alias( Page, 'transformation_matrix')
    _alias( Page, 'update_link')
    _alias( Page, 'wrap_contents')
    _alias( Page, 'write_text')
    _alias( Pixmap, 'clear_with')
    _alias( Pixmap, 'copy', 'copyPixmap')
    _alias( Pixmap, 'gamma_with')
    _alias( Pixmap, 'invert_irect', 'invertIRect')
    _alias( Pixmap, 'pil_save', 'pillowWrite')
    _alias( Pixmap, 'pil_tobytes', 'pillowData')
    _alias( Pixmap, 'save', 'writeImage')
    _alias( Pixmap, 'save', 'writePNG')
    _alias( Pixmap, 'set_alpha')
    _alias( Pixmap, 'set_dpi', 'setResolution')
    _alias( Pixmap, 'set_origin')
    _alias( Pixmap, 'set_pixel')
    _alias( Pixmap, 'set_rect')
    _alias( Pixmap, 'tint_with')
    _alias( Pixmap, 'tobytes', 'getImageData')
    _alias( Pixmap, 'tobytes', 'getPNGData')
    _alias( Pixmap, 'tobytes', 'getPNGdata')
    _alias( Quad, 'is_convex')
    _alias( Quad, 'is_empty')
    _alias( Quad, 'is_rectangular')
    _alias( Rect, 'get_area')
    _alias( Rect, 'get_area', 'getRectArea')
    _alias( Rect, 'include_point')
    _alias( Rect, 'include_rect')
    _alias( Rect, 'is_empty')
    _alias( Rect, 'is_infinite')
    _alias( TextWriter, 'fill_textbox')
    _alias( TextWriter, 'write_text')
    _alias( utils.Shape, 'draw_bezier')
    _alias( utils.Shape, 'draw_circle')
    _alias( utils.Shape, 'draw_curve')
    _alias( utils.Shape, 'draw_line')
    _alias( utils.Shape, 'draw_oval')
    _alias( utils.Shape, 'draw_polyline')
    _alias( utils.Shape, 'draw_quad')
    _alias( utils.Shape, 'draw_rect')
    _alias( utils.Shape, 'draw_sector')
    _alias( utils.Shape, 'draw_squiggle')
    _alias( utils.Shape, 'draw_zigzag')
    _alias( utils.Shape, 'insert_text')
    _alias( utils.Shape, 'insert_textbox')
| 21300 | |
# restore_aliases() is deliberately never called here; the branch is kept so
# it can be switched on by hand.
if False:
    restore_aliases()

# Package version string and module docstring, built from the version
# constants plus details of the running interpreter and platform.
__version__ = VersionBind
__doc__ = (
        f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
        f'Python {sys.version_info.major}.{sys.version_info.minor} running on {sys.platform} ({32 if sys.maxsize <= 2**32 else 64}-bit).\n'
        )
