Mercurial > hgrepos > Python2 > PyMuPDF
comparison src/__init__.py @ 41:71bcc18e306f
MERGE: New upstream PyMuPDF v1.26.5 including MuPDF v1.26.10
BUGS: Needs some additional changes yet.
Not yet tested.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 15:24:40 +0200 |
| parents | 3b13504f9d89 a6bc019ac0b2 |
| children | 4621bd954a09 |
comparison
equal
deleted
inserted
replaced
| 38:8934ac156ef5 | 41:71bcc18e306f |
|---|---|
| 15 import io | 15 import io |
| 16 import math | 16 import math |
| 17 import os | 17 import os |
| 18 import pathlib | 18 import pathlib |
| 19 import glob | 19 import glob |
| 20 import packaging.version | |
| 21 import re | 20 import re |
| 22 import string | 21 import string |
| 23 import sys | 22 import sys |
| 24 import tarfile | 23 import tarfile |
| 25 import time | 24 import time |
| 382 from ._build import mupdf_location # noqa F401 | 381 from ._build import mupdf_location # noqa F401 |
| 383 from ._build import pymupdf_git_branch # noqa F401 | 382 from ._build import pymupdf_git_branch # noqa F401 |
| 384 from ._build import pymupdf_git_diff # noqa F401 | 383 from ._build import pymupdf_git_diff # noqa F401 |
| 385 from ._build import pymupdf_git_sha # noqa F401 | 384 from ._build import pymupdf_git_sha # noqa F401 |
| 386 from ._build import pymupdf_version # noqa F401 | 385 from ._build import pymupdf_version # noqa F401 |
| 386 from ._build import pymupdf_version_tuple # noqa F401 | |
| 387 from ._build import swig_version # noqa F401 | 387 from ._build import swig_version # noqa F401 |
| 388 from ._build import swig_version_tuple # noqa F401 | 388 from ._build import swig_version_tuple # noqa F401 |
| 389 | 389 |
| 390 mupdf_version = mupdf.FZ_VERSION | 390 mupdf_version = mupdf.FZ_VERSION |
| 391 | 391 |
| 392 # Removed in PyMuPDF-1.26.1. | 392 # Removed in PyMuPDF-1.26.1. |
| 393 pymupdf_date = None | 393 pymupdf_date = None |
| 394 | 394 |
| 395 # Versions as tuples; useful when comparing versions. | 395 # Versions as tuples; useful when comparing versions. |
| 396 # | 396 # |
| 397 pymupdf_version_tuple = packaging.version.Version(pymupdf_version).release | |
| 398 mupdf_version_tuple = packaging.version.Version(mupdf_version).release | 397 mupdf_version_tuple = packaging.version.Version(mupdf_version).release |
| 399 | 398 |
| 400 assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \ | 399 assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \ |
| 401 f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}' | 400 f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}' |
| 402 | 401 |
| 1034 res['compression'] = mupdf.pdf_to_name(obj) | 1033 res['compression'] = mupdf.pdf_to_name(obj) |
| 1035 buf = mupdf.pdf_load_stream(sound) | 1034 buf = mupdf.pdf_load_stream(sound) |
| 1036 stream = JM_BinFromBuffer(buf) | 1035 stream = JM_BinFromBuffer(buf) |
| 1037 res['stream'] = stream | 1036 res['stream'] = stream |
| 1038 return res | 1037 return res |
| 1038 | |
| 1039 def get_text(self, *args, **kwargs): | |
| 1040 return utils.get_text(self, *args, **kwargs) | |
| 1041 | |
| 1042 def get_textbox(self, *args, **kwargs): | |
| 1043 return utils.get_textbox(self, *args, **kwargs) | |
| 1039 | 1044 |
| 1040 def get_textpage(self, clip=None, flags=0): | 1045 def get_textpage(self, clip=None, flags=0): |
| 1041 """Make annotation TextPage.""" | 1046 """Make annotation TextPage.""" |
| 1042 CheckParent(self) | 1047 CheckParent(self) |
| 1043 options = mupdf.FzStextOptions(flags) | 1048 options = mupdf.FzStextOptions(flags) |
| 3057 raise RuntimeError( "PDF has no form fonts yet") | 3062 raise RuntimeError( "PDF has no form fonts yet") |
| 3058 k = mupdf.pdf_new_name( name) | 3063 k = mupdf.pdf_new_name( name) |
| 3059 v = JM_pdf_obj_from_str( pdf, font) | 3064 v = JM_pdf_obj_from_str( pdf, font) |
| 3060 mupdf.pdf_dict_put( fonts, k, v) | 3065 mupdf.pdf_dict_put( fonts, k, v) |
| 3061 | 3066 |
| 3067 def del_toc_item( | |
| 3068 self, | |
| 3069 idx: int, | |
| 3070 ) -> None: | |
| 3071 """Delete TOC / bookmark item by index.""" | |
| 3072 xref = self.get_outline_xrefs()[idx] | |
| 3073 self._remove_toc_item(xref) | |
| 3074 | |
| 3062 def _delToC(self): | 3075 def _delToC(self): |
| 3063 """Delete the TOC.""" | 3076 """Delete the TOC.""" |
| 3064 if self.is_closed or self.is_encrypted: | 3077 if self.is_closed or self.is_encrypted: |
| 3065 raise ValueError("document closed or encrypted") | 3078 raise ValueError("document closed or encrypted") |
| 3066 xrefs = [] # create Python list | 3079 xrefs = [] # create Python list |
| 3101 """Delete object.""" | 3114 """Delete object.""" |
| 3102 pdf = _as_pdf_document(self) | 3115 pdf = _as_pdf_document(self) |
| 3103 if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1): | 3116 if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1): |
| 3104 raise ValueError( MSG_BAD_XREF) | 3117 raise ValueError( MSG_BAD_XREF) |
| 3105 mupdf.pdf_delete_object(pdf, xref) | 3118 mupdf.pdf_delete_object(pdf, xref) |
| 3119 | |
| 3120 def _do_links( | |
| 3121 doc1: 'Document', | |
| 3122 doc2: 'Document', | |
| 3123 from_page: int = -1, | |
| 3124 to_page: int = -1, | |
| 3125 start_at: int = -1, | |
| 3126 ) -> None: | |
| 3127 """Insert links contained in copied page range into destination PDF. | |
| 3128 | |
| 3129 Parameter values **must** equal those of method insert_pdf(), which must | |
| 3130 have been previously executed. | |
| 3131 """ | |
| 3132 #pymupdf.log( 'utils.do_links()') | |
| 3133 # -------------------------------------------------------------------------- | |
| 3134 # internal function to create the actual "/Annots" object string | |
| 3135 # -------------------------------------------------------------------------- | |
| 3136 def cre_annot(lnk, xref_dst, pno_src, ctm): | |
| 3137 """Create annotation object string for a passed-in link.""" | |
| 3138 | |
| 3139 r = lnk["from"] * ctm # rect in PDF coordinates | |
| 3140 rect = _format_g(tuple(r)) | |
| 3141 if lnk["kind"] == LINK_GOTO: | |
| 3142 txt = annot_skel["goto1"] # annot_goto | |
| 3143 idx = pno_src.index(lnk["page"]) | |
| 3144 p = lnk["to"] * ctm # target point in PDF coordinates | |
| 3145 annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect) | |
| 3146 | |
| 3147 elif lnk["kind"] == LINK_GOTOR: | |
| 3148 if lnk["page"] >= 0: | |
| 3149 txt = annot_skel["gotor1"] # annot_gotor | |
| 3150 pnt = lnk.get("to", Point(0, 0)) # destination point | |
| 3151 if type(pnt) is not Point: | |
| 3152 pnt = Point(0, 0) | |
| 3153 annot = txt( | |
| 3154 lnk["page"], | |
| 3155 pnt.x, | |
| 3156 pnt.y, | |
| 3157 lnk["zoom"], | |
| 3158 lnk["file"], | |
| 3159 lnk["file"], | |
| 3160 rect, | |
| 3161 ) | |
| 3162 else: | |
| 3163 txt = annot_skel["gotor2"] # annot_gotor_n | |
| 3164 to = get_pdf_str(lnk["to"]) | |
| 3165 to = to[1:-1] | |
| 3166 f = lnk["file"] | |
| 3167 annot = txt(to, f, rect) | |
| 3168 | |
| 3169 elif lnk["kind"] == LINK_LAUNCH: | |
| 3170 txt = annot_skel["launch"] # annot_launch | |
| 3171 annot = txt(lnk["file"], lnk["file"], rect) | |
| 3172 | |
| 3173 elif lnk["kind"] == LINK_URI: | |
| 3174 txt = annot_skel["uri"] # annot_uri | |
| 3175 annot = txt(lnk["uri"], rect) | |
| 3176 | |
| 3177 else: | |
| 3178 annot = "" | |
| 3179 | |
| 3180 return annot | |
| 3181 | |
| 3182 # -------------------------------------------------------------------------- | |
| 3183 | |
| 3184 # validate & normalize parameters | |
| 3185 if from_page < 0: | |
| 3186 fp = 0 | |
| 3187 elif from_page >= doc2.page_count: | |
| 3188 fp = doc2.page_count - 1 | |
| 3189 else: | |
| 3190 fp = from_page | |
| 3191 | |
| 3192 if to_page < 0 or to_page >= doc2.page_count: | |
| 3193 tp = doc2.page_count - 1 | |
| 3194 else: | |
| 3195 tp = to_page | |
| 3196 | |
| 3197 if start_at < 0: | |
| 3198 raise ValueError("'start_at' must be >= 0") | |
| 3199 sa = start_at | |
| 3200 | |
| 3201 incr = 1 if fp <= tp else -1 # page range could be reversed | |
| 3202 | |
| 3203 # lists of source / destination page numbers | |
| 3204 pno_src = list(range(fp, tp + incr, incr)) | |
| 3205 pno_dst = [sa + i for i in range(len(pno_src))] | |
| 3206 | |
| 3207 # lists of source / destination page xrefs | |
| 3208 xref_src = [] | |
| 3209 xref_dst = [] | |
| 3210 for i in range(len(pno_src)): | |
| 3211 p_src = pno_src[i] | |
| 3212 p_dst = pno_dst[i] | |
| 3213 old_xref = doc2.page_xref(p_src) | |
| 3214 new_xref = doc1.page_xref(p_dst) | |
| 3215 xref_src.append(old_xref) | |
| 3216 xref_dst.append(new_xref) | |
| 3217 | |
| 3218 # create the links for each copied page in destination PDF | |
| 3219 for i in range(len(xref_src)): | |
| 3220 page_src = doc2[pno_src[i]] # load source page | |
| 3221 links = page_src.get_links() # get all its links | |
| 3222 #log( '{pno_src=}') | |
| 3223 #log( '{type(page_src)=}') | |
| 3224 #log( '{page_src=}') | |
| 3225 #log( '{=i len(links)}') | |
| 3226 if len(links) == 0: # no links there | |
| 3227 page_src = None | |
| 3228 continue | |
| 3229 ctm = ~page_src.transformation_matrix # calc page transformation matrix | |
| 3230 page_dst = doc1[pno_dst[i]] # load destination page | |
| 3231 link_tab = [] # store all link definitions here | |
| 3232 for l in links: | |
| 3233 if l["kind"] == LINK_GOTO and (l["page"] not in pno_src): | |
| 3234 continue # GOTO link target not in copied pages | |
| 3235 annot_text = cre_annot(l, xref_dst, pno_src, ctm) | |
| 3236 if annot_text: | |
| 3237 link_tab.append(annot_text) | |
| 3238 if link_tab != []: | |
| 3239 page_dst._addAnnot_FromString( tuple(link_tab)) | |
| 3240 #log( 'utils.do_links() returning.') | |
| 3241 | |
| 3242 def _do_widgets( | |
| 3243 tar: 'Document', | |
| 3244 src: 'Document', | |
| 3245 graftmap, | |
| 3246 from_page: int = -1, | |
| 3247 to_page: int = -1, | |
| 3248 start_at: int = -1, | |
| 3249 join_duplicates=0, | |
| 3250 ) -> None: | |
| 3251 """Insert widgets of copied page range into target PDF. | |
| 3252 | |
| 3253 Parameter values **must** equal those of method insert_pdf() which | |
| 3254 must have been previously executed. | |
| 3255 """ | |
| 3256 if not src.is_form_pdf: # nothing to do: source PDF has no fields | |
| 3257 return | |
| 3258 | |
| 3259 def clean_kid_parents(acro_fields): | |
| 3260 """ Make sure all kids have correct "Parent" pointers.""" | |
| 3261 for i in range(acro_fields.pdf_array_len()): | |
| 3262 parent = acro_fields.pdf_array_get(i) | |
| 3263 kids = parent.pdf_dict_get(PDF_NAME("Kids")) | |
| 3264 for j in range(kids.pdf_array_len()): | |
| 3265 kid = kids.pdf_array_get(j) | |
| 3266 kid.pdf_dict_put(PDF_NAME("Parent"), parent) | |
| 3267 | |
| 3268 def join_widgets(pdf, acro_fields, xref1, xref2, name): | |
| 3269 """Called for each pair of widgets having the same name. | |
| 3270 | |
| 3271 Args: | |
| 3272 pdf: target MuPDF document | |
| 3273 acro_fields: object Root/AcroForm/Fields | |
| 3274 xref1, xref2: widget xrefs having same names | |
| 3275 name: (str) the name | |
| 3276 | |
| 3277 Result: | |
| 3278 Defined or updated widget parent that points to both widgets. | |
| 3279 """ | |
| 3280 | |
| 3281 def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2): | |
| 3282 """Merge widget in xref2 into "Kids" list of widget xref1. | |
| 3283 | |
| 3284 Args: | |
| 3285 xref1, kids1: target widget and its "Kids" array. | |
| 3286 xref2, kids2: source wwidget and its "Kids" array (may be empty). | |
| 3287 """ | |
| 3288 # make indirect objects from widgets | |
| 3289 w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0) | |
| 3290 w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0) | |
| 3291 # find source widget in "Fields" array | |
| 3292 idx = acro_fields.pdf_array_find(w2_ind) | |
| 3293 acro_fields.pdf_array_delete(idx) | |
| 3294 | |
| 3295 if not kids2.pdf_is_array(): # source widget has no kids | |
| 3296 widget = mupdf.pdf_load_object(pdf, xref2) | |
| 3297 | |
| 3298 # delete name from widget and insert target as parent | |
| 3299 widget.pdf_dict_del(PDF_NAME("T")) | |
| 3300 widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind) | |
| 3301 | |
| 3302 # put in target Kids | |
| 3303 kids1.pdf_array_push(w2_ind) | |
| 3304 else: # copy source kids to target kids | |
| 3305 for i in range(kids2.pdf_array_len()): | |
| 3306 kid = kids2.pdf_array_get(i) | |
| 3307 kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind) | |
| 3308 kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0) | |
| 3309 kids1.pdf_array_push(kid_ind) | |
| 3310 | |
| 3311 def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name): | |
| 3312 """Make new "Parent" for two widgets with same name. | |
| 3313 | |
| 3314 Args: | |
| 3315 xref1, w1: first widget | |
| 3316 xref2, w2: second widget | |
| 3317 name: field name | |
| 3318 | |
| 3319 Result: | |
| 3320 Both widgets have no "Kids". We create a new object with the | |
| 3321 name and a "Kids" array containing the widgets. | |
| 3322 Original widgets must be removed from AcroForm/Fields. | |
| 3323 """ | |
| 3324 # make new "Parent" object | |
| 3325 new = mupdf.pdf_new_dict(pdf, 5) | |
| 3326 new.pdf_dict_put_text_string(PDF_NAME("T"), name) | |
| 3327 kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2) | |
| 3328 new_obj = mupdf.pdf_add_object(pdf, new) | |
| 3329 new_obj_xref = new_obj.pdf_to_num() | |
| 3330 new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0) | |
| 3331 | |
| 3332 # copy over some required source widget properties | |
| 3333 ft = w1.pdf_dict_get(PDF_NAME("FT")) | |
| 3334 w1.pdf_dict_del(PDF_NAME("FT")) | |
| 3335 new_obj.pdf_dict_put(PDF_NAME("FT"), ft) | |
| 3336 | |
| 3337 aa = w1.pdf_dict_get(PDF_NAME("AA")) | |
| 3338 w1.pdf_dict_del(PDF_NAME("AA")) | |
| 3339 new_obj.pdf_dict_put(PDF_NAME("AA"), aa) | |
| 3340 | |
| 3341 # remove name field, insert "Parent" field in source widgets | |
| 3342 w1.pdf_dict_del(PDF_NAME("T")) | |
| 3343 w1.pdf_dict_put(PDF_NAME("Parent"), new_ind) | |
| 3344 w2.pdf_dict_del(PDF_NAME("T")) | |
| 3345 w2.pdf_dict_put(PDF_NAME("Parent"), new_ind) | |
| 3346 | |
| 3347 # put source widgets in "kids" array | |
| 3348 ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0) | |
| 3349 ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0) | |
| 3350 kids.pdf_array_push(ind1) | |
| 3351 kids.pdf_array_push(ind2) | |
| 3352 | |
| 3353 # remove source widgets from "AcroForm/Fields" | |
| 3354 idx = acro_fields.pdf_array_find(ind1) | |
| 3355 acro_fields.pdf_array_delete(idx) | |
| 3356 idx = acro_fields.pdf_array_find(ind2) | |
| 3357 acro_fields.pdf_array_delete(idx) | |
| 3358 | |
| 3359 acro_fields.pdf_array_push(new_ind) | |
| 3360 | |
| 3361 w1 = mupdf.pdf_load_object(pdf, xref1) | |
| 3362 w2 = mupdf.pdf_load_object(pdf, xref2) | |
| 3363 kids1 = w1.pdf_dict_get(PDF_NAME("Kids")) | |
| 3364 kids2 = w2.pdf_dict_get(PDF_NAME("Kids")) | |
| 3365 | |
| 3366 # check which widget has a suitable "Kids" array | |
| 3367 if kids1.pdf_is_array(): | |
| 3368 re_target(pdf, acro_fields, xref1, kids1, xref2, kids2) # pylint: disable=arguments-out-of-order | |
| 3369 elif kids2.pdf_is_array(): | |
| 3370 re_target(pdf, acro_fields, xref2, kids2, xref1, kids1) # pylint: disable=arguments-out-of-order | |
| 3371 else: | |
| 3372 new_target(pdf, acro_fields, xref1, w1, xref2, w2, name) # pylint: disable=arguments-out-of-order | |
| 3373 | |
| 3374 def get_kids(parent, kids_list): | |
| 3375 """Return xref list of leaf kids for a parent. | |
| 3376 | |
| 3377 Call with an empty list. | |
| 3378 """ | |
| 3379 kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids")) | |
| 3380 if not kids.pdf_is_array(): | |
| 3381 return kids_list | |
| 3382 for i in range(kids.pdf_array_len()): | |
| 3383 kid = kids.pdf_array_get(i) | |
| 3384 if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))): | |
| 3385 kids_list = get_kids(kid, kids_list) | |
| 3386 else: | |
| 3387 kids_list.append(kid.pdf_to_num()) | |
| 3388 return kids_list | |
| 3389 | |
| 3390 def kids_xrefs(widget): | |
| 3391 """Get the xref of top "Parent" and the list of leaf widgets.""" | |
| 3392 kids_list = [] | |
| 3393 parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent")) | |
| 3394 parent_xref = parent.pdf_to_num() | |
| 3395 if parent_xref == 0: | |
| 3396 return parent_xref, kids_list | |
| 3397 kids_list = get_kids(parent, kids_list) | |
| 3398 return parent_xref, kids_list | |
| 3399 | |
| 3400 def deduplicate_names(pdf, acro_fields, join_duplicates=False): | |
| 3401 """Handle any widget name duplicates caused by the merge.""" | |
| 3402 names = {} # key is a widget name, value a list of widgets having it. | |
| 3403 | |
| 3404 # extract all names and widgets in "AcroForm/Fields" | |
| 3405 for i in range(mupdf.pdf_array_len(acro_fields)): | |
| 3406 wobject = mupdf.pdf_array_get(acro_fields, i) | |
| 3407 xref = wobject.pdf_to_num() | |
| 3408 | |
| 3409 # extract widget name and collect widget(s) using it | |
| 3410 T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T")) | |
| 3411 xrefs = names.get(T, []) | |
| 3412 xrefs.append(xref) | |
| 3413 names[T] = xrefs | |
| 3414 | |
| 3415 for name, xrefs in names.items(): | |
| 3416 if len(xrefs) < 2: | |
| 3417 continue | |
| 3418 xref0, xref1 = xrefs[:2] # only exactly 2 should occur! | |
| 3419 if join_duplicates: # combine fields with equal names | |
| 3420 join_widgets(pdf, acro_fields, xref0, xref1, name) | |
| 3421 else: # make field names unique | |
| 3422 newname = name + f" [{xref1}]" # append this to the name | |
| 3423 wobject = mupdf.pdf_load_object(pdf, xref1) | |
| 3424 wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname) | |
| 3425 | |
| 3426 clean_kid_parents(acro_fields) | |
| 3427 | |
| 3428 def get_acroform(doc): | |
| 3429 """Retrieve the AcroForm dictionary form a PDF.""" | |
| 3430 pdf = mupdf.pdf_document_from_fz_document(doc) | |
| 3431 # AcroForm (= central form field info) | |
| 3432 return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm") | |
| 3433 | |
| 3434 tarpdf = mupdf.pdf_document_from_fz_document(tar) | |
| 3435 srcpdf = mupdf.pdf_document_from_fz_document(src) | |
| 3436 | |
| 3437 if tar.is_form_pdf: | |
| 3438 # target is a Form PDF, so use it to include source fields | |
| 3439 acro = get_acroform(tar) | |
| 3440 # Important arrays in AcroForm | |
| 3441 acro_fields = acro.pdf_dict_get(PDF_NAME("Fields")) | |
| 3442 tar_co = acro.pdf_dict_get(PDF_NAME("CO")) | |
| 3443 if not tar_co.pdf_is_array(): | |
| 3444 tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5) | |
| 3445 else: | |
| 3446 # target is no Form PDF, so copy over source AcroForm | |
| 3447 acro = mupdf.pdf_deep_copy_obj(get_acroform(src)) # make a copy | |
| 3448 | |
| 3449 # Clear "Fields" and "CO" arrays: will be populated by page fields. | |
| 3450 # This is required to avoid copying unneeded objects. | |
| 3451 acro.pdf_dict_del(PDF_NAME("Fields")) | |
| 3452 acro.pdf_dict_put_array(PDF_NAME("Fields"), 5) | |
| 3453 acro.pdf_dict_del(PDF_NAME("CO")) | |
| 3454 acro.pdf_dict_put_array(PDF_NAME("CO"), 5) | |
| 3455 | |
| 3456 # Enrich AcroForm for copying to target | |
| 3457 acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro) | |
| 3458 | |
| 3459 # Insert AcroForm into target PDF | |
| 3460 acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft) | |
| 3461 acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields")) | |
| 3462 tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO")) | |
| 3463 | |
| 3464 # get its xref and insert it into target catalog | |
| 3465 tar_xref = acro_tar.pdf_to_num() | |
| 3466 acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) | |
| 3467 root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root")) | |
| 3468 root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind) | |
| 3469 | |
| 3470 if from_page <= to_page: | |
| 3471 src_range = range(from_page, to_page + 1) | |
| 3472 else: | |
| 3473 src_range = range(from_page, to_page - 1, -1) | |
| 3474 | |
| 3475 parents = {} # information about widget parents | |
| 3476 | |
| 3477 # remove "P" owning page reference from all widgets of all source pages | |
| 3478 for i in src_range: | |
| 3479 src_page = src[i] | |
| 3480 for xref in [ | |
| 3481 xref | |
| 3482 for xref, wtype, _ in src_page.annot_xrefs() | |
| 3483 if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member | |
| 3484 ]: | |
| 3485 w_obj = mupdf.pdf_load_object(srcpdf, xref) | |
| 3486 w_obj.pdf_dict_del(PDF_NAME("P")) | |
| 3487 | |
| 3488 # get the widget's parent structure | |
| 3489 parent_xref, old_kids = kids_xrefs(w_obj) | |
| 3490 if parent_xref: | |
| 3491 parents[parent_xref] = { | |
| 3492 "new_xref": 0, | |
| 3493 "old_kids": old_kids, | |
| 3494 "new_kids": [], | |
| 3495 } | |
| 3496 # Copy over Parent widgets first - they are not page-dependent | |
| 3497 for xref in parents.keys(): # pylint: disable=consider-using-dict-items | |
| 3498 parent = mupdf.pdf_load_object(srcpdf, xref) | |
| 3499 parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent) | |
| 3500 parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft) | |
| 3501 kids_xrefs_new = get_kids(parent_tar, []) | |
| 3502 parent_xref_new = parent_tar.pdf_to_num() | |
| 3503 parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0) | |
| 3504 acro_fields.pdf_array_push(parent_ind) | |
| 3505 parents[xref]["new_xref"] = parent_xref_new | |
| 3506 parents[xref]["new_kids"] = kids_xrefs_new | |
| 3507 | |
| 3508 for i in range(len(src_range)): | |
| 3509 # read first copied over page in target | |
| 3510 tar_page = tar[start_at + i] | |
| 3511 | |
| 3512 # read the original page in the source PDF | |
| 3513 src_page = src[src_range[i]] | |
| 3514 | |
| 3515 # now walk through source page widgets and copy over | |
| 3516 w_xrefs = [ # widget xrefs of the source page | |
| 3517 xref | |
| 3518 for xref, wtype, _ in src_page.annot_xrefs() | |
| 3519 if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member | |
| 3520 ] | |
| 3521 if not w_xrefs: # no widgets on this source page | |
| 3522 continue | |
| 3523 | |
| 3524 # convert to formal PDF page | |
| 3525 tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page) | |
| 3526 | |
| 3527 # extract annotations array | |
| 3528 tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots")) | |
| 3529 if not mupdf.pdf_is_array(tar_annots): | |
| 3530 tar_annots = mupdf.pdf_dict_put_array( | |
| 3531 tar_page_pdf.obj(), PDF_NAME("Annots"), 5 | |
| 3532 ) | |
| 3533 | |
| 3534 for xref in w_xrefs: | |
| 3535 w_obj = mupdf.pdf_load_object(srcpdf, xref) | |
| 3536 | |
| 3537 # check if field takes part in inter-field validations | |
| 3538 is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C")) | |
| 3539 | |
| 3540 # check if parent of widget already in target | |
| 3541 parent_xref = mupdf.pdf_to_num( | |
| 3542 w_obj.pdf_dict_get(PDF_NAME("Parent")) | |
| 3543 ) | |
| 3544 if parent_xref == 0: # parent not in target yet | |
| 3545 try: | |
| 3546 w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj) | |
| 3547 except Exception as e: | |
| 3548 message_warning(f"cannot copy widget at {xref=}: {e}") | |
| 3549 continue | |
| 3550 w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft) | |
| 3551 tar_xref = w_obj_tar.pdf_to_num() | |
| 3552 w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) | |
| 3553 mupdf.pdf_array_push(tar_annots, w_obj_tar_ind) | |
| 3554 mupdf.pdf_array_push(acro_fields, w_obj_tar_ind) | |
| 3555 else: | |
| 3556 parent = parents[parent_xref] | |
| 3557 idx = parent["old_kids"].index(xref) # search for xref in parent | |
| 3558 tar_xref = parent["new_kids"][idx] | |
| 3559 w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) | |
| 3560 mupdf.pdf_array_push(tar_annots, w_obj_tar_ind) | |
| 3561 | |
| 3562 # Into "AcroForm/CO" if a computation field. | |
| 3563 if is_aac: | |
| 3564 mupdf.pdf_array_push(tar_co, w_obj_tar_ind) | |
| 3565 | |
| 3566 deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates) | |
| 3106 | 3567 |
| 3107 def _embeddedFileGet(self, idx): | 3568 def _embeddedFileGet(self, idx): |
| 3108 pdf = _as_pdf_document(self) | 3569 pdf = _as_pdf_document(self) |
| 3109 names = mupdf.pdf_dict_getl( | 3570 names = mupdf.pdf_dict_getl( |
| 3110 mupdf.pdf_trailer(pdf), | 3571 mupdf.pdf_trailer(pdf), |
| 4265 finally: | 4726 finally: |
| 4266 mupdf.ll_pdf_drop_page_tree( pdf.m_internal) | 4727 mupdf.ll_pdf_drop_page_tree( pdf.m_internal) |
| 4267 | 4728 |
| 4268 self._reset_page_refs() | 4729 self._reset_page_refs() |
| 4269 | 4730 |
| 4731 def get_char_widths( | |
| 4732 doc: 'Document', | |
| 4733 xref: int, | |
| 4734 limit: int = 256, | |
| 4735 idx: int = 0, | |
| 4736 fontdict: OptDict = None, | |
| 4737 ) -> list: | |
| 4738 """Get list of glyph information of a font. | |
| 4739 | |
| 4740 Notes: | |
| 4741 Must be provided by its XREF number. If we already dealt with the | |
| 4742 font, it will be recorded in doc.FontInfos. Otherwise we insert an | |
| 4743 entry there. | |
| 4744 Finally we return the glyphs for the font. This is a list of | |
| 4745 (glyph, width) where glyph is an integer controlling the char | |
| 4746 appearance, and width is a float controlling the char's spacing: | |
| 4747 width * fontsize is the actual space. | |
| 4748 For 'simple' fonts, glyph == ord(char) will usually be true. | |
| 4749 Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here. | |
| 4750 """ | |
| 4751 fontinfo = CheckFontInfo(doc, xref) | |
| 4752 if fontinfo is None: # not recorded yet: create it | |
| 4753 if fontdict is None: | |
| 4754 name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref) | |
| 4755 fontdict = { | |
| 4756 "name": name, | |
| 4757 "type": stype, | |
| 4758 "ext": ext, | |
| 4759 "ascender": asc, | |
| 4760 "descender": dsc, | |
| 4761 } | |
| 4762 else: | |
| 4763 name = fontdict["name"] | |
| 4764 ext = fontdict["ext"] | |
| 4765 stype = fontdict["type"] | |
| 4766 ordering = fontdict["ordering"] | |
| 4767 simple = fontdict["simple"] | |
| 4768 | |
| 4769 if ext == "": | |
| 4770 raise ValueError("xref is not a font") | |
| 4771 | |
| 4772 # check for 'simple' fonts | |
| 4773 if stype in ("Type1", "MMType1", "TrueType"): | |
| 4774 simple = True | |
| 4775 else: | |
| 4776 simple = False | |
| 4777 | |
| 4778 # check for CJK fonts | |
| 4779 if name in ("Fangti", "Ming"): | |
| 4780 ordering = 0 | |
| 4781 elif name in ("Heiti", "Song"): | |
| 4782 ordering = 1 | |
| 4783 elif name in ("Gothic", "Mincho"): | |
| 4784 ordering = 2 | |
| 4785 elif name in ("Dotum", "Batang"): | |
| 4786 ordering = 3 | |
| 4787 else: | |
| 4788 ordering = -1 | |
| 4789 | |
| 4790 fontdict["simple"] = simple | |
| 4791 | |
| 4792 if name == "ZapfDingbats": | |
| 4793 glyphs = zapf_glyphs | |
| 4794 elif name == "Symbol": | |
| 4795 glyphs = symbol_glyphs | |
| 4796 else: | |
| 4797 glyphs = None | |
| 4798 | |
| 4799 fontdict["glyphs"] = glyphs | |
| 4800 fontdict["ordering"] = ordering | |
| 4801 fontinfo = [xref, fontdict] | |
| 4802 doc.FontInfos.append(fontinfo) | |
| 4803 else: | |
| 4804 fontdict = fontinfo[1] | |
| 4805 glyphs = fontdict["glyphs"] | |
| 4806 simple = fontdict["simple"] | |
| 4807 ordering = fontdict["ordering"] | |
| 4808 | |
| 4809 if glyphs is None: | |
| 4810 oldlimit = 0 | |
| 4811 else: | |
| 4812 oldlimit = len(glyphs) | |
| 4813 | |
| 4814 mylimit = max(256, limit) | |
| 4815 | |
| 4816 if mylimit <= oldlimit: | |
| 4817 return glyphs | |
| 4818 | |
| 4819 if ordering < 0: # not a CJK font | |
| 4820 glyphs = doc._get_char_widths( | |
| 4821 xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx | |
| 4822 ) | |
| 4823 else: # CJK fonts use char codes and width = 1 | |
| 4824 glyphs = None | |
| 4825 | |
| 4826 fontdict["glyphs"] = glyphs | |
| 4827 fontinfo[1] = fontdict | |
| 4828 UpdateFontInfo(doc, fontinfo) | |
| 4829 | |
| 4830 return glyphs | |
| 4831 | |
| 4270 def get_layer(self, config=-1): | 4832 def get_layer(self, config=-1): |
| 4271 """Content of ON, OFF, RBGroups of an OC layer.""" | 4833 """Content of ON, OFF, RBGroups of an OC layer.""" |
| 4272 pdf = _as_pdf_document(self) | 4834 pdf = _as_pdf_document(self) |
| 4273 ocp = mupdf.pdf_dict_getl( | 4835 ocp = mupdf.pdf_dict_getl( |
| 4274 mupdf.pdf_trailer( pdf), | 4836 mupdf.pdf_trailer( pdf), |
| 4322 xref = 0 | 4884 xref = 0 |
| 4323 ENSURE_OPERATION(pdf) | 4885 ENSURE_OPERATION(pdf) |
| 4324 xref = mupdf.pdf_create_object(pdf) | 4886 xref = mupdf.pdf_create_object(pdf) |
| 4325 return xref | 4887 return xref |
| 4326 | 4888 |
| 4889 def get_oc(doc: 'Document', xref: int) -> int: | |
| 4890 """Return optional content object xref for an image or form xobject. | |
| 4891 | |
| 4892 Args: | |
| 4893 xref: (int) xref number of an image or form xobject. | |
| 4894 """ | |
| 4895 if doc.is_closed or doc.is_encrypted: | |
| 4896 raise ValueError("document close or encrypted") | |
| 4897 t, name = doc.xref_get_key(xref, "Subtype") | |
| 4898 if t != "name" or name not in ("/Image", "/Form"): | |
| 4899 raise ValueError("bad object type at xref %i" % xref) | |
| 4900 t, oc = doc.xref_get_key(xref, "OC") | |
| 4901 if t != "xref": | |
| 4902 return 0 | |
| 4903 rc = int(oc.replace("0 R", "")) | |
| 4904 return rc | |
| 4905 | |
| 4327 def get_ocgs(self): | 4906 def get_ocgs(self): |
| 4328 """Show existing optional content groups.""" | 4907 """Show existing optional content groups.""" |
| 4329 ci = mupdf.pdf_new_name( "CreatorInfo") | 4908 ci = mupdf.pdf_new_name( "CreatorInfo") |
| 4330 pdf = _as_pdf_document(self) | 4909 pdf = _as_pdf_document(self) |
| 4331 ocgs = mupdf.pdf_dict_getl( | 4910 ocgs = mupdf.pdf_dict_getl( |
| 4354 m = mupdf.pdf_array_len( intent) | 4933 m = mupdf.pdf_array_len( intent) |
| 4355 for j in range(m): | 4934 for j in range(m): |
| 4356 o = mupdf.pdf_array_get( intent, j) | 4935 o = mupdf.pdf_array_get( intent, j) |
| 4357 if mupdf.pdf_is_name( o): | 4936 if mupdf.pdf_is_name( o): |
| 4358 intents.append( mupdf.pdf_to_name( o)) | 4937 intents.append( mupdf.pdf_to_name( o)) |
| 4359 hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg) | 4938 if mupdf_version_tuple >= (1, 27): |
| 4939 resource_stack = mupdf.PdfResourceStack() | |
| 4940 hidden = mupdf.pdf_is_ocg_hidden( pdf, resource_stack, usage, ocg) | |
| 4941 else: | |
| 4942 hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg) | |
| 4360 item = { | 4943 item = { |
| 4361 "name": name, | 4944 "name": name, |
| 4362 "intent": intents, | 4945 "intent": intents, |
| 4363 "on": not hidden, | 4946 "on": not hidden, |
| 4364 "usage": usage, | 4947 "usage": usage, |
| 4365 } | 4948 } |
| 4366 temp = xref | 4949 temp = xref |
| 4367 rc[ temp] = item | 4950 rc[ temp] = item |
| 4368 return rc | 4951 return rc |
| 4952 | |
def get_ocmd(doc: 'Document', xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy name) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.

    Args:
        xref: (int) xref number of the OCMD object.
    Returns:
        A dict with keys "xref" (the given xref), "ocgs" (list of int or
        None), "policy" (one of 'AllOn', 'AnyOn', 'AnyOff', 'AllOff' or None)
        and "ve" (nested list or None).
    Raises:
        ValueError: if xref is out of range, the object is not an OCMD, or
            its /P or /VE entry cannot be parsed.
    """

    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        # "5 0 R 6 0 R" -> ["5", "6"]: drop the generation number and 'R'
        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
        ocgs = list(map(int, ocgs))

    p0 = text.find("/P/")  # look for /P policy key
    if p0 < 0:
        policy = None
    else:
        # Take the complete PDF name following "/P/", i.e. everything up to
        # the next delimiter or whitespace. Searching for the substrings
        # "ff" / "on" instead would miss 'AnyOn' and 'AllOn' (capital 'O')
        # and could run into unrelated text further down the object.
        match = re.match(r"[^\s/\[\]<>(){}%]+", text[p0 + 3 :])
        policy = match.group() if match else None
        if policy not in ("AllOn", "AnyOn", "AnyOff", "AllOff"):
            raise ValueError("bad object at xref")  # some irregular syntax

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding the matching last ']'.
        p1 = p0
        while lp < 1 or lp != rp:
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        # Rewrite the PDF array as JSON: operators become strings and
        # indirect references "n 0 R" become plain integers.
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        import json
        try:
            ve = json.loads(ve)
        except Exception:
            exception_info()
            message(f"bad /VE key: {ve!r}")
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
| 4369 | 5019 |
| 4370 def get_outline_xrefs(self): | 5020 def get_outline_xrefs(self): |
| 4371 """Get list of outline xref numbers.""" | 5021 """Get list of outline xref numbers.""" |
| 4372 xrefs = [] | 5022 xrefs = [] |
| 4373 pdf = _as_pdf_document(self, required=0) | 5023 pdf = _as_pdf_document(self, required=0) |
| 4413 val = self._getPageInfo(pno, 2) | 5063 val = self._getPageInfo(pno, 2) |
| 4414 if not full: | 5064 if not full: |
| 4415 return [v[:-1] for v in val] | 5065 return [v[:-1] for v in val] |
| 4416 return val | 5066 return val |
| 4417 | 5067 |
def get_page_labels(self):
    """Return the page label definitions of the PDF document.

    Returns:
        A list with one dictionary per label rule, each of the format
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    # Jorj McKie, 2021-01-10
    rules = []
    for raw_rule in self._get_page_labels():
        # convert each raw rule into its dictionary form
        rules.append(utils.rule_dict(raw_rule))
    return rules
| 5077 | |
def get_page_numbers(doc, label, only_one=False):
    """Return the numbers of all pages that carry a given label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) label.
        only_one: (bool) stop searching after first hit.
    Returns:
        List of page numbers having this label. Empty if no label is given
        or the document defines no page labels.
    """
    # Jorj McKie, 2021-01-06

    hits = []
    if not label:
        return hits
    rules = doc._get_page_labels()
    if rules == []:  # document has no label definitions
        return hits
    for pno in range(doc.page_count):
        if utils.get_label_pno(pno, rules) != label:
            continue
        hits.append(pno)
        if only_one:
            break
    return hits
| 5103 | |
def get_page_pixmap(
    doc: 'Document',
    pno: int,
    *,
    matrix: matrix_like = None,
    dpi=None,
    colorspace: Colorspace = None,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> 'Pixmap':
    """Render one page of the document, selected by page number, to a pixmap.

    Notes:
        Convenience function: loads page 'pno' and calls its get_pixmap().
    Args:
        pno: (int) page number
        matrix: pymupdf.Matrix for transformation. None selects Identity.
        dpi: passed through unchanged to page.get_pixmap.
        colorspace: colorspace of the pixmap. None selects csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) include alpha channel
        annots: (bool) also render annotations
    Returns:
        Whatever page.get_pixmap() returns for these arguments.
    """
    render_options = {
        "matrix": Identity if matrix is None else matrix,
        "dpi": dpi,
        "colorspace": csRGB if colorspace is None else colorspace,
        "clip": clip,
        "alpha": alpha,
        "annots": annots,
    }
    page = doc[pno]
    return page.get_pixmap(**render_options)
| 5138 | |
def get_page_text(
    doc: 'Document',
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: 'TextPage' = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().
    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: passed through to page.get_text().
        flags: passed through to page.get_text().
        textpage: a previously created TextPage to reuse; passed through to
            page.get_text().
        sort: passed through to page.get_text().
    Returns:
        output from page.get_text().
    """
    # 'textpage' must be forwarded as well, otherwise a TextPage supplied by
    # the caller is silently ignored.
    return doc[pno].get_text(
        option,
        clip=clip,
        flags=flags,
        textpage=textpage,
        sort=sort,
    )
| 5159 | |
| 4418 def get_page_xobjects(self, pno: int) -> list: | 5160 def get_page_xobjects(self, pno: int) -> list: |
| 4419 """Retrieve a list of XObjects used on a page. | 5161 """Retrieve a list of XObjects used on a page. |
| 4420 """ | 5162 """ |
| 4421 if self.is_closed or self.is_encrypted: | 5163 if self.is_closed or self.is_encrypted: |
| 4422 raise ValueError("document closed or encrypted") | 5164 raise ValueError("document closed or encrypted") |
| 4439 sigflag = -1 | 5181 sigflag = -1 |
| 4440 if sigflags.m_internal: | 5182 if sigflags.m_internal: |
| 4441 sigflag = mupdf.pdf_to_int(sigflags) | 5183 sigflag = mupdf.pdf_to_int(sigflags) |
| 4442 return sigflag | 5184 return sigflag |
| 4443 | 5185 |
def get_toc(
    doc: 'Document',
    simple: bool = True,
) -> list:
    """Build the table of contents from the document outline.

    Args:
        simple: a bool to control output. Each entry of the returned list
            consists of outline level, title and page number; with
            simple = False a link destination dictionary is appended.
            For details see PyMuPDF's documentation.
    Returns:
        A list of entries in outline order; empty if there is no outline.
    Raises:
        ValueError: if the document is closed.
    """
    def target_page(node):
        """Return the 1-based target page of an outline item, or -1 if it has none."""
        if node.is_external or not node.uri:
            return -1
        if node.page == -1:
            # page not known to the item: resolve it from the uri
            return doc.resolve_link(node.uri)[0] + 1
        return node.page + 1

    def walk(node, rows, level):
        """Follow the sibling chain of 'node', descending into children first."""
        while node and node.this.m_internal:
            title = node.title if node.title else " "
            page = target_page(node)
            if simple:
                rows.append([level, title, page])
            else:
                rows.append([level, title, page, utils.getLinkDict(node, doc)])
            if node.down:
                rows = walk(node.down, rows, level + 1)
            node = node.next
        return rows

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    root = doc.outline
    if not root:
        return []
    toc = walk(root, [], 1)
    if doc.is_pdf and not simple:
        doc._extend_toc_items(toc)
    return toc
| 5239 | |
| 4444 def get_xml_metadata(self): | 5240 def get_xml_metadata(self): |
| 4445 """Get document XML metadata.""" | 5241 """Get document XML metadata.""" |
| 4446 xml = None | 5242 xml = None |
| 4447 pdf = _as_pdf_document(self, required=0) | 5243 pdf = _as_pdf_document(self, required=0) |
| 4448 if pdf.m_internal: | 5244 if pdf.m_internal: |
| 4456 rc = JM_UnicodeFromBuffer(buff) | 5252 rc = JM_UnicodeFromBuffer(buff) |
| 4457 else: | 5253 else: |
| 4458 rc = '' | 5254 rc = '' |
| 4459 return rc | 5255 return rc |
| 4460 | 5256 |
def has_annots(doc: 'Document') -> bool:
    """Tell whether any page has an annotation that is neither a link nor a widget.

    Raises:
        ValueError: if the document is closed or is not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    for pno in range(doc.page_count):
        for entry in doc.page_annot_xrefs(pno):
            annot_type = entry[1]
            # links and form fields (widgets) do not count as annotations here
            if annot_type == mupdf.PDF_ANNOT_LINK:  # pylint: disable=no-member
                continue
            if annot_type == mupdf.PDF_ANNOT_WIDGET:  # pylint: disable=no-member
                continue
            return True
    return False
| 5269 | |
def has_links(doc: 'Document') -> bool:
    """Tell whether any page of the PDF has a link annotation.

    Raises:
        ValueError: if the document is closed or is not a PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # any() stops at the first link found, like an early 'return True'
    return any(
        entry[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
        for pno in range(doc.page_count)
        for entry in doc.page_annot_xrefs(pno)
    )
| 5281 | |
| 4461 def init_doc(self): | 5282 def init_doc(self): |
| 4462 if self.is_encrypted: | 5283 if self.is_encrypted: |
| 4463 raise ValueError("cannot initialize - document still encrypted") | 5284 raise ValueError("cannot initialize - document still encrypted") |
| 4464 self._outline = self._loadOutline() | 5285 self._outline = self._loadOutline() |
| 4465 self.metadata = dict( | 5286 self.metadata = dict( |
| 4521 annots=annots, | 5342 annots=annots, |
| 4522 show_progress=show_progress, | 5343 show_progress=show_progress, |
| 4523 final=final, | 5344 final=final, |
| 4524 ) | 5345 ) |
| 4525 | 5346 |
def insert_page(
    doc: 'Document',
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Add a new PDF page and optionally write some text on it.

    Notes:
        Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
        For parameter details see these methods.
    Returns:
        0 if no text was given, otherwise the return value of
        page.insert_text().
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if text:
        # the text always starts at point (50, 72) of the new page
        return page.insert_text(
            (50, 72),
            text,
            fontsize=fontsize,
            fontname=fontname,
            fontfile=fontfile,
            color=color,
        )
    return 0
| 5376 | |
| 4526 def insert_pdf( | 5377 def insert_pdf( |
| 4527 self, | 5378 self, |
| 4528 docsrc, | 5379 docsrc, |
| 4529 *, | 5380 *, |
| 4530 from_page=-1, | 5381 from_page=-1, |
| 5021 raise ValueError("document closed") | 5872 raise ValueError("document closed") |
| 5022 document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super() | 5873 document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super() |
| 5023 ret = mupdf.fz_needs_password( document) | 5874 ret = mupdf.fz_needs_password( document) |
| 5024 return ret | 5875 return ret |
| 5025 | 5876 |
def new_page(
    doc: 'Document',
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> Page:
    """Insert an empty page into the document and hand it back.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).
    Returns:
        A pymupdf.Page object.
    """
    doc._newPage(pno, width=width, height=height)
    # the fresh page is looked up under the same page number
    created = doc[pno]
    return created
| 5894 | |
| 5026 def next_location(self, page_id): | 5895 def next_location(self, page_id): |
| 5027 """Get (chapter, page) of next page.""" | 5896 """Get (chapter, page) of next page.""" |
| 5028 if self.is_closed or self.is_encrypted: | 5897 if self.is_closed or self.is_encrypted: |
| 5029 raise ValueError("document closed or encrypted") | 5898 raise ValueError("document closed or encrypted") |
| 5030 if type(page_id) is int: | 5899 if type(page_id) is int: |
| 5667 | 6536 |
def saveIncr(self):
    """Save the PDF incrementally under its current name.

    Calls save() with incremental=True and encryption=PDF_ENCRYPT_KEEP and
    returns its result.
    """
    save_options = {
        "incremental": True,
        "encryption": mupdf.PDF_ENCRYPT_KEEP,
    }
    return self.save(self.name, **save_options)
| 5671 | 6540 |
| 6541 # ------------------------------------------------------------------------------ | |
| 6542 # Remove potentially sensitive data from a PDF. Similar to the Adobe | |
| 6543 # Acrobat 'sanitize' function | |
| 6544 # ------------------------------------------------------------------------------ | |
def scrub(
    doc: 'Document',
    attached_files: bool = True,
    clean_pages: bool = True,
    embedded_files: bool = True,
    hidden_text: bool = True,
    javascript: bool = True,
    metadata: bool = True,
    redactions: bool = True,
    redact_images: int = 0,
    remove_links: bool = True,
    reset_fields: bool = True,
    reset_responses: bool = True,
    thumbnails: bool = True,
    xml_metadata: bool = True,
) -> None:
    """Remove potentially sensitive data from a PDF.

    Args:
        attached_files: overwrite the content of file attachment annotations.
        clean_pages: clean the page /Contents. If False, 'hidden_text' and
            'redactions' are switched off, too.
        embedded_files: delete all embedded files.
        hidden_text: remove text written with render mode 3 ("3 Tr").
        javascript: replace /JavaScript actions by an empty script.
        metadata: remove the standard metadata.
        redactions: apply redaction annotations where present.
        redact_images: passed as 'images' to page.apply_redactions().
        remove_links: delete all links of every page.
        reset_fields: reset all form fields (widgets).
        reset_responses: delete the responses of every annotation.
        thumbnails: remove page thumbnails (set /Thumb to null).
        xml_metadata: delete XML metadata and references to it.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, or
            (during document-wide scrubbing) an xref has no object.
    """

    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Should have status
                from after page.clean_contents().

        Returns:
            List of /Contents lines from which hidden text has been removed,
            or None if no "3 Tr" operator was encountered.

        Notes:
            The input must have been created after the page's /Contents object(s)
            have been cleaned with page.clean_contents(). This ensures a standard
            formatting: one command per line, single spaces between operators.
            This allows for drastic simplification of this code.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        make_return = False
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True  # switch on
                out_lines.append(line)  # output it
                continue
            if line == b"ET":  # end of text object
                in_text = False  # switch off
                out_lines.append(line)  # output it
                continue
            if line == b"3 Tr":  # text suppression operator
                suppress = True  # switch on
                make_return = True
                continue
            if line[-2:] == b"Tr":
                # Any other 'Tr' line: text rendering changed. The exact
                # b"3 Tr" was handled above; an additional check of the
                # form line[0] != b"3" compares int with bytes on Python 3
                # and is always true, so it is omitted.
                suppress = False
                out_lines.append(line)
                continue
            if line == b"Q":  # unstack command also switches off
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        return out_lines if make_return else None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if not clean_pages:
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            links = page.get_links()  # list of all links on page
            for link in links:  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer_=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        if clean_pages:
            page.clean_contents()
            xrefs = page.get_contents()
            if hidden_text and xrefs:
                assert len(xrefs) == 1  # only one because of cleaning.
                xref = xrefs[0]
                cont = doc.xref_stream(xref)
                cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
                if cont_lines:  # something was actually removed
                    cont = b"\n".join(cont_lines)
                    doc.update_stream(xref, cont)  # rewrite the page /Contents

        # Thumbnail removal must not depend on page cleaning or on the page
        # having /Contents, so it runs for every page.
        if thumbnails:  # remove page thumbnails?
            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
                doc.xref_set_key(page.xref, "Thumb", "null")

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()
    if not (xml_metadata or javascript):
        xref_limit = 0  # nothing left that needs a walk over all objects
    else:
        xref_limit = doc.xref_length()
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object
            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
            doc.update_object(xref, obj)  # update this object
            continue  # no further handling

        if not xml_metadata:
            continue

        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue

        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")
| 6699 | |
def search_page_for(
    doc: 'Document',
    pno: int,
    text: str,
    quads: bool = False,
    clip: rect_like = None,
    flags: int = None,
    textpage: 'TextPage' = None,
) -> list:
    """Search a page, selected by page number, for a string.

    Args:
        pno: page number
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches; None selects TEXT_DEHYPHENATE,
            TEXT_PRESERVE_LIGATURES, TEXT_PRESERVE_WHITESPACE and
            TEXT_MEDIABOX_CLIP
        textpage: reuse a prepared textpage
    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """
    if flags is None:
        flags = TEXT_DEHYPHENATE | TEXT_PRESERVE_LIGATURES
        flags = flags | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP
    page = doc[pno]
    return page.search_for(
        text,
        quads=quads,
        clip=clip,
        flags=flags,
        textpage=textpage,
    )
| 6735 | |
| 5672 def select(self, pyliste): | 6736 def select(self, pyliste): |
| 5673 """Build sub-pdf with page numbers in the list.""" | 6737 """Build sub-pdf with page numbers in the list.""" |
| 5674 if self.is_closed or self.is_encrypted: | 6738 if self.is_closed or self.is_encrypted: |
| 5675 raise ValueError("document closed or encrypted") | 6739 raise ValueError("document closed or encrypted") |
| 5676 if not self.is_pdf: | 6740 if not self.is_pdf: |
| 5811 pdfdict += f"/{key} {value}" | 6875 pdfdict += f"/{key} {value}" |
| 5812 pdfdict += ">>" | 6876 pdfdict += ">>" |
| 5813 self.xref_set_key(xref, "MarkInfo", pdfdict) | 6877 self.xref_set_key(xref, "MarkInfo", pdfdict) |
| 5814 return True | 6878 return True |
| 5815 | 6879 |
def set_metadata(doc: 'Document', m: dict = None) -> None:
    """Write new values into the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata. None or an empty dictionary
            removes the /Info reference. Keys "format" and "encryption"
            are accepted but not written.
    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, if
            'm' is not a dict, or if it contains unknown keys.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if m is None:
        m = {}
    elif type(m) is not dict:
        raise ValueError("bad metadata")

    # Python key -> PDF /Info key; None marks keys that are not stored
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    unknown = set(m) - set(keymap)
    if unknown:
        raise ValueError("bad dict key(s): %s" % unknown)

    kind, ref = doc.xref_get_key(-1, "Info")
    info_xref = int(ref.replace("0 R", "")) if kind == "xref" else 0

    if not m:
        if info_xref != 0:  # remove existing metadata
            doc.xref_set_key(-1, "Info", "null")
            doc.init_doc()
        return  # otherwise nothing to do

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)

    for key, val in m.items():
        pdf_key = keymap[key]
        if pdf_key is None:
            continue
        if bool(val) and val not in ("none", "null"):
            pdf_val = get_pdf_str(val)
        else:
            pdf_val = "null"
        doc.xref_set_key(info_xref, pdf_key, pdf_val)
    doc.init_doc()
    return
| 6940 | |
def set_oc(doc: 'Document', xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject
        oc: (int) xref number of an OCG or OCMD. Zero removes an existing
            /OC entry.
    Raises:
        ValueError: if the document is closed or encrypted, 'xref' is no
            image or form xobject, or 'oc' is negative or no OCG / OCMD.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document close or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc < 0:
        # a negative number can never be a valid object reference
        raise ValueError("bad xref %i" % oc)
    if oc == 0:
        # Zero means "no optional content": remove an existing entry, but
        # never write the invalid reference "0 0 R" when there is none.
        if "OC" in doc.xref_get_keys(xref):
            doc.xref_set_key(xref, "OC", "null")
        return None
    t, name = doc.xref_get_key(oc, "Type")
    if t != "name" or name not in ("/OCG", "/OCMD"):
        raise ValueError("bad object type at xref %i" % oc)
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None
| 6962 | |
def set_ocmd(
    doc: 'Document',
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Write an OCMD object into a PDF document, new or replacing an old one.

    Args:
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.

    Raises:
        ValueError: for unknown OCG xrefs, a bad policy, a malformed
            visibility expression, or if 'xref' is not an OCMD.
    """

    known_ocgs = set(doc.get_ocgs().keys())

    def ve_maker(ve):
        """Convert a (nested) visibility expression list into PDF array source."""
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % ve)
        operator = ve[0].lower()
        if operator not in ("and", "or", "not"):
            raise ValueError("bad operand: %s" % ve[0])
        if operator == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % ve)
        parts = ["[/%s" % ve[0].title()]
        for operand in ve[1:]:
            if type(operand) is not int:
                # anything else must be a nested expression
                parts.append(" %s" % ve_maker(operand))
                continue
            if operand not in known_ocgs:
                raise ValueError("bad OCG %i" % operand)
            parts.append(" %i 0 R" % operand)
        parts.append("]")
        return "".join(parts)

    pieces = ["<</Type/OCMD"]

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        illegal = set(ocgs).difference(known_ocgs)  # contains illegal xrefs
        if illegal != set():
            raise ValueError("bad OCGs: %s" % illegal)
        refs = ["%i 0 R" % ocg for ocg in ocgs]
        pieces.append("/OCGs[" + " ".join(refs) + "]")

    if policy:
        policy = str(policy).lower()
        pols = {
            "anyon": "AnyOn",
            "allon": "AllOn",
            "anyoff": "AnyOff",
            "alloff": "AllOff",
        }
        if policy not in pols:
            raise ValueError("bad policy: %s" % policy)
        pieces.append("/P/%s" % pols[policy])

    if ve:
        pieces.append("/VE%s" % ve_maker(ve))

    pieces.append(">>")
    text = "".join(pieces)

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref
| 7035 | |
| 5816 def set_pagelayout(self, pagelayout: str): | 7036 def set_pagelayout(self, pagelayout: str): |
| 5817 """Set the PDF PageLayout value.""" | 7037 """Set the PDF PageLayout value.""" |
| 5818 valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight") | 7038 valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight") |
| 5819 xref = self.pdf_catalog() | 7039 xref = self.pdf_catalog() |
| 5820 if xref == 0: | 7040 if xref == 0: |
| 5842 for v in valid: | 7062 for v in valid: |
| 5843 if pagemode.lower() == v.lower(): | 7063 if pagemode.lower() == v.lower(): |
| 5844 self.xref_set_key(xref, "PageMode", f"/{v}") | 7064 self.xref_set_key(xref, "PageMode", f"/{v}") |
| 5845 return True | 7065 return True |
| 5846 raise ValueError("bad PageMode value") | 7066 raise ValueError("bad PageMode value") |
| 7067 | |
def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
        labels: sequence of label dictionaries like:
            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
            as returned by get_page_labels(). The sequence itself is not
            modified.
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            PDF label rule string: the start page number followed by the
            rule dictionary wrapped in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:  # 1 is the implied default
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all labels rules.

        Args:
            labels: (sequence) dictionaries as created by function 'rule_dict'.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        # sorted() works on a copy: the caller's sequence keeps its order,
        # and tuples or other iterables are accepted as well.
        ordered = sorted(labels, key=lambda x: x["startpage"])
        return "".join(create_label_str(label) for label in ordered)

    doc._set_page_labels(create_nums(labels))
| 7111 | |
| 7112 def set_toc( | |
| 7113 doc: 'Document', | |
| 7114 toc: list, | |
| 7115 collapse: int = 1, | |
| 7116 ) -> int: | |
| 7117 """Create new outline tree (table of contents, TOC). | |
| 7118 | |
| 7119 Args: | |
| 7120 toc: (list, tuple) each entry must contain level, title, page and | |
| 7121 optionally top margin on the page. None or '()' remove the TOC. | |
| 7122 collapse: (int) collapses entries beyond this level. Zero or None | |
| 7123 shows all entries unfolded. | |
| 7124 Returns: | |
| 7125 the number of inserted items, or the number of removed items respectively. | |
| 7126 """ | |
| 7127 if doc.is_closed or doc.is_encrypted: | |
| 7128 raise ValueError("document closed or encrypted") | |
| 7129 if not doc.is_pdf: | |
| 7130 raise ValueError("is no PDF") | |
| 7131 if not toc: # remove all entries | |
| 7132 return len(doc._delToC()) | |
| 7133 | |
| 7134 # validity checks -------------------------------------------------------- | |
| 7135 if type(toc) not in (list, tuple): | |
| 7136 raise ValueError("'toc' must be list or tuple") | |
| 7137 toclen = len(toc) | |
| 7138 page_count = doc.page_count | |
| 7139 t0 = toc[0] | |
| 7140 if type(t0) not in (list, tuple): | |
| 7141 raise ValueError("items must be sequences of 3 or 4 items") | |
| 7142 if t0[0] != 1: | |
| 7143 raise ValueError("hierarchy level of item 0 must be 1") | |
| 7144 for i in list(range(toclen - 1)): | |
| 7145 t1 = toc[i] | |
| 7146 t2 = toc[i + 1] | |
| 7147 if not -1 <= t1[2] <= page_count: | |
| 7148 raise ValueError("row %i: page number out of range" % i) | |
| 7149 if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4): | |
| 7150 raise ValueError("bad row %i" % (i + 1)) | |
| 7151 if (type(t2[0]) is not int) or t2[0] < 1: | |
| 7152 raise ValueError("bad hierarchy level in row %i" % (i + 1)) | |
| 7153 if t2[0] > t1[0] + 1: | |
| 7154 raise ValueError("bad hierarchy level in row %i" % (i + 1)) | |
| 7155 # no formal errors in toc -------------------------------------------------- | |
| 7156 | |
| 7157 # -------------------------------------------------------------------------- | |
| 7158 # make a list of xref numbers, which we can use for our TOC entries | |
| 7159 # -------------------------------------------------------------------------- | |
| 7160 old_xrefs = doc._delToC() # del old outlines, get their xref numbers | |
| 7161 | |
| 7162 # prepare table of xrefs for new bookmarks | |
| 7163 old_xrefs = [] | |
| 7164 xref = [0] + old_xrefs | |
| 7165 xref[0] = doc._getOLRootNumber() # entry zero is outline root xref number | |
| 7166 if toclen > len(old_xrefs): # too few old xrefs? | |
| 7167 for i in range((toclen - len(old_xrefs))): | |
| 7168 xref.append(doc.get_new_xref()) # acquire new ones | |
| 7169 | |
| 7170 lvltab = {0: 0} # to store last entry per hierarchy level | |
| 7171 | |
| 7172 # ------------------------------------------------------------------------------ | |
| 7173 # contains new outline objects as strings - first one is the outline root | |
| 7174 # ------------------------------------------------------------------------------ | |
| 7175 olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}] | |
| 7176 # ------------------------------------------------------------------------------ | |
| 7177 # build olitems as a list of PDF-like connected dictionaries | |
| 7178 # ------------------------------------------------------------------------------ | |
| 7179 for i in range(toclen): | |
| 7180 o = toc[i] | |
| 7181 lvl = o[0] # level | |
| 7182 title = get_pdf_str(o[1]) # title | |
| 7183 pno = min(doc.page_count - 1, max(0, o[2] - 1)) # page number | |
| 7184 page_xref = doc.page_xref(pno) | |
| 7185 page_height = doc.page_cropbox(pno).height | |
| 7186 top = Point(72, page_height - 36) | |
| 7187 dest_dict = {"to": top, "kind": LINK_GOTO} # fall back target | |
| 7188 if o[2] < 0: | |
| 7189 dest_dict["kind"] = LINK_NONE | |
| 7190 if len(o) > 3: # some target is specified | |
| 7191 if type(o[3]) in (int, float): # convert a number to a point | |
| 7192 dest_dict["to"] = Point(72, page_height - o[3]) | |
| 7193 else: # if something else, make sure we have a dict | |
| 7194 # We make a copy of o[3] to avoid modifying our caller's data. | |
| 7195 dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict | |
| 7196 if "to" not in dest_dict: # target point not in dict? | |
| 7197 dest_dict["to"] = top # put default in | |
| 7198 else: # transform target to PDF coordinates | |
| 7199 page = doc[pno] | |
| 7200 point = Point(dest_dict["to"]) | |
| 7201 point.y = page.cropbox.height - point.y | |
| 7202 point = point * page.rotation_matrix | |
| 7203 dest_dict["to"] = (point.x, point.y) | |
| 7204 d = {} | |
| 7205 d["first"] = -1 | |
| 7206 d["count"] = 0 | |
| 7207 d["last"] = -1 | |
| 7208 d["prev"] = -1 | |
| 7209 d["next"] = -1 | |
| 7210 d["dest"] = utils.getDestStr(page_xref, dest_dict) | |
| 7211 d["top"] = dest_dict["to"] | |
| 7212 d["title"] = title | |
| 7213 d["parent"] = lvltab[lvl - 1] | |
| 7214 d["xref"] = xref[i + 1] | |
| 7215 d["color"] = dest_dict.get("color") | |
| 7216 d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0) | |
| 7217 lvltab[lvl] = i + 1 | |
| 7218 parent = olitems[lvltab[lvl - 1]] # the parent entry | |
| 7219 | |
| 7220 if ( | |
| 7221 dest_dict.get("collapse") or collapse and lvl > collapse | |
| 7222 ): # suppress expansion | |
| 7223 parent["count"] -= 1 # make /Count negative | |
| 7224 else: | |
| 7225 parent["count"] += 1 # positive /Count | |
| 7226 | |
| 7227 if parent["first"] == -1: | |
| 7228 parent["first"] = i + 1 | |
| 7229 parent["last"] = i + 1 | |
| 7230 else: | |
| 7231 d["prev"] = parent["last"] | |
| 7232 prev = olitems[parent["last"]] | |
| 7233 prev["next"] = i + 1 | |
| 7234 parent["last"] = i + 1 | |
| 7235 olitems.append(d) | |
| 7236 | |
| 7237 # ------------------------------------------------------------------------------ | |
| 7238 # now create each outline item as a string and insert it in the PDF | |
| 7239 # ------------------------------------------------------------------------------ | |
| 7240 for i, ol in enumerate(olitems): | |
| 7241 txt = "<<" | |
| 7242 if ol["count"] != 0: | |
| 7243 txt += "/Count %i" % ol["count"] | |
| 7244 try: | |
| 7245 txt += ol["dest"] | |
| 7246 except Exception: | |
| 7247 # Verbose in PyMuPDF/tests. | |
| 7248 if g_exceptions_verbose >= 2: exception_info() | |
| 7249 pass | |
| 7250 try: | |
| 7251 if ol["first"] > -1: | |
| 7252 txt += "/First %i 0 R" % xref[ol["first"]] | |
| 7253 except Exception: | |
| 7254 if g_exceptions_verbose >= 2: exception_info() | |
| 7255 pass | |
| 7256 try: | |
| 7257 if ol["last"] > -1: | |
| 7258 txt += "/Last %i 0 R" % xref[ol["last"]] | |
| 7259 except Exception: | |
| 7260 if g_exceptions_verbose >= 2: exception_info() | |
| 7261 pass | |
| 7262 try: | |
| 7263 if ol["next"] > -1: | |
| 7264 txt += "/Next %i 0 R" % xref[ol["next"]] | |
| 7265 except Exception: | |
| 7266 # Verbose in PyMuPDF/tests. | |
| 7267 if g_exceptions_verbose >= 2: exception_info() | |
| 7268 pass | |
| 7269 try: | |
| 7270 if ol["parent"] > -1: | |
| 7271 txt += "/Parent %i 0 R" % xref[ol["parent"]] | |
| 7272 except Exception: | |
| 7273 # Verbose in PyMuPDF/tests. | |
| 7274 if g_exceptions_verbose >= 2: exception_info() | |
| 7275 pass | |
| 7276 try: | |
| 7277 if ol["prev"] > -1: | |
| 7278 txt += "/Prev %i 0 R" % xref[ol["prev"]] | |
| 7279 except Exception: | |
| 7280 # Verbose in PyMuPDF/tests. | |
| 7281 if g_exceptions_verbose >= 2: exception_info() | |
| 7282 pass | |
| 7283 try: | |
| 7284 txt += "/Title" + ol["title"] | |
| 7285 except Exception: | |
| 7286 # Verbose in PyMuPDF/tests. | |
| 7287 if g_exceptions_verbose >= 2: exception_info() | |
| 7288 pass | |
| 7289 | |
| 7290 if ol.get("color") and len(ol["color"]) == 3: | |
| 7291 txt += f"/C[ {_format_g(tuple(ol['color']))}]" | |
| 7292 if ol.get("flags", 0) > 0: | |
| 7293 txt += "/F %i" % ol["flags"] | |
| 7294 | |
| 7295 if i == 0: # special: this is the outline root | |
| 7296 txt += "/Type/Outlines" # so add the /Type entry | |
| 7297 txt += ">>" | |
| 7298 doc.update_object(xref[i], txt) # insert the PDF object | |
| 7299 | |
| 7300 doc.init_doc() | |
| 7301 return toclen | |
| 7302 | |
def set_toc_item(
    doc: 'Document',
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx:
            (int) desired index of the TOC list, as created by get_toc.
        dest_dict:
            (dict) destination dictionary as created by get_toc(False).
            Outrules all other parameters except 'title'. If None, the
            remaining parameters are used to make a dest dictionary.
            The dictionary passed in is never modified.
        kind:
            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
            be deleted.
        pno:
            (int) page number (1-based like in get_toc). Required if
            pymupdf.LINK_GOTO.
        uri:
            (str) the URL, required if pymupdf.LINK_URI.
        title:
            (str) the new title. No change if None.
        to:
            (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename:
            (str) destination filename, required for pymupdf.LINK_GOTOR and
            pymupdf.LINK_LAUNCH.
        zoom:
            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).

    Raises:
        ValueError: if the destination cannot be converted to a PDF action,
            if a color in 'dest_dict' is invalid, or if 'pno' is missing or
            out of range for pymupdf.LINK_GOTO.
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        # Work on a copy: the y-coordinate flip below must not leak back into
        # the caller's dictionary (a second call with the same dictionary
        # would otherwise flip the point again).
        dest_dict = dest_dict.copy()
        if dest_dict["kind"] == LINK_GOTO:
            # NOTE: the page number of the dictionary is used as is (no
            # "- 1"), unlike the 'pno' parameter handled further down.
            pno = dest_dict["page"]
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            # Point(...) makes a private copy and also accepts plain
            # sequences such as tuples.
            to = Point(dest_dict.get("to", Point(72, 36)))
            to.y = page_height - to.y  # convert to PDF coordinates
            dest_dict["to"] = to
        action = utils.getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")
        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold  # bit 0: italic, bit 1: bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],  # strip the leading "/A"
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = Point(72, page_height - 36)
        else:
            to = Point(to)
            to.y = page_height - to.y  # convert to PDF coordinates

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = utils.getDestStr(page_xref, ddict)
    if action == "" or not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)
| 5847 | 7410 |
| 5848 def set_xml_metadata(self, metadata): | 7411 def set_xml_metadata(self, metadata): |
| 5849 """Store XML document level metadata.""" | 7412 """Store XML document level metadata.""" |
| 5850 if self.is_closed or self.is_encrypted: | 7413 if self.is_closed or self.is_encrypted: |
| 5851 raise ValueError("document closed or encrypted") | 7414 raise ValueError("document closed or encrypted") |
| 5860 else: | 7423 else: |
| 5861 xml = mupdf.pdf_add_stream( pdf, res, mupdf.PdfObj(), 0) | 7424 xml = mupdf.pdf_add_stream( pdf, res, mupdf.PdfObj(), 0) |
| 5862 mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata')) | 7425 mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata')) |
| 5863 mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML')) | 7426 mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML')) |
| 5864 mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml) | 7427 mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml) |
| 7428 | |
def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
    """Build font subsets in a PDF.

    Eligible fonts are potentially replaced by smaller versions. Page text is
    NOT rewritten and thus should retain properties like being hidden or
    controlled by optional content.

    This method by default uses MuPDF's own internal feature to create subset
    fonts. As this is a new function, errors may still occur. In this case,
    please fall back to using the previous version by using "fallback=True".
    Fallback mode requires the external package 'fontTools'.

    Args:
        fallback: use the older deprecated implementation.
        verbose: only used by fallback mode.

    Returns:
        The new MuPDF-based code returns None. The deprecated fallback
        mode returns 0 if there are no fonts to subset. Otherwise, it
        returns the decrease in fontsize (the difference in fontsize),
        measured in bytes.
    """
    if not fallback:  # by default use MuPDF function
        pdf = mupdf.pdf_document_from_fz_document(doc)
        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
        return None

    # Font binaries: "buffer" -> (names, xrefs, (unicodes, glyphs)).
    # An embedded font is uniquely defined by its fontbuffer only. It may have
    # multiple names and xrefs.
    # Once the sets of used unicodes and glyphs are known, we compute a
    # smaller version of the buffer using package fontTools.
    font_buffers = {}

    def get_old_widths(xref):
        """Retrieve old font '/W' and '/DW' values."""
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specifications
            return None, None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        widths = doc.xref_get_key(df_xref, "W")
        widths = widths[1] if widths[0] == "array" else None
        dwidths = doc.xref_get_key(df_xref, "DW")
        dwidths = dwidths[1] if dwidths[0] == "int" else None
        return widths, dwidths

    def set_old_widths(xref, widths, dwidths):
        """Restore the old '/W' and '/DW' in subsetted font.

        If either parameter is None or evaluates to False, the corresponding
        dictionary key will be set to null (nothing is written if the key
        is null already).
        """
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specs
            return None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        for key, value in (("W", widths), ("DW", dwidths)):
            if type(value) is str and value:
                doc.xref_set_key(df_xref, key, value)
            elif doc.xref_get_key(df_xref, key)[0] != "null":
                doc.xref_set_key(df_xref, key, "null")
            # else: no usable old value and the key is null already. Never
            # hand a non-string value to xref_set_key.
        return None

    def set_subset_fontname(new_xref):
        """Generate a name prefix to tag a font as subset.

        We use a random generator to select 6 upper case ASCII characters.
        The prefixed name must be put in the font xref as the "/BaseFont" value
        and in the FontDescriptor object as the '/FontName' value.
        """
        # The following generates a prefix like 'ABCDEF+'
        import random
        import string
        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
        font_str = doc.xref_object(new_xref, compressed=True)
        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
        df = doc.xref_get_key(new_xref, "DescendantFonts")
        if df[0] == "array":
            df_xref = int(df[1][1:-1].replace("0 R", ""))
            fd = doc.xref_get_key(df_xref, "FontDescriptor")
            if fd[0] == "xref":
                fd_xref = int(fd[1].replace("0 R", ""))
                fd_str = doc.xref_object(fd_xref, compressed=True)
                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
                doc.update_object(fd_xref, fd_str)
        doc.update_object(new_xref, font_str)

    def build_subset(buffer, unc_set, gid_set):
        """Build font subset using fontTools.

        Args:
            buffer: (bytes) the font given as a binary buffer.
            unc_set: (set) required unicodes. Gets 255 added if used.
            gid_set: (set) required glyph ids. Used instead of 'unc_set' if
                that contains the error unicode 0xFFFD; gets 189 added then.
        Returns:
            Either None if subsetting is unsuccessful or the subset font buffer.
        """
        try:
            import fontTools.subset as fts
        except ImportError:
            if g_exceptions_verbose: exception_info()
            message("This method requires fontTools to be installed.")
            raise
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            # The directory is brand-new, so no stale output file can exist.
            oldfont_path = f"{tmp_dir}/oldfont.ttf"
            newfont_path = f"{tmp_dir}/newfont.ttf"
            uncfile_path = f"{tmp_dir}/uncfile.txt"
            args = [
                oldfont_path,
                "--retain-gids",
                f"--output-file={newfont_path}",
                "--layout-features=*",
                "--passthrough-tables",
                "--ignore-missing-glyphs",
                "--ignore-missing-unicodes",
                "--symbol-cmap",
            ]

            # store glyph ids or unicodes as file
            with open(uncfile_path, "w", encoding='utf8') as unc_file:
                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
                    args.append(f"--gids-file={uncfile_path}")
                    gid_set.add(189)
                    for gid in list(gid_set):
                        unc_file.write("%i\n" % gid)
                else:
                    args.append(f"--unicodes-file={uncfile_path}")
                    unc_set.add(255)
                    for unc in list(unc_set):
                        unc_file.write("%04x\n" % unc)

            # store fontbuffer as a file
            with open(oldfont_path, "wb") as fontfile:
                fontfile.write(buffer)
            try:  # invoke fontTools subsetter
                fts.main(args)
                font = Font(fontfile=newfont_path)
                new_buffer = font.buffer  # subset font binary
                if font.glyph_count == 0:  # intercept empty font
                    new_buffer = None
            except Exception:
                # best effort: a failing subsetter just means "no subset"
                exception_info()
                new_buffer = None
        return new_buffer

    def repl_fontnames(doc):
        """Populate 'font_buffers'.

        For each font candidate, store its xref and the list of names
        by which PDF text may refer to it (there may be multiple).
        """

        def norm_name(name):
            """Recreate font name that contains PDF hex codes.

            E.g. #20 -> space, chr(32)

            Every '#xx' escape is decoded exactly once in a single pass, so
            an escape that decodes to '#' itself cannot cause endless
            re-scanning. A '#' not followed by two hex digits is kept as is.
            """
            return re.sub(
                r"#([0-9A-Fa-f]{2})",
                lambda m: chr(int(m.group(1), 16)),
                name,
            )

        def get_fontnames(doc, item):
            """Return a list of fontnames for an item of page.get_fonts().

            There may be multiple names e.g. for Type0 fonts.
            """
            fontname = item[3]
            names = [fontname]
            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
            fontname = norm_name(fontname)
            if fontname not in names:
                names.append(fontname)
            descendents = doc.xref_get_key(item[0], "DescendantFonts")
            if descendents[0] != "array":
                return names
            descendents = descendents[1][1:-1]
            if descendents.endswith(" 0 R"):
                xref = int(descendents[:-4])
                descendents = doc.xref_object(xref, compressed=True)
            p1 = descendents.find("/BaseFont")
            if p1 >= 0:
                p2 = descendents.find("/", p1 + 1)  # start of the name value
                if p2 >= 0:
                    # The name ends at the next key or at the dictionary end,
                    # whichever comes first. str.find() returns -1 for
                    # "not found", which must not take part in min().
                    ends = [
                        pos
                        for pos in (
                            descendents.find("/", p2 + 1),
                            descendents.find(">>", p2 + 1),
                        )
                        if pos >= 0
                    ]
                    p1 = min(ends) if ends else len(descendents)
                    fontname = descendents[p2 + 1 : p1]
                    fontname = norm_name(fontname)
                    if fontname not in names:
                        names.append(fontname)
            return names

        for i in range(doc.page_count):
            for f in doc.get_page_fonts(i, full=True):
                font_xref = f[0]  # font xref
                font_ext = f[1]  # font file extension
                basename = f[3]  # font basename

                if font_ext not in (  # skip if not supported by fontTools
                    "otf",
                    "ttf",
                    "woff",
                    "woff2",
                ):
                    continue
                # skip fonts which already are subsets
                if len(basename) > 6 and basename[6] == "+":
                    continue

                extr = doc.extract_font(font_xref)
                fontbuffer = extr[-1]
                names = get_fontnames(doc, f)
                name_set, xref_set, subsets = font_buffers.get(
                    fontbuffer, (set(), set(), (set(), set()))
                )
                xref_set.add(font_xref)
                for name in names:
                    name_set.add(name)
                font = Font(fontbuffer=fontbuffer)
                name_set.add(font.name)
                del font
                font_buffers[fontbuffer] = (name_set, xref_set, subsets)

    def find_buffer_by_name(name):
        """Return the font buffer registered under 'name', or None."""
        for buffer, (name_set, _, _) in font_buffers.items():
            if name in name_set:
                return buffer
        return None

    # -----------------
    # main function
    # -----------------
    repl_fontnames(doc)  # populate font information
    if not font_buffers:  # nothing found to do
        if verbose:
            message('No fonts to subset.')
        return 0

    old_fontsize = sum(len(fontbuffer) for fontbuffer in font_buffers)
    new_fontsize = 0

    # Scan page text for usage of subsettable fonts
    for page in doc:
        # go through the text and extend set of used glyphs by font
        # we use a modified MuPDF trace device, which delivers us glyph ids.
        for span in page.get_texttrace():
            if type(span) is not dict:  # skip useless information
                continue
            fontname = span["font"][:33]  # fontname for the span
            buffer = find_buffer_by_name(fontname)
            if buffer is None:
                continue
            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
            for c in span["chars"]:
                set_ucs.add(c[0])  # unicode
                set_gid.add(c[1])  # glyph id
            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))

    # build the font subsets
    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
        fontname = list(name_set)[0]
        if new_buffer is None or len(new_buffer) >= len(old_buffer):
            # subset was not created or did not get smaller
            if verbose:
                message(f'Cannot subset {fontname!r}.')
            continue
        if verbose:
            message(f"Built subset of font {fontname!r}.")
        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
        new_xref = val[0]  # get its xref
        set_subset_fontname(new_xref)  # tag fontname as subset font
        font_str = doc.xref_object(  # get its object definition
            new_xref,
            compressed=True,
        )
        # walk through the original font xrefs and replace each by the subset def
        for font_xref in xref_set:
            # we need the original '/W' and '/DW' width values
            width_table, def_width = get_old_widths(font_xref)
            # ... and replace original font definition at xref with it
            doc.update_object(font_xref, font_str)
            # now copy over old '/W' and '/DW' values
            if width_table or def_width:
                set_old_widths(font_xref, width_table, def_width)
        # 'new_xref' remains unused in the PDF and must be removed
        # by garbage collection.
        new_fontsize += len(new_buffer)

    return old_fontsize - new_fontsize
| 5865 | 7740 |
| 5866 def switch_layer(self, config, as_default=0): | 7741 def switch_layer(self, config, as_default=0): |
| 5867 """Activate an OC layer.""" | 7742 """Activate an OC layer.""" |
| 5868 pdf = _as_pdf_document(self) | 7743 pdf = _as_pdf_document(self) |
| 5869 cfgs = mupdf.pdf_dict_getl( | 7744 cfgs = mupdf.pdf_dict_getl( |
| 5971 preserve_metadata=preserve_metadata, | 7846 preserve_metadata=preserve_metadata, |
| 5972 use_objstms=use_objstms, | 7847 use_objstms=use_objstms, |
| 5973 compression_effort=compression_effort, | 7848 compression_effort=compression_effort, |
| 5974 ) | 7849 ) |
| 5975 return bio.getvalue() | 7850 return bio.getvalue() |
| 7851 | |
def tobytes(self, *args, **kwargs):
    """Return the result of 'write', called with the same arguments."""
    data = self.write(*args, **kwargs)
    return data
| 5976 | 7854 |
@property
def xref(self):
    """Return the PDF xref number of the page."""
    CheckParent(self)  # checked before the parent is accessed
    owner = self.parent
    return owner.page_xref(self.number)
| 5982 | 7860 |
def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None:
    """Copy a PDF dictionary object to another one given their xref numbers.

    Args:
        doc: PDF document object
        source: source xref number
        target: target xref number, the xref must already exist
        keep: an optional list of 1st level keys in target that should not be
            removed before copying.
    Notes:
        This works similar to the copy() method of dictionaries in Python. The
        source may be a stream object.
    """
    if doc.xref_is_stream(source):
        # Transfer the raw stream first. 'compress=False' keeps the source
        # compression, 'new=True' covers a target that is no stream yet.
        raw = doc.xref_stream_raw(source)
        doc.update_stream(target, raw, compress=False, new=True)

    protected = [] if keep is None else keep

    # Step 1: wipe every target key that the caller did not protect.
    for name in doc.xref_get_keys(target):
        if name not in protected:
            doc.xref_set_key(target, name, "null")

    # Step 2: transfer all source dictionary items.
    for name in doc.xref_get_keys(source):
        value = doc.xref_get_key(source, name)[1]
        doc.xref_set_key(target, name, value)
| 7895 | |
| 5983 def xref_get_key(self, xref, key): | 7896 def xref_get_key(self, xref, key): |
| 5984 """Get PDF dict key value of object at 'xref'.""" | 7897 """Get PDF dict key value of object at 'xref'.""" |
| 5985 pdf = _as_pdf_document(self) | 7898 pdf = _as_pdf_document(self) |
| 5986 xreflen = mupdf.pdf_xref_len(pdf) | 7899 xreflen = mupdf.pdf_xref_len(pdf) |
| 5987 if not _INRANGE(xref, 1, xreflen-1) and xref != -1: | 7900 if not _INRANGE(xref, 1, xreflen-1) and xref != -1: |
| 6194 return xref | 8107 return xref |
| 6195 | 8108 |
| 6196 __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__') | 8109 __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__') |
| 6197 | 8110 |
| 6198 outline = property(lambda self: self._outline) | 8111 outline = property(lambda self: self._outline) |
| 6199 tobytes = write | |
| 6200 is_stream = xref_is_stream | 8112 is_stream = xref_is_stream |
| 6201 | 8113 |
| 6202 open = Document | 8114 open = Document |
| 6203 | 8115 |
| 6204 | 8116 |
| 8733 for xref in annot_xrefs: | 10645 for xref in annot_xrefs: |
| 8734 annot = self.load_annot(xref) | 10646 annot = self.load_annot(xref) |
| 8735 annot._yielded=True | 10647 annot._yielded=True |
| 8736 yield annot | 10648 yield annot |
| 8737 | 10649 |
def apply_redactions(
    page: 'Page',
    images: int = 2,
    graphics: int = 1,
    text: int = 0,
) -> bool:
    """Apply the redaction annotations of the page.

    Args:
        page: the PDF page.
        images:
            0 - ignore images
            1 - remove all overlapping images
            2 - blank out overlapping image parts
            3 - remove image unless invisible
        graphics:
            0 - ignore graphics
            1 - remove graphics if contained in rectangle
            2 - remove all overlapping graphics
        text:
            0 - remove text
            1 - ignore text

    Returns:
        False if the page has no redaction annotations, else True.
    """

    def center_rect(annot_rect, new_text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
            The rectangle passed in is adjusted in place.
        Args:
            annot_rect: the annotation rectangle
            new_text: the text to insert.
            font: the fontname. Must be one of the CJK or Base-14 set, else
                the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not new_text:
            return annot_rect
        if annot_rect.width <= EPSILON:
            return annot_rect
        try:
            text_width = get_text_length(new_text, font, fsize)
        except (ValueError, mupdf.FzErrorBase):  # unsupported font
            if g_exceptions_verbose:
                exception_info()
            return annot_rect
        line_height = fsize * 1.2
        limit = annot_rect.width
        needed = math.ceil(text_width / limit) * line_height  # estimate rect height
        if needed >= annot_rect.height:
            return annot_rect
        # move the top edge down so that the text block sits in the middle
        new_top = (annot_rect.tl.y + annot_rect.bl.y - needed) * 0.5
        annot_rect.y0 = new_top
        return annot_rect

    CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    # Save the values of all redaction annotations before MuPDF removes them.
    redact_annots = [
        annot._get_redact_values()
        for annot in page.annots(
            types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
        )
    ]
    if not redact_annots:  # no redactions on this page
        return False

    if not page._apply_redactions(text, images, graphics):  # call MuPDF
        # should not happen really
        raise ValueError("Error applying redactions.")

    # now write replacement text in old redact rectangles
    shape = page.new_shape()
    for redact in redact_annots:
        annot_rect = redact["rect"]
        fill = redact["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" not in redact:  # no overlay text for this one
            continue
        new_text = redact["text"]
        align = redact.get("align", 0)
        fname = redact["fontname"]
        fsize = redact["fontsize"]
        color = redact["text_color"]
        # try finding vertical centered sub-rect
        trect = center_rect(annot_rect, new_text, fname, fsize)

        # (Re-)try the insertion with a shrinking font until the text fits
        # or the font size drops below 4.
        while fsize >= 4:
            rc = shape.insert_textbox(
                trect,
                new_text,
                fontname=fname,
                fontsize=fsize,
                color=color,
                align=align,
            )
            fsize -= 0.5  # reduce font if unsuccessful
            if rc >= 0:  # enough room
                break
    shape.commit()  # append new contents object
    return True
| 10760 | |
| 8738 def recolor(self, components=1): | 10761 def recolor(self, components=1): |
| 8739 """Convert colorspaces of objects on the page. | 10762 """Convert colorspaces of objects on the page. |
| 8740 | 10763 |
| 8741 Valid values are 1, 3 and 4. | 10764 Valid values are 1, 3 and 4. |
| 8742 """ | 10765 """ |
| 8841 val.parent = weakref.proxy(self) # owning page object | 10864 val.parent = weakref.proxy(self) # owning page object |
| 8842 val.parent._annot_refs[id(val)] = val | 10865 val.parent._annot_refs[id(val)] = val |
| 8843 annot._erase() | 10866 annot._erase() |
| 8844 return val | 10867 return val |
| 8845 | 10868 |
def delete_image(page: 'Page', xref: int):
    """Delete the image referred to by xref.

    The image object is not physically removed. Instead it is replaced,
    via Page.replace_image, by a tiny fully transparent Pixmap.

    Args:
        xref: xref of the image to delete.
    """
    # 1x1 gray pixmap with alpha channel; the dimension does not matter
    blank = Pixmap(csGRAY, (0, 0, 1, 1), 1)
    # set all sample bytes to 0x00, which makes the pixmap 100% transparent
    blank.clear_with()
    page.replace_image(xref, pixmap=blank)
| 10881 | |
| 8846 def delete_link(self, linkdict): | 10882 def delete_link(self, linkdict): |
| 8847 """Delete a Link.""" | 10883 """Delete a Link.""" |
| 8848 CheckParent(self) | 10884 CheckParent(self) |
| 8849 if not isinstance( linkdict, dict): | 10885 if not isinstance( linkdict, dict): |
| 8850 return # have no dictionary | 10886 return # have no dictionary |
| 8885 mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots) | 10921 mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots) |
| 8886 JM_refresh_links( page) | 10922 JM_refresh_links( page) |
| 8887 | 10923 |
| 8888 return finished() | 10924 return finished() |
| 8889 | 10925 |
def delete_widget(page: 'Page', widget: Widget) -> Widget:
    """Delete widget from page and return the next one.

    Args:
        widget: the widget to delete. It must carry an '_annot' attribute
            that is not None.
    Returns:
        The value of 'widget.next', read before the deletion.
    Raises:
        ValueError: if 'widget' has no usable '_annot' attribute.
    """
    CheckParent(page)
    annot = getattr(widget, "_annot", None)
    if annot is None:
        raise ValueError("bad type: widget")
    # remember the successor first: the widget is emptied further down
    successor = widget.next
    page.delete_annot(annot)
    widget._annot.parent = None
    # strip every instance attribute so the stale widget cannot be reused
    widget.__dict__.clear()
    return successor
| 10939 | |
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation.

    Uses the 'extra' module when g_use_extra is set. Otherwise the matrix
    is computed from the underlying PDF page; if there is no such page
    object, a Matrix built from the unit rectangle is returned.
    """
    if g_use_extra:
        raw = extra.Page_derotate_matrix(self.this)
    else:
        pdf_page = self._pdf_page(required=False)
        if pdf_page.m_internal:
            raw = JM_derotate_page_matrix(pdf_page)
        else:
            # no PDF page object behind this page
            raw = mupdf.FzRect(mupdf.FzRect.UNIT)
    return Matrix(raw)
| 10949 | |
def draw_bezier(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

    A new Shape is created for the page, the curve is drawn on it, the
    drawing is finished with the given properties and then committed.

    Args:
        p1: (point_like) start point of the curve.
        p2: (point_like) first control point.
        p3: (point_like) second control point.
        p4: (point_like) end point of the curve.
        overlay: passed to Shape.commit().
        color, fill, dashes, width, morph, closePath, lineCap, lineJoin,
        stroke_opacity, fill_opacity, oc: handed unchanged to Shape.finish().
    Returns:
        Whatever Shape.draw_bezier() returns.
    """
    # NOTE: 'morph' is annotated OptSeq here, like in all sibling draw_*
    # methods; it is simply forwarded to Shape.finish().
    shape = page.new_shape()
    end_point = shape.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
| 10988 | |
def draw_circle(
    page: 'Page',
    center: point_like,
    radius: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    morph: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle given its center and radius.

    The circle is drawn on a fresh Shape of the page, which is finished
    with the given properties and committed.

    Returns:
        Whatever Shape.draw_circle() returns.
    """
    shape = page.new_shape()
    result = shape.draw_circle(Point(center), radius)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11022 | |
def draw_curve(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    p3: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a special Bezier curve from p1 to p3.

    The control points are generated on the lines p1 to p2 and p2 to p3.
    Drawing happens on a fresh Shape of the page, which is finished with
    the given properties and committed.

    Returns:
        Whatever Shape.draw_curve() returns.
    """
    shape = page.new_shape()
    result = shape.draw_curve(Point(p1), Point(p2), Point(p3))
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11060 | |
def draw_line(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a line from point p1 to point p2.

    The line is drawn on a fresh Shape of the page, which is finished with
    the given properties and committed.

    Args:
        p1: (point_like) start point.
        p2: (point_like) end point.
        overlay: passed to Shape.commit().
        color, dashes, width, lineCap, lineJoin, morph, stroke_opacity,
        fill_opacity, oc: handed unchanged to Shape.finish().
    Returns:
        Whatever Shape.draw_line() returns.
    """
    shape = page.new_shape()
    end_point = shape.draw_line(Point(p1), Point(p2))
    shape.finish(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,  # a single line is never closed
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
| 11094 | |
def draw_oval(
    page: 'Page',
    rect: typing.Union[rect_like, quad_like],
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    morph: OptSeq = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw an oval given its containing rectangle or quad.

    'rect' is passed to Shape.draw_oval() as is. The fresh Shape is then
    finished with the given properties and committed.

    Returns:
        Whatever Shape.draw_oval() returns.
    """
    shape = page.new_shape()
    result = shape.draw_oval(rect)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11128 | |
def draw_polyline(
    page: 'Page',
    points: list,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    closePath: bool = False,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw multiple connected line segments.

    'points' is passed to Shape.draw_polyline() as is. The fresh Shape is
    then finished with the given properties and committed.

    Returns:
        Whatever Shape.draw_polyline() returns.
    """
    shape = page.new_shape()
    result = shape.draw_polyline(points)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11164 | |
def draw_quad(
    page: 'Page',
    quad: quad_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a quadrilateral.

    'quad' is converted to a Quad and drawn on a fresh Shape of the page,
    which is finished with the given properties and committed.

    Returns:
        Whatever Shape.draw_quad() returns.
    """
    shape = page.new_shape()
    result = shape.draw_quad(Quad(quad))
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11198 | |
def draw_rect(
    page: 'Page',
    rect: rect_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
    radius=None,
) -> Point:
    """Draw a rectangle. See Shape class method for details.

    'rect' is converted to a Rect; 'radius' is handed through to
    Shape.draw_rect(). The fresh Shape is then finished with the given
    properties and committed.

    Returns:
        Whatever Shape.draw_rect() returns.
    """
    shape = page.new_shape()
    result = shape.draw_rect(Rect(rect), radius=radius)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11235 | |
def draw_sector(
    page: 'Page',
    center: point_like,
    point: point_like,
    beta: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    fullSector: bool = True,
    morph: OptSeq = None,
    width: float = 1,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle sector given circle center, one arc end point and the angle of the arc.

    Parameters:
        center -- center of circle
        point -- arc end point
        beta -- angle of arc (degrees)
        fullSector -- connect arc ends with center

    Returns:
        Whatever Shape.draw_sector() returns.
    """
    shape = page.new_shape()
    result = shape.draw_sector(
        Point(center),
        Point(point),
        beta,
        fullSector=fullSector,
    )
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11281 | |
def draw_squiggle(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a squiggly line from point p1 to point p2.

    'breadth' is handed through to Shape.draw_squiggle(). The fresh Shape
    is finished (never closing the path) and committed.

    Returns:
        Whatever Shape.draw_squiggle() returns.
    """
    shape = page.new_shape()
    result = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 11316 | |
def draw_zigzag(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a zigzag line from point p1 to point p2.

    'breadth' is handed through to Shape.draw_zigzag(). The fresh Shape is
    finished (never closing the path) and committed.

    Returns:
        Whatever Shape.draw_zigzag() returns.
    """
    shape = page.new_shape()
    result = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
    # all drawing properties are forwarded unchanged to Shape.finish()
    props = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**props)
    shape.commit(overlay)
    return result
| 8899 | 11351 |
| 8900 def extend_textpage(self, tpage, flags=0, matrix=None): | 11352 def extend_textpage(self, tpage, flags=0, matrix=None): |
| 8901 page = self.this | 11353 page = self.this |
| 8902 tp = tpage.this | 11354 tp = tpage.this |
| 8903 assert isinstance( tp, mupdf.FzStextPage) | 11355 assert isinstance( tp, mupdf.FzStextPage) |
| 9217 paths.append(npath) | 11669 paths.append(npath) |
| 9218 | 11670 |
| 9219 val = None | 11671 val = None |
| 9220 return paths | 11672 return paths |
| 9221 | 11673 |
def get_image_info(
        page: 'Page',
        hashes: bool = False,
        xrefs: bool = False
    ) -> list:
    """Extract image information only from a pymupdf.TextPage.

    The result is cached in 'page._image_info' whenever it was computed
    with hashes, and the cached list is reused by later calls.

    Args:
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
            Ignored for non-PDF documents.
    Returns:
        A list of image information dictionaries. With 'xrefs', every
        entry gets an "xref" key (0 if no page image has the same digest).
    """
    doc = page.parent
    if doc.is_pdf:
        if xrefs:
            hashes = True  # digests are needed to match images to xrefs
    else:
        xrefs = False  # xrefs only exist in PDF documents

    imginfo = getattr(page, "_image_info", None)
    if imginfo and not xrefs:
        return imginfo  # cached result is sufficient

    if not imginfo:
        textpage = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
        imginfo = textpage.extractIMGINFO(hashes=hashes)
        del textpage
        if hashes:
            page._image_info = imginfo

    # from here on 'xrefs' can only be true for PDF documents
    if not xrefs:
        return imginfo

    # map the digest of every image shown by the page to its xref
    digest_to_xref = {}
    for img in page.get_images():
        xref = img[0]
        pix = Pixmap(doc, xref)
        digest_to_xref[pix.digest] = xref
        del pix

    for entry in imginfo:
        entry["xref"] = digest_to_xref.get(entry["digest"], 0)
    return imginfo
| 11714 | |
def get_image_rects(page: 'Page', name, transform=False) -> list:
    """Return list of image positions on a page.

    Args:
        name: (str, list, int) image identification. May be reference name, an
            item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.
    Returns:
        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
        for all image locations on the page.
    Raises:
        ValueError: if a reference name matches none or more than one
            image of the page.
    """
    name_type = type(name)
    if name_type in (list, tuple):
        xref = name[0]  # item of the page's image list
    elif name_type is int:
        xref = name
    else:
        # look up the reference name in the page's image list
        matches = [img for img in page.get_images() if img[7] == name]
        if matches == []:
            raise ValueError("bad image name")
        if len(matches) != 1:
            raise ValueError("multiple image names found")
        xref = matches[0][0]

    # make pixmap of the image to compute its MD5 digest
    pix = Pixmap(page.parent, xref)
    digest = pix.digest
    del pix

    # every image info entry with the same digest is a display of this image
    hits = [
        info
        for info in page.get_image_info(hashes=True)
        if info["digest"] == digest
    ]
    if transform:
        return [(Rect(info["bbox"]), Matrix(info["transform"])) for info in hits]
    return [Rect(info["bbox"]) for info in hits]
| 11750 | |
def get_label(page):
    """Return the label for this PDF page.

    Args:
        page: page object.
    Returns:
        The label (str) of the page. An empty string is returned when the
        document delivers no page labels.
    """
    # Jorj McKie, 2021-01-06

    page_labels = page.parent._get_page_labels()
    if page_labels:
        page_labels.sort()
        return utils.get_label_pno(page.number, page_labels)
    return ""
| 11766 | |
def get_links(page: 'Page') -> list:
    """Create a list of all links contained in a PDF page.

    Notes:
        see PyMuPDF documentation for details.
    """

    CheckParent(page)
    links = []
    link = page.first_link
    while link:
        links.append(utils.getLinkDict(link, page.parent))
        link = link.next

    if links == [] or not page.parent.is_pdf:
        return links

    # xref information of the page's link annotations
    link_refs = [
        ref
        for ref in JM_get_annot_xref_list2(page)  # formerly page.annot_xrefs()
        if ref[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
    ]
    # only enrich the dictionaries if both lists describe the same links
    if len(link_refs) == len(links):
        for entry, ref in zip(links, link_refs):
            entry["xref"] = ref[0]
            entry["id"] = ref[2]
    return links
| 11792 | |
def get_pixmap(
        page: 'Page',
        *,
        matrix: matrix_like=Identity,
        dpi=None,
        colorspace: Colorspace=None,
        clip: rect_like=None,
        alpha: bool=False,
        annots: bool=True,
    ) -> 'Pixmap':
    """Create pixmap of page.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given, matrix is ignored.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
            Any other string also selects csRGB.
        clip: (rect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations
    Raises:
        ValueError: if the colorspace does not have 1, 3 or 4 components.
    """
    if colorspace is None:
        colorspace = csRGB
    if dpi:
        # a resolution overrides the matrix: PDF user space has 72 dpi
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)

    if type(colorspace) is str:
        named = {"GRAY": csGRAY, "CMYK": csCMYK}
        colorspace = named.get(colorspace.upper(), csRGB)
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    display_list = page.get_displaylist(annots=annots)
    pix = display_list.get_pixmap(
        matrix=matrix,
        colorspace=colorspace,
        alpha=alpha,
        clip=clip,
    )
    del display_list  # release the display list as early as possible
    if dpi:
        pix.set_dpi(dpi, dpi)
    return pix
| 11835 | |
| 9222 def remove_rotation(self): | 11836 def remove_rotation(self): |
| 9223 """Set page rotation to 0 while maintaining visual appearance.""" | 11837 """Set page rotation to 0 while maintaining visual appearance.""" |
| 9224 rot = self.rotation # normalized rotation value | 11838 rot = self.rotation # normalized rotation value |
| 9225 if rot == 0: | 11839 if rot == 0: |
| 9226 return Identity # nothing to do | 11840 return Identity # nothing to do |
| 9502 rc = tp.extractTextbox(rect) | 12116 rc = tp.extractTextbox(rect) |
| 9503 if textpage is None: | 12117 if textpage is None: |
| 9504 del tp | 12118 del tp |
| 9505 return rc | 12119 return rc |
| 9506 | 12120 |
def get_text(self, *args, **kwargs):
    """Call utils.get_text() for this page, forwarding all arguments."""
    delegate = utils.get_text
    return delegate(self, *args, **kwargs)
| 12123 | |
def get_text_blocks(self, *args, **kwargs):
    """Call utils.get_text_blocks() for this page, forwarding all arguments."""
    delegate = utils.get_text_blocks
    return delegate(self, *args, **kwargs)
| 12126 | |
def get_text_selection(self, *args, **kwargs):
    """Call utils.get_text_selection() for this page, forwarding all arguments."""
    delegate = utils.get_text_selection
    return delegate(self, *args, **kwargs)
| 12129 | |
def get_text_words(self, *args, **kwargs):
    """Call utils.get_text_words() for this page, forwarding all arguments."""
    delegate = utils.get_text_words
    return delegate(self, *args, **kwargs)
| 12132 | |
def get_textpage_ocr(self, *args, **kwargs):
    """Call utils.get_textpage_ocr() for this page, forwarding all arguments."""
    delegate = utils.get_textpage_ocr
    return delegate(self, *args, **kwargs)
| 12135 | |
| 9507 def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage": | 12136 def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage": |
| 9508 CheckParent(self) | 12137 CheckParent(self) |
| 9509 if matrix is None: | 12138 if matrix is None: |
| 9510 matrix = Matrix(1, 1) | 12139 matrix = Matrix(1, 1) |
| 9511 old_rotation = self.rotation | 12140 old_rotation = self.rotation |
| 9626 return xref # we are done | 12255 return xref # we are done |
| 9627 | 12256 |
| 9628 # need to create document font info | 12257 # need to create document font info |
| 9629 doc.get_char_widths(xref, fontdict=fontdict) | 12258 doc.get_char_widths(xref, fontdict=fontdict) |
| 9630 return xref | 12259 return xref |
| 12260 | |
def insert_htmlbox(
    page,
    rect,
    text,
    *,
    css=None,
    scale_low=0,
    archive=None,
    rotate=0,
    oc=0,
    opacity=1,
    overlay=True,
    _scale_word_width=True,
    _verbose=False,
) -> tuple:
    """Insert text with optional HTML tags and stylings into a rectangle.

    Args:
        rect: (rect-like) rectangle into which the text should be placed.
        text: (str) text with optional HTML tags and stylings. A Story
            object is accepted as well and then used as is.
        css: (str) CSS styling commands.
        scale_low: (float) force-fit content by scaling it down. Must be in
            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
            down-scaling is acceptable. A value of 0.1 would mean that content
            may be scaled down by at most 90%.
        archive: Archive object pointing to locations of used fonts or images
        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
        oc: (int) the xref of an OCG / OCMD (Optional Content).
        opacity: (float) set opacity of inserted content.
        overlay: (bool) put text on top of page content.
        _scale_word_width: internal, for testing only.
        _verbose: internal, for testing only.
    Returns:
        A tuple of floats (spare_height, scale).
        spare_height:
            The height of the remaining space in <rect> below the
            text, or -1 if we failed to fit.
        scale:
            The scaling required; `0 < scale <= 1`.
            Will be less than `scale_low` if we failed to fit.
    Raises:
        ValueError: for a rotation that is no multiple of 90, a 'scale_low'
            outside [0, 1], or a 'text' that is neither str nor Story.
    """
    # validate the rotation angle, then bring it into range [0, 360)
    if rotate % 90 != 0:
        raise ValueError("bad rotation angle")
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360

    if not 0 <= scale_low <= 1:
        raise ValueError("'scale_low' must be in [0, 1]")

    if css is None:
        css = ""

    rect = Rect(rect)
    # the story is laid out unrotated, so swap the sides for 90 / 270 degrees
    box_width, box_height = rect.width, rect.height
    if rotate in (90, 270):
        box_width, box_height = box_height, box_width
    temp_rect = Rect(0, 0, box_width, box_height)

    # use a small border by default, followed by the user CSS
    mycss = "body {margin:1px;}" + css

    # either make a story, or accept a given one
    if isinstance(text, str):
        story = Story(html=text, user_css=mycss, archive=archive)
    elif isinstance(text, Story):
        story = text
    else:
        raise ValueError("'text' must be a string or a Story")

    # ----------------------------------------------------------------
    # Determine the scaling that makes the story fit. The text itself is
    # not scaled down: we look for the factor by which the rectangle must
    # grow to hold the text, and the inverse of that factor is the
    # down-scaling to apply to the text.
    # ----------------------------------------------------------------
    if scale_low == 0:
        rect_scale_max = None  # no limit for enlarging the rectangle
    else:
        rect_scale_max = 1 / scale_low

    if _scale_word_width:
        place_flags = mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW
    else:
        place_flags = 0

    fit = story.fit_scale(
        temp_rect,
        scale_min=1,
        scale_max=rect_scale_max,
        flags=place_flags,
        verbose=_verbose,
    )

    if not fit.big_enough:  # there was no fit
        return (-1, 1 / fit.parameter)

    # fit.filled is a tuple; convert it in place to a Rect for
    # convenience. (fit.rect is already a Rect.)
    fit.filled = Rect(fit.filled)
    assert (fit.rect.x0, fit.rect.y0) == (0, 0)
    assert (fit.filled.x0, fit.filled.y0) == (0, 0)

    scale = 1 / fit.parameter
    assert scale >= scale_low, f'{scale_low=} {scale=}'

    spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)

    def single_rect(*args):
        # the same rectangle serves as first and second item; third is None
        return fit.rect, fit.rect, None

    # draw story on temp PDF page
    doc = story.write_with_links(single_rect)

    # Insert opacity if requested.
    # For this, we prepend a command to the /Contents.
    if 0 <= opacity < 1:
        temp_page = doc[0]
        # generate /ExtGstate for the page
        gstate_name = temp_page._set_opacity(CA=opacity, ca=opacity)
        gs_command = f"/{gstate_name} gs\n"  # graphic state command
        TOOLS._insert_contents(temp_page, gs_command.encode(), 0)

    # put result in target page
    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)

    # -------------------------------------------------------------------------
    # re-insert links in target rect (show_pdf_page cannot copy annotations)
    # -------------------------------------------------------------------------
    # scaled center point of fit.rect
    source_center = (fit.rect.tl + fit.rect.br) / 2 * scale

    # center point of target rect
    target_center = (rect.tl + rect.br) / 2

    # link positioning matrix:
    # - move center of scaled-down fit.rect to (0,0)
    # - rotate
    # - move (0,0) to center of target rect
    to_origin = Matrix(scale, 0, 0, scale, -source_center.x, -source_center.y)
    to_target = Matrix(1, 0, 0, 1, target_center.x, target_center.y)
    mat = to_origin * Matrix(-rotate) * to_target

    # copy over links
    for link in doc[0].get_links():
        link["from"] *= mat
        page.insert_link(link)

    return spare_height, scale
| 12407 | |
| 12408 def insert_image( | |
| 12409 page, | |
| 12410 rect, | |
| 12411 *, | |
| 12412 alpha=-1, | |
| 12413 filename=None, | |
| 12414 height=0, | |
| 12415 keep_proportion=True, | |
| 12416 mask=None, | |
| 12417 oc=0, | |
| 12418 overlay=True, | |
| 12419 pixmap=None, | |
| 12420 rotate=0, | |
| 12421 stream=None, | |
| 12422 width=0, | |
| 12423 xref=0, | |
| 12424 ): | |
| 12425 """Insert an image for display in a rectangle. | |
| 12426 | |
| 12427 Args: | |
| 12428 rect: (rect_like) position of image on the page. | |
| 12429 alpha: (int, optional) set to 0 if image has no transparency. | |
| 12430 filename: (str, Path, file object) image filename. | |
| 12431 height: (int) | |
| 12432 keep_proportion: (bool) keep width / height ratio (default). | |
| 12433 mask: (bytes, optional) image consisting of alpha values to use. | |
| 12434 oc: (int) xref of OCG or OCMD to declare as Optional Content. | |
| 12435 overlay: (bool) put in foreground (default) or background. | |
| 12436 pixmap: (pymupdf.Pixmap) use this as image. | |
| 12437 rotate: (int) rotate by 0, 90, 180 or 270 degrees. | |
| 12438 stream: (bytes) use this as image. | |
| 12439 width: (int) | |
| 12440 xref: (int) use this as image. | |
| 12441 | |
| 12442 'page' and 'rect' are positional, all other parameters are keywords. | |
| 12443 | |
| 12444 If 'xref' is given, that image is used. Other input options are ignored. | |
| 12445 Else, exactly one of pixmap, stream or filename must be given. | |
| 12446 | |
| 12447 'alpha=0' for non-transparent images improves performance significantly. | |
| 12448 Affects stream and filename only. | |
| 12449 | |
| 12450 Optimum transparent insertions are possible by using filename / stream in | |
| 12451 conjunction with a 'mask' image of alpha values. | |
| 12452 | |
| 12453 Returns: | |
| 12454 xref (int) of inserted image. Re-use as argument for multiple insertions. | |
| 12455 """ | |
| 12456 CheckParent(page) | |
| 12457 doc = page.parent | |
| 12458 if not doc.is_pdf: | |
| 12459 raise ValueError("is no PDF") | |
| 12460 | |
| 12461 if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1): | |
| 12462 raise ValueError("xref=0 needs exactly one of filename, pixmap, stream") | |
| 12463 | |
| 12464 if filename: | |
| 12465 if type(filename) is str: | |
| 12466 pass | |
| 12467 elif hasattr(filename, "absolute"): | |
| 12468 filename = str(filename) | |
| 12469 elif hasattr(filename, "name"): | |
| 12470 filename = filename.name | |
| 12471 else: | |
| 12472 raise ValueError("bad filename") | |
| 12473 | |
| 12474 if filename and not os.path.exists(filename): | |
| 12475 raise FileNotFoundError("No such file: '%s'" % filename) | |
| 12476 elif stream and type(stream) not in (bytes, bytearray, io.BytesIO): | |
| 12477 raise ValueError("stream must be bytes-like / BytesIO") | |
| 12478 elif pixmap and type(pixmap) is not Pixmap: | |
| 12479 raise ValueError("pixmap must be a Pixmap") | |
| 12480 if mask and not (stream or filename): | |
| 12481 raise ValueError("mask requires stream or filename") | |
| 12482 if mask and type(mask) not in (bytes, bytearray, io.BytesIO): | |
| 12483 raise ValueError("mask must be bytes-like / BytesIO") | |
| 12484 while rotate < 0: | |
| 12485 rotate += 360 | |
| 12486 while rotate >= 360: | |
| 12487 rotate -= 360 | |
| 12488 if rotate not in (0, 90, 180, 270): | |
| 12489 raise ValueError("bad rotate value") | |
| 12490 | |
| 12491 r = Rect(rect) | |
| 12492 if r.is_empty or r.is_infinite: | |
| 12493 raise ValueError("rect must be finite and not empty") | |
| 12494 clip = r * ~page.transformation_matrix | |
| 12495 | |
| 12496 # Create a unique image reference name. | |
| 12497 ilst = [i[7] for i in doc.get_page_images(page.number)] | |
| 12498 ilst += [i[1] for i in doc.get_page_xobjects(page.number)] | |
| 12499 ilst += [i[4] for i in doc.get_page_fonts(page.number)] | |
| 12500 n = "fzImg" # 'pymupdf image' | |
| 12501 i = 0 | |
| 12502 _imgname = n + "0" # first name candidate | |
| 12503 while _imgname in ilst: | |
| 12504 i += 1 | |
| 12505 _imgname = n + str(i) # try new name | |
| 12506 | |
| 12507 if overlay: | |
| 12508 page.wrap_contents() # ensure a balanced graphics state | |
| 12509 digests = doc.InsertedImages | |
| 12510 xref, digests = page._insert_image( | |
| 12511 filename=filename, | |
| 12512 pixmap=pixmap, | |
| 12513 stream=stream, | |
| 12514 imask=mask, | |
| 12515 clip=clip, | |
| 12516 overlay=overlay, | |
| 12517 oc=oc, | |
| 12518 xref=xref, | |
| 12519 rotate=rotate, | |
| 12520 keep_proportion=keep_proportion, | |
| 12521 width=width, | |
| 12522 height=height, | |
| 12523 alpha=alpha, | |
| 12524 _imgname=_imgname, | |
| 12525 digests=digests, | |
| 12526 ) | |
| 12527 if digests is not None: | |
| 12528 doc.InsertedImages = digests | |
| 12529 | |
| 12530 return xref | |
| 12531 | |
def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
    """Add the link described by 'lnk' to this page.

    Args:
        lnk: link description; it is turned into annotation source text
            by utils.getLinkText().
        mark: not used by this implementation.

    Raises:
        ValueError: if utils.getLinkText() returns an empty string.
    """
    CheckParent(page)
    link_source = utils.getLinkText(page, lnk)
    if link_source != "":
        page._addAnnot_FromString((link_source,))
        return
    raise ValueError("link kind not supported")
| 12539 | |
def insert_text(
    page: 'Page',
    point: point_like,
    text: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    miter_limit: float = 1,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
):
    """Insert text starting at a point of the page.

    Notes:
        Creates a Shape object via page.new_shape(), calls its same-named
        method and commits the shape if that call returned a value >= 0.
    Parameters:
        point: start position of the text.
        text: the text to insert (string or list).
        overlay: passed to Shape.commit().
        All other keyword arguments are forwarded unchanged to
        Shape.insert_text().
    Returns:
        The value returned by Shape.insert_text().
    """
    shape = page.new_shape()
    text_options = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        border_width=border_width,
        render_mode=render_mode,
        miter_limit=miter_limit,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    result = shape.insert_text(point, text, **text_options)
    if result >= 0:
        # only a non-negative result is written to the page
        shape.commit(overlay)
    return result
| 12588 | |
def insert_textbox(
    page: 'Page',
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    miter_limit: float = 1,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Fill a rectangle of the page with text.

    Notes:
        A Shape object is created, its same-named method does the work,
        and the shape is committed if that method returned a value >= 0.
    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background
    Returns:
        unused or deficit rectangle area (float)
    """
    shape = page.new_shape()
    box_options = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        expandtabs=expandtabs,
        render_mode=render_mode,
        miter_limit=miter_limit,
        border_width=border_width,
        align=align,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    result = shape.insert_textbox(rect, buffer, **box_options)
    if result >= 0:
        # a negative result is returned to the caller without committing
        shape.commit(overlay)
    return result
| 9631 | 12660 |
@property
def is_wrapped(self):
    """Tell whether /Contents is in a balanced graphics state.

    True exactly when self._count_q_balance() returns (0, 0).
    """
    q_balance = self._count_q_balance()
    return q_balance == (0, 0)
| 9739 | 12768 |
@property
def mediabox_size(self):
    """Point made of the mediabox's x1 and y1 coordinates."""
    box = self.mediabox
    return Point(box.x1, box.y1)
| 9743 | 12772 |
def new_shape(self):
    """Return a new Shape object created for this page."""
    shape = Shape(self)
    return shape
| 12775 | |
| 9744 #@property | 12776 #@property |
| 9745 #def parent( self): | 12777 #def parent( self): |
| 9746 # assert self._parent | 12778 # assert self._parent |
| 9747 # if self._parent: | 12779 # if self._parent: |
| 9748 # return self._parent | 12780 # return self._parent |
| 9757 CheckParent(self) | 12789 CheckParent(self) |
| 9758 doc = self.parent | 12790 doc = self.parent |
| 9759 page = doc.reload_page(self) | 12791 page = doc.reload_page(self) |
| 9760 # fixme this looks wrong. | 12792 # fixme this looks wrong. |
| 9761 self.this = page | 12793 self.this = page |
| 12794 | |
def replace_image(
    page: 'Page',
    xref: int,
    *,
    filename=None,
    pixmap=None,
    stream=None,
):
    """Replace the image stored under xref by another one.

    Only the object definition stored under xref is changed. The page's
    appearance instructions stay intact, so the new image is displayed with
    the same bbox, rotation etc.
    Providing a small, fully transparent image gives an effect as if the
    image had been deleted.
    A typical use is replacing large images by a smaller version, e.g. with
    a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.

    Raises:
        ValueError: if xref is not an image, or if not exactly one of
            filename / stream / pixmap is given.
    """
    doc = page.parent  # the owning document
    if not doc.xref_is_image(xref):
        raise ValueError("xref not an image")
    sources_given = sum(bool(src) for src in (filename, stream, pixmap))
    if sources_given != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")

    # Insert the new image anywhere in the page; only its object definition
    # is needed, it is copied over the old one next.
    inserted_xref = page.insert_image(
        page.rect,
        filename=filename,
        stream=stream,
        pixmap=pixmap,
    )
    doc.xref_copy(inserted_xref, xref)  # copy over new to old

    # The insertion has created a new /Contents source as the last entry;
    # set it to spaces so the temporary insertion is not displayed.
    contents_xrefs = page.get_contents()
    doc.update_stream(contents_xrefs[-1], b" ")

    page._image_info = None  # clear cache of extracted image information
| 9762 | 12832 |
| 9763 @property | 12833 @property |
| 9764 def rotation(self): | 12834 def rotation(self): |
| 9765 """Page rotation.""" | 12835 """Page rotation.""" |
| 9766 CheckParent(self) | 12836 CheckParent(self) |
| 9778 """Run page through a device. | 12848 """Run page through a device. |
| 9779 dw: DeviceWrapper | 12849 dw: DeviceWrapper |
| 9780 """ | 12850 """ |
| 9781 CheckParent(self) | 12851 CheckParent(self) |
| 9782 mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie()) | 12852 mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie()) |
| 12853 | |
def search_for(
    page,
    text,
    *,
    clip=None,
    quads=False,
    flags=None,
    textpage=None,
) -> list:
    """Find the occurrences of a string on a page.

    Args:
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created pymupdf.TextPage
    Returns:
        a list of rectangles or quads, each containing one occurrence.
    Raises:
        ValueError: if 'textpage' does not have this page as its parent.
    """
    if flags is None:
        flags = (
            0
            | TEXT_DEHYPHENATE
            | TEXT_PRESERVE_WHITESPACE
            | TEXT_PRESERVE_LIGATURES
            | TEXT_MEDIABOX_CLIP
        )
    if clip is not None:
        clip = Rect(clip)

    CheckParent(page)
    if textpage is not None:
        # caller-supplied text page: it must belong to this page
        if textpage.parent != page:
            raise ValueError("not a textpage of this page")
        return textpage.search(text, quads=quads)

    # no text page given: create a temporary one and drop it after the search
    temp_textpage = page.get_textpage(clip=clip, flags=flags)
    hits = temp_textpage.search(text, quads=quads)
    del temp_textpage
    return hits
| 9783 | 12894 |
def set_artbox(self, rect):
    """Set the page's ArtBox to 'rect' and return the result of _set_pagebox()."""
    box_name = "ArtBox"
    return self._set_pagebox(box_name, rect)
| 9787 | 12898 |
| 9846 | 12957 |
def set_trimbox(self, rect):
    """Set the page's TrimBox to 'rect' and return the result of _set_pagebox()."""
    box_name = "TrimBox"
    return self._set_pagebox(box_name, rect)
| 9850 | 12961 |
def show_pdf_page(
    page,
    rect,
    docsrc,
    pno=0,
    keep_proportion=True,
    overlay=True,
    oc=0,
    rotate=0,
    clip=None,
) -> int:
    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.

    Args:
        rect: (rect-like) where to place the source image
        docsrc: (document) source PDF
        pno: (int) source page number; negative values count from the end
        keep_proportion: (bool) do not change width-height-ratio
        overlay: (bool) put in foreground
        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
        rotate: (int) degrees (multiple of 90)
        clip: (rect-like) part of source page rectangle
    Returns:
        xref of inserted object (for reuse)
    Raises:
        ValueError: if either document is no PDF, 'rect' or the resulting
            source rectangle is empty or infinite, a negative 'pno' is given
            for a source without pages, or source and target are the same
            document.
    """
    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate the
            source rect's center to the origin, (2) rotate, (3) scale,
            (4) translate to the target rect's center.
        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees
        Returns:
            Transformation matrix.
        """
        # calc center point of source rect
        smp = (sr.tl + sr.br) / 2.0
        # calc center point of target rect
        tmp = (tr.tl + tr.br) / 2.0

        # m moves to (0, 0), then rotates
        m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= Matrix(fw, fh)  # concat scale matrix
        m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return JM_TUPLE(m)

    CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not docsrc.is_pdf:
        raise ValueError("is no PDF")

    # 'rect' is documented as rect-like: convert it (as insert_image does)
    # so that plain sequences work with the attribute checks below.
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    if pno < 0:  # support negative page numbers
        src_page_count = docsrc.page_count
        if src_page_count < 1:
            # repeatedly adding a page count of zero would never terminate
            raise ValueError("source document has no pages")
        pno %= src_page_count  # same result as adding page_count until >= 0
    src_page = docsrc[pno]  # load source page

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # names already used by /Form /XObjects, images and fonts of the page
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = docsrc._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of docsrc[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state
    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref
| 13085 | |
| 9851 @property | 13086 @property |
| 9852 def transformation_matrix(self): | 13087 def transformation_matrix(self): |
| 9853 """Page transformation matrix.""" | 13088 """Page transformation matrix.""" |
| 9854 CheckParent(self) | 13089 CheckParent(self) |
| 9855 | 13090 |
| 9873 rect = self._other_box("TrimBox") | 13108 rect = self._other_box("TrimBox") |
| 9874 if rect is None: | 13109 if rect is None: |
| 9875 return self.cropbox | 13110 return self.cropbox |
| 9876 mb = self.mediabox | 13111 mb = self.mediabox |
| 9877 return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) | 13112 return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) |
| 13113 | |
def update_link(page: 'Page', lnk: dict) -> None:
    """Rewrite the PDF object of an existing link of this page.

    Args:
        lnk: link description; its "xref" entry names the object to update.

    Raises:
        ValueError: if utils.getLinkText() returns an empty string.
    """
    CheckParent(page)
    link_source = utils.getLinkText(page, lnk)
    if link_source == "":
        raise ValueError("link kind not supported")

    doc = page.parent
    doc.update_object(lnk["xref"], link_source, page=page)
| 9878 | 13122 |
| 9879 def widgets(self, types=None): | 13123 def widgets(self, types=None): |
| 9880 """ Generator over the widgets of a page. | 13124 """ Generator over the widgets of a page. |
| 9881 | 13125 |
| 9882 Args: | 13126 Args: |
| 9900 prepend = b"q\n" * push | 13144 prepend = b"q\n" * push |
| 9901 TOOLS._insert_contents(self, prepend, False) | 13145 TOOLS._insert_contents(self, prepend, False) |
| 9902 if pop > 0: # append required pop commands | 13146 if pop > 0: # append required pop commands |
| 9903 append = b"\nQ" * pop + b"\n" | 13147 append = b"\nQ" * pop + b"\n" |
| 9904 TOOLS._insert_contents(self, append, True) | 13148 TOOLS._insert_contents(self, append, True) |
| 13149 | |
def write_text(
    page: 'Page',
    rect=None,
    writers=None,
    overlay=True,
    color=None,
    opacity=None,
    keep_proportion=True,
    rotate=0,
    oc=0,
) -> None:
    """Write the text of one or more pymupdf.TextWriter objects.

    A single TextWriter with no rotation and no target rectangle writes
    directly to the page. In every other case the writers write to a page of
    a temporary document, which is then shown on this page via
    show_pdf_page().

    Args:
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one or more pymupdf.TextWriter objects.
        overlay: put in foreground or background.
        color: passed on to TextWriter.write_text().
        opacity: passed on to TextWriter.write_text().
        keep_proportion: maintain aspect ratio of rectangle sides.
        rotate: arbitrary rotation angle.
        oc: the xref of an optional content object
    """
    assert isinstance(page, Page)
    if not writers:
        raise ValueError("need at least one pymupdf.TextWriter")

    if type(writers) is TextWriter:
        direct_write = rotate == 0 and rect is None
        if direct_write:
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        writers = (writers,)  # treat the single writer like a sequence

    clip = writers[0].text_rect
    scratch_doc = Document()
    page_rect = page.rect
    scratch_page = scratch_doc.new_page(width=page_rect.width, height=page_rect.height)
    for writer in writers:
        clip |= writer.text_rect  # accumulate the union of all text rects
        writer.write_text(scratch_page, opacity=opacity, color=color)

    if rect is None:
        rect = clip
    page.show_pdf_page(
        rect,
        scratch_doc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )
    # drop the references to the temporary document and its page
    scratch_doc = None
    scratch_page = None
| 9905 | 13200 |
| 9906 @property | 13201 @property |
| 9907 def xref(self): | 13202 def xref(self): |
| 9908 """PDF xref number of page.""" | 13203 """PDF xref number of page.""" |
| 9909 CheckParent(self) | 13204 CheckParent(self) |
| 11501 irect = property(round) | 14796 irect = property(round) |
| 11502 tl = top_left | 14797 tl = top_left |
| 11503 tr = top_right | 14798 tr = top_right |
| 11504 | 14799 |
| 11505 | 14800 |
| 14801 class Shape: | |
| 14802 """Create a new shape.""" | |
| 14803 | |
@staticmethod
def horizontal_angle(C, P):
    """Return the angle to the horizontal for the connection from C to P.

    The arcus sine of the unit vector's absolute y component gives the
    absolute angle; its ambiguity is resolved by the quadrant in which
    the vector P - C is located.
    """
    direction = Point(P - C).unit  # unit vector 'C' -> 'P'
    angle = math.asin(abs(direction.y))  # absolute angle from horizontal
    if direction.x < 0:
        # left half plane: measure from the negative x direction
        angle = math.pi - angle
        return -angle if direction.y <= 0 else angle
    # right half plane: only the sign depends on y
    return angle if direction.y >= 0 else -angle
| 14823 | |
def __init__(self, page: Page):
    """Set up an empty shape for 'page'.

    Raises:
        ValueError: if the page's parent document is no PDF.
    """
    CheckParent(page)
    self.page = page
    owner = page.parent
    self.doc = owner
    if not owner.is_pdf:
        raise ValueError("is no PDF")

    box_size = page.mediabox_size
    self.height = box_size.y
    self.width = box_size.x
    crop_pos = page.cropbox_position
    self.x = crop_pos.x
    self.y = crop_pos.y

    self.pctm = page.transformation_matrix  # page transf. matrix
    self.ipctm = ~self.pctm  # inverted transf. matrix

    # nothing drawn or written yet
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
    self.last_point = None
    self.rect = None
| 14843 | |
def updateRect(self, x):
    """Extend self.rect so that it also covers 'x'.

    'x' is taken as a point if its length is 2, otherwise as a rectangle.
    If self.rect is still None, it is initialized from 'x'.
    """
    is_point = len(x) == 2
    if self.rect is None:
        self.rect = Rect(x, x) if is_point else Rect(x)
        return

    if is_point:
        pt = Point(x)
        low_x, low_y, high_x, high_y = pt.x, pt.y, pt.x, pt.y
    else:
        box = Rect(x)
        low_x, low_y, high_x, high_y = box.x0, box.y0, box.x1, box.y1

    bounds = self.rect
    bounds.x0 = min(bounds.x0, low_x)
    bounds.y0 = min(bounds.y0, low_y)
    bounds.x1 = max(bounds.x1, high_x)
    bounds.y1 = max(bounds.y1, high_y)
| 14864 | |
def draw_line(self, p1: point_like, p2: point_like) -> Point:
    """Draw a straight line from p1 to p2 and return p2 as the new last point."""
    start = Point(p1)
    end = Point(p2)
    to_pdf = self.ipctm
    if not (self.last_point == start):
        # path does not continue at 'start': emit a "move to" first
        self.draw_cont += _format_g(JM_TUPLE(start * to_pdf)) + " m\n"
        self.last_point = start
        self.updateRect(start)

    self.draw_cont += _format_g(JM_TUPLE(end * to_pdf)) + " l\n"
    self.updateRect(end)
    self.last_point = end
    return end
| 14878 | |
def draw_polyline(self, points: list) -> Point:
    """Draw connected line segments through 'points'; return the last point."""
    for index, raw in enumerate(points):
        vertex = Point(raw)
        if index > 0:
            self.draw_cont += _format_g(JM_TUPLE(vertex * self.ipctm)) + " l\n"
        elif not (self.last_point == vertex):
            # first point differs from the current position: "move to"
            self.draw_cont += _format_g(JM_TUPLE(vertex * self.ipctm)) + " m\n"
            self.last_point = vertex
        self.updateRect(raw)

    final = Point(points[-1])
    self.last_point = final
    return final
| 14892 | |
def draw_bezier(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
) -> Point:
    """Draw a standard cubic Bezier curve from p1 to p4.

    p2 and p3 are the control points. Returns p4 as the new last point.
    """
    start, ctrl1, ctrl2, end = Point(p1), Point(p2), Point(p3), Point(p4)
    if not (self.last_point == start):
        self.draw_cont += _format_g(JM_TUPLE(start * self.ipctm)) + " m\n"

    # the "c" operator takes both control points followed by the end point
    coords = []
    for pt in (ctrl1, ctrl2, end):
        coords = coords + list(pt * self.ipctm)
    self.draw_cont += _format_g(JM_TUPLE(coords)) + " c\n"

    for pt in (start, ctrl1, ctrl2, end):
        self.updateRect(pt)
    self.last_point = end
    return end
| 14915 | |
def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
    """Draw an ellipse inside a tetrapod (rectangle or quad).

    Raises:
        ValueError: if 'tetra' does not have length 4.
    """
    if len(tetra) != 4:
        raise ValueError("invalid arg length")
    # four numbers are taken as a rectangle, anything else as a quad
    quad = Rect(tetra).quad if hasattr(tetra[0], "__float__") else Quad(tetra)

    # mid points of the four sides
    mid_top = quad.ul + (quad.ur - quad.ul) * 0.5
    mid_right = quad.ur + (quad.lr - quad.ur) * 0.5
    mid_bottom = quad.ll + (quad.lr - quad.ll) * 0.5
    mid_left = quad.ul + (quad.ll - quad.ul) * 0.5

    if not (self.last_point == mid_left):
        self.draw_cont += _format_g(JM_TUPLE(mid_left * self.ipctm)) + " m\n"
        self.last_point = mid_left

    # one curve per corner, starting and ending at the left mid point
    segments = (
        (mid_left, quad.ll, mid_bottom),
        (mid_bottom, quad.lr, mid_right),
        (mid_right, quad.ur, mid_top),
        (mid_top, quad.ul, mid_left),
    )
    for seg_start, corner, seg_end in segments:
        self.draw_curve(seg_start, corner, seg_end)

    self.updateRect(quad.rect)
    self.last_point = mid_left
    return self.last_point
| 14939 | |
def draw_circle(self, center: point_like, radius: float) -> Point:
    """Draw a circle around 'center' with the given radius.

    The circle is drawn as a 360 degree sector without the connecting
    lines to the center, starting at the point left of the center.

    Raises:
        ValueError: if radius is not greater than EPSILON.
    """
    if radius > EPSILON:
        middle = Point(center)
        start = middle - (radius, 0)
        return self.draw_sector(middle, start, 360, fullSector=False)
    raise ValueError("radius must be positive")
| 14947 | |
def draw_curve(
    self,
    p1: point_like,
    p2: point_like,
    p3: point_like,
) -> Point:
    """Draw a curve from p1 to p3 using p2 as the single control point.

    The curve is drawn as a cubic Bezier whose two control points lie on
    the lines p1 -> p2 and p3 -> p2 at the fraction 0.55228474983.
    """
    fraction = 0.55228474983
    start = Point(p1)
    helper = Point(p2)
    end = Point(p3)
    ctrl_start = start + (helper - start) * fraction
    ctrl_end = end + (helper - end) * fraction
    return self.draw_bezier(start, ctrl_start, ctrl_end, end)
| 14962 | |
def draw_sector(
    self,
    center: point_like,
    point: point_like,
    beta: float,
    fullSector: bool = True,
) -> Point:
    """Draw a circle sector.

    Args:
        center: center of the circle.
        point: start point of the arc on the circle's perimeter.
        beta: angle of the sector in degrees.
        fullSector: if true, also draw the lines connecting the arc's
            ends with the center.
    Returns:
        The end point of the arc, which becomes the new last point.
    Raises:
        ValueError: if the distance from center to point is not greater
            than EPSILON.
    """
    center = Point(center)
    point = Point(point)

    def path_op(operator, *values):
        # format the numbers and append the PDF path operator
        return _format_g(values) + operator

    angle_left = math.radians(-beta)
    full_turn = math.radians(math.copysign(360, angle_left)) * (-1)
    quarter = math.radians(math.copysign(90, angle_left))
    eighth = quarter / 2
    while abs(angle_left) > 2 * math.pi:
        angle_left += full_turn  # bring angle below 360 degrees

    if not (self.last_point == point):
        self.draw_cont += path_op(" m\n", *JM_TUPLE(point * self.ipctm))
        self.last_point = point

    arc_end = Point(0, 0)  # just make sure it exists
    mid = center
    arc_start = point
    radius = abs(arc_start - mid)  # circle radius

    if not radius > EPSILON:
        raise ValueError("radius must be positive")

    alfa = self.horizontal_angle(center, point)
    while abs(angle_left) > abs(quarter):  # draw 90 degree arcs
        end_x = mid.x + math.cos(alfa + quarter) * radius
        end_y = mid.y + math.sin(alfa + quarter) * radius
        arc_end = Point(end_x, end_y)  # the arc's end point
        tan_x = mid.x + math.cos(alfa + eighth) * radius / math.cos(eighth)
        tan_y = mid.y + math.sin(alfa + eighth) * radius / math.cos(eighth)
        tangents = Point(tan_x, tan_y)  # crossing point of tangents
        kappah = (1 - math.cos(eighth)) * 4 / 3 / abs(tangents - arc_end)
        kappa = kappah * abs(arc_start - arc_end)
        ctrl1 = arc_start + (tangents - arc_start) * kappa  # control point 1
        ctrl2 = arc_end + (tangents - arc_end) * kappa  # control point 2
        self.draw_cont += path_op(" c\n", *JM_TUPLE(
            list(ctrl1 * self.ipctm)
            + list(ctrl2 * self.ipctm)
            + list(arc_end * self.ipctm)
        ))

        angle_left -= quarter  # reduce param angle by 90 deg
        alfa += quarter  # advance start angle by 90 deg
        arc_start = arc_end  # advance to arc end point

    # draw (remaining) arc
    if abs(angle_left) > 1e-3:  # significant degrees left?
        half = angle_left / 2
        end_x = mid.x + math.cos(alfa + angle_left) * radius
        end_y = mid.y + math.sin(alfa + angle_left) * radius
        arc_end = Point(end_x, end_y)  # the arc's end point
        tan_x = mid.x + math.cos(alfa + half) * radius / math.cos(half)
        tan_y = mid.y + math.sin(alfa + half) * radius / math.cos(half)
        tangents = Point(tan_x, tan_y)  # crossing point of tangents
        # kappa height is 4/3 of segment height
        kappah = (1 - math.cos(half)) * 4 / 3 / abs(tangents - arc_end)  # kappa height
        kappa = kappah * abs(arc_start - arc_end) / (1 - math.cos(angle_left))
        ctrl1 = arc_start + (tangents - arc_start) * kappa  # control point 1
        ctrl2 = arc_end + (tangents - arc_end) * kappa  # control point 2
        self.draw_cont += path_op(" c\n", *JM_TUPLE(
            list(ctrl1 * self.ipctm)
            + list(ctrl2 * self.ipctm)
            + list(arc_end * self.ipctm)
        ))

    if fullSector:
        # connect start point -> center -> arc end
        self.draw_cont += path_op(" m\n", *JM_TUPLE(point * self.ipctm))
        self.draw_cont += path_op(" l\n", *JM_TUPLE(center * self.ipctm))
        self.draw_cont += path_op(" l\n", *JM_TUPLE(arc_end * self.ipctm))

    self.last_point = arc_end
    return self.last_point
| 15036 | |
def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
    """Draw a rectangle, optionally with rounded corners.

    Args:
        rect: the rectangle to draw.
        radius: if not None, the rectangle will have rounded corners.
            This is the radius of the curvature, given as percentage of
            the rectangle width or height. Valid are values 0 < v <= 0.5.
            For a sequence of two values, the corners will have different
            radii. Otherwise, the percentage will be computed from the
            shorter side. A value of (0.5, 0.5) will draw an ellipse.
    Returns:
        The new last point.
    Raises:
        ValueError: for an invalid 'radius'.
    """
    box = Rect(rect)
    if radius is None:  # standard rectangle
        re_args = list(box.bl * self.ipctm) + [box.width, box.height]
        self.draw_cont += _format_g(JM_TUPLE(re_args)) + " re\n"
        self.updateRect(box)
        self.last_point = box.tl
        return self.last_point

    # Rounded corners requested: 1 or 2 values, each with 0 < value <= 0.5.
    if hasattr(radius, "__float__"):
        if radius <= 0 or radius > 0.5:
            raise ValueError(f"bad radius value {radius}.")
        extent = min(box.width, box.height) * radius
        dx = (extent, 0)
        dy = (0, extent)
    elif hasattr(radius, "__len__") and len(radius) == 2:
        frac_x, frac_y = radius
        dx = (frac_x * box.width, 0)
        dy = (0, frac_y * box.height)
        if min(frac_x, frac_y) <= 0 or max(frac_x, frac_y) > 0.5:
            raise ValueError(f"bad radius value {radius}.")
    else:
        raise ValueError(f"bad radius value {radius}.")

    # walk around the rectangle: each straight side ends in a corner curve
    current = self.draw_line(box.tl + dy, box.bl - dy)
    current = self.draw_curve(current, box.bl, box.bl + dx)

    current = self.draw_line(current, box.br - dx)
    current = self.draw_curve(current, box.br, box.br - dy)

    current = self.draw_line(current, box.tr + dy)
    current = self.draw_curve(current, box.tr, box.tr - dx)

    current = self.draw_line(current, box.tl + dx)
    self.last_point = self.draw_curve(current, box.tl, box.tl + dy)

    self.updateRect(box)
    return self.last_point
| 15087 | |
def draw_quad(self, quad: quad_like) -> Point:
    """Draw a quad as a closed polyline and return the polyline's result.

    The outline runs upper-left, lower-left, lower-right, upper-right and
    back to upper-left.
    """
    corners = Quad(quad)
    outline = [corners.ul, corners.ll, corners.lr, corners.ur, corners.ul]
    return self.draw_polyline(outline)
| 15092 | |
def draw_zigzag(
    self,
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
) -> Point:
    """Draw a zig-zag line from p1 to p2 and return p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested amplitude; it is revised so that a whole
            number of full phases (4 steps each) fits the distance.

    Raises:
        ValueError: if the distance is too short for one full phase.
    """
    p1 = Point(p1)
    p2 = Point(p2)
    length = abs(p2 - p1)  # distance between the end points
    cnt = 4 * int(round(length / (4 * breadth), 0))  # only full phases
    if cnt < 4:
        raise ValueError("points too close")
    step = length / cnt  # revised breadth
    # util_hor_matrix normalizes the line to the x-axis; its inverse maps
    # the normalized edge points back to their real position.
    back = ~Matrix(util_hor_matrix(p1, p2))

    vertices = [p1]
    # Only odd steps produce an edge: 1, 5, 9, ... lie "above" the
    # connection, 3, 7, 11, ... lie "below" it.
    for i in range(1, cnt, 2):
        side = -1 if i % 4 == 1 else 1
        vertices.append(Point(i, side) * step * back)
    vertices.append(p2)

    self.draw_polyline(vertices)
    return p2
| 15121 | |
def draw_squiggle(
    self,
    p1: point_like,
    p2: point_like,
    breadth=2,
) -> Point:
    """Draw a squiggly (wavy) line from p1 to p2 and return p2.

    Args:
        p1: start point.
        p2: end point.
        breadth: requested amplitude; it is revised so that a whole
            number of full phases (4 steps each) fits the distance.

    Raises:
        ValueError: if the distance is too short for one full phase.
    """
    p1 = Point(p1)
    p2 = Point(p2)
    length = abs(p2 - p1)  # distance between the end points
    cnt = 4 * int(round(length / (4 * breadth), 0))  # only full phases
    if cnt < 4:
        raise ValueError("points too close")
    step = length / cnt  # revised breadth
    # inverse of the matrix that normalizes the line to the x-axis
    back = ~Matrix(util_hor_matrix(p1, p2))
    k = 2.4142135623765633  # y of draw_curve helper point

    # y offset per step within a phase: 1 -> above, 3 -> below the
    # connection line; even steps stay on the line.
    offsets = {1: -k, 3: k}
    nodes = [p1]
    for i in range(1, cnt):
        nodes.append(Point(i, offsets.get(i % 4, 0)) * step * back)
    nodes.append(p2)

    # Every curve spans three consecutive nodes; the next curve starts
    # where the previous one ended.
    for start in range(0, len(nodes) - 2, 2):
        self.draw_curve(nodes[start], nodes[start + 1], nodes[start + 2])
    return p2
| 15158 | |
| 15159 # ============================================================================== | |
| 15160 # Shape.insert_text | |
| 15161 # ============================================================================== | |
def insert_text(
    self,
    point: point_like,
    buffer: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    render_mode: int = 0,
    border_width: float = 0.05,
    miter_limit: float = 1,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> int:
    """Insert text lines starting at a point.

    Args:
        point: start position of the first line.
        buffer: a string (split at line breaks) or a list / tuple with
            one string per line.
        fontsize: font size.
        lineheight: factor applied to fontsize to get the line height.
            If not given, ascender - descender of the font is used, or
            1.2 if that difference does not exceed 1.
        fontname: font reference name; a leading "/" is dropped.
        fontfile, set_simple, encoding: passed on to the page's
            insert_font().
        color: stroke color.
        fill: fill color; if missing and render_mode is 0, the stroke
            color is used for filling.
        render_mode: written as "Tr" operand if greater than 0.
        border_width: multiplied by fontsize and written as line width
            if render_mode is greater than 0.
        miter_limit: written as "M" operand if render_mode is greater
            than 0 and the value is not None.
        rotate: text rotation, must be a multiple of 90.
        morph: if accepted by CheckMorph, item 0 is used as a point and
            item 1 as a matrix applied around that point.
        stroke_opacity, fill_opacity: passed on to the page's
            _set_opacity().
        oc: passed on to the page's _get_optional_content().

    Returns:
        Number of lines written. 0 if there is nothing to write or the
        character codes of the text cannot be determined.

    Raises:
        ValueError: if rotate is not a multiple of 90.
    """
    # nothing to do for an empty buffer
    if not buffer:
        return 0

    if type(buffer) in (list, tuple):
        lines = buffer
    else:
        lines = buffer.splitlines()
    if len(lines) == 0:
        return 0

    point = Point(point)
    try:
        maxcode = max(ord(ch) for ch in " ".join(lines))
    except Exception:
        exception_info()
        return 0

    # ensure valid 'fontname'
    fname = fontname[1:] if fontname.startswith("/") else fontname

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontdict = CheckFontInfo(self.doc, xref)[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight = fontsize * lineheight
    elif ascender - descender <= 1:
        lheight = fontsize * 1.2
    else:
        lheight = fontsize * (ascender - descender)

    # the font's stored glyph table is only used for codes up to 255
    if maxcode > 255:
        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    else:
        glyphs = fontdict["glyphs"]

    # no glyph table is handed over for simple fonts other than
    # Symbol / ZapfDingbats
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs
    tj_lines = [getTJstr(line, tj_glyphs, simple, ordering) for line in lines]

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if not fill and render_mode == 0:  # ensure fill color when 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    morphing = CheckMorph(morph)
    if rotate % 90 != 0:
        raise ValueError("bad rotate value")
    rot = rotate % 360  # text rotate = 0, 90, 270, 180

    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height
    width = self.width

    if morphing:
        shift = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
        cm = _format_g(JM_TUPLE(~shift * morph[1] * shift)) + " cm\n"
    else:
        cm = ""

    # text start position and available space, per rotation
    if rot == 90:
        left = height - point.y - self.y
        top = -point.x - self.x
        cm += cmp90
        space = width - abs(top)
    elif rot == 270:
        left = -height + point.y + self.y
        top = point.x + self.x
        cm += cmm90
        space = abs(top)
    elif rot == 180:
        left = -point.x - self.x
        top = -height + point.y + self.y
        cm += cm180
        space = abs(point.y + self.y)
    else:  # rot == 0
        top = height - point.y - self.y  # start of 1st char
        left = point.x + self.x  # start of 1st char
        space = top  # space available

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    alpha = "" if alpha is None else "/%s gs\n" % alpha

    parts = [
        f"\nq\n{bdc}{alpha}BT\n{cm}1 0 0 1 {_format_g((left, top))} Tm\n"
        f"/{fname} {_format_g(fontsize)} Tf "
    ]

    if render_mode > 0:
        parts.append("%i Tr " % render_mode)
        parts.append(_format_g(border_width * fontsize) + " w ")
        if miter_limit is not None:
            parts.append(_format_g(miter_limit) + " M ")
    if color is not None:
        parts.append(color_str)
    if fill is not None:
        parts.append(fill_str)

    # ---------------------------------------------------------------------
    # text output: line 1 is always written, following lines only while
    # there is space left
    # ---------------------------------------------------------------------
    parts.append(tj_lines[0])
    nlines = 1  # output line counter
    if len(tj_lines) > 1:
        parts.append(f"TJ\n0 -{_format_g(lheight)} TD\n")
    else:
        parts.append("TJ")
    for i, chunk in enumerate(tj_lines[1:], start=1):
        if space < lheight:
            break  # no space left on page
        if i > 1:
            parts.append("\nT* ")
        parts.append(chunk + "TJ")
        space -= lheight
        nlines += 1

    parts.append("\nET\n%sQ\n" % emc)

    # append to the text buffer of the shape
    self.text_cont += "".join(parts)
    return nlines
| 15347 | |
| 15348 # ============================================================================== | |
| 15349 # Shape.insert_textbox | |
| 15350 # ============================================================================== | |
def insert_textbox(
    self,
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: OptStr = "helv",
    fontfile: OptStr = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    set_simple: bool = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    border_width: float = 0.05,
    miter_limit: float = 1,
    align: int = 0,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Args:
        rect -- the textbox to fill
        buffer -- text to be inserted (string, or list / tuple of lines)
        fontname -- a Base-14 font, font name or '/name'
        fontfile -- name of a font file
        fontsize -- font size
        lineheight -- overwrite the font property
        set_simple, encoding -- passed on to the page's insert_font()
        color -- stroke color
        fill -- fill color; defaults to 'color' if render_mode is 0
        render_mode -- text rendering control
        border_width -- thickness of glyph borders as fraction of fontsize
        miter_limit -- written as "M" operand if render_mode > 0
        expandtabs -- handles tabulators with string function
        align -- 0 = left, 1 = center, 2 = right, 3 = justified
        rotate -- 0, 90, 180, or 270 degrees
        morph -- morph box with a matrix and a fixpoint
        stroke_opacity, fill_opacity -- passed on to _set_opacity()
        oc -- passed on to _get_optional_content()
    Returns:
        Non-negative: height of the unused part of the textbox (in text
        direction); the text has been stored. Negative: the height
        deficit; nothing has been stored. If there is no text at all,
        the full available height is returned.
    Raises:
        ValueError: for an empty or infinite rect, or if rotate is not a
            multiple of 90.
    """
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("text box must be finite and not empty")

    color_str = ColorCode(color, "c")
    fill_str = ColorCode(fill, "f")
    if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
        fill = color
        fill_str = ColorCode(color, "f")

    optcont = self.page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC\n" % optcont
        emc = "EMC\n"
    else:
        bdc = emc = ""

    # determine opacity / transparency
    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is None:
        alpha = ""
    else:
        alpha = "/%s gs\n" % alpha

    if rotate % 90 != 0:
        raise ValueError("rotate must be multiple of 90")

    rot = rotate % 360  # '%' already yields a non-negative result

    # is buffer worth of dealing with?
    if not bool(buffer):
        return rect.height if rot in (0, 180) else rect.width

    cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
    cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
    cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
    height = self.height

    fname = fontname
    if fname.startswith("/"):
        fname = fname[1:]

    xref = self.page.insert_font(
        fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
    )
    fontinfo = CheckFontInfo(self.doc, xref)

    fontdict = fontinfo[1]
    ordering = fontdict["ordering"]
    simple = fontdict["simple"]
    bfname = fontdict["name"]
    ascender = fontdict["ascender"]
    descender = fontdict["descender"]

    if lineheight:
        lheight_factor = lineheight
    elif ascender - descender <= 1:
        lheight_factor = 1.2
    else:
        lheight_factor = ascender - descender
    lheight = fontsize * lheight_factor

    # create one string from buffer; it is split into lines further down
    if type(buffer) in (list, tuple):
        t0 = "\n".join(buffer)
    else:
        t0 = buffer

    # A non-empty list / tuple can still join to an empty string (e.g.
    # [""]). There is no character to measure then: max() of an empty
    # sequence would raise ValueError. Treat it like an empty buffer.
    if not t0:
        return rect.height if rot in (0, 180) else rect.width

    maxcode = max(ord(c) for c in t0)
    # replace invalid char codes for simple fonts
    if simple and maxcode > 255:
        t0 = "".join([c if ord(c) < 256 else "?" for c in t0])

    t0 = t0.splitlines()

    # width table covering every char code that occurs in the text
    glyphs = self.doc.get_char_widths(xref, maxcode + 1)
    if simple and bfname not in ("Symbol", "ZapfDingbats"):
        tj_glyphs = None
    else:
        tj_glyphs = glyphs

    # ----------------------------------------------------------------------
    # calculate pixel length of a string
    # ----------------------------------------------------------------------
    def pixlen(x):
        """Calculate pixel length of x."""
        if ordering < 0:
            return sum([glyphs[ord(c)][1] for c in x]) * fontsize
        else:
            return len(x) * fontsize

    # ----------------------------------------------------------------------

    if ordering < 0:
        blen = glyphs[32][1] * fontsize  # pixel size of space character
    else:
        blen = fontsize

    text = ""  # output buffer

    if CheckMorph(morph):
        m1 = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~m1 * morph[1] * m1
        cm = _format_g(JM_TUPLE(mat)) + " cm\n"
    else:
        cm = ""

    # ----------------------------------------------------------------------
    # adjust for text orientation / rotation
    # ----------------------------------------------------------------------
    progr = 1  # direction of line progress
    c_pnt = Point(0, fontsize * ascender)  # used for line progress
    if rot == 0:  # normal orientation
        point = rect.tl + c_pnt  # line 1 is one ascender below top
        maxwidth = rect.width  # pixels available in one line
        maxheight = rect.height  # available text height

    elif rot == 90:  # rotate counter clockwise
        c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
        point = rect.bl + c_pnt  # line 1 one ascender away from left
        maxwidth = rect.height  # pixels available in one line
        maxheight = rect.width  # available text height
        cm += cmp90

    elif rot == 180:  # text upside down
        # progress upwards in y direction
        c_pnt = -Point(0, fontsize * ascender)
        point = rect.br + c_pnt  # line 1 one ascender above bottom
        maxwidth = rect.width  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.height  # available text height
        cm += cm180

    else:  # rotate clockwise (270 or -90)
        # progress from right to left
        c_pnt = -Point(fontsize * ascender, 0)
        point = rect.tr + c_pnt  # line 1 one ascender left of right
        maxwidth = rect.height  # pixels available in one line
        progr = -1  # subtract lheight for next line
        maxheight = rect.width  # available text height
        cm += cmm90

    # ======================================================================
    # line loop
    # ======================================================================
    just_tab = []  # 'justify' indicators per line

    for i, line in enumerate(t0):
        line_t = line.expandtabs(expandtabs).split(" ")  # split into words
        num_words = len(line_t)
        lbuff = ""  # init line buffer
        rest = maxwidth  # available line pixels
        # ==================================================================
        # word loop
        # ==================================================================
        for j in range(num_words):
            word = line_t[j]
            pl_w = pixlen(word)  # pixel len of word
            if rest >= pl_w:  # does it fit on the line?
                lbuff += word + " "  # yes, append word
                rest -= pl_w + blen  # update available line space
                continue  # next word

            # word doesn't fit - output line (if not empty)
            if lbuff:
                lbuff = lbuff.rstrip() + "\n"  # line full, append line break
                text += lbuff  # append to total text
                just_tab.append(True)  # can align-justify

            lbuff = ""  # re-init line buffer
            rest = maxwidth  # re-init avail. space

            if pl_w <= maxwidth:  # word shorter than 1 line?
                lbuff = word + " "  # start the line with it
                rest = maxwidth - pl_w - blen  # update free space
                continue

            # long word: split across multiple lines - char by char ...
            if len(just_tab) > 0:
                just_tab[-1] = False  # cannot align-justify
            for c in word:
                if pixlen(lbuff) <= maxwidth - pixlen(c):
                    lbuff += c
                else:  # line full
                    lbuff += "\n"  # close line
                    text += lbuff  # append to text
                    just_tab.append(False)  # cannot align-justify
                    lbuff = c  # start new line with this char

            lbuff += " "  # finish long word
            rest = maxwidth - pixlen(lbuff)  # long word stored

        if lbuff:  # unprocessed line content?
            text += lbuff.rstrip()  # append to text
            just_tab.append(False)  # cannot align-justify

        if i < len(t0) - 1:  # not the last line?
            text += "\n"  # insert line break

    # compute used part of the textbox
    if text.endswith("\n"):
        text = text[:-1]
    lb_count = text.count("\n") + 1  # number of lines written

    # text height = line count * line height plus one descender value
    text_height = lheight * lb_count - descender * fontsize

    more = text_height - maxheight  # difference to height limit
    if more > EPSILON:  # landed too much outside rect
        return (-1) * more  # return deficit, don't output

    more = abs(more)
    if more < EPSILON:
        more = 0  # don't bother with epsilons
    nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer

    # center, right, justify: output each line with its own specifics
    text_t = text.splitlines()  # split text in lines again
    just_tab[-1] = False  # never justify last line
    for i, t in enumerate(text_t):
        spacing = 0
        pl = maxwidth - pixlen(t)  # length of empty line part
        pnt = point + c_pnt * (i * lheight_factor)  # text start of line
        if align == 1:  # center: right shift by half width
            if rot in (0, 180):
                pnt = pnt + Point(pl / 2, 0) * progr
            else:
                pnt = pnt - Point(0, pl / 2) * progr
        elif align == 2:  # right: right shift by full width
            if rot in (0, 180):
                pnt = pnt + Point(pl, 0) * progr
            else:
                pnt = pnt - Point(0, pl) * progr
        elif align == 3:  # justify
            spaces = t.count(" ")  # number of spaces in line
            if spaces > 0 and just_tab[i]:  # if any, and we may justify
                spacing = pl / spaces  # make every space this much larger
            else:
                spacing = 0  # keep normal space length

        # text matrix position, per rotation
        top = height - pnt.y - self.y
        left = pnt.x + self.x
        if rot == 90:
            left = height - pnt.y - self.y
            top = -pnt.x - self.x
        elif rot == 270:
            left = -height + pnt.y + self.y
            top = pnt.x + self.x
        elif rot == 180:
            left = -pnt.x - self.x
            top = -height + pnt.y + self.y

        nres += f"1 0 0 1 {_format_g((left, top))} Tm /{fname} {_format_g(fontsize)} Tf "

        if render_mode > 0:
            nres += "%i Tr " % render_mode
            nres += _format_g(border_width * fontsize) + " w "
            if miter_limit is not None:
                nres += _format_g(miter_limit) + " M "

        if align == 3:
            nres += _format_g(spacing) + " Tw "

        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str
        nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)

    nres += "ET\n%sQ\n" % emc

    self.text_cont += nres
    self.updateRect(rect)
    return more
| 15674 | |
def finish(
    self,
    width: float = 1,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    dashes: OptStr = None,
    even_odd: bool = False,
    morph: OptSeq = None,
    closePath: bool = True,
    fill_opacity: float = 1,
    stroke_opacity: float = 1,
    oc: int = 0,
) -> None:
    """Close the current drawing segment and add it to the total content.

    Notes:
        Wraps the pending draw commands with color, opacity, dashes,
        line cap / join, line width, optional content and morphing
        instructions, chooses the painting operator and optionally
        closes the path. Does nothing if no draw commands are pending.
    """
    if self.draw_cont == "":  # nothing was drawn: no-op
        return

    # width and stroke color only make sense together
    if width == 0:
        color = None
    elif color is None:
        width = 0
    color_str = ColorCode(color, "c")  # stroke color command
    fill_str = ColorCode(fill, "f")  # fill color command

    optcont = self.page._get_optional_content(oc)
    if optcont is None:
        emc = ""
    else:
        self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
        emc = "EMC\n"

    alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if alpha is not None:
        self.draw_cont = "/%s gs\n" % alpha + self.draw_cont

    if width not in (0, 1):
        self.draw_cont += _format_g(width) + " w\n"

    if lineCap != 0:
        self.draw_cont = "%i J\n" % lineCap + self.draw_cont
    if lineJoin != 0:
        self.draw_cont = "%i j\n" % lineJoin + self.draw_cont

    if dashes not in (None, "", "[] 0"):
        self.draw_cont = "%s d\n" % dashes + self.draw_cont

    if closePath:
        self.draw_cont += "h\n"
        self.last_point = None

    if color is not None:
        self.draw_cont += color_str

    # painting operator: stroke only, fill only, or both; a "*" suffix
    # is added when even_odd is set
    if fill is None:
        paint = "S\n"
    else:
        self.draw_cont += fill_str
        paint = "B" if color is not None else "f"
        paint += "*\n" if even_odd else "\n"
    self.draw_cont += paint

    self.draw_cont += emc
    if CheckMorph(morph):
        shift = Matrix(
            1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
        )
        mat = ~shift * morph[1] * shift
        self.draw_cont = _format_g(JM_TUPLE(mat)) + " cm\n" + self.draw_cont

    # move the finished segment to the total content and start afresh
    self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
    self.draw_cont = ""
    self.last_point = None
| 15765 | |
def commit(self, overlay: bool = True) -> None:
    """Update the page's /Contents object with Shape data.

    Args:
        overlay: controls whether the data appear in foreground
            (default) or background.

    All buffers of the shape are reset afterwards, so it can be re-used.
    """
    CheckParent(self.page)  # doc may have died meanwhile

    # Encode into a local variable: the shape's own buffers stay strings,
    # so the shape remains usable if one of the calls below raises
    # (a bytes 'totalcont' would break any later finish() / commit()).
    payload = (self.totalcont + self.text_cont).encode()

    if payload:
        if overlay:
            self.page.wrap_contents()  # ensure a balanced graphics state
        # make /Contents object with dummy stream
        xref = TOOLS._insert_contents(self.page, b" ", overlay)
        # update it with potential compression
        self.doc.update_stream(xref, payload)

    # clean up for potential re-use
    self.last_point = None
    self.rect = None
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
| 15789 | |
| 15790 | |
| 11506 class Story: | 15791 class Story: |
| 11507 | 15792 |
| 11508 def __init__( self, html='', user_css=None, em=12, archive=None): | 15793 def __init__( self, html='', user_css=None, em=12, archive=None): |
| 11509 buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8')) | 15794 buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8')) |
| 11510 if archive and not isinstance(archive, Archive): | 15795 if archive and not isinstance(archive, Archive): |
| 11662 for k, v in args.items(): | 15947 for k, v in args.items(): |
| 11663 setattr( position2, k, v) | 15948 setattr( position2, k, v) |
| 11664 function( position2) | 15949 function( position2) |
| 11665 mupdf.fz_story_positions( self.this, function2) | 15950 mupdf.fz_story_positions( self.this, function2) |
| 11666 | 15951 |
| 11667 def place( self, where): | 15952 def place( self, where, flags=0): |
| 15953 ''' | |
| 15954 Wrapper for fz_place_story_flags(). | |
| 15955 ''' | |
| 11668 where = JM_rect_from_py( where) | 15956 where = JM_rect_from_py( where) |
| 11669 filled = mupdf.FzRect() | 15957 filled = mupdf.FzRect() |
| 11670 more = mupdf.fz_place_story( self.this, where, filled) | 15958 more = mupdf.fz_place_story_flags( self.this, where, filled, flags) |
| 11671 return more, JM_py_from_rect( filled) | 15959 return more, JM_py_from_rect( filled) |
| 11672 | 15960 |
| 11673 def reset( self): | 15961 def reset( self): |
| 11674 mupdf.fz_reset_story( self.this) | 15962 mupdf.fz_reset_story( self.this) |
| 11675 | 15963 |
| 11782 Members: | 16070 Members: |
| 11783 | 16071 |
| 11784 `big_enough`: | 16072 `big_enough`: |
| 11785 `True` if the fit succeeded. | 16073 `True` if the fit succeeded. |
| 11786 `filled`: | 16074 `filled`: |
| 11787 From the last call to `Story.place()`. | 16075 Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This |
| 16076 will be wider than .rect if any single word (which we never split) | |
| 16077 was too wide for .rect. | |
| 11788 `more`: | 16078 `more`: |
| 11789 `False` if the fit succeeded. | 16079 `False` if the fit succeeded. |
| 11790 `numcalls`: | 16080 `numcalls`: |
| 11791 Number of calls made to `self.place()`. | 16081 Number of calls made to `self.place()`. |
| 11792 `parameter`: | 16082 `parameter`: |
| 11793 The successful parameter value, or the largest failing value. | 16083 The successful parameter value, or the largest failing value. |
| 11794 `rect`: | 16084 `rect`: |
| 11795 The rect created from `parameter`. | 16085 The pymupdf.Rect created from `parameter`. |
| 11796 ''' | 16086 ''' |
| 11797 def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None): | 16087 def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None): |
| 11798 self.big_enough = big_enough | 16088 self.big_enough = big_enough |
| 11799 self.filled = filled | 16089 self.filled = filled |
| 11800 self.more = more | 16090 self.more = more |
| 11810 f' numcalls={self.numcalls}' | 16100 f' numcalls={self.numcalls}' |
| 11811 f' parameter={self.parameter}' | 16101 f' parameter={self.parameter}' |
| 11812 f' rect={self.rect}' | 16102 f' rect={self.rect}' |
| 11813 ) | 16103 ) |
| 11814 | 16104 |
| 11815 def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False): | 16105 def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0): |
| 11816 ''' | 16106 ''' |
| 11817 Finds optimal rect that contains the story `self`. | 16107 Finds optimal rect that contains the story `self`. |
| 11818 | 16108 |
| 11819 Returns a `Story.FitResult` instance. | 16109 Returns a `Story.FitResult` instance. |
| 11820 | 16110 |
| 11837 Maximum parameter to consider; `None` for +infinity. | 16127 Maximum parameter to consider; `None` for +infinity. |
| 11838 :arg delta: | 16128 :arg delta: |
| 11839 Maximum error in returned `parameter`. | 16129 Maximum error in returned `parameter`. |
| 11840 :arg verbose: | 16130 :arg verbose: |
| 11841 If true we output diagnostics. | 16131 If true we output diagnostics. |
| 16132 :arg flags: | |
| 16133 Passed to mupdf.fz_place_story_flags(), e.g. | |
| 16134 zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`. | |
| 11842 ''' | 16135 ''' |
| 11843 def log(text): | 16136 def log(text): |
| 11844 assert verbose | 16137 assert verbose |
| 11845 message(f'fit(): {text}') | 16138 message(f'fit(): {text}') |
| 11846 | 16139 |
| 11892 big_enough = False | 16185 big_enough = False |
| 11893 result = Story.FitResult(parameter=parameter, numcalls=state.numcalls) | 16186 result = Story.FitResult(parameter=parameter, numcalls=state.numcalls) |
| 11894 if verbose: | 16187 if verbose: |
| 11895 log(f'update(): not calling self.place() because rect is empty.') | 16188 log(f'update(): not calling self.place() because rect is empty.') |
| 11896 else: | 16189 else: |
| 11897 more, filled = self.place(rect) | 16190 more, filled = self.place(rect, flags) |
| 11898 state.numcalls += 1 | 16191 state.numcalls += 1 |
| 11899 big_enough = not more | 16192 big_enough = not more |
| 11900 result = Story.FitResult( | 16193 result = Story.FitResult( |
| 11901 filled=filled, | 16194 filled=filled, |
| 11902 more=more, | 16195 more=more, |
| 11961 if state.pmax - state.pmin < delta: | 16254 if state.pmax - state.pmin < delta: |
| 11962 return ret() | 16255 return ret() |
| 11963 parameter = (state.pmin + state.pmax) / 2 | 16256 parameter = (state.pmin + state.pmax) / 2 |
| 11964 update(parameter) | 16257 update(parameter) |
| 11965 | 16258 |
| 11966 def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False): | 16259 def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0): |
| 11967 ''' | 16260 ''' |
| 11968 Finds smallest value `scale` in range `scale_min..scale_max` where | 16261 Finds smallest value `scale` in range `scale_min..scale_max` where |
| 11969 `scale * rect` is large enough to contain the story `self`. | 16262 `scale * rect` is large enough to contain the story `self`. |
| 11970 | 16263 |
| 11971 Returns a `Story.FitResult` instance. | 16264 Returns a `Story.FitResult` instance with `.parameter` set to `scale`. |
| 11972 | 16265 |
| 11973 :arg width: | 16266 :arg width: |
| 11974 width of rect. | 16267 width of rect. |
| 11975 :arg height: | 16268 :arg height: |
| 11976 height of rect. | 16269 height of rect. |
| 11981 infinite. | 16274 infinite. |
| 11982 :arg delta: | 16275 :arg delta: |
| 11983 Maximum error in returned scale. | 16276 Maximum error in returned scale. |
| 11984 :arg verbose: | 16277 :arg verbose: |
| 11985 If true we output diagnostics. | 16278 If true we output diagnostics. |
| 16279 :arg flags: | |
| 16280 Passed to Story.place(). | |
| 11986 ''' | 16281 ''' |
| 11987 x0, y0, x1, y1 = rect | 16282 x0, y0, x1, y1 = rect |
| 11988 width = x1 - x0 | 16283 width = x1 - x0 |
| 11989 height = y1 - y0 | 16284 height = y1 - y0 |
| 11990 def fn(scale): | 16285 def fn(scale): |
| 11991 return Rect(x0, y0, x0 + scale*width, y0 + scale*height) | 16286 return Rect(x0, y0, x0 + scale*width, y0 + scale*height) |
| 11992 return self.fit(fn, scale_min, scale_max, delta, verbose) | 16287 return self.fit(fn, scale_min, scale_max, delta, verbose, flags) |
| 11993 | 16288 |
| 11994 def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False): | 16289 def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False): |
| 11995 ''' | 16290 ''' |
| 11996 Finds smallest height in range `height_min..height_max` where a rect | 16291 Finds smallest height in range `height_min..height_max` where a rect |
| 11997 with size `(width, height)` is large enough to contain the story | 16292 with size `(width, height)` is large enough to contain the story |
| 12314 cbbox = JM_char_bbox(line, ch) | 16609 cbbox = JM_char_bbox(line, ch) |
| 12315 if (not JM_rects_overlap(tp_rect, cbbox) | 16610 if (not JM_rects_overlap(tp_rect, cbbox) |
| 12316 and not mupdf.fz_is_infinite_rect(tp_rect) | 16611 and not mupdf.fz_is_infinite_rect(tp_rect) |
| 12317 ): | 16612 ): |
| 12318 continue | 16613 continue |
| 16614 | |
| 16615 if buflen == 0 and ch.m_internal.c == 0x200d: | |
| 16616 # ZERO WIDTH JOINER cannot start a word | |
| 16617 continue | |
| 12319 word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters) | 16618 word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters) |
| 12320 this_char_rtl = JM_is_rtl_char(ch.m_internal.c) | 16619 this_char_rtl = JM_is_rtl_char(ch.m_internal.c) |
| 12321 if word_delimiter or this_char_rtl != last_char_rtl: | 16620 if word_delimiter or this_char_rtl != last_char_rtl: |
| 12322 if buflen == 0 and word_delimiter: | 16621 if buflen == 0 and word_delimiter: |
| 12323 continue # skip delimiters at line start | 16622 continue # skip delimiters at line start |
| 12513 elif idx[i] == idx2[-1] + 1: # new adjacent Latin word | 16812 elif idx[i] == idx2[-1] + 1: # new adjacent Latin word |
| 12514 idx2.append(idx[i]) | 16813 idx2.append(idx[i]) |
| 12515 | 16814 |
| 12516 text = " ".join(words) | 16815 text = " ".join(words) |
| 12517 return text | 16816 return text |
| 16817 | |
| 16818 def fill_textbox( | |
| 16819 writer: 'TextWriter', | |
| 16820 rect: rect_like, | |
| 16821 text: typing.Union[str, list], | |
| 16822 pos: point_like = None, | |
| 16823 font: typing.Optional[Font] = None, | |
| 16824 fontsize: float = 11, | |
| 16825 lineheight: OptFloat = None, | |
| 16826 align: int = 0, | |
| 16827 warn: bool = None, | |
| 16828 right_to_left: bool = False, | |
| 16829 small_caps: bool = False, | |
| 16830 ) -> tuple: | |
| 16831 """Fill a rectangle with text. | |
| 16832 | |
| 16833 Args: | |
| 16834 writer: pymupdf.TextWriter object (= "self") | |
| 16835 rect: rect-like to receive the text. | |
| 16836 text: string or list/tuple of strings. | |
| 16837 pos: point-like start position of first word. | |
| 16838 font: pymupdf.Font object (default pymupdf.Font('helv')). | |
| 16839 fontsize: the fontsize. | |
| 16840 lineheight: overwrite the font property | |
| 16841 align: (int) 0 = left, 1 = center, 2 = right, 3 = justify | |
| 16842 warn: (bool) text overflow action: none, warn, or exception | |
| 16843 right_to_left: (bool) indicate right-to-left language. | |
| 16844 """ | |
| 16845 rect = Rect(rect) | |
| 16846 if rect.is_empty: | |
| 16847 raise ValueError("fill rect must not empty.") | |
| 16848 if type(font) is not Font: | |
| 16849 font = Font("helv") | |
| 16850 | |
| 16851 def textlen(x): | |
| 16852 """Return length of a string.""" | |
| 16853 return font.text_length( | |
| 16854 x, fontsize=fontsize, small_caps=small_caps | |
| 16855 ) # abbreviation | |
| 16856 | |
| 16857 def char_lengths(x): | |
| 16858 """Return list of single character lengths for a string.""" | |
| 16859 return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps) | |
| 16860 | |
| 16861 def append_this(pos, text): | |
| 16862 ret = writer.append( | |
| 16863 pos, text, font=font, fontsize=fontsize, small_caps=small_caps | |
| 16864 ) | |
| 16865 return ret | |
| 16866 | |
| 16867 tolerance = fontsize * 0.2 # extra distance to left border | |
| 16868 space_len = textlen(" ") | |
| 16869 std_width = rect.width - tolerance | |
| 16870 std_start = rect.x0 + tolerance | |
| 16871 | |
| 16872 def norm_words(width, words): | |
| 16873 """Cut any word in pieces no longer than 'width'.""" | |
| 16874 nwords = [] | |
| 16875 word_lengths = [] | |
| 16876 for w in words: | |
| 16877 wl_lst = char_lengths(w) | |
| 16878 wl = sum(wl_lst) | |
| 16879 if wl <= width: # nothing to do - copy over | |
| 16880 nwords.append(w) | |
| 16881 word_lengths.append(wl) | |
| 16882 continue | |
| 16883 | |
| 16884 # word longer than rect width - split it in parts | |
| 16885 n = len(wl_lst) | |
| 16886 while n > 0: | |
| 16887 wl = sum(wl_lst[:n]) | |
| 16888 if wl <= width: | |
| 16889 nwords.append(w[:n]) | |
| 16890 word_lengths.append(wl) | |
| 16891 w = w[n:] | |
| 16892 wl_lst = wl_lst[n:] | |
| 16893 n = len(wl_lst) | |
| 16894 else: | |
| 16895 n -= 1 | |
| 16896 return nwords, word_lengths | |
| 16897 | |
| 16898 def output_justify(start, line): | |
| 16899 """Justified output of a line.""" | |
| 16900 # ignore leading / trailing / multiple spaces | |
| 16901 words = [w for w in line.split(" ") if w != ""] | |
| 16902 nwords = len(words) | |
| 16903 if nwords == 0: | |
| 16904 return | |
| 16905 if nwords == 1: # single word cannot be justified | |
| 16906 append_this(start, words[0]) | |
| 16907 return | |
| 16908 tl = sum([textlen(w) for w in words]) # total word lengths | |
| 16909 gaps = nwords - 1 # number of word gaps | |
| 16910 gapl = (std_width - tl) / gaps # width of each gap | |
| 16911 for w in words: | |
| 16912 _, lp = append_this(start, w) # output one word | |
| 16913 start.x = lp.x + gapl # next start at word end plus gap | |
| 16914 return | |
| 16915 | |
| 16916 asc = font.ascender | |
| 16917 dsc = font.descender | |
| 16918 if not lineheight: | |
| 16919 if asc - dsc <= 1: | |
| 16920 lheight = 1.2 | |
| 16921 else: | |
| 16922 lheight = asc - dsc | |
| 16923 else: | |
| 16924 lheight = lineheight | |
| 16925 | |
| 16926 LINEHEIGHT = fontsize * lheight # effective line height | |
| 16927 width = std_width # available horizontal space | |
| 16928 | |
| 16929 # starting point of text | |
| 16930 if pos is not None: | |
| 16931 pos = Point(pos) | |
| 16932 else: # default is just below rect top-left | |
| 16933 pos = rect.tl + (tolerance, fontsize * asc) | |
| 16934 if pos not in rect: | |
| 16935 raise ValueError("Text must start in rectangle.") | |
| 16936 | |
| 16937 # calculate displacement factor for alignment | |
| 16938 if align == TEXT_ALIGN_CENTER: | |
| 16939 factor = 0.5 | |
| 16940 elif align == TEXT_ALIGN_RIGHT: | |
| 16941 factor = 1.0 | |
| 16942 else: | |
| 16943 factor = 0 | |
| 16944 | |
| 16945 # split in lines if just a string was given | |
| 16946 if type(text) is str: | |
| 16947 textlines = text.splitlines() | |
| 16948 else: | |
| 16949 textlines = [] | |
| 16950 for line in text: | |
| 16951 textlines.extend(line.splitlines()) | |
| 16952 | |
| 16953 max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1 | |
| 16954 | |
| 16955 new_lines = [] # the final list of textbox lines | |
| 16956 no_justify = [] # no justify for these line numbers | |
| 16957 for i, line in enumerate(textlines): | |
| 16958 if line in ("", " "): | |
| 16959 new_lines.append((line, space_len)) | |
| 16960 width = rect.width - tolerance | |
| 16961 no_justify.append((len(new_lines) - 1)) | |
| 16962 continue | |
| 16963 if i == 0: | |
| 16964 width = rect.x1 - pos.x | |
| 16965 else: | |
| 16966 width = rect.width - tolerance | |
| 16967 | |
| 16968 if right_to_left: # reverses Arabic / Hebrew text front to back | |
| 16969 line = writer.clean_rtl(line) | |
| 16970 tl = textlen(line) | |
| 16971 if tl <= width: # line short enough | |
| 16972 new_lines.append((line, tl)) | |
| 16973 no_justify.append((len(new_lines) - 1)) | |
| 16974 continue | |
| 16975 | |
| 16976 # we need to split the line in fitting parts | |
| 16977 words = line.split(" ") # the words in the line | |
| 16978 | |
| 16979 # cut in parts any words that are longer than rect width | |
| 16980 words, word_lengths = norm_words(width, words) | |
| 16981 | |
| 16982 n = len(words) | |
| 16983 while True: | |
| 16984 line0 = " ".join(words[:n]) | |
| 16985 wl = sum(word_lengths[:n]) + space_len * (n - 1) | |
| 16986 if wl <= width: | |
| 16987 new_lines.append((line0, wl)) | |
| 16988 words = words[n:] | |
| 16989 word_lengths = word_lengths[n:] | |
| 16990 n = len(words) | |
| 16991 line0 = None | |
| 16992 else: | |
| 16993 n -= 1 | |
| 16994 | |
| 16995 if len(words) == 0: | |
| 16996 break | |
| 16997 assert n | |
| 16998 | |
| 16999 # ------------------------------------------------------------------------- | |
| 17000 # List of lines created. Each item is (text, tl), where 'tl' is the PDF | |
| 17001 # output length (float) and 'text' is the text. Except for justified text, | |
| 17002 # this is output-ready. | |
| 17003 # ------------------------------------------------------------------------- | |
| 17004 nlines = len(new_lines) | |
| 17005 if nlines > max_lines: | |
| 17006 msg = "Only fitting %i of %i lines." % (max_lines, nlines) | |
| 17007 if warn is None: | |
| 17008 pass | |
| 17009 elif warn: | |
| 17010 message("Warning: " + msg) | |
| 17011 else: | |
| 17012 raise ValueError(msg) | |
| 17013 | |
| 17014 start = Point() | |
| 17015 no_justify += [len(new_lines) - 1] # no justifying of last line | |
| 17016 for i in range(max_lines): | |
| 17017 try: | |
| 17018 line, tl = new_lines.pop(0) | |
| 17019 except IndexError: | |
| 17020 if g_exceptions_verbose >= 2: exception_info() | |
| 17021 break | |
| 17022 | |
| 17023 if right_to_left: # Arabic, Hebrew | |
| 17024 line = "".join(reversed(line)) | |
| 17025 | |
| 17026 if i == 0: # may have different start for first line | |
| 17027 start = pos | |
| 17028 | |
| 17029 if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width: | |
| 17030 output_justify(start, line) | |
| 17031 start.x = std_start | |
| 17032 start.y += LINEHEIGHT | |
| 17033 continue | |
| 17034 | |
| 17035 if i > 0 or pos.x == std_start: # left, center, right alignments | |
| 17036 start.x += (width - tl) * factor | |
| 17037 | |
| 17038 append_this(start, line) | |
| 17039 start.x = std_start | |
| 17040 start.y += LINEHEIGHT | |
| 17041 | |
| 17042 return new_lines # return non-written lines | |
| 12518 | 17043 |
| 12519 def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0): | 17044 def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0): |
| 12520 """Write the text to a PDF page having the TextWriter's page size. | 17045 """Write the text to a PDF page having the TextWriter's page size. |
| 12521 | 17046 |
| 12522 Args: | 17047 Args: |
| 12733 return max(0, self.y1 - self.y0) | 17258 return max(0, self.y1 - self.y0) |
| 12734 | 17259 |
| 12735 def contains(self, x): | 17260 def contains(self, x): |
| 12736 """Check if x is in the rectangle.""" | 17261 """Check if x is in the rectangle.""" |
| 12737 return self.__contains__(x) | 17262 return self.__contains__(x) |
| 17263 | |
| 17264 def get_area(self, *args) -> float: | |
| 17265 """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'.""" | |
| 17266 if args: | |
| 17267 unit = args[0] | |
| 17268 else: | |
| 17269 unit = "px" | |
| 17270 u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)} | |
| 17271 f = (u[unit][0] / u[unit][1]) ** 2 | |
| 17272 return f * self.width * self.height | |
| 12738 | 17273 |
| 12739 def include_point(self, p): | 17274 def include_point(self, p): |
| 12740 """Extend rectangle to include point p.""" | 17275 """Extend rectangle to include point p.""" |
| 12741 rect = self.rect.include_point(p) | 17276 rect = self.rect.include_point(p) |
| 12742 return rect.irect | 17277 return rect.irect |
| 20923 red, green, blue: integers in range 0..255. | 25458 red, green, blue: integers in range 0..255. |
| 20924 ''' | 25459 ''' |
| 20925 return _wxcolors | 25460 return _wxcolors |
| 20926 | 25461 |
| 20927 | 25462 |
| 25463 def _mupdf_devel(make_links=True): | |
| 25464 ''' | |
| 25465 Allows PyMuPDF installation to be used to compile and link programmes that | |
| 25466 use the MuPDF C/C++ API. | |
| 25467 | |
| 25468 Args: | |
| 25469 make_links: | |
| 25470 If true, then on non-windows we also create softlinks to any shared | |
| 25471 libraries that are supplied with a version suffix; this allows them | |
| 25472 to be used in a link command. | |
| 25473 | |
| 25474 For example we create links such as: | |
| 25475 | |
| 25476 site-packages/pymupdf/ | |
| 25477 libmupdf.so -> libmupdf.so.26.7 | |
| 25478 libmupdfcpp.so -> libmupdfcpp.so.26.7 | |
| 25479 | |
| 25480 Returns: (mupdf_include, mupdf_lib). | |
| 25481 mupdf_include: | |
| 25482 Path of MuPDF include directory within PyMuPDF install. | |
| 25483 mupdf_lib: | |
| 25484 Path of MuPDF library directory within PyMuPDF install. | |
| 25485 ''' | |
| 25486 import platform | |
| 25487 | |
| 25488 log(f'{mupdf_version=}') | |
| 25489 | |
| 25490 p = os.path.normpath(f'{__file__}/..') | |
| 25491 | |
| 25492 mupdf_include = f'{p}/mupdf-devel/include' | |
| 25493 | |
| 25494 if platform.system() == 'Windows': | |
| 25495 # Separate .lib files are used at build time. | |
| 25496 mupdf_lib = f'{p}/mupdf-devel/lib' | |
| 25497 else: | |
| 25498 # .so files are used for both buildtime and runtime linking. | |
| 25499 mupdf_lib = p | |
| 25500 log(f'Within installed PyMuPDF:') | |
| 25501 log(f' {mupdf_include=}') | |
| 25502 log(f' {mupdf_lib=}') | |
| 25503 | |
| 25504 assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.' | |
| 25505 assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.' | |
| 25506 | |
| 25507 if platform.system() != 'Windows' and make_links: | |
| 25508 # Make symbolic links within the installed pymupdf module so | |
| 25509 # that ld can find libmupdf.so etc. This is a bit of a hack, but | |
| 25510 # necessary because wheels cannot contain symbolic links. | |
| 25511 # | |
| 25512 # For example we create `libmupdf.so -> libmupdf.so.24.8`. | |
| 25513 # | |
| 25514 # We are careful to only create symlinks for the expected MuPDF | |
| 25515 # version, in case old .so files from a previous install are still | |
| 25516 # in place. | |
| 25517 # | |
| 25518 log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.') | |
| 25519 regex_suffix = mupdf_version.split('.')[1:3] | |
| 25520 regex_suffix = '[.]'.join(regex_suffix) | |
| 25521 mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$' | |
| 25522 log(f'{mupdf_lib_regex=}.') | |
| 25523 for leaf in os.listdir(mupdf_lib): | |
| 25524 m = re.match(mupdf_lib_regex, leaf) | |
| 25525 if m: | |
| 25526 pfrom = f'{mupdf_lib}/{m.group(1)}' | |
| 25527 # os.path.exists() can return false if softlink exists | |
| 25528 # but points to non-existent file, so we also use | |
| 25529 # `os.path.islink()`. | |
| 25530 if os.path.islink(pfrom) or os.path.exists(pfrom): | |
| 25531 log(f'Removing existing link {pfrom=}.') | |
| 25532 os.remove(pfrom) | |
| 25533 log(f'Creating symlink: {pfrom} -> {leaf}') | |
| 25534 os.symlink(leaf, pfrom) | |
| 25535 | |
| 25536 return mupdf_include, mupdf_lib | |
| 25537 | |
| 25538 | |
| 20928 # We cannot import utils earlier because it imports this .py file itself and | 25539 # We cannot import utils earlier because it imports this .py file itself and |
| 20929 # uses some pymupdf.* types in function typing. | 25540 # uses some pymupdf.* types in function typing. |
| 20930 # | 25541 # |
| 20931 from . import utils | 25542 from . import utils |
| 20932 | 25543 |
| 20937 recover_char_quad = utils.recover_char_quad | 25548 recover_char_quad = utils.recover_char_quad |
| 20938 recover_line_quad = utils.recover_line_quad | 25549 recover_line_quad = utils.recover_line_quad |
| 20939 recover_quad = utils.recover_quad | 25550 recover_quad = utils.recover_quad |
| 20940 recover_span_quad = utils.recover_span_quad | 25551 recover_span_quad = utils.recover_span_quad |
| 20941 | 25552 |
| 20942 Annot.get_text = utils.get_text | |
| 20943 Annot.get_textbox = utils.get_textbox | |
| 20944 | |
| 20945 Document._do_links = utils.do_links | |
| 20946 Document._do_widgets = utils.do_widgets | |
| 20947 Document.del_toc_item = utils.del_toc_item | |
| 20948 Document.get_char_widths = utils.get_char_widths | |
| 20949 Document.get_oc = utils.get_oc | |
| 20950 Document.get_ocmd = utils.get_ocmd | |
| 20951 Document.get_page_labels = utils.get_page_labels | |
| 20952 Document.get_page_numbers = utils.get_page_numbers | |
| 20953 Document.get_page_pixmap = utils.get_page_pixmap | |
| 20954 Document.get_page_text = utils.get_page_text | |
| 20955 Document.get_toc = utils.get_toc | |
| 20956 Document.has_annots = utils.has_annots | |
| 20957 Document.has_links = utils.has_links | |
| 20958 Document.insert_page = utils.insert_page | |
| 20959 Document.new_page = utils.new_page | |
| 20960 Document.scrub = utils.scrub | |
| 20961 Document.search_page_for = utils.search_page_for | |
| 20962 Document.set_metadata = utils.set_metadata | |
| 20963 Document.set_oc = utils.set_oc | |
| 20964 Document.set_ocmd = utils.set_ocmd | |
| 20965 Document.set_page_labels = utils.set_page_labels | |
| 20966 Document.set_toc = utils.set_toc | |
| 20967 Document.set_toc_item = utils.set_toc_item | |
| 20968 Document.subset_fonts = utils.subset_fonts | |
| 20969 Document.tobytes = Document.write | |
| 20970 Document.xref_copy = utils.xref_copy | |
| 20971 | |
| 20972 IRect.get_area = utils.get_area | |
| 20973 | |
| 20974 Page.apply_redactions = utils.apply_redactions | |
| 20975 Page.delete_image = utils.delete_image | |
| 20976 Page.delete_widget = utils.delete_widget | |
| 20977 Page.draw_bezier = utils.draw_bezier | |
| 20978 Page.draw_circle = utils.draw_circle | |
| 20979 Page.draw_curve = utils.draw_curve | |
| 20980 Page.draw_line = utils.draw_line | |
| 20981 Page.draw_oval = utils.draw_oval | |
| 20982 Page.draw_polyline = utils.draw_polyline | |
| 20983 Page.draw_quad = utils.draw_quad | |
| 20984 Page.draw_rect = utils.draw_rect | |
| 20985 Page.draw_sector = utils.draw_sector | |
| 20986 Page.draw_squiggle = utils.draw_squiggle | |
| 20987 Page.draw_zigzag = utils.draw_zigzag | |
| 20988 Page.get_image_info = utils.get_image_info | |
| 20989 Page.get_image_rects = utils.get_image_rects | |
| 20990 Page.get_label = utils.get_label | |
| 20991 Page.get_links = utils.get_links | |
| 20992 Page.get_pixmap = utils.get_pixmap | |
| 20993 Page.get_text = utils.get_text | |
| 20994 Page.get_text_blocks = utils.get_text_blocks | |
| 20995 Page.get_text_selection = utils.get_text_selection | |
| 20996 Page.get_text_words = utils.get_text_words | |
| 20997 Page.get_textbox = utils.get_textbox | |
| 20998 Page.get_textpage_ocr = utils.get_textpage_ocr | |
| 20999 Page.insert_image = utils.insert_image | |
| 21000 Page.insert_link = utils.insert_link | |
| 21001 Page.insert_text = utils.insert_text | |
| 21002 Page.insert_textbox = utils.insert_textbox | |
| 21003 Page.insert_htmlbox = utils.insert_htmlbox | |
| 21004 Page.new_shape = lambda x: utils.Shape(x) | |
| 21005 Page.replace_image = utils.replace_image | |
| 21006 Page.search_for = utils.search_for | |
| 21007 Page.show_pdf_page = utils.show_pdf_page | |
| 21008 Page.update_link = utils.update_link | |
| 21009 Page.write_text = utils.write_text | |
| 21010 Shape = utils.Shape | |
| 21011 from .table import find_tables | 25553 from .table import find_tables |
| 21012 | |
| 21013 Page.find_tables = find_tables | 25554 Page.find_tables = find_tables |
| 21014 | |
| 21015 Rect.get_area = utils.get_area | |
| 21016 | |
| 21017 TextWriter.fill_textbox = utils.fill_textbox | |
| 21018 | 25555 |
| 21019 | 25556 |
| 21020 class FitzDeprecation(DeprecationWarning): | 25557 class FitzDeprecation(DeprecationWarning): |
| 21021 pass | 25558 pass |
| 21022 | 25559 |
| 21283 _alias( Rect, 'include_rect') | 25820 _alias( Rect, 'include_rect') |
| 21284 _alias( Rect, 'is_empty') | 25821 _alias( Rect, 'is_empty') |
| 21285 _alias( Rect, 'is_infinite') | 25822 _alias( Rect, 'is_infinite') |
| 21286 _alias( TextWriter, 'fill_textbox') | 25823 _alias( TextWriter, 'fill_textbox') |
| 21287 _alias( TextWriter, 'write_text') | 25824 _alias( TextWriter, 'write_text') |
| 21288 _alias( utils.Shape, 'draw_bezier') | 25825 _alias( Shape, 'draw_bezier') |
| 21289 _alias( utils.Shape, 'draw_circle') | 25826 _alias( Shape, 'draw_circle') |
| 21290 _alias( utils.Shape, 'draw_curve') | 25827 _alias( Shape, 'draw_curve') |
| 21291 _alias( utils.Shape, 'draw_line') | 25828 _alias( Shape, 'draw_line') |
| 21292 _alias( utils.Shape, 'draw_oval') | 25829 _alias( Shape, 'draw_oval') |
| 21293 _alias( utils.Shape, 'draw_polyline') | 25830 _alias( Shape, 'draw_polyline') |
| 21294 _alias( utils.Shape, 'draw_quad') | 25831 _alias( Shape, 'draw_quad') |
| 21295 _alias( utils.Shape, 'draw_rect') | 25832 _alias( Shape, 'draw_rect') |
| 21296 _alias( utils.Shape, 'draw_sector') | 25833 _alias( Shape, 'draw_sector') |
| 21297 _alias( utils.Shape, 'draw_squiggle') | 25834 _alias( Shape, 'draw_squiggle') |
| 21298 _alias( utils.Shape, 'draw_zigzag') | 25835 _alias( Shape, 'draw_zigzag') |
| 21299 _alias( utils.Shape, 'insert_text') | 25836 _alias( Shape, 'insert_text') |
| 21300 _alias( utils.Shape, 'insert_textbox') | 25837 _alias( Shape, 'insert_textbox') |
| 21301 | 25838 |
| 21302 if 0: | 25839 if 0: |
| 21303 restore_aliases() | 25840 restore_aliases() |
| 21304 | 25841 |
| 21305 __version__ = VersionBind | 25842 __version__ = VersionBind |
