Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/scripts/wrap/parse.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 ''' | |
| 2 Support for accessing parse tree for MuPDF headers. | |
| 3 ''' | |
| 4 | |
| 5 import os | |
| 6 import sys | |
| 7 import time | |
| 8 | |
| 9 import jlib | |
| 10 | |
| 11 try: | |
| 12 import clang | |
| 13 except ImportError as e: | |
| 14 jlib.log( 'Warning, could not import clang: {e}') | |
| 15 clang = None | |
| 16 | |
| 17 from . import classes | |
| 18 from . import cpp | |
| 19 from . import state | |
| 20 from . import util | |
| 21 | |
| 22 | |
def get_extras(tu, type_):
    '''
    Resolves <type_> to its base type (see get_base_type()) and looks up the
    matching entry in classes.classextras.

    Returns (cursor, typename, extras):
        cursor: declaration cursor of the base type.
        typename: base type name as returned by get_base_typename().
        extras: whatever classes.classextras.get() returns for <typename>.
    '''
    base = get_base_type(type_)
    declaration = base.get_declaration()
    typename = get_base_typename(base)
    return declaration, typename, classes.classextras.get(tu, typename)
| 35 | |
def fileline(cursor):
    '''
    Returns '<file>:<line>' for cursor.location. <file> is made relative with
    os.path.relpath(), or is empty if the location has no file.
    '''
    source_file = cursor.location.file
    if source_file:
        filename = os.path.relpath(source_file.name)
    else:
        filename = ''
    return f'{filename}:{cursor.location.line}'
| 43 | |
| 44 | |
def prefix(name):
    '''
    Returns the leading namespace prefix of <name>: 'fz_' or 'pdf_'.

    Raises AssertionError if <name> starts with neither.
    '''
    for candidate in ('fz_', 'pdf_'):
        if name.startswith(candidate):
            return candidate
    # Raise explicitly instead of `assert 0`: an assert statement is removed
    # when Python runs with -O, which would make this silently return None.
    raise AssertionError(f'unrecognised prefix (not fz_ or pdf_) in name={name}')
| 51 | |
| 52 | |
def get_fz_extras(tu, fzname):
    '''
    Looks up <fzname> in classes.classextras after removing a leading 'const '
    and then a leading 'struct ' (via util.clip()).

    Returns the result of classes.classextras.get() (documented by the
    original author as None if not found).
    '''
    for leading in ('const ', 'struct '):
        fzname = util.clip(fzname, leading)
    return classes.classextras.get(tu, fzname)
| 62 | |
def get_children(cursor):
    '''
    Generator that behaves like cursor.get_children(), except that cursors of
    kind clang.cindex.CursorKind.UNEXPOSED_DECL (top-level items marked with
    `extern "C"`) and clang.cindex.CursorKind.LINKAGE_SPEC (items inside
    `extern "C" {...}`) are replaced by their own children. Only one level is
    expanded.
    '''
    verbose = 0
    for cursor in cursor.get_children():
        #verbose = state.state_.show_details( cursor.spelling)
        #verbose = 1
        kind = cursor.kind
        if (kind == clang.cindex.CursorKind.UNEXPOSED_DECL
                or kind == clang.cindex.CursorKind.LINKAGE_SPEC
                ):
            # Container for `extern "C"` items; yield what is inside it.
            for cursor2 in cursor.get_children():
                if verbose and cursor.spelling:
                    jlib.log( '{cursor.spelling=}')
                yield cursor2
            continue
        if verbose and cursor.spelling:
            jlib.log( '{cursor.spelling=}')
        yield cursor
| 91 | |
def get_members( type_or_cursor, include_empty=False):
    '''
    Generator yielding a cursor for each member of a struct-like type, using
    whichever of clang.cindex.Cursor.get_children() and
    clang.cindex.Type.get_fields() gives a result.

    Args:
        type_or_cursor:
            A clang.cindex.Type or clang.cindex.Cursor; anything else fails an
            assert.
        include_empty:
            If false (the default), children whose .spelling is '' are
            dropped. If true they are kept, which allows finding of
            non-typedef enums, for example.

    In both cases, if the list built from get_children() ends up empty, the
    fields of the canonical type (Type.get_fields()) are yielded instead.
    '''
    if isinstance( type_or_cursor, clang.cindex.Type):
        cursor = type_or_cursor.get_declaration()
    elif isinstance( type_or_cursor, clang.cindex.Cursor):
        cursor = type_or_cursor
    else:
        assert 0

    # For typedef/elaborated types, look at the children of the declaration
    # of the underlying type rather than of the typedef itself.
    cursor2 = cursor
    if cursor.type.kind in (state.clang.cindex.TypeKind.TYPEDEF, state.clang.cindex.TypeKind.ELABORATED):
        cursor2 = cursor.underlying_typedef_type.get_declaration()

    if 0:
        # Disabled diagnostics showing the difference between
        # clang.cindex.Cursor.get_children() and
        # clang.cindex.Type.get_fields().
        #
        # For example it looks like clang.cindex.Cursor.get_children() can
        # return an extra item with .spelling=='' for 'union {...} u;'.
        #
        ret_cursor = []
        ret_cursor_no_empty = []
        ret_type = []
        for cursor3 in cursor2.get_children():
            item = (cursor3.spelling, cursor3.location.file.name, cursor3.location.line)
            ret_cursor.append( item)
            if cursor3.spelling:
                ret_cursor_no_empty.append( item)
        for cursor3 in cursor.type.get_canonical().get_fields():
            ret_type.append( (cursor3.spelling, cursor3.location.file.name, cursor3.location.line))
        ret_cursor.sort()
        ret_type.sort()
        ret_cursor_no_empty.sort()
        if (not ret_cursor_no_empty) and ret_type:
            jlib.log( 'ret_type and not ret_cursor_no_empty:')
            for i in ret_type:
                jlib.log( ' ret_type: {i}')
        if 0 and ret_cursor != ret_type:
            jlib.log('get_children() != get_fields():')
            for i in ret_cursor:
                jlib.log( ' ret_cursor: {i}')
            for i in ret_type:
                jlib.log( ' ret_type: {i}')

    members = [
            child
            for child in cursor2.get_children()
            if include_empty or child.spelling
            ]
    if not members:
        # Nothing usable from get_children(); fall back to get_fields().
        members = list( cursor.type.get_canonical().get_fields())
    yield from members
| 164 | |
def get_field0( type_):
    '''
    Returns cursor for first member yielded by get_members(type_), or None if
    there are no members.
    '''
    # The original computed an unused `verbose` flag here; it has been
    # dropped, and the no-members case now returns None explicitly.
    return next( iter( get_members( type_)), None)
| 172 | |
# Maps type spelling to the result of get_base_type().
get_base_type_cache = {}
def get_base_type( type_):
    '''
    Strips every level of pointer from <type_> and returns the type that is
    ultimately pointed to. Each level is first passed through
    state.get_name_canonical().
    '''
    # Caching reduces time from 0.24s to 0.1s.
    key = type_.spelling
    cached = get_base_type_cache.get( key)
    if cached is not None:
        return cached

    base = state.get_name_canonical( type_)
    while base.kind == clang.cindex.TypeKind.POINTER:
        base = state.get_name_canonical( base.get_pointee())
    get_base_type_cache[ key] = base
    return base
| 191 | |
def get_base_typename( type_):
    '''
    Returns the spelling of get_base_type(type_), i.e. of the type reached by
    following all pointers, with a leading 'const ' and then a leading
    'struct ' removed.
    '''
    name = get_base_type( type_).spelling
    for leading in ('const ', 'struct '):
        name = util.clip( name, leading)
    return name
| 202 | |
def is_double_pointer( type_):
    '''
    Returns True if <type_> is a pointer to a pointer, otherwise False.

    Both <type_> and its pointee are passed through
    state.get_name_canonical() before their kind is checked.
    '''
    outer = state.get_name_canonical( type_)
    if outer.kind != clang.cindex.TypeKind.POINTER:
        # Previously fell off the end and returned None; return a real bool.
        return False
    inner = state.get_name_canonical( outer.get_pointee())
    return inner.kind == clang.cindex.TypeKind.POINTER
| 212 | |
# Maps type name (canonical spelling without leading 'struct ') to the result
# of has_refs().
has_refs_cache = dict()
def has_refs( tu, type_):
    '''
    Returns (offset, bits) if <type_> has a 'refs' member, otherwise False.
        offset:
            Byte offset of 'refs' or name of 'refs' for use with offsetof(),
            e.g. 'super.refs'.
        bits:
            Size of 'refs' in bits. Will be -1 if there is no simple .refs
            member (e.g. fz_xml).

    A type is only considered if its name starts with 'fz_' or 'pdf_' and a
    matching <prefix>keep_<name>() function is found. If the definition is
    visible we look for a 'refs' or 'storable' member; otherwise, or if none
    is found, we fall back to hard-coded information. Fails an assert if a
    keep function exists but no information is available.

    Results (including False) are stored in has_refs_cache.
    '''
    type0 = type_   # Only referenced by diagnostics.
    type_ = type_.get_canonical()

    key = util.clip( type_.spelling, 'struct ')
    verbose = state.state_.show_details( key)
    ret = has_refs_cache.get( key, None)
    if ret is not None:
        return ret

    ret = False
    if verbose:
        jlib.log( 'Analysing {type0.spelling=} {type_.spelling=} {key=}')

    for prefix in ('fz_', 'pdf_'):
        if verbose:
            jlib.log( '{type_.spelling=} {prefix=}')
        if not key.startswith( prefix):
            continue
        if verbose:
            jlib.log( 'Type is a fz_ or pdf_ struct: {key=}')
        keep_name = f'{prefix}keep_{key[len(prefix):]}'
        keep_fn_cursor = state.state_.find_function( tu, keep_name, method=False)
        if verbose:
            jlib.log( '{keep_name=} {keep_fn_cursor=}')
        if not keep_fn_cursor:
            continue

        if verbose:
            jlib.log( 'There is a keep() fn for this type so it uses reference counting: {keep_name=}')
        base_type_cursor = get_base_type( type_).get_declaration()
        if base_type_cursor.is_definition():
            if verbose:
                jlib.log( 'Type definition is available so we look for .refs member: {key=} {type_.spelling=} {fileline(base_type_cursor)=}')
            if verbose:
                jlib.log('type_.get_fields()')
                for cursor in get_members(type_):
                    jlib.log(' {cursor.spelling=}')
                jlib.log('base_type_cursor.get_children()')
                for cursor in base_type_cursor.get_children():
                    jlib.log(' {cursor.spelling=}')
                jlib.log('.')
            for cursor in get_members(type_):
                name = cursor.spelling
                type2 = state.get_name_canonical( cursor.type)
                if verbose:
                    jlib.log( '{name=} {type2.spelling=}')
                if name == 'refs' and type2.spelling == 'int':
                    ret = 'refs', 32
                    break
                if name == 'storable' and type2.spelling in ('struct fz_storable', 'fz_storable'):
                    ret = 'storable.refs', 32
                    break
        else:
            if 0:
                jlib.log('Definition is not available for {key=}'
                        ' because {base_type_cursor.spelling=} .is_definition()'
                        ' returns false.'
                        ' base_type_cursor.location={fileline(base_type_cursor)}'
                        )

        if not ret:
            if verbose:
                jlib.log(
                        '{type_.spelling=}: Cannot find .refs member or we only have forward'
                        ' declaration, so have to hard-code the size and offset'
                        ' of the refs member.'
                        )
            # All branches below assign to <ret> instead of returning
            # directly, so that hard-coded results are also stored in
            # has_refs_cache.
            if base_type_cursor.is_definition():
                if key == 'pdf_document':
                    ret = 'super.refs', 32
                elif key == 'pdf_page':
                    ret = 'super.refs', 32
                elif key == 'fz_pixmap':
                    ret = 'storable.refs', 32
                elif key in (
                        'fz_colorspace',
                        'fz_image',
                        ):
                    ret = 'key_storable.storable.refs', 32
                elif key == 'pdf_cmap':
                    ret = 'storable.refs', 32
            else:
                #jlib.log( 'No definition available, i.e. forward decl only.')
                if key == 'pdf_obj':
                    ret = 0, 16
                elif key == 'fz_path':
                    ret = 0, 8
                elif key in (
                        'fz_separations',
                        'fz_halftone',
                        'pdf_annot',
                        'pdf_graft_map',
                        ):
                    # Forward decl, first member is 'int refs;'.
                    ret = 0, 32
                elif key in (
                        'fz_display_list',
                        'fz_glyph',
                        'fz_jbig2_globals',
                        'pdf_function',
                        ):
                    # Forward decl, first member is 'fz_storable storable;'.
                    ret = 0, 32
                elif key == 'fz_xml':
                    # This only has a simple .refs member if the
                    # .up member is null, so we don't attempt to
                    # use it, by returning size=-1.
                    ret = 0, -1

        # A keep function exists, so we must know where .refs is; if not,
        # hard-coded info for this type needs to be added above. (The
        # original also had an `if ret is None` check here, which could
        # never fire because <ret> is False or a tuple at this point.)
        assert ret, (
                f'{key} has {keep_name}() but have not found size/location of .refs member.'
                f' {type_.spelling=}'
                f' {base_type_cursor.spelling=}'
                f': {fileline(base_type_cursor)}'
                )

    if type_.spelling in (
            'struct fz_document',
            'struct fz_buffer',
            ):
        # Sanity check for types that are expected to have refs.
        assert ret
    #jlib.log('Populating has_refs_cache with {key=} {ret=}')
    has_refs_cache[ key] = ret
    return ret
| 354 | |
def get_value( item, name):
    '''
    getattr() with extras.

    <name> may be a '.'-separated path, in which case each component is
    resolved in turn. A component ending with '()' is looked up as an
    attribute, asserted to be callable, and called with no args to get the
    value. An empty <name> gives <item> itself.
    '''
    if not name:
        return item
    head, dot, tail = name.partition( '.')
    if dot:
        # Resolve first component, then the rest relative to it.
        return get_value( get_value( item, head), tail)
    if name.endswith( '()'):
        fn = getattr( item, name[:-2])
        assert callable( fn)
        return fn()
    return getattr( item, name)
| 373 | |
def get_list( item, *names):
    '''
    Evaluates each of <names> against <item> with get_value().

    Returns list of (name, value) tuples, in the order given.
    '''
    return [(name, get_value( item, name)) for name in names]
| 385 | |
def get_text( item, prefix, sep, *names):
    '''
    Returns <prefix> followed by '<name>=<value>' for each of <names>
    (values are from get_list()), joined with <sep>.
    '''
    pairs = get_list( item, *names)
    return prefix + sep.join( f'{name}={value}' for name, value in pairs)
| 394 | |
| 395 | |
def dump_ast( cursor, out=None, depth=0):
    '''
    Recursively writes one line of text for each descendant of <cursor>,
    indented by four spaces per level.

    Args:
        cursor:
            Cursor whose children are dumped.
        out:
            None to write to sys.stdout, a string to write to a new file
            with that name (closed before we return), a callable to be
            called with each line, or an object with a .write() method.
        depth:
            Current nesting level; used for indentation.
    '''
    opened_here = False
    if out is None:
        out = sys.stdout
    if isinstance(out, str):
        out = open(out, 'w')
        opened_here = True

    def or_none(f):
        # Returns f(), or None if it raises.
        try:
            return f()
        except Exception:
            return None

    try:
        indent = depth*4*' '
        for cursor2 in cursor.get_children():
            result = or_none( cursor2.type.get_result)
            type_ = cursor2.type
            type_canonical = or_none( cursor2.type.get_canonical)

            parts = [
                    indent,
                    jlib.log_text(
                        '{cursor2.kind=}'
                        ' {cursor2.displayname=}'
                        ' {cursor2.spelling=}'
                        ' {cursor2.linkage=}'
                        ' {cursor2.is_definition()=}'
                        ),
                    ]
            if result:
                parts.append( jlib.log_text(' {result.spelling=}'))
            if type_:
                parts.append( jlib.log_text(' {type_.spelling=}'))
            if type_canonical:
                parts.append( jlib.log_text(' {type_canonical.spelling=}'))
            parts.append( '\n')
            text = ''.join( parts)

            if callable(out):
                out( text)
            else:
                out.write(text)

            # Nested calls get the already-open <out>, so they never close it.
            dump_ast( cursor2, out, depth+1)
    finally:
        if opened_here:
            out.close()
| 439 | |
def show_ast( filename, includes):
    '''
    Parses <filename> with clang, passing '-I <dir>' for each item in
    <includes>, and writes the resulting tree to stdout using dump_ast().
    '''
    jlib.log('Parsing {filename=}')
    args = []
    for include in includes:
        args.extend( ('-I', include))
    index = clang.cindex.Index.create()
    tu = index.parse( filename, args=args)
    dump_ast( tu.cursor)
| 450 | |
class Arg:
    '''
    Information about a function argument.

    .cursor:
        Cursor for the argument.
    .name:
        Arg name, or an invented name if none was present.
    .separator:
        '' for first returned argument, ', ' for the rest.
    .alt:
        Cursor for underlying fz_ struct type if <arg> is a pointer to or
        ref/value of a fz_ struct type that we wrap. Else None.
    .out_param:
        True if this looks like an out-parameter, e.g. alt is set and
        double pointer, or arg is pointer other than to char.
    .name_python:
        Same as .name or .name+'_' if .name is a Python keyword.
    .name_csharp:
        Same as .name or .name+'_' if .name is one of the C# keywords in
        Arg.csharp_keywords.
    '''
    # C# keywords for which .name_csharp gets a trailing underscore.
    csharp_keywords = ('out', 'is', 'in', 'params')

    def __init__(self, cursor, name, separator, alt, out_param):
        # Local import so that this class does not depend on the file's
        # top-level import block.
        import keyword

        self.cursor = cursor
        self.name = name
        self.separator = separator
        self.alt = alt
        self.out_param = out_param
        # Previously only 'in' and 'is' were escaped, so an argument called
        # e.g. 'from' or 'lambda' would have been passed through unchanged.
        self.name_python = f'{name}_' if keyword.iskeyword(name) else name
        self.name_csharp = f'{name}_' if name in self.csharp_keywords else name

    def __str__(self):
        return f'Arg(name={self.name} alt={"true" if self.alt else "false"} out_param={self.out_param})'
| 486 | |
| 487 | |
# Maps (tu, file, line, include_fz_context, skip_first_alt) to list of Arg
# instances.
get_args_cache = dict()

def get_args( tu, cursor, include_fz_context=False, skip_first_alt=False, verbose=False):
    '''
    Yields Arg instance for each arg of the function at <cursor>.

    Args:
        tu:
            A clang.cindex.TranslationUnit instance.
        cursor:
            Clang cursor for the function.
        include_fz_context:
            If false, we skip args that are 'struct fz_context*'
        skip_first_alt:
            If true, we skip the first arg with .alt set.
        verbose:
            If true we output diagnostics. Also enabled if
            state.state_.show_details() is true for the function name.

    Unnamed args are given the name 'arg_<i>', where <i> counts the args
    that were not skipped as fz_context.
    '''
    # We are called a few times for each function, and the calculations we do
    # are slow, so we cache the returned items. E.g. this reduces total time of
    # --build 0 from 3.5s to 2.1s.
    #
    if verbose:
        jlib.log( '## Looking at args of {cursor.spelling=}')
    key = tu, cursor.location.file, cursor.location.line, include_fz_context, skip_first_alt
    ret = get_args_cache.get( key)
    if not verbose and state.state_.show_details(cursor.spelling):
        verbose = True
    if ret is None:
        if verbose:
            jlib.log( '## Looking at args of {cursor.spelling=}')
        ret = []
        i = 0
        i_alt = 0
        separator = ''
        for arg_cursor in cursor.get_arguments():
            if verbose:
                jlib.log('{arg_cursor.kind=} {arg_cursor.spelling=}')
            assert arg_cursor.kind == clang.cindex.CursorKind.PARM_DECL
            if not include_fz_context and is_pointer_to( arg_cursor.type, 'fz_context'):
                # Omit this arg because our generated mupdf_*() wrapping functions
                # use internalContextGet() to get a context.
                continue
            name = arg_cursor.spelling or f'arg_{i}'
            if 0 and name == 'stmofsp':
                verbose = True
            alt = None
            out_param = False
            base_type_cursor, base_typename, extras = get_extras( tu, arg_cursor.type)
            if verbose:
                jlib.log( 'Looking at arg. {extras=}')
            if extras:
                if verbose:
                    jlib.log( '{extras.opaque=} {base_type_cursor.kind=} {base_type_cursor.is_definition()=}')
                if extras.opaque:
                    # E.g. we don't have access to definition of fz_separation,
                    # but it is marked in classes.classextras with opaque=true,
                    # so there will be a wrapper class.
                    alt = base_type_cursor
                elif (1
                        and base_type_cursor.kind == clang.cindex.CursorKind.STRUCT_DECL
                        #and base_type_cursor.is_definition()
                        ):
                    alt = base_type_cursor
            if verbose:
                jlib.log( '{arg_cursor.type.spelling=} {base_typename=} {arg_cursor.type.kind=} {get_base_typename(arg_cursor.type)=}')
                jlib.log( '{get_base_type(arg_cursor.type).kind=}')

            # Decide whether this arg looks like an out-param.
            if alt:
                if is_double_pointer( arg_cursor.type):
                    out_param = True
            elif get_base_typename( arg_cursor.type) in ('char', 'unsigned char', 'signed char', 'void', 'FILE'):
                if is_double_pointer( arg_cursor.type):
                    if verbose:
                        jlib.log( 'setting outparam: {cursor.spelling=} {arg_cursor.type=}')
                    if cursor.spelling == 'pdf_clean_file':
                        # Don't mark char** argv as out-param, which will also
                        # allow us to tell swig to convert python lists into
                        # (argc,char**) pair.
                        pass
                    else:
                        if verbose:
                            jlib.log('setting out_param to true')
                        out_param = True
            elif ( base_typename.startswith( ('fz_', 'pdf_'))
                    and get_base_type(arg_cursor.type).kind != clang.cindex.TypeKind.ENUM
                    ):
                # Pointer to fz_ struct is not usually an out-param.
                if verbose:
                    # <base_typename> is a string, so it is shown directly;
                    # the original used '{base_typename.spelling=}', which
                    # asks for a .spelling attribute that str does not have.
                    jlib.log(
                            'not out-param because pointer to struct:'
                            ' arg is: {arg_cursor.displayname=}'
                            ' {base_typename=}'
                            ' {extras}'
                            ' {arg_cursor.type.kind=}'
                            )
            elif arg_cursor.type.kind == clang.cindex.TypeKind.POINTER:
                pointee = arg_cursor.type.get_pointee()
                if verbose:
                    jlib.log( 'clang.cindex.TypeKind.POINTER')
                if state.get_name_canonical( pointee).kind == clang.cindex.TypeKind.FUNCTIONPROTO:
                    # Don't mark function-pointer args as out-params.
                    if verbose:
                        jlib.log( 'clang.cindex.TypeKind.FUNCTIONPROTO')
                elif pointee.is_const_qualified():
                    if verbose:
                        jlib.log( 'is_const_qualified()')
                elif pointee.spelling == 'FILE':
                    pass
                else:
                    if verbose:
                        jlib.log( 'setting out_param = True')
                    out_param = True

            if alt:
                i_alt += 1
            i += 1
            if alt and skip_first_alt and i_alt == 1:
                continue
            arg = Arg(arg_cursor, name, separator, alt, out_param)
            ret.append(arg)
            if verbose:
                jlib.log( 'Appending {arg=}')
            separator = ', '

        get_args_cache[ key] = ret

    for arg in ret:
        yield arg
| 615 | |
| 616 | |
def fn_has_struct_args( tu, cursor):
    '''
    Returns True if any arg from get_args() for fn at <cursor> has .alt set,
    i.e. fn takes any fz_* struct args. Otherwise returns False (previously
    None).
    '''
    return any( arg.alt for arg in get_args( tu, cursor))
| 624 | |
def get_first_arg( tu, cursor):
    '''
    Returns (arg, n), where <n> is the number of args yielded by get_args()
    for fn at <cursor>, and <arg> is the first of them or None if <n> is
    zero.
    '''
    args = list( get_args( tu, cursor))
    first = args[0] if args else None
    return first, len( args)
| 637 | |
| 638 | |
# Maps (type spelling, type2) to the result of is_().
is_cache = {}

def is_( type_, type2):
    '''
    Returns true if the text from cpp.declaration_text() for <type_>, after
    removing leading 'const ' and 'struct ' and surrounding whitespace, is
    equal to the string <type2>.
    '''
    key = (type_.spelling, type2)
    cached = is_cache.get( key)
    if cached is not None:
        return cached
    text = cpp.declaration_text( type_, '', top_level='')
    for leading in ('const ', 'struct '):
        text = util.clip( text, leading)
    matches = (text.strip() == type2)
    is_cache[ key] = matches
    return matches
| 652 | |
# Maps (type spelling, destination) to the result of is_pointer_to().
is_pointer_to_cache = dict()

def is_pointer_to( type_, destination, verbose=False):
    '''
    Returns True if <type> is a pointer to <destination>, otherwise False.

    We do this using text for <destination>, rather than a clang.cindex.Type
    or clang.cindex.Cursor, so that we can represent base types such as int or
    char without having clang parse system headers. This involves stripping any
    initial 'struct ' text.

    Also, clang's representation of mupdf's varying use of typedef, struct and
    forward-declarations is rather difficult to work with directly.

    type_:
        A clang.cindex.Type.
    destination:
        Text typename.
    verbose:
        If true we output diagnostics, and always recalculate instead of
        using the cached value.
    '''
    # Use cache - reduces time from 0.6s to 0.2.
    #
    key = type_.spelling, destination
    ret = is_pointer_to_cache.get( key)
    if verbose or ret is None:
        assert isinstance( type_, clang.cindex.Type)
        if verbose: jlib.log( '{type_.spelling=}')
        # Default to False, not None. None is what the cache lookup above
        # uses to mean 'not cached', so a None result for non-pointer types
        # was recalculated on every call.
        ret = False
        destination = util.clip( destination, 'struct ')
        if type_.kind == clang.cindex.TypeKind.POINTER:
            pointee = type_.get_pointee()
            if verbose: jlib.log('{pointee.spelling=}')
            d = cpp.declaration_text( pointee, '', top_level='', verbose=verbose)
            d = util.clip( d, 'const ')
            d = util.clip( d, 'struct ')
            if verbose:
                jlib.log( '{destination=} {type_.get_pointee().kind=} {type_.get_pointee().spelling=} {state.get_name_canonical( type_.get_pointee()).spelling=}')
            ret = d.strip() == destination or d.strip() == f'const {destination}'
        is_pointer_to_cache[ key] = ret

    return ret
| 693 | |
def is_pointer_to_pointer_to( type_, destination, verbose=False):
    '''
    Returns False if <type_> is not a pointer. Otherwise returns
    is_pointer_to() for its pointee, i.e. whether <type_> is a pointer to a
    pointer to <destination>.
    '''
    if verbose:
        jlib.log( '{type_.spelling=}')
    if type_.kind == clang.cindex.TypeKind.POINTER:
        return is_pointer_to( type_.get_pointee(), destination, verbose=verbose)
    return False
| 701 | |
| 702 | |
# Marker classes identifying why a function is not wrapped as a class method.
# They carry no state; see find_wrappable_function_with_arg0_type_cache_populate()
# for where each (reason, description) pair is recorded.

class MethodExcludeReason_VARIADIC:
    '''Function is variadic.'''

class MethodExcludeReason_OMIT_CLASS:
    '''Not used in this part of the file; presumably the class is omitted.'''

class MethodExcludeReason_NO_EXTRAS:
    '''No extras are defined for the function's result type.'''

class MethodExcludeReason_NO_RAW_CONSTRUCTOR:
    '''Wrapper for the result type does not have a raw constructor.'''

class MethodExcludeReason_NOT_COPYABLE:
    '''Wrapper for the result type is not copyable.'''

class MethodExcludeReason_NO_WRAPPER_CLASS:
    '''There is no wrapper class for one of the args.'''

class MethodExcludeReason_ENUM:
    '''Not used in this part of the file; presumably relates to enum args.'''

class MethodExcludeReason_FIRST_ARG_NOT_STRUCT:
    '''First arg is not a fz_* struct.'''
| 719 | |
# Dict from <structname> to list of names of functions that satisfy the
# conditions described in find_wrappable_function_with_arg0_type() below.
# None until populated.
#
find_wrappable_function_with_arg0_type_cache = None

# Dict from fnname to list of reasons why this fn is not suitable for
# wrapping by a class method. None until populated.
#
find_wrappable_function_with_arg0_type_excluded_cache = None

# Dict from function name to the name of the struct whose wrapper class has a
# method that wraps this function.
#
fnname_to_method_structname = {}
| 734 | |
def find_wrappable_function_with_arg0_type_cache_populate( tu):
    '''
    Populates caches with wrappable functions. Does nothing if they have
    already been populated.

    Looks at all fz_*() and pdf_*() functions apart from *_keep_*() and
    *_drop_*(). For each function we collect a list of (reason-class,
    description) pairs saying why it cannot be wrapped as a class method:

        If this list is not empty it is stored in
        find_wrappable_function_with_arg0_type_excluded_cache[fnname].

        Otherwise, if the function has at least one arg, its name is
        appended to find_wrappable_function_with_arg0_type_cache[structname]
        and fnname_to_method_structname[fnname] is set to structname, where
        structname is the type of the first arg.
    '''
    global find_wrappable_function_with_arg0_type_cache
    global find_wrappable_function_with_arg0_type_excluded_cache

    # Compare with None rather than testing truthiness, so that we don't
    # repeat all the work on every call if the populated cache is empty.
    if find_wrappable_function_with_arg0_type_cache is not None:
        return

    t0 = time.time()

    find_wrappable_function_with_arg0_type_cache = dict()
    find_wrappable_function_with_arg0_type_excluded_cache = dict()

    for fnname, cursor in state.state_.find_functions_starting_with( tu, ('fz_', 'pdf_'), method=True):

        exclude_reasons = []

        if fnname.startswith( ('fz_drop_', 'fz_keep_', 'pdf_drop_', 'pdf_keep_')):
            continue

        if cursor.type.is_function_variadic():
            exclude_reasons.append(
                    (
                    MethodExcludeReason_VARIADIC,
                    'function is variadic',
                    ))

        # Look at resulttype.
        #
        result_type = cursor.type.get_result()
        if result_type.kind == clang.cindex.TypeKind.POINTER:
            result_type = result_type.get_pointee()
        result_type_name = util.clip( result_type.spelling, 'struct ')
        if result_type_name.startswith( ('fz_', 'pdf_')):
            if result_type.kind == clang.cindex.TypeKind.TYPEDEF:
                result_cursor = result_type.get_declaration()
                result_type = result_cursor.underlying_typedef_type

            if result_type.kind == state.clang.cindex.TypeKind.ELABORATED:
                result_type_extras = get_fz_extras( tu, result_type_name)
                if not result_type_extras:
                    exclude_reasons.append(
                            (
                            MethodExcludeReason_NO_EXTRAS,
                            f'no extras defined for result_type={result_type_name}.'
                            ))
                else:
                    if not result_type_extras.constructor_raw:
                        exclude_reasons.append(
                                (
                                MethodExcludeReason_NO_RAW_CONSTRUCTOR,
                                f'wrapper for result_type={result_type_name} does not have raw constructor.',
                                ))
                    if not result_type_extras.copyable:
                        exclude_reasons.append(
                                (
                                MethodExcludeReason_NOT_COPYABLE,
                                f'wrapper for result_type={result_type_name} is not copyable.',
                                ))

        # Look at args
        #
        i = 0
        arg0_cursor = None
        for arg in get_args( tu, cursor):

            base_typename = get_base_typename( arg.cursor.type)
            if not arg.alt and base_typename.startswith( ('fz_', 'pdf_')):
                t_canonical = state.get_name_canonical( arg.cursor.type)
                if t_canonical.kind == clang.cindex.TypeKind.ENUM:
                    # We don't (yet) wrap fz_* enums, but for now at least we
                    # still wrap functions that take fz_* enum parameters -
                    # callers will have to use the fz_* type.
                    #
                    # For example this is required by mutool_draw.py because
                    # mudraw.c calls fz_set_separation_behavior().
                    #
                    jlib.logx(
                            'not excluding {fnname=} with enum fz_ param:'
                            ' {arg.cursor.spelling=}'
                            ' {arg.cursor.type.kind}'
                            ' {state.get_name_canonical(arg.cursor.type).kind=}'
                            )
                elif t_canonical.kind == clang.cindex.TypeKind.POINTER:
                    pass
                else:
                    exclude_reasons.append(
                            (
                            MethodExcludeReason_NO_WRAPPER_CLASS,
                            f'no wrapper class for arg i={i}:'
                                f' {state.get_name_canonical( arg.cursor.type).spelling}'
                                f' {state.get_name_canonical(arg.cursor.type).kind}'
                                ,
                            ))
            if i == 0:
                if arg.alt:
                    arg0_cursor = arg.alt
                else:
                    exclude_reasons.append(
                            (
                            MethodExcludeReason_FIRST_ARG_NOT_STRUCT,
                            'first arg is not fz_* struct',
                            ))
            i += 1

        if exclude_reasons:
            find_wrappable_function_with_arg0_type_excluded_cache[ fnname] = exclude_reasons
            #if fnname == 'fz_load_outline':   # lgtm [py/unreachable-statement]
            if state.state_.show_details(fnname):
                jlib.log( 'Excluding {fnname=} from possible class methods because:')
                for i in exclude_reasons:
                    jlib.log( ' {i}')
        else:
            if i > 0:
                # <fnname> is ok to wrap. There are no exclude reasons, so
                # the first arg had .alt set and <arg0_cursor> is not None.
                arg0 = state.get_name_canonical( arg0_cursor.type).spelling
                arg0 = util.clip( arg0, 'struct ')

                #jlib.log( '=== Adding to {arg0=}: {fnname=}. {len(fnname_to_method_structname)=}')

                items = find_wrappable_function_with_arg0_type_cache.setdefault( arg0, [])
                items.append( fnname)

                fnname_to_method_structname[ fnname] = arg0

    jlib.log1( f'populating find_wrappable_function_with_arg0_type_cache took {time.time()-t0:.2f}s')
| 866 | |
| 867 | |
def find_wrappable_function_with_arg0_type(tu, structname):
    '''
    Returns the names of the fz_*() functions that have been recorded as
    candidates for methods of our wrapper class for <structname>.

    We first call find_wrappable_function_with_arg0_type_cache_populate(tu)
    and then look <structname> up in
    find_wrappable_function_with_arg0_type_cache.

    The recorded functions are intended to satisfy all of the following:

        Their first non-context param is <structname> (by reference,
        pointer or value).

        If their return type is a fz_* struct (by reference, pointer or
        value), the corresponding wrapper class has a raw constructor.

    Args:
        tu:
            Translation unit, passed on to the cache-populate function.
        structname:
            Key to look up in the cache.

    Returns a list of function names; an empty list if <structname> has no
    entry in the cache.
    '''
    find_wrappable_function_with_arg0_type_cache_populate(tu)

    # The log strings below refer to the locals <structname>, <ret> and <i>
    # by name (presumably expanded by jlib.log()), so these names must not
    # be changed.
    ret = find_wrappable_function_with_arg0_type_cache.get(structname, [])
    if not state.state_.show_details(structname):
        return ret

    jlib.log('{structname=}: {len(ret)=}:')
    for i in ret:
        jlib.log(' {i}')
    return ret


# Cache used by find_struct(): None until find_struct() is first called, after
# which it is a dict mapping cursor spelling to cursor.
find_struct_cache = None
| 889 | |
| 890 | |
def find_class_for_wrappable_function(fn_name):
    '''
    Looks up <fn_name> in fnname_to_method_structname.

    An entry exists if <fn_name> was accepted as a possible method when the
    wrappable-function cache was populated; the stored value is the canonical
    spelling of the function's first arg type, clipped with 'struct ', i.e.
    the struct whose wrapper class gets a method that wraps <fn_name>.

    Returns the stored name, or None if <fn_name> has no entry.
    '''
    structname = fnname_to_method_structname.get(fn_name, None)
    return structname
| 899 | |
| 900 | |
def find_struct(tu, structname, require_definition=True):
    '''
    Looks up a top-level item of <tu> by name, using a lazily-built cache.

    fixme: despite the name, this returns any top-level cursor whose spelling
    matches; it doesn't have to be a struct.

    Args:
        tu:
            Translation unit. Its top-level children are only scanned when
            the cache is built, on the first call.
        structname:
            Name to find. It is passed through util.clip() with 'const ' and
            'struct ' before the lookup.
        require_definition:
            If true, a cursor that is not a definition is treated as not
            found.

    Returns the cursor, or None if nothing suitable was found.
    '''
    global find_struct_cache

    # NOTE(review): diagnostics are unconditionally switched off by the second
    # assignment, so none of the jlib.log() calls below currently run.
    verbose = state.state_.show_details(structname)
    verbose = False

    # The log strings refer to <structname>, <ret> and <require_definition> by
    # name, so these names must not be changed.
    if verbose:
        jlib.log('{=structname}')
    # Remove any 'struct ' prefix.
    structname = util.clip(structname, ('const ', 'struct '))
    if verbose:
        jlib.log('{=structname}')

    if find_struct_cache is None:
        # First call: index all top-level cursors by spelling. If a spelling
        # occurs more than once we keep the first cursor seen, unless a later
        # one is a definition and the stored one is not.
        #
        # NOTE(review): the cache is not keyed by <tu>; a later call with a
        # different translation unit does not rebuild it.
        find_struct_cache = dict()
        for cursor in get_children(tu.cursor):
            spelling = cursor.spelling
            stored = find_struct_cache.get(spelling)
            if stored is None or (cursor.is_definition() and not stored.is_definition()):
                find_struct_cache[spelling] = cursor

    ret = find_struct_cache.get(structname)
    if verbose:
        jlib.log('{=ret}')
    if not ret:
        return None
    if verbose:
        jlib.log('{=require_definition ret.is_definition()}')
    if not require_definition or ret.is_definition():
        return ret
    return None
| 943 | |
| 944 | |
def find_name(cursor, name, nest=0):
    '''
    Searches within <cursor> for <name>.

    cursor:
        Item to search; its members are obtained with get_members(). Its
        spelling must not be empty.
    name:
        Name to search for. If it contains '.' characters, we find the part
        before the first '.' and then search within that for the remainder,
        calling ourselves recursively.
    nest:
        Recursion depth; only passed on (incremented) to recursive calls.

    Returns the matching cursor, or None if not found.
    '''
    assert cursor.spelling != ''
    if cursor.spelling == '':
        # Anonymous item; this seems to occur for (non-anonymous) unions.
        #
        # We recurse into children directly.
        #
        # NOTE(review): this branch cannot be reached while the assert above
        # is active, and find_name_internal is not defined in the part of the
        # file visible here - confirm whether this is leftover code.
        #
        for member in get_members(cursor):
            found = find_name_internal(member, name, nest+1)
            if found:
                return found

    head, dot, tail = name.partition('.')
    if dot:
        # Dotted name: look for first element then for remaining.
        first = find_name(cursor, head, nest+1)
        if not first:
            return None
        return find_name(first, tail, nest+2)

    for member in get_members(cursor):
        if member.spelling == '':
            # Member with empty spelling: search inside it.
            #
            # NOTE(review): this recursive call passes a cursor with empty
            # spelling, which would fail the assert at the top - confirm.
            #
            found = find_name(member, name, nest+1)
            if found:
                return found
        if member.spelling == name:
            return member
    return None
