Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 170:8945be6b404e
Add a --size-only mode to treesum.py that prints only the size of each file and the accumulated size of each directory.
Digests are not computed in this mode.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 10 Jan 2025 10:32:52 +0100 |
| parents | 91b8b2a8aebc |
| children | 804a823c63f5 |
comparison
equal
deleted
inserted
replaced
| 169:91b8b2a8aebc | 170:8945be6b404e |
|---|---|
| 111 "--print-size", action="store_true", | 111 "--print-size", action="store_true", |
| 112 help="""Print the size of a file or the accumulated sizes of | 112 help="""Print the size of a file or the accumulated sizes of |
| 113 directory content into the output also. | 113 directory content into the output also. |
| 114 The size is not considered when computing digests. For symbolic links | 114 The size is not considered when computing digests. For symbolic links |
| 115 the size is not printed also.""") | 115 the size is not printed also.""") |
| 116 gp.add_argument( | |
| 117 "--size-only", action="store_true", | |
| 118 help="""Print only the size of files and for each directory its | |
| 119 accumulated directory size. Digests are not computed.""") | |
| 116 gp.add_argument( | 120 gp.add_argument( |
| 117 "directories", nargs="*", metavar="DIRECTORY") | 121 "directories", nargs="*", metavar="DIRECTORY") |
| 118 | 122 |
| 119 parser = argparse.ArgumentParser( | 123 parser = argparse.ArgumentParser( |
| 120 description="Generate and verify checksums for directory trees.", | 124 description="Generate and verify checksums for directory trees.", |
| 211 minimal=None, | 215 minimal=None, |
| 212 mode=False, | 216 mode=False, |
| 213 mmap=None, | 217 mmap=None, |
| 214 mtime=False, | 218 mtime=False, |
| 215 output=None, | 219 output=None, |
| 216 print_size=False): | 220 print_size=False, |
| 221 size_only=False): | |
| 217 opts = argparse.Namespace( | 222 opts = argparse.Namespace( |
| 218 directories=directories, | 223 directories=directories, |
| 219 algorithm=(util.algotag2algotype(algorithm), | 224 algorithm=(util.algotag2algotype(algorithm), |
| 220 algorithm), | 225 algorithm), |
| 221 append_output=append_output, | 226 append_output=append_output, |
| 227 mmap=mmap, | 232 mmap=mmap, |
| 228 metadata_full_mode=full_mode, | 233 metadata_full_mode=full_mode, |
| 229 metadata_mode=mode, | 234 metadata_mode=mode, |
| 230 metadata_mtime=mtime, | 235 metadata_mtime=mtime, |
| 231 output=output, | 236 output=output, |
| 232 print_size=print_size) | 237 print_size=print_size, |
| 238 size_only=size_only) | |
| 233 return opts | 239 return opts |
| 234 | 240 |
| 235 | 241 |
| 236 def treesum(opts): | 242 def treesum(opts): |
| 237 # XXX TBD: opts.check and opts.checklist (as in shasum.py) | 243 # XXX TBD: opts.check and opts.checklist (as in shasum.py) |
| 266 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, | 272 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, |
| 267 opts.follow_directory_symlinks, | 273 opts.follow_directory_symlinks, |
| 268 opts.metadata_mode, | 274 opts.metadata_mode, |
| 269 opts.metadata_full_mode, | 275 opts.metadata_full_mode, |
| 270 opts.metadata_mtime, | 276 opts.metadata_mtime, |
| 277 opts.size_only, | |
| 271 opts.print_size, | 278 opts.print_size, |
| 272 minimal=opts.minimal, | 279 minimal=opts.minimal, |
| 273 comment=opts.comment) | 280 comment=opts.comment) |
| 274 | 281 |
| 275 | 282 |
| 276 def generate_treesum_for_directory( | 283 def generate_treesum_for_directory( |
| 277 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, | 284 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, |
| 278 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, | 285 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, |
| 279 with_metadata_mtime, print_size, | 286 with_metadata_mtime, size_only, print_size, |
| 280 minimal=None, comment=None): | 287 minimal=None, comment=None): |
| 281 """ | 288 """ |
| 282 | 289 |
| 283 :param outfp: a *binary* file with a "write()" and a "flush()" method | 290 :param outfp: a *binary* file with a "write()" and a "flush()" method |
| 284 | 291 |
| 296 flags.append("with-metadata-mtime") | 303 flags.append("with-metadata-mtime") |
| 297 if handle_root_logical: | 304 if handle_root_logical: |
| 298 flags.append("logical") | 305 flags.append("logical") |
| 299 if follow_directory_symlinks: | 306 if follow_directory_symlinks: |
| 300 flags.append("follow-directory-symlinks") | 307 flags.append("follow-directory-symlinks") |
| 301 if print_size: | 308 if size_only: |
| 302 flags.append("print-size") | 309 flags.append("size-only") |
| 310 else: | |
| 311 if print_size: | |
| 312 flags.append("print-size") | |
| 303 if flags: | 313 if flags: |
| 304 flags.sort() | 314 flags.sort() |
| 305 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) | 315 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) |
| 306 outfp.flush() | 316 outfp.flush() |
| 307 | 317 |
| 333 linkdgst.update(b"%d:%s," % (len(linktgt), linktgt)) | 343 linkdgst.update(b"%d:%s," % (len(linktgt), linktgt)) |
| 334 dir_dgst = algorithm[0]() | 344 dir_dgst = algorithm[0]() |
| 335 dir_dgst.update(b"1:L,") | 345 dir_dgst.update(b"1:L,") |
| 336 dir_dgst.update( | 346 dir_dgst.update( |
| 337 b"%d:%s," % (len(linkdgst.digest()), linkdgst.digest())) | 347 b"%d:%s," % (len(linkdgst.digest()), linkdgst.digest())) |
| 338 outfp.write( | 348 if size_only: |
| 339 format_bsd_line( | 349 outfp.write( |
| 340 algorithm[1], | 350 format_bsd_line( |
| 341 dir_dgst.digest(), | 351 "SIZE", |
| 342 "./@", | 352 None, |
| 343 use_base64)) | 353 "./@", |
| 354 False, | |
| 355 0)) | |
| 356 else: | |
| 357 outfp.write( | |
| 358 format_bsd_line( | |
| 359 algorithm[1], | |
| 360 dir_dgst.digest(), | |
| 361 "./@", | |
| 362 use_base64)) | |
| 344 outfp.flush() | 363 outfp.flush() |
| 345 return | 364 return |
| 346 | 365 |
| 347 for top, fsobjects in walk.walk( | 366 for top, fsobjects in walk.walk( |
| 348 root, | 367 root, |
| 361 # no mtime and no mode for symlinks | 380 # no mtime and no mode for symlinks |
| 362 dir_dgst.update( | 381 dir_dgst.update( |
| 363 b"%d:%s," | 382 b"%d:%s," |
| 364 % (len(linkdgst.digest()), linkdgst.digest())) | 383 % (len(linkdgst.digest()), linkdgst.digest())) |
| 365 opath = "/".join(top) + "/" + fso.name if top else fso.name | 384 opath = "/".join(top) + "/" + fso.name if top else fso.name |
| 366 outfp.write( | 385 if size_only: |
| 367 format_bsd_line( | 386 outfp.write( |
| 368 algorithm[1], | 387 format_bsd_line( |
| 369 linkdgst.digest(), | 388 "SIZE", |
| 370 "%s/./@" % (opath,), | 389 None, |
| 371 use_base64)) | 390 "%s/./@" % (opath,), |
| 391 False, | |
| 392 0)) | |
| 393 else: | |
| 394 outfp.write( | |
| 395 format_bsd_line( | |
| 396 algorithm[1], | |
| 397 linkdgst.digest(), | |
| 398 "%s/./@" % (opath,), | |
| 399 use_base64)) | |
| 372 outfp.flush() | 400 outfp.flush() |
| 373 continue | 401 continue |
| 374 # fetch from dir_digests | 402 # fetch from dir_digests |
| 375 dgst, dsz = dir_digests[top + (fso.name,)] | 403 dgst, dsz = dir_digests[top + (fso.name,)] |
| 376 dir_size += dsz | 404 dir_size += dsz |
| 377 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) | 405 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) |
| 378 dir_dgst.update( | 406 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) |
| 379 b"%d:%s," % (len(dgst), dgst)) | |
| 380 if with_metadata_full_mode: | 407 if with_metadata_full_mode: |
| 381 modestr = normalized_mode_str(fso.stat.st_mode) | 408 modestr = normalized_mode_str(fso.stat.st_mode) |
| 382 if not isinstance(modestr, bytes): | 409 if not isinstance(modestr, bytes): |
| 383 modestr = modestr.encode("ascii") | 410 modestr = modestr.encode("ascii") |
| 384 dir_dgst.update(b"8:fullmode,%d:%s," | 411 dir_dgst.update(b"8:fullmode,%d:%s," |
| 407 elif with_metadata_mode: | 434 elif with_metadata_mode: |
| 408 modestr = normalized_compatible_mode_str(fso.stat.st_mode) | 435 modestr = normalized_compatible_mode_str(fso.stat.st_mode) |
| 409 if not isinstance(modestr, bytes): | 436 if not isinstance(modestr, bytes): |
| 410 modestr = modestr.encode("ascii") | 437 modestr = modestr.encode("ascii") |
| 411 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) | 438 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) |
| 412 dgst = digest.compute_digest_file( | 439 if not size_only: |
| 413 algorithm[0], fso.path, use_mmap=use_mmap) | 440 dgst = digest.compute_digest_file( |
| 414 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) | 441 algorithm[0], fso.path, use_mmap=use_mmap) |
| 442 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) | |
| 415 opath = "/".join(top) + "/" + fso.name if top else fso.name | 443 opath = "/".join(top) + "/" + fso.name if top else fso.name |
| 416 if print_size: | 444 if size_only: |
| 417 outfp.write( | 445 outfp.write( |
| 418 format_bsd_line( | 446 format_bsd_line( |
| 419 algorithm[1], dgst, opath, use_base64, | 447 "SIZE", None, opath, False, fso.stat.st_size)) |
| 420 fso.stat.st_size)) | |
| 421 else: | 448 else: |
| 422 outfp.write( | 449 if print_size: |
| 423 format_bsd_line( | 450 outfp.write( |
| 424 algorithm[1], dgst, opath, use_base64)) | 451 format_bsd_line( |
| 452 algorithm[1], dgst, opath, use_base64, | |
| 453 fso.stat.st_size)) | |
| 454 else: | |
| 455 outfp.write( | |
| 456 format_bsd_line( | |
| 457 algorithm[1], dgst, opath, use_base64)) | |
| 425 outfp.flush() | 458 outfp.flush() |
| 426 opath = "/".join(top) + "/" if top else "" | 459 opath = "/".join(top) + "/" if top else "" |
| 427 if print_size: | 460 if size_only: |
| 428 outfp.write(format_bsd_line( | 461 outfp.write(format_bsd_line( |
| 429 algorithm[1], dir_dgst.digest(), opath, use_base64, dir_size)) | 462 "SIZE", None, opath, False, dir_size)) |
| 430 else: | 463 else: |
| 431 outfp.write(format_bsd_line( | 464 if print_size: |
| 432 algorithm[1], dir_dgst.digest(), opath, use_base64)) | 465 outfp.write(format_bsd_line( |
| 466 algorithm[1], dir_dgst.digest(), opath, | |
| 467 use_base64, dir_size)) | |
| 468 else: | |
| 469 outfp.write(format_bsd_line( | |
| 470 algorithm[1], dir_dgst.digest(), opath, use_base64)) | |
| 433 outfp.flush() | 471 outfp.flush() |
| 434 dir_digests[top] = (dir_dgst.digest(), dir_size) | 472 dir_digests[top] = (dir_dgst.digest(), dir_size) |
| 435 | 473 |
| 436 | 474 |
| 437 def normalized_compatible_mode_str(mode): | 475 def normalized_compatible_mode_str(mode): |
| 448 if not modestr.startswith("0"): | 486 if not modestr.startswith("0"): |
| 449 modestr = "0" + modestr | 487 modestr = "0" + modestr |
| 450 return modestr | 488 return modestr |
| 451 | 489 |
| 452 | 490 |
| 453 def format_bsd_line(digestname, value, filename, use_base64, size=None): | 491 def format_bsd_line(what, value, filename, use_base64, size=None): |
| 454 ls = os.linesep if isinstance(os.linesep, bytes) \ | 492 ls = os.linesep if isinstance(os.linesep, bytes) \ |
| 455 else os.linesep.encode("utf-8") | 493 else os.linesep.encode("utf-8") |
| 456 if not isinstance(digestname, bytes): | 494 if not isinstance(what, bytes): |
| 457 digestname = digestname.encode("ascii") | 495 what = what.encode("ascii") |
| 458 if digestname == b"TIMESTAMP": | 496 if what == b"TIMESTAMP": |
| 459 assert filename is None | 497 assert filename is None |
| 460 return b"TIMESTAMP = %d%s" % (value, ls) | 498 return b"TIMESTAMP = %d%s" % (value, ls) |
| 461 if digestname in (b"ISOTIMESTAMP", b"FLAGS", b"VERSION"): | 499 if what in (b"ISOTIMESTAMP", b"FLAGS", b"VERSION"): |
| 462 assert filename is None | 500 assert filename is None |
| 463 if not isinstance(value, bytes): | 501 if not isinstance(value, bytes): |
| 464 value = value.encode("ascii") | 502 value = value.encode("ascii") |
| 465 return b"%s = %s%s" % (digestname, value, ls) | 503 return b"%s = %s%s" % (what, value, ls) |
| 466 assert filename is not None | 504 assert filename is not None |
| 467 if digestname == b"COMMENT": | 505 if what == b"COMMENT": |
| 468 if not isinstance(filename, bytes): | 506 if not isinstance(filename, bytes): |
| 469 filename = filename.encode("utf-8") | 507 filename = filename.encode("utf-8") |
| 470 return b"COMMENT (%s)%s" % (filename, ls) | 508 return b"COMMENT (%s)%s" % (filename, ls) |
| 471 if not isinstance(filename, bytes): | 509 if not isinstance(filename, bytes): |
| 472 filename = util.fsencode(filename) | 510 filename = util.fsencode(filename) |
| 511 if what == b"SIZE": | |
| 512 return b"SIZE (%s) = %d%s" % (filename, size, ls) | |
| 473 if value is None: | 513 if value is None: |
| 474 return b"%s (%s)%s" % (digestname, filename, ls) | 514 return b"%s (%s)%s" % (what, filename, ls) |
| 475 if use_base64: | 515 if use_base64: |
| 476 value = base64.b64encode(value) | 516 value = base64.b64encode(value) |
| 477 else: | 517 else: |
| 478 value = binascii.hexlify(value) | 518 value = binascii.hexlify(value) |
| 479 if filename != b"./@": | 519 if filename != b"./@": |
| 480 filename = util.normalize_filename(filename, True) | 520 filename = util.normalize_filename(filename, True) |
| 481 if size is None: | 521 if size is None: |
| 482 return b"%s (%s) = %s%s" % (digestname, filename, value, ls) | 522 return b"%s (%s) = %s%s" % (what, filename, value, ls) |
| 483 else: | 523 else: |
| 484 return b"%s (%s) = %s,%d%s" % (digestname, filename, value, size, ls) | 524 return b"%s (%s) = %s,%d%s" % (what, filename, value, size, ls) |
| 485 | 525 |
| 486 | 526 |
| 487 if __name__ == "__main__": | 527 if __name__ == "__main__": |
| 488 sys.exit(main()) | 528 sys.exit(main()) |
