Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 168:bcc4441cf216
Implement "--print-size" to also print file sizes and accumulated directory sizes.
NOTE:
- Resulting digests (file and directory) are NOT affected.
- Symbolic links are not considered.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 09 Jan 2025 18:02:46 +0100 |
| parents | df927ada9a37 |
| children | 91b8b2a8aebc |
comparison legend:
- equal
- deleted
- inserted
- replaced
| 167:ffd14e2de130 | 168:bcc4441cf216 |
|---|---|
| 105 gp.add_argument( | 105 gp.add_argument( |
| 106 "--physical", "-P", dest="logical", action="store_false", | 106 "--physical", "-P", dest="logical", action="store_false", |
| 107 default=None, | 107 default=None, |
| 108 help="Do not follow symbolic links given on comment line " | 108 help="Do not follow symbolic links given on comment line " |
| 109 "arguments. This is the default.") | 109 "arguments. This is the default.") |
| 110 gp.add_argument( | |
| 111 "--print-size", action="store_true", | |
| 112 help="""Print the size of a file or the accumulated sizes of | |
| 113 directory content into the output also. | |
| 114 The size is not considered when computing digests. For symbolic links | |
| 115 the size is not printed also.""") | |
| 110 gp.add_argument( | 116 gp.add_argument( |
| 111 "directories", nargs="*", metavar="DIRECTORY") | 117 "directories", nargs="*", metavar="DIRECTORY") |
| 112 | 118 |
| 113 parser = argparse.ArgumentParser( | 119 parser = argparse.ArgumentParser( |
| 114 description="Generate and verify checksums for directory trees.", | 120 description="Generate and verify checksums for directory trees.", |
| 204 logical=None, | 210 logical=None, |
| 205 minimal=None, | 211 minimal=None, |
| 206 mode=False, | 212 mode=False, |
| 207 mmap=None, | 213 mmap=None, |
| 208 mtime=False, | 214 mtime=False, |
| 209 output=None): | 215 output=None, |
| 216 print_size=False): | |
| 210 opts = argparse.Namespace( | 217 opts = argparse.Namespace( |
| 211 directories=directories, | 218 directories=directories, |
| 212 algorithm=(util.algotag2algotype(algorithm), | 219 algorithm=(util.algotag2algotype(algorithm), |
| 213 algorithm), | 220 algorithm), |
| 214 append_output=append_output, | 221 append_output=append_output, |
| 219 minimal=minimal, | 226 minimal=minimal, |
| 220 mmap=mmap, | 227 mmap=mmap, |
| 221 metadata_full_mode=full_mode, | 228 metadata_full_mode=full_mode, |
| 222 metadata_mode=mode, | 229 metadata_mode=mode, |
| 223 metadata_mtime=mtime, | 230 metadata_mtime=mtime, |
| 224 output=output) | 231 output=output, |
| 232 print_size=print_size) | |
| 225 return opts | 233 return opts |
| 226 | 234 |
| 227 | 235 |
| 228 def treesum(opts): | 236 def treesum(opts): |
| 229 # XXX TBD: opts.check and opts.checklist (as in shasum.py) | 237 # XXX TBD: opts.check and opts.checklist (as in shasum.py) |
| 258 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, | 266 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, |
| 259 opts.follow_directory_symlinks, | 267 opts.follow_directory_symlinks, |
| 260 opts.metadata_mode, | 268 opts.metadata_mode, |
| 261 opts.metadata_full_mode, | 269 opts.metadata_full_mode, |
| 262 opts.metadata_mtime, | 270 opts.metadata_mtime, |
| 271 opts.print_size, | |
| 263 minimal=opts.minimal, | 272 minimal=opts.minimal, |
| 264 comment=opts.comment) | 273 comment=opts.comment) |
| 265 | 274 |
| 266 | 275 |
| 267 def generate_treesum_for_directory( | 276 def generate_treesum_for_directory( |
| 268 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, | 277 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, |
| 269 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, | 278 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, |
| 270 with_metadata_mtime, | 279 with_metadata_mtime, print_size, |
| 271 minimal=None, comment=None): | 280 minimal=None, comment=None): |
| 272 """ | 281 """ |
| 273 | 282 |
| 274 :param outfp: a *binary* file with a "write()" and a "flush()" method | 283 :param outfp: a *binary* file with a "write()" and a "flush()" method |
| 275 | 284 |
| 287 flags.append("with-metadata-mtime") | 296 flags.append("with-metadata-mtime") |
| 288 if handle_root_logical: | 297 if handle_root_logical: |
| 289 flags.append("logical") | 298 flags.append("logical") |
| 290 if follow_directory_symlinks: | 299 if follow_directory_symlinks: |
| 291 flags.append("follow-directory-symlinks") | 300 flags.append("follow-directory-symlinks") |
| 301 if print_size: | |
| 302 flags.append("print-size") | |
| 292 if flags: | 303 if flags: |
| 293 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) | 304 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) |
| 294 outfp.flush() | 305 outfp.flush() |
| 295 | 306 |
| 296 if minimal is None: | 307 if minimal is None: |
| 334 | 345 |
| 335 for top, fsobjects in walk.walk( | 346 for top, fsobjects in walk.walk( |
| 336 root, | 347 root, |
| 337 follow_symlinks=follow_directory_symlinks): | 348 follow_symlinks=follow_directory_symlinks): |
| 338 dir_dgst = algorithm[0]() | 349 dir_dgst = algorithm[0]() |
| 350 dir_size = 0 | |
| 351 | |
| 339 for fso in fsobjects: | 352 for fso in fsobjects: |
| 340 if fso.is_dir: | 353 if fso.is_dir: |
| 341 if fso.is_symlink and not follow_directory_symlinks: | 354 if fso.is_symlink and not follow_directory_symlinks: |
| 342 linktgt = util.fsencode(os.readlink(fso.path)) | 355 linktgt = util.fsencode(os.readlink(fso.path)) |
| 343 linkdgst = algorithm[0]() | 356 linkdgst = algorithm[0]() |
| 356 "%s/./@" % (opath,), | 369 "%s/./@" % (opath,), |
| 357 use_base64)) | 370 use_base64)) |
| 358 outfp.flush() | 371 outfp.flush() |
| 359 continue | 372 continue |
| 360 # fetch from dir_digests | 373 # fetch from dir_digests |
| 361 dgst = dir_digests[top + (fso.name,)] | 374 dgst, dsz = dir_digests[top + (fso.name,)] |
| 375 dir_size += dsz | |
| 362 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) | 376 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) |
| 363 dir_dgst.update( | 377 dir_dgst.update( |
| 364 b"%d:%s," % (len(dgst), dgst)) | 378 b"%d:%s," % (len(dgst), dgst)) |
| 365 if with_metadata_full_mode: | 379 if with_metadata_full_mode: |
| 366 modestr = normalized_mode_str(fso.stat.st_mode) | 380 modestr = normalized_mode_str(fso.stat.st_mode) |
| 373 if not isinstance(modestr, bytes): | 387 if not isinstance(modestr, bytes): |
| 374 modestr = modestr.encode("ascii") | 388 modestr = modestr.encode("ascii") |
| 375 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) | 389 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) |
| 376 else: | 390 else: |
| 377 dir_dgst.update(b"1:f,%d:%s," % (len(fso.fsname), fso.fsname)) | 391 dir_dgst.update(b"1:f,%d:%s," % (len(fso.fsname), fso.fsname)) |
| 392 dir_size += fso.stat.st_size | |
| 378 if with_metadata_mtime: | 393 if with_metadata_mtime: |
| 379 mtime = datetime.datetime.utcfromtimestamp( | 394 mtime = datetime.datetime.utcfromtimestamp( |
| 380 int(fso.stat.st_mtime)) | 395 int(fso.stat.st_mtime)) |
| 381 mtime = mtime.isoformat("T") + "Z" | 396 mtime = mtime.isoformat("T") + "Z" |
| 382 if not isinstance(mtime, bytes): | 397 if not isinstance(mtime, bytes): |
| 395 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) | 410 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) |
| 396 dgst = digest.compute_digest_file( | 411 dgst = digest.compute_digest_file( |
| 397 algorithm[0], fso.path, use_mmap=use_mmap) | 412 algorithm[0], fso.path, use_mmap=use_mmap) |
| 398 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) | 413 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) |
| 399 opath = "/".join(top) + "/" + fso.name if top else fso.name | 414 opath = "/".join(top) + "/" + fso.name if top else fso.name |
| 400 outfp.write( | 415 if print_size: |
| 401 format_bsd_line( | 416 outfp.write( |
| 402 algorithm[1], dgst, opath, use_base64)) | 417 format_bsd_line( |
| 418 algorithm[1], dgst, opath, use_base64, | |
| 419 fso.stat.st_size)) | |
| 420 else: | |
| 421 outfp.write( | |
| 422 format_bsd_line( | |
| 423 algorithm[1], dgst, opath, use_base64)) | |
| 403 outfp.flush() | 424 outfp.flush() |
| 404 opath = "/".join(top) + "/" if top else "" | 425 opath = "/".join(top) + "/" if top else "" |
| 405 outfp.write(format_bsd_line( | 426 if print_size: |
| 406 algorithm[1], dir_dgst.digest(), opath, use_base64)) | 427 outfp.write(format_bsd_line( |
| 428 algorithm[1], dir_dgst.digest(), opath, use_base64, dir_size)) | |
| 429 else: | |
| 430 outfp.write(format_bsd_line( | |
| 431 algorithm[1], dir_dgst.digest(), opath, use_base64)) | |
| 407 outfp.flush() | 432 outfp.flush() |
| 408 dir_digests[top] = dir_dgst.digest() | 433 dir_digests[top] = (dir_dgst.digest(), dir_size) |
| 409 | 434 |
| 410 | 435 |
| 411 def normalized_compatible_mode_str(mode): | 436 def normalized_compatible_mode_str(mode): |
| 412 # XXX FIXME: Windows and "executable" | 437 # XXX FIXME: Windows and "executable" |
| 413 modebits = stat.S_IMODE(mode) | 438 modebits = stat.S_IMODE(mode) |
| 422 if not modestr.startswith("0"): | 447 if not modestr.startswith("0"): |
| 423 modestr = "0" + modestr | 448 modestr = "0" + modestr |
| 424 return modestr | 449 return modestr |
| 425 | 450 |
| 426 | 451 |
| 427 def format_bsd_line(digestname, value, filename, use_base64): | 452 def format_bsd_line(digestname, value, filename, use_base64, size=None): |
| 428 ls = os.linesep if isinstance(os.linesep, bytes) \ | 453 ls = os.linesep if isinstance(os.linesep, bytes) \ |
| 429 else os.linesep.encode("utf-8") | 454 else os.linesep.encode("utf-8") |
| 430 if not isinstance(digestname, bytes): | 455 if not isinstance(digestname, bytes): |
| 431 digestname = digestname.encode("ascii") | 456 digestname = digestname.encode("ascii") |
| 432 if digestname == b"TIMESTAMP": | 457 if digestname == b"TIMESTAMP": |
| 450 value = base64.b64encode(value) | 475 value = base64.b64encode(value) |
| 451 else: | 476 else: |
| 452 value = binascii.hexlify(value) | 477 value = binascii.hexlify(value) |
| 453 if filename != b"./@": | 478 if filename != b"./@": |
| 454 filename = util.normalize_filename(filename, True) | 479 filename = util.normalize_filename(filename, True) |
| 455 return b"%s (%s) = %s%s" % (digestname, filename, value, ls) | 480 if size is None: |
| 481 return b"%s (%s) = %s%s" % (digestname, filename, value, ls) | |
| 482 else: | |
| 483 return b"%s (%s) = %s,%d%s" % (digestname, filename, value, size, ls) | |
| 456 | 484 |
| 457 | 485 |
| 458 if __name__ == "__main__": | 486 if __name__ == "__main__": |
| 459 sys.exit(main()) | 487 sys.exit(main()) |
