comparison cutils/treesum.py @ 170:8945be6b404e

Add a --size-only mode to treesum.py that prints only the size of each file and the accumulated size of each directory. Digests are not computed in this mode.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 10 Jan 2025 10:32:52 +0100
parents 91b8b2a8aebc
children 804a823c63f5
comparison
equal deleted inserted replaced
169:91b8b2a8aebc 170:8945be6b404e
111 "--print-size", action="store_true", 111 "--print-size", action="store_true",
112 help="""Print the size of a file or the accumulated sizes of 112 help="""Print the size of a file or the accumulated sizes of
113 directory content into the output also. 113 directory content into the output also.
114 The size is not considered when computing digests. For symbolic links 114 The size is not considered when computing digests. For symbolic links
115 the size is not printed also.""") 115 the size is not printed also.""")
116 gp.add_argument(
117 "--size-only", action="store_true",
118 help="""Print only the size of files and for each directory its
119 accumulated directory size. Digests are not computed.""")
116 gp.add_argument( 120 gp.add_argument(
117 "directories", nargs="*", metavar="DIRECTORY") 121 "directories", nargs="*", metavar="DIRECTORY")
118 122
119 parser = argparse.ArgumentParser( 123 parser = argparse.ArgumentParser(
120 description="Generate and verify checksums for directory trees.", 124 description="Generate and verify checksums for directory trees.",
211 minimal=None, 215 minimal=None,
212 mode=False, 216 mode=False,
213 mmap=None, 217 mmap=None,
214 mtime=False, 218 mtime=False,
215 output=None, 219 output=None,
216 print_size=False): 220 print_size=False,
221 size_only=False):
217 opts = argparse.Namespace( 222 opts = argparse.Namespace(
218 directories=directories, 223 directories=directories,
219 algorithm=(util.algotag2algotype(algorithm), 224 algorithm=(util.algotag2algotype(algorithm),
220 algorithm), 225 algorithm),
221 append_output=append_output, 226 append_output=append_output,
227 mmap=mmap, 232 mmap=mmap,
228 metadata_full_mode=full_mode, 233 metadata_full_mode=full_mode,
229 metadata_mode=mode, 234 metadata_mode=mode,
230 metadata_mtime=mtime, 235 metadata_mtime=mtime,
231 output=output, 236 output=output,
232 print_size=print_size) 237 print_size=print_size,
238 size_only=size_only)
233 return opts 239 return opts
234 240
235 241
236 def treesum(opts): 242 def treesum(opts):
237 # XXX TBD: opts.check and opts.checklist (as in shasum.py) 243 # XXX TBD: opts.check and opts.checklist (as in shasum.py)
266 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, 272 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical,
267 opts.follow_directory_symlinks, 273 opts.follow_directory_symlinks,
268 opts.metadata_mode, 274 opts.metadata_mode,
269 opts.metadata_full_mode, 275 opts.metadata_full_mode,
270 opts.metadata_mtime, 276 opts.metadata_mtime,
277 opts.size_only,
271 opts.print_size, 278 opts.print_size,
272 minimal=opts.minimal, 279 minimal=opts.minimal,
273 comment=opts.comment) 280 comment=opts.comment)
274 281
275 282
276 def generate_treesum_for_directory( 283 def generate_treesum_for_directory(
277 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, 284 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical,
278 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, 285 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode,
279 with_metadata_mtime, print_size, 286 with_metadata_mtime, size_only, print_size,
280 minimal=None, comment=None): 287 minimal=None, comment=None):
281 """ 288 """
282 289
283 :param outfp: a *binary* file with a "write()" and a "flush()" method 290 :param outfp: a *binary* file with a "write()" and a "flush()" method
284 291
296 flags.append("with-metadata-mtime") 303 flags.append("with-metadata-mtime")
297 if handle_root_logical: 304 if handle_root_logical:
298 flags.append("logical") 305 flags.append("logical")
299 if follow_directory_symlinks: 306 if follow_directory_symlinks:
300 flags.append("follow-directory-symlinks") 307 flags.append("follow-directory-symlinks")
301 if print_size: 308 if size_only:
302 flags.append("print-size") 309 flags.append("size-only")
310 else:
311 if print_size:
312 flags.append("print-size")
303 if flags: 313 if flags:
304 flags.sort() 314 flags.sort()
305 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) 315 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False))
306 outfp.flush() 316 outfp.flush()
307 317
333 linkdgst.update(b"%d:%s," % (len(linktgt), linktgt)) 343 linkdgst.update(b"%d:%s," % (len(linktgt), linktgt))
334 dir_dgst = algorithm[0]() 344 dir_dgst = algorithm[0]()
335 dir_dgst.update(b"1:L,") 345 dir_dgst.update(b"1:L,")
336 dir_dgst.update( 346 dir_dgst.update(
337 b"%d:%s," % (len(linkdgst.digest()), linkdgst.digest())) 347 b"%d:%s," % (len(linkdgst.digest()), linkdgst.digest()))
338 outfp.write( 348 if size_only:
339 format_bsd_line( 349 outfp.write(
340 algorithm[1], 350 format_bsd_line(
341 dir_dgst.digest(), 351 "SIZE",
342 "./@", 352 None,
343 use_base64)) 353 "./@",
354 False,
355 0))
356 else:
357 outfp.write(
358 format_bsd_line(
359 algorithm[1],
360 dir_dgst.digest(),
361 "./@",
362 use_base64))
344 outfp.flush() 363 outfp.flush()
345 return 364 return
346 365
347 for top, fsobjects in walk.walk( 366 for top, fsobjects in walk.walk(
348 root, 367 root,
361 # no mtime and no mode for symlinks 380 # no mtime and no mode for symlinks
362 dir_dgst.update( 381 dir_dgst.update(
363 b"%d:%s," 382 b"%d:%s,"
364 % (len(linkdgst.digest()), linkdgst.digest())) 383 % (len(linkdgst.digest()), linkdgst.digest()))
365 opath = "/".join(top) + "/" + fso.name if top else fso.name 384 opath = "/".join(top) + "/" + fso.name if top else fso.name
366 outfp.write( 385 if size_only:
367 format_bsd_line( 386 outfp.write(
368 algorithm[1], 387 format_bsd_line(
369 linkdgst.digest(), 388 "SIZE",
370 "%s/./@" % (opath,), 389 None,
371 use_base64)) 390 "%s/./@" % (opath,),
391 False,
392 0))
393 else:
394 outfp.write(
395 format_bsd_line(
396 algorithm[1],
397 linkdgst.digest(),
398 "%s/./@" % (opath,),
399 use_base64))
372 outfp.flush() 400 outfp.flush()
373 continue 401 continue
374 # fetch from dir_digests 402 # fetch from dir_digests
375 dgst, dsz = dir_digests[top + (fso.name,)] 403 dgst, dsz = dir_digests[top + (fso.name,)]
376 dir_size += dsz 404 dir_size += dsz
377 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) 405 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname))
378 dir_dgst.update( 406 dir_dgst.update(b"%d:%s," % (len(dgst), dgst))
379 b"%d:%s," % (len(dgst), dgst))
380 if with_metadata_full_mode: 407 if with_metadata_full_mode:
381 modestr = normalized_mode_str(fso.stat.st_mode) 408 modestr = normalized_mode_str(fso.stat.st_mode)
382 if not isinstance(modestr, bytes): 409 if not isinstance(modestr, bytes):
383 modestr = modestr.encode("ascii") 410 modestr = modestr.encode("ascii")
384 dir_dgst.update(b"8:fullmode,%d:%s," 411 dir_dgst.update(b"8:fullmode,%d:%s,"
407 elif with_metadata_mode: 434 elif with_metadata_mode:
408 modestr = normalized_compatible_mode_str(fso.stat.st_mode) 435 modestr = normalized_compatible_mode_str(fso.stat.st_mode)
409 if not isinstance(modestr, bytes): 436 if not isinstance(modestr, bytes):
410 modestr = modestr.encode("ascii") 437 modestr = modestr.encode("ascii")
411 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) 438 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr))
412 dgst = digest.compute_digest_file( 439 if not size_only:
413 algorithm[0], fso.path, use_mmap=use_mmap) 440 dgst = digest.compute_digest_file(
414 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) 441 algorithm[0], fso.path, use_mmap=use_mmap)
442 dir_dgst.update(b"%d:%s," % (len(dgst), dgst))
415 opath = "/".join(top) + "/" + fso.name if top else fso.name 443 opath = "/".join(top) + "/" + fso.name if top else fso.name
416 if print_size: 444 if size_only:
417 outfp.write( 445 outfp.write(
418 format_bsd_line( 446 format_bsd_line(
419 algorithm[1], dgst, opath, use_base64, 447 "SIZE", None, opath, False, fso.stat.st_size))
420 fso.stat.st_size))
421 else: 448 else:
422 outfp.write( 449 if print_size:
423 format_bsd_line( 450 outfp.write(
424 algorithm[1], dgst, opath, use_base64)) 451 format_bsd_line(
452 algorithm[1], dgst, opath, use_base64,
453 fso.stat.st_size))
454 else:
455 outfp.write(
456 format_bsd_line(
457 algorithm[1], dgst, opath, use_base64))
425 outfp.flush() 458 outfp.flush()
426 opath = "/".join(top) + "/" if top else "" 459 opath = "/".join(top) + "/" if top else ""
427 if print_size: 460 if size_only:
428 outfp.write(format_bsd_line( 461 outfp.write(format_bsd_line(
429 algorithm[1], dir_dgst.digest(), opath, use_base64, dir_size)) 462 "SIZE", None, opath, False, dir_size))
430 else: 463 else:
431 outfp.write(format_bsd_line( 464 if print_size:
432 algorithm[1], dir_dgst.digest(), opath, use_base64)) 465 outfp.write(format_bsd_line(
466 algorithm[1], dir_dgst.digest(), opath,
467 use_base64, dir_size))
468 else:
469 outfp.write(format_bsd_line(
470 algorithm[1], dir_dgst.digest(), opath, use_base64))
433 outfp.flush() 471 outfp.flush()
434 dir_digests[top] = (dir_dgst.digest(), dir_size) 472 dir_digests[top] = (dir_dgst.digest(), dir_size)
435 473
436 474
437 def normalized_compatible_mode_str(mode): 475 def normalized_compatible_mode_str(mode):
448 if not modestr.startswith("0"): 486 if not modestr.startswith("0"):
449 modestr = "0" + modestr 487 modestr = "0" + modestr
450 return modestr 488 return modestr
451 489
452 490
453 def format_bsd_line(digestname, value, filename, use_base64, size=None): 491 def format_bsd_line(what, value, filename, use_base64, size=None):
454 ls = os.linesep if isinstance(os.linesep, bytes) \ 492 ls = os.linesep if isinstance(os.linesep, bytes) \
455 else os.linesep.encode("utf-8") 493 else os.linesep.encode("utf-8")
456 if not isinstance(digestname, bytes): 494 if not isinstance(what, bytes):
457 digestname = digestname.encode("ascii") 495 what = what.encode("ascii")
458 if digestname == b"TIMESTAMP": 496 if what == b"TIMESTAMP":
459 assert filename is None 497 assert filename is None
460 return b"TIMESTAMP = %d%s" % (value, ls) 498 return b"TIMESTAMP = %d%s" % (value, ls)
461 if digestname in (b"ISOTIMESTAMP", b"FLAGS", b"VERSION"): 499 if what in (b"ISOTIMESTAMP", b"FLAGS", b"VERSION"):
462 assert filename is None 500 assert filename is None
463 if not isinstance(value, bytes): 501 if not isinstance(value, bytes):
464 value = value.encode("ascii") 502 value = value.encode("ascii")
465 return b"%s = %s%s" % (digestname, value, ls) 503 return b"%s = %s%s" % (what, value, ls)
466 assert filename is not None 504 assert filename is not None
467 if digestname == b"COMMENT": 505 if what == b"COMMENT":
468 if not isinstance(filename, bytes): 506 if not isinstance(filename, bytes):
469 filename = filename.encode("utf-8") 507 filename = filename.encode("utf-8")
470 return b"COMMENT (%s)%s" % (filename, ls) 508 return b"COMMENT (%s)%s" % (filename, ls)
471 if not isinstance(filename, bytes): 509 if not isinstance(filename, bytes):
472 filename = util.fsencode(filename) 510 filename = util.fsencode(filename)
511 if what == b"SIZE":
512 return b"SIZE (%s) = %d%s" % (filename, size, ls)
473 if value is None: 513 if value is None:
474 return b"%s (%s)%s" % (digestname, filename, ls) 514 return b"%s (%s)%s" % (what, filename, ls)
475 if use_base64: 515 if use_base64:
476 value = base64.b64encode(value) 516 value = base64.b64encode(value)
477 else: 517 else:
478 value = binascii.hexlify(value) 518 value = binascii.hexlify(value)
479 if filename != b"./@": 519 if filename != b"./@":
480 filename = util.normalize_filename(filename, True) 520 filename = util.normalize_filename(filename, True)
481 if size is None: 521 if size is None:
482 return b"%s (%s) = %s%s" % (digestname, filename, value, ls) 522 return b"%s (%s) = %s%s" % (what, filename, value, ls)
483 else: 523 else:
484 return b"%s (%s) = %s,%d%s" % (digestname, filename, value, size, ls) 524 return b"%s (%s) = %s,%d%s" % (what, filename, value, size, ls)
485 525
486 526
487 if __name__ == "__main__": 527 if __name__ == "__main__":
488 sys.exit(main()) 528 sys.exit(main())