comparison cutils/treesum.py @ 168:bcc4441cf216

Implement "--print-size" to also print file sizes and accumulated directory sizes. NOTE: (1) The resulting digests (file and directory) are NOT affected. (2) Symbolic links are not considered.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 09 Jan 2025 18:02:46 +0100
parents df927ada9a37
children 91b8b2a8aebc
comparison
equal deleted inserted replaced
167:ffd14e2de130 168:bcc4441cf216
105 gp.add_argument( 105 gp.add_argument(
106 "--physical", "-P", dest="logical", action="store_false", 106 "--physical", "-P", dest="logical", action="store_false",
107 default=None, 107 default=None,
108 help="Do not follow symbolic links given on comment line " 108 help="Do not follow symbolic links given on comment line "
109 "arguments. This is the default.") 109 "arguments. This is the default.")
110 gp.add_argument(
111 "--print-size", action="store_true",
112 help="""Print the size of a file or the accumulated sizes of
113 directory content into the output also.
114 The size is not considered when computing digests. For symbolic links
115 the size is not printed also.""")
110 gp.add_argument( 116 gp.add_argument(
111 "directories", nargs="*", metavar="DIRECTORY") 117 "directories", nargs="*", metavar="DIRECTORY")
112 118
113 parser = argparse.ArgumentParser( 119 parser = argparse.ArgumentParser(
114 description="Generate and verify checksums for directory trees.", 120 description="Generate and verify checksums for directory trees.",
204 logical=None, 210 logical=None,
205 minimal=None, 211 minimal=None,
206 mode=False, 212 mode=False,
207 mmap=None, 213 mmap=None,
208 mtime=False, 214 mtime=False,
209 output=None): 215 output=None,
216 print_size=False):
210 opts = argparse.Namespace( 217 opts = argparse.Namespace(
211 directories=directories, 218 directories=directories,
212 algorithm=(util.algotag2algotype(algorithm), 219 algorithm=(util.algotag2algotype(algorithm),
213 algorithm), 220 algorithm),
214 append_output=append_output, 221 append_output=append_output,
219 minimal=minimal, 226 minimal=minimal,
220 mmap=mmap, 227 mmap=mmap,
221 metadata_full_mode=full_mode, 228 metadata_full_mode=full_mode,
222 metadata_mode=mode, 229 metadata_mode=mode,
223 metadata_mtime=mtime, 230 metadata_mtime=mtime,
224 output=output) 231 output=output,
232 print_size=print_size)
225 return opts 233 return opts
226 234
227 235
228 def treesum(opts): 236 def treesum(opts):
229 # XXX TBD: opts.check and opts.checklist (as in shasum.py) 237 # XXX TBD: opts.check and opts.checklist (as in shasum.py)
258 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical, 266 outfp, d, opts.algorithm, opts.mmap, opts.base64, opts.logical,
259 opts.follow_directory_symlinks, 267 opts.follow_directory_symlinks,
260 opts.metadata_mode, 268 opts.metadata_mode,
261 opts.metadata_full_mode, 269 opts.metadata_full_mode,
262 opts.metadata_mtime, 270 opts.metadata_mtime,
271 opts.print_size,
263 minimal=opts.minimal, 272 minimal=opts.minimal,
264 comment=opts.comment) 273 comment=opts.comment)
265 274
266 275
267 def generate_treesum_for_directory( 276 def generate_treesum_for_directory(
268 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical, 277 outfp, root, algorithm, use_mmap, use_base64, handle_root_logical,
269 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode, 278 follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode,
270 with_metadata_mtime, 279 with_metadata_mtime, print_size,
271 minimal=None, comment=None): 280 minimal=None, comment=None):
272 """ 281 """
273 282
274 :param outfp: a *binary* file with a "write()" and a "flush()" method 283 :param outfp: a *binary* file with a "write()" and a "flush()" method
275 284
287 flags.append("with-metadata-mtime") 296 flags.append("with-metadata-mtime")
288 if handle_root_logical: 297 if handle_root_logical:
289 flags.append("logical") 298 flags.append("logical")
290 if follow_directory_symlinks: 299 if follow_directory_symlinks:
291 flags.append("follow-directory-symlinks") 300 flags.append("follow-directory-symlinks")
301 if print_size:
302 flags.append("print-size")
292 if flags: 303 if flags:
293 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False)) 304 outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False))
294 outfp.flush() 305 outfp.flush()
295 306
296 if minimal is None: 307 if minimal is None:
334 345
335 for top, fsobjects in walk.walk( 346 for top, fsobjects in walk.walk(
336 root, 347 root,
337 follow_symlinks=follow_directory_symlinks): 348 follow_symlinks=follow_directory_symlinks):
338 dir_dgst = algorithm[0]() 349 dir_dgst = algorithm[0]()
350 dir_size = 0
351
339 for fso in fsobjects: 352 for fso in fsobjects:
340 if fso.is_dir: 353 if fso.is_dir:
341 if fso.is_symlink and not follow_directory_symlinks: 354 if fso.is_symlink and not follow_directory_symlinks:
342 linktgt = util.fsencode(os.readlink(fso.path)) 355 linktgt = util.fsencode(os.readlink(fso.path))
343 linkdgst = algorithm[0]() 356 linkdgst = algorithm[0]()
356 "%s/./@" % (opath,), 369 "%s/./@" % (opath,),
357 use_base64)) 370 use_base64))
358 outfp.flush() 371 outfp.flush()
359 continue 372 continue
360 # fetch from dir_digests 373 # fetch from dir_digests
361 dgst = dir_digests[top + (fso.name,)] 374 dgst, dsz = dir_digests[top + (fso.name,)]
375 dir_size += dsz
362 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname)) 376 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname))
363 dir_dgst.update( 377 dir_dgst.update(
364 b"%d:%s," % (len(dgst), dgst)) 378 b"%d:%s," % (len(dgst), dgst))
365 if with_metadata_full_mode: 379 if with_metadata_full_mode:
366 modestr = normalized_mode_str(fso.stat.st_mode) 380 modestr = normalized_mode_str(fso.stat.st_mode)
373 if not isinstance(modestr, bytes): 387 if not isinstance(modestr, bytes):
374 modestr = modestr.encode("ascii") 388 modestr = modestr.encode("ascii")
375 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) 389 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr))
376 else: 390 else:
377 dir_dgst.update(b"1:f,%d:%s," % (len(fso.fsname), fso.fsname)) 391 dir_dgst.update(b"1:f,%d:%s," % (len(fso.fsname), fso.fsname))
392 dir_size += fso.stat.st_size
378 if with_metadata_mtime: 393 if with_metadata_mtime:
379 mtime = datetime.datetime.utcfromtimestamp( 394 mtime = datetime.datetime.utcfromtimestamp(
380 int(fso.stat.st_mtime)) 395 int(fso.stat.st_mtime))
381 mtime = mtime.isoformat("T") + "Z" 396 mtime = mtime.isoformat("T") + "Z"
382 if not isinstance(mtime, bytes): 397 if not isinstance(mtime, bytes):
395 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr)) 410 dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr))
396 dgst = digest.compute_digest_file( 411 dgst = digest.compute_digest_file(
397 algorithm[0], fso.path, use_mmap=use_mmap) 412 algorithm[0], fso.path, use_mmap=use_mmap)
398 dir_dgst.update(b"%d:%s," % (len(dgst), dgst)) 413 dir_dgst.update(b"%d:%s," % (len(dgst), dgst))
399 opath = "/".join(top) + "/" + fso.name if top else fso.name 414 opath = "/".join(top) + "/" + fso.name if top else fso.name
400 outfp.write( 415 if print_size:
401 format_bsd_line( 416 outfp.write(
402 algorithm[1], dgst, opath, use_base64)) 417 format_bsd_line(
418 algorithm[1], dgst, opath, use_base64,
419 fso.stat.st_size))
420 else:
421 outfp.write(
422 format_bsd_line(
423 algorithm[1], dgst, opath, use_base64))
403 outfp.flush() 424 outfp.flush()
404 opath = "/".join(top) + "/" if top else "" 425 opath = "/".join(top) + "/" if top else ""
405 outfp.write(format_bsd_line( 426 if print_size:
406 algorithm[1], dir_dgst.digest(), opath, use_base64)) 427 outfp.write(format_bsd_line(
428 algorithm[1], dir_dgst.digest(), opath, use_base64, dir_size))
429 else:
430 outfp.write(format_bsd_line(
431 algorithm[1], dir_dgst.digest(), opath, use_base64))
407 outfp.flush() 432 outfp.flush()
408 dir_digests[top] = dir_dgst.digest() 433 dir_digests[top] = (dir_dgst.digest(), dir_size)
409 434
410 435
411 def normalized_compatible_mode_str(mode): 436 def normalized_compatible_mode_str(mode):
412 # XXX FIXME: Windows and "executable" 437 # XXX FIXME: Windows and "executable"
413 modebits = stat.S_IMODE(mode) 438 modebits = stat.S_IMODE(mode)
422 if not modestr.startswith("0"): 447 if not modestr.startswith("0"):
423 modestr = "0" + modestr 448 modestr = "0" + modestr
424 return modestr 449 return modestr
425 450
426 451
427 def format_bsd_line(digestname, value, filename, use_base64): 452 def format_bsd_line(digestname, value, filename, use_base64, size=None):
428 ls = os.linesep if isinstance(os.linesep, bytes) \ 453 ls = os.linesep if isinstance(os.linesep, bytes) \
429 else os.linesep.encode("utf-8") 454 else os.linesep.encode("utf-8")
430 if not isinstance(digestname, bytes): 455 if not isinstance(digestname, bytes):
431 digestname = digestname.encode("ascii") 456 digestname = digestname.encode("ascii")
432 if digestname == b"TIMESTAMP": 457 if digestname == b"TIMESTAMP":
450 value = base64.b64encode(value) 475 value = base64.b64encode(value)
451 else: 476 else:
452 value = binascii.hexlify(value) 477 value = binascii.hexlify(value)
453 if filename != b"./@": 478 if filename != b"./@":
454 filename = util.normalize_filename(filename, True) 479 filename = util.normalize_filename(filename, True)
455 return b"%s (%s) = %s%s" % (digestname, filename, value, ls) 480 if size is None:
481 return b"%s (%s) = %s%s" % (digestname, filename, value, ls)
482 else:
483 return b"%s (%s) = %s,%d%s" % (digestname, filename, value, size, ls)
456 484
457 485
458 if __name__ == "__main__": 486 if __name__ == "__main__":
459 sys.exit(main()) 487 sys.exit(main())