view cutils/treesum.py @ 177:089c40240061

Add an alternate implementation for generating directory tree digests: - Do not use something like os.walk() but use os.scandir() directly. - Recursively generate the subdirectory digests only when needed and in the right order. This fixes the problem that the order of subdirectories in the output did not match the order in which their directory digests are applied. The new implementation should also make filtering (to be implemented later) easier. NOTE: The tree digests of the old and the new implementation are identical.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Jan 2025 17:41:28 +0100
parents 7f5d05a625fd
children dac26a2d9de5
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""Generate and verify checksums for directory trees.

"""

from __future__ import print_function, absolute_import


__all__ = []


import argparse
import base64
import binascii
import datetime
import logging
import os
import stat
import sys
import time

from . import (__version__, __revision__)
from . import util
from .util import cm
from .util import digest
from .util import walk


def main(argv=None):
    """Run the command line interface.

    The command line is parsed twice: first leniently, just to serve the
    "version" and "help" commands, then strictly for everything else.

    :param argv: the command line arguments without the program name;
                 `None` is handed through to :mod:`argparse` as is
    :return: 0 for the "version" and "help" commands, otherwise the
             result of :func:`treesum`

    A command line without any command is reported as a usage error via
    :meth:`argparse.ArgumentParser.error`.

    """

    def _populate_generate_arguments(gp):
        """Use to populate command aliases.

        This is because :class:`argparse.ArgumentParser` does not
        support them for all supported Python versions.

        """
        gp.add_argument(
            "--algorithm", "-a", action="store", type=util.argv2algo,
            help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), "
                 "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, "
                 "blake2b, blake2b-256, blake2s, "
                 "blake2 (alias for blake2b), "
                 "blake2-256 (alias for blake2b-256), "
                 "md5. "
                 "The default depends on availability in hashlib: "
                 "blake2b-256, sha256 or sha1.")
        gp.add_argument(
            "--append-output", action="store_true", dest="append_output",
            help="Append to the output file instead of overwriting it.")
        gp.add_argument(
            "--base64", action="store_true",
            help="Output checksums in base64 notation, not hexadecimal "
                 "(OpenBSD).")
        gp.add_argument(
            "--comment", action="append", default=[],
            help="Put given comment COMMENT into the output as \"COMMENT\". "
                 "Can be given more than once.")
        gp.add_argument(
            "--debug", action="store_true",
            help="Activate debug logging to stderr")
        gp.add_argument(
            "--follow-directory-symlinks", "-l", action="store_true",
            dest="follow_directory_symlinks",
            help="Follow symbolic links to directories when walking a "
                 "directory tree. Note that this is different from using "
                 "\"--logical\" or \"--physical\" for arguments given "
                 "directly on the command line")
        gp.add_argument(
            "--full-mode", action="store_true", dest="metadata_full_mode",
            help="Consider all mode bits as returned from stat(2) when "
                 "computing directory digests. "
                 "Note that mode bits on symbolic links itself are not "
                 "considered.")
        gp.add_argument(
            "--logical", "-L", dest="logical", action="store_true",
            default=None,
            help="Follow symbolic links given on command line arguments."
                 " Note that this is a different setting as to follow symbolic"
                 " links to directories when traversing a directory tree.")
        gp.add_argument(
            "--minimal", nargs="?", const="", default=None, metavar="TAG",
            help="Produce minimal output only. If a TAG is given and not "
                 "empty use it as the leading \"ROOT (<TAG>)\" output.")
        gp.add_argument(
            "--mmap", action="store_true", dest="mmap", default=None,
            help="Use mmap if available. Default is to determine "
                 "automatically from the filesize.")
        gp.add_argument(
            "--mode", action="store_true", dest="metadata_mode",
            help="Consider the permission bits of stat(2) using S_IMODE (i.e. "
                 "all bits without the filetype bits) when "
                 "computing directory digests. Note that mode bits on "
                 "symbolic links itself are not considered.")
        gp.add_argument(
            "--mtime", action="store_true", dest="metadata_mtime",
            help="Consider the mtime of files (non-directories) when "
                 "generating digests for directories. Digests for files are "
                 "not affected.")
        gp.add_argument(
            "--no-mmap", action="store_false", dest="mmap", default=None,
            help="Dont use mmap.")
        gp.add_argument(
            "--output", "-o", action="store", metavar="OUTPUT",
            help="Put the checksum into given file. "
                 "If not given or if it is given as `-' then stdout is used.")
        gp.add_argument(
            "--physical", "-P", dest="logical", action="store_false",
            default=None,
            help="Do not follow symbolic links given on command line "
                 "arguments. This is the default.")
        gp.add_argument(
            "--print-size", action="store_true",
            help="""Print the size of a file or the accumulated sizes of
directory content into the output also.
The size is not considered when computing digests. For symbolic links
the size is not printed also.""")
        gp.add_argument(
            "--size-only", action="store_true",
            help="""Print only the size of files and for each directory its
accumulated directory size. Digests are not computed.""")
        gp.add_argument(
            "directories", nargs="*", metavar="DIRECTORY")

    parser = argparse.ArgumentParser(
        description="Generate and verify checksums for directory trees.",
        fromfile_prefix_chars='@',
        add_help=False)

    #
    # Global options for all sub-commands.
    # In a group because this allows a customized title.
    #
    gparser = parser.add_argument_group(title="Global Options")
    gparser.add_argument(
        "-v", "--version", action="version",
        version="%s (rv:%s)" % (__version__, __revision__),
        help="Show program's version number and exit")
    gparser.add_argument(
        "-h", "--help", action="help",
        help="Show this help message and exit")

    #
    # Subcommands
    #
    subparsers = parser.add_subparsers(
        dest="subcommand",
        title="Commands",
        description="This tool uses subcommands. "
                    "To see detailed help for a specific subcommand use "
                    "the -h/--help option after the subcommand name. "
                    "A list of valid commands and their short descriptions "
                    "is listed below:",
        metavar="COMMAND")

    genparser = subparsers.add_parser(
        "generate",
        help="Generate checksums for directory trees.",
        description="Generate checksums for directory trees")
    _populate_generate_arguments(genparser)
    # And an alias for "generate"
    genparser2 = subparsers.add_parser(
        "gen",
        help="Alias for \"generate\"",
        description="Generate checksums for directory trees. "
                    "This is an alias to \"generate\".")
    _populate_generate_arguments(genparser2)

    hparser = subparsers.add_parser(
        "help",
        help="Show this help message or a subcommand's help and exit",
        description="Show this help message or a subcommand's help and exit.")
    hparser.add_argument("help_command", nargs='?', metavar="COMMAND")

    vparser = subparsers.add_parser(
        "version",
        help="Show the program's version number and exit",
        description="Show the program's version number and exit.")

    # Parse leniently to just check for "version" and/or help
    opts, _dummy = parser.parse_known_args(args=argv)

    if opts.subcommand == "version":
        print("%s (rv:%s)" % (__version__, __revision__),
              file=sys.stdout)
        return 0
    if opts.subcommand == "help":
        # A missing or an unknown command name yields the general help
        help_parsers = {
            "generate": genparser,
            "gen": genparser2,
            "version": vparser,
            "help": hparser,
        }
        help_parsers.get(opts.help_command, parser).print_help()
        return 0

    # Reparse strictly
    opts = parser.parse_args(args=argv)

    if opts.subcommand is None:
        #
        # Python 3 does not require a subcommand by default. Without a
        # subcommand no subparser has populated its options and the
        # access to `opts.debug` below would raise an AttributeError.
        #
        parser.error("no COMMAND given")

    # Minimal logging -- just for debugging - not for more "normal" use
    logging.basicConfig(
        level=logging.DEBUG if opts.debug else logging.WARNING,
        stream=sys.stderr,
        format="[%(asctime)s][%(levelname)s][%(process)d:%(name)s] %(message)s"
    )
    logging.captureWarnings(True)

    return treesum(opts)


def gen_generate_opts(directories=None,
                      algorithm=util.default_algotag(),
                      append_output=False,
                      base64=False,
                      comment=None,
                      follow_directory_symlinks=False,
                      full_mode=False,
                      logical=None,
                      minimal=None,
                      mode=False,
                      mmap=None,
                      mtime=False,
                      output=None,
                      print_size=False,
                      size_only=False):
    """Build an options namespace for :func:`generate_treesum` without
    going through the command line parser.

    The attribute names of the result are the destination names that the
    "generate" command line parser uses. Note that `full_mode`, `mode`
    and `mtime` are stored as `metadata_full_mode`, `metadata_mode` and
    `metadata_mtime`. The namespace has no `subcommand` attribute.

    :param directories: an iterable with the directories to be handled;
                        `None` means no directories
    :param algorithm: an algorithm tag; converted with
                      :func:`util.argv2algo`
    :param comment: an iterable with comment lines; `None` means no
                    comments
    :return: the options
    :rtype: argparse.Namespace

    `directories` and `comment` are always copied into new lists. So
    neither a list of the caller nor a list shared between calls is
    changed when :func:`generate_treesum` appends its default directory.

    """
    opts = argparse.Namespace(
        directories=list(directories) if directories is not None else [],
        algorithm=util.argv2algo(algorithm),
        append_output=append_output,
        base64=base64,
        comment=list(comment) if comment is not None else [],
        follow_directory_symlinks=follow_directory_symlinks,
        logical=logical,
        minimal=minimal,
        mmap=mmap,
        metadata_full_mode=full_mode,
        metadata_mode=mode,
        metadata_mtime=mtime,
        output=output,
        print_size=print_size,
        size_only=size_only)
    return opts


def treesum(opts):
    """Dispatch to the implementation of the subcommand in `opts`.

    :param opts: the parsed options; `opts.subcommand` selects the action
    :return: the result of :func:`generate_treesum` for the commands
             "generate" and "gen"
    :raises RuntimeError: for every other subcommand

    """
    # XXX TBD: opts.check and opts.checklist (as in shasum.py)
    command = opts.subcommand
    if command not in ("generate", "gen"):
        raise RuntimeError(
            "command `{}' not yet handled".format(command))
    return generate_treesum(opts)


def generate_treesum(opts):
    """Generate the treesum output for every directory in
    `opts.directories`.

    Missing defaults are filled into `opts` in place: the default
    algorithm and the current directory "." as the only directory.

    The output goes to the file `opts.output` (opened in binary mode,
    appending if `opts.append_output` is set) or to stdout if
    `opts.output` is `None` or "-".

    :param opts: the options as produced by the "generate" command line
                 parser or by :func:`gen_generate_opts`

    """
    # Provide defaults
    if not opts.algorithm:
        opts.algorithm = util.argv2algo(util.default_algotag())
    if not opts.directories:
        opts.directories.append(".")

    if opts.output is not None and opts.output != "-":
        out_cm = open(opts.output, "ab" if opts.append_output else "wb")
    else:
        # Use the binary layer of stdout where stdout has one
        stdout = sys.stdout
        out_cm = cm.nullcontext(
            stdout.buffer if hasattr(stdout, "buffer") else stdout)

    with out_cm as outfp:
        # The positional arguments shared by both implementations
        traversal_args = (
            opts.algorithm, opts.mmap, opts.base64, opts.logical,
            opts.follow_directory_symlinks,
            opts.metadata_mode,
            opts.metadata_full_mode,
            opts.metadata_mtime,
            opts.size_only,
            opts.print_size,
        )
        for directory in opts.directories:
            #
            # NOTE(review): Both implementations are run for every
            #               directory, so the output contains its
            #               treesum twice. This looks transitional
            #               (for comparing both) -- confirm.
            #
            generator = V1DirectoryTreesumGenerator(
                *traversal_args, minimal=opts.minimal)
            generator.generate(outfp, directory, comment=opts.comment)

            generate_treesum_for_directory(
                outfp, directory, *traversal_args,
                minimal=opts.minimal,
                comment=opts.comment)


class V1DirectoryTreesumGenerator(object):
    """Generate the treesum output ("VERSION = 1") for a directory tree.

    Every directory is read with :class:`walk.ScanDir`. The digest of a
    subdirectory is computed by recursing into it at the moment when its
    entry is applied to the digest of the parent directory.

    """

    def __init__(self, algorithm, use_mmap, use_base64,
                 handle_root_logical, follow_directory_symlinks,
                 with_metadata_mode, with_metadata_full_mode,
                 with_metadata_mtime, size_only, print_size,
                 minimal=None,):
        """
        :param algorithm: a sequence: item 0 is called without arguments
                          to get a new digest object, item 1 is the tag
                          that is written in front of the digest lines
        :param use_mmap: handed to :func:`digest.compute_digest_file`
        :param use_base64: handed to :func:`format_bsd_line` for digests
        :param handle_root_logical: if false a root that is a symbolic
                                    link is not followed
        :param follow_directory_symlinks: recurse into symbolic links to
                                          directories
        :param with_metadata_mode: put the S_IMODE mode bits into the
                                   directory digests
        :param with_metadata_full_mode: put all mode bits into the
                                        directory digests; wins over
                                        `with_metadata_mode`
        :param with_metadata_mtime: put the mtime of files into the
                                    directory digests
        :param size_only: write sizes only and compute no file digests
        :param print_size: write sizes in addition to the digests
        :param minimal: if not `None` no timestamps and no comments are
                        written and `minimal` replaces the root name in
                        the "ROOT" line

        """
        super(V1DirectoryTreesumGenerator, self).__init__()
        self._algorithm = algorithm
        self._use_mmap = use_mmap
        self._use_base64 = use_base64
        self._handle_root_logical = handle_root_logical
        self._follow_directory_symlinks = follow_directory_symlinks
        self._with_metadata_mode = with_metadata_mode
        self._with_metadata_full_mode = with_metadata_full_mode
        self._with_metadata_mtime = with_metadata_mtime
        self._size_only = size_only
        self._print_size = print_size
        self._minimal = minimal

    def generate(self, outfp, root, comment=None):
        """Write the header lines and the treesum of `root` into `outfp`.

        :param outfp: a *binary* file with a "write()" and a "flush()" method
        :param root: the directory where to start
        :param comment: an iterable with comment lines or `None`

        """
        self._outfp = outfp
        outfp.write(format_bsd_line("VERSION", "1", None, False))
        outfp.flush()

        flags = self._traversal_flags()
        if flags:
            outfp.write(
                format_bsd_line("FLAGS", ",".join(flags), None, False))

        if self._minimal is None:
            # Write execution timestamps in POSIX epoch and ISO format
            epoch = int(time.time())
            outfp.write(format_bsd_line("TIMESTAMP", epoch, None, False))
            isots = datetime.datetime.utcfromtimestamp(epoch).isoformat("T")
            outfp.write(format_bsd_line("ISOTIMESTAMP", isots, None, False))

            if comment:
                for line in comment:
                    outfp.write(
                        format_bsd_line("COMMENT", None, line, False))
            root_label = root
        else:
            root_label = self._minimal if self._minimal else ""
        outfp.write(format_bsd_line("ROOT", None, root_label, False))
        outfp.flush()

        if not self._handle_root_logical and os.path.islink(root):
            # Only the symbolic link itself is handled: do not traverse
            link_digest = self._symlink_digest(root)
            dir_dgst = self._algorithm[0]()
            dir_dgst.update(b"1:L,")
            dir_dgst.update(
                util.interpolate_bytes(
                    b"%d:%s,", len(link_digest), link_digest))
            self._write_symlink_line("./@", dir_dgst.digest())
            return

        self._generate(os.path.normpath(root), tuple())

    def _traversal_flags(self):
        """Return the sorted names of the given non-default flags that
        are relevant for directory traversal.

        """
        candidates = (
            (self._with_metadata_full_mode, "with-metadata-fullmode"),
            (self._with_metadata_mode and not self._with_metadata_full_mode,
             "with-metadata-mode"),
            (self._with_metadata_mtime, "with-metadata-mtime"),
            (self._handle_root_logical, "logical"),
            (self._follow_directory_symlinks, "follow-directory-symlinks"),
            (self._size_only, "size-only"),
            (self._print_size and not self._size_only, "print-size"),
        )
        return sorted(name for enabled, name in candidates if enabled)

    def _symlink_digest(self, path):
        """Return the digest of the target of the symbolic link `path`."""
        linktgt = util.fsencode(os.readlink(path))
        linkdgst = self._algorithm[0]()
        linkdgst.update(
            util.interpolate_bytes(b"%d:%s,", len(linktgt), linktgt))
        return linkdgst.digest()

    def _update_with_mode(self, dgst, fso):
        """Apply the mode of `fso` to `dgst` if the mode is requested."""
        if self._with_metadata_full_mode:
            fmt = b"8:fullmode,%d:%s,"
            modestr = normalized_mode_str(fso.stat.st_mode)
        elif self._with_metadata_mode:
            fmt = b"4:mode,%d:%s,"
            modestr = normalized_compatible_mode_str(fso.stat.st_mode)
        else:
            return
        if not isinstance(modestr, bytes):
            modestr = modestr.encode("ascii")
        dgst.update(util.interpolate_bytes(fmt, len(modestr), modestr))

    def _write_symlink_line(self, opath, dgst):
        """Write the output line for a symbolic link that is not followed.

        The size of a symbolic link is never printed besides its digest;
        in size-only mode its size is given as 0.

        """
        if self._size_only:
            line = format_bsd_line("SIZE", None, opath, False, 0)
        else:
            line = format_bsd_line(
                self._algorithm[1], dgst, opath, self._use_base64)
        self._outfp.write(line)
        self._outfp.flush()

    def _write_entry_line(self, opath, dgst, size):
        """Write the output line for a file or a completed directory."""
        if self._size_only:
            line = format_bsd_line("SIZE", None, opath, False, size)
        elif self._print_size:
            line = format_bsd_line(
                self._algorithm[1], dgst, opath, self._use_base64, size)
        else:
            line = format_bsd_line(
                self._algorithm[1], dgst, opath, self._use_base64)
        self._outfp.write(line)
        self._outfp.flush()

    def _generate(self, root, top):
        """Handle the directory `top` below `root` recursively.

        :param root: the (normalized) root directory
        :param top: a tuple with the path components from `root` down to
                    the directory to handle; empty for `root` itself
        :return: a tuple with the digest and the accumulated size of the
                 directory

        """
        logging.debug("Handling %s/%r", root, top)
        path = os.path.join(root, *top) if top else root
        with walk.ScanDir(path) as dirscan:
            fsobjects = list(dirscan)
        fsobjects.sort(key=walk.WalkDirEntry.sort_key)
        # All output names in this directory start with this prefix
        prefix = "/".join(top) + "/" if top else ""
        dir_dgst = self._algorithm[0]()
        dir_size = 0
        for fso in fsobjects:
            if not fso.is_dir:
                dir_dgst.update(util.interpolate_bytes(
                    b"1:f,%d:%s,", len(fso.fsname), fso.fsname))
                dir_size += fso.stat.st_size
                if self._with_metadata_mtime:
                    mtime = datetime.datetime.utcfromtimestamp(
                        int(fso.stat.st_mtime))
                    mtime = mtime.isoformat("T") + "Z"
                    if not isinstance(mtime, bytes):
                        mtime = mtime.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"5:mtime,%d:%s,", len(mtime), mtime))
                self._update_with_mode(dir_dgst, fso)
                if self._size_only:
                    # No file digests in size-only mode
                    dgst = None
                else:
                    dgst = digest.compute_digest_file(
                        self._algorithm[0], fso.path, use_mmap=self._use_mmap)
                    dir_dgst.update(util.interpolate_bytes(
                        b"%d:%s,", len(dgst), dgst))
                self._write_entry_line(
                    prefix + fso.name, dgst, fso.stat.st_size)
            elif fso.is_symlink and not self._follow_directory_symlinks:
                link_digest = self._symlink_digest(fso.path)
                dir_dgst.update(util.interpolate_bytes(
                    b"1:S,%d:%s,", len(fso.fsname), fso.fsname))
                #
                # - no mtime and no mode for symlinks
                # - also does not count for dir_size
                #
                dir_dgst.update(util.interpolate_bytes(
                    b"%d:%s,", len(link_digest), link_digest))
                opath = prefix + fso.name
                self._write_symlink_line("%s/./@" % (opath,), link_digest)
            else:
                #
                # Follow the symlink to dir or handle a "real" directory
                #

                # Get subdir data from recursing into it
                sub_dir_dgst, sub_dir_size = self._generate(
                    root, top + (fso.name, ))

                dir_size += sub_dir_size
                dir_dgst.update(util.interpolate_bytes(
                    b"1:d,%d:%s,", len(fso.fsname), fso.fsname))
                dir_dgst.update(util.interpolate_bytes(
                    b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst))
                self._update_with_mode(dir_dgst, fso)

        # The directory itself is written after all of its entries
        dir_digest = dir_dgst.digest()
        self._write_entry_line(prefix, dir_digest, dir_size)
        return (dir_digest, dir_size)


def generate_treesum_for_directory(
        outfp, root, algorithm, use_mmap, use_base64, handle_root_logical,
        follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode,
        with_metadata_mtime, size_only, print_size,
        minimal=None, comment=None):
    """Write the header lines and the treesum of `root` into `outfp`.

    This implementation is based on :func:`walk.walk` and remembers the
    digests of already handled directories in a dict.

    :param outfp: a *binary* file with a "write()" and a "flush()" method
    :param root: the directory where to start
    :param algorithm: a sequence: item 0 is called without arguments to
                      get a new digest object, item 1 is the tag that is
                      written in front of the digest lines
    :param use_mmap: handed to :func:`digest.compute_digest_file`
    :param use_base64: handed to :func:`format_bsd_line` for digests
    :param handle_root_logical: if false a `root` that is a symbolic link
                                is not followed
    :param follow_directory_symlinks: handed to :func:`walk.walk` as
                                      `follow_symlinks`; if false the
                                      symbolic links to directories get
                                      a digest of their link target
    :param with_metadata_mode: put the S_IMODE mode bits into the
                               directory digests
    :param with_metadata_full_mode: put all mode bits into the directory
                                    digests; wins over
                                    `with_metadata_mode`
    :param with_metadata_mtime: put the mtime of files into the directory
                                digests
    :param size_only: write sizes only and compute no file digests
    :param print_size: write sizes in addition to the digests
    :param minimal: if not `None` no timestamps and no comments are
                    written and `minimal` replaces `root` in the "ROOT"
                    line
    :param comment: an iterable with comment lines or `None`

    """
    outfp.write(format_bsd_line("VERSION", "1", None, False))
    outfp.flush()

    # Note given non-default flags that are relevant for directory traversal
    flags = []
    if with_metadata_full_mode:
        flags.append("with-metadata-fullmode")
    elif with_metadata_mode:
        flags.append("with-metadata-mode")
    if with_metadata_mtime:
        flags.append("with-metadata-mtime")
    if handle_root_logical:
        flags.append("logical")
    if follow_directory_symlinks:
        flags.append("follow-directory-symlinks")
    if size_only:
        flags.append("size-only")
    else:
        if print_size:
            flags.append("print-size")
    if flags:
        flags.sort()
        outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False))
        outfp.flush()

    if minimal is None:
        # Write execution timestamps in POSIX epoch and ISO format
        ts = int(time.time())
        outfp.write(format_bsd_line("TIMESTAMP", ts, None, False))
        ts = (datetime.datetime.utcfromtimestamp(ts)).isoformat("T")
        outfp.write(format_bsd_line("ISOTIMESTAMP", ts, None, False))
        outfp.flush()

        if comment:
            for line in comment:
                outfp.write(format_bsd_line("COMMENT", None, line, False))

    # In minimal mode the (possibly empty) tag replaces the root name
    if minimal is not None:
        outfp.write(
            format_bsd_line(
                "ROOT", None, minimal if minimal else "", False))
    else:
        outfp.write(format_bsd_line("ROOT", None, root, False))
    outfp.flush()

    # Maps the path components tuple of an already handled directory to
    # a tuple with its digest and its accumulated size
    dir_digests = {}

    if not handle_root_logical and os.path.islink(root):
        # Only the symbolic link itself is handled: do not traverse
        linktgt = util.fsencode(os.readlink(root))
        linkdgst = algorithm[0]()
        linkdgst.update(
            util.interpolate_bytes(b"%d:%s,", len(linktgt), linktgt))
        dir_dgst = algorithm[0]()
        dir_dgst.update(b"1:L,")
        dir_dgst.update(
            util.interpolate_bytes(
                b"%d:%s,", len(linkdgst.digest()), linkdgst.digest()))
        if size_only:
            outfp.write(
                format_bsd_line(
                    "SIZE",
                    None,
                    "./@",
                    False,
                    0))
        else:
            outfp.write(
                format_bsd_line(
                    algorithm[1],
                    dir_dgst.digest(),
                    "./@",
                    use_base64))
        outfp.flush()
        return

    for top, fsobjects in walk.walk(
            root,
            follow_symlinks=follow_directory_symlinks):
        dir_dgst = algorithm[0]()
        dir_size = 0

        for fso in fsobjects:
            if fso.is_dir:
                if fso.is_symlink and not follow_directory_symlinks:
                    # A symlink to a directory that is not followed:
                    # use the digest of the link target
                    linktgt = util.fsencode(os.readlink(fso.path))
                    linkdgst = algorithm[0]()
                    linkdgst.update(
                        util.interpolate_bytes(
                            b"%d:%s,", len(linktgt), linktgt))
                    dir_dgst.update(util.interpolate_bytes(
                        b"1:S,%d:%s,", len(fso.fsname), fso.fsname))
                    # no mtime and no mode for symlinks
                    dir_dgst.update(util.interpolate_bytes(
                        b"%d:%s,",
                        len(linkdgst.digest()), linkdgst.digest()))
                    opath = "/".join(top) + "/" + fso.name if top else fso.name
                    if size_only:
                        outfp.write(
                            format_bsd_line(
                                "SIZE",
                                None,
                                "%s/./@" % (opath,),
                                False,
                                0))
                    else:
                        outfp.write(
                            format_bsd_line(
                                algorithm[1],
                                linkdgst.digest(),
                                "%s/./@" % (opath,),
                                use_base64))
                    outfp.flush()
                    continue
                # fetch from dir_digests
                # NOTE(review): this needs walk.walk() to have yielded the
                #               subdirectory before its parent --
                #               presumably it walks bottom-up; confirm
                dgst, dsz = dir_digests[top + (fso.name,)]
                dir_size += dsz
                dir_dgst.update(util.interpolate_bytes(
                    b"1:d,%d:%s,", len(fso.fsname), fso.fsname))
                dir_dgst.update(util.interpolate_bytes(
                    b"%d:%s,", len(dgst), dgst))
                if with_metadata_full_mode:
                    modestr = normalized_mode_str(fso.stat.st_mode)
                    if not isinstance(modestr, bytes):
                        modestr = modestr.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"8:fullmode,%d:%s,", len(modestr), modestr))
                elif with_metadata_mode:
                    modestr = normalized_compatible_mode_str(fso.stat.st_mode)
                    if not isinstance(modestr, bytes):
                        modestr = modestr.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"4:mode,%d:%s,", len(modestr), modestr))
            else:
                # Everything that is not a directory is handled as a file
                dir_dgst.update(util.interpolate_bytes(
                    b"1:f,%d:%s,", len(fso.fsname), fso.fsname))
                dir_size += fso.stat.st_size
                if with_metadata_mtime:
                    # The mtime is truncated to full seconds
                    mtime = datetime.datetime.utcfromtimestamp(
                        int(fso.stat.st_mtime))
                    mtime = mtime.isoformat("T") + "Z"
                    if not isinstance(mtime, bytes):
                        mtime = mtime.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"5:mtime,%d:%s,", len(mtime), mtime))
                if with_metadata_full_mode:
                    modestr = normalized_mode_str(fso.stat.st_mode)
                    if not isinstance(modestr, bytes):
                        modestr = modestr.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"8:fullmode,%d:%s,", len(modestr), modestr))
                elif with_metadata_mode:
                    modestr = normalized_compatible_mode_str(fso.stat.st_mode)
                    if not isinstance(modestr, bytes):
                        modestr = modestr.encode("ascii")
                    dir_dgst.update(util.interpolate_bytes(
                        b"4:mode,%d:%s,", len(modestr), modestr))
                # No file digests in size-only mode
                if not size_only:
                    dgst = digest.compute_digest_file(
                        algorithm[0], fso.path, use_mmap=use_mmap)
                    dir_dgst.update(util.interpolate_bytes(
                        b"%d:%s,", len(dgst), dgst))
                opath = "/".join(top) + "/" + fso.name if top else fso.name
                if size_only:
                    outfp.write(
                        format_bsd_line(
                            "SIZE", None, opath, False, fso.stat.st_size))
                else:
                    if print_size:
                        outfp.write(
                            format_bsd_line(
                                algorithm[1], dgst, opath, use_base64,
                                fso.stat.st_size))
                    else:
                        outfp.write(
                            format_bsd_line(
                                algorithm[1], dgst, opath, use_base64))
                outfp.flush()
        # The directory itself is written after all of its entries; its
        # name ends with a slash and is empty for the root
        opath = "/".join(top) + "/" if top else ""
        if size_only:
            outfp.write(format_bsd_line(
                    "SIZE", None, opath, False, dir_size))
        else:
            if print_size:
                outfp.write(format_bsd_line(
                    algorithm[1], dir_dgst.digest(), opath,
                    use_base64, dir_size))
            else:
                outfp.write(format_bsd_line(
                    algorithm[1], dir_dgst.digest(), opath, use_base64))
        outfp.flush()
        # Remember the result for the handling of the parent directory
        dir_digests[top] = (dir_dgst.digest(), dir_size)


def normalized_compatible_mode_str(mode):
    """Return the permission bits of `mode` as octal string with a
    leading "0".

    Only the bits that :func:`stat.S_IMODE` yields are considered, i.e.
    the filetype bits are dropped.

    """
    # XXX FIXME: Windows and "executable"
    octal = "%o" % (stat.S_IMODE(mode),)
    if octal.startswith("0"):
        return octal
    return "0" + octal


def normalized_mode_str(mode):
    """Return all bits of `mode` as octal string with a leading "0"."""
    octal = "%o" % (mode,)
    if octal.startswith("0"):
        return octal
    return "0" + octal


def format_bsd_line(what, value, filename, use_base64, size=None):
    """Format one output line in a BSD-like "TAG (name) = value" style.

    :param what: the tag of the line. "TIMESTAMP", "ISOTIMESTAMP",
                 "FLAGS" and "VERSION" yield "TAG = value" lines and
                 must not have a `filename`. All other tags need one.
    :param value: the value; for digest lines the binary digest
    :param filename: the name within the parentheses; for "COMMENT" the
                     text of the comment
    :param use_base64: write a digest in base64 instead of hexadecimal
    :param size: the size for "SIZE" lines; on digest lines it is
                 appended to the digest after a comma if not `None`
    :return: the line including the line separator of the platform
    :rtype: bytes

    """
    eol = os.linesep
    if not isinstance(eol, bytes):
        eol = eol.encode("utf-8")
    tag = what if isinstance(what, bytes) else what.encode("ascii")

    # The lines without a name
    if tag in (b"TIMESTAMP", b"ISOTIMESTAMP", b"FLAGS", b"VERSION"):
        assert filename is None
        if tag == b"TIMESTAMP":
            return util.interpolate_bytes(b"TIMESTAMP = %d%s", value, eol)
        text = value if isinstance(value, bytes) else value.encode("ascii")
        return util.interpolate_bytes(b"%s = %s%s", tag, text, eol)

    assert filename is not None
    if tag == b"COMMENT":
        # Comments are free text and not names from the filesystem
        if isinstance(filename, bytes):
            text = filename
        else:
            text = filename.encode("utf-8")
        return util.interpolate_bytes(b"COMMENT (%s)%s", text, eol)

    name = filename if isinstance(filename, bytes) \
        else util.fsencode(filename)
    if tag == b"SIZE":
        return util.interpolate_bytes(b"SIZE (%s) = %d%s", name, size, eol)
    if value is None:
        return util.interpolate_bytes(b"%s (%s)%s", tag, name, eol)

    encoder = base64.b64encode if use_base64 else binascii.hexlify
    encoded = encoder(value)
    # The marker for a root that is a symlink is kept as it is
    if name != b"./@":
        name = util.normalize_filename(name, True)
    if size is not None:
        return util.interpolate_bytes(
            b"%s (%s) = %s,%d%s", tag, name, encoded, size, eol)
    return util.interpolate_bytes(
        b"%s (%s) = %s%s", tag, name, encoded, eol)


# Run the command line interface when executed as a script; the return
# value of main() is handed to sys.exit().
if __name__ == "__main__":
    sys.exit(main())