view _postprocess-sdist.py @ 398:3beac9c85781 default tip

Syntax in pyproject.toml: use unescaped syntax
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 16 Feb 2026 15:36:17 +0100
parents c033f4072c14
children
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2025-2026 Franz Glasner
# SPDX-License-Identifier: BSD-3-Clause
# :-
"""Postprocress a .tar.gz-sdist to include tests/data with symlinks as symlinks.

Produce an sdist with all the data in :file:`tests/data/`::

  rm -rf dist py_cutils.egg-info
  python -m build
  python _postprocess-sdist.py

"""

from __future__ import print_function, absolute_import

try:
    import tomllib
except ImportError:
    import tomli as tomllib

import gzip
import importlib
import io
import os
import shutil
import tarfile


def main():
    """Postprocess the built sdist so ``tests/data`` symlinks are preserved.

    Steps:

    1. Read the project name and version from :file:`pyproject.toml`,
       resolving a dynamic ``version`` via setuptools' ``attr:`` directive.
    2. Uncompress the PEP 625 sdist ``<name>-<version>.tar.gz``.
    3. Append :file:`tests/data` to the tar (tarfile stores symlinks as
       symlinks) and extend the archived ``SOURCES.txt`` to match.
    4. Recompress the tar and remove the intermediate ``.tar`` file.

    :raises RuntimeError: if the configuration or the dist directory is not
        in the expected state, or the archive was already postprocessed.
    """
    with open("pyproject.toml", "rb") as cfgfile:
        cfg = tomllib.load(cfgfile)
    project_name = cfg["project"]["name"]
    # Normalize for PEP 625 file naming (this project only uses dashes).
    normalized_project_name = project_name.replace("-", "_")

    project_version = cfg["project"].get("version")
    if project_version is None:
        #
        # "version" must then be declared dynamic and be resolvable through
        # setuptools' "attr:" mechanism: import the module and read the
        # attribute.  Explicit raises instead of assert: asserts are
        # stripped when running under "python -O".
        #
        if "version" not in cfg["project"].get("dynamic", ()):
            raise RuntimeError("no static and no dynamic project version")
        project_version = cfg["tool"]["setuptools"]["dynamic"]["version"]
        if "attr" not in project_version:
            raise RuntimeError(
                "unsupported dynamic version specification: {!r}".format(
                    project_version))
        vermodname, sep, vermodattr = (project_version["attr"]
                                       .strip()
                                       .rpartition('.'))
        # rpartition() never returns None, so the interesting failure is a
        # spec without a dot: then vermodname (and sep) are empty.
        if not sep:
            raise RuntimeError(
                "invalid attr specification: {!r}".format(
                    project_version["attr"]))
        vermod = importlib.import_module(vermodname)
        project_version = getattr(vermod, vermodattr)
    #
    # PEP 625 requires that sdists are of the form
    # <normalized_project_name>-<project_version>.tar.gz
    #
    archive_name = "{}-{}.tar.gz".format(
        normalized_project_name, project_version)
    uncompressed_archive_name = "{}-{}.tar".format(
        normalized_project_name, project_version)
    archive_path = "dist/" + archive_name
    uncompressed_archive_path = "dist/" + uncompressed_archive_name
    if not os.path.isfile(archive_path):
        raise RuntimeError("sdist not found: {}".format(archive_path))
    if os.path.isfile(uncompressed_archive_path):
        raise RuntimeError("stale intermediate archive: {}".format(
            uncompressed_archive_path))

    # the directory within the archive
    archive_path_prefix = "{}-{}".format(
        normalized_project_name, project_version)

    egg_directory = "{}.egg-info".format(normalized_project_name)
    if not os.path.isdir(egg_directory):
        raise RuntimeError("missing egg-info: {}".format(egg_directory))
    sources_txt_path = "{}/SOURCES.txt".format(egg_directory)
    sources_txt_arcname = "{}/{}/SOURCES.txt".format(
        archive_path_prefix,
        egg_directory)

    # Uncompress .tar.gz -> .tar; copyfileobj streams in chunks without
    # loading the whole archive into memory.
    with gzip.open(archive_path, "rb") as ca:
        with open(uncompressed_archive_path, "wb") as uca:
            shutil.copyfileobj(ca, uca, 64 * 1024)

    # Read the current SOURCES.txt from within the archive.
    with tarfile.TarFile(uncompressed_archive_path, "r") as tf:
        sf = tf.extractfile(sources_txt_arcname)
        if sf is None:
            # extractfile() returns None for non-regular members
            raise RuntimeError(
                "not a regular file: {}".format(sources_txt_arcname))
        try:
            sources_txt = sf.read()
        finally:
            sf.close()

    with tarfile.TarFile(uncompressed_archive_path, "a") as tf:
        arcname = "{}/tests/data".format(archive_path_prefix)
        try:
            tf.getmember(arcname)
        except KeyError:
            pass
        else:
            raise RuntimeError("already postprocessed")
        pre_names = set(tf.getnames())
        tf.add("tests/data", arcname=arcname, recursive=True)

        #
        # Determine the new files and symlinks that are to be added
        # to SOURCES.txt. Skip directories.
        #
        new_names = sorted(set(tf.getnames()) - pre_names)
        new_sources = []
        for np in new_names:
            # Strip "<prefix>/" to get the path relative to the project root.
            nn = np[len(archive_path_prefix) + 1:]
            if not tf.getmember(np).isdir():
                new_sources.append(nn)

        # Augment SOURCES.txt and add it to the archive
        sources_info = tf.gettarinfo(
            sources_txt_path, arcname=sources_txt_arcname)
        sf = io.BytesIO()
        sf.write(sources_txt)
        if not sources_txt.endswith(b'\n'):
            sf.write(b'\n')
        if new_sources:
            sf.write(b('\n'.join(new_sources)))
            sf.write(b'\n')     # keep SOURCES.txt newline-terminated
        sources_info.size = len(sf.getvalue())
        sf.seek(0)
        #
        # This adds SOURCES.txt a 2nd time: this effectively overwrites
        # the "earlier" one.
        #
        tf.addfile(sources_info, sf)

    # Recompress.  GzipFile's "filename" just records the original (.tar)
    # name in the gzip header.
    with open(uncompressed_archive_path, "rb") as uca:
        with open(archive_path, "wb") as ca:
            with gzip.GzipFile(filename=uncompressed_archive_name,
                               fileobj=ca,
                               mode="wb") as gzfile:
                shutil.copyfileobj(uca, gzfile, 64 * 1024)

    # Remove the intermediate .tar so a later rerun starts from a clean
    # state (otherwise the stale-archive check above would trip).
    os.remove(uncompressed_archive_path)


def b(buf, encoding="ascii"):
    """Return *buf* as :class:`bytes`, encoding text with *encoding*."""
    return buf if isinstance(buf, bytes) else buf.encode(encoding)


if __name__ == "__main__":
    main()