view data_schema/util.py @ 8:2352d14ae261

Make the packagedata automatic subdir customizable
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 07 Jul 2023 00:32:30 +0200
parents 84dfd1a94926
children 0ef7141030ca
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2023 Franz Glasner
# :License: BSD-3-Clause. See LICENSE.txt for details.
# :-
r"""Some utility functions for use within the package.

"""

__all__ = ["get_data_stream"]


try:
    from importlib import resources as il_resources
except ImportError:
    il_resources = None
    import pkg_resources

import rfc3986
import rfc3986.validators


def _is_safe_path(path):
    if any(sep in path for sep in ('\\', ':')):
        return False
    if path.startswith("../"):
        return False
    if path.endswith("/.."):
        return False
    if "/../" in path:
        return False
    return True


def get_data_stream(uri, basedir=None,
                    basepackage=None, packagesubdir="packagedata"):
    """Open the resource that `uri` refers to as a binary stream.

    "data:" URIs are resolved as Python package resources.  The URI
    path has the form ``[<package>:]<relative/path>``.  If the package
    part is missing or empty `basepackage` is used instead.  The
    resource is looked up within "<package>.<packagesubdir>" -- or
    directly within "<package>" if `packagesubdir` is empty or `None`.

    "file:" URIs must have an absolute path; they are resolved by
    prepending `basedir` to the URI path.

    :param str uri: the URI of the resource to open
    :param basedir: the directory that "file:" URIs are resolved
                    against
    :param basepackage: the package that is used for "data:" URIs that
                        do not name a package by themselves
    :param packagesubdir: the sub-package/sub-directory of the package
                          where data resources are searched in
    :return: a stream opened for reading binary data; it needs to be
             closed by the caller as usual
    :raises ValueError: if the URI has an unsupported scheme, if it has
                        an authority, query or fragment, if its path is
                        invalid, percent-encoded or unsafe, or if no
                        package can be determined for a "data:" URI
    :raises TypeError: if a "file:" URI is given but `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()
    if u.scheme == "data":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid data URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a data URI")
        if u.path.find('%') >= 0:
            raise ValueError("URI encoded paths not supported")
        datapackage, sep, datapath = u.path.partition(':')
        if sep:
            # "<package>:<path>" form; an empty package selects the default
            if not datapackage:
                datapackage = basepackage
            if datapath.find(':') >= 0:
                raise ValueError("colon in an URI's path not supported")
        else:
            datapackage = basepackage
            datapath = u.path
        # Just to be sure: an empty path segment can never name a resource
        if "//" in datapath:
            raise ValueError(
                "URI path for the `data' scheme contains `//' substring")
        if not datapath.startswith('/'):
            if datapackage is None:
                raise ValueError("missing the data package")
            if il_resources:
                # importlib.resources wants a dotted package name and a
                # plain resource name: every directory of the path --
                # not just the last one -- becomes a package component.
                datapath_parts = datapath.split('/')
                package_parts = [datapackage]
                # An empty/None `packagesubdir` means: search directly
                # within the data package.
                if packagesubdir:
                    package_parts.append(packagesubdir)
                package_parts.extend(datapath_parts[:-1])
                return il_resources.open_binary(
                    '.'.join(package_parts), datapath_parts[-1])
            else:
                # pkg_resources takes a "/"-separated resource path
                # relative to the package.
                if packagesubdir:
                    psubdir = packagesubdir + "/"
                else:
                    psubdir = ""
                return pkg_resources.resource_stream(  # noqa:E501    # pylint:disable=used-before-assignment
                    datapackage, psubdir + datapath)
        else:
            raise ValueError(
                "URI path for the `data' scheme must not be absolute")
    elif u.scheme == "file":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid file URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a file URI")
        if u.path.find('%') >= 0:
            raise ValueError(
                "percent-encoded paths not supported in data-stream file URI")
        if not _is_safe_path(u.path):
            raise ValueError("unsafe path in file URI is not supported")
        if u.path.startswith('/'):
            # resolve the file relative to the base directory
            if basedir is None:
                raise TypeError("no base directory in `basedir' given")
            return open("{}/{}".format(basedir.rstrip("/\\"),
                                       u.path.lstrip('/')),
                        "rb")
        else:
            raise ValueError("relative file URI not handled")
    else:
        raise ValueError("scheme `{}' not supported".format(u.scheme))