Mercurial > hgrepos > Python > libs > data-schema
diff data_schema/util.py @ 5:84dfd1a94926
Add the existing implementation.
All tests work.
The documentation as text file is included also.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 06 Jul 2023 23:41:41 +0200 |
| parents | |
| children | 2352d14ae261 |
line wrap: on
line diff
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2023 Franz Glasner
# :License: BSD-3-Clause. See LICENSE.txt for details.
# :-
r"""Some utility functions for use within the package.

"""

__all__ = ["get_data_stream"]


try:
    from importlib import resources as il_resources
except ImportError:
    il_resources = None
    import pkg_resources

import rfc3986
import rfc3986.validators


def _is_safe_path(path):
    """Check that `path` is free of traversal and platform-specific syntax.

    :param str path: a '/'-separated path
    :return: `False` if `path` contains a backslash, a colon or a ``..``
             segment (including a path that is exactly ``..``);
             `True` otherwise
    :rtype: bool

    """
    if '\\' in path or ':' in path:
        return False
    # Segment-wise test: covers "../x", "x/..", "x/../y" and a bare "..".
    return ".." not in path.split('/')


def _open_data_uri(u, basepackage):
    """Open the package resource addressed by the normalized data URI `u`.

    The URI path is either ``<path>`` or ``<package>:<path>``; an empty
    or missing ``<package>`` falls back to `basepackage`.

    """
    if u.authority or u.query or u.fragment:
        raise ValueError("invalid data URI: authority, query and "
                         "fragment MUST be empty")
    if not rfc3986.validators.path_is_valid(u.path, require=True):
        raise ValueError("invalid or empty empty path within a data URI")
    if u.path.find('%') >= 0:
        raise ValueError("URI encoded paths not supported")
    datapackage, sep, datapath = u.path.partition(':')
    if sep:
        if not datapackage:
            datapackage = basepackage
        if datapath.find(':') >= 0:
            raise ValueError("colon in an URI's path not supported")
    else:
        datapackage = basepackage
        datapath = u.path
    # Defensive: reject empty path segments even after URI normalization.
    if "//" in datapath:
        raise ValueError(
            "URI path for the `data' scheme contains `//' substring")
    if datapath.startswith('/'):
        raise ValueError(
            "URI path for the `data' scheme must not be absolute")
    if datapackage is None:
        raise ValueError("missing the data package")
    if il_resources:
        # Every directory level of the path is a subpackage below
        # "<datapackage>.packagedata"; the last segment is the resource.
        # All '/' must be split here: splitting only at the last '/' would
        # leave slashes inside the package name for nested directories.
        datapath_parts = datapath.split('/')
        datapath_dirs = datapath_parts[:-1]
        datapath_file = datapath_parts[-1]
        return il_resources.open_binary(
            '.'.join([datapackage, 'packagedata'] + datapath_dirs),
            datapath_file)
    return pkg_resources.resource_stream(    # noqa:E501 # pylint:disable=used-before-assignment
        datapackage, "packagedata/" + datapath)


def _open_file_uri(u, basedir):
    """Open the file addressed by the normalized file URI `u` below `basedir`.

    """
    if u.authority or u.query or u.fragment:
        raise ValueError("invalid file URI: authority, query and "
                         "fragment MUST be empty")
    if not rfc3986.validators.path_is_valid(u.path, require=True):
        raise ValueError("invalid or empty empty path within a file URI")
    if u.path.find('%') >= 0:
        raise ValueError(
            "percent-encoded paths not supported in data-stream file URI")
    if not _is_safe_path(u.path):
        raise ValueError("unsafe path in file URI is not supported")
    if not u.path.startswith('/'):
        raise ValueError("relative file URI not handled")
    # The absolute URI path is resolved relative to the base directory.
    if basedir is None:
        raise TypeError("no base directory in `basedir' given")
    return open("{}/{}".format(basedir.rstrip("/\\"),
                               u.path.lstrip('/')),
                "rb")


def get_data_stream(uri, basedir=None, basepackage=None):
    """Open the resource addressed by `uri` and return a binary stream.

    The URI is parsed and normalized first.  Two schemes are supported:

    "data:" URIs are resolved as Python package resources within
    "<package>.packagedata".  The URI path has the form ``<path>`` or
    ``<package>:<path>``; if no package is given in the URI,
    `basepackage` is used.  The path must be relative and must not
    contain ``//``, ``%`` or a further colon.

    "file:" URIs are resolved by prepending `basedir` to the URI path.
    The path must be absolute and must not contain ``%``, a backslash,
    a colon or a ``..`` segment.

    For both schemes authority, query and fragment must be empty.

    The returned stream needs to be closed by the caller as usual.

    :param str uri: the "data:" or "file:" URI to open
    :param basedir: the base directory for "file:" URIs
    :param basepackage: the default package for "data:" URIs
    :return: a stream opened for reading in binary mode
    :raises ValueError: if the scheme is not supported, the URI is
                        malformed or unsafe, or no package is available
                        for a "data:" URI
    :raises TypeError: if a "file:" URI is given and `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()
    if u.scheme == "data":
        return _open_data_uri(u, basepackage)
    if u.scheme == "file":
        return _open_file_uri(u, basedir)
    raise ValueError("scheme `{}' not supported".format(u.scheme))
