Mercurial > hgrepos > Python > libs > data-schema
view data_schema/util.py @ 34:9a3da5a008fc v0.2
+++++ v0.2
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Wed, 19 Jul 2023 09:19:46 +0200 |
| parents | 68286d27f27d |
| children |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2023 Franz Glasner
# :License:   BSD-3-Clause. See LICENSE.txt for details.
# :-
r"""Some utility functions for use within the package.

"""

__all__ = ["get_data_stream"]


try:
    from importlib import resources as il_resources
except ImportError:
    # No `importlib.resources` available: fall back to setuptools'
    # `pkg_resources` for package data access.
    il_resources = None
    import pkg_resources

import rfc3986
import rfc3986.validators


def _is_safe_path(path):
    """Check that `path` cannot be used to escape its base directory.

    A path is considered unsafe if it contains a backslash or a colon
    anywhere, or if any of its ``/``-separated segments is exactly ``..``
    (this includes a path that consists of ``..`` only).

    :param str path: the path component of an URI
    :return: `True` if the path is considered safe, `False` otherwise
    :rtype: bool

    """
    if '\\' in path or ':' in path:
        return False
    # Looking at whole segments catches "../x", "x/..", "x/../y" and also
    # a bare ".." -- but not harmless names like "..." or "a..b".
    return ".." not in path.split('/')


def _open_package_resource(datapackage, datapath, packagesubdir):
    """Open the package resource `datapath` below `datapackage` for reading
    in binary mode.

    `datapath` is a relative, ``/``-separated path. With
    `importlib.resources` every directory segment becomes a sub-package
    name, and only the last segment is the resource name. The
    `pkg_resources` fallback accepts the slash separated path as is.

    """
    if il_resources:
        # Split on *every* slash: each directory level must become its own
        # dotted package component ("a/b/c.json" -> "<pkg>.a.b", "c.json").
        segments = datapath.split('/')
        package_parts = [datapackage]
        if packagesubdir:
            package_parts.append(packagesubdir)
        package_parts.extend(segments[:-1])
        return il_resources.open_binary('.'.join(package_parts), segments[-1])
    if packagesubdir:
        prefix = packagesubdir + "/"
    else:
        prefix = ""
    return pkg_resources.resource_stream(   # noqa:E501  # pylint:disable=used-before-assignment
        datapackage, prefix + datapath)


def get_data_stream(uri, basedir=None, basepackage=None,
                    packagesubdir="packagedata"):
    """Open the resource that `uri` refers to as a binary stream.

    Two URI schemes are supported:

    ``data:``
      The URI path has the form ``[<package>:]<relative-path>`` and is
      resolved as a Python package resource within
      ``<package>.<packagesubdir>``. If the ``<package>`` part is missing
      or empty, `basepackage` is used instead. The path must be relative.

    ``file:``
      The URI path must be absolute; it is resolved by prepending
      `basedir` to the URI path.

    For both schemes authority, query and fragment must be empty and the
    path must not contain percent-encoded characters.

    :param str uri: the URI of the resource to open
    :param basedir: the base directory for ``file:`` URIs
    :param basepackage: the default package for ``data:`` URIs that do not
                        name a package themselves
    :param packagesubdir: the sub-package (sub-directory) within the data
                          package that holds the data; may be empty
    :return: a stream opened for reading in binary mode; the returned
             stream must support the context manager protocol
    :raises ValueError: if the URI is invalid, unsafe, uses an unsupported
                        scheme, or if no package is available for
                        a ``data:`` URI
    :raises TypeError: if a ``file:`` URI is given but `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()
    if u.scheme == "data":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid data URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a data URI")
        if '%' in u.path:
            raise ValueError("URI encoded paths not supported")
        datapackage, sep, datapath = u.path.partition(':')
        if sep:
            if not datapackage:
                # ":<path>": an explicitly empty package part
                datapackage = basepackage
            if ':' in datapath:
                raise ValueError("colon in an URI's path not supported")
        else:
            # No package part at all: the complete path is the data path
            datapackage = basepackage
            datapath = u.path
        # Empty path segments would map to empty package or resource
        # names: refuse them outright.
        if "//" in datapath:
            raise ValueError(
                "URI path for the `data' scheme contains `//' substring")
        if datapath.startswith('/'):
            raise ValueError(
                "URI path for the `data' scheme must not be absolute")
        if datapackage is None:
            raise ValueError("missing the data package")
        return _open_package_resource(datapackage, datapath, packagesubdir)
    elif u.scheme == "file":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid file URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a file URI")
        if '%' in u.path:
            raise ValueError(
                "percent-encoded paths not supported in data-stream file URI")
        if not _is_safe_path(u.path):
            raise ValueError("unsafe path in file URI is not supported")
        if not u.path.startswith('/'):
            raise ValueError("relative file URI not handled")
        # Resolve the file relative to the given base directory
        if basedir is None:
            raise TypeError("no base directory in `basedir' given")
        return open("{}/{}".format(basedir.rstrip("/\\"),
                                   u.path.lstrip('/')),
                    "rb")
    else:
        raise ValueError("scheme `{}' not supported".format(u.scheme))
