Mercurial > hgrepos > Python > libs > data-schema
comparison data_schema/util.py @ 5:84dfd1a94926
Add the existing implementation.
All tests work.
The documentation as text file is included also.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 06 Jul 2023 23:41:41 +0200 |
| parents | |
| children | 2352d14ae261 |
comparison
equal
deleted
inserted
replaced
| 4:d715f0c13c60 | 5:84dfd1a94926 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 # :- | |
| 3 # :Copyright: (c) 2023 Franz Glasner | |
| 4 # :License: BSD-3-Clause. See LICENSE.txt for details. | |
| 5 # :- | |
| 6 r"""Some utility functions for use within the package. | |
| 7 | |
| 8 """ | |
| 9 | |
| 10 __all__ = ["get_data_stream"] | |
| 11 | |
| 12 | |
| 13 try: | |
| 14 from importlib import resources as il_resources | |
| 15 except ImportError: | |
| 16 il_resources = None | |
| 17 import pkg_resources | |
| 18 | |
| 19 import rfc3986 | |
| 20 import rfc3986.validators | |
| 21 | |
| 22 | |
| 23 def _is_safe_path(path): | |
| 24 if any(sep in path for sep in ('\\', ':')): | |
| 25 return False | |
| 26 if path.startswith("../"): | |
| 27 return False | |
| 28 if path.endswith("/.."): | |
| 29 return False | |
| 30 if "/../" in path: | |
| 31 return False | |
| 32 return True | |
| 33 | |
| 34 | |
def get_data_stream(uri, basedir=None, basepackage=None):
    """Open the resource that `uri` refers to and return a binary stream.

    Only the URI schemes "data" and "file" are handled.  For both of
    them authority, query and fragment must be empty and paths that
    contain a ``%`` (percent-encoding) are rejected.

    "data:" URIs are resolved as Python package resources within
    "<package>.packagedata".  The URI path has the form
    ``<package>:<relative/path>`` or just ``<relative/path>``.  If the
    package part is missing or empty `basepackage` is used instead.
    The path must be relative.

    "file:" URIs are resolved by prepending `basedir` to the URI path.
    The path must be absolute.

    The returned stream needs to be closed by the caller as usual.

    :param str uri: the URI of the data to be opened
    :param basedir: the directory that the path of a "file:" URI is
                    appended to
    :param basepackage: the name of the default package for "data:" URIs
    :return: a stream that has been opened for reading binary data
    :raises ValueError: if the scheme is not supported, if the URI has
                        an authority, query or fragment, if its path is
                        invalid, percent-encoded or unsafe, if a "data:"
                        path is absolute or has no package, or if a
                        "file:" path is relative
    :raises TypeError: if a "file:" URI is given but `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()

    if u.scheme == "data":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid data URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a data URI")
        if '%' in u.path:
            raise ValueError("URI encoded paths not supported")

        # An optional "<package>:" prefix selects the package to search in
        datapackage, sep, datapath = u.path.partition(':')
        if not sep:
            # no colon at all: the complete path is the resource path
            datapackage = basepackage
            datapath = u.path
        else:
            if not datapackage:
                datapackage = basepackage
            if ':' in datapath:
                raise ValueError("colon in an URI's path not supported")

        # Defensive check: empty path segments are never accepted here
        if "//" in datapath:
            raise ValueError(
                "URI path for the `data' scheme contains `//' substring")
        if datapath.startswith('/'):
            raise ValueError(
                "URI path for the `data' scheme must not be absolute")
        if datapackage is None:
            raise ValueError("missing the data package")

        if il_resources:
            # `open_binary()` wants a dotted package name and a plain
            # file name.  So EVERY directory of the resource path must
            # become a package level of its own: split at each "/" and
            # not only at the last one -- otherwise the slashes of nested
            # directories would end up in the package name.
            segments = datapath.split('/')
            resource_package = '.'.join(
                [datapackage, 'packagedata'] + segments[:-1])
            return il_resources.open_binary(resource_package, segments[-1])
        # No importlib.resources available: use the pkg_resources fallback
        return pkg_resources.resource_stream(    # noqa:E501  # pylint:disable=used-before-assignment
            datapackage, "packagedata/" + datapath)

    if u.scheme == "file":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid file URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a file URI")
        if '%' in u.path:
            raise ValueError(
                "percent-encoded paths not supported in data-stream file URI")
        if not _is_safe_path(u.path):
            raise ValueError("unsafe path in file URI is not supported")
        if not u.path.startswith('/'):
            raise ValueError("relative file URI not handled")
        # The absolute URI path is resolved relative to `basedir`
        if basedir is None:
            raise TypeError("no base directory in `basedir' given")
        return open("{}/{}".format(basedir.rstrip("/\\"),
                                   u.path.lstrip('/')),
                    "rb")

    raise ValueError("scheme `{}' not supported".format(u.scheme))
