comparison data_schema/util.py @ 5:84dfd1a94926

Add the existing implementation. All tests work. The documentation as text file is included also.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 06 Jul 2023 23:41:41 +0200
parents
children 2352d14ae261
comparison
equal deleted inserted replaced
4:d715f0c13c60 5:84dfd1a94926
1 # -*- coding: utf-8 -*-
2 # :-
3 # :Copyright: (c) 2023 Franz Glasner
4 # :License: BSD-3-Clause. See LICENSE.txt for details.
5 # :-
6 r"""Some utility functions for use within the package.
7
8 """
9
10 __all__ = ["get_data_stream"]
11
12
13 try:
14 from importlib import resources as il_resources
15 except ImportError:
16 il_resources = None
17 import pkg_resources
18
19 import rfc3986
20 import rfc3986.validators
21
22
23 def _is_safe_path(path):
24 if any(sep in path for sep in ('\\', ':')):
25 return False
26 if path.startswith("../"):
27 return False
28 if path.endswith("/.."):
29 return False
30 if "/../" in path:
31 return False
32 return True
33
34
def get_data_stream(uri, basedir=None, basepackage=None):
    """Open the resource named by `uri` and return it as a binary stream.

    Two URI schemes are supported:

    ``data:``
      The URI path has the form ``[<package>:]<relative/path>``.  The
      resource is looked up as Python package data below
      ``<package>.packagedata``.  If the package part is missing or
      empty `basepackage` is used instead.  The path must be relative.

    ``file:``
      The URI path must be absolute.  It is resolved by prepending
      `basedir` to it.

    For both schemes the authority, query and fragment must be empty and
    a ``%`` within the path is rejected.

    :param str uri: the URI of the resource to open
    :param str basedir: the directory that ``file:`` URIs are resolved
                        against
    :param str basepackage: the package that ``data:`` URIs without an
                            explicit package are resolved against
    :return: the opened stream; it needs to be closed by the caller as
             usual
    :raises ValueError: if the URI is invalid, considered unsafe, has an
                        unsupported scheme or if no data package can be
                        determined for a ``data:`` URI
    :raises TypeError: if a ``file:`` URI is given and `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()
    if u.scheme == "data":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid data URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a data URI")
        if '%' in u.path:
            raise ValueError("URI encoded paths not supported")
        # An optional "<package>:" prefix selects the data package
        datapackage, sep, datapath = u.path.partition(':')
        if sep:
            if not datapackage:
                datapackage = basepackage
            if ':' in datapath:
                raise ValueError("colon in an URI's path not supported")
        else:
            datapackage = basepackage
            datapath = u.path
        # Reject empty path segments explicitly: just to be sure
        if "//" in datapath:
            raise ValueError(
                "URI path for the `data' scheme contains `//' substring")
        if datapath.startswith('/'):
            raise ValueError(
                "URI path for the `data' scheme must not be absolute")
        if datapackage is None:
            raise ValueError("missing the data package")
        if il_resources:
            # Every directory of the resource path is a subpackage of
            # "<datapackage>.packagedata": split on *all* slashes so that
            # nested directories map to a dotted package name.
            datapath_parts = datapath.split('/')
            datapath_dirs = datapath_parts[:-1]
            datapath_file = datapath_parts[-1]
            return il_resources.open_binary(
                '.'.join([datapackage, "packagedata"] + datapath_dirs),
                datapath_file)
        return pkg_resources.resource_stream(   # noqa:E501 # pylint:disable=used-before-assignment
            datapackage, "packagedata/" + datapath)
    elif u.scheme == "file":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid file URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a file URI")
        if '%' in u.path:
            raise ValueError(
                "percent-encoded paths not supported in data-stream file URI")
        if not _is_safe_path(u.path):
            raise ValueError("unsafe path in file URI is not supported")
        if not u.path.startswith('/'):
            raise ValueError("relative file URI not handled")
        # resolve the file relative to the given base directory
        if basedir is None:
            raise TypeError("no base directory in `basedir' given")
        return open("{}/{}".format(basedir.rstrip("/\\"),
                                   u.path.lstrip('/')),
                    "rb")
    else:
        raise ValueError("scheme `{}' not supported".format(u.scheme))