annotate data_schema/util.py @ 29:68286d27f27d

FIX: Allow customization of the data stream loader (get_data_stream())
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 08 Jul 2023 16:10:36 +0200
parents 88ee7d1cc0bb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
1 # -*- coding: utf-8 -*-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
2 # :-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
3 # :Copyright: (c) 2023 Franz Glasner
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
4 # :License: BSD-3-Clause. See LICENSE.txt for details.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
5 # :-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
6 r"""Some utility functions for use within the package.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
7
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
8 """
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
9
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
10 __all__ = ["get_data_stream"]
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
11
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
12
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
13 try:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
14 from importlib import resources as il_resources
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
15 except ImportError:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
16 il_resources = None
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
17 import pkg_resources
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
18
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
19 import rfc3986
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
20 import rfc3986.validators
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
21
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
22
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
23 def _is_safe_path(path):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
24 if any(sep in path for sep in ('\\', ':')):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
25 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
26 if path.startswith("../"):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
27 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
28 if path.endswith("/.."):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
29 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
30 if "/../" in path:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
31 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
32 return True
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
33
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
34
8
2352d14ae261 Make the packagedata automatic subdir customizable
Franz Glasner <fzglas.hg@dom66.de>
parents: 5
diff changeset
def get_data_stream(uri, basedir=None,
                    basepackage=None, packagesubdir="packagedata"):
    """Open the resource that `uri` refers to as a binary stream.

    "data:" URIs are resolved as Python package resources. Their path has
    the form ``[<package>:]<relative/path>``; if the package part is
    missing or empty `basepackage` is used instead.
    The resource is resolved within "<package>.<packagesubdir>" (or
    directly within the package if `packagesubdir` is empty).

    "file:" URIs are resolved by prepending `basedir` to the URI path;
    the URI path must be absolute.

    The returned stream must support the context manager protocol.

    :param str uri: the "data:" or "file:" URI of the resource
    :param basedir: the directory that "file:" URIs are resolved against
    :param basepackage: the package for "data:" URIs that do not name
                        a package themselves
    :param packagesubdir: the sub-package (or sub-directory) of the
                          package that holds the data resources
    :return: the resource, opened for reading in binary mode
    :raises ValueError: if the scheme is not supported or the URI is
                        malformed, contains percent-encoded or unsafe
                        path components, or no package is available
                        for a "data:" URI
    :raises TypeError: if a "file:" URI is given but `basedir` is `None`

    """
    u = rfc3986.URIReference.from_string(uri).normalize()
    if u.scheme == "data":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid data URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a data URI")
        if u.path.find('%') >= 0:
            raise ValueError("URI encoded paths not supported")
        datapackage, sep, datapath = u.path.partition(':')
        if sep:
            if not datapackage:
                datapackage = basepackage
            if datapath.find(':') >= 0:
                raise ValueError("colon in an URI's path not supported")
        else:
            datapackage = basepackage
            datapath = u.path
        # An empty path segment would end up as an empty package
        # component or resource name below: just to be sure
        if "//" in datapath:
            raise ValueError(
                "URI path for the `data' scheme contains `//' substring")
        if datapath.startswith('/'):
            raise ValueError(
                "URI path for the `data' scheme must not be absolute")
        if datapackage is None:
            raise ValueError("missing the data package")
        datapath_parts = datapath.split('/')
        # Parent references behind the package separator are not removed
        # by the URI normalization and would leave the package directory.
        if ".." in datapath_parts:
            raise ValueError("unsafe path in data URI is not supported")
        if il_resources:
            # importlib wants a dotted package name plus a plain resource
            # name: every directory of the path -- not just the last
            # one -- becomes a package component of its own.
            datapath_dirs = datapath_parts[:-1]
            datapath_file = datapath_parts[-1]
            respackage = datapackage
            if packagesubdir:
                respackage += "." + packagesubdir
            if datapath_dirs:
                respackage += "." + ".".join(datapath_dirs)
            return il_resources.open_binary(respackage, datapath_file)
        else:
            if packagesubdir:
                psubdir = packagesubdir + "/"
            else:
                psubdir = ""
            return pkg_resources.resource_stream(    # noqa:E501  # pylint:disable=used-before-assignment
                datapackage, psubdir + datapath)
    elif u.scheme == "file":
        if u.authority or u.query or u.fragment:
            raise ValueError("invalid file URI: authority, query and "
                             "fragment MUST be empty")
        if not rfc3986.validators.path_is_valid(u.path, require=True):
            raise ValueError("invalid or empty empty path within a file URI")
        if u.path.find('%') >= 0:
            raise ValueError(
                "percent-encoded paths not supported in data-stream file URI")
        if not _is_safe_path(u.path):
            raise ValueError("unsafe path in file URI is not supported")
        if not u.path.startswith('/'):
            raise ValueError("relative file URI not handled")
        # resolve the file relative to the projectdir
        if basedir is None:
            raise TypeError("no base directory in `basedir' given")
        return open("{}/{}".format(basedir.rstrip("/\\"),
                                   u.path.lstrip('/')),
                    "rb")
    else:
        raise ValueError("scheme `{}' not supported".format(u.scheme))