annotate data_schema/util.py @ 6:fc18f1cb3309

FIX: also ignore the metadata directory with the normalized named (using "-")
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 07 Jul 2023 00:00:28 +0200
parents 84dfd1a94926
children 2352d14ae261
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
1 # -*- coding: utf-8 -*-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
2 # :-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
3 # :Copyright: (c) 2023 Franz Glasner
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
4 # :License: BSD-3-Clause. See LICENSE.txt for details.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
5 # :-
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
6 r"""Some utility functions for use within the package.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
7
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
8 """
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
9
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
10 __all__ = ["get_data_stream"]
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
11
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
12
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
13 try:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
14 from importlib import resources as il_resources
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
15 except ImportError:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
16 il_resources = None
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
17 import pkg_resources
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
18
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
19 import rfc3986
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
20 import rfc3986.validators
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
21
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
22
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
23 def _is_safe_path(path):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
24 if any(sep in path for sep in ('\\', ':')):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
25 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
26 if path.startswith("../"):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
27 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
28 if path.endswith("/.."):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
29 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
30 if "/../" in path:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
31 return False
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
32 return True
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
33
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
34
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
35 def get_data_stream(uri, basedir=None, basepackage=None):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
36 """
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
37
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
38 "data:" URIs are resolved as Python package resources for packages
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
39 `package`. by default this is the package where this module lives
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
40 in.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
41
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
42 "file:" URIs are resolved by prepending `basedir` to the URI path.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
43
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
44 "data:" URIs are resolve within "<basepackage>.packagedata".
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
45
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
46 The returned stream needs to be closes as usual.
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
47
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
48 """
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
49 u = rfc3986.URIReference.from_string(uri).normalize()
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
50 if u.scheme == "data":
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
51 if u.authority or u.query or u.fragment:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
52 raise ValueError("invalid data URI: authority, query and "
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
53 "fragment MUST be empty")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
54 if not rfc3986.validators.path_is_valid(u.path, require=True):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
55 raise ValueError("invalid or empty empty path within a data URI")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
56 if u.path.find('%') >= 0:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
57 raise ValueError("URI encoded paths not supported")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
58 datapackage, sep, datapath = u.path.partition(':')
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
59 if sep:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
60 if not datapackage:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
61 datapackage = basepackage
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
62 if datapath.find(':') >= 0:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
63 raise ValueError("colon in an URI's path not supported")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
64 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
65 datapackage = basepackage
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
66 datapath = u.path
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
67 # urllib3 normalizes to absolute paths: just to be sure
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
68 if "//" in datapath:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
69 raise ValueError(
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
70 "URI path for the `data' scheme contains `//' substring")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
71 if not datapath.startswith('/'):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
72 if datapackage is None:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
73 raise ValueError("missing the data package")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
74 if il_resources:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
75 datapath_parts = datapath.rsplit('/', 1)
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
76 datapath_dirs = datapath_parts[:-1]
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
77 datapath_file = datapath_parts[-1]
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
78 if datapath_dirs:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
79 datapath_sep = '.'
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
80 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
81 datapath_sep = ''
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
82 return il_resources.open_binary(
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
83 datapackage + '.packagedata' + datapath_sep
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
84 + '.'.join(datapath_dirs), # noqa: E131
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
85 datapath_file)
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
86 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
87 return pkg_resources.resource_stream( # noqa:E501 # pylint:disable=used-before-assignment
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
88 datapackage, "packagedata/" + datapath)
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
89 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
90 raise ValueError(
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
91 "URI path for the `data' scheme must not be absolute")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
92 elif u.scheme == "file":
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
93 if u.authority or u.query or u.fragment:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
94 raise ValueError("invalid file URI: authority, query and "
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
95 "fragment MUST be empty")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
96 if not rfc3986.validators.path_is_valid(u.path, require=True):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
97 raise ValueError("invalid or empty empty path within a file URI")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
98 if u.path.find('%') >= 0:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
99 raise ValueError(
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
100 "percent-encoded paths not supported in data-stream file URI")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
101 if not _is_safe_path(u.path):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
102 raise ValueError("unsafe path in file URI is not supported")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
103 if u.path.startswith('/'):
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
104 # resolve the file relative to the projectdir
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
105 if basedir is None:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
106 raise TypeError("no base directory in `basedir' given")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
107 return open("{}/{}".format(basedir.rstrip("/\\"),
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
108 u.path.lstrip('/')),
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
109 "rb")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
110 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
111 raise ValueError("relative file URI not handled")
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
112 else:
84dfd1a94926 Add the existing implementation.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff changeset
113 raise ValueError("scheme `{}' not supported".format(u.scheme))