changeset 29:68286d27f27d

FIX: Allow customization of the data stream loader (get_data_stream())
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 08 Jul 2023 16:10:36 +0200
parents db3491e1b590
children 2e7c08c356ee
files data_schema/__init__.py data_schema/util.py docs/schema.txt tests/test_schema.py
diffstat 4 files changed, 90 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/data_schema/__init__.py	Sat Jul 08 13:51:20 2023 +0200
+++ b/data_schema/__init__.py	Sat Jul 08 16:10:36 2023 +0200
@@ -236,7 +236,8 @@
 
 ValidationSettings = collections.namedtuple(
     "ValidationSettings",
-    ["skip_keys", "break_on_keynames_problems", "schema_loader"])
+    ["skip_keys", "break_on_keynames_problems",
+     "data_stream_loader", "schema_loader"])
 
 
 class _Schema(dict):
@@ -328,13 +329,16 @@
     def __repr__(self):
         return "<_Schema " + super().__repr__() + ">"
 
-    def get_cached_schema(self, key, load_if_needed=True, schema_loader=None):
+    def get_cached_schema(self, key, load_if_needed=True,
+                          data_stream_loader=None,
+                          schema_loader=None):
         root = self.ROOT
         s = root._schema_cache.get(key, None)
         if s is None and load_if_needed:
             if schema_loader is None:
                 raise SchemaError("no schema loader available")
-            with get_data_stream(key) as schemastream:
+            dsl = data_stream_loader or get_data_stream
+            with dsl(key) as schemastream:
                 # load schema a new `$self' (i.e. sub-root is True)
                 s = _Schema(self, True, schema_loader(schemastream))
             root._schema_cache[key] = s
@@ -501,6 +505,7 @@
     settings = {
         "skip_keys": None,
         "break_on_keynames_problems": True,
+        "data_stream_loader": get_data_stream,
         "schema_loader": default_schema_loader
     }
     settings.update(kwds)
@@ -1408,6 +1413,7 @@
             s = schema.get_cached_schema(
                 uri.path,
                 load_if_needed=True,
+                data_stream_loader=context.settings.data_stream_loader,
                 schema_loader=context.settings.schema_loader)
         if uri.fragment is None:
             raise SchemaError("fragment required in reference")
--- a/data_schema/util.py	Sat Jul 08 13:51:20 2023 +0200
+++ b/data_schema/util.py	Sat Jul 08 16:10:36 2023 +0200
@@ -82,6 +82,8 @@
                     datapath_sep = ''
                 if packagesubdir:
                     psubdir = "." + packagesubdir
+                else:
+                    psubdir = ""
                 return il_resources.open_binary(
                     datapackage + psubdir + datapath_sep
                         + '.'.join(datapath_dirs),               # noqa: E131
@@ -89,6 +91,8 @@
             else:
                 if packagesubdir:
                     psubdir = packagesubdir + "/"
+                else:
+                    psubdir = ""
                 return pkg_resources.resource_stream(  # noqa:E501    # pylint:disable=used-before-assignment
                     datapackage, psubdir + datapath)
         else:
--- a/docs/schema.txt	Sat Jul 08 13:51:20 2023 +0200
+++ b/docs/schema.txt	Sat Jul 08 16:10:36 2023 +0200
@@ -36,7 +36,7 @@
   Bei Treffer wird dieser Key komplett ignoriert. Das ist also eine globale
   Ignore-Liste für Dict-Keys.
 
-  Default: ``None``
+  Default: `None`
 
 - ``break_on_keynames_problems``
 
@@ -45,6 +45,14 @@
 
   Default: ``True``
 
+- ``data_stream_loader``
+
+  Default: `None` (i.e. use the builtin
+           :func:`data_schema.util.get_data_stream`)
+
+  A callable with a `uri` argument that returns a file-like object usable
+  as a context manager; the stream is fed into the schema loader.
+                 
 - ``schema_loader``
 
   Default: configmix.yaml.load (if available) or ``None``
--- a/tests/test_schema.py	Sat Jul 08 13:51:20 2023 +0200
+++ b/tests/test_schema.py	Sat Jul 08 16:10:36 2023 +0200
@@ -1,6 +1,7 @@
 
 import copy
 import datetime
+import functools
 import re
 import unittest
 
@@ -141,6 +142,7 @@
         schema = object()
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         ctx = data_schema.Context(
             None, root_object=obj, root_schema=schema, settings=settings)
@@ -154,6 +156,7 @@
         schema = object()
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         ctx = data_schema.Context(
             None, root_object=obj, root_schema=schema, settings=settings)
@@ -170,6 +173,7 @@
     def test_root_context_init_root_empty(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         self.assertRaises(
             TypeError,
@@ -181,6 +185,7 @@
     def test_root_context_init_only_one_of_key_index(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         self.assertRaises(
@@ -190,6 +195,7 @@
     def test_root_context_init_exactly_one(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         self.assertRaises(TypeError, data_schema.Context, root)
@@ -197,6 +203,7 @@
     def test_nonroot_rootobj_schema(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         obj = object()
         schema = object()
@@ -214,6 +221,7 @@
     def test_str(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         ctx1 = data_schema.Context(root, key="key1")
@@ -224,6 +232,7 @@
     def test_repr(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         ctx1 = data_schema.Context(root, key="key1")
@@ -234,6 +243,7 @@
     def test_root(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         self.assertTrue(root.is_root)
@@ -254,9 +264,11 @@
     def test_extra_settings_in_between(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         settings2 = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         self.assertTrue(root.is_root)
@@ -277,6 +289,7 @@
     def test_key_xor_index(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         root = data_schema.Context(None, settings=settings)
         self.assertRaises(
@@ -289,6 +302,7 @@
     def test_keyindex_requires_key(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         self.assertRaises(
             ValueError,
@@ -1250,6 +1264,7 @@
     def test_raise_no_schema_loader_available(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=None)
         schema = data_schema._Schema(
             None, True, {"$ref": "schema:file:/tmp/xxx#/"})
@@ -1264,9 +1279,12 @@
     def test_raise_schema_loader_available_but_invalid_basedir(self):
         settings = data_schema.ValidationSettings(
             skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=None,
             schema_loader=data_schema.default_schema_loader)
         schema = data_schema._Schema(
-            None, True, {"$ref": "schema:file:/tmp/xxx#/"})
+            None, True, {
+                "$ref": "schema:" + _config.FILEURI_PREFIX + "test1.schema.yml#/"
+            })
         ctx = data_schema.Context(
             None, root_schema=schema, settings=settings)
         self.assertRaises(
@@ -1275,6 +1293,55 @@
             schema,
             ctx)
 
+    def test_schema_loader_and_data_stream_available_file(self):
+        dsl = functools.partial(
+            data_schema.util.get_data_stream,
+            basedir=_config.PROJECTDIR)
+        settings = data_schema.ValidationSettings(
+            skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=dsl,
+            schema_loader=data_schema.default_schema_loader)
+        schema = data_schema._Schema(
+            None, True, {
+                "$ref": "schema:" + _config.FILEURI_PREFIX + "test1.schema.yml#/"
+            })
+        ctx = data_schema.Context(
+            None, root_schema=schema, settings=settings)
+        data_schema.process_schema_references(schema, ctx)
+
+    def test_schema_loader_and_data_stream_available_package_explicit_subdir(self):
+        dsl = functools.partial(
+            data_schema.util.get_data_stream,
+            basedir=_config.PROJECTDIR,
+            packagesubdir=None)
+        settings = data_schema.ValidationSettings(
+            skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=dsl,
+            schema_loader=data_schema.default_schema_loader)
+        schema = data_schema._Schema(
+            None, True, {
+                "$ref": "schema:data:testschematalib.packagedata:test2.schema.yml#/"
+            })
+        ctx = data_schema.Context(
+            None, root_schema=schema, settings=settings)
+        data_schema.process_schema_references(schema, ctx)
+
+    def test_schema_loader_and_data_stream_available_package_implicit_subdir(self):
+        dsl = functools.partial(
+            data_schema.util.get_data_stream,
+            basedir=_config.PROJECTDIR)
+        settings = data_schema.ValidationSettings(
+            skip_keys=[], break_on_keynames_problems=True,
+            data_stream_loader=dsl,
+            schema_loader=data_schema.default_schema_loader)
+        schema = data_schema._Schema(
+            None, True, {
+                "$ref": "schema:data:testschematalib:test2.schema.yml#/"
+            })
+        ctx = data_schema.Context(
+            None, root_schema=schema, settings=settings)
+        data_schema.process_schema_references(schema, ctx)
+
     def test_raise_if_scheme_ref_is_not_the_single_key(self):
         schema = data_schema._Schema(
             None, True, {"$ref": "schema:#/",