changeset 554:36d7aa000435

Implement a C-version of Configuration.interpolate_variables
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 03 Jan 2022 00:11:41 +0100
parents 9d2bd411f5c5
children b7434a34a1f4
files configmix/_speedups.c configmix/config.py tests/test.py
diffstat 3 files changed, 448 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/configmix/_speedups.c	Sun Jan 02 20:43:24 2022 +0100
+++ b/configmix/_speedups.c	Mon Jan 03 00:11:41 2022 +0100
@@ -23,8 +23,13 @@
     PyObject *QUOTE;
     PyObject *NS_SEPARATOR;
     PyObject *FILTER_SEPARATOR;
+    PyObject *EMPTY_FILTER;
+    PyObject *NONE_FILTER;
     PyObject *EMPTY_STR;
     PyObject *QUOTE_MAP;
+    PyObject *MISSING;
+    PyObject *STARTTOK;
+    PyObject *ENDTOK;
 };
 
 
@@ -684,12 +689,304 @@
 }
 
 
+/*
+ * C-implementation of Configuration.interpolate_variables.
+ *
+ * Expects exactly three positional arguments:
+ *
+ *   config -- the object whose methods `_getvar_s_with_cache_info' and
+ *             `_apply_filters' are called to look up and to filter
+ *             variable values
+ *   s      -- the string to expand (must be a unicode object)
+ *   cache  -- the interpolation cache; it is accessed with the PyDict API
+ *             and maps strings to their expanded value or to the
+ *             module's MISSING object
+ *
+ * Returns a new reference to the result or NULL with an exception set.
+ *
+ * `s' itself is returned if it is shorter than 4 characters or does not
+ * contain a start token. If `s' consists of exactly one expansion the
+ * filtered variable value is returned as-is; otherwise the result is a
+ * string and a value of `None' expands to nothing.
+ *
+ * A KeyError from the variable lookup is propagated -- and `s' is
+ * marked as MISSING in the cache -- unless the expansion carries the
+ * None-filter or the Empty-filter.
+ */
+static
+PyObject *
+fast_interpolate_variables(PyObject *self, PyObject *args)
+{
+    PyObject *config;
+    PyObject *s;
+    PyObject *cache;
+
+    Py_ssize_t s_len;
+    Py_ssize_t idx;
+    Py_ssize_t i, j;
+    PyObject *parts = NULL;
+    Py_ssize_t parts_len;
+    PyObject *res_parts = NULL;
+    PyObject *res = NULL;
+    PyObject *tmp;
+    PyObject *tmp2;
+    PyObject *pb;
+    Py_ssize_t pb_len;
+    PyObject *varname = NULL;
+    PyObject *varvalue = NULL;
+    PyObject *filters = NULL;
+    int cacheable;
+    int use_cache = 1;
+    int first_part_is_empty;
+    int has_filter;
+    PyObject *err_type;
+    PyObject *err_value;
+    PyObject *err_tb;
+    struct speedups_state *sstate;
+
+    if (!PyArg_UnpackTuple(args, "_fast_interpolate_variables", 3, 3, &config, &s, &cache)) {
+        return NULL;
+    }
+    s_len = PyUnicode_GetLength(s);   /* also an implicit type check */
+    if (s_len < 0) {
+        return NULL;
+    }
+    if (s_len < 4) {
+        /* too short to hold a start token and an end token */
+        Py_INCREF(s);
+        return s;
+    }
+    sstate = PyModule_GetState(self);
+    if (sstate == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "no module state available");
+        return NULL;
+    }
+
+    idx = PyUnicode_Find(s, sstate->STARTTOK, 0, s_len, 1);
+    if (idx == -2) {
+        /* a real error -- not just "not found" */
+        return NULL;
+    }
+    if (idx < 0) {
+        /* no start token: nothing to expand */
+        Py_INCREF(s);
+        return s;
+    }
+
+    res = PyDict_GetItem(cache, s);      /* borrowed */
+    if (res != NULL) {
+        if (res == sstate->MISSING) {
+            return PyErr_Format(
+                PyExc_KeyError,
+                "Cannot interpolate variables in string %R (cached)",
+                s);
+        }
+        else {
+            Py_INCREF(res);
+            return res;
+        }
+    }
+
+    parts = PyUnicode_Split(s, sstate->STARTTOK, -1);
+    if (parts == NULL) {
+        goto error;
+    }
+    parts_len = PyList_Size(parts);
+    if (parts_len < 0) {
+        goto error;
+    }
+    res_parts = PyList_New(1);
+    if (res_parts == NULL) {
+        goto error;
+    }
+
+    tmp = PyList_GetItem(parts, 0);   /* borrowed */
+    if (tmp == NULL) {
+        goto error;
+    }
+    /*
+     * The first item may be also the empty string if `s' starts with
+     * an interpolation token.
+     */
+    first_part_is_empty = PyObject_Not(tmp);
+    if (first_part_is_empty < 0) {
+        goto error;
+    }
+    Py_INCREF(tmp);  /* because PyList_SetItem steals -- and tmp is borrowed */
+    PyList_SetItem(res_parts, 0, tmp);    /* steals -- cannot fail */
+    tmp = NULL;
+
+    for (i=1; i<parts_len; i++) {
+        pb = PyList_GetItem(parts, i);    /* borrowed */
+        if (pb == NULL) {
+            goto error;
+        }
+        pb_len = PyUnicode_GetLength(pb);
+        if (pb_len < 0) {
+            goto error;
+        }
+        idx = PyUnicode_Find(pb, sstate->ENDTOK, 0, pb_len, 1);
+        if (idx == -2) {
+            /* a real error -- not just "not found" */
+            goto error;
+        }
+        if (idx < 0) {
+            /*
+             * Behave similar to the pure-Python version: copy the complete
+             * rest as-is. Also include the start tokens!
+             */
+            if (PyList_Append(res_parts, sstate->STARTTOK) < 0) {
+                goto error;
+            }
+            if (PyList_Append(res_parts, pb) < 0) {
+                goto error;
+            }
+            for (j=i+1; j<parts_len; j++) {
+                if (PyList_Append(res_parts, sstate->STARTTOK) < 0) {
+                    goto error;
+                }
+                pb = PyList_GetItem(parts, j);   /* borrowed */
+                if (PyList_Append(res_parts, pb) < 0) {
+                    goto error;
+                }
+            }
+            break;   /* the for-loop */
+        }
+
+        varname = PyUnicode_Substring(pb, 0, idx);
+        if (varname == NULL) {
+            goto error;
+        }
+
+        tmp = _fast_split_filters(varname, NULL, sstate);
+        if (tmp == NULL) {
+            goto error;
+        }
+        if (PyTuple_Size(tmp) != 2) {
+            PyErr_SetString(PyExc_TypeError, "tuple of size 2 expected");
+            Py_DECREF(tmp);
+            goto error;
+        }
+        /* Unpack the result tuple: (varname-without-filters, filters) */
+        tmp2 = PyTuple_GetItem(tmp, 0);   /* borrowed -- cannot fail */
+        Py_DECREF(varname);
+        Py_INCREF(tmp2);
+        varname = tmp2;
+        tmp2 = PyTuple_GetItem(tmp, 1);   /* borrowed -- cannot fail */
+        Py_INCREF(tmp2);
+        filters = tmp2;   /* released at the end of every iteration */
+        Py_DECREF(tmp);
+        tmp = tmp2 = NULL;
+
+        tmp = PyObject_CallMethod(
+            config, "_getvar_s_with_cache_info", "O", varname);
+        if (tmp == NULL) {
+            if (!PyErr_ExceptionMatches(PyExc_KeyError)) {
+                /* other exception/error than KeyError */
+                goto error;
+            }
+            /*
+             * Park the KeyError while the filters are examined: no
+             * other API calls should be made with a pending exception.
+             */
+            PyErr_Fetch(&err_type, &err_value, &err_tb);
+            cacheable = 1;
+            has_filter = PySequence_Contains(filters, sstate->NONE_FILTER);
+            if (has_filter == 1) {
+                Py_INCREF(Py_None);
+                varvalue = Py_None;
+            }
+            else if (has_filter == 0) {
+                has_filter = PySequence_Contains(
+                    filters, sstate->EMPTY_FILTER);
+                if (has_filter == 1) {
+                    Py_INCREF(sstate->EMPTY_STR);
+                    varvalue = sstate->EMPTY_STR;
+                }
+            }
+            if (has_filter < 0) {
+                /* examining the filters failed: report this error */
+                Py_XDECREF(err_type);
+                Py_XDECREF(err_value);
+                Py_XDECREF(err_tb);
+                goto error;
+            }
+            if (has_filter == 0) {
+                /*
+                 * Neither filter given: remember the failure and
+                 * re-raise the original KeyError. PyErr_Restore also
+                 * replaces any error from the cache update.
+                 */
+                if (sstate->MISSING != NULL) {
+                    /* this does NOT steal */
+                    PyDict_SetItem(cache, s, sstate->MISSING);
+                }
+                PyErr_Restore(err_type, err_value, err_tb);
+                goto error;
+            }
+            /* the KeyError is handled by a filter: drop it */
+            Py_XDECREF(err_type);
+            Py_XDECREF(err_value);
+            Py_XDECREF(err_tb);
+        }
+        else {
+            if (PyTuple_Size(tmp) != 2) {
+                Py_DECREF(tmp);
+                PyErr_SetString(PyExc_TypeError, "tuple of size 2 expected");
+                goto error;
+            }
+            /* unpack the result: (value, cacheable) */
+            varvalue = PyTuple_GetItem(tmp, 0); /* borrowed -- but want own */
+            Py_INCREF(varvalue);
+            cacheable = PyObject_IsTrue(PyTuple_GetItem(tmp, 1));
+            Py_DECREF(tmp); tmp = NULL;
+            if (cacheable < 0) {
+                goto error;
+            }
+        }
+
+        if (!cacheable) {
+            /* one non-cacheable value makes the complete result non-cacheable */
+            use_cache = 0;
+        }
+
+        Py_DECREF(varname); varname = NULL;
+
+        tmp = PyObject_CallMethod(
+            config, "_apply_filters", "OO", filters, varvalue);
+        if (tmp == NULL) {
+            goto error;
+        }
+        Py_DECREF(varvalue);
+        varvalue = tmp;
+        tmp = NULL;
+
+        /*
+         * Don't apply any type conversions to the variable value if
+         * the whole `s` is just one expansion
+         */
+        if (first_part_is_empty && (i == 1)
+                && (pb_len == s_len - 2) && (idx == pb_len - 2)) {
+            res = varvalue; varvalue = NULL;
+            goto success;     /* break out early */
+        }
+        if (varvalue != Py_None) {
+            tmp = PyObject_Str(varvalue);
+            if (tmp == NULL) {
+                goto error;
+            }
+            if (PyList_Append(res_parts, tmp) < 0) {
+                Py_DECREF(tmp);
+                goto error;
+            }
+            Py_DECREF(tmp);
+        }
+        Py_DECREF(varvalue); varvalue = NULL;
+        /* append the rest of the string */
+        tmp = PyUnicode_Substring(pb, idx+2, pb_len);
+        if (tmp == NULL) {
+            goto error;
+        }
+        if (PyList_Append(res_parts, tmp) < 0) {
+            Py_DECREF(tmp);
+            goto error;
+        }
+        Py_DECREF(tmp); tmp = NULL;
+
+        /* do not leak the filters when the next expansion is handled */
+        Py_DECREF(filters); filters = NULL;
+    }
+
+    res = PyUnicode_Join(sstate->EMPTY_STR, res_parts);
+    if (res == NULL) {
+        goto error;
+    }
+
+success:
+    Py_DECREF(parts);
+    Py_DECREF(res_parts);
+    Py_XDECREF(filters);
+
+    if (use_cache) {
+        if (PyDict_SetItem(cache, s, res) < 0) {
+            PyErr_Clear();    /* a cache-related error is not fatal */
+        }
+    }
+    return res;
+
+error:
+    Py_XDECREF(varname);
+    Py_XDECREF(varvalue);
+    Py_XDECREF(parts);
+    Py_XDECREF(res_parts);
+    Py_XDECREF(res);
+    Py_XDECREF(filters);
+    return NULL;
+}
+
+
+/*
+ * Store `missing' in the module state as the MISSING object.
+ *
+ * Returns None. Raises a RuntimeError if the module state is not
+ * available or if MISSING has been set already.
+ */
+static
+PyObject *
+sync_MISSING(PyObject *self, PyObject *missing)
+{
+    struct speedups_state *state = PyModule_GetState(self);
+
+    if (state == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "no module state available");
+        return NULL;
+    }
+    if (state->MISSING == NULL) {
+        /* the module state holds its own reference */
+        state->MISSING = missing;
+        Py_INCREF(missing);
+        Py_RETURN_NONE;
+    }
+    PyErr_SetString(PyExc_RuntimeError, "_MISSING already set");
+    return NULL;
+}
+
+
 static struct PyMethodDef speedups_methods[] = {   /* the functions that configmix.config imports from this module */
     {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")},
     {"fast_quote", fast_quote, METH_O, PyDoc_STR("C-implementation of configmix.quote")},
     {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")},
     {"_fast_split_filters", fast_split_filters, METH_O, PyDoc_STR("C-implementation of configmix.config._split_filters")},
     {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")},
+    {"_fast_interpolate_variables", fast_interpolate_variables, METH_VARARGS, PyDoc_STR("C-implementation of configmix.config.Configuration.interpolate_variables")},
+    {"_sync_MISSING", sync_MISSING, METH_O, PyDoc_STR("Internal function to easily sync the _MISSING object with configmix.config")},
+
     {NULL, NULL, 0, NULL}   /* sentinel: terminates the table */
 };
 
@@ -739,6 +1036,18 @@
     }
     PyUnicode_InternInPlace(&(sstate->FILTER_SEPARATOR));
 
+    sstate->EMPTY_FILTER = PyUnicode_FromStringAndSize("Empty", 5);
+    if (sstate->EMPTY_FILTER == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->EMPTY_FILTER));
+
+    sstate->NONE_FILTER = PyUnicode_FromStringAndSize("None", 4);
+    if (sstate->NONE_FILTER == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->NONE_FILTER));
+
     sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
     if (sstate->EMPTY_STR == NULL) {
         return -1;
@@ -762,6 +1071,18 @@
         return -1;
     }
 
+    sstate->STARTTOK = PyUnicode_FromStringAndSize("{{", 2);
+    if (sstate->STARTTOK == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->STARTTOK));
+
+    sstate->ENDTOK = PyUnicode_FromStringAndSize("}}", 2);
+    if (sstate->ENDTOK == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->ENDTOK));
+
     return 0;
 }
 
@@ -777,8 +1098,13 @@
         Py_VISIT(sstate->QUOTE);
         Py_VISIT(sstate->NS_SEPARATOR);
         Py_VISIT(sstate->FILTER_SEPARATOR);
+        Py_VISIT(sstate->EMPTY_FILTER);
+        Py_VISIT(sstate->NONE_FILTER);
         Py_VISIT(sstate->EMPTY_STR);
         Py_VISIT(sstate->QUOTE_MAP);
+        Py_VISIT(sstate->MISSING);
+        Py_VISIT(sstate->STARTTOK);
+        Py_VISIT(sstate->ENDTOK);
     }
     return 0;
 }
@@ -795,8 +1121,13 @@
         Py_CLEAR(sstate->QUOTE);
         Py_CLEAR(sstate->NS_SEPARATOR);
         Py_CLEAR(sstate->FILTER_SEPARATOR);
+        Py_CLEAR(sstate->EMPTY_FILTER);
+        Py_CLEAR(sstate->NONE_FILTER);
         Py_CLEAR(sstate->EMPTY_STR);
         Py_CLEAR(sstate->QUOTE_MAP);
+        Py_CLEAR(sstate->MISSING);
+        Py_CLEAR(sstate->STARTTOK);
+        Py_CLEAR(sstate->ENDTOK);
     }
     return 0;
 }
--- a/configmix/config.py	Sun Jan 02 20:43:24 2022 +0100
+++ b/configmix/config.py	Mon Jan 03 00:11:41 2022 +0100
@@ -32,13 +32,17 @@
 from .constants import REF_NAMESPACE, NONE_FILTER, EMPTY_FILTER
 try:
     from ._speedups import (fast_unquote, fast_quote, fast_pathstr2path,
-                            _fast_split_ns, _fast_split_filters)
+                            _fast_split_ns, _fast_split_filters,
+                            _fast_interpolate_variables,
+                            _sync_MISSING)
 except ImportError:
     fast_unquote = None
     fast_quote = None
     fast_pathstr2path = None
     _fast_split_ns = None
     _fast_split_filters = None
+    _fast_interpolate_variables = None
+    _sync_MISSING = None
 
 
 _MARKER = object()
@@ -873,7 +877,7 @@
         else:
             return obj
 
-    def interpolate_variables(self, s):
+    def py_interpolate_variables(self, s):
         """Expand all variables in the single string `s`"""
         len_s = len(s)
         if len_s < 4:
@@ -943,6 +947,18 @@
             self.__interpolation_cache[s] = res
         return res
 
+    if _fast_interpolate_variables:
+
+        def fast_interpolate_variables(self, s):
+            return _fast_interpolate_variables(
+                self, s, self.__interpolation_cache)
+
+        interpolate_variables = fast_interpolate_variables
+
+    else:
+
+        interpolate_variables = py_interpolate_variables
+
     def _apply_filters(self, filters, value):
         for name in filters:
             try:
@@ -1221,3 +1237,7 @@
     def __repr__(self):
         r = "_JailedConfiguration(rootpath=%s)" % n(repr(self._path))
         return r
+
+
+if _sync_MISSING:   # None when the C-speedups could not be imported
+    _sync_MISSING(_MISSING)   # hand this module's _MISSING object over to the C-extension
--- a/tests/test.py	Sun Jan 02 20:43:24 2022 +0100
+++ b/tests/test.py	Mon Jan 03 00:11:41 2022 +0100
@@ -1920,7 +1920,99 @@
     def test_split_filters_many(self):
         self.assertEqual((u"the-varname", [u"Empty", u"None"]),
                          self.split_filters(u"the-varname|Empty|None"))
-        
+
+    def test_None_filter_single(self):
+        cfg = configmix.load()
+        x = getattr(cfg, self.interpolate_meth)(u"{{non-existing|None}}")
+        self.assertIsNone(x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"{{non-existing|None}}")
+        self.assertIsNone(y)
+
+    def test_None_filter_embedded(self):
+        cfg = configmix.load()
+        x = getattr(cfg, self.interpolate_meth)(u"A{{non-existing|None}}Z")
+        self.assertEqual(u"AZ", x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"A{{non-existing|None}}Z")
+        self.assertEqual(u"AZ", y)
+
+    def test_Empty_filtersingle(self):
+        cfg = configmix.load()
+        x = getattr(cfg, self.interpolate_meth)(u"{{non-existing|Empty}}")
+        self.assertEqual("", x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"{{non-existing|Empty}}")
+        self.assertEqual("", y)
+
+    def test_None_filter_pass_through(self):
+        cfg = configmix.load(os.path.join(TESTDATADIR, "conf20.yml"),
+                             os.path.join(TESTDATADIR, "conf21.yml"),
+                             os.path.join(TESTDATADIR, "conf22.ini"),
+                             os.path.join(TESTDATADIR, "conf23.json"),
+                             os.path.join(TESTDATADIR, "conf24.toml"))
+        x = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|None}}")
+        self.assertEqual(10, x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|None}}")
+        self.assertEqual(10, y)
+
+    def test_Empty_filter_pass_through(self):
+        cfg = configmix.load(os.path.join(TESTDATADIR, "conf20.yml"),
+                             os.path.join(TESTDATADIR, "conf21.yml"),
+                             os.path.join(TESTDATADIR, "conf22.ini"),
+                             os.path.join(TESTDATADIR, "conf23.json"),
+                             os.path.join(TESTDATADIR, "conf24.toml"))
+        x = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|Empty}}")
+        self.assertEqual(10, x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|Empty}}")
+        self.assertEqual(10, y)
+
+    def test_Empty_filter_no_pass_through_2(self):
+        cfg = configmix.load(os.path.join(TESTDATADIR, "conf20.yml"),
+                             os.path.join(TESTDATADIR, "conf21.yml"),
+                             os.path.join(TESTDATADIR, "conf22.ini"),
+                             os.path.join(TESTDATADIR, "conf23.json"),
+                             os.path.join(TESTDATADIR, "conf24.toml"))
+        x = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|Empty}}{{intl.cache.items}}")
+        self.assertEqual(u"1010", x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"{{intl.cache.items|Empty}}{{intl.cache.items}}")
+        self.assertEqual(u"1010", y)        
+
+    def test_interpolate_wrong_syntax(self):
+        cfg = configmix.load()
+        x1 = getattr(cfg, self.interpolate_meth)(u"{{the-variable}")
+        self.assertEqual(u"{{the-variable}", x1)
+        x2 = getattr(cfg, self.interpolate_meth)(u"A{{the-variable}Z")
+        self.assertEqual(u"A{{the-variable}Z", x2)
+        x3 = getattr(cfg, self.interpolate_meth)(u"A{{1{{2{{3}Z")
+        self.assertEqual(u"A{{1{{2{{3}Z", x3)
+        # caching should have no effect
+        y1 = getattr(cfg, self.interpolate_meth)(u"{{the-variable}")
+        self.assertEqual(u"{{the-variable}", y1)
+        y2 = getattr(cfg, self.interpolate_meth)(u"A{{the-variable}Z")
+        self.assertEqual(u"A{{the-variable}Z", y2)
+        y3 = getattr(cfg, self.interpolate_meth)(u"A{{1{{2{{3}Z")
+        self.assertEqual(u"A{{1{{2{{3}Z", y3)
+
+    def test_interpolate_empty_str(self):
+        cfg = configmix.load()
+        x = getattr(cfg, self.interpolate_meth)(u"")
+        self.assertEqual(u"", x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"")
+        self.assertEqual(u"", y)
+
+    def test_interpolate_no_interpolation(self):
+        cfg = configmix.load()
+        x = getattr(cfg, self.interpolate_meth)(u"no-interpolation-here")
+        self.assertEqual(u"no-interpolation-here", x)
+        # caching should have no effect
+        y = getattr(cfg, self.interpolate_meth)(u"no-interpolation-here")
+        self.assertEqual(u"no-interpolation-here", y)
+
 
 class T09Parser(_TParserMixin, unittest.TestCase):
 
@@ -1930,6 +2022,7 @@
         self.pathstr2path = configmix.config.py_pathstr2path
         self.split_ns = configmix.config._py_split_ns
         self.split_filters = configmix.config._py_split_filters
+        self.interpolate_meth = "py_interpolate_variables"
 
     def test_split_ns_wrong_type(self):
         self.assertRaises(
@@ -1959,6 +2052,7 @@
             self.pathstr2path = configmix.config.fast_pathstr2path
             self.split_ns = configmix.config._fast_split_ns
             self.split_filters = configmix.config._fast_split_filters
+            self.interpolate_meth = "fast_interpolate_variables"
 
         def test_split_ns_wrong_type(self):
             self.assertRaises(