changeset 543:491413368c7c

Added also a fast C-implementation of configmix.config._split_ns
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 01 Jan 2022 18:01:32 +0100
parents f71d34dda19f
children db2d108e14e7
files configmix/_speedups.c configmix/config.py tests/_perf_config.py tests/test.py
diffstat 4 files changed, 134 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/configmix/_speedups.c	Fri Dec 31 21:24:16 2021 +0100
+++ b/configmix/_speedups.c	Sat Jan 01 18:01:32 2022 +0100
@@ -21,6 +21,7 @@
 struct speedups_state {
     PyObject *DOT;
     PyObject *QUOTE;
+    PyObject *NS_SEPARATOR;
     PyObject *EMPTY_STR;
 };
 
@@ -81,10 +82,9 @@
 
 static
 PyObject *
-_fast_unquote(PyObject *self, PyObject *s, struct speedups_state *sstate)
+_fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate)
 {
     Py_ssize_t find;
-    Py_ssize_t s_len;
     Py_ssize_t parts_len;
     PyObject *res;
     PyObject *res_parts = NULL;
@@ -99,9 +99,11 @@
         PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
         return NULL;
     }
-    s_len = PyUnicode_GetLength(s);
     if (s_len < 0) {
-        return NULL;
+        s_len = PyUnicode_GetLength(s);
+        if (s_len < 0) {
+            return NULL;
+        }
     }
     if (s_len == 0) {
         Py_INCREF(s);
@@ -230,7 +232,7 @@
 PyObject *
 fast_unquote(PyObject *self, PyObject *s)
 {
-    return _fast_unquote(self, s, NULL);
+    return _fast_unquote(self, s, -1, NULL);
 }
 
 
@@ -278,7 +280,7 @@
     }
     for (i=0; i < parts_len; i++) {
         o = PyList_GetItem(parts, i);   /* borrowed */
-        u = _fast_unquote(self, o, sstate);
+        u = _fast_unquote(self, o, -1, sstate);
         if (u == NULL) {
             goto error;
         }
@@ -295,9 +297,67 @@
 }
 
 
+static
+PyObject *
+fast_split_ns(PyObject *self, PyObject *varname)
+{
+    PyObject *res = NULL;       /* the 2-tuple (namespace, rest) to return */
+    Py_ssize_t ns_idx;          /* index of the first ':' in varname */
+    Py_ssize_t varname_len;
+    PyObject *o1;               /* substring scratch: raw namespace, later the rest */
+    PyObject *o2;               /* the unquoted namespace */
+    /* Split varname at its first ':' into a new tuple (namespace, rest). */
+    varname_len = PyUnicode_GetLength(varname);
+    if (varname_len < 0) {      /* error: an exception is already set */
+        return NULL;
+    }
+    ns_idx = PyUnicode_FindChar(varname, ':', 0, varname_len, 1);  /* direction 1: forward search */
+    if (ns_idx == -2) {         /* -2: error, an exception is already set */
+        return NULL;
+    }
+    if (ns_idx == -1) {         /* -1: no separator found -> (None, varname) */
+        res = PyTuple_New(2);
+        if (res == NULL) {
+            return NULL;
+        }
+        Py_INCREF(Py_None);
+        PyTuple_SetItem(res, 0, Py_None);  /* steals the reference taken just above */
+        Py_INCREF(varname);
+        PyTuple_SetItem(res, 1, varname);  /* steals the reference taken just above */
+        return res;
+    }
+    /* Separator found: (unquoted varname[:ns_idx], verbatim varname[ns_idx+1:]) */
+    res = PyTuple_New(2);
+    if (res == NULL) {
+        return NULL;
+    }
+    o1 = PyUnicode_Substring(varname, 0, ns_idx);  /* the raw (still quoted) namespace */
+    if (o1 == NULL) {
+        Py_DECREF(res);
+        return NULL;
+    }
+    o2 = _fast_unquote(self, o1, ns_idx, NULL);  /* ns_idx is the length of o1, so it need not be recomputed */
+    if (o2 == NULL) {
+        Py_DECREF(o1);
+        Py_DECREF(res);
+        return NULL;
+    }
+    Py_DECREF(o1);                  /* the raw namespace is no longer needed */
+    PyTuple_SetItem(res, 0, o2);    /* steals: res now owns o2 */
+    o1 = PyUnicode_Substring(varname, ns_idx+1, varname_len);  /* the rest; it is not unquoted */
+    if (o1 == NULL) {
+        Py_DECREF(res);             /* also releases o2 stored in slot 0 */
+        return NULL;
+    }
+    PyTuple_SetItem(res, 1, o1);    /* steals: res now owns o1 */
+    return res;
+}
+
+
 static struct PyMethodDef speedups_methods[] = {
     {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")},
     {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")},
+    {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")},
     {NULL, NULL, 0, NULL}
 };
 
@@ -329,6 +389,12 @@
     }
     PyUnicode_InternInPlace(&(sstate->QUOTE));
 
+    sstate->NS_SEPARATOR = PyUnicode_FromStringAndSize(":", 1);
+    if (sstate->NS_SEPARATOR == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->NS_SEPARATOR));
+
     sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
     if (sstate->EMPTY_STR == NULL) {
         return -1;
@@ -348,6 +414,7 @@
     if (sstate != NULL) {
         Py_VISIT(sstate->DOT);
         Py_VISIT(sstate->QUOTE);
+        Py_VISIT(sstate->NS_SEPARATOR);
         Py_VISIT(sstate->EMPTY_STR);
     }
     return 0;
@@ -363,6 +430,7 @@
     if (sstate != NULL) {
         Py_CLEAR(sstate->DOT);
         Py_CLEAR(sstate->QUOTE);
+        Py_CLEAR(sstate->NS_SEPARATOR);
         Py_CLEAR(sstate->EMPTY_STR);
     }
     return 0;
--- a/configmix/config.py	Fri Dec 31 21:24:16 2021 +0100
+++ b/configmix/config.py	Sat Jan 01 18:01:32 2022 +0100
@@ -31,10 +31,11 @@
 from .compat import u, uchr, n, str_and_u, PY2
 from .constants import REF_NAMESPACE, NONE_FILTER, EMPTY_FILTER
 try:
-    from ._speedups import fast_unquote, fast_pathstr2path
+    from ._speedups import fast_unquote, fast_pathstr2path, _fast_split_ns
 except ImportError:
     fast_unquote = None
     fast_pathstr2path = None
+    _fast_split_ns = None
 
 
 _MARKER = object()
@@ -362,7 +363,7 @@
     pathstr2path = py_pathstr2path
 
 
-def _split_ns(varname):
+def _py_split_ns(varname):
     """Split the variable name string `varname` into the namespace and
     the namespace-specific name
 
@@ -371,6 +372,9 @@
              namespace-specific (variable-)name
     :rtype: tuple(str or None, str)
 
+    .. note:: The returned namespace may be an empty string if the namespace
+              separator is found.
+
     """
     ns, sep, rest = varname.partition(_NS_SEPARATOR)
     if sep:
@@ -379,6 +383,12 @@
         return (None, ns)
 
 
+if _fast_split_ns:  # is None when the _speedups import above failed
+    _split_ns = _fast_split_ns
+else:
+    _split_ns = _py_split_ns  # pure-Python fallback
+
+
 def _split_filters(varname):
     """Split off the filter part from the `varname` string
 
--- a/tests/_perf_config.py	Fri Dec 31 21:24:16 2021 +0100
+++ b/tests/_perf_config.py	Sat Jan 01 18:01:32 2022 +0100
@@ -16,19 +16,20 @@
 all = not opts or "all" in opts
 
 try:
-    from configmix.config import fast_unquote, fast_pathstr2path
+    from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns
 except ImportError:
-    fast_unquote = fast_pathstr2path = None
+    fast_unquote = fast_pathstr2path = _fast_split_ns = None
 
 setup = """
 import os
 
 import configmix
-from configmix.config import _HIER_SEPARATOR, quote, py_pathstr2path, py_unquote
+from configmix.config import _HIER_SEPARATOR, quote, py_pathstr2path, \
+                             py_unquote, _py_split_ns
 try:
-    from configmix.config import fast_unquote, fast_pathstr2path
+    from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns
 except ImportError:
-    fast_unquote = fast_pathstr2path = None
+    fast_unquote = fast_pathstr2path = _fast_split_ns = None
 
 TESTDATADIR = os.path.join(
     os.path.abspath(os.path.dirname(configmix.__file__)),
@@ -40,6 +41,7 @@
 
 se = u""
 s1 = u"abc.def.hij"
+ns_s1 = u"PY:abc.def.hij"
 
 """
 
@@ -56,14 +58,19 @@
         print("fast-unquote/nothing/split: %.4f" % timeit.timeit('a = [fast_unquote(vp) for vp in u"abc.def.hij".split(_HIER_SEPARATOR)]', setup=setup, number=num_quote))
         print("fast-unquote/yes/split: %.4f" % timeit.timeit('a = [fast_unquote(vp) for vp in u"ab%x20.def.h%x2ej".split(_HIER_SEPARATOR)]', setup=setup, number=num_quote))
         print("fast-unquote/nothing/no-split: %.4f" % timeit.timeit('a = [fast_unquote(vp) for vp in (u"abc," u"def", u"hij")]', setup=setup, number=num_quote))
-        print("fast-unquote/yes/no-split: %.4f" % timeit.timeit('a = [fast_unquote(vp) for vp in (u"ab%x20", u"def", u"h%x2ej")]', setup=setup, number=num_quote))    
+        print("fast-unquote/yes/no-split: %.4f" % timeit.timeit('a = [fast_unquote(vp) for vp in (u"ab%x20", u"def", u"h%x2ej")]', setup=setup, number=num_quote))
     print("pathstr2path/non-empty: %.4f" % timeit.timeit('a = py_pathstr2path(s1)', setup=setup, number=num_quote))
     print("pathstr2path/empty: %.4f" % timeit.timeit('a = py_pathstr2path(se)', setup=setup, number=num_quote))
     if fast_pathstr2path:
         print("fast-pathstr2path/non-empty: %.4f" % timeit.timeit('a = fast_pathstr2path(s1)', setup=setup, number=num_quote))
-        print("fast-pathstr2path/empty: %.4f" % timeit.timeit('a = fast_pathstr2path(se)', setup=setup, number=num_quote))    
+        print("fast-pathstr2path/empty: %.4f" % timeit.timeit('a = fast_pathstr2path(se)', setup=setup, number=num_quote))
     print("quote/nothing: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote))
     print("quote/yes: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote))
+    print("split-ns/no-ns: %.4f" % timeit.timeit('a = _py_split_ns(s1)', setup=setup, number=num_quote))
+    print("split-ns/ns: %.4f" % timeit.timeit('a = _py_split_ns(ns_s1)', setup=setup, number=num_quote))    
+    if _fast_split_ns:
+        print("fast-split-ns/no-ns: %.4f" % timeit.timeit('a = _fast_split_ns(s1)', setup=setup, number=num_quote))
+        print("fast-split-ns/ns: %.4f" % timeit.timeit('a = _fast_split_ns(ns_s1)', setup=setup, number=num_quote))                
     print("="*50)
 
 if all or "default" in opts or "non-existing" in opts:
@@ -92,7 +99,7 @@
     print("expand-string-one-noncached: %.4f" % timeit.timeit('cfg.getvarl_s(u"tree1", u"tree2", "key12")', setup=setup, number=num))
     print("expand-string-one-noncached: %.4f" % timeit.timeit('cfg.getvar(u"tree1.tree2.key12")', setup=setup, number=num))
     print("expand-string-one-noncached: %.4f" % timeit.timeit('cfg.getvar_s(u"tree1.tree2.key12")', setup=setup, number=num))
-    print("-"*50)    
+    print("-"*50)
 
 if all or "expand-string-many" in opts:
     print("expand-string-many: %.4f" % timeit.timeit('cfg.getvarl(u"tree1", u"tree2", "key10")', setup=setup, number=num))
@@ -106,7 +113,7 @@
     print("expand-string-many-noncached: %.4f" % timeit.timeit('cfg.getvarl_s(u"tree1", u"tree2", u"key13")', setup=setup, number=num))
     print("expand-string-many-noncached: %.4f" % timeit.timeit('cfg.getvar(u"tree1.tree2.key13")', setup=setup, number=num))
     print("expand-string-many-noncached: %.4f" % timeit.timeit('cfg.getvar_s(u"tree1.tree2.key13")', setup=setup, number=num))
-    print("-"*50)    
+    print("-"*50)
 
 if all or "expand-list" in opts:
     print("expand-list: %.4f" % timeit.timeit('cfg.getvarl(u"tree1", u"tree2", "key8")', setup=setup, number=num))
--- a/tests/test.py	Fri Dec 31 21:24:16 2021 +0100
+++ b/tests/test.py	Sat Jan 01 18:01:32 2022 +0100
@@ -1777,7 +1777,7 @@
         e = configmix.quote(u"")
         self.assertEqual(u"", e)
         self.assertEqual(u"", self.unquote(e))
-        
+
     def test_quoting_and_unquoting_are_inverse(self):
         for c in u"""%.:#|"'{}[]""":
             qc = configmix.quote(c)
@@ -1874,12 +1874,36 @@
         p = self.pathstr2path(u"a%x2Eb.c%u002Ed.e%U0000002Ef")
         self.assertEqual((u"a.b", u"c.d", u"e.f"), p)
 
+    def test_split_ns_empty(self):  # no separator: namespace is None
+        self.assertEqual((None, u""), self.split_ns(u""))
+
+    def test_split_ns_empty_parts(self):  # bare separator: empty namespace, not None
+        self.assertEqual((u"", u""), self.split_ns(u":"))
+
+    def test_split_ns_no_ns(self):  # without a separator the input is returned as the name
+        self.assertEqual((None, u"the-varname"), self.split_ns(u"the-varname"))
+
+    def test_split_ns_non_quoted(self):
+        self.assertEqual(
+            (u"the-ns", "the-rest:with:colons|filter1|filter2"),
+            self.split_ns(u"the-ns:the-rest:with:colons|filter1|filter2"))
+
+    def test_split_ns_quoting(self):  # only the namespace part is unquoted
+        self.assertEqual((u":", u"%x3a"), self.split_ns(u"%x3a:%x3a"))
+
 
 class T09Parser(_TParserMixin, unittest.TestCase):
 
     def setUp(self):
         self.unquote = configmix.config.py_unquote
         self.pathstr2path = configmix.config.py_pathstr2path
+        self.split_ns = configmix.config._py_split_ns
+
+    def test_split_ns_wrong_type(self):
+        self.assertRaises(
+            AttributeError,    # no .partition
+            self.split_ns,
+            1)
 
 
 if configmix.config.fast_unquote is not None:
@@ -1888,6 +1912,13 @@
         def setUp(self):
             self.unquote = configmix.config.fast_unquote
             self.pathstr2path = configmix.config.fast_pathstr2path
+            self.split_ns = configmix.config._fast_split_ns
+
+        def test_split_ns_wrong_type(self):
+            self.assertRaises(
+                TypeError,    # a bytes argument is not accepted
+                self.split_ns,
+                b":")
 
 
 if __name__ == "__main__":