changeset 550:79db28e879f8

Provide a C-implementation of configmix.config.quote() also: fast_quote
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 02 Jan 2022 02:04:07 +0100
parents 84657447ab39
children 4c968c5cfce6
files configmix/_speedups.c configmix/config.py tests/_perf_config.py tests/test.py
diffstat 4 files changed, 140 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/configmix/_speedups.c	Sun Jan 02 01:00:10 2022 +0100
+++ b/configmix/_speedups.c	Sun Jan 02 02:04:07 2022 +0100
@@ -23,6 +23,7 @@
     PyObject *QUOTE;
     PyObject *NS_SEPARATOR;
     PyObject *EMPTY_STR;
+    PyObject *QUOTE_MAP;
 };
 
 
@@ -402,6 +403,60 @@
 
 static
 PyObject *
+fast_quote(PyObject *self, PyObject *s)   /* returns `s` itself or a translated copy */
+{
+    Py_ssize_t s_len;
+    Py_ssize_t i;
+    Py_UCS4 c;
+    int need_quoting;   /* becomes 1 as soon as one special character is seen */
+    struct speedups_state *sstate;
+
+    s_len = PyUnicode_GetLength(s);
+    if (s_len < 0) {   /* PyUnicode_GetLength() reported an error: pass it on */
+        return NULL;
+    }
+    if (s_len == 0) {   /* empty string: nothing to quote, hand back `s` */
+        Py_INCREF(s);
+        return s;
+    }
+    need_quoting = 0;
+    for (i=0; i<s_len; i++) {   /* scan only; the replacement happens below */
+        c = PyUnicode_ReadChar(s, i);   /* type already checked */
+        switch (c) {
+        case 0x25:   /* % */
+        case 0x2e:   /* . */
+        case 0x3a:   /* : */
+        case 0x23:   /* # */
+        case 0x7c:   /* | */
+        case 0x22:   /* double quote */
+        case 0x27:   /* single quote */
+        case 0x7b:   /* { */
+        case 0x7d:   /* } */
+        case 0x5b:   /* [ */
+        case 0x5d:   /* ] */
+            need_quoting = 1;
+            i = s_len;   /* break the for-loop */
+            break;
+        default:
+            /* VOID */
+            ;
+        }
+    }
+    if (!need_quoting) {   /* no special character found: return `s` unchanged */
+        Py_INCREF(s);
+        return s;
+    }
+    sstate = PyModule_GetState(self);
+    if (sstate == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "no module state available");
+        return NULL;
+    }
+    return PyUnicode_Translate(s, sstate->QUOTE_MAP, "strict");   /* QUOTE_MAP: code point -> replacement string */
+}
+
+
+static
+PyObject *
 fast_pathstr2path(PyObject *self, PyObject *varname)
 {
     Py_ssize_t varname_len;
@@ -520,6 +575,7 @@
 
 static struct PyMethodDef speedups_methods[] = {
     {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")},
+    {"fast_quote", fast_quote, METH_O, PyDoc_STR("C-implementation of configmix.quote")},
     {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")},
     {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")},
     {NULL, NULL, 0, NULL}
@@ -571,6 +627,23 @@
     }
     PyUnicode_InternInPlace(&(sstate->EMPTY_STR));
 
+    sstate->QUOTE_MAP = Py_BuildValue(   /* translation table used by fast_quote */
+        "{IsIsIsIsIsIsIsIsIsIsIs}",       /* dict with unsigned-int keys and string values */
+        0x25, "%x25",     /* QUOTE: % */
+        0x2e, "%x2e",     /* DOT: . */
+        0x3a, "%x3a",     /* NS_SEPARATOR: : */
+        0x23, "%x23",     /* COMMENT/anchor: # */
+        0x7c, "%x7c",     /* FILTER_SEPARATOR: | */
+        0x22, "%x22",     /* double quote */
+        0x27, "%x27",     /* single quote */
+        0x7b, "%x7b",     /* { */
+        0x7d, "%x7d",     /* } */
+        0x5b, "%x5b",     /* [ */
+        0x5d, "%x5d");    /* ] */
+    if (sstate->QUOTE_MAP == NULL) {   /* Py_BuildValue failed */
+        return -1;
+    }
+
     return 0;
 }
 
@@ -586,6 +659,7 @@
         Py_VISIT(sstate->QUOTE);
         Py_VISIT(sstate->NS_SEPARATOR);
         Py_VISIT(sstate->EMPTY_STR);
+        Py_VISIT(sstate->QUOTE_MAP);
     }
     return 0;
 }
@@ -602,6 +676,7 @@
         Py_CLEAR(sstate->QUOTE);
         Py_CLEAR(sstate->NS_SEPARATOR);
         Py_CLEAR(sstate->EMPTY_STR);
+        Py_CLEAR(sstate->QUOTE_MAP);
     }
     return 0;
 }
--- a/configmix/config.py	Sun Jan 02 01:00:10 2022 +0100
+++ b/configmix/config.py	Sun Jan 02 02:04:07 2022 +0100
@@ -31,9 +31,11 @@
 from .compat import u, uchr, n, str_and_u, PY2
 from .constants import REF_NAMESPACE, NONE_FILTER, EMPTY_FILTER
 try:
-    from ._speedups import fast_unquote, fast_pathstr2path, _fast_split_ns
+    from ._speedups import (fast_unquote, fast_quote, fast_pathstr2path,
+                            _fast_split_ns)
 except ImportError:
     fast_unquote = None
+    fast_quote = None
     fast_pathstr2path = None
     _fast_split_ns = None
 
@@ -257,7 +259,7 @@
 """
 
 
-def quote(s):
+def py_quote(s):
     """Replace important special characters in string `s` by replacing
     them with ``%xNN`` where `NN` are the two hexadecimal digits of the
     characters unicode codepoint value.
@@ -286,6 +288,12 @@
         return s
 
 
+if fast_quote:  # None (falsy) when the _speedups import above failed
+    quote = fast_quote
+else:
+    quote = py_quote  # pure-Python fallback
+
+
 def py_unquote(s):
     """Unquote the content of `s`: handle all patterns ``%xNN``,
     ``%uNNNN`` or ``%UNNNNNNNN``.
--- a/tests/_perf_config.py	Sun Jan 02 01:00:10 2022 +0100
+++ b/tests/_perf_config.py	Sun Jan 02 02:04:07 2022 +0100
@@ -16,20 +16,22 @@
 all = not opts or "all" in opts
 
 try:
-    from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns
+    from configmix.config import fast_unquote, fast_quote, \
+                                 fast_pathstr2path, _fast_split_ns
 except ImportError:
-    fast_unquote = fast_pathstr2path = _fast_split_ns = None
+    fast_unquote = fast_quote = fast_pathstr2path = _fast_split_ns = None
 
 setup = """
 import os
 
 import configmix
-from configmix.config import _HIER_SEPARATOR, quote, py_pathstr2path, \
-                             py_unquote, _py_split_ns
+from configmix.config import _HIER_SEPARATOR, \
+                             py_quote, py_unquote, py_pathstr2path, \
+                             _py_split_ns
 try:
-    from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns
+    from configmix.config import fast_unquote, fast_quote, fast_pathstr2path, _fast_split_ns
 except ImportError:
-    fast_unquote = fast_pathstr2path = _fast_split_ns = None
+    fast_unquote = fast_quote = fast_pathstr2path = _fast_split_ns = None
 
 TESTDATADIR = os.path.join(
     os.path.abspath(os.path.dirname(configmix.__file__)),
@@ -64,8 +66,12 @@
     if fast_pathstr2path:
         print("fast-pathstr2path/non-empty: %.4f" % timeit.timeit('a = fast_pathstr2path(s1)', setup=setup, number=num_quote))
         print("fast-pathstr2path/empty: %.4f" % timeit.timeit('a = fast_pathstr2path(se)', setup=setup, number=num_quote))
-    print("quote/nothing: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote))
-    print("quote/yes: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote))
+    print("quote/nothing: %.4f" % timeit.timeit('a = [py_quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote))
+    print("quote/yes: %.4f" % timeit.timeit('a = [py_quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote))
+    if fast_quote:  # None when the import at the top of this script failed
+        print("fast-quote/nothing: %.4f" % timeit.timeit('a = [fast_quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote))
+        print("fast-quote/yes: %.4f" % timeit.timeit('a = [fast_quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote))
+        
     print("split-ns/no-ns: %.4f" % timeit.timeit('a = _py_split_ns(s1)', setup=setup, number=num_quote))
     print("split-ns/ns: %.4f" % timeit.timeit('a = _py_split_ns(ns_s1)', setup=setup, number=num_quote))    
     if _fast_split_ns:
--- a/tests/test.py	Sun Jan 02 01:00:10 2022 +0100
+++ b/tests/test.py	Sun Jan 02 02:04:07 2022 +0100
@@ -1774,23 +1774,35 @@
 
 class _TParserMixin:
     def test_quote_and_unquote_empty(self):
-        e = configmix.quote(u"")
+        e = self.quote(u"")
         self.assertEqual(u"", e)
         self.assertEqual(u"", self.unquote(e))
 
     def test_quoting_and_unquoting_are_inverse(self):
         for c in u"""%.:#|"'{}[]""":
-            qc = configmix.quote(c)
+            qc = self.quote(c)
             self.assertTrue(qc.startswith(u"%x") and len(qc) == 4)
             self.assertEqual(c, self.unquote(qc))
 
+    def test_quoting_and_unquoting_are_inverse_all(self):
+        c = u"""%.:#|"'{}[]"""  # same characters as the per-character test, as one string
+        qc = self.quote(c)
+        self.assertEqual(len(c)*4, len(qc))  # expects 4 output characters per input character
+        self.assertEqual(c, self.unquote(qc))  # round trip must give back the input
+
     def test_quoting_and_unquoting_are_identical(self):
         # other characters
         for c in configmix.config._QUOTE_SAFE:
-            qc = configmix.quote(c)
+            qc = self.quote(c)
             self.assertEqual(c, qc)
             self.assertEqual(c, self.unquote(qc))
 
+    def test_quoting_and_unquoting_are_identical_all(self):
+        # other characters: the whole _QUOTE_SAFE string in one call
+        qc = self.quote(configmix.config._QUOTE_SAFE)
+        self.assertEqual(configmix.config._QUOTE_SAFE, qc)  # quoting must not change it
+        self.assertEqual(configmix.config._QUOTE_SAFE, self.unquote(qc))  # nor must unquoting
+
     def test_unquote_unimax(self):
         self.assertEqual(u"\U00019001", self.unquote(u"%U00019001"))
         self.assertEqual(u"X\U00019AF1Z", self.unquote(u"X%U00019aF1Z"))
@@ -1896,6 +1908,7 @@
 
     def setUp(self):
         self.unquote = configmix.config.py_unquote
+        self.quote = configmix.config.py_quote
         self.pathstr2path = configmix.config.py_pathstr2path
         self.split_ns = configmix.config._py_split_ns
 
@@ -1905,12 +1918,25 @@
             self.split_ns,
             1)
 
+    def test_quote_wrong_type(self):
+        self.assertRaises(
+            AttributeError,    # no .lstrip on an int
+            self.quote,
+            1)  # an int instead of a string
+
+    def test_unquote_wrong_type(self):
+        self.assertRaises(
+            TypeError,    # argument of type "int" is not iterable
+            self.unquote,
+            1)  # an int instead of a string
+
 
 if configmix.config.fast_unquote is not None:
     class T10FastParser(_TParserMixin, unittest.TestCase):
 
         def setUp(self):
             self.unquote = configmix.config.fast_unquote
+            self.quote = configmix.config.fast_quote
             self.pathstr2path = configmix.config.fast_pathstr2path
             self.split_ns = configmix.config._fast_split_ns
 
@@ -1920,6 +1946,18 @@
                 self.split_ns,
                 b":")
 
+        def test_quote_wrong_type(self):
+            self.assertRaises(
+                TypeError,  # expected for a bytes argument
+                self.quote,
+                b":")  # bytes instead of a text string
+
+        def test_unquote_wrong_type(self):
+            self.assertRaises(
+                TypeError,  # expected for a bytes argument
+                self.unquote,
+                b":")  # bytes instead of a text string
+
 
 if __name__ == "__main__":
     unittest.main()