Mercurial > hgrepos > Python > libs > ConfigMix
changeset 550:79db28e879f8
Also provide a C-implementation of configmix.config.quote(): fast_quote
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 02 Jan 2022 02:04:07 +0100 |
| parents | 84657447ab39 |
| children | 4c968c5cfce6 |
| files | configmix/_speedups.c configmix/config.py tests/_perf_config.py tests/test.py |
| diffstat | 4 files changed, 140 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/configmix/_speedups.c Sun Jan 02 01:00:10 2022 +0100 +++ b/configmix/_speedups.c Sun Jan 02 02:04:07 2022 +0100 @@ -23,6 +23,7 @@ PyObject *QUOTE; PyObject *NS_SEPARATOR; PyObject *EMPTY_STR; + PyObject *QUOTE_MAP; }; @@ -402,6 +403,60 @@ static PyObject * +fast_quote(PyObject *self, PyObject *s) +{ + Py_ssize_t s_len; + Py_ssize_t i; + Py_UCS4 c; + int need_quoting; + struct speedups_state *sstate; + + s_len = PyUnicode_GetLength(s); + if (s_len < 0) { + return NULL; + } + if (s_len == 0) { + Py_INCREF(s); + return s; + } + need_quoting = 0; + for (i=0; i<s_len; i++) { + c = PyUnicode_ReadChar(s, i); /* type already checked */ + switch (c) { + case 0x25: + case 0x2e: + case 0x3a: + case 0x23: + case 0x7c: + case 0x22: + case 0x27: + case 0x7b: + case 0x7d: + case 0x5b: + case 0x5d: + need_quoting = 1; + i = s_len; /* break the for-loop */ + break; + default: + /* VOID */ + ; + } + } + if (!need_quoting) { + Py_INCREF(s); + return s; + } + sstate = PyModule_GetState(self); + if (sstate == NULL) { + PyErr_SetString(PyExc_RuntimeError, "no module state available"); + return NULL; + } + return PyUnicode_Translate(s, sstate->QUOTE_MAP, "strict"); +} + + +static +PyObject * fast_pathstr2path(PyObject *self, PyObject *varname) { Py_ssize_t varname_len; @@ -520,6 +575,7 @@ static struct PyMethodDef speedups_methods[] = { {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")}, + {"fast_quote", fast_quote, METH_O, PyDoc_STR("C-implementation of configmix.quote")}, {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")}, {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")}, {NULL, NULL, 0, NULL} @@ -571,6 +627,23 @@ } PyUnicode_InternInPlace(&(sstate->EMPTY_STR)); + sstate->QUOTE_MAP = Py_BuildValue( + "{IsIsIsIsIsIsIsIsIsIsIs}", + 0x25, "%x25", /* QUOTE: % */ + 0x2e, "%x2e", /* DOT: . 
*/ + 0x3a, "%x3a", /* NS_SEPARATOR: : */ + 0x23, "%x23", /* COMMENT/anchor: # */ + 0x7c, "%x7c", /* FILTER_SEPARATOR: | */ + 0x22, "%x22", + 0x27, "%x27", + 0x7b, "%x7b", + 0x7d, "%x7d", + 0x5b, "%x5b", + 0x5d, "%x5d"); + if (sstate->QUOTE_MAP == NULL) { + return -1; + } + return 0; } @@ -586,6 +659,7 @@ Py_VISIT(sstate->QUOTE); Py_VISIT(sstate->NS_SEPARATOR); Py_VISIT(sstate->EMPTY_STR); + Py_VISIT(sstate->QUOTE_MAP); } return 0; } @@ -602,6 +676,7 @@ Py_CLEAR(sstate->QUOTE); Py_CLEAR(sstate->NS_SEPARATOR); Py_CLEAR(sstate->EMPTY_STR); + Py_CLEAR(sstate->QUOTE_MAP); } return 0; }
--- a/configmix/config.py Sun Jan 02 01:00:10 2022 +0100 +++ b/configmix/config.py Sun Jan 02 02:04:07 2022 +0100 @@ -31,9 +31,11 @@ from .compat import u, uchr, n, str_and_u, PY2 from .constants import REF_NAMESPACE, NONE_FILTER, EMPTY_FILTER try: - from ._speedups import fast_unquote, fast_pathstr2path, _fast_split_ns + from ._speedups import (fast_unquote, fast_quote, fast_pathstr2path, + _fast_split_ns) except ImportError: fast_unquote = None + fast_quote = None fast_pathstr2path = None _fast_split_ns = None @@ -257,7 +259,7 @@ """ -def quote(s): +def py_quote(s): """Replace important special characters in string `s` by replacing them with ``%xNN`` where `NN` are the two hexadecimal digits of the characters unicode codepoint value. @@ -286,6 +288,12 @@ return s +if fast_quote: + quote = fast_quote +else: + quote = py_quote + + def py_unquote(s): """Unquote the content of `s`: handle all patterns ``%xNN``, ``%uNNNN`` or ``%UNNNNNNNN``.
--- a/tests/_perf_config.py Sun Jan 02 01:00:10 2022 +0100 +++ b/tests/_perf_config.py Sun Jan 02 02:04:07 2022 +0100 @@ -16,20 +16,22 @@ all = not opts or "all" in opts try: - from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns + from configmix.config import fast_unquote, fast_quote, \ + fast_pathstr2path, _fast_split_ns except ImportError: - fast_unquote = fast_pathstr2path = _fast_split_ns = None + fast_unquote = fast_quote = fast_pathstr2path = _fast_split_ns = None setup = """ import os import configmix -from configmix.config import _HIER_SEPARATOR, quote, py_pathstr2path, \ - py_unquote, _py_split_ns +from configmix.config import _HIER_SEPARATOR, \ + py_quote, py_unquote, py_pathstr2path, \ + _py_split_ns try: - from configmix.config import fast_unquote, fast_pathstr2path, _fast_split_ns + from configmix.config import fast_unquote, fast_quote, fast_pathstr2path, _fast_split_ns except ImportError: - fast_unquote = fast_pathstr2path = _fast_split_ns = None + fast_unquote = fast_quote = fast_pathstr2path = _fast_split_ns = None TESTDATADIR = os.path.join( os.path.abspath(os.path.dirname(configmix.__file__)), @@ -64,8 +66,12 @@ if fast_pathstr2path: print("fast-pathstr2path/non-empty: %.4f" % timeit.timeit('a = fast_pathstr2path(s1)', setup=setup, number=num_quote)) print("fast-pathstr2path/empty: %.4f" % timeit.timeit('a = fast_pathstr2path(se)', setup=setup, number=num_quote)) - print("quote/nothing: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote)) - print("quote/yes: %.4f" % timeit.timeit('a = [quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote)) + print("quote/nothing: %.4f" % timeit.timeit('a = [py_quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote)) + print("quote/yes: %.4f" % timeit.timeit('a = [py_quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote)) + if fast_quote: + 
print("fast-quote/nothing: %.4f" % timeit.timeit('a = [fast_quote(vp) for vp in (u"abc", u"def", u"hij")]', setup=setup, number=num_quote)) + print("fast-quote/yes: %.4f" % timeit.timeit('a = [fast_quote(vp) for vp in (u"ab:c", u"def", u"h.ij")]', setup=setup, number=num_quote)) + print("split-ns/no-ns: %.4f" % timeit.timeit('a = _py_split_ns(s1)', setup=setup, number=num_quote)) print("split-ns/ns: %.4f" % timeit.timeit('a = _py_split_ns(ns_s1)', setup=setup, number=num_quote)) if _fast_split_ns:
--- a/tests/test.py Sun Jan 02 01:00:10 2022 +0100 +++ b/tests/test.py Sun Jan 02 02:04:07 2022 +0100 @@ -1774,23 +1774,35 @@ class _TParserMixin: def test_quote_and_unquote_empty(self): - e = configmix.quote(u"") + e = self.quote(u"") self.assertEqual(u"", e) self.assertEqual(u"", self.unquote(e)) def test_quoting_and_unquoting_are_inverse(self): for c in u"""%.:#|"'{}[]""": - qc = configmix.quote(c) + qc = self.quote(c) self.assertTrue(qc.startswith(u"%x") and len(qc) == 4) self.assertEqual(c, self.unquote(qc)) + def test_quoting_and_unquoting_are_inverse_all(self): + c = u"""%.:#|"'{}[]""" + qc = self.quote(c) + self.assertEqual(len(c)*4, len(qc)) + self.assertEqual(c, self.unquote(qc)) + def test_quoting_and_unquoting_are_identical(self): # other characters for c in configmix.config._QUOTE_SAFE: - qc = configmix.quote(c) + qc = self.quote(c) self.assertEqual(c, qc) self.assertEqual(c, self.unquote(qc)) + def test_quoting_and_unquoting_are_identical_all(self): + # other characters + qc = self.quote(configmix.config._QUOTE_SAFE) + self.assertEqual(configmix.config._QUOTE_SAFE, qc) + self.assertEqual(configmix.config._QUOTE_SAFE, self.unquote(qc)) + def test_unquote_unimax(self): self.assertEqual(u"\U00019001", self.unquote(u"%U00019001")) self.assertEqual(u"X\U00019AF1Z", self.unquote(u"X%U00019aF1Z")) @@ -1896,6 +1908,7 @@ def setUp(self): self.unquote = configmix.config.py_unquote + self.quote = configmix.config.py_quote self.pathstr2path = configmix.config.py_pathstr2path self.split_ns = configmix.config._py_split_ns @@ -1905,12 +1918,25 @@ self.split_ns, 1) + def test_quote_wrong_type(self): + self.assertRaises( + AttributeError, # no .lstrip + self.quote, + 1) + + def test_unquote_wrong_type(self): + self.assertRaises( + TypeError, # argument of type "int" is not iterable + self.unquote, + 1) + if configmix.config.fast_unquote is not None: class T10FastParser(_TParserMixin, unittest.TestCase): def setUp(self): self.unquote = configmix.config.fast_unquote + 
self.quote = configmix.config.fast_quote self.pathstr2path = configmix.config.fast_pathstr2path self.split_ns = configmix.config._fast_split_ns @@ -1920,6 +1946,18 @@ self.split_ns, b":") + def test_quote_wrong_type(self): + self.assertRaises( + TypeError, + self.quote, + b":") + + def test_unquote_wrong_type(self): + self.assertRaises( + TypeError, + self.unquote, + b":") + if __name__ == "__main__": unittest.main()
