# HG changeset patch # User Franz Glasner # Date 1653895889 -7200 # Node ID 2b1c7a68f91335d5b76c0db6f65249d32e71cd93 # Parent b74f20e19c0171e9b9cb82ed4f5fccf16be4dcbf Enable indexed access to lists in the configuration using an access path string representation like "~NNN~" diff -r b74f20e19c01 -r 2b1c7a68f913 CHANGES.txt --- a/CHANGES.txt Sun May 29 15:32:54 2022 +0200 +++ b/CHANGES.txt Mon May 30 09:31:29 2022 +0200 @@ -16,6 +16,10 @@ ~~~~~~~~~ - **[feature]** + Enable indexed access to lists in the configuration using an access + path string representation like ``~NNN~` + +- **[feature]** Allow to enable and disable the internal caching - **[feature]** diff -r b74f20e19c01 -r 2b1c7a68f913 configmix/_speedups.c --- a/configmix/_speedups.c Sun May 29 15:32:54 2022 +0200 +++ b/configmix/_speedups.c Mon May 30 09:31:29 2022 +0200 @@ -75,6 +75,54 @@ } +static +int +_dec2num(PyObject *s, Py_ssize_t start, Py_ssize_t end, Py_ssize_t *result) +{ + Py_ssize_t i; + Py_UCS4 c; + Py_ssize_t r = 0; + int sign = 0; + + for (i=start; i<=end; i++) { + /* Overflow error check */ + if (r > 3275) { + PyErr_SetString(PyExc_OverflowError, "index too large"); + return -1; + } + r *= 10; + c = PyUnicode_ReadChar(s, i); + if ((c >= 48) && (c <= 57)) { /* 0 - 9 */ + r += (c - 48); + } + else { + if (i == start) { + /* check for number sign (but only at the first index) */ + if (c == 0x2d) { + sign = -1; + continue; + } + else { + if (c == 0x2b) { + sign = 1; + continue; + } + } + } + PyErr_Format(PyExc_ValueError, "invalid base-10 literal: %c", (int)c); + return -1; + } + } + if (sign >= 0) { + *result = r; + } + else { + *result = -r; + } + return 0; /* success */ +} + + #if defined(Py_LIMITED_API) static @@ -292,6 +340,19 @@ if (s_len == 0) { return Py_NewRef(s); } + if (s_len > 2) { + /* Check for ~NNN~ syntax */ + c = PyUnicode_ReadChar(s, 0); + if (c == 0x7e) { + c = PyUnicode_ReadChar(s, s_len - 1); + if (c == 0x7e) { + if (_dec2num(s, 1, s_len - 2, &i) == 0) { + return PyLong_FromSsize_t(i); + } + PyErr_Clear(); + } + } + } find = PyUnicode_FindChar(s, '%', 0, s_len, 1); if (find == -2) { return NULL; @@ -429,6 +490,11 @@ s_len = PyUnicode_GetLength(s); if (s_len < 0) { + if (PyObject_IsInstance(s, (PyObject *)&PyLong_Type)) { + PyErr_Clear(); + return PyUnicode_FromFormat("~%S~", s); + } + PyErr_SetString(PyExc_TypeError, "given object has no len()"); return NULL; } if (s_len == 0) { @@ -449,6 +515,7 @@ case 0x7d: case 0x5b: case 0x5d: + case 0x7e: need_quoting = 1; i = s_len; /* break the for-loop */ break; @@ -1638,7 +1705,7 @@ PyUnicode_InternInPlace(&(sstate->EMPTY_STR)); sstate->QUOTE_MAP = Py_BuildValue( - "{IsIsIsIsIsIsIsIsIsIsIs}", + "{IsIsIsIsIsIsIsIsIsIsIsIs}", 0x25, "%x25", /* QUOTE: % */ 0x2e, "%x2e", /* DOT: . */ 0x3a, "%x3a", /* NS_SEPARATOR: : */ @@ -1649,7 +1716,8 @@ 0x7b, "%x7b", 0x7d, "%x7d", 0x5b, "%x5b", - 0x5d, "%x5d"); + 0x5d, "%x5d", + 0x7e, "%x7e"); /* tilde ~ */ if (sstate->QUOTE_MAP == NULL) { return -1; } diff -r b74f20e19c01 -r 2b1c7a68f913 configmix/config.py --- a/configmix/config.py Sun May 29 15:32:54 2022 +0200 +++ b/configmix/config.py Mon May 30 09:31:29 2022 +0200 @@ -243,6 +243,7 @@ _STARTTOK_REF = _STARTTOK + REF_NAMESPACE + _NS_SEPARATOR _ENDTOK_REF = _ENDTOK _DOT = u(b'.') +_TILDE = u(b'~') _QUOTE = u(b'%') _QUOTE_x = u(b'x') _QUOTE_u = u(b'u') @@ -260,11 +261,12 @@ 0x7d: u(b'%x7d'), 0x5b: u(b'%x5b'), 0x5d: u(b'%x5d'), + 0x7e: u(b'%x7e'), # tilde `~` } _QUOTE_SAFE = u(b'abcdefghijklmnopqrstuvwxyz' b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' b'0123456789' - b'-_@!$&/\\()=?*+~;,<>^') + b'-_@!$&/\\()=?*+;,<>^') """Mostly used configuration key characters that do not need any quoting """ @@ -281,9 +283,19 @@ See also the :ref:`quoting` section. """ - # Quick check whether all of the chars are in _QUOTE_SAFE - if not s.lstrip(_QUOTE_SAFE): - return s + try: + # Quick check whether all of the chars are in _QUOTE_SAFE + if not s.lstrip(_QUOTE_SAFE): + return s + except AttributeError: + # + # Check whether s is an index (int) and return the special tag if + # it is so + # + if isinstance(s, int): + return "~%d~" % (s, ) + else: + raise # Slow path re_encode = False @@ -312,6 +324,15 @@ This is the inverse of :func:`.quote`. """ + s_len = len(s) + if s_len > 2 and s[0] == _TILDE and s[-1] == _TILDE: + try: + v = int(s[1:-1], 10) + if v // 10 > 3275: # be compatible to the fast C implementation + raise OverflowError("index too large") + return v + except (ValueError, OverflowError): + pass if _QUOTE not in s: return s parts = s.split(_QUOTE) diff -r b74f20e19c01 -r 2b1c7a68f913 tests/test.py --- a/tests/test.py Sun May 29 15:32:54 2022 +0200 +++ b/tests/test.py Mon May 30 09:31:29 2022 +0200 @@ -944,7 +944,14 @@ i, cfg.getvarl_s(u"test", u"List", i)) - def test48_index_access_to_lists_with_subdicts(self): + def test48_indexed_access_to_lists(self): + cfg = self._load(os.path.join(TESTDATADIR, "conf20.yml")) + for i in range(4): + self.assertEqual( + i, + cfg.getvar_s(u"test.List.~%d~" % (i, ))) + + def test49_index_access_to_lists_with_subdicts(self): cfg = self._load(os.path.join(TESTDATADIR, "index-access-for-jails.yml")) for idx in range(len(cfg.getvarl(u"the-list"))): @@ -952,6 +959,14 @@ idx, cfg.getvarl_s(u"the-list", idx, u"entry")) + def test50_index_access_to_lists_with_subdicts(self): + cfg = self._load(os.path.join(TESTDATADIR, + "index-access-for-jails.yml")) + for idx in range(len(cfg.getvarl(u"the-list"))): + self.assertEqual( + idx, + cfg.getvar_s(u"the-list.~%d~.entry" % (idx, ))) + class T02LoadAndMerge(_T02MixinLoadAndMerge, unittest.TestCase): @@ -1897,6 +1912,18 @@ jcfg = cfg.jailed(rootpath=(u"the-list", idx)) self.assertEqual(1, len(jcfg)) self.assertEqual(idx, jcfg.getvarl_s(u"entry")) + self.assertEqual((u"the-list", idx), jcfg._path) + self.assertEqual(u"the-list.~%d~." % (idx, ), jcfg._pathstr) + + def test_index_jail_access_with_strpath(self): + cfg = configmix.load(os.path.join( + TESTDATADIR, "index-access-for-jails.yml")) + for idx in range(len(cfg.getvarl(u"the-list"))): + jcfg = cfg.jailed(root=u"the-list.~%d~" % (idx, )) + self.assertEqual(1, len(jcfg)) + self.assertEqual(idx, jcfg.getvarl_s(u"entry")) + self.assertEqual((u"the-list", idx), jcfg._path) + self.assertEqual(u"the-list.~%d~." % (idx, ), jcfg._pathstr) class _TParserMixin: @@ -1906,13 +1933,13 @@ self.assertEqual(u"", self.unquote(e)) def test_quoting_and_unquoting_are_inverse(self): - for c in u"""%.:#|"'{}[]""": + for c in u"""%.:#|"'{}[]~""": qc = self.quote(c) self.assertTrue(qc.startswith(u"%x") and len(qc) == 4) self.assertEqual(c, self.unquote(qc)) def test_quoting_and_unquoting_are_inverse_all(self): - c = u"""%.:#|"'{}[]""" + c = u"""%.:#|"'{}[]~""" qc = self.quote(c) self.assertEqual(len(c)*4, len(qc)) self.assertEqual(c, self.unquote(qc)) @@ -1930,6 +1957,33 @@ self.assertEqual(configmix.config._QUOTE_SAFE, qc) self.assertEqual(configmix.config._QUOTE_SAFE, self.unquote(qc)) + def test_quote_index_to_tilde(self): + self.assertEqual(u"~4~", self.quote(4)) + + def test_unquote_index_with_tilde(self): + self.assertEqual(4, self.unquote(u"~4~")) + + def test_unquote_empty_tilde(self): + self.assertEqual(u"~~", self.unquote(u"~~")) + + def test_unquote_invalid_number_tilde(self): + self.assertEqual(u"~0x4~", self.unquote(u"~0x4~")) + + def test_unquote_invalid_number_tilde_2(self): + self.assertEqual(u"~\U00019001~", self.unquote(u"~%U00019001~")) + + def test_quote_unquote_indexes(self): + for idx in range(0, 10000): + self.assertEqual(idx, self.unquote(self.quote(idx))) + + def test_quote_unquote_negative_index(self): + for idx in (-1, -2, -3): + self.assertEqual(idx, self.unquote(self.quote(idx))) + + def test_index_overflow_border(self): + self.assertEqual(32759, self.unquote(u"~32759~")) + self.assertEqual(u"~32760~", self.unquote(u"~32760~")) + def test_unquote_unimax(self): self.assertEqual(u"\U00019001", self.unquote(u"%U00019001")) self.assertEqual(u"X\U00019AF1Z", self.unquote(u"X%U00019aF1Z")) @@ -2013,6 +2067,10 @@ p = self.pathstr2path(u"a%x2Eb.c%u002Ed.e%U0000002Ef") self.assertEqual((u"a.b", u"c.d", u"e.f"), p) + def test_split_unquote_with_index(self): + p = self.pathstr2path(u"a%x2Eb.~555~.c%u002Ed.e%U0000002Ef.~6~") + self.assertEqual((u"a.b", 555, u"c.d", u"e.f", 6), p) + def test_split_ns_empty(self): self.assertEqual((None, u""), self.split_ns(u"")) @@ -2243,7 +2301,7 @@ self.assertRaises( AttributeError, # no .lstrip self.quote, - 1) + 1.0) def test_unquote_wrong_type(self): self.assertRaises(