changeset 656:2b1c7a68f913

Enable indexed access to lists in the configuration using an access path string representation like "~NNN~"
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 30 May 2022 09:31:29 +0200
parents b74f20e19c01
children 213f0ec3bbbc
files CHANGES.txt configmix/_speedups.c configmix/config.py tests/test.py
diffstat 4 files changed, 161 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGES.txt	Sun May 29 15:32:54 2022 +0200
+++ b/CHANGES.txt	Mon May 30 09:31:29 2022 +0200
@@ -16,6 +16,10 @@
 ~~~~~~~~~
 
 - **[feature]**
+  Enable indexed access to lists in the configuration using an access
+  path string representation like ``~NNN~``
+
+- **[feature]**
   Allow to enable and disable the internal caching
 
 - **[feature]**
--- a/configmix/_speedups.c	Sun May 29 15:32:54 2022 +0200
+++ b/configmix/_speedups.c	Mon May 30 09:31:29 2022 +0200
@@ -75,6 +75,54 @@
 }
 
 
+/*
+ * Parse the characters s[start..end] (both inclusive) as a signed
+ * base-10 integer and store the value in *result.
+ * Only ASCII digits with an optional leading `+' or `-' are accepted;
+ * at least one digit is required.
+ *
+ * Returns 0 on success or -1 with a Python exception set.
+ */
+static
+int
+_dec2num(PyObject *s, Py_ssize_t start, Py_ssize_t end, Py_ssize_t *result)
+{
+    Py_ssize_t i;
+    Py_UCS4 c;
+    Py_ssize_t r = 0;
+    int sign = 0;
+    int ndigits = 0;
+
+    for (i=start; i<=end; i++) {
+        /* Overflow check: caps the accepted magnitude at 32759 */
+        if (r > 3275) {
+            PyErr_SetString(PyExc_OverflowError, "index too large");
+            return -1;
+        }
+        c = PyUnicode_ReadChar(s, i);
+        if ((c >= 48) && (c <= 57)) {    /* 0 - 9 */
+            r = r * 10 + (c - 48);
+            ndigits++;
+        }
+        else if ((i == start) && ((c == 0x2d) || (c == 0x2b))) {
+            /* number sign `-' or `+' (but only at the first index) */
+            sign = (c == 0x2d) ? -1 : 1;
+        }
+        else {
+            PyErr_Format(PyExc_ValueError, "invalid base-10 literal: %c", (int)c);
+            return -1;
+        }
+    }
+    if (ndigits == 0) {
+        /* a lone sign such as in "~-~" is not a number */
+        PyErr_SetString(PyExc_ValueError, "invalid base-10 literal: no digits");
+        return -1;
+    }
+    *result = (sign >= 0) ? r : -r;
+    return 0;  /* success */
+}
+
+
 #if defined(Py_LIMITED_API)
 
 static
@@ -292,6 +340,19 @@
     if (s_len == 0) {
         return Py_NewRef(s);
     }
+    if (s_len > 2) {
+        /* Check for ~NNN~ syntax */
+        c = PyUnicode_ReadChar(s, 0);
+        if (c == 0x7e) {
+            c = PyUnicode_ReadChar(s, s_len - 1);
+            if (c == 0x7e) {
+                if (_dec2num(s, 1, s_len - 2, &i) == 0) {
+                    return PyLong_FromSsize_t(i);
+                }
+                PyErr_Clear();
+            }
+        }
+    }
     find = PyUnicode_FindChar(s, '%', 0, s_len, 1);
     if (find == -2) {
         return NULL;
@@ -429,6 +490,11 @@
 
     s_len = PyUnicode_GetLength(s);
     if (s_len < 0) {
+        if (PyObject_IsInstance(s, (PyObject *)&PyLong_Type)) {
+            PyErr_Clear();
+            return PyUnicode_FromFormat("~%S~", s);
+        }
+        PyErr_SetString(PyExc_TypeError, "given object has no len()");
         return NULL;
     }
     if (s_len == 0) {
@@ -449,6 +515,7 @@
         case 0x7d:
         case 0x5b:
         case 0x5d:
+        case 0x7e:
             need_quoting = 1;
             i = s_len;   /* break the for-loop */
             break;
@@ -1638,7 +1705,7 @@
     PyUnicode_InternInPlace(&(sstate->EMPTY_STR));
 
     sstate->QUOTE_MAP = Py_BuildValue(
-        "{IsIsIsIsIsIsIsIsIsIsIs}",
+        "{IsIsIsIsIsIsIsIsIsIsIsIs}",
         0x25, "%x25",     /* QUOTE: % */
         0x2e, "%x2e",     /* DOT: . */
         0x3a, "%x3a",     /* NS_SEPARATOR: : */
@@ -1649,7 +1716,8 @@
         0x7b, "%x7b",
         0x7d, "%x7d",
         0x5b, "%x5b",
-        0x5d, "%x5d");
+        0x5d, "%x5d",
+        0x7e, "%x7e");    /* tilde ~ */
     if (sstate->QUOTE_MAP == NULL) {
         return -1;
     }
--- a/configmix/config.py	Sun May 29 15:32:54 2022 +0200
+++ b/configmix/config.py	Mon May 30 09:31:29 2022 +0200
@@ -243,6 +243,7 @@
 _STARTTOK_REF = _STARTTOK + REF_NAMESPACE + _NS_SEPARATOR
 _ENDTOK_REF = _ENDTOK
 _DOT = u(b'.')
+_TILDE = u(b'~')
 _QUOTE = u(b'%')
 _QUOTE_x = u(b'x')
 _QUOTE_u = u(b'u')
@@ -260,11 +261,12 @@
     0x7d: u(b'%x7d'),
     0x5b: u(b'%x5b'),
     0x5d: u(b'%x5d'),
+    0x7e: u(b'%x7e'),    # tilde `~`
 }
 _QUOTE_SAFE = u(b'abcdefghijklmnopqrstuvwxyz'
                 b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 b'0123456789'
-                b'-_@!$&/\\()=?*+~;,<>^')
+                b'-_@!$&/\\()=?*+;,<>^')
 """Mostly used configuration key characters that do not need any quoting
 
 """
@@ -281,9 +283,19 @@
     See also the :ref:`quoting` section.
 
     """
-    # Quick check whether all of the chars are in _QUOTE_SAFE
-    if not s.lstrip(_QUOTE_SAFE):
-        return s
+    try:
+        # Quick check whether all of the chars are in _QUOTE_SAFE
+        if not s.lstrip(_QUOTE_SAFE):
+            return s
+    except AttributeError:
+        #
+        # Check whether s is an index (int) and return the special tag if
+        # it is so
+        #
+        if isinstance(s, int):
+            return "~%d~" % (s, )
+        else:
+            raise
 
     # Slow path
     re_encode = False
@@ -312,6 +324,15 @@
     This is the inverse of :func:`.quote`.
 
     """
+    s_len = len(s)
+    if s_len > 2 and s[0] == _TILDE and s[-1] == _TILDE:
+        try:
+            v = int(s[1:-1], 10)
+            if v // 10 > 3275:  # be compatible to the fast C implementation
+                raise OverflowError("index too large")
+            return v
+        except (ValueError, OverflowError):
+            pass
     if _QUOTE not in s:
         return s
     parts = s.split(_QUOTE)
--- a/tests/test.py	Sun May 29 15:32:54 2022 +0200
+++ b/tests/test.py	Mon May 30 09:31:29 2022 +0200
@@ -944,7 +944,14 @@
                 i,
                 cfg.getvarl_s(u"test", u"List", i))
 
-    def test48_index_access_to_lists_with_subdicts(self):
+    def test48_indexed_access_to_lists(self):
+        cfg = self._load(os.path.join(TESTDATADIR, "conf20.yml"))
+        for i in range(4):
+            self.assertEqual(
+                i,
+                cfg.getvar_s(u"test.List.~%d~" % (i, )))
+
+    def test49_index_access_to_lists_with_subdicts(self):
         cfg = self._load(os.path.join(TESTDATADIR,
                                       "index-access-for-jails.yml"))
         for idx in range(len(cfg.getvarl(u"the-list"))):
@@ -952,6 +959,14 @@
                 idx,
                 cfg.getvarl_s(u"the-list", idx, u"entry"))
 
+    def test50_index_access_to_lists_with_subdicts(self):
+        cfg = self._load(os.path.join(TESTDATADIR,
+                                      "index-access-for-jails.yml"))
+        for idx in range(len(cfg.getvarl(u"the-list"))):
+            self.assertEqual(
+                idx,
+                cfg.getvar_s(u"the-list.~%d~.entry" % (idx, )))
+
 
 class T02LoadAndMerge(_T02MixinLoadAndMerge, unittest.TestCase):
 
@@ -1897,6 +1912,18 @@
             jcfg = cfg.jailed(rootpath=(u"the-list", idx))
             self.assertEqual(1, len(jcfg))
             self.assertEqual(idx, jcfg.getvarl_s(u"entry"))
+            self.assertEqual((u"the-list", idx), jcfg._path)
+            self.assertEqual(u"the-list.~%d~." % (idx, ), jcfg._pathstr)
+
+    def test_index_jail_access_with_strpath(self):
+        cfg = configmix.load(os.path.join(
+            TESTDATADIR, "index-access-for-jails.yml"))
+        for idx in range(len(cfg.getvarl(u"the-list"))):
+            jcfg = cfg.jailed(root=u"the-list.~%d~" % (idx, ))
+            self.assertEqual(1, len(jcfg))
+            self.assertEqual(idx, jcfg.getvarl_s(u"entry"))
+            self.assertEqual((u"the-list", idx), jcfg._path)
+            self.assertEqual(u"the-list.~%d~." % (idx, ), jcfg._pathstr)
 
 
 class _TParserMixin:
@@ -1906,13 +1933,13 @@
         self.assertEqual(u"", self.unquote(e))
 
     def test_quoting_and_unquoting_are_inverse(self):
-        for c in u"""%.:#|"'{}[]""":
+        for c in u"""%.:#|"'{}[]~""":
             qc = self.quote(c)
             self.assertTrue(qc.startswith(u"%x") and len(qc) == 4)
             self.assertEqual(c, self.unquote(qc))
 
     def test_quoting_and_unquoting_are_inverse_all(self):
-        c = u"""%.:#|"'{}[]"""
+        c = u"""%.:#|"'{}[]~"""
         qc = self.quote(c)
         self.assertEqual(len(c)*4, len(qc))
         self.assertEqual(c, self.unquote(qc))
@@ -1930,6 +1957,33 @@
         self.assertEqual(configmix.config._QUOTE_SAFE, qc)
         self.assertEqual(configmix.config._QUOTE_SAFE, self.unquote(qc))
 
+    def test_quote_index_to_tilde(self):
+        self.assertEqual(u"~4~", self.quote(4))
+
+    def test_unquote_index_with_tilde(self):
+        self.assertEqual(4, self.unquote(u"~4~"))
+
+    def test_unquote_empty_tilde(self):
+        self.assertEqual(u"~~", self.unquote(u"~~"))
+
+    def test_unquote_invalid_number_tilde(self):
+        self.assertEqual(u"~0x4~", self.unquote(u"~0x4~"))
+
+    def test_unquote_invalid_number_tilde_2(self):
+        self.assertEqual(u"~\U00019001~", self.unquote(u"~%U00019001~"))
+
+    def test_quote_unquote_indexes(self):
+        for idx in range(0, 10000):
+            self.assertEqual(idx, self.unquote(self.quote(idx)))
+
+    def test_quote_unquote_negative_index(self):
+        for idx in (-1, -2, -3):
+            self.assertEqual(idx, self.unquote(self.quote(idx)))
+
+    def test_index_overflow_border(self):
+        self.assertEqual(32759, self.unquote(u"~32759~"))
+        self.assertEqual(u"~32760~", self.unquote(u"~32760~"))
+
     def test_unquote_unimax(self):
         self.assertEqual(u"\U00019001", self.unquote(u"%U00019001"))
         self.assertEqual(u"X\U00019AF1Z", self.unquote(u"X%U00019aF1Z"))
@@ -2013,6 +2067,10 @@
         p = self.pathstr2path(u"a%x2Eb.c%u002Ed.e%U0000002Ef")
         self.assertEqual((u"a.b", u"c.d", u"e.f"), p)
 
+    def test_split_unquote_with_index(self):
+        p = self.pathstr2path(u"a%x2Eb.~555~.c%u002Ed.e%U0000002Ef.~6~")
+        self.assertEqual((u"a.b", 555, u"c.d", u"e.f", 6), p)
+
     def test_split_ns_empty(self):
         self.assertEqual((None, u""), self.split_ns(u""))
 
@@ -2243,7 +2301,7 @@
         self.assertRaises(
             AttributeError,    # no .lstrip
             self.quote,
-            1)
+            1.0)
 
     def test_unquote_wrong_type(self):
         self.assertRaises(