Mercurial > hgrepos > Python > apps > py-cutils
changeset 296:ca293f708cb4
Begin some preparation for handling glob-style patterns in treeview.
Needed to implement inclusions and exclusions.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 02 Mar 2025 22:54:40 +0100 |
| parents | 4a259fb9968e |
| children | 141a3aa0b403 |
| files | cutils/util/glob.py setup.cfg tests/test_match.py |
| diffstat | 3 files changed, 192 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cutils/util/glob.py Sun Mar 02 22:54:40 2025 +0100 @@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License: BSD-3-Clause
# :-
r"""Glob handling.

.. seealso::
   - https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-
   - https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob

The following rules are used to interpret glob patterns:

- The ``*`` character matches zero or more characters of a name
  component without crossing directory boundaries.

- The ``**`` characters match zero or more characters crossing
  directory boundaries.

- The ``?`` character matches exactly one character of a name component.

- The backslash character (``\``) is used to escape characters that
  would otherwise be interpreted as special characters. The expression
  ``\\`` matches a single backslash and ``\{`` matches a left brace for
  example.

- The ``[ ]`` characters are a bracket expression that matches a single
  character of a name component out of a set of characters. For example,
  ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may
  be used to specify a range so ``[a-z]`` specifies a range that matches
  from "``a``" to "``z``" (inclusive). These forms can be mixed so
  ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or
  "``g``".

  If the character after the ``[`` is a ``!`` then it is used for negation
  so ``[!a-c]`` matches any character except "``a``", "``b``", or "``c``".

  Within a bracket expression the ``*``, ``?`` and ``\`` characters match
  themselves.

  The ``-`` character matches itself if it is the first or last character
  within the brackets, or the first or last character after the ``!`` if
  negating.

  Also, the ``]`` character matches itself if it is the first character
  within the brackets, or the first character after the ``!`` if negating.

- The ``{ }`` characters are a group of subpatterns, where the group matches
  if any subpattern in the group matches.

  The ``,`` character is used to separate the subpatterns. Groups cannot be
  nested.

- Leading period/dot characters in file names are treated as regular
  characters in match operations. For example, the ``*`` glob pattern
  matches file name ``.login``.

- All other characters match themselves.

"""

from __future__ import print_function, absolute_import


__all__ = ["glob_to_regexp"]


from . import PY2


def glob_to_regexp(g):
    """Placeholder for the glob-to-regexp translation.

    Not implemented yet: the body is empty, so the call currently
    returns `None` for every `g`.

    """
    pass


class CharIter(object):

    """Iterator over byte or unicode strings with peek support.

    On Python3 always yields an octet of :class:`bytes` instead of
    :class:`int`s if the iterator iterates over :class:`bytes`.  The
    same holds for :meth:`peek`, so a peeked item is always equal to the
    item the following :func:`next` call returns.

    :param w: a byte string, a text string or an already existing
              iterator; strings are wrapped with :func:`iter`, anything
              else is used as-is

    """

    __slots__ = ("_it", "_nch")

    def __init__(self, w):
        # One-item lookahead buffer; `None` means "nothing buffered"
        self._nch = None
        # NOTE(review): PY2 comes from the package and is presumably true
        #               when running on Python 2 -- confirm.
        if PY2:
            string_types = (bytes, unicode)   # noqa: F821 undefined name
        else:
            string_types = (bytes, str)
        self._it = iter(w) if isinstance(w, string_types) else w

    @staticmethod
    def _as_char(c):
        """Convert an :class:`int` item into a :class:`bytes` octet.

        Everything else (characters, `None`) is returned unchanged.

        """
        return bytes((c,)) if isinstance(c, int) else c

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next character.

        :raises StopIteration: if the underlying iterator is exhausted

        """
        c = self._nch
        if c is not None:
            # Hand out the item that has been buffered by `peek()`
            self._nch = None
            return c
        return self._as_char(next(self._it))

    if PY2:
        next = __next__

    def peek(self):
        """Peek the next character.

        Return `None` if the iterator is exhausted.

        """
        if self._nch is None:
            # Normalize here too: the buffered item is returned verbatim
            # by `__next__`, so it must already be in its final form.
            # Without this, peeking into `bytes` on Python3 yields `int`.
            self._nch = self._as_char(next(self._it, None))
        return self._nch
--- a/setup.cfg Fri Feb 28 14:11:20 2025 +0100 +++ b/setup.cfg Sun Mar 02 22:54:40 2025 +0100 @@ -58,3 +58,6 @@ exclude = # Ignore the vendored crcmod2/crcmod sub-package cutils/crcmod +per-file-ignores = + # E501: line too long + cutils/util/glob.py:E501
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_match.py Sun Mar 02 22:54:40 2025 +0100 @@ -0,0 +1,67 @@  # noqa: E501
# -*- coding: utf-8 -*-
r"""Unit tests for :mod:`cutils.util.glob`

"""

from __future__ import absolute_import, print_function

import _test_setup    # noqa: F401 imported but unused

import sys
import unittest

from cutils.util.glob import CharIter


class TestCharIter(unittest.TestCase):

    # NOTE: Comments instead of docstrings are used on purpose: unittest
    #       would print docstrings instead of the test names otherwise.

    def test_transitive_iter(self):
        # iter() on a CharIter must hand back the very same object
        char_iter = CharIter("1234")
        self.assertIs(iter(char_iter), char_iter)

    def test_native_str(self):
        # Native strings are yielded character by character
        collected = list(CharIter("1234"))
        self.assertEqual("1234", "".join(collected))

    def test_unicode_str(self):
        # Unicode strings are yielded character by character
        collected = list(CharIter(u"1234"))
        self.assertEqual(u"1234", "".join(collected))

    def test_byte_str(self):
        # Byte strings are yielded as joinable single octets
        collected = list(CharIter(b"1234"))
        self.assertEqual(b"1234", b"".join(collected))

    def test_peek_exhausted(self):
        # Peeking into a fully consumed iterator yields None
        char_iter = CharIter("1234")
        list(char_iter)
        self.assertIsNone(char_iter.peek())

    def test_peek_first(self):
        # Peeking does not consume the very first character
        char_iter = CharIter("1234")
        self.assertEqual("1", char_iter.peek())
        remaining = "".join(char_iter)
        self.assertEqual("1234", remaining)
        self.assertIsNone(char_iter.peek())

    def test_peek_from_second(self):
        # Peeking in the middle of an iteration does not consume either
        char_iter = CharIter("1234")
        self.assertEqual("1", char_iter.peek())
        self.assertEqual("1", next(char_iter))
        self.assertEqual("2", char_iter.peek())
        remaining = "".join(char_iter)
        self.assertEqual("234", remaining)
        self.assertIsNone(char_iter.peek())


if __name__ == "__main__":
    sys.exit(unittest.main())
