comparison cutils/util/glob.py @ 296:ca293f708cb4

Begin some preparation for handling glob-style patterns in treeview. Needed to implement inclusions and exclusions.
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 02 Mar 2025 22:54:40 +0100
parents
children 141a3aa0b403
comparison
equal deleted inserted replaced
295:4a259fb9968e 296:ca293f708cb4
1 # -*- coding: utf-8 -*-
2 # :-
3 # :Copyright: (c) 2020-2025 Franz Glasner
4 # :License: BSD-3-Clause
5 # :-
6 r"""Glob handling.
7
8 .. seealso::
9 - https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-
10 - https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
11
12 The following rules are used to interpret glob patterns:
13
14 - The ``*`` character matches zero or more characters of a name
15 component without crossing directory boundaries.
16
17 - The ``**`` characters match zero or more characters crossing
18 directory boundaries.
19
20 - The ``?`` character matches exactly one character of a name component.
21
22 - The backslash character (``\``) is used to escape characters that
23 would otherwise be interpreted as special characters. The expression
24 ``\\`` matches a single backslash and ``\{`` matches a left brace for
25 example.
26
27 - The ``[ ]`` characters are a bracket expression that match a single
28 character of a name component out of a set of characters. For example,
29 ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may
30 be used to specify a range so ``[a-z]`` specifies a range that matches
31 from "``a``" to "``z``" (inclusive). These forms can be mixed so
32 ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or
33 "``g``".
34
35 If the character after the ``[`` is a ``!`` then it is used for negation
36 so ``[!a-c]`` matches any character except "``a``", "``b``", or "``c``".
37
38 Within a bracket expression the ``*``, ``?`` and ``\`` characters match
39 themselves.
40
41 The ``-`` character matches itself if it is the first or last character
42 within the brackets, or the first or last character after the ``!`` if
43 negating.
44
45 Also, the ``]`` character matches itself if it is the first character
46 within the brackets, or the first character after the ``!`` if negating.
47
48 - The ``{ }`` characters are a group of subpatterns, where the group matches
49 if any subpattern in the group matches.
50
51 The ``,`` character is used to separate the subpatterns. Groups cannot be
52 nested.
53
54 - Leading period/dot characters in file name are treated as regular characters
55 in match operations. For example, the ``*`` glob pattern matches file name
56 ``.login``.
57
58 - All other characters match themselves.
59
60 """
61
62 from __future__ import print_function, absolute_import
63
64
65 __all__ = ["glob_to_regexp"]
66
67
68 from . import PY2
69
70
def glob_to_regexp(g):
    """Placeholder for the glob pattern conversion.

    Judging by its name this function is meant to translate the glob
    pattern `g` into a regular expression.

    It is not implemented yet: `g` is ignored and `None` is returned.

    """
    return None
73
74
class CharIter(object):

    """Iterator over byte or unicode strings with peek support.

    On Python3 always yields an octet of :class:`bytes` instead of
    :class:`int`s if the iterator iterates over :class:`bytes`.
    This holds for characters returned by :meth:`peek` as well.

    """

    __slots__ = ("_it", "_nch")

    def __init__(self, w):
        """Start iterating over `w`.

        :param w: a byte string, a unicode string, or an object that is
                  used directly as the underlying iterator

        """
        # The look-ahead slot: holds a character that has been peeked
        # but not yet consumed, `None` otherwise.
        self._nch = None
        if PY2:
            string_types = (bytes, unicode)   # noqa: F821 undefined name
        else:
            string_types = (bytes, str)
        # Strings need an explicit iterator; everything else is taken
        # as is and must support `next()` by itself.
        self._it = iter(w) if isinstance(w, string_types) else w

    @staticmethod
    def _normalize(c):
        """Convert an :class:`int` item into a one-octet :class:`bytes`.

        Iterating over :class:`bytes` on Python3 yields :class:`int`s.
        Every other value (including `None`) is returned unchanged.

        """
        return bytes((c,)) if isinstance(c, int) else c

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next character.

        A character that has been peeked before is handed out first.

        :raises StopIteration: if the underlying iterator is exhausted

        """
        if self._nch is not None:
            c, self._nch = self._nch, None
            return c
        return self._normalize(next(self._it))

    # Python 2 spells the iterator protocol method `next`. The alias is
    # harmless on Python 3 and keeps version checks out of the class body.
    next = __next__

    def peek(self):
        """Peek the next character.

        The character is not consumed: the following call to
        :func:`next` returns the very same value.

        Return `None` if the iterator is exhausted.

        """
        if self._nch is None:
            # Normalize here too: otherwise a raw `int` would be cached
            # and leak out of both `peek()` and the following `next()`.
            self._nch = self._normalize(next(self._it, None))
        return self._nch