Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/util/glob.py @ 297:141a3aa0b403
First version of converting a glob-style pattern to a regex
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 04 Mar 2025 01:52:18 +0100 |
| parents | ca293f708cb4 |
| children | 16a5c337fcb9 |
comparison
equal
deleted
inserted
replaced
| 296:ca293f708cb4 | 297:141a3aa0b403 |
|---|---|
| 25 example. | 25 example. |
| 26 | 26 |
| 27 - The ``[ ]`` characters are a bracket expression that matches a single | 27 - The ``[ ]`` characters are a bracket expression that matches a single |
| 28 character of a name component out of a set of characters. For example, | 28 character of a name component out of a set of characters. For example, |
| 29 ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may | 29 ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may |
| 30 be used to specify a range so ``[a-z]`` specifies a range that matches | 30 be used to specify a range so ``[a-z]`` specifies a range that matches |
| 31 from "``a``" to "``z``" (inclusive). These forms can be mixed so | 31 from "``a``" to "``z``" (inclusive). These forms can be mixed so |
| 32 ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or | 32 ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or |
| 33 "``g``". | 33 "``g``". |
| 34 | 34 |
| 35 If the character after the ``[`` is a ``!`` then it is used for negation | 35 If the character after the ``[`` is a ``!`` then it is used for negation |
| 43 negating. | 43 negating. |
| 44 | 44 |
| 45 Also, the ``]`` character matches itself if it is the first character | 45 Also, the ``]`` character matches itself if it is the first character |
| 46 within the brackets, or the first character after the ``!`` if negating. | 46 within the brackets, or the first character after the ``!`` if negating. |
| 47 | 47 |
| 48 - The ``{ }`` characters are a group of subpatterns, where the group matches | 48 - The curly brace characters ``{ }`` denote a group of subpatterns, where |
| 49 if any subpattern in the group matches. | 49 the group matches if any subpattern in the group matches. |
| 50 | 50 |
| 51 The ``,`` character is used to separate the subpatterns. Groups cannot be | 51 The ``,`` character is used to separate the subpatterns. Groups can be |
| 52 nested. | 52 nested. |
| 53 | 53 |
| 54 - Leading period/dot characters in file names are treated as regular characters | 54 - Leading period/dot characters in file names are treated as regular characters |
| 55 in match operations. For example, the ``*`` glob pattern matches the file name | 55 in match operations. For example, the ``*`` glob pattern matches the file name |
| 56 ``.login``. | 56 ``.login``. |
| 63 | 63 |
| 64 | 64 |
| 65 __all__ = ["glob_to_regexp"] | 65 __all__ = ["glob_to_regexp"] |
| 66 | 66 |
| 67 | 67 |
| 68 import logging | |
| 69 import re | |
| 70 | |
| 68 from . import PY2 | 71 from . import PY2 |
| 69 | 72 |
| 70 | 73 |
| 71 def glob_to_regexp(g): | 74 def glob_to_regexp(globobj): |
| 72 pass | 75 """Convert a glob string to a regular expression string. |
| 76 | |
| 77 The resulting regexp is *not* rooted. | |
| 78 | |
| 79 :param globobj: the pattern with glob syntax or an iterator over the | |
| 80 characters in such a pattern | |
| 81 :type globobj: str or iterator over str | |
| 82 | |
| 83 """ | |
| 84 res = [] | |
| 85 grouplvl = 0 # support for nested pattern groups | |
| 86 globiter = CharIter(globobj) | |
| 87 for c in globiter: | |
| 88 if c == '?': | |
| 89 res.append(".") | |
| 90 elif c == '*': | |
| 91 if globiter.peek() == '*': | |
| 92 # extended glob | |
| 93 next(globiter) | |
| 94 if globiter.peek() == '/': | |
| 95 next(globiter) | |
| 96 res.append("(?:.*/)?") | |
| 97 else: | |
| 98 res.append(".*") | |
| 99 else: | |
| 100 res.append("[^/]*") | |
| 101 elif c == '\\': | |
| 102 try: | |
| 103 res.append(re.escape(next(globiter))) | |
| 104 except StopIteration: | |
| 105 # XXX FIXME: or raise an exception with an invalid syntax | |
| 106 logging.warning( | |
| 107 "lone trailing backslash in glob: %s", globobj) | |
| 108 res.append("\\\\") | |
| 109 elif c == '[': | |
| 110 bres = [] # need a temp store because of errors | |
| 111 if globiter.peek() == '!': # XXX FIXME: handle '^' also? see below! | |
| 112 next(globiter) | |
| 113 bres.append("^") | |
| 114 if globiter.peek() == ']': | |
| 115 next(globiter) | |
| 116 bres.append("\\]") | |
| 117 elif globiter.peek() == '-': | |
| 118 next(globiter) | |
| 119 bres.append("\\-") | |
| 120 elif globiter.peek() == '^': | |
| 121 # | |
| 122 # XXX FIXME: as an extension: FreeBSD /bin/sh handles this | |
| 123 # like `!'. Should we follow it? | |
| 124 # | |
| 125 next(globiter) | |
| 126 if len(bres) > 0 and bres[0] == '^': | |
| 127 bres.append("^") | |
| 128 else: | |
| 129 bres.append("\\^") | |
| 130 for c2 in globiter: | |
| 131 if c2 == ']': | |
| 132 # normal and regular break | |
| 133 if bres[-1] == '-': | |
| 134 bres.insert(-1, "\\") | |
| 135 res.append("[") | |
| 136 res.extend(bres) | |
| 137 res.append("]") | |
| 138 break | |
| 139 if c2 == '\\': | |
| 140 bres.append("\\\\") | |
| 141 else: | |
| 142 bres.append(c2) # no escaping needed | |
| 143 else: | |
| 144 # no trailing `]' char | |
| 145 logging.warning( | |
| 146 "missing trailing bracket `]' in this glob: %s", globobj) | |
| 147 # | |
| 148 # FreeBSD's /bin/sh handles this like putting the given pattern | |
| 149 # into single quotes -- effectively disabling any glob syntax. | |
| 150 # We do this here also. | |
| 151 # | |
| 152 res.append("\\[") | |
| 153 res.append(re.escape("".join(bres))) | |
| 154 elif c == '{': | |
| 155 grouplvl += 1 | |
| 156 res.append("(?:") | |
| 157 elif grouplvl > 0 and c == '}': | |
| 158 grouplvl -= 1 | |
| 159 res.append(")") | |
| 160 elif grouplvl > 0 and c == ',': | |
| 161 res.append("|") | |
| 162 else: | |
| 163 res.append(re.escape(c)) | |
| 164 if grouplvl > 0: | |
| 165 if grouplvl > 1: | |
| 166 logging.warning("missing braces `}' in this glob: %s", globobj) | |
| 167 else: | |
| 168 logging.warning("missing brace `}' in this glob: %s", globobj) | |
| 169 while grouplvl > 0: | |
| 170 # XXX FIXME: what about trailing `|' chars | |
| 171 grouplvl -= 1 | |
| 172 res.append(")") | |
| 173 return "".join(res) | |
| 73 | 174 |
| 74 | 175 |
| 75 class CharIter(object): | 176 class CharIter(object): |
| 76 | 177 |
| 77 """Iterator over byte or unicode strings with peek support. | 178 """Iterator over byte or unicode strings with peek support. |
