Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
comparison pygments_lexer_pseudocode2/bases.py @ 56:661461fb4dfc
Make the "py-name" rules parameterized: allow the caller to provide the token type.
For this to work the implementing function must be global, because
access to the class is not yet possible at construction time.
So consistently make some previous LexBase members module globals.
Make some LexBase members module globals consistently.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 27 Apr 2026 12:37:27 +0200 |
| parents | 5bfa9113d3c4 |
| children | 7153e945a3d6 |
comparison
equal
deleted
inserted
replaced
| 55:baf4ed7ac81a | 56:661461fb4dfc |
|---|---|
| 3 # SPDX-FileCopyrightText: © 2026 Franz Glasner | 3 # SPDX-FileCopyrightText: © 2026 Franz Glasner |
| 4 # SPDX-License-Identifier: MIT | 4 # SPDX-License-Identifier: MIT |
| 5 # :- | 5 # :- |
| 6 r"""Some common bases for the lexers.""" | 6 r"""Some common bases for the lexers.""" |
| 7 | 7 |
| 8 __all__ = ["LexBase"] | 8 __all__ = ["LexBase", "uni_name", "py_innerstring_rules", "py_name_rules"] |
| 9 | 9 |
| 10 | 10 |
| 11 from pygments import unistring | 11 from pygments import unistring |
| 12 from pygments.lexer import RegexLexer, combined, bygroups, include | 12 from pygments.lexer import RegexLexer, combined, bygroups, include |
| 13 from pygments.token import Error, Name, Number, String, Comment | 13 from pygments.token import Error, Name, Number, String, Comment |
| 14 | |
| 15 | |
| 16 # | |
| 17 # SPDX-SnippetBegin | |
| 18 # SPDX-License-Identifier: BSD-2-Clause | |
| 19 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team | |
| 20 # SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner | |
| 21 # | |
| 22 | |
| 23 uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue) | |
| 24 | |
| 25 | |
| 26 def py_innerstring_rules(ttype): | |
| 27 return [ | |
| 28 # the old style '%s' % (...) string formatting (still valid in Py3) | |
| 29 (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' | |
| 30 '[hlL]?[E-GXc-giorsaux%]', String.Interpol), | |
| 31 # the new style '{}'.format(...) string formatting | |
| 32 (r'\{' | |
| 33 r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name | |
| 34 r'(\![sra])?' # conversion | |
| 35 r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' | |
| 36 r'\}', String.Interpol), | |
| 37 # | |
| 38 # backslashes, quotes and formatting signs must be parsed | |
| 39 # one at a time | |
| 40 # | |
| 41 (r'[^\\\'"%{\n]+', ttype), | |
| 42 (r'[\'"\\]', ttype), | |
| 43 # unhandled string formatting sign | |
| 44 (r'%|(\{{1,2})', ttype) | |
| 45 # newlines are an error (use "nl" state) | |
| 46 ] | |
| 47 | |
| 48 | |
| 49 def py_name_rules(ttype, deco_ttype=Name.Decorator): | |
| 50 return [ | |
| 51 # We recognize decorator syntax here | |
| 52 (r'@' + uni_name, deco_ttype), | |
| 53 # | |
| 54 # Python's new matrix multiplication operator: | |
| 55 # not used here in pseudocode | |
| 56 # (r'@', Operator), | |
| 57 (uni_name, ttype), | |
| 58 ] | |
| 59 | |
| 60 # SPDX-SnippetEnd | |
| 14 | 61 |
| 15 | 62 |
| 16 class LexBase(RegexLexer): | 63 class LexBase(RegexLexer): |
| 17 | 64 |
| 18 """A base that defines some common lexer states. | 65 """A base that defines some common lexer states. |
| 19 | 66 |
| 20 Default flags are not important. | 67 Default flags are not important. |
| 21 | 68 |
| 22 """ | 69 """ |
| 23 | |
| 24 # | |
| 25 # SPDX-SnippetBegin | |
| 26 # SPDX-License-Identifier: BSD-2-Clause | |
| 27 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team | |
| 28 # | |
| 29 | |
| 30 uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue) | |
| 31 | |
| 32 def py_innerstring_rules(ttype): | |
| 33 return [ | |
| 34 # the old style '%s' % (...) string formatting (still valid in Py3) | |
| 35 (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' | |
| 36 '[hlL]?[E-GXc-giorsaux%]', String.Interpol), | |
| 37 # the new style '{}'.format(...) string formatting | |
| 38 (r'\{' | |
| 39 r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name | |
| 40 r'(\![sra])?' # conversion | |
| 41 r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' | |
| 42 r'\}', String.Interpol), | |
| 43 # | |
| 44 # backslashes, quotes and formatting signs must be parsed | |
| 45 # one at a time | |
| 46 # | |
| 47 (r'[^\\\'"%{\n]+', ttype), | |
| 48 (r'[\'"\\]', ttype), | |
| 49 # unhandled string formatting sign | |
| 50 (r'%|(\{{1,2})', ttype) | |
| 51 # newlines are an error (use "nl" state) | |
| 52 ] | |
| 53 # SPDX-SnippetEnd | |
| 54 | 70 |
| 55 tokens = { | 71 tokens = { |
| 56 # | 72 # |
| 57 # These states are borrowed from Pygments' Python lexer. | 73 # These states are borrowed from Pygments' Python lexer. |
| 58 # Their names have been prefixed with `py-'. | 74 # Their names have been prefixed with `py-'. |
| 121 include('py-strings-single'), | 137 include('py-strings-single'), |
| 122 (r'\n', String.Single) | 138 (r'\n', String.Single) |
| 123 ], | 139 ], |
| 124 'py-strings-single': py_innerstring_rules(String.Single), | 140 'py-strings-single': py_innerstring_rules(String.Single), |
| 125 'py-strings-double': py_innerstring_rules(String.Double), | 141 'py-strings-double': py_innerstring_rules(String.Double), |
| 126 'py-name': [ | 142 'py-name': py_name_rules(Name.Entity), |
| 127 # We recognize decorator syntax here | |
| 128 (r'@' + uni_name, Name.Decorator), | |
| 129 # | |
| 130 # Python's new matrix multiplication operator: | |
| 131 # not used here in pseudocode | |
| 132 # (r'@', Operator), | |
| 133 (uni_name, Name), | |
| 134 ], | |
| 135 # SPDX-SnippetEnd | 143 # SPDX-SnippetEnd |
| 136 # This snippet is from the Pygments' documentation "Write your own lexer" | 144 # This snippet is from the Pygments' documentation "Write your own lexer" |
| 137 'multiline-nested-comment': [ | 145 'multiline-nested-comment': [ |
| 138 (r'[^*/]+', Comment.Multiline), | 146 (r'[^*/]+', Comment.Multiline), |
| 139 (r'/\*', Comment.Multiline, '#push'), | 147 (r'/\*', Comment.Multiline, '#push'), |
