comparison pygments_lexer_pseudocode2/bases.py @ 56:661461fb4dfc

Make the "py-name" rules parameterized: allow the token type to be provided. For this to work, the implementing function must be a module global, because access to the class is not yet possible at class construction time. So consistently make some previous LexBase members module globals. Make some LexBase members module globals consistently.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 27 Apr 2026 12:37:27 +0200
parents 5bfa9113d3c4
children 7153e945a3d6
comparison
equal deleted inserted replaced
55:baf4ed7ac81a 56:661461fb4dfc
3 # SPDX-FileCopyrightText: © 2026 Franz Glasner 3 # SPDX-FileCopyrightText: © 2026 Franz Glasner
4 # SPDX-License-Identifier: MIT 4 # SPDX-License-Identifier: MIT
5 # :- 5 # :-
6 r"""Some common bases for the lexers.""" 6 r"""Some common bases for the lexers."""
7 7
8 __all__ = ["LexBase"] 8 __all__ = ["LexBase", "uni_name", "py_innerstring_rules", "py_name_rules"]
9 9
10 10
11 from pygments import unistring 11 from pygments import unistring
12 from pygments.lexer import RegexLexer, combined, bygroups, include 12 from pygments.lexer import RegexLexer, combined, bygroups, include
13 from pygments.token import Error, Name, Number, String, Comment 13 from pygments.token import Error, Name, Number, String, Comment
14
15
16 #
17 # SPDX-SnippetBegin
18 # SPDX-License-Identifier: BSD-2-Clause
19 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
20 # SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
21 #
22
23 uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue)
24
25
26 def py_innerstring_rules(ttype):
27 return [
28 # the old style '%s' % (...) string formatting (still valid in Py3)
29 (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
30 '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
31 # the new style '{}'.format(...) string formatting
32 (r'\{'
33 r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
34 r'(\![sra])?' # conversion
35 r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
36 r'\}', String.Interpol),
37 #
38 # backslashes, quotes and formatting signs must be parsed
39 # one at a time
40 #
41 (r'[^\\\'"%{\n]+', ttype),
42 (r'[\'"\\]', ttype),
43 # unhandled string formatting sign
44 (r'%|(\{{1,2})', ttype)
45 # newlines are an error (use "nl" state)
46 ]
47
48
49 def py_name_rules(ttype, deco_ttype=Name.Decorator):
50 return [
51 # We recognize decorator syntax here
52 (r'@' + uni_name, deco_ttype),
53 #
54 # Python's new matrix multiplication operator:
55 # not used here in pseudocode
56 # (r'@', Operator),
57 (uni_name, ttype),
58 ]
59
60 # SPDX-SnippetEnd
14 61
15 62
16 class LexBase(RegexLexer): 63 class LexBase(RegexLexer):
17 64
18 """A base that defines some common lexer states. 65 """A base that defines some common lexer states.
19 66
20 Default flags are not important. 67 Default flags are not important.
21 68
22 """ 69 """
23
24 #
25 # SPDX-SnippetBegin
26 # SPDX-License-Identifier: BSD-2-Clause
27 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
28 #
29
30 uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue)
31
32 def py_innerstring_rules(ttype):
33 return [
34 # the old style '%s' % (...) string formatting (still valid in Py3)
35 (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
36 '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
37 # the new style '{}'.format(...) string formatting
38 (r'\{'
39 r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
40 r'(\![sra])?' # conversion
41 r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
42 r'\}', String.Interpol),
43 #
44 # backslashes, quotes and formatting signs must be parsed
45 # one at a time
46 #
47 (r'[^\\\'"%{\n]+', ttype),
48 (r'[\'"\\]', ttype),
49 # unhandled string formatting sign
50 (r'%|(\{{1,2})', ttype)
51 # newlines are an error (use "nl" state)
52 ]
53 # SPDX-SnippetEnd
54 70
55 tokens = { 71 tokens = {
56 # 72 #
57 # These states are borrowed from Pygment's Python lexer. 73 # These states are borrowed from Pygment's Python lexer.
58 # Their names have been prefixed with `py-'. 74 # Their names have been prefixed with `py-'.
121 include('py-strings-single'), 137 include('py-strings-single'),
122 (r'\n', String.Single) 138 (r'\n', String.Single)
123 ], 139 ],
124 'py-strings-single': py_innerstring_rules(String.Single), 140 'py-strings-single': py_innerstring_rules(String.Single),
125 'py-strings-double': py_innerstring_rules(String.Double), 141 'py-strings-double': py_innerstring_rules(String.Double),
126 'py-name': [ 142 'py-name': py_name_rules(Name.Entity),
127 # We recognize decorator syntax here
128 (r'@' + uni_name, Name.Decorator),
129 #
130 # Python's new matrix multiplication operator:
131 # not used here in pseudocode
132 # (r'@', Operator),
133 (uni_name, Name),
134 ],
135 # SPDX-SnippetEnd 143 # SPDX-SnippetEnd
136 # This snippet is from the Pygments' documentation "Write your own lexer" 144 # This snippet is from the Pygments' documentation "Write your own lexer"
137 'multiline-nested-comment': [ 145 'multiline-nested-comment': [
138 (r'[^*/]+', Comment.Multiline), 146 (r'[^*/]+', Comment.Multiline),
139 (r'/\*', Comment.Multiline, '#push'), 147 (r'/\*', Comment.Multiline, '#push'),