comparison pygments_lexer_pseudocode2/bases.py @ 34:1f741934205e

Begin a new Pseudocode lexer using numbers and strings from Python
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 21 Apr 2026 19:40:08 +0200
parents db1bc740a201
children a3151d837258
comparison
equal deleted inserted replaced
33:db1bc740a201 34:1f741934205e
3 # SPDX-FileCopyrightText: © 2026 Franz Glasner 3 # SPDX-FileCopyrightText: © 2026 Franz Glasner
4 # SPDX-License-Identifier: MIT 4 # SPDX-License-Identifier: MIT
5 # :- 5 # :-
6 r"""Some common bases for the lexers.""" 6 r"""Some common bases for the lexers."""
7 7
8 8 __all__ = ["LexBase"]
9 __all__ = []
10 9
11 10
12 from pygments.lexer import RegexLexer 11 from pygments.lexer import RegexLexer, combined, bygroups, include
13 from pygments.token import Number 12 from pygments.token import Number, String
14 13
15 14
16 class LexBase(RegexLexer): 15 class LexBase(RegexLexer):
17 16
18 """A base that defines some common lexer states. 17 """A base that defines some common lexer states.
19 18
20 Default flags are not important. 19 Default flags are not important.
21 20
22 """ 21 """
23 22
24 tokens = {
25 #
26 # This state is borrowed from Pygments' Python lexer.
27 # 23 #
28 # SPDX-SnippetBegin 24 # SPDX-SnippetBegin
29 # SPDX-License-Identifier: BSD-2-Clause 25 # SPDX-License-Identifier: BSD-2-Clause
30 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team 26 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
27 #
28 def py_innerstring_rules(ttype):
29 return [
30 # the old style '%s' % (...) string formatting (still valid in Py3)
31 (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
32 '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
33 # the new style '{}'.format(...) string formatting
34 (r'\{'
35 r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
36 r'(\![sra])?' # conversion
37 r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
38 r'\}', String.Interpol),
39 #
40 # backslashes, quotes and formatting signs must be parsed
41 # one at a time
42 #
43 (r'[^\\\'"%{\n]+', ttype),
44 (r'[\'"\\]', ttype),
45 # unhandled string formatting sign
46 (r'%|(\{{1,2})', ttype)
47 # newlines are an error (use "nl" state)
48 ]
49 # SPDX-SnippetEnd
50
51 tokens = {
52 #
53 # These states are borrowed from Pygments' Python lexer.
54 # Their names have been prefixed with `py-'.
55 #
56 # SPDX-SnippetBegin
57 # SPDX-License-Identifier: BSD-2-Clause
58 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
59 #
31 'py-numbers': [ 60 'py-numbers': [
32 (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)' 61 (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
33 r'([eE][+-]?\d(?:_?\d)*)?', Number.Float), 62 r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
34 (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float), 63 (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
35 (r'0[oO](?:_?[0-7])+', Number.Oct), 64 (r'0[oO](?:_?[0-7])+', Number.Oct),
36 (r'0[bB](?:_?[01])+', Number.Bin), 65 (r'0[bB](?:_?[01])+', Number.Bin),
37 (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex), 66 (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
38 (r'\d(?:_?\d)*', Number.Integer), 67 (r'\d(?:_?\d)*', Number.Integer),
39 ], 68 ],
69 'py-strings': [
70 # non-raw strings
71 ('([uU]?)(""")', bygroups(String.Affix, String.Double),
72 combined('py-stringescape', 'py-tdqs')),
73 ("([uU]?)(''')", bygroups(String.Affix, String.Single),
74 combined('py-stringescape', 'py-tsqs')),
75 ('([uU]?)(")', bygroups(String.Affix, String.Double),
76 combined('py-stringescape', 'py-dqs')),
77 ("([uU]?)(')", bygroups(String.Affix, String.Single),
78 combined('py-stringescape', 'py-sqs')),
79 # non-raw bytes
80 ('([bB])(""")', bygroups(String.Affix, String.Double),
81 combined('py-bytesescape', 'py-tdqs')),
82 ("([bB])(''')", bygroups(String.Affix, String.Single),
83 combined('py-bytesescape', 'py-tsqs')),
84 ('([bB])(")', bygroups(String.Affix, String.Double),
85 combined('py-bytesescape', 'py-dqs')),
86 ("([bB])(')", bygroups(String.Affix, String.Single),
87 combined('py-bytesescape', 'py-sqs')),
88 ],
89 'py-stringescape': [
90 (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
91 include('py-bytesescape')
92 ],
93 'py-bytesescape': [
94 (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
95 String.Escape)
96 ],
97 'py-dqs': [
98 (r'"', String.Double, '#pop'),
99 (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
100 include('py-strings-double')
101 ],
102 'py-sqs': [
103 (r"'", String.Single, '#pop'),
104 (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
105 include('py-strings-single')
106 ],
107 'py-tdqs': [
108 (r'"""', String.Double, '#pop'),
109 include('py-strings-double'),
110 (r'\n', String.Double)
111 ],
112 'py-tsqs': [
113 (r"'''", String.Single, '#pop'),
114 include('py-strings-single'),
115 (r'\n', String.Single)
116 ],
117 'py-strings-single': py_innerstring_rules(String.Single),
118 'py-strings-double': py_innerstring_rules(String.Double),
40 # SPDX-SnippetEnd 119 # SPDX-SnippetEnd
41 } 120 }