comparison pygments_lexer_pseudocode2/algpseudocode.py @ 75:711f8d19e27a

New implementation of "STATEMENT" (also aliased to "STATE" and "BLOCK"). Now this needs curly braces.
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 29 Apr 2026 12:43:28 +0200
parents c1357674622d
children 27e12387154d
comparison
equal deleted inserted replaced
74:c1357674622d 75:711f8d19e27a
14 14
15 15
16 import re 16 import re
17 17
18 import pygments.util 18 import pygments.util
19 from pygments.lexer import include, bygroups 19 from pygments.lexer import bygroups, include, words
20 from pygments.token import (Comment, Keyword, Name, Text, Whitespace) 20 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
21 Text, Whitespace)
21 22
22 # 23 #
23 # Relative imports do not work with pygments.lexers.load_lexer_from_file() 24 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
24 # in all of our supported Python releases. 25 # in all of our supported Python releases.
25 # 26 #
71 SYMBOL_REMARK = "▷" # U+25B7: Unicode 1.0 (Geometric Shapes) 72 SYMBOL_REMARK = "▷" # U+25B7: Unicode 1.0 (Geometric Shapes)
72 # SYMBOL_REMARK = "▻" # U+25BB: Unicode 1.0 (Geometric Shapes) 73 # SYMBOL_REMARK = "▻" # U+25BB: Unicode 1.0 (Geometric Shapes)
73 SYMBOL_BLOCK = "◆" # U+25C6: Unicode 1.0 (Geometric Shapes) 74 SYMBOL_BLOCK = "◆" # U+25C6: Unicode 1.0 (Geometric Shapes)
74 # SYMBOL_BLOCK = "┃" # U+2503: Unicode 1.0 (Box Drawing) 75 # SYMBOL_BLOCK = "┃" # U+2503: Unicode 1.0 (Box Drawing)
75 # SYMBOL_BLOCK = "●" # U+25CF: Unicode 1.0 (Geometric Shapes) 76 # SYMBOL_BLOCK = "●" # U+25CF: Unicode 1.0 (Geometric Shapes)
76 SYMBOL_TEXTSTATEMENT = "■" # U+25A0: Unicode 1.0 (Geometric Shapes) 77 SYMBOL_TEXTSTATEMENT = "▪" # U+25AA: Unicode 1.0 (Geometric Shapes)
78 # SYMBOL_TEXTSTATEMENT = "■" # U+25A0: Unicode 1.0 (Geometric Shapes)
77 SYMBOLS = { 79 SYMBOLS = {
80 # Group REMARK
78 "REMARK": SYMBOL_REMARK, 81 "REMARK": SYMBOL_REMARK,
79 "REM": SYMBOL_REMARK, 82 "REM": SYMBOL_REMARK,
83 # Group STATEMENT
84 "STATEMENT": SYMBOL_BLOCK,
85 "STATE": SYMBOL_BLOCK,
80 "BLOCK": SYMBOL_BLOCK, 86 "BLOCK": SYMBOL_BLOCK,
87 # Group TEXTSTATEMENT
81 "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT, 88 "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT,
82 "TSTATEMENT": SYMBOL_TEXTSTATEMENT, 89 "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
83 "TSTATE": SYMBOL_TEXTSTATEMENT, 90 "TSTATE": SYMBOL_TEXTSTATEMENT,
84 "TEXT": SYMBOL_TEXTSTATEMENT, 91 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
92 "TBLOCK": SYMBOL_TEXTSTATEMENT,
85 } 93 }
86 94
87 def op_translate(toktype): 95 def op_translate(toktype):
88 96
89 def _op_translate(lexer, match, ctx=None): 97 def _op_translate(lexer, match, ctx=None):
136 tokens = { 144 tokens = {
137 "root": [ 145 "root": [
138 (r"\n", Whitespace), 146 (r"\n", Whitespace),
139 (r"/\*", Comment.Multiline, "multiline-nested-comment"), 147 (r"/\*", Comment.Multiline, "multiline-nested-comment"),
140 (r"//.*$", Comment.Single), 148 (r"//.*$", Comment.Single),
141 (r"(?i)\\(remark|rem)\b(.*)$", 149 include("remark"),
142 bygroups(op_symbol(Comment.Single), Comment.Single)), 150 (r"(?i)\\(block|state(?:ment)?)\s*(\{)",
143 (r"(?i)\\(block)\b(.*)$", 151 bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
144 bygroups(op_symbol(Text), Text)), 152 "block-expr"),
145 (r"\\\n", Text), 153 (r"\\\n", Text),
146 (r"(?i)\\(" 154 (r"(?i)\\("
147 r"(?:prog(?:ram)?)" 155 r"(?:prog(?:ram)?)"
148 r"|(?:algo(?:rithm)?)" 156 r"|(?:algo(?:rithm)?)"
149 r"|(?:proc(?:edure)?)" 157 r"|(?:proc(?:edure)?)"
180 r")\b", 188 r")\b",
181 bygroups(op_translate(Keyword))), 189 bygroups(op_translate(Keyword))),
182 include("expr"), 190 include("expr"),
183 (r"\s+", Text), 191 (r"\s+", Text),
184 ], 192 ],
193 "remark": [
194 (r"(?i)\\(remark|rem)\b(.*)$",
195 bygroups(op_symbol(Comment.Single), Comment.Single)),
196 ],
185 "entity-name": [ # may be multiline 197 "entity-name": [ # may be multiline
186 (r"[^\\}]+", Name.Entity), 198 (r"[^\\}]+", Name.Entity),
187 (r"\\\}", LexBase.op_fixed(Name.Entity, "}")), 199 (r"\\\}", LexBase.op_fixed(Name.Entity, "}")),
188 (r"\\", LexBase.op_fixed(Name.Entity, "\\")), 200 (r"\\", LexBase.op_fixed(Name.Entity, "\\")),
189 (r"\}", LexBase.op_ignore, "#pop"), 201 (r"\}", LexBase.op_ignore, "#pop"),
193 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")), 205 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
194 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), 206 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
195 (r"\}", LexBase.op_ignore, "#pop"), 207 (r"\}", LexBase.op_ignore, "#pop"),
196 ], 208 ],
197 "expr": [ 209 "expr": [
210 include("punctuation"),
198 include("py-strings"), 211 include("py-strings"),
199 include("py-numbers"), 212 include("py-numbers"),
200 (r"(?i)\\text\s*\{", LexBase.op_ignore, "expr-text"), 213 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
214 include("remark"),
215 include("text-operators"),
216 include("math-builtins"),
201 include("py-name"), 217 include("py-name"),
202 ], 218 ],
203 "expr-text": [ 219 "block-expr": [ # somewhat similar to "root"
220 (r"\}", LexBase.op_ignore, "#pop"),
221 (r"\n", Whitespace),
222 include("expr-in-braces"),
223 (r"\s+", Text),
224 ],
225 "expr-in-braces": [
226 include("punctuation-in-braces"),
227 include("py-strings"),
228 include("py-numbers"),
229 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
230 include("remark"),
231 include("text-operators"),
232 include("math-builtins"),
233 include("py-name"),
234 ],
235 "text-in-expr": [
204 (r"[^\\}]+", Text), 236 (r"[^\\}]+", Text),
205 (r"\\\}", LexBase.op_fixed(Text, "}")), 237 (r"\\\}", LexBase.op_fixed(Text, "}")),
206 (r"\\", LexBase.op_fixed(Text, "\\")), 238 (r"\\", LexBase.op_fixed(Text, "\\")),
207 (r"\}", LexBase.op_ignore, "#pop"), 239 (r"\}", LexBase.op_ignore, "#pop"),
240 ],
241 "math-builtins": [
242 (words(("sqrt", "pow", "cos", "sin", "tan", "arcos", "arcsin",
243 "arctan", "arctan2", "mod", "exp", "ln", "log"),
244 prefix=r"(?<!\.)",
245 suffix=r"\b"),
246 Name.Builtin),
247 ],
248 "text-operators": [
249 (words(("in", "is", "and", "or", "xor", "not"),
250 prefix=r"(?<!\.)",
251 suffix=r"\b"),
252 Operator.Word),
253 ],
254 "punctuation": [
255 (r"[{}:(),;[\]]", Punctuation),
256 ],
257 "punctuation-in-braces": [
258 # like "punctuation" but needs an escaped curly brace for }
259 (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
260 (r"[{:(),;[\]]", Punctuation),
208 ], 261 ],
209 } 262 }
210 263
211 def __init__(self, **options): 264 def __init__(self, **options):
212 val = pygments.util.get_bool_opt(options, "no_end", default=False) 265 val = pygments.util.get_bool_opt(options, "no_end", default=False)