comparison pygments_lexer_pseudocode2/algpseudocode.py @ 84:3ac1c4502ad0

Implement "\tt-XXX" and "\ttx-XXX" for explicit token types. Overwrite defaults or implement tokens that are not yet handled by default.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 30 Apr 2026 19:37:24 +0200
parents cd79d2c76347
children ae5e741d2a9b
comparison
equal deleted inserted replaced
83:cd79d2c76347 84:3ac1c4502ad0
16 import re 16 import re
17 17
18 import pygments.util 18 import pygments.util
19 from pygments.lexer import bygroups, include, words 19 from pygments.lexer import bygroups, include, words
20 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation, 20 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
21 Text, Whitespace) 21 Text, Whitespace,
22 STANDARD_TYPES)
22 23
23 # 24 #
24 # Relative imports do not work with pygments.lexers.load_lexer_from_file() 25 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
25 # in all of our supported Python releases. 26 # in all of our supported Python releases.
26 # 27 #
150 yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw) 151 yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
151 if ctx: 152 if ctx:
152 ctx.pos = match.end() 153 ctx.pos = match.end()
153 154
154 return _op_symbol 155 return _op_symbol
156
157 def op_explicit_tokentype(lexer, match, ctx=None):
158 needed_css = match.group("type")
159 for ttype, css in STANDARD_TYPES.items():
160 if css == needed_css:
161 toktype = ttype
162 break
163 else:
164 toktype = Text
165 yield match.start(), toktype, match.group("character")
166 if ctx:
167 ctx.pos = match.end()
155 168
156 tokens = { 169 tokens = {
157 "root": [ 170 "root": [
158 (r"\n", Whitespace), 171 (r"\n", Whitespace),
159 (r"/\*", Comment.Multiline, "multiline-nested-comment"), 172 (r"/\*", Comment.Multiline, "multiline-nested-comment"),
232 "expr": [ 245 "expr": [
233 include("punctuation"), 246 include("punctuation"),
234 include("py-strings"), 247 include("py-strings"),
235 include("py-numbers"), 248 include("py-numbers"),
236 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"), 249 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
250 include("explicit-tokentype"),
237 include("remark"), 251 include("remark"),
238 include("keyword-constants"), 252 include("keyword-constants"),
239 include("text-operators"), 253 include("text-operators"),
240 include("math-builtins"), 254 include("math-builtins"),
241 include("py-name"), 255 include("py-name"),
243 "expr-in-braces": [ 257 "expr-in-braces": [
244 include("punctuation-in-braces"), 258 include("punctuation-in-braces"),
245 include("py-strings"), 259 include("py-strings"),
246 include("py-numbers"), 260 include("py-numbers"),
247 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"), 261 (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
262 include("explicit-tokentype"),
248 include("remark"), 263 include("remark"),
249 include("keyword-constants"), 264 include("keyword-constants"),
250 include("text-operators"), 265 include("text-operators"),
251 include("math-builtins"), 266 include("math-builtins"),
252 include("py-name"), 267 include("py-name"),
265 (r"\n", Whitespace), 280 (r"\n", Whitespace),
266 (r"\\\}", LexBase.op_fixed(Text, "}")), 281 (r"\\\}", LexBase.op_fixed(Text, "}")),
267 (r"(?i)\\expr(?:ession)?\s*\{", 282 (r"(?i)\\expr(?:ession)?\s*\{",
268 LexBase.op_ignore, 283 LexBase.op_ignore,
269 "block-expr"), 284 "block-expr"),
285 include("explicit-tokentype"),
270 include("remark"), 286 include("remark"),
271 (r"\\\\", LexBase.op_fixed(Text, "\\")), 287 (r"\\\\", LexBase.op_fixed(Text, "\\")),
272 (r"\\", LexBase.op_fixed(Text, "\\")), 288 (r"\\", LexBase.op_fixed(Text, "\\")),
273 ], 289 ],
274 "text-in-expr": [ 290 "text-in-expr": [
277 (r"\n", Whitespace), 293 (r"\n", Whitespace),
278 (r"\\\}", LexBase.op_fixed(Text, "}")), 294 (r"\\\}", LexBase.op_fixed(Text, "}")),
279 (r"(?:i)\\(expr(?:ession)?)\s*(\{)", 295 (r"(?:i)\\(expr(?:ession)?)\s*(\{)",
280 bygroups(LexBase.op_ignore, LexBase.op_ignore), 296 bygroups(LexBase.op_ignore, LexBase.op_ignore),
281 "expr-in-braces"), 297 "expr-in-braces"),
298 include("explicit-tokentype"),
282 (r"\\\\", LexBase.op_fixed(Text, "\\")), 299 (r"\\\\", LexBase.op_fixed(Text, "\\")),
283 (r"\\", LexBase.op_fixed(Text, "\\")), 300 (r"\\", LexBase.op_fixed(Text, "\\")),
284 ], 301 ],
285 "math-builtins": [ 302 "math-builtins": [
286 (words(("sqrt", "pow", "cos", "sin", "tan", "arcos", "arcsin", 303 (words(("sqrt", "pow", "cos", "sin", "tan", "arcos", "arcsin",
309 ], 326 ],
310 "punctuation-in-braces": [ 327 "punctuation-in-braces": [
311 # like "punctuation" but needs an escaped curly brace for } 328 # like "punctuation" but needs an escaped curly brace for }
312 (r"\\\}", LexBase.op_fixed(Punctuation, "}")), 329 (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
313 (r"[{:(),;[\]]", Punctuation), 330 (r"[{:(),;[\]]", Punctuation),
331 ],
332 "explicit-tokentype": [
333 # All these REs are CASE-SENSITIVE!
334
335 # Multiple characters possible, but no escaping!
336 (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
337 r"(?P<character>(.|\n)+?)(?P=sep)",
338 op_explicit_tokentype),
339 (r"\\ttx\-(?P<type>[^{]+)\{"
340 r"(?P<character>[^}]+?)\}",
341 op_explicit_tokentype),
342 (r"\\ttx\-(?P<type>[^\(]+)\("
343 r"(?P<character>[^\)]+?)\)",
344 op_explicit_tokentype),
345 (r"\\ttx\-(?P<type>[^<]+)<"
346 r"(?P<character>[^>]+?)>",
347 op_explicit_tokentype),
348
349 # Every character is possible: no escaping needed!
350 (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
351 op_explicit_tokentype),
314 ], 352 ],
315 } 353 }
316 354
317 def __init__(self, **options): 355 def __init__(self, **options):
318 val = pygments.util.get_bool_opt(options, "no_end", default=False) 356 val = pygments.util.get_bool_opt(options, "no_end", default=False)