comparison pygments_lexer_pseudocode2/lexers/algpseudocode.py @ 285:afbca50b7dc1

Implement an alternate syntax for "Explicit Token Types". It uses a generic two-argument syntax and allows characters to be escaped using the common escaping rules. For this to work, the AlgPseudocodeLexer is now based on Pygments' ExtendedRegexLexer instead of RegexLexer.
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 20 May 2026 20:35:37 +0200
parents 5eba722df93e
children 051c8877ee22
comparison
equal deleted inserted replaced
284:1683a10eabb2 285:afbca50b7dc1
232 else: 232 else:
233 val = match.group("characters") 233 val = match.group("characters")
234 yield match.start(), toktype, val 234 yield match.start(), toktype, val
235 if ctx: 235 if ctx:
236 ctx.pos = match.end() 236 ctx.pos = match.end()
237
238 def op_explicit_tokentype_ex_start(lexer, match, ctx):
239 needed_css = match.group("type")
240 ctx.explicit_token_type = REVERSED_STANDARD_TYPES.get(needed_css, None)
241 if ctx.explicit_token_type is None:
242 # Be more error friendly
243 ctx.explicit_token_type = Generic.Error
244 _logger.warning("Unhandled explicit token type: %s", match.group())
245 yield match.start(), ctx.explicit_token_type, match.group()
246 ctx.pos = match.end()
247
248 def op_explicit_tokentype_ex_value(lexer, match, ctx):
249 yield match.start(), ctx.explicit_token_type, match.group(1)
250 ctx.pos = match.end()
251
252 def op_explicit_tokentype_ex_end(lexer, match, ctx):
253 if ctx.explicit_token_type is Generic.Error:
254 yield match.start(), ctx.explicit_token_type, match.group()
255 ctx.pos = match.end()
256 ctx.explicit_token_type = None
237 257
238 tokens = { 258 tokens = {
239 "root": [ 259 "root": [
240 (r"\n", Whitespace), 260 (r"\n", Whitespace),
241 (r"/\*", Comment.Multiline, "multiline-nested-comment"), 261 (r"/\*", Comment.Multiline, "multiline-nested-comment"),
492 (r"""\\(['"])""", bygroups(Punctuation)), 512 (r"""\\(['"])""", bygroups(Punctuation)),
493 ], 513 ],
494 "explicit-tokentype": [ 514 "explicit-tokentype": [
495 # All these REs are CASE-SENSITIVE! 515 # All these REs are CASE-SENSITIVE!
496 516
517 #
518 # New extended (more flexible, allows escaping)
519 #
520 (r"""\\ttX[ \t]*\{(?P<type>[^}]+)\}[ \t]*\{""",
521 op_explicit_tokentype_ex_start,
522 "extended-explicit-tokentype"),
523
524 #
525 # Old variants
526 #
527
497 # Multiple characters possible, but no escaping! 528 # Multiple characters possible, but no escaping!
498 (r"""\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)""" 529 (r"""\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)"""
499 r"""(?P<sep>[/?.,:;%|=*+!\\$~"'#@_-])""" 530 r"""(?P<sep>[/?.,:;%|=*+!\\$~"'#@_-])"""
500 r"""(?P<characters>(.|\n)+?)(?P=sep)""", 531 r"""(?P<characters>(.|\n)+?)(?P=sep)""",
501 op_explicit_tokentype), 532 op_explicit_tokentype),
509 op_explicit_tokentype), 540 op_explicit_tokentype),
510 541
511 # Every character is possible: no escaping needed! 542 # Every character is possible: no escaping needed!
512 (r"\\tt-(?P<type>[^/]+?)/(?P<characters>(?:.|\n))", 543 (r"\\tt-(?P<type>[^/]+?)/(?P<characters>(?:.|\n))",
513 op_explicit_tokentype), 544 op_explicit_tokentype),
545 ],
546 "extended-explicit-tokentype": [
547 (r"([^\\}]+)", op_explicit_tokentype_ex_value),
548 (r"\}", op_explicit_tokentype_ex_end, "#pop"),
549 (r"\\(\})", op_explicit_tokentype_ex_value),
550 (r"\\(\\)", op_explicit_tokentype_ex_value),
551 (r"\\", LexBase.op_fixed(Generic.Error, "\\")), # weak error
514 ], 552 ],
515 } 553 }
516 554
517 def __init__(self, **options): 555 def __init__(self, **options):
518 self.no_end = pygments.util.get_bool_opt( 556 self.no_end = pygments.util.get_bool_opt(