diff pygments_lexer_pseudocode2/lexers/algpseudocode.py @ 285:afbca50b7dc1

Implement an alternate syntax for "Explicit Token Types". Uses a generic two-argument syntax and allows escaping of characters using the common escaping rules. For this to work, the AlgPseudocodeLexer is now based on Pygments' ExtendedRegexLexer instead of RegexLexer.
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 20 May 2026 20:35:37 +0200
parents 5eba722df93e
children 051c8877ee22
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Wed May 20 20:32:42 2026 +0200
+++ b/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Wed May 20 20:35:37 2026 +0200
@@ -235,6 +235,26 @@
         if ctx:
             ctx.pos = match.end()
 
+    def op_explicit_tokentype_ex_start(lexer, match, ctx):
+        """Callback for the opening part of an extended explicit token type.
+
+        Looks up the name captured in the ``type`` group of `match` in
+        `REVERSED_STANDARD_TYPES` and stores the resulting token type in
+        ``ctx.explicit_token_type`` for the value and end callbacks.
+
+        If the name is unknown a warning is logged, `Generic.Error` is
+        used as token type and the complete matched markup is emitted as
+        error text.  ``ctx.explicit_token_type_unknown`` records this case
+        so that the end callback can echo the closing brace as well.
+
+        :param lexer: the calling lexer (unused)
+        :param match: the regex match of the opening markup
+        :param ctx: the lexer context; its ``pos`` is advanced to the end
+                    of the match
+
+        """
+        token_type = REVERSED_STANDARD_TYPES.get(match.group("type"))
+        # Keep "unknown name" apart from the token type: a user may ask
+        # for Generic.Error explicitly and the markup must then stay
+        # hidden like for every other known type.
+        unknown = token_type is None
+        ctx.explicit_token_type_unknown = unknown
+        ctx.explicit_token_type = Generic.Error if unknown else token_type
+        if unknown:
+            # Be more error friendly
+            _logger.warning("Unhandled explicit token type: %s", match.group())
+            yield match.start(), ctx.explicit_token_type, match.group()
+        ctx.pos = match.end()
+
+    def op_explicit_tokentype_ex_value(lexer, match, ctx):
+        """Emit group 1 of `match` with the currently active explicit type.
+
+        Used for plain text and for escaped characters: only the first
+        group (i.e. without an escaping backslash) is emitted.
+
+        :param lexer: the calling lexer (unused)
+        :param match: the regex match; group 1 holds the text to emit
+        :param ctx: the lexer context; provides ``explicit_token_type``
+                    and gets its ``pos`` advanced to the end of the match
+
+        """
+        yield match.start(), ctx.explicit_token_type, match.group(1)
+        ctx.pos = match.end()
+
+    def op_explicit_tokentype_ex_end(lexer, match, ctx):
+        """Callback for the closing part of an extended explicit token type.
+
+        The closing markup is emitted (as `Generic.Error`) only if the
+        opening markup had an unknown type name and was emitted as error
+        text too.  Afterwards the explicit token type state in `ctx` is
+        reset.
+
+        :param lexer: the calling lexer (unused)
+        :param match: the regex match of the closing markup
+        :param ctx: the lexer context; its ``pos`` is advanced to the end
+                    of the match
+
+        """
+        # getattr(): be robust if the flag has never been set on ctx
+        if getattr(ctx, "explicit_token_type_unknown", False):
+            yield match.start(), Generic.Error, match.group()
+        ctx.pos = match.end()
+        ctx.explicit_token_type = None
+        ctx.explicit_token_type_unknown = False
+
     tokens = {
         "root": [
             (r"\n", Whitespace),
@@ -494,6 +514,17 @@
         "explicit-tokentype": [
             # All these REs are CASE-SENSITIVE!
 
+            #
+            # New extended (more flexible, allows escaping)
+            #
+            (r"""\\ttX[ \t]*\{(?P<type>[^}]+)\}[ \t]*\{""",
+             op_explicit_tokentype_ex_start,
+             "extended-explicit-tokentype"),
+
+            #
+            # Old variants
+            #
+
             # Multiple characters possible, but no escaping!
             (r"""\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)"""
              r"""(?P<sep>[/?.,:;%|=*+!\\$~"'#@_-])"""
@@ -512,6 +543,13 @@
             (r"\\tt-(?P<type>[^/]+?)/(?P<characters>(?:.|\n))",
              op_explicit_tokentype),
         ],
+        "extended-explicit-tokentype": [
+            (r"([^\\}]+)", op_explicit_tokentype_ex_value),
+            (r"\}", op_explicit_tokentype_ex_end, "#pop"),
+            (r"\\(\})", op_explicit_tokentype_ex_value),
+            (r"\\(\\)", op_explicit_tokentype_ex_value),
+            (r"\\", LexBase.op_fixed(Generic.Error, "\\")),   # weak error
+        ],
     }
 
     def __init__(self, **options):