changeset 84:3ac1c4502ad0

Implement "\tt-XXX" and "\ttx-XXX" for explicit token types. Overwrite defaults or implement tokens that are not yet handled by default.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 30 Apr 2026 19:37:24 +0200
parents cd79d2c76347
children ae5e741d2a9b
files pygments_lexer_pseudocode2/algpseudocode.py tests/test_algpseudo.py
diffstat 2 files changed, 111 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py	Thu Apr 30 13:21:38 2026 +0200
+++ b/pygments_lexer_pseudocode2/algpseudocode.py	Thu Apr 30 19:37:24 2026 +0200
@@ -18,7 +18,8 @@
 import pygments.util
 from pygments.lexer import bygroups, include, words
 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
-                            Text, Whitespace)
+                            Text, Whitespace,
+                            STANDARD_TYPES)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -153,6 +154,18 @@
 
         return _op_symbol
 
+    def op_explicit_tokentype(lexer, match, ctx=None):
+        needed_css = match.group("type")
+        for ttype, css in STANDARD_TYPES.items():
+            if css == needed_css:
+                toktype = ttype
+                break
+        else:
+            toktype = Text
+        yield match.start(), toktype, match.group("character")
+        if ctx:
+            ctx.pos = match.end()
+
     tokens = {
         "root": [
             (r"\n", Whitespace),
@@ -234,6 +247,7 @@
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             include("keyword-constants"),
             include("text-operators"),
@@ -245,6 +259,7 @@
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             include("keyword-constants"),
             include("text-operators"),
@@ -267,6 +282,7 @@
             (r"(?i)\\expr(?:ession)?\s*\{",
              LexBase.op_ignore,
              "block-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             (r"\\\\", LexBase.op_fixed(Text, "\\")),
             (r"\\", LexBase.op_fixed(Text, "\\")),
@@ -279,6 +295,7 @@
             (r"(?:i)\\(expr(?:ession)?)\s*(\{)",
              bygroups(LexBase.op_ignore, LexBase.op_ignore),
              "expr-in-braces"),
+            include("explicit-tokentype"),
             (r"\\\\", LexBase.op_fixed(Text, "\\")),
             (r"\\", LexBase.op_fixed(Text, "\\")),
         ],
@@ -312,6 +329,27 @@
             (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
             (r"[{:(),;[\]]", Punctuation),
         ],
+        "explicit-tokentype": [
+            # All these REs are CASE-SENSITIVE!
+
+            # Multiple characters; the closing delimiter cannot be escaped!
+            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
+             r"(?P<character>(.|\n)+?)(?P=sep)",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^{]+)\{"
+             r"(?P<character>[^}]+?)\}",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^\(]+)\("
+             r"(?P<character>[^\)]+?)\)",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^<]+)<"
+             r"(?P<character>[^>]+?)>",
+             op_explicit_tokentype),
+
+            # Exactly one character, any (even newline): no escaping needed!
+            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
+             op_explicit_tokentype),
+        ],
     }
 
     def __init__(self, **options):
--- a/tests/test_algpseudo.py	Thu Apr 30 13:21:38 2026 +0200
+++ b/tests/test_algpseudo.py	Thu Apr 30 19:37:24 2026 +0200
@@ -451,6 +451,78 @@
              ],
             pygments.lex(r"\tstate{a 1.2 \\expr{x in X\} c}", self.lexer))
 
+    def test_explicit_extended_single_tokentype_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "%"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o:%:", self.lexer))
+
+    def test_explicit_extended_single_tokentype_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "{"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o{{}", self.lexer))
+
+    def test_explicit_extended_single_tokentype_3(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "<"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o<<>", self.lexer))
+
+    def test_explicit_extended_single_tokentype_4(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "("),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o(()", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in A"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o/xxx in A/", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in B"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o{xxx in B}", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_3(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in C"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o<xxx in C>", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_4(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in D"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\ttx-o(xxx in D)", self.lexer))
+
+    def test_explicit_tokentype_simple_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "}"),
+             ("Operator", "/"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(r"\tt-o/}\tt-o//", self.lexer))
+
+    def test_explicit_tokentype_simple_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "\n"),
+             ("Operator", "/"),
+             ("Text", " \n"),    # because of r"\s+", Text for expressions
+             ],
+            pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer))
+
 
 class PygmentizeCompletely(unittest.TestCase):