changeset 87:d8ca835c74ea

FIX: Erroneous parsing of \tt-XXX and \ttx-XXX: the tokentype group must be restricted, because otherwise it matches too much when different kinds of brackets are mixed on a single line.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 02 May 2026 10:07:59 +0200
parents 0c05dc09c9e2
children 03e9031b5eca
files pygments_lexer_pseudocode2/algpseudocode.py tests/test_algpseudo.py
diffstat 2 files changed, 104 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py	Sat May 02 09:54:03 2026 +0200
+++ b/pygments_lexer_pseudocode2/algpseudocode.py	Sat May 02 10:07:59 2026 +0200
@@ -16,8 +16,8 @@
 
 import pygments.util
 from pygments.lexer import bygroups, include, words
-from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
-                            Text, Whitespace)
+from pygments.token import (Comment, Generic, Keyword, Name, Operator,
+                            Punctuation, Text, Whitespace)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -155,8 +155,14 @@
 
     def op_explicit_tokentype(lexer, match, ctx=None):
         needed_css = match.group("type")
-        toktype = REVERSED_STANDARD_TYPES.get(needed_css, Text)
-        yield match.start(), toktype, match.group("character")
+        toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
+        if toktype is None:
+            # Be more error friendly
+            toktype = Generic.Error
+            val = match.group()
+        else:
+            val = match.group("character")
+        yield match.start(), toktype, val
         if ctx:
             ctx.pos = match.end()
 
@@ -327,21 +333,20 @@
             # All these REs are CASE-SENSITIVE!
 
             # Multiple characters possible, but no escaping!
-            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])"
              r"(?P<character>(.|\n)+?)(?P=sep)",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^{]+)\{"
-             r"(?P<character>[^}]+?)\}",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^\(]+)\("
-             r"(?P<character>[^\)]+?)\)",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^<]+)<"
-             r"(?P<character>[^>]+?)>",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]",
              op_explicit_tokentype),
 
             # Every character is possible: no escaping needed!
-            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
+            (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))",
              op_explicit_tokentype),
         ],
     }
--- a/tests/test_algpseudo.py	Sat May 02 09:54:03 2026 +0200
+++ b/tests/test_algpseudo.py	Sat May 02 10:07:59 2026 +0200
@@ -6,6 +6,7 @@
 
 from _tsetup import ALGLEXERFILENAME, ALGLEXERCLASS
 
+import sys
 import unittest
 
 import pygments
@@ -523,6 +524,92 @@
              ],
             pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer))
 
+    @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2")
+    def test_explicit_tokentype_with_remark(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "∈ ∌"),
+             ("Text", "    "),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", " ∈ ∌ as (ordinary) operators"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-o<∈ ∌>    \rem ∈ ∌ as (ordinary) operators""",
+                self.lexer))
+
+    def test_explicit_tokentype_with_remark_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "new_operator"),
+             ("Text", "  "),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", " a (synthesized) operator"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-o<new_operator>  \rem a (synthesized) operator""",
+                self.lexer))
+
+    @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2")
+    def test_explicit_tokentype_with_possibly_conflicting_parens(self):
+        self.assertTokenStreamEqualComplete(
+            [("Name.Function", "∈_∌"),
+             ("Punctuation", "("),
+             ("Name.Entity", "p1"),
+             ("Punctuation", ","),
+             ("Text", " "),
+             ("Name.Entity", "p2"),
+             ("Punctuation", ")"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-nf<∈_∌>(p1, p2)""",
+                self.lexer))
+
+    def test_explicit_tokentype_with_possibly_conflicting_parens_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Name.Decorator", "a_Decorator"),
+             ("Punctuation", "("),
+             ("Name.Entity", "p1"),
+             ("Punctuation", ","),
+             ("Text", " "),
+             ("Name.Entity", "p2"),
+             ("Punctuation", ")"),
+             ("Text", "   "),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", " (Python) decorator"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-nd<a_Decorator>(p1, p2)   \rem (Python) decorator""",
+                self.lexer))
+
+    def test_explicit_tokentype_with_possibly_conflicting_parens_3(self):
+        self.assertTokenStreamEqualComplete(
+            [("Name.Decorator", "a_Decorator"),
+             ("Punctuation", "("),
+             ("Name.Entity", "p1"),
+             ("Punctuation", ","),
+             ("Text", " "),
+             ("Name.Entity", "p2"),
+             ("Punctuation", ")"),
+             ("Text", "   "),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", " (Python) annotation"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-nd[a_Decorator](p1, p2)   \rem (Python) annotation""",
+                self.lexer))
+
+    def test_explicit_tokentype_error(self):
+        self.assertTokenStreamEqualComplete(
+            [("Generic.Error", r"""\ttx-non-existing[a_Decorator]"""),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                r"""\ttx-non-existing[a_Decorator]""",
+                self.lexer))
+
 
 class PygmentizeCompletely(unittest.TestCase):