diff pygments_lexer_pseudocode2/algpseudocode.py @ 87:d8ca835c74ea

FIX: Erroneous parsing of \tt-XXX and \ttx-XXX: the token-type group needs to be restricted, because otherwise it would match too much when different kinds of braces are mixed on a single line.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 02 May 2026 10:07:59 +0200
parents ae5e741d2a9b
children 7414eed7b275
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py	Sat May 02 09:54:03 2026 +0200
+++ b/pygments_lexer_pseudocode2/algpseudocode.py	Sat May 02 10:07:59 2026 +0200
@@ -16,8 +16,8 @@
 
 import pygments.util
 from pygments.lexer import bygroups, include, words
-from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
-                            Text, Whitespace)
+from pygments.token import (Comment, Generic, Keyword, Name, Operator,
+                            Punctuation, Text, Whitespace)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -155,8 +155,14 @@
 
     def op_explicit_tokentype(lexer, match, ctx=None):
         needed_css = match.group("type")
-        toktype = REVERSED_STANDARD_TYPES.get(needed_css, Text)
-        yield match.start(), toktype, match.group("character")
+        toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
+        if toktype is None:
+            # Unknown type name: flag the whole match as an error
+            toktype = Generic.Error
+            val = match.group()
+        else:
+            val = match.group("character")
+        yield match.start(), toktype, val
         if ctx:
             ctx.pos = match.end()
 
@@ -327,21 +333,20 @@
             # All these REs are CASE-SENSITIVE!
 
             # Multiple characters possible, but no escaping!
-            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])"
              r"(?P<character>(.|\n)+?)(?P=sep)",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^{]+)\{"
-             r"(?P<character>[^}]+?)\}",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^\(]+)\("
-             r"(?P<character>[^\)]+?)\)",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>",
              op_explicit_tokentype),
-            (r"\\ttx\-(?P<type>[^<]+)<"
-             r"(?P<character>[^>]+?)>",
+            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]",
              op_explicit_tokentype),
 
             # Every character is possible: no escaping needed!
-            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
+            (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))",
              op_explicit_tokentype),
         ],
     }