# HG changeset patch # User Franz Glasner # Date 1777709279 -7200 # Node ID d8ca835c74eaaadc0755317314cebe33dc5f9b1a # Parent 0c05dc09c9e281c670dd8e51b13a7c628ffa3d76 FIX: Erroneous parsing of \tt-XXX and \ttx-XXX: Need to restrict the tokentype group because otherwise it would match too much if some sort of braces are mixed on a single line. diff -r 0c05dc09c9e2 -r d8ca835c74ea pygments_lexer_pseudocode2/algpseudocode.py --- a/pygments_lexer_pseudocode2/algpseudocode.py Sat May 02 09:54:03 2026 +0200 +++ b/pygments_lexer_pseudocode2/algpseudocode.py Sat May 02 10:07:59 2026 +0200 @@ -16,8 +16,8 @@ import pygments.util from pygments.lexer import bygroups, include, words -from pygments.token import (Comment, Keyword, Name, Operator, Punctuation, - Text, Whitespace) +from pygments.token import (Comment, Generic, Keyword, Name, Operator, + Punctuation, Text, Whitespace) # # Relative imports do not work with pygments.lexers.load_lexer_from_file() @@ -155,8 +155,14 @@ def op_explicit_tokentype(lexer, match, ctx=None): needed_css = match.group("type") - toktype = REVERSED_STANDARD_TYPES.get(needed_css, Text) - yield match.start(), toktype, match.group("character") + toktype = REVERSED_STANDARD_TYPES.get(needed_css, None) + if toktype is None: + # Be more error friendly + toktype = Generic.Error + val = match.group() + else: + val = match.group("character") + yield match.start(), toktype, val if ctx: ctx.pos = match.end() @@ -327,21 +333,20 @@ # All these REs are CASE-SENSITIVE! # Multiple characters possible, but no escaping! 
- (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])" + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])" r"(?P<character>(.|\n)+?)(?P=sep)", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^{]+)\{" - r"(?P<character>[^}]+?)\}", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}", + op_explicit_tokentype), + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^\(]+)\(" - r"(?P<character>[^\)]+?)\)", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^<]+)<" - r"(?P<character>[^>]+?)>", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]", op_explicit_tokentype), # Every character is possible: no escaping needed! - (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))", + (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))", op_explicit_tokentype), ], } diff -r 0c05dc09c9e2 -r d8ca835c74ea tests/test_algpseudo.py --- a/tests/test_algpseudo.py Sat May 02 09:54:03 2026 +0200 +++ b/tests/test_algpseudo.py Sat May 02 10:07:59 2026 +0200 @@ -6,6 +6,7 @@ from _tsetup import ALGLEXERFILENAME, ALGLEXERCLASS +import sys import unittest import pygments @@ -523,6 +524,92 @@ ], pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer)) + @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2") + def test_explicit_tokentype_with_remark(self): + self.assertTokenStreamEqualComplete( + [("Operator", "∈ ∌"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " ∈ ∌ as (ordinary) operators"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-o<∈ ∌> \rem ∈ ∌ as (ordinary) operators""", + self.lexer)) + + def test_explicit_tokentype_with_remark_2(self): + self.assertTokenStreamEqualComplete( + [("Operator", "new_operator"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " a (synthesized) operator"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-o<new_operator> \rem a (synthesized) operator""", + self.lexer)) + + @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2") + def test_explicit_tokentype_with_possibly_conflicting_parens(self): + 
self.assertTokenStreamEqualComplete( + [("Name.Function", "∈_∌"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nf<∈_∌>(p1, p2)""", + self.lexer)) + + def test_explicit_tokentype_with_possibly_conflicting_parens_2(self): + self.assertTokenStreamEqualComplete( + [("Name.Decorator", "a_Decorator"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " (Python) decorator"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nd<a_Decorator>(p1, p2) \rem (Python) decorator""", + self.lexer)) + + def test_explicit_tokentype_with_possibly_conflicting_parens_3(self): + self.assertTokenStreamEqualComplete( + [("Name.Decorator", "a_Decorator"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " (Python) annotation"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nd[a_Decorator](p1, p2) \rem (Python) annotation""", + self.lexer)) + + def test_explicit_tokentype_error(self): + self.assertTokenStreamEqualComplete( + [("Generic.Error", r"""\ttx-non-existing[a_Decorator]"""), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-non-existing[a_Decorator]""", + self.lexer)) + class PygmentizeCompletely(unittest.TestCase):