# HG changeset patch
# User Franz Glasner
# Date 1777570644 -7200
# Node ID 3ac1c4502ad097349e4632d535d1e3ab4355791e
# Parent  cd79d2c76347cb94b350ada8a7a0fdf00d874846
Implement "\tt-XXX" and "ttx-XXX" for explicit token types.
Overwrite defaults or implement tokens that are not yet handled by default.

diff -r cd79d2c76347 -r 3ac1c4502ad0 pygments_lexer_pseudocode2/algpseudocode.py
--- a/pygments_lexer_pseudocode2/algpseudocode.py	Thu Apr 30 13:21:38 2026 +0200
+++ b/pygments_lexer_pseudocode2/algpseudocode.py	Thu Apr 30 19:37:24 2026 +0200
@@ -18,7 +18,8 @@
 import pygments.util
 from pygments.lexer import bygroups, include, words
 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
-                            Text, Whitespace)
+                            Text, Whitespace,
+                            STANDARD_TYPES)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -153,6 +154,18 @@
 
         return _op_symbol
 
+    def op_explicit_tokentype(lexer, match, ctx=None):
+        needed_css = match.group("type")
+        for ttype, css in STANDARD_TYPES.items():
+            if css == needed_css:
+                toktype = ttype
+                break
+        else:
+            toktype = Text
+        yield match.start(), toktype, match.group("character")
+        if ctx:
+            ctx.pos = match.end()
+
     tokens = {
         "root": [
             (r"\n", Whitespace),
@@ -234,6 +247,7 @@
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             include("keyword-constants"),
             include("text-operators"),
@@ -245,6 +259,7 @@
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             include("keyword-constants"),
             include("text-operators"),
@@ -267,6 +282,7 @@
             (r"(?i)\\expr(?:ession)?\s*\{",
              LexBase.op_ignore,
              "block-expr"),
+            include("explicit-tokentype"),
             include("remark"),
             (r"\\\\", LexBase.op_fixed(Text, "\\")),
             (r"\\", LexBase.op_fixed(Text, "\\")),
@@ -279,6 +295,7 @@
             (r"(?:i)\\(expr(?:ession)?)\s*(\{)",
              bygroups(LexBase.op_ignore, LexBase.op_ignore),
              "expr-in-braces"),
+            include("explicit-tokentype"),
             (r"\\\\", LexBase.op_fixed(Text, "\\")),
             (r"\\", LexBase.op_fixed(Text, "\\")),
         ],
@@ -312,6 +329,27 @@
             (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
             (r"[{:(),;[\]]", Punctuation),
         ],
+        "explicit-tokentype": [
+            # All these REs are CASE-SENSITIVE!
+
+            # Multiple characters possible, but no escaping!
+            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
+             r"(?P<character>(.|\n)+?)(?P=sep)",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^{]+)\{"
+             r"(?P<character>[^}]+?)\}",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^\(]+)\("
+             r"(?P<character>[^\)]+?)\)",
+             op_explicit_tokentype),
+            (r"\\ttx\-(?P<type>[^<]+)<"
+             r"(?P<character>[^>]+?)>",
+             op_explicit_tokentype),
+
+            # Every character is possible: no escaping needed!
+            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
+             op_explicit_tokentype),
+        ],
     }
 
     def __init__(self, **options):
diff -r cd79d2c76347 -r 3ac1c4502ad0 tests/test_algpseudo.py
--- a/tests/test_algpseudo.py	Thu Apr 30 13:21:38 2026 +0200
+++ b/tests/test_algpseudo.py	Thu Apr 30 19:37:24 2026 +0200
@@ -451,6 +451,78 @@
             ],
             pygments.lex(r"\tstate{a 1.2 \\expr{x in X\} c}", self.lexer))
 
+    def test_explicit_extended_single_tokentype_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "%"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o:%:", self.lexer))
+
+    def test_explicit_extended_single_tokentype_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "{"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o{{}", self.lexer))
+
+    def test_explicit_extended_single_tokentype_3(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "<"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o<<>", self.lexer))
+
+    def test_explicit_extended_single_tokentype_4(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "("),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o(()", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in A"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o/xxx in A/", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in B"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o{xxx in B}", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_3(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in C"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o<xxx in C>", self.lexer))
+
+    def test_explicit_extended_multi_tokentype_4(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "xxx in D"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\ttx-o(xxx in D)", self.lexer))
+
+    def test_explicit_tokentype_simple_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "}"),
+             ("Operator", "/"),
+             ("Text.Whitespace", "\n"),
+            ],
+            pygments.lex(r"\tt-o/}\tt-o//", self.lexer))
+
+    def test_explicit_tokentype_simple_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Operator", "\n"),
+             ("Operator", "/"),
+             ("Text", " \n"),  # because of r"\s+", Text for expressions
+            ],
+            pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer))
+
 
 class PygmentizeCompletely(unittest.TestCase):
 