Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
changeset 84:3ac1c4502ad0
Implement "\tt-XXX" and "\ttx-XXX" for explicit token types.
Overwrite defaults or implement tokens that are not yet handled by default.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 30 Apr 2026 19:37:24 +0200 |
| parents | cd79d2c76347 |
| children | ae5e741d2a9b |
| files | pygments_lexer_pseudocode2/algpseudocode.py tests/test_algpseudo.py |
| diffstat | 2 files changed, 111 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py Thu Apr 30 13:21:38 2026 +0200 +++ b/pygments_lexer_pseudocode2/algpseudocode.py Thu Apr 30 19:37:24 2026 +0200 @@ -18,7 +18,8 @@ import pygments.util from pygments.lexer import bygroups, include, words from pygments.token import (Comment, Keyword, Name, Operator, Punctuation, - Text, Whitespace) + Text, Whitespace, + STANDARD_TYPES) # # Relative imports do not work with pygments.lexers.load_lexer_from_file() @@ -153,6 +154,18 @@ return _op_symbol + def op_explicit_tokentype(lexer, match, ctx=None): + needed_css = match.group("type") + for ttype, css in STANDARD_TYPES.items(): + if css == needed_css: + toktype = ttype + break + else: + toktype = Text + yield match.start(), toktype, match.group("character") + if ctx: + ctx.pos = match.end() + tokens = { "root": [ (r"\n", Whitespace), @@ -234,6 +247,7 @@ include("py-strings"), include("py-numbers"), (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"), + include("explicit-tokentype"), include("remark"), include("keyword-constants"), include("text-operators"), @@ -245,6 +259,7 @@ include("py-strings"), include("py-numbers"), (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"), + include("explicit-tokentype"), include("remark"), include("keyword-constants"), include("text-operators"), @@ -267,6 +282,7 @@ (r"(?i)\\expr(?:ession)?\s*\{", LexBase.op_ignore, "block-expr"), + include("explicit-tokentype"), include("remark"), (r"\\\\", LexBase.op_fixed(Text, "\\")), (r"\\", LexBase.op_fixed(Text, "\\")), @@ -279,6 +295,7 @@ (r"(?:i)\\(expr(?:ession)?)\s*(\{)", bygroups(LexBase.op_ignore, LexBase.op_ignore), "expr-in-braces"), + include("explicit-tokentype"), (r"\\\\", LexBase.op_fixed(Text, "\\")), (r"\\", LexBase.op_fixed(Text, "\\")), ], @@ -312,6 +329,27 @@ (r"\\\}", LexBase.op_fixed(Punctuation, "}")), (r"[{:(),;[\]]", Punctuation), ], + "explicit-tokentype": [ + # All these REs are CASE-SENSITIVE! + + # Multiple characters possible, but no escaping! 
+ (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])" + r"(?P<character>(.|\n)+?)(?P=sep)", + op_explicit_tokentype), + (r"\\ttx\-(?P<type>[^{]+)\{" + r"(?P<character>[^}]+?)\}", + op_explicit_tokentype), + (r"\\ttx\-(?P<type>[^\(]+)\(" + r"(?P<character>[^\)]+?)\)", + op_explicit_tokentype), + (r"\\ttx\-(?P<type>[^<]+)<" + r"(?P<character>[^>]+?)>", + op_explicit_tokentype), + + # Every character is possible: no escaping needed! + (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))", + op_explicit_tokentype), + ], } def __init__(self, **options):
--- a/tests/test_algpseudo.py Thu Apr 30 13:21:38 2026 +0200 +++ b/tests/test_algpseudo.py Thu Apr 30 19:37:24 2026 +0200 @@ -451,6 +451,78 @@ ], pygments.lex(r"\tstate{a 1.2 \\expr{x in X\} c}", self.lexer)) + def test_explicit_extended_single_tokentype_1(self): + self.assertTokenStreamEqualComplete( + [("Operator", "%"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o:%:", self.lexer)) + + def test_explicit_extended_single_tokentype_2(self): + self.assertTokenStreamEqualComplete( + [("Operator", "{"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o{{}", self.lexer)) + + def test_explicit_extended_single_tokentype_3(self): + self.assertTokenStreamEqualComplete( + [("Operator", "<"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o<<>", self.lexer)) + + def test_explicit_extended_single_tokentype_4(self): + self.assertTokenStreamEqualComplete( + [("Operator", "("), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o(()", self.lexer)) + + def test_explicit_extended_multi_tokentype_1(self): + self.assertTokenStreamEqualComplete( + [("Operator", "xxx in A"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o/xxx in A/", self.lexer)) + + def test_explicit_extended_multi_tokentype_2(self): + self.assertTokenStreamEqualComplete( + [("Operator", "xxx in B"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o{xxx in B}", self.lexer)) + + def test_explicit_extended_multi_tokentype_3(self): + self.assertTokenStreamEqualComplete( + [("Operator", "xxx in C"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o<xxx in C>", self.lexer)) + + def test_explicit_extended_multi_tokentype_4(self): + self.assertTokenStreamEqualComplete( + [("Operator", "xxx in D"), + ("Text.Whitespace", "\n"), + ], + pygments.lex(r"\ttx-o(xxx in D)", self.lexer)) + + def test_explicit_tokentype_simple_1(self): + self.assertTokenStreamEqualComplete( + [("Operator", "}"), + ("Operator", "/"), + ("Text.Whitespace", "\n"), + ], + 
pygments.lex(r"\tt-o/}\tt-o//", self.lexer)) + + def test_explicit_tokentype_simple_2(self): + self.assertTokenStreamEqualComplete( + [("Operator", "\n"), + ("Operator", "/"), + ("Text", " \n"), # because of r"\s+", Text for expressions + ], + pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer)) + class PygmentizeCompletely(unittest.TestCase):
