Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
changeset 87:d8ca835c74ea
FIX: Erroneous parsing of \tt-XXX and \ttx-XXX:
The token-type group needs to be restricted, because otherwise it matches
too much when different kinds of brackets are mixed on a single line.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 02 May 2026 10:07:59 +0200 |
| parents | 0c05dc09c9e2 |
| children | 03e9031b5eca |
| files | pygments_lexer_pseudocode2/algpseudocode.py tests/test_algpseudo.py |
| diffstat | 2 files changed, 104 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py Sat May 02 09:54:03 2026 +0200 +++ b/pygments_lexer_pseudocode2/algpseudocode.py Sat May 02 10:07:59 2026 +0200 @@ -16,8 +16,8 @@ import pygments.util from pygments.lexer import bygroups, include, words -from pygments.token import (Comment, Keyword, Name, Operator, Punctuation, - Text, Whitespace) +from pygments.token import (Comment, Generic, Keyword, Name, Operator, + Punctuation, Text, Whitespace) # # Relative imports do not work with pygments.lexers.load_lexer_from_file() @@ -155,8 +155,14 @@ def op_explicit_tokentype(lexer, match, ctx=None): needed_css = match.group("type") - toktype = REVERSED_STANDARD_TYPES.get(needed_css, Text) - yield match.start(), toktype, match.group("character") + toktype = REVERSED_STANDARD_TYPES.get(needed_css, None) + if toktype is None: + # Be more error friendly + toktype = Generic.Error + val = match.group() + else: + val = match.group("character") + yield match.start(), toktype, val if ctx: ctx.pos = match.end() @@ -327,21 +333,20 @@ # All these REs are CASE-SENSITIVE! # Multiple characters possible, but no escaping! - (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])" + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])" r"(?P<character>(.|\n)+?)(?P=sep)", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^{]+)\{" - r"(?P<character>[^}]+?)\}", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}", + op_explicit_tokentype), + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^\(]+)\(" - r"(?P<character>[^\)]+?)\)", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>", op_explicit_tokentype), - (r"\\ttx\-(?P<type>[^<]+)<" - r"(?P<character>[^>]+?)>", + (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]", op_explicit_tokentype), # Every character is possible: no escaping needed! 
- (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))", + (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))", op_explicit_tokentype), ], }
--- a/tests/test_algpseudo.py Sat May 02 09:54:03 2026 +0200 +++ b/tests/test_algpseudo.py Sat May 02 10:07:59 2026 +0200 @@ -6,6 +6,7 @@ from _tsetup import ALGLEXERFILENAME, ALGLEXERCLASS +import sys import unittest import pygments @@ -523,6 +524,92 @@ ], pygments.lex("\\tt-o/\n\\tt-o// ", self.lexer)) + @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2") + def test_explicit_tokentype_with_remark(self): + self.assertTokenStreamEqualComplete( + [("Operator", "∈ ∌"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " ∈ ∌ as (ordinary) operators"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-o<∈ ∌> \rem ∈ ∌ as (ordinary) operators""", + self.lexer)) + + def test_explicit_tokentype_with_remark_2(self): + self.assertTokenStreamEqualComplete( + [("Operator", "new_operator"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " a (synthesized) operator"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-o<new_operator> \rem a (synthesized) operator""", + self.lexer)) + + @unittest.skipIf(sys.version_info[0] <= 2, "Unicode issue on Python 2") + def test_explicit_tokentype_with_possibly_conflicting_parens(self): + self.assertTokenStreamEqualComplete( + [("Name.Function", "∈_∌"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nf<∈_∌>(p1, p2)""", + self.lexer)) + + def test_explicit_tokentype_with_possibly_conflicting_parens_2(self): + self.assertTokenStreamEqualComplete( + [("Name.Decorator", "a_Decorator"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " (Python) decorator"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nd<a_Decorator>(p1, p2) \rem (Python) decorator""", + 
self.lexer)) + + def test_explicit_tokentype_with_possibly_conflicting_parens_3(self): + self.assertTokenStreamEqualComplete( + [("Name.Decorator", "a_Decorator"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text", " "), + ("Comment.Single", "▷"), + ("Comment.Single", " (Python) annotation"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-nd[a_Decorator](p1, p2) \rem (Python) annotation""", + self.lexer)) + + def test_explicit_tokentype_error(self): + self.assertTokenStreamEqualComplete( + [("Generic.Error", r"""\ttx-non-existing[a_Decorator]"""), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + r"""\ttx-non-existing[a_Decorator]""", + self.lexer)) + class PygmentizeCompletely(unittest.TestCase):
