Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
diff pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a
Handle many more characters from the Unicode character set in expressions.
While there: FIX: Add forgotten Punctuation characters `?' and `@'.
While there: Allow the escaping of single and double quotes that normally
start a string (e.g. for expressions like f' is the first derivative of f).
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 04 May 2026 16:30:36 +0200 |
| parents | ffe6ea2cf69b |
| children | f6b46a379aba |
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py Mon May 04 16:23:18 2026 +0200 +++ b/pygments_lexer_pseudocode2/algpseudocode.py Mon May 04 16:30:36 2026 +0200 @@ -26,7 +26,7 @@ # from pygments_lexer_pseudocode2.bases import LexBase from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES - +from pygments_lexer_pseudocode2 import uniprops # # As in the local imports: use an explicit name because __name__ is @@ -127,6 +127,18 @@ "TSTATE": SYMBOL_TEXTSTATEMENT, "TEXTBLOCK": SYMBOL_TEXTSTATEMENT, "TBLOCK": SYMBOL_TEXTSTATEMENT, + "<-": "←", + "->": "→", + "=>": "⇒", + "<=": "≤", + ">=": "≥", + "<>": "≠", + "!=": "≠", + ":=": "∶=", # "≔" not recognizable + "=:": "=∶", # "≕", not recognizable + "<=>": "⇔", + "<->": "↔", + "?=": "≟", } def op_translate(toktype): @@ -296,6 +308,8 @@ r")\b", bygroups(op_translate(Keyword))), include("expr"), + include("unicode-separators"), + include("unicode-other"), (r"[^\S\n]+", Text), (r".", Generic.Error), # tolerance for errors ], @@ -318,7 +332,10 @@ (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), ], "expr": [ - include("punctuation"), + include("math-symbols"), # must be before punctuation + include("ascii-punctuation"), + include("unicode-punctuation"), + include("escaped-string-start"), include("py-strings"), include("py-numbers"), (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), @@ -330,7 +347,10 @@ include("py-name"), ], "expr-in-braces": [ - include("punctuation-in-braces"), + include("math-symbols"), # must be before punctuation + include("ascii-punctuation-in-braces"), + include("unicode-punctuation"), + include("escaped-string-start"), include("py-strings"), include("py-numbers"), (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), @@ -347,6 +367,8 @@ include("expr-in-braces"), (r"\\\\", LexBase.op_fixed(Text, "\\")), (r"\\", LexBase.op_fixed(Text, "\\")), + include("unicode-separators"), + include("unicode-other"), (r"[^\S\n]+", Text), (r".", Generic.Error), # tolerance for errors ], @@ 
-385,6 +407,11 @@ suffix=r"\b"), Name.Builtin), ], + "math-symbols": [ + (r"<=>|<->|<-|->|=>|<=|>=|<>|!=|:=|=:|\?=", op_symbol(Operator)), + (r"[!&<>=+\-*/%|~]", Operator), # ASCII + (u"[%s]" % (uniprops.Sm,), Operator), # other Unicode + ], "word-operators": [ (words(("IN", "In", "in", "IS", "Is", "is", @@ -405,13 +432,37 @@ suffix=r"\b"), Keyword.Constant), ], - "punctuation": [ - (r"[{}:(),;[\]]", Punctuation), + "ascii-punctuation": [ + (r"[{}:(),;[\]?@]", Punctuation), + ], + "ascii-punctuation-in-braces": [ + # + # Like "punctuation" but needs an escaped curly brace for } because + # a single closing curly brace pops the current state here. + # + (r"\\\}", LexBase.op_fixed(Punctuation, "}")), + (r"[{:(),;[\]?@]", Punctuation), + ], + "unicode-separators": [ + (u"[%s]" % (uniprops.Zl,), Whitespace), + (u"[%s]" % (uniprops.Zp,), Whitespace), + (u"[%s]" % (uniprops.Zs,), Whitespace), ], - "punctuation-in-braces": [ - # like "punctuation" but needs an escaped curly brace for } - (r"\\\}", LexBase.op_fixed(Punctuation, "}")), - (r"[{:(),;[\]]", Punctuation), + "unicode-punctuation": [ + (u"[%s]" % (uniprops.Pc,), Punctuation), + (u"[%s]" % (uniprops.Pd,), Punctuation), + (u"[%s]" % (uniprops.Ps,), Punctuation), + (u"[%s]" % (uniprops.Pe,), Punctuation), + (u"[%s]" % (uniprops.Pi,), Punctuation), + (u"[%s]" % (uniprops.Pf,), Punctuation), + (u"[%s]" % (uniprops.Po,), Punctuation), + ], + "unicode-other": [ + (u"[%s]" % (uniprops.Sc,), Text), # Currency + (u"[%s]" % (uniprops.So,), Text), # Other symbols + ], + "escaped-string-start": [ + (r"""\\(['"])""", bygroups(Punctuation)), ], "explicit-tokentype": [ # All these REs are CASE-SENSITIVE!
