Python/libs/pygments-lexer-pseudocode2: pygments_lexer_pseudocode2/algpseudocode.py comparison

comparison pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a

Handle many more characters from the Unicode codeset in expressions. While there: FIX: Add forgotten Punctuation characters `?' and `@'. While there: Allow the escaping of single and double quotes that normally start a string (e.g. for expressions like f' is the first derivative of f).

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 04 May 2026 16:30:36 +0200
parents	ffe6ea2cf69b
children	f6b46a379aba

comparison

equal deleted inserted replaced

-:ffe6ea2cf69b
+:cec52d83869a
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
 # in all of our supported Python releases.
 #
 from pygments_lexer_pseudocode2.bases import LexBase
 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES
+from pygments_lexer_pseudocode2 import uniprops
 #
 # As in the local imports: use an explicit name because __name__ is
 # __builtins__
 #
 "TEXTSTATE": SYMBOL_TEXTSTATEMENT,
 "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
 "TSTATE": SYMBOL_TEXTSTATEMENT,
 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
 "TBLOCK": SYMBOL_TEXTSTATEMENT,
+"<-": "←",
+"->": "→",
+"=>": "⇒",
+"<=": "≤",
+">=": "≥",
+"<>": "≠",
+"!=": "≠",
+":=": "∶=",  # "≔"   not recognizable
+"=:": "=∶",  # "≕",  not recognizable
+"<=>": "⇔",
+"<->": "↔",
+"?=": "≟",
 }
 def op_translate(toktype):
 def _op_translate(lexer, match, ctx=None):
 r"(?:is)"
 r"|(?:with)"
 r")\b",
 bygroups(op_translate(Keyword))),
 include("expr"),
+include("unicode-separators"),
+include("unicode-other"),
 (r"[^\S\n]+", Text),
 (r".", Generic.Error),     # tolerance for errors
 ],
 "remark": [
 (r"(?i)\\(remark|rem)\b(.*)$",
 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
 (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
 ],
 "expr": [
-include("punctuation"),
+include("math-symbols"),          # must be before punctuation
+include("ascii-punctuation"),
+include("unicode-punctuation"),
+include("escaped-string-start"),
 include("py-strings"),
 include("py-numbers"),
 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
 include("explicit-tokentype"),
 include("remark"),
 include("word-operators"),
 include("math-builtins"),
 include("py-name"),
 ],
 "expr-in-braces": [
-include("punctuation-in-braces"),
+include("math-symbols"),          # must be before punctuation
+include("ascii-punctuation-in-braces"),
+include("unicode-punctuation"),
+include("escaped-string-start"),
 include("py-strings"),
 include("py-numbers"),
 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
 include("explicit-tokentype"),
 include("remark"),
 (r"\}", LexBase.op_ignore, "#pop"),
 (r"\n", Whitespace),
 include("expr-in-braces"),
 (r"\\\\", LexBase.op_fixed(Text, "\\")),
 (r"\\", LexBase.op_fixed(Text, "\\")),
+include("unicode-separators"),
+include("unicode-other"),
 (r"[^\S\n]+", Text),
 (r".", Generic.Error),     # tolerance for errors
 ],
 "text-statement": [  # like block but default to text-mode
 (r"[^\\}\n]+", Text),
 "min", "max"),
 prefix=r"(?<!\.)",
 suffix=r"\b"),
 Name.Builtin),
 ],
+"math-symbols": [
+(r"<=>|<->|<-|->|=>|<=|>=|<>|!=|:=|=:|\?=", op_symbol(Operator)),
+(r"[!&<>=+\-*/%|~]", Operator),         # ASCII
+(u"[%s]" % (uniprops.Sm,), Operator),   # other Unicode
+],
 "word-operators": [
 (words(("IN", "In", "in",
 "IS", "Is", "is",
 "AND", "And", "and",
 "OR", "Or", "or",
 "Empty", "EMPTY", "empty"),
 prefix=r"(?<!\.)",
 suffix=r"\b"),
 Keyword.Constant),
 ],
-"punctuation": [
+"ascii-punctuation": [
-(r"[{}:(),;[\]]", Punctuation),
+(r"[{}:(),;[\]?@]", Punctuation),
 ],
-"punctuation-in-braces": [
+"ascii-punctuation-in-braces": [
-# like "punctuation" but needs an escaped curly brace for }
+#
+# Like "ascii-punctuation" but needs an escaped curly brace for } because
+# a single closing curly brace pops the current state here.
+#
 (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
-(r"[{:(),;[\]]", Punctuation),
+(r"[{:(),;[\]?@]", Punctuation),
+],
+"unicode-separators": [
+(u"[%s]" % (uniprops.Zl,), Whitespace),
+(u"[%s]" % (uniprops.Zp,), Whitespace),
+(u"[%s]" % (uniprops.Zs,), Whitespace),
+],
+"unicode-punctuation": [
+(u"[%s]" % (uniprops.Pc,), Punctuation),
+(u"[%s]" % (uniprops.Pd,), Punctuation),
+(u"[%s]" % (uniprops.Ps,), Punctuation),
+(u"[%s]" % (uniprops.Pe,), Punctuation),
+(u"[%s]" % (uniprops.Pi,), Punctuation),
+(u"[%s]" % (uniprops.Pf,), Punctuation),
+(u"[%s]" % (uniprops.Po,), Punctuation),
+],
+"unicode-other": [
+(u"[%s]" % (uniprops.Sc,), Text),    # Currency
+(u"[%s]" % (uniprops.So,), Text),    # Other symbols
+],
+"escaped-string-start": [
+(r"""\\(['"])""", bygroups(Punctuation)),
 ],
 "explicit-tokentype": [
 # All these REs are CASE-SENSITIVE!
 # Multiple characters possible, but no escaping!

Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2

comparison pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a