diff pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a

Handle many more characters from the Unicode character set in expressions. While there: FIX: Add the forgotten Punctuation characters `?' and `@'. While there: Allow escaping of the single and double quotes that normally start a string (e.g. for expressions like: f' is the first derivative of f).
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 04 May 2026 16:30:36 +0200
parents ffe6ea2cf69b
children f6b46a379aba
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/algpseudocode.py	Mon May 04 16:23:18 2026 +0200
+++ b/pygments_lexer_pseudocode2/algpseudocode.py	Mon May 04 16:30:36 2026 +0200
@@ -26,7 +26,7 @@
 #
 from pygments_lexer_pseudocode2.bases import LexBase
 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES
-
+from pygments_lexer_pseudocode2 import uniprops
 
 #
 # As in the local imports: use an explicit name because __name__ is
@@ -127,6 +127,18 @@
         "TSTATE": SYMBOL_TEXTSTATEMENT,
         "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
         "TBLOCK": SYMBOL_TEXTSTATEMENT,
+        "<-": "←",
+        "->": "→",
+        "=>": "⇒",
+        "<=": "≤",
+        ">=": "≥",
+        "<>": "≠",
+        "!=": "≠",
+        ":=": "∶=",  # "≔"   not recognizable
+        "=:": "=∶",  # "≕",  not recognizable
+        "<=>": "⇔",
+        "<->": "↔",
+        "?=": "≟",
     }
 
     def op_translate(toktype):
@@ -296,6 +308,8 @@
              r")\b",
              bygroups(op_translate(Keyword))),
             include("expr"),
+            include("unicode-separators"),
+            include("unicode-other"),
             (r"[^\S\n]+", Text),
             (r".", Generic.Error),     # tolerance for errors
         ],
@@ -318,7 +332,10 @@
             (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
         ],
         "expr": [
-            include("punctuation"),
+            include("math-symbols"),          # must be before punctuation
+            include("ascii-punctuation"),
+            include("unicode-punctuation"),
+            include("escaped-string-start"),
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
@@ -330,7 +347,10 @@
             include("py-name"),
         ],
         "expr-in-braces": [
-            include("punctuation-in-braces"),
+            include("math-symbols"),          # must be before punctuation
+            include("ascii-punctuation-in-braces"),
+            include("unicode-punctuation"),
+            include("escaped-string-start"),
             include("py-strings"),
             include("py-numbers"),
             (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
@@ -347,6 +367,8 @@
             include("expr-in-braces"),
             (r"\\\\", LexBase.op_fixed(Text, "\\")),
             (r"\\", LexBase.op_fixed(Text, "\\")),
+            include("unicode-separators"),
+            include("unicode-other"),
             (r"[^\S\n]+", Text),
             (r".", Generic.Error),     # tolerance for errors
         ],
@@ -385,6 +407,11 @@
                    suffix=r"\b"),
              Name.Builtin),
         ],
+        "math-symbols": [
+            (r"<=>|<->|<-|->|=>|<=|>=|<>|!=|:=|=:|\?=", op_symbol(Operator)),
+            (r"[!&<>=+\-*/%|~]", Operator),         # ASCII
+            (u"[%s]" % (uniprops.Sm,), Operator),   # other Unicode
+        ],
         "word-operators": [
             (words(("IN", "In", "in",
                     "IS", "Is", "is",
@@ -405,13 +432,37 @@
                    suffix=r"\b"),
              Keyword.Constant),
         ],
-        "punctuation": [
-            (r"[{}:(),;[\]]", Punctuation),
+        "ascii-punctuation": [
+            (r"[{}:(),;[\]?@]", Punctuation),
+        ],
+        "ascii-punctuation-in-braces": [
+            #
+            # Like "ascii-punctuation" but needs an escaped curly brace for }
+            # because a single closing curly brace pops the current state here.
+            #
+            (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
+            (r"[{:(),;[\]?@]", Punctuation),
+        ],
+        "unicode-separators": [
+            (u"[%s]" % (uniprops.Zl,), Whitespace),
+            (u"[%s]" % (uniprops.Zp,), Whitespace),
+            (u"[%s]" % (uniprops.Zs,), Whitespace),
         ],
-        "punctuation-in-braces": [
-            # like "punctuation" but needs an escaped curly brace for }
-            (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
-            (r"[{:(),;[\]]", Punctuation),
+        "unicode-punctuation": [
+            (u"[%s]" % (uniprops.Pc,), Punctuation),
+            (u"[%s]" % (uniprops.Pd,), Punctuation),
+            (u"[%s]" % (uniprops.Ps,), Punctuation),
+            (u"[%s]" % (uniprops.Pe,), Punctuation),
+            (u"[%s]" % (uniprops.Pi,), Punctuation),
+            (u"[%s]" % (uniprops.Pf,), Punctuation),
+            (u"[%s]" % (uniprops.Po,), Punctuation),
+        ],
+        "unicode-other": [
+            (u"[%s]" % (uniprops.Sc,), Text),    # Currency
+            (u"[%s]" % (uniprops.So,), Text),    # Other symbols
+        ],
+        "escaped-string-start": [
+            (r"""\\(['"])""", bygroups(Punctuation)),
         ],
         "explicit-tokentype": [
             # All these REs are CASE-SENSITIVE!