comparison pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a

Handle many more characters from the Unicode codeset in expressions. While there: FIX: Add forgotten Punctuation characters `?' and `@'. While there: Allow the escaping of single and double quotes that normally start a string (e.g. for expressions like f' is the first derivative of f).
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 04 May 2026 16:30:36 +0200
parents ffe6ea2cf69b
children f6b46a379aba
comparison
equal deleted inserted replaced
104:ffe6ea2cf69b 105:cec52d83869a
24 # Relative imports do not work with pygments.lexers.load_lexer_from_file() 24 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
25 # in all of our supported Python releases. 25 # in all of our supported Python releases.
26 # 26 #
27 from pygments_lexer_pseudocode2.bases import LexBase 27 from pygments_lexer_pseudocode2.bases import LexBase
28 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES 28 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES
29 29 from pygments_lexer_pseudocode2 import uniprops
30 30
31 # 31 #
32 # As in the local imports: use an explicit name because __name__ is 32 # As in the local imports: use an explicit name because __name__ is
33 # __builtins__ 33 # __builtins__
34 # 34 #
125 "TEXTSTATE": SYMBOL_TEXTSTATEMENT, 125 "TEXTSTATE": SYMBOL_TEXTSTATEMENT,
126 "TSTATEMENT": SYMBOL_TEXTSTATEMENT, 126 "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
127 "TSTATE": SYMBOL_TEXTSTATEMENT, 127 "TSTATE": SYMBOL_TEXTSTATEMENT,
128 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT, 128 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
129 "TBLOCK": SYMBOL_TEXTSTATEMENT, 129 "TBLOCK": SYMBOL_TEXTSTATEMENT,
130 "<-": "←",
131 "->": "→",
132 "=>": "⇒",
133 "<=": "≤",
134 ">=": "≥",
135 "<>": "≠",
136 "!=": "≠",
137 ":=": "∶=", # "≔" not recognizable
138 "=:": "=∶", # "≕", not recognizable
139 "<=>": "⇔",
140 "<->": "↔",
141 "?=": "≟",
130 } 142 }
131 143
132 def op_translate(toktype): 144 def op_translate(toktype):
133 145
134 def _op_translate(lexer, match, ctx=None): 146 def _op_translate(lexer, match, ctx=None):
294 r"(?:is)" 306 r"(?:is)"
295 r"|(?:with)" 307 r"|(?:with)"
296 r")\b", 308 r")\b",
297 bygroups(op_translate(Keyword))), 309 bygroups(op_translate(Keyword))),
298 include("expr"), 310 include("expr"),
311 include("unicode-separators"),
312 include("unicode-other"),
299 (r"[^\S\n]+", Text), 313 (r"[^\S\n]+", Text),
300 (r".", Generic.Error), # tolerance for errors 314 (r".", Generic.Error), # tolerance for errors
301 ], 315 ],
302 "remark": [ 316 "remark": [
303 (r"(?i)\\(remark|rem)\b(.*)$", 317 (r"(?i)\\(remark|rem)\b(.*)$",
316 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")), 330 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
317 (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), 331 (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
318 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), 332 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
319 ], 333 ],
320 "expr": [ 334 "expr": [
321 include("punctuation"), 335 include("math-symbols"), # must be before punctuation
336 include("ascii-punctuation"),
337 include("unicode-punctuation"),
338 include("escaped-string-start"),
322 include("py-strings"), 339 include("py-strings"),
323 include("py-numbers"), 340 include("py-numbers"),
324 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), 341 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
325 include("explicit-tokentype"), 342 include("explicit-tokentype"),
326 include("remark"), 343 include("remark"),
328 include("word-operators"), 345 include("word-operators"),
329 include("math-builtins"), 346 include("math-builtins"),
330 include("py-name"), 347 include("py-name"),
331 ], 348 ],
332 "expr-in-braces": [ 349 "expr-in-braces": [
333 include("punctuation-in-braces"), 350 include("math-symbols"), # must be before punctuation
351 include("ascii-punctuation-in-braces"),
352 include("unicode-punctuation"),
353 include("escaped-string-start"),
334 include("py-strings"), 354 include("py-strings"),
335 include("py-numbers"), 355 include("py-numbers"),
336 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), 356 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
337 include("explicit-tokentype"), 357 include("explicit-tokentype"),
338 include("remark"), 358 include("remark"),
345 (r"\}", LexBase.op_ignore, "#pop"), 365 (r"\}", LexBase.op_ignore, "#pop"),
346 (r"\n", Whitespace), 366 (r"\n", Whitespace),
347 include("expr-in-braces"), 367 include("expr-in-braces"),
348 (r"\\\\", LexBase.op_fixed(Text, "\\")), 368 (r"\\\\", LexBase.op_fixed(Text, "\\")),
349 (r"\\", LexBase.op_fixed(Text, "\\")), 369 (r"\\", LexBase.op_fixed(Text, "\\")),
370 include("unicode-separators"),
371 include("unicode-other"),
350 (r"[^\S\n]+", Text), 372 (r"[^\S\n]+", Text),
351 (r".", Generic.Error), # tolerance for errors 373 (r".", Generic.Error), # tolerance for errors
352 ], 374 ],
353 "text-statement": [ # like block but default to text-mode 375 "text-statement": [ # like block but default to text-mode
354 (r"[^\\}\n]+", Text), 376 (r"[^\\}\n]+", Text),
383 "min", "max"), 405 "min", "max"),
384 prefix=r"(?<!\.)", 406 prefix=r"(?<!\.)",
385 suffix=r"\b"), 407 suffix=r"\b"),
386 Name.Builtin), 408 Name.Builtin),
387 ], 409 ],
410 "math-symbols": [
411 (r"<=>|<->|<-|->|=>|<=|>=|<>|!=|:=|=:|\?=", op_symbol(Operator)),
412 (r"[!&<>=+\-*/%|~]", Operator), # ASCII
413 (u"[%s]" % (uniprops.Sm,), Operator), # other Unicode
414 ],
388 "word-operators": [ 415 "word-operators": [
389 (words(("IN", "In", "in", 416 (words(("IN", "In", "in",
390 "IS", "Is", "is", 417 "IS", "Is", "is",
391 "AND", "And", "and", 418 "AND", "And", "and",
392 "OR", "Or", "or", 419 "OR", "Or", "or",
403 "Empty", "EMPTY", "empty"), 430 "Empty", "EMPTY", "empty"),
404 prefix=r"(?<!\.)", 431 prefix=r"(?<!\.)",
405 suffix=r"\b"), 432 suffix=r"\b"),
406 Keyword.Constant), 433 Keyword.Constant),
407 ], 434 ],
408 "punctuation": [ 435 "ascii-punctuation": [
409 (r"[{}:(),;[\]]", Punctuation), 436 (r"[{}:(),;[\]?@]", Punctuation),
410 ], 437 ],
411 "punctuation-in-braces": [ 438 "ascii-punctuation-in-braces": [
412 # like "punctuation" but needs an escaped curly brace for } 439 #
440 # Like "ascii-punctuation" but needs an escaped curly brace for } because
441 # a single closing curly brace pops the current state here.
442 #
413 (r"\\\}", LexBase.op_fixed(Punctuation, "}")), 443 (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
414 (r"[{:(),;[\]]", Punctuation), 444 (r"[{:(),;[\]?@]", Punctuation),
445 ],
446 "unicode-separators": [
447 (u"[%s]" % (uniprops.Zl,), Whitespace),
448 (u"[%s]" % (uniprops.Zp,), Whitespace),
449 (u"[%s]" % (uniprops.Zs,), Whitespace),
450 ],
451 "unicode-punctuation": [
452 (u"[%s]" % (uniprops.Pc,), Punctuation),
453 (u"[%s]" % (uniprops.Pd,), Punctuation),
454 (u"[%s]" % (uniprops.Ps,), Punctuation),
455 (u"[%s]" % (uniprops.Pe,), Punctuation),
456 (u"[%s]" % (uniprops.Pi,), Punctuation),
457 (u"[%s]" % (uniprops.Pf,), Punctuation),
458 (u"[%s]" % (uniprops.Po,), Punctuation),
459 ],
460 "unicode-other": [
461 (u"[%s]" % (uniprops.Sc,), Text), # Currency
462 (u"[%s]" % (uniprops.So,), Text), # Other symbols
463 ],
464 "escaped-string-start": [
465 (r"""\\(['"])""", bygroups(Punctuation)),
415 ], 466 ],
416 "explicit-tokentype": [ 467 "explicit-tokentype": [
417 # All these REs are CASE-SENSITIVE! 468 # All these REs are CASE-SENSITIVE!
418 469
419 # Multiple characters possible, but no escaping! 470 # Multiple characters possible, but no escaping!