Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
comparison pygments_lexer_pseudocode2/algpseudocode.py @ 105:cec52d83869a
Handle many more characters from the Unicode character set in expressions.
While there: FIX: Add forgotten Punctuation characters `?' and `@'.
While there: Allow the escaping of single and double quotes that normally
start a string (e.g. for expressions like f' is the first derivative of f).
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 04 May 2026 16:30:36 +0200 |
| parents | ffe6ea2cf69b |
| children | f6b46a379aba |
comparison
equal
deleted
inserted
replaced
| 104:ffe6ea2cf69b | 105:cec52d83869a |
|---|---|
| 24 # Relative imports do not work with pygments.lexers.load_lexer_from_file() | 24 # Relative imports do not work with pygments.lexers.load_lexer_from_file() |
| 25 # in all of our supported Python releases. | 25 # in all of our supported Python releases. |
| 26 # | 26 # |
| 27 from pygments_lexer_pseudocode2.bases import LexBase | 27 from pygments_lexer_pseudocode2.bases import LexBase |
| 28 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES | 28 from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES |
| 29 | 29 from pygments_lexer_pseudocode2 import uniprops |
| 30 | 30 |
| 31 # | 31 # |
| 32 # As in the local imports: use an explicit name because __name__ is | 32 # As in the local imports: use an explicit name because __name__ is |
| 33 # __builtins__ | 33 # __builtins__ |
| 34 # | 34 # |
| 125 "TEXTSTATE": SYMBOL_TEXTSTATEMENT, | 125 "TEXTSTATE": SYMBOL_TEXTSTATEMENT, |
| 126 "TSTATEMENT": SYMBOL_TEXTSTATEMENT, | 126 "TSTATEMENT": SYMBOL_TEXTSTATEMENT, |
| 127 "TSTATE": SYMBOL_TEXTSTATEMENT, | 127 "TSTATE": SYMBOL_TEXTSTATEMENT, |
| 128 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT, | 128 "TEXTBLOCK": SYMBOL_TEXTSTATEMENT, |
| 129 "TBLOCK": SYMBOL_TEXTSTATEMENT, | 129 "TBLOCK": SYMBOL_TEXTSTATEMENT, |
| 130 "<-": "←", | |
| 131 "->": "→", | |
| 132 "=>": "⇒", | |
| 133 "<=": "≤", | |
| 134 ">=": "≥", | |
| 135 "<>": "≠", | |
| 136 "!=": "≠", | |
| 137 ":=": "∶=", # "≔" not recognizable | |
| 138 "=:": "=∶", # "≕", not recognizable | |
| 139 "<=>": "⇔", | |
| 140 "<->": "↔", | |
| 141 "?=": "≟", | |
| 130 } | 142 } |
| 131 | 143 |
| 132 def op_translate(toktype): | 144 def op_translate(toktype): |
| 133 | 145 |
| 134 def _op_translate(lexer, match, ctx=None): | 146 def _op_translate(lexer, match, ctx=None): |
| 294 r"(?:is)" | 306 r"(?:is)" |
| 295 r"|(?:with)" | 307 r"|(?:with)" |
| 296 r")\b", | 308 r")\b", |
| 297 bygroups(op_translate(Keyword))), | 309 bygroups(op_translate(Keyword))), |
| 298 include("expr"), | 310 include("expr"), |
| 311 include("unicode-separators"), | |
| 312 include("unicode-other"), | |
| 299 (r"[^\S\n]+", Text), | 313 (r"[^\S\n]+", Text), |
| 300 (r".", Generic.Error), # tolerance for errors | 314 (r".", Generic.Error), # tolerance for errors |
| 301 ], | 315 ], |
| 302 "remark": [ | 316 "remark": [ |
| 303 (r"(?i)\\(remark|rem)\b(.*)$", | 317 (r"(?i)\\(remark|rem)\b(.*)$", |
| 316 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")), | 330 (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")), |
| 317 (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), | 331 (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), |
| 318 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), | 332 (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")), |
| 319 ], | 333 ], |
| 320 "expr": [ | 334 "expr": [ |
| 321 include("punctuation"), | 335 include("math-symbols"), # must be before punctuation |
| 336 include("ascii-punctuation"), | |
| 337 include("unicode-punctuation"), | |
| 338 include("escaped-string-start"), | |
| 322 include("py-strings"), | 339 include("py-strings"), |
| 323 include("py-numbers"), | 340 include("py-numbers"), |
| 324 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), | 341 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), |
| 325 include("explicit-tokentype"), | 342 include("explicit-tokentype"), |
| 326 include("remark"), | 343 include("remark"), |
| 328 include("word-operators"), | 345 include("word-operators"), |
| 329 include("math-builtins"), | 346 include("math-builtins"), |
| 330 include("py-name"), | 347 include("py-name"), |
| 331 ], | 348 ], |
| 332 "expr-in-braces": [ | 349 "expr-in-braces": [ |
| 333 include("punctuation-in-braces"), | 350 include("math-symbols"), # must be before punctuation |
| 351 include("ascii-punctuation-in-braces"), | |
| 352 include("unicode-punctuation"), | |
| 353 include("escaped-string-start"), | |
| 334 include("py-strings"), | 354 include("py-strings"), |
| 335 include("py-numbers"), | 355 include("py-numbers"), |
| 336 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), | 356 (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"), |
| 337 include("explicit-tokentype"), | 357 include("explicit-tokentype"), |
| 338 include("remark"), | 358 include("remark"), |
| 345 (r"\}", LexBase.op_ignore, "#pop"), | 365 (r"\}", LexBase.op_ignore, "#pop"), |
| 346 (r"\n", Whitespace), | 366 (r"\n", Whitespace), |
| 347 include("expr-in-braces"), | 367 include("expr-in-braces"), |
| 348 (r"\\\\", LexBase.op_fixed(Text, "\\")), | 368 (r"\\\\", LexBase.op_fixed(Text, "\\")), |
| 349 (r"\\", LexBase.op_fixed(Text, "\\")), | 369 (r"\\", LexBase.op_fixed(Text, "\\")), |
| 370 include("unicode-separators"), | |
| 371 include("unicode-other"), | |
| 350 (r"[^\S\n]+", Text), | 372 (r"[^\S\n]+", Text), |
| 351 (r".", Generic.Error), # tolerance for errors | 373 (r".", Generic.Error), # tolerance for errors |
| 352 ], | 374 ], |
| 353 "text-statement": [ # like block but default to text-mode | 375 "text-statement": [ # like block but default to text-mode |
| 354 (r"[^\\}\n]+", Text), | 376 (r"[^\\}\n]+", Text), |
| 383 "min", "max"), | 405 "min", "max"), |
| 384 prefix=r"(?<!\.)", | 406 prefix=r"(?<!\.)", |
| 385 suffix=r"\b"), | 407 suffix=r"\b"), |
| 386 Name.Builtin), | 408 Name.Builtin), |
| 387 ], | 409 ], |
| 410 "math-symbols": [ | |
| 411 (r"<=>|<->|<-|->|=>|<=|>=|<>|!=|:=|=:|\?=", op_symbol(Operator)), | |
| 412 (r"[!&<>=+\-*/%|~]", Operator), # ASCII | |
| 413 (u"[%s]" % (uniprops.Sm,), Operator), # other Unicode | |
| 414 ], | |
| 388 "word-operators": [ | 415 "word-operators": [ |
| 389 (words(("IN", "In", "in", | 416 (words(("IN", "In", "in", |
| 390 "IS", "Is", "is", | 417 "IS", "Is", "is", |
| 391 "AND", "And", "and", | 418 "AND", "And", "and", |
| 392 "OR", "Or", "or", | 419 "OR", "Or", "or", |
| 403 "Empty", "EMPTY", "empty"), | 430 "Empty", "EMPTY", "empty"), |
| 404 prefix=r"(?<!\.)", | 431 prefix=r"(?<!\.)", |
| 405 suffix=r"\b"), | 432 suffix=r"\b"), |
| 406 Keyword.Constant), | 433 Keyword.Constant), |
| 407 ], | 434 ], |
| 408 "punctuation": [ | 435 "ascii-punctuation": [ |
| 409 (r"[{}:(),;[\]]", Punctuation), | 436 (r"[{}:(),;[\]?@]", Punctuation), |
| 410 ], | 437 ], |
| 411 "punctuation-in-braces": [ | 438 "ascii-punctuation-in-braces": [ |
| 412 # like "punctuation" but needs an escaped curly brace for } | 439 # |
| 440 # Like "ascii-punctuation" but needs an escaped curly brace for } because | |
| 441 # a single closing curly brace pops the current state here. | |
| 442 # | |
| 413 (r"\\\}", LexBase.op_fixed(Punctuation, "}")), | 443 (r"\\\}", LexBase.op_fixed(Punctuation, "}")), |
| 414 (r"[{:(),;[\]]", Punctuation), | 444 (r"[{:(),;[\]?@]", Punctuation), |
| 445 ], | |
| 446 "unicode-separators": [ | |
| 447 (u"[%s]" % (uniprops.Zl,), Whitespace), | |
| 448 (u"[%s]" % (uniprops.Zp,), Whitespace), | |
| 449 (u"[%s]" % (uniprops.Zs,), Whitespace), | |
| 450 ], | |
| 451 "unicode-punctuation": [ | |
| 452 (u"[%s]" % (uniprops.Pc,), Punctuation), | |
| 453 (u"[%s]" % (uniprops.Pd,), Punctuation), | |
| 454 (u"[%s]" % (uniprops.Ps,), Punctuation), | |
| 455 (u"[%s]" % (uniprops.Pe,), Punctuation), | |
| 456 (u"[%s]" % (uniprops.Pi,), Punctuation), | |
| 457 (u"[%s]" % (uniprops.Pf,), Punctuation), | |
| 458 (u"[%s]" % (uniprops.Po,), Punctuation), | |
| 459 ], | |
| 460 "unicode-other": [ | |
| 461 (u"[%s]" % (uniprops.Sc,), Text), # Currency | |
| 462 (u"[%s]" % (uniprops.So,), Text), # Other symbols | |
| 463 ], | |
| 464 "escaped-string-start": [ | |
| 465 (r"""\\(['"])""", bygroups(Punctuation)), | |
| 415 ], | 466 ], |
| 416 "explicit-tokentype": [ | 467 "explicit-tokentype": [ |
| 417 # All these REs are CASE-SENSITIVE! | 468 # All these REs are CASE-SENSITIVE! |
| 418 | 469 |
| 419 # Multiple characters possible, but no escaping! | 470 # Multiple characters possible, but no escaping! |
