Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
view pygments_lexer_pseudocode2/algpseudocode.py @ 84:3ac1c4502ad0
Implement "\tt-XXX" and "\ttx-XXX" for explicit token types.
Overwrite defaults or implement tokens that are not yet handled by default.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 30 Apr 2026 19:37:24 +0200 |
| parents | cd79d2c76347 |
| children | ae5e741d2a9b |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""A pseudocode lexer along the lines of CTAN's algpseudocode or
algpseudocodex.

"""

__all__ = ["AlgPseudocodeLexer",
           "AlgPseudocodeLexer_DE",
           "AlgPseudocodeLexer_FR"]


import re

import pygments.util
from pygments.lexer import bygroups, include, words
from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
                            Text, Whitespace, STANDARD_TYPES)

#
# Relative imports do not work with pygments.lexers.load_lexer_from_file()
# in all of our supported Python releases.
#
from pygments_lexer_pseudocode2.bases import LexBase


class AlgPseudocodeLexer(LexBase):
    """A pseudocode lexer along the lines of CTAN's algpseudocode or
    algpseudocodex.

    Some ideas (e.g. strings) are borrowed from Pygments' Python lexer.

    Lexer options:

    `no_end`
        Boolean (default ``False``). If true, the ``\\endXXX`` keywords
        and their optional entity names produce no tokens at all.

    Subclasses localize the output by overriding `TRANSLATIONS`,
    `END_TRANSLATIONS` and `DEFAULT_END_PREFIX`; the token callbacks look
    these up on the lexer instance at lexing time.

    """

    name = "AlgPseudocode"
    aliases = ["algpseudocode", "algpseudo"]
    filenames = ["*.algpseudo", "*.algpseudocode"]
    mimetypes = []

    flags = re.MULTILINE

    LANG = "en"

    # Upper-cased source keyword -> emitted keyword text
    TRANSLATIONS = {
        "PROG": "PROGRAM",
        "PROGRAM": "PROGRAM",
        "ALGO": "ALGORITHM",
        "ALGORITHM": "ALGORITHM",
        "PROC": "PROCEDURE",
        "PROCEDURE": "PROCEDURE",
        "FUNC": "FUNCTION",
        "FUNCTION": "FUNCTION",
        "FN": "FUNCTION",
        "CLASS": "CLASS",
        "IS": "IS",
        "WITH": "WITH",
    }

    # Upper-cased entity kind of an ``\endXXX`` keyword -> emitted text
    END_TRANSLATIONS = {
        "PROG": "END OF PROGRAM",
        "PROGRAM": "END OF PROGRAM",
        "ALGO": "END OF ALGORITHM",
        "ALGORITHM": "END OF ALGORITHM",
        "PROC": "END OF PROCEDURE",
        "PROCEDURE": "END OF PROCEDURE",
        "FUNC": "END OF FUNCTION",
        "FUNCTION": "END OF FUNCTION",
        "FN": "END OF FUNCTION",
        "CLASS": "END OF CLASS",
    }

    # Fallback prefix for entity kinds missing in END_TRANSLATIONS
    DEFAULT_END_PREFIX = "END OF "

    SYMBOL_REMARK = "▷"       # U+25B7: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_REMARK = "▻"     # U+25BB: Unicode 1.0 (Geometric Shapes)
    SYMBOL_BLOCK = "◆"        # U+25C6: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_BLOCK = "┃"      # U+2503: Unicode 1.0 (Box Drawing)
    # SYMBOL_BLOCK = "●"      # U+25CF: Unicode 1.0 (Geometric Shapes)
    SYMBOL_TEXTSTATEMENT = "▪"    # U+25AA: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_TEXTSTATEMENT = "■"  # U+25A0: Unicode 1.0 (Geometric Shapes)

    # Upper-cased source keyword -> symbol emitted in its place
    SYMBOLS = {
        # Group REMARK
        "REMARK": SYMBOL_REMARK,
        "REM": SYMBOL_REMARK,
        # Group STATEMENT
        "STATEMENT": SYMBOL_BLOCK,
        "STATE": SYMBOL_BLOCK,
        "BLOCK": SYMBOL_BLOCK,
        # Group TEXTSTATEMENT
        "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TEXTSTATE": SYMBOL_TEXTSTATEMENT,
        "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TSTATE": SYMBOL_TEXTSTATEMENT,
        "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
        "TBLOCK": SYMBOL_TEXTSTATEMENT,
    }

    #
    # Token callbacks.
    #
    # These are plain functions that are only called while the class body
    # is executed (to build `tokens`); they are not meant to be used as
    # methods. Every generated callback advances `ctx.pos` when a lexer
    # context is given.
    #

    def op_translate(toktype):
        """Return a callback that yields the upper-cased match looked up
        in the lexer's `TRANSLATIONS` (or the upper-cased match itself
        if there is no entry) as token type `toktype`.

        """

        def _op_translate(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.TRANSLATIONS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_translate

    def op_opt_end_translate(toktype):
        """Return a callback that yields the upper-cased match looked up
        in the lexer's `END_TRANSLATIONS` as token type `toktype` or --
        if the lexer's `no_end` setting evals to ``True`` -- nothing.

        Unknown keywords fall back to `DEFAULT_END_PREFIX` plus the
        upper-cased match.

        """

        def _op_end_translate(lexer, match, ctx=None):
            if not lexer.no_end:
                kw = match.group().upper()
                yield (match.start(),
                       toktype,
                       lexer.END_TRANSLATIONS.get(
                           kw, lexer.DEFAULT_END_PREFIX + kw))
            if ctx:
                ctx.pos = match.end()

        return _op_end_translate

    def op_opt_ignore(toktype):
        """Return a callback that yields the matched text unchanged as
        token type `toktype` or -- if the lexer's `no_end` setting evals
        to ``True`` -- nothing.

        """

        def _op_opt_ignore(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, match.group()
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore

    def op_opt_ignore_or_fixed(toktype, value):
        """Yield a fixed given token type and value or -- if the lexer's
        `no_end` setting evals to ``True`` nothing.

        """

        def _op_opt_ignore_or_fixed(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, value
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore_or_fixed

    def op_symbol(toktype):
        """Return a callback that yields the upper-cased match looked up
        in the lexer's `SYMBOLS` (or the upper-cased match itself if
        there is no entry) as token type `toktype`.

        """

        def _op_symbol(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_symbol

    def op_explicit_tokentype(lexer, match, ctx=None):
        """Callback that yields the match's ``character`` group with the
        token type whose short name in Pygments' `STANDARD_TYPES` equals
        the match's ``type`` group.

        Unknown short names fall back to `Text`.

        """
        needed_css = match.group("type")
        # The first token type with a matching short name wins
        for ttype, css in STANDARD_TYPES.items():
            if css == needed_css:
                toktype = ttype
                break
        else:
            toktype = Text
        yield match.start(), toktype, match.group("character")
        if ctx:
            ctx.pos = match.end()

    # NOTE: States that are referenced but not defined here (e.g.
    #       "multiline-nested-comment", "py-strings", "py-numbers",
    #       "py-name") as well as LexBase.op_fixed and LexBase.op_ignore
    #       are expected to come from LexBase.
    tokens = {
        "root": [
            (r"\n", Whitespace),
            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
            (r"//.*$", Comment.Single),
            include("remark"),
            (r"(?i)\\(block|state(?:ment)?)\s*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "block-expr"),
            (r"(?i)\\("
             r"(?:textstate(?:ment)?)"
             r"|(?:tstate(?:ment)?)"
             r"|(?:textblock)"
             r"|(?:tblock)"
             r")\s*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "text-statement"),
            (r"\\\n", Text),
            (r"(?i)\\("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")\s*(\{)",
             bygroups(op_translate(Keyword),
                      LexBase.op_fixed(Whitespace, " ")),
             "entity-name"),
            # ENDxxx keywords with optional entity name
            # (accepts the same entity kinds -- including "fn" -- as the
            # opening keywords above)
            #   with name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")\s*(\{)",
             bygroups(op_opt_end_translate(Keyword),
                      op_opt_ignore_or_fixed(Whitespace, " ")),
             "entity-name-end"),
            #   without name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")\b",
             bygroups(op_opt_end_translate(Keyword))),
            # Keywords
            (r"(?i)\\("
             r"(?:is)"
             r"|(?:with)"
             r")\b",
             bygroups(op_translate(Keyword))),
            include("expr"),
            (r"\s+", Text),
        ],
        "remark": [
            (r"(?i)\\(remark|rem)\b(.*)$",
             bygroups(op_symbol(Comment.Single), Comment.Single)),
        ],
        "entity-name": [
            # may be multiline
            (r"[^\\}]+", Name.Entity),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", LexBase.op_fixed(Name.Entity, "}")),
            (r"\\\\", LexBase.op_fixed(Name.Entity, "\\")),
            (r"\\", LexBase.op_fixed(Name.Entity, "\\")),
        ],
        "entity-name-end": [
            # may be multiline -- suppressed if no_end
            (r"[^\\}]+", op_opt_ignore(Name.Entity)),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
            (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
            (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
        ],
        "expr": [
            include("punctuation"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("text-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "expr-in-braces": [
            # Include-only: has no rule for the closing brace. States that
            # are entered on "{" should push "block-expr" instead.
            include("punctuation-in-braces"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("text-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "block-expr": [
            # somewhat similar to "root"
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            include("expr-in-braces"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
            (r"\s+", Text),
        ],
        "text-statement": [
            # like block but default to text-mode
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            (r"(?i)\\expr(?:ession)?\s*\{", LexBase.op_ignore, "block-expr"),
            include("explicit-tokentype"),
            include("remark"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
        ],
        "text-in-expr": [
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            # Must be the inline flag "(?i)" and must push a state that
            # pops on the closing brace (as "text-statement" does).
            (r"(?i)\\(expr(?:ession)?)\s*(\{)",
             bygroups(LexBase.op_ignore, LexBase.op_ignore),
             "block-expr"),
            include("explicit-tokentype"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
        ],
        "math-builtins": [
            # "arcos" is a historical misspelling of "arccos" that is
            # kept for backward compatibility
            (words(("sqrt", "pow",
                    "cos", "sin", "tan",
                    "arccos", "arcos", "arcsin", "arctan", "arctan2",
                    "mod", "exp", "ln", "log"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Name.Builtin),
        ],
        "text-operators": [
            (words(("in", "is", "and", "or", "xor", "not"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Operator.Word),
        ],
        "keyword-constants": [
            (words(("True", "TRUE", "true",
                    "False", "FALSE", "false",
                    "None", "NONE", "none",
                    "Nil", "NIL", "nil",
                    "Null", "NULL", "null",
                    "Empty", "EMPTY", "empty"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Keyword.Constant),
        ],
        "punctuation": [
            (r"[{}:(),;[\]]", Punctuation),
        ],
        "punctuation-in-braces": [
            # like "punctuation" but needs an escaped curly brace for }
            (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
            (r"[{:(),;[\]]", Punctuation),
        ],
        "explicit-tokentype": [
            # All these REs are CASE-SENSITIVE!
            # Multiple characters possible, but no escaping!
            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
             r"(?P<character>(.|\n)+?)(?P=sep)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^{]+)\{"
             r"(?P<character>[^}]+?)\}",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^\(]+)\("
             r"(?P<character>[^\)]+?)\)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^<]+)<"
             r"(?P<character>[^>]+?)>",
             op_explicit_tokentype),
            # Every character is possible: no escaping needed!
            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
             op_explicit_tokentype),
        ],
    }

    def __init__(self, **options):
        """Initialize the lexer and evaluate the boolean `no_end` option
        (default ``False``).

        """
        # Set before LexBase.__init__() so the attribute already exists
        # should the base class initialization need it.
        val = pygments.util.get_bool_opt(options, "no_end", default=False)
        self.no_end = val
        LexBase.__init__(self, **options)


class AlgPseudocodeLexer_DE(AlgPseudocodeLexer):
    """The pseudocode lexer with German keyword translations."""

    name = "AlgPseudocodeDE"
    aliases = ["algpseudocode-de", "algpseudo-de"]
    filenames = ["*.algpseudo-de", "*.algpseudocode-de"]

    LANG = "de"

    # Copy first: the dicts of the base class must not be changed
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMM",
        "PROGRAM": "PROGRAMM",
        "ALGO": "ALGORITHMUS",
        "ALGORITHM": "ALGORITHMUS",
        "PROC": "PROZEDUR",
        "PROCEDURE": "PROZEDUR",
        "FUNC": "FUNKTION",
        "FUNCTION": "FUNKTION",
        "FN": "FUNKTION",
        "CLASS": "KLASSE",
        "IS": "IST",
        "WITH": "MIT",
    })

    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "ENDE DES PROGRAMMS",
        "PROGRAM": "ENDE DES PROGRAMMS",
        "ALGO": "ENDE DES ALGORITHMUS",
        "ALGORITHM": "ENDE DES ALGORITHMUS",
        "PROC": "ENDE DER PROZEDUR",
        "PROCEDURE": "ENDE DER PROZEDUR",
        "FUNC": "ENDE DER FUNKTION",
        "FUNCTION": "ENDE DER FUNKTION",
        "FN": "ENDE DER FUNKTION",
        "CLASS": "ENDE DER KLASSE",
    })

    DEFAULT_END_PREFIX = "ENDE VON "


class AlgPseudocodeLexer_FR(AlgPseudocodeLexer):
    """The pseudocode lexer with French keyword translations."""

    name = "AlgPseudocodeFR"
    aliases = ["algpseudocode-fr", "algpseudo-fr"]
    filenames = ["*.algpseudo-fr", "*.algpseudocode-fr"]

    LANG = "fr"

    # Copy first: the dicts of the base class must not be changed
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMME",
        "PROGRAM": "PROGRAMME",
        "ALGO": "ALGORITHME",
        "ALGORITHM": "ALGORITHME",
        "PROC": "PROCÉDURE",
        "PROCEDURE": "PROCÉDURE",
        "FUNC": "FONCTION",
        "FUNCTION": "FONCTION",
        "FN": "FONCTION",
        "CLASS": "CLASSE",
        "IS": "EST",
        "WITH": "AVEC",
    })

    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "FIN DE PROGRAMME",
        "PROGRAM": "FIN DE PROGRAMME",
        "ALGO": "FIN D'ALGORITHME",
        "ALGORITHM": "FIN D'ALGORITHME",
        "PROC": "FIN DE PROCÉDURE",
        "PROCEDURE": "FIN DE PROCÉDURE",
        "FUNC": "FIN DE FONCTION",
        "FUNCTION": "FIN DE FONCTION",
        "FN": "FIN DE FONCTION",
        "CLASS": "FIN DE CLASSE",
    })

    DEFAULT_END_PREFIX = "FIN DE "
