Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
view pygments_lexer_pseudocode2/algpseudocode.py @ 62:7153e945a3d6
Implement ignoring of \ENDxxx including its optional name parts
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 28 Apr 2026 10:09:17 +0200 |
| parents | 392745b66969 |
| children | 05c53e431c88 |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""A pseudocode lexer along the lines of CTAN's algpseudocode or
algpseudocodex.

"""

__all__ = ["AlgPseudocodeLexer",
           "AlgPseudocodeLexer_DE",
           "AlgPseudocodeLexer_FR"]


import re

import pygments.util
from pygments.lexer import include, bygroups
from pygments.token import (Comment, Keyword, Name, Text, Whitespace)

#
# Relative imports do not work with pygments.lexers.load_lexer_from_file()
# in all of our supported Python releases.
#
from pygments_lexer_pseudocode2.bases import LexBase


class AlgPseudocodeLexer(LexBase):

    """A pseudocode lexer along the lines of CTAN's algpseudocode or
    algpseudocodex.

    Some ideas (e.g. strings) are borrowed from Pygments' Python lexer.

    Backslash commands such as ``\\PROC{name}`` or ``\\ENDPROC`` are
    replaced in the token stream by the keyword text found in
    :attr:`TRANSLATIONS` and :attr:`END_TRANSLATIONS`; ``\\REMARK`` and
    ``\\BLOCK`` are replaced by the symbols in :attr:`SYMBOLS`.
    Subclasses localize the output by overriding these tables.

    Options:

    `no_end`
        Boolean (default: false).  If true, all ``\\ENDxxx`` keywords
        together with their optional ``{name}`` parts are dropped from
        the token stream.

    """

    name = "AlgPseudocode"
    aliases = ["algpseudocode", "algpseudo"]
    filenames = ["*.algpseudo", "*.algpseudocode"]
    mimetypes = []
    flags = re.MULTILINE

    LANG = "en"

    # Upper-cased command name (without the backslash) -> emitted keyword
    TRANSLATIONS = {
        "PROG": "PROGRAM",
        "PROGRAM": "PROGRAM",
        "ALGO": "ALGORITHM",
        "ALGORITHM": "ALGORITHM",
        "PROC": "PROCEDURE",
        "PROCEDURE": "PROCEDURE",
        "FUNC": "FUNCTION",
        "FUNCTION": "FUNCTION",
        "FN": "FUNCTION",
        "CLASS": "CLASS",
        "IS": "IS",
    }

    # Upper-cased entity kind of an \ENDxxx command -> emitted keyword
    END_TRANSLATIONS = {
        "PROG": "END OF PROGRAM",
        "PROGRAM": "END OF PROGRAM",
        "ALGO": "END OF ALGORITHM",
        "ALGORITHM": "END OF ALGORITHM",
        "PROC": "END OF PROCEDURE",
        "PROCEDURE": "END OF PROCEDURE",
        "FUNC": "END OF FUNCTION",
        "FUNCTION": "END OF FUNCTION",
        "FN": "END OF FUNCTION",
        "CLASS": "END OF CLASS",
    }

    # Prepended to the upper-cased entity kind if END_TRANSLATIONS has
    # no entry for it
    DEFAULT_END_PREFIX = "END OF "

    SYMBOL_REMARK = "▷"     # U+25B7: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_REMARK = "▻"   # U+25BB: Unicode 1.0 (Geometric Shapes)
    SYMBOL_BLOCK = "◆"      # U+25C6: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_BLOCK = "┃"    # U+2503: Unicode 1.0 (Box Drawing)
    # SYMBOL_BLOCK = "●"    # U+25CF: Unicode 1.0 (Geometric Shapes)
    SYMBOL_TEXTSTATEMENT = "■"   # U+25A0: Unicode 1.0 (Geometric Shapes)

    # Command name (case-sensitive, without the backslash) -> emitted symbol
    SYMBOLS = {
        "REMARK": SYMBOL_REMARK,
        "Remark": SYMBOL_REMARK,
        "remark": SYMBOL_REMARK,
        "REM": SYMBOL_REMARK,
        "Rem": SYMBOL_REMARK,
        "rem": SYMBOL_REMARK,
        "R": SYMBOL_REMARK,
        "r": SYMBOL_REMARK,
        "BLOCK": SYMBOL_BLOCK,
        "Block": SYMBOL_BLOCK,
        "block": SYMBOL_BLOCK,
        "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TextStatement": SYMBOL_TEXTSTATEMENT,
        "textstatement": SYMBOL_TEXTSTATEMENT,
        "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TStatement": SYMBOL_TEXTSTATEMENT,
        "tstatement": SYMBOL_TEXTSTATEMENT,
        "TSTATE": SYMBOL_TEXTSTATEMENT,
        "TState": SYMBOL_TEXTSTATEMENT,
        "tstate": SYMBOL_TEXTSTATEMENT,
        "TEXT": SYMBOL_TEXTSTATEMENT,
        "Text": SYMBOL_TEXTSTATEMENT,
        "text": SYMBOL_TEXTSTATEMENT,
    }

    #
    # Callback factories.
    #
    # These are plain functions that are called right here in the class
    # body while `tokens` is built; they are not instance methods.  Each
    # returns a generator function with the callback signature
    # ``(lexer, match, ctx=None)`` that yields ``(index, tokentype, value)``
    # tuples.  The tables are looked up on the `lexer` argument at lexing
    # time, so the tables of a subclass take effect.
    #

    def op_translate(toktype):
        """Return a callback that emits the upper-cased matched text
        translated by means of the lexer's `TRANSLATIONS` as `toktype`.

        Text without a translation is emitted upper-cased as is.

        """
        def _op_translate(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.TRANSLATIONS.get(kw, kw)

        return _op_translate

    def op_opt_end_translate(toktype):
        """Return a callback that emits the upper-cased matched text
        translated by means of the lexer's `END_TRANSLATIONS` as `toktype`.

        Text without a translation is emitted upper-cased and prefixed
        with the lexer's `DEFAULT_END_PREFIX`.  Nothing is emitted if the
        lexer's `no_end` option is set.

        """
        def _op_end_translate(lexer, match, ctx=None):
            if not lexer.no_end:
                kw = match.group().upper()
                yield (match.start(),
                       toktype,
                       lexer.END_TRANSLATIONS.get(
                           kw, lexer.DEFAULT_END_PREFIX + kw))

        return _op_end_translate

    def op_opt_ignore(toktype):
        """Return a callback that emits the matched text unchanged as
        `toktype` -- or nothing at all if the lexer's `no_end` option is set.

        """
        def _op_opt_ignore(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, match.group()

        return _op_opt_ignore

    def op_symbol(toktype):
        """Return a callback that emits the symbol that the lexer's
        `SYMBOLS` assigns to the matched text as `toktype`.

        The lookup is case-sensitive; text without an entry is emitted
        unchanged.

        """
        def _op_symbol(lexer, match, ctx=None):
            kw = match.group()
            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)

        return _op_symbol

    # NOTE: States referenced but not defined here ("multiline-nested-comment",
    #       "py-strings", "py-numbers", "py-name") are expected to be
    #       provided by LexBase.
    tokens = {
        "root": [
            (r"\n", Whitespace),
            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
            (r"//.*$", Comment.Single),
            # Remarks: the command becomes a symbol, the rest of the line
            # is the comment text
            (r"(?:\\)(REMARK|Remark|remark|REM|Rem|rem|R|r)\b(.*)$",
             bygroups(op_symbol(Comment.Single), Comment.Single)),
            (r"(?:\\)(BLOCK|Block|block)\b(.*)$",
             bygroups(op_symbol(Text), Text)),
            # A backslash immediately before a line end
            (r"\\\n", Text),
            # Entity keywords with a mandatory entity name in braces
            (r"(?i)\\("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")(\s*)(\{)",
             bygroups(op_translate(Keyword), Whitespace, LexBase.op_ignore),
             "entity-name"),
            # ENDxxx keywords with optional entity name
            # (`fn` is accepted to mirror the opening rule and the "FN"
            # entries in END_TRANSLATIONS)
            # with name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")(\s*)(\{)",
             bygroups(op_opt_end_translate(Keyword),
                      op_opt_ignore(Whitespace),
                      LexBase.op_ignore),
             "entity-name-end"),
            # without name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")\b",
             bygroups(op_opt_end_translate(Keyword))),
            # Keywords
            (r"(?i)\\("
             r"(?:is)"
             r")\b",
             bygroups(op_translate(Keyword))),
            include("expr"),
            (r"\s+", Text),
        ],
        "entity-name": [
            # may be multiline
            (r"[^\\}]+", Name.Entity),
            (r"\\\}", Name.Entity),
            (r"\\", Name.Entity),
            (r"\}", LexBase.op_ignore, "#pop"),
        ],
        "entity-name-end": [
            # may be multiline -- suppressed if no_end
            (r"[^\\}]+", op_opt_ignore(Name.Entity)),
            (r"\\\}", op_opt_ignore(Name.Entity)),
            (r"\\", op_opt_ignore(Name.Entity)),
            (r"\}", LexBase.op_ignore, "#pop"),
        ],
        "expr": [
            include("py-strings"),
            include("py-numbers"),
            include("py-name"),
        ],
    }

    def __init__(self, **options):
        """Initialize the lexer.

        Evaluates the boolean option `no_end` (default: false) and hands
        all options over to the base class.

        """
        val = pygments.util.get_bool_opt(options, "no_end", default=False)
        self.no_end = val
        LexBase.__init__(self, **options)


class AlgPseudocodeLexer_DE(AlgPseudocodeLexer):

    """The pseudocode lexer with German keyword translations."""

    name = "AlgPseudocodeDE"
    aliases = ["algpseudocode-de", "algpseudo-de"]
    filenames = ["*.algpseudo-de", "*.algpseudocode-de"]

    LANG = "de"
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMM",
        "PROGRAM": "PROGRAMM",
        "ALGO": "ALGORITHMUS",
        "ALGORITHM": "ALGORITHMUS",
        "PROC": "PROZEDUR",
        "PROCEDURE": "PROZEDUR",
        "FUNC": "FUNKTION",
        "FUNCTION": "FUNKTION",
        "FN": "FUNKTION",
        "CLASS": "KLASSE",
        "IS": "IST",
    })
    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "ENDE DES PROGRAMMS",
        "PROGRAM": "ENDE DES PROGRAMMS",
        "ALGO": "ENDE DES ALGORITHMUS",
        "ALGORITHM": "ENDE DES ALGORITHMUS",
        "PROC": "ENDE DER PROZEDUR",
        "PROCEDURE": "ENDE DER PROZEDUR",
        "FUNC": "ENDE DER FUNKTION",
        "FUNCTION": "ENDE DER FUNKTION",
        "FN": "ENDE DER FUNKTION",
        "CLASS": "ENDE DER KLASSE",
    })
    DEFAULT_END_PREFIX = "ENDE VON "


class AlgPseudocodeLexer_FR(AlgPseudocodeLexer):

    """The pseudocode lexer with French keyword translations."""

    name = "AlgPseudocodeFR"
    aliases = ["algpseudocode-fr", "algpseudo-fr"]
    filenames = ["*.algpseudo-fr", "*.algpseudocode-fr"]

    LANG = "fr"
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMME",
        "PROGRAM": "PROGRAMME",
        "ALGO": "ALGORITHME",
        "ALGORITHM": "ALGORITHME",
        "PROC": "PROCÉDURE",
        "PROCEDURE": "PROCÉDURE",
        "FUNC": "FONCTION",
        "FUNCTION": "FONCTION",
        "FN": "FONCTION",
        "CLASS": "CLASSE",
        "IS": "EST",
    })
    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "FIN DE PROGRAMME",
        "PROGRAM": "FIN DE PROGRAMME",
        "ALGO": "FIN D'ALGORITHME",
        "ALGORITHM": "FIN D'ALGORITHME",
        "PROC": "FIN DE PROCÉDURE",
        "PROCEDURE": "FIN DE PROCÉDURE",
        "FUNC": "FIN DE FONCTION",
        "FUNCTION": "FIN DE FONCTION",
        "FN": "FIN DE FONCTION",
        "CLASS": "FIN DE CLASSE",
    })
    DEFAULT_END_PREFIX = "FIN DE "
