Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
view pygments_lexer_pseudocode2/algpseudocode.py @ 101:aae16e3624e2
Tests for single-line comments (// and #)
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 04 May 2026 16:12:44 +0200 |
| parents | 7cfad325d3bb |
| children | d8368294413a |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""A pseudocode lexer along the lines of CTAN's algpseudocode or
algpseudocodex.

"""

__all__ = ["AlgPseudocodeLexer",
           "AlgPseudocodeLexer_DE",
           "AlgPseudocodeLexer_FR"]


import logging
import re

import pygments.util
from pygments.lexer import bygroups, include, words
from pygments.token import (Comment, Generic, Keyword, Name, Operator,
                            Punctuation, Text, Whitespace)

#
# Relative imports do not work with pygments.lexers.load_lexer_from_file()
# in all of our supported Python releases.
#
from pygments_lexer_pseudocode2.bases import LexBase
from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES


#
# As in the local imports: use an explicit name because __name__ is
# __builtins__
#
_logger = logging.getLogger("pygments_lexer_pseudocode2.algpseudocode")


class AlgPseudocodeLexer(LexBase):

    """A pseudocode lexer along the lines of CTAN's algpseudocode or
    algpseudocodex.

    Some ideas (e.g. strings) are borrowed from Pygments' Python lexer.

    Keywords are written as backslash commands (e.g. ``\\if``,
    ``\\endwhile``, ``\\function{name}``) and are emitted upper-cased and
    translated via `TRANSLATIONS` / `END_TRANSLATIONS`.

    Lexer options:

    `no_end`
        Boolean (default ``False``).  If true the ``\\endXXX`` keywords
        handled by the "opt" callbacks (and an entity name that follows
        them) produce no tokens at all.  A bare ``\\end`` is never
        suppressed.

    """

    name = "AlgPseudocode"
    aliases = ["algpseudocode", "algpseudo"]
    filenames = ["*.algpseudo", "*.algpseudocode"]
    mimetypes = []

    flags = re.MULTILINE

    LANG = "en"

    # Upper-cased keyword (without the backslash) -> emitted text
    TRANSLATIONS = {
        "PROG": "PROGRAM",
        "PROGRAM": "PROGRAM",
        "ALGO": "ALGORITHM",
        "ALGORITHM": "ALGORITHM",
        "PROC": "PROCEDURE",
        "PROCEDURE": "PROCEDURE",
        "FUNC": "FUNCTION",
        "FUNCTION": "FUNCTION",
        "FN": "FUNCTION",
        "CLASS": "CLASS",
        "INPUT": "Input:",
        "INPUTS": "Inputs:",
        "OUTPUT": "Output:",
        "OUTPUTS": "Outputs:",
        "RETURN": "Return:",
        "RETURNS": "Returns:",
        "IS": "IS",
        "WITH": "WITH",
        "IF": "IF",
        "THEN": "THEN",
        "ELSE": "ELSE",
        "ELSEIF": "ELSE IF",
        "ELSIF": "ELSE IF",
        "ELIF": "ELSE IF",
        "DO": "DO",
        "WHILE": "WHILE",
        "FOR": "FOR",
        "FORALL": "FOR ALL",
        "STEP": "STEP",
        "LOOP": "LOOP",
        "REPEAT": "REPEAT",
        "UNTIL": "UNTIL",
        "BEGIN": "BEGIN",
        "END": "END",         # not in END_TRANSLATIONS
    }

    # Upper-cased keyword following ``\end`` -> emitted text
    END_TRANSLATIONS = {
        "PROG": "END OF PROGRAM",
        "PROGRAM": "END OF PROGRAM",
        "ALGO": "END OF ALGORITHM",
        "ALGORITHM": "END OF ALGORITHM",
        "PROC": "END OF PROCEDURE",
        "PROCEDURE": "END OF PROCEDURE",
        "FUNC": "END OF FUNCTION",
        "FUNCTION": "END OF FUNCTION",
        "FN": "END OF FUNCTION",
        "CLASS": "END OF CLASS",
        "IF": "END IF",
        "WHILE": "END WHILE",
        "FOR": "END FOR",
        "FORALL": "END FOR ALL",
        "LOOP": "END LOOP",
    }

    # Used when an end keyword has no entry in END_TRANSLATIONS
    DEFAULT_END_PREFIX = "END OF "

    SYMBOL_REMARK = "▷"    # U+25B7: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_REMARK = "▻"    # U+25BB: Unicode 1.0 (Geometric Shapes)
    SYMBOL_BLOCK = "◆"     # U+25C6: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_BLOCK = "┃"     # U+2503: Unicode 1.0 (Box Drawing)
    # SYMBOL_BLOCK = "●"     # U+25CF: Unicode 1.0 (Geometric Shapes)
    SYMBOL_TEXTSTATEMENT = "▪"   # U+25AA: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_TEXTSTATEMENT = "■"   # U+25A0: Unicode 1.0 (Geometric Shapes)

    # Upper-cased command name -> symbol emitted in place of the command
    SYMBOLS = {
        # Group REMARK
        "REMARK": SYMBOL_REMARK,
        "REM": SYMBOL_REMARK,
        # Group STATEMENT
        "STATEMENT": SYMBOL_BLOCK,
        "STATE": SYMBOL_BLOCK,
        "BLOCK": SYMBOL_BLOCK,
        # Group TEXTSTATEMENT
        "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TEXTSTATE": SYMBOL_TEXTSTATEMENT,
        "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TSTATE": SYMBOL_TEXTSTATEMENT,
        "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
        "TBLOCK": SYMBOL_TEXTSTATEMENT,
    }

    #
    # The following functions are plain helpers that are called while the
    # class body is executed (to build `tokens`); they are not meant to be
    # used as methods.  Each factory returns a lexer callback with the
    # signature ``(lexer, match, ctx=None)``.
    #

    def op_translate(toktype):
        """Return a callback that yields the upper-cased match translated
        by the lexer's `TRANSLATIONS` (untranslated if there is no entry).

        """
        def _op_translate(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.TRANSLATIONS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_translate

    def op_opt_end_translate(toktype):
        """Return a callback that yields the upper-cased match translated
        by the lexer's `END_TRANSLATIONS` or -- if there is no entry --
        prefixed with `DEFAULT_END_PREFIX`.

        Nothing is yielded if the lexer's `no_end` setting evals to
        ``True``.

        """
        def _op_end_translate(lexer, match, ctx=None):
            if not lexer.no_end:
                kw = match.group().upper()
                yield (match.start(),
                       toktype,
                       lexer.END_TRANSLATIONS.get(
                           kw, lexer.DEFAULT_END_PREFIX + kw))
            if ctx:
                ctx.pos = match.end()

        return _op_end_translate

    def op_opt_ignore(toktype):
        """Return a callback that yields the matched text unchanged or --
        if the lexer's `no_end` setting evals to ``True`` -- nothing.

        """
        def _op_opt_ignore(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, match.group()
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore

    def op_opt_ignore_or_fixed(toktype, value):
        """Yield a fixed given token type and value or -- if the lexer's
        `no_end` setting evals to ``True`` nothing.

        """
        def _op_opt_ignore_or_fixed(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, value
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore_or_fixed

    def op_symbol(toktype):
        """Return a callback that yields the symbol registered for the
        upper-cased match in the lexer's `SYMBOLS` (the upper-cased match
        itself if there is no entry).

        """
        def _op_symbol(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_symbol

    def op_explicit_tokentype(lexer, match, ctx=None):
        """Lexer callback for the ``\\ttx-TYPE...`` and ``\\tt-TYPE/C``
        commands.

        The named group ``type`` is looked up in `REVERSED_STANDARD_TYPES`
        and the named group ``character`` is yielded with the token type
        found.  An unknown type yields the complete match as
        `Generic.Error` and logs a warning.

        """
        needed_css = match.group("type")
        toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
        if toktype is None:
            # Be more error friendly
            toktype = Generic.Error
            val = match.group()
            _logger.warning("Unhandled explicit token type: %s", val)
        else:
            val = match.group("character")
        yield match.start(), toktype, val
        if ctx:
            ctx.pos = match.end()

    #
    # NOTE: the states "multiline-nested-comment", "py-strings",
    #       "py-numbers" and "py-name" are not defined here -- presumably
    #       they are inherited from LexBase.
    #
    tokens = {
        "root": [
            (r"\n", Whitespace),
            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
            (r"(//|#).*$", Comment.Single),
            include("remark"),
            (r"(?i)\\(block|state(?:ment)?)[ \t]*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "block-expr"),
            (r"(?i)\\("
             r"(?:textstate(?:ment)?)"
             r"|(?:tstate(?:ment)?)"
             r"|(?:textblock)"
             r"|(?:tblock)"
             r")[ \t]*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "text-statement"),
            (r"(?i)\\("
             r"(?:input(?:s)?)"
             r"|(?:output(?:s)?)"
             r"|(?:return(?:s)?)"
             r")[ \t]*(\{)",
             bygroups(op_translate(Keyword),
                      LexBase.op_fixed(Whitespace, " ")),
             "text-statement"),
            (r"(?i)\\("
             r"(?:if)"
             r"|(?:then)"
             r"|(?:else)"
             r"|(?:el(?:s(?:e)?)?if)"
             r"|(?:do)"
             r"|(?:while)"
             r"|(?:forall)"
             r"|(?:for)"
             r"|(?:step)"
             r"|(?:loop)"
             r"|(?:repeat)"
             r"|(?:until)"
             r")\b",
             bygroups(op_translate(Keyword))),
            (r"\\\n", Text),
            (r"(?i)\\("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?|(?:fn))"
             r"|(?:class)"
             r")[ \t]*(\{)",
             bygroups(op_translate(Keyword),
                      LexBase.op_fixed(Whitespace, " ")),
             "entity-name"),
            # ENDxxx keywords with optional entity name in two parts:
            # 1. with name
            (r"(?i)\\end(?:[_\-]|(?:[ \t]+))?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?)"
             r"|(?:fn)"
             r"|(?:class)"
             r")(?:[_\-]|(?:[\t ]+))?(\{)",
             bygroups(op_opt_end_translate(Keyword),
                      op_opt_ignore_or_fixed(Whitespace, " ")),
             "entity-name-end"),
            # 2. without name
            # 3. AND keywords that do not allow a param (e.g. endif)
            (r"(?i)\\end(?:[_\-]|(?:[ \t]+))?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?)"
             r"|(?:fn)"
             r"|(?:class)"
             r"|(?:if)"
             r"|(?:while)"
             r"|(?:for)"
             r"|(?:forall)"
             r"|(?:loop)"
             r")\b",
             bygroups(op_opt_end_translate(Keyword))),
            #
            # A single begin or end that is never suppressed because
            # it is supposed to be paired with begin
            #
            (r"(?i)\\(begin|end)\b",
             bygroups(op_translate(Keyword))),
            # Keywords
            (r"(?i)\\("
             r"(?:is)"
             r"|(?:with)"
             r")\b",
             bygroups(op_translate(Keyword))),
            include("expr"),
            (r"[^\S\n]+", Text),
            (r".", Generic.Error),     # tolerance for errors
        ],
        "remark": [
            (r"(?i)\\(remark|rem)\b(.*)$",
             bygroups(op_symbol(Comment.Single), Comment.Single)),
        ],
        "entity-name": [
            # may be multiline
            (r"[^\\}]+", Name.Entity),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", LexBase.op_fixed(Name.Entity, "}")),
            (r"\\\\", LexBase.op_fixed(Name.Entity, "\\")),
            (r"\\", LexBase.op_fixed(Name.Entity, "\\")),
        ],
        "entity-name-end": [
            # may be multiline -- suppressed if no_end
            (r"[^\\}]+", op_opt_ignore(Name.Entity)),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
            (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
            (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
        ],
        "expr": [
            include("punctuation"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("word-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "expr-in-braces": [
            include("punctuation-in-braces"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text[ \t]*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("word-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "block-expr": [
            # somewhat similar to "root"
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            include("expr-in-braces"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
            (r"[^\S\n]+", Text),
            (r".", Generic.Error),     # tolerance for errors
        ],
        "text-statement": [
            # like block but default to text-mode
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            (r"(?i)\\expr(?:ession)?[ \t]*\{",
             LexBase.op_ignore,
             "block-expr"),
            include("explicit-tokentype"),
            include("remark"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
            (r".", Generic.Error),     # tolerance for errors
        ],
        "text-in-expr": [
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            (r"(?i)\\expr(?:ession)?[ \t]*\{",
             LexBase.op_ignore,
             "block-expr"),
            include("explicit-tokentype"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
            (r".", Generic.Error),     # tolerance for errors
        ],
        "math-builtins": [
            # "arcos" is a historical misspelling of "arccos"; it is kept
            # so that existing documents are highlighted as before.
            (words(("sqrt", "pow",
                    "cos", "sin", "tan",
                    "arccos", "arcos", "arcsin", "arctan", "arctan2",
                    "mod", "exp", "ln", "log"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Name.Builtin),
        ],
        "word-operators": [
            (words(("IN", "In", "in",
                    "IS", "Is", "is",
                    "AND", "And", "and",
                    "OR", "Or", "or",
                    "XOR", "Xor", "xor",
                    "NOT", "Not", "not"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Operator.Word),
        ],
        "keyword-constants": [
            (words(("True", "TRUE", "true",
                    "False", "FALSE", "false",
                    "None", "NONE", "none",
                    "Nil", "NIL", "nil",
                    "Null", "NULL", "null",
                    "Empty", "EMPTY", "empty"),
                   prefix=r"(?<!\.)", suffix=r"\b"),
             Keyword.Constant),
        ],
        "punctuation": [
            (r"[{}:(),;[\]]", Punctuation),
        ],
        "punctuation-in-braces": [
            # like "punctuation" but needs an escaped curly brace for }
            (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
            (r"[{:(),;[\]]", Punctuation),
        ],
        "explicit-tokentype": [
            # All these REs are CASE-SENSITIVE!
            # Multiple characters possible, but no escaping!
            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])"
             r"(?P<character>(.|\n)+?)(?P=sep)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]",
             op_explicit_tokentype),
            # Every character is possible: no escaping needed!
            (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))",
             op_explicit_tokentype),
        ],
    }

    def __init__(self, **options):
        """Initialize the lexer and evaluate the boolean `no_end` option
        (default ``False``).

        """
        # Set before the base class is initialized, as in the original
        # statement order.
        val = pygments.util.get_bool_opt(options, "no_end", default=False)
        self.no_end = val
        LexBase.__init__(self, **options)


class AlgPseudocodeLexer_DE(AlgPseudocodeLexer):

    """The pseudocode lexer with German translations for the entity and
    the IS/WITH keywords.

    All other keywords keep the translations of the base class.

    """

    name = "AlgPseudocodeDE"
    aliases = ["algpseudocode-de", "algpseudo-de"]
    filenames = ["*.algpseudo-de", "*.algpseudocode-de"]

    LANG = "de"

    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMM",
        "PROGRAM": "PROGRAMM",
        "ALGO": "ALGORITHMUS",
        "ALGORITHM": "ALGORITHMUS",
        "PROC": "PROZEDUR",
        "PROCEDURE": "PROZEDUR",
        "FUNC": "FUNKTION",
        "FUNCTION": "FUNKTION",
        "FN": "FUNKTION",
        "CLASS": "KLASSE",
        "IS": "IST",
        "WITH": "MIT",
    })

    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "ENDE DES PROGRAMMS",
        # Must agree with the "PROG" alias (was "ENDE VON PROGRAMMS")
        "PROGRAM": "ENDE DES PROGRAMMS",
        "ALGO": "ENDE DES ALGORITHMUS",
        "ALGORITHM": "ENDE DES ALGORITHMUS",
        "PROC": "ENDE DER PROZEDUR",
        "PROCEDURE": "ENDE DER PROZEDUR",
        "FUNC": "ENDE DER FUNKTION",
        "FUNCTION": "ENDE DER FUNKTION",
        "FN": "ENDE DER FUNKTION",
        "CLASS": "ENDE DER KLASSE",
    })

    DEFAULT_END_PREFIX = "ENDE VON "


class AlgPseudocodeLexer_FR(AlgPseudocodeLexer):

    """The pseudocode lexer with French translations for the entity and
    the IS/WITH keywords.

    All other keywords keep the translations of the base class.

    """

    name = "AlgPseudocodeFR"
    aliases = ["algpseudocode-fr", "algpseudo-fr"]
    filenames = ["*.algpseudo-fr", "*.algpseudocode-fr"]

    LANG = "fr"

    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMME",
        "PROGRAM": "PROGRAMME",
        "ALGO": "ALGORITHME",
        "ALGORITHM": "ALGORITHME",
        "PROC": "PROCÉDURE",
        "PROCEDURE": "PROCÉDURE",
        "FUNC": "FONCTION",
        "FUNCTION": "FONCTION",
        "FN": "FONCTION",
        "CLASS": "CLASSE",
        "IS": "EST",
        "WITH": "AVEC",
    })

    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "FIN DE PROGRAMME",
        "PROGRAM": "FIN DE PROGRAMME",
        "ALGO": "FIN D'ALGORITHME",
        "ALGORITHM": "FIN D'ALGORITHME",
        "PROC": "FIN DE PROCÉDURE",
        "PROCEDURE": "FIN DE PROCÉDURE",
        "FUNC": "FIN DE FONCTION",
        "FUNCTION": "FIN DE FONCTION",
        "FN": "FIN DE FONCTION",
        "CLASS": "FIN DE CLASSE",
    })

    DEFAULT_END_PREFIX = "FIN DE "
