Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2


# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""A pseudocode lexer along the lines of CTAN's algpseudocode or
algpseudocodex.

"""

# Public API: the English base lexer and its translated variants.
__all__ = [
    "AlgPseudocodeLexer",
    "AlgPseudocodeLexer_DE",
    "AlgPseudocodeLexer_FR",
]


import re

import pygments.util
from pygments.lexer import bygroups, include, words
from pygments.token import (Comment, Keyword, Name, Operator, Punctuation,
                            Text, Whitespace,
                            STANDARD_TYPES)

#
# Relative imports do not work with pygments.lexers.load_lexer_from_file()
# in all of our supported Python releases.
#
from pygments_lexer_pseudocode2.bases import LexBase


class AlgPseudocodeLexer(LexBase):

    """A pseudocode lexer along the lines of CTAN's algpseudocode or
    algpseudocodex.

    Some ideas (e.g. strings) are borrowed from Pygments' Python lexer.

    Options:

    `no_end`
        If true the "end of entity" keywords and their optional entity
        names are consumed but not emitted (default: ``False``).

    Subclasses localize the output by overriding `LANG`, `TRANSLATIONS`,
    `END_TRANSLATIONS` and `DEFAULT_END_PREFIX`.

    """

    name = "AlgPseudocode"
    aliases = ["algpseudocode", "algpseudo"]
    filenames = ["*.algpseudo", "*.algpseudocode"]
    mimetypes = []
    flags = re.MULTILINE

    LANG = "en"
    # Upper-cased source keyword -> emitted keyword text
    TRANSLATIONS = {
        "PROG": "PROGRAM",
        "PROGRAM": "PROGRAM",
        "ALGO": "ALGORITHM",
        "ALGORITHM": "ALGORITHM",
        "PROC": "PROCEDURE",
        "PROCEDURE": "PROCEDURE",
        "FUNC": "FUNCTION",
        "FUNCTION": "FUNCTION",
        "FN": "FUNCTION",
        "CLASS": "CLASS",
        "IS": "IS",
        "WITH": "WITH",
    }
    # Upper-cased entity keyword (without the END prefix) -> emitted text
    END_TRANSLATIONS = {
        "PROG": "END OF PROGRAM",
        "PROGRAM": "END OF PROGRAM",
        "ALGO": "END OF ALGORITHM",
        "ALGORITHM": "END OF ALGORITHM",
        "PROC": "END OF PROCEDURE",
        "PROCEDURE": "END OF PROCEDURE",
        "FUNC": "END OF FUNCTION",
        "FUNCTION": "END OF FUNCTION",
        "FN": "END OF FUNCTION",
        "CLASS": "END OF CLASS",
    }
    # Fallback prefix for END keywords missing in END_TRANSLATIONS
    DEFAULT_END_PREFIX = "END OF "
    SYMBOL_REMARK = "▷"            # U+25B7: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_REMARK = "▻"          # U+25BB: Unicode 1.0 (Geometric Shapes)
    SYMBOL_BLOCK = "◆"             # U+25C6: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_BLOCK = "┃"           # U+2503: Unicode 1.0 (Box Drawing)
    # SYMBOL_BLOCK = "●"           # U+25CF: Unicode 1.0 (Geometric Shapes)
    SYMBOL_TEXTSTATEMENT = "▪"     # U+25AA: Unicode 1.0 (Geometric Shapes)
    # SYMBOL_TEXTSTATEMENT = "■"   # U+25A0: Unicode 1.0 (Geometric Shapes)
    # Upper-cased source keyword -> symbol emitted in its place
    SYMBOLS = {
        # Group REMARK
        "REMARK": SYMBOL_REMARK,
        "REM": SYMBOL_REMARK,
        # Group STATEMENT
        "STATEMENT": SYMBOL_BLOCK,
        "STATE": SYMBOL_BLOCK,
        "BLOCK": SYMBOL_BLOCK,
        # Group TEXTSTATEMENT
        "TEXTSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TEXTSTATE": SYMBOL_TEXTSTATEMENT,
        "TSTATEMENT": SYMBOL_TEXTSTATEMENT,
        "TSTATE": SYMBOL_TEXTSTATEMENT,
        "TEXTBLOCK": SYMBOL_TEXTSTATEMENT,
        "TBLOCK": SYMBOL_TEXTSTATEMENT,
    }

    #
    # Callback factories.
    #
    # They are plain functions that are called while the class body is
    # executed to build the callbacks used in the `tokens` table below.
    # Every callback has the signature ``(lexer, match, ctx=None)`` and
    # advances `ctx.pos` to the end of the match if a context is given.
    #

    def op_translate(toktype):
        """Return a callback that yields the upper-cased match -- translated
        by the lexer's `TRANSLATIONS` -- as token of type `toktype`.

        A keyword without a translation is yielded upper-cased.

        """

        def _op_translate(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.TRANSLATIONS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_translate

    def op_opt_end_translate(toktype):
        """Return a callback that yields the upper-cased match -- translated
        by the lexer's `END_TRANSLATIONS` -- as token of type `toktype` or
        -- if the lexer's `no_end` setting evals to ``True`` -- nothing.

        A keyword without a translation is yielded upper-cased and
        prefixed with the lexer's `DEFAULT_END_PREFIX`.

        """

        def _op_end_translate(lexer, match, ctx=None):
            if not lexer.no_end:
                kw = match.group().upper()
                yield (match.start(),
                       toktype,
                       lexer.END_TRANSLATIONS.get(
                           kw,
                           lexer.DEFAULT_END_PREFIX + kw))
            if ctx:
                ctx.pos = match.end()

        return _op_end_translate

    def op_opt_ignore(toktype):
        """Return a callback that yields the unchanged match as token of
        type `toktype` or -- if the lexer's `no_end` setting evals to
        ``True`` -- nothing.

        """

        def _op_opt_ignore(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, match.group()
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore

    def op_opt_ignore_or_fixed(toktype, value):
        """Yield a fixed given token type and value or -- if the lexer's
        `no_end` setting evals to ``True`` nothing.

        """

        def _op_opt_ignore_or_fixed(lexer, match, ctx=None):
            if not lexer.no_end:
                yield match.start(), toktype, value
            if ctx:
                ctx.pos = match.end()

        return _op_opt_ignore_or_fixed

    def op_symbol(toktype):
        """Return a callback that yields the symbol that the lexer's
        `SYMBOLS` assign to the upper-cased match as token of type `toktype`.

        A keyword without a symbol is yielded upper-cased.

        """

        def _op_symbol(lexer, match, ctx=None):
            kw = match.group().upper()
            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
            if ctx:
                ctx.pos = match.end()

        return _op_symbol

    def op_explicit_tokentype(lexer, match, ctx=None):
        """Callback that yields the match's named group ``character`` with
        the token type whose short name in Pygments' `STANDARD_TYPES` equals
        the match's named group ``type``.

        An unknown short name yields a `Text` token.

        """
        needed_css = match.group("type")
        # First token type (in table order) with the requested short name
        for ttype, css in STANDARD_TYPES.items():
            if css == needed_css:
                toktype = ttype
                break
        else:
            toktype = Text
        yield match.start(), toktype, match.group("character")
        if ctx:
            ctx.pos = match.end()

    tokens = {
        "root": [
            (r"\n", Whitespace),
            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
            (r"//.*$", Comment.Single),
            include("remark"),
            (r"(?i)\\(block|state(?:ment)?)\s*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "block-expr"),
            (r"(?i)\\("
             r"(?:textstate(?:ment)?)"
             r"|(?:tstate(?:ment)?)"
             r"|(?:textblock)"
             r"|(?:tblock)"
             r")\s*(\{)",
             bygroups(op_symbol(Text), LexBase.op_fixed(Whitespace, " ")),
             "text-statement"),
            (r"\\\n", Text),
            (r"(?i)\\("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?)"
             r"|(?:fn)"
             r"|(?:class)"
             r")\s*(\{)",
             bygroups(op_translate(Keyword),
                      LexBase.op_fixed(Whitespace, " ")),
             "entity-name"),
            # ENDxxx keywords with optional entity name.
            # They accept the same entity keywords as the openers above
            # (including "fn" which has an entry in END_TRANSLATIONS).
            #   with name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?)"
             r"|(?:fn)"
             r"|(?:class)"
             r")\s*(\{)",
             bygroups(op_opt_end_translate(Keyword),
                      op_opt_ignore_or_fixed(Whitespace, " ")),
             "entity-name-end"),
            #   without name
            (r"(?i)\\end(?:_|-)?("
             r"(?:prog(?:ram)?)"
             r"|(?:algo(?:rithm)?)"
             r"|(?:proc(?:edure)?)"
             r"|(?:func(?:tion)?)"
             r"|(?:fn)"
             r"|(?:class)"
             r")\b",
             bygroups(op_opt_end_translate(Keyword))),
            # Keywords
            (r"(?i)\\("
             r"(?:is)"
             r"|(?:with)"
             r")\b",
             bygroups(op_translate(Keyword))),
            include("expr"),
            (r"\s+", Text),
        ],
        "remark": [
            (r"(?i)\\(remark|rem)\b(.*)$",
             bygroups(op_symbol(Comment.Single), Comment.Single)),
        ],
        "entity-name": [      # may be multiline
            (r"[^\\}]+", Name.Entity),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", LexBase.op_fixed(Name.Entity, "}")),
            (r"\\\\", LexBase.op_fixed(Name.Entity, "\\")),
            (r"\\", LexBase.op_fixed(Name.Entity, "\\")),
        ],
        "entity-name-end": [  # may be multiline -- suppressed if no_end
            (r"[^\\}]+", op_opt_ignore(Name.Entity)),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\\\}", op_opt_ignore_or_fixed(Name.Entity, "}")),
            (r"\\\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
            (r"\\", op_opt_ignore_or_fixed(Name.Entity, "\\")),
        ],
        "expr": [
            include("punctuation"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("text-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "expr-in-braces": [  # include-only: has no rule for the closing }
            include("punctuation-in-braces"),
            include("py-strings"),
            include("py-numbers"),
            (r"(?i)\\text\s*\{", LexBase.op_ignore, "text-in-expr"),
            include("explicit-tokentype"),
            include("remark"),
            include("keyword-constants"),
            include("text-operators"),
            include("math-builtins"),
            include("py-name"),
        ],
        "block-expr": [      # somewhat similar to "root"
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            include("expr-in-braces"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
            (r"\s+", Text),
        ],
        "text-statement": [  # like block but default to text-mode
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            (r"(?i)\\expr(?:ession)?\s*\{",
             LexBase.op_ignore,
             "block-expr"),
            include("explicit-tokentype"),
            include("remark"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
        ],
        "text-in-expr": [
            (r"[^\\}\n]+", Text),
            (r"\}", LexBase.op_ignore, "#pop"),
            (r"\n", Whitespace),
            (r"\\\}", LexBase.op_fixed(Text, "}")),
            # The case-insensitive flag must be spelled "(?i)": "(?:i)" is
            # a non-capturing group that matches a literal "i".
            # The nested expression is lexed in "block-expr" (as in
            # "text-statement") because this is the state that pops on the
            # closing brace; "expr-in-braces" is meant to be included only.
            (r"(?i)\\(expr(?:ession)?)\s*(\{)",
             bygroups(LexBase.op_ignore, LexBase.op_ignore),
             "block-expr"),
            include("explicit-tokentype"),
            (r"\\\\", LexBase.op_fixed(Text, "\\")),
            (r"\\", LexBase.op_fixed(Text, "\\")),
        ],
        "math-builtins": [
            # "arcos" is a historical misspelling of "arccos"; it is kept
            # to not change the highlighting of existing sources.
            (words(("sqrt", "pow", "cos", "sin", "tan",
                    "arccos", "arcos", "arcsin",
                    "arctan", "arctan2", "mod", "exp", "ln", "log"),
                   prefix=r"(?<!\.)",
                   suffix=r"\b"),
             Name.Builtin),
        ],
        "text-operators": [
            (words(("in", "is", "and", "or", "xor", "not"),
                   prefix=r"(?<!\.)",
                   suffix=r"\b"),
             Operator.Word),
        ],
        "keyword-constants": [
            (words(("True", "TRUE", "true", "False", "FALSE", "false",
                    "None", "NONE", "none", "Nil", "NIL", "nil",
                    "Null", "NULL", "null",
                    "Empty", "EMPTY", "empty"),
                   prefix=r"(?<!\.)",
                   suffix=r"\b"),
             Keyword.Constant),
        ],
        "punctuation": [
            (r"[{}:(),;[\]]", Punctuation),
        ],
        "punctuation-in-braces": [
            # like "punctuation" but needs an escaped curly brace for }
            (r"\\\}", LexBase.op_fixed(Punctuation, "}")),
            (r"[{:(),;[\]]", Punctuation),
        ],
        "explicit-tokentype": [
            # All these REs are CASE-SENSITIVE!

            # Multiple characters possible, but no escaping!
            (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])"
             r"(?P<character>(.|\n)+?)(?P=sep)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^{]+)\{"
             r"(?P<character>[^}]+?)\}",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^\(]+)\("
             r"(?P<character>[^\)]+?)\)",
             op_explicit_tokentype),
            (r"\\ttx\-(?P<type>[^<]+)<"
             r"(?P<character>[^>]+?)>",
             op_explicit_tokentype),

            # Every character is possible: no escaping needed!
            (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))",
             op_explicit_tokentype),
        ],
    }

    def __init__(self, **options):
        """Initialize the lexer and evaluate the boolean option `no_end`.

        All `options` are handed over to the base class unchanged.

        """
        val = pygments.util.get_bool_opt(options, "no_end", default=False)
        self.no_end = val
        LexBase.__init__(self, **options)


class AlgPseudocodeLexer_DE(AlgPseudocodeLexer):

    """German variant of the pseudocode lexer.

    Only the lexer metadata and the keyword translations differ from
    the base class.

    """

    name = "AlgPseudocodeDE"
    aliases = ["algpseudocode-de", "algpseudo-de"]
    filenames = ["*.algpseudo-de", "*.algpseudocode-de"]

    LANG = "de"
    # Work on copies: the tables of the base class must not be changed
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMM",
        "PROGRAM": "PROGRAMM",
        "ALGO": "ALGORITHMUS",
        "ALGORITHM": "ALGORITHMUS",
        "PROC": "PROZEDUR",
        "PROCEDURE": "PROZEDUR",
        "FUNC": "FUNKTION",
        "FUNCTION": "FUNKTION",
        "FN": "FUNKTION",
        "CLASS": "KLASSE",
        "IS": "IST",
        "WITH": "MIT",
    })
    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "ENDE DES PROGRAMMS",
        # Short and long keyword forms must yield the same text
        "PROGRAM": "ENDE DES PROGRAMMS",
        "ALGO": "ENDE DES ALGORITHMUS",
        "ALGORITHM": "ENDE DES ALGORITHMUS",
        "PROC": "ENDE DER PROZEDUR",
        "PROCEDURE": "ENDE DER PROZEDUR",
        "FUNC": "ENDE DER FUNKTION",
        "FUNCTION": "ENDE DER FUNKTION",
        "FN": "ENDE DER FUNKTION",
        "CLASS": "ENDE DER KLASSE",
    })
    DEFAULT_END_PREFIX = "ENDE VON "


class AlgPseudocodeLexer_FR(AlgPseudocodeLexer):

    """French variant of the pseudocode lexer.

    Only the lexer metadata and the keyword translations differ from
    the base class.

    """

    name = "AlgPseudocodeFR"
    aliases = ["algpseudocode-fr", "algpseudo-fr"]
    filenames = ["*.algpseudo-fr", "*.algpseudocode-fr"]

    LANG = "fr"
    # Work on copies: the tables of the base class must not be changed
    TRANSLATIONS = AlgPseudocodeLexer.TRANSLATIONS.copy()
    TRANSLATIONS.update({
        "PROG": "PROGRAMME",
        "PROGRAM": "PROGRAMME",
        "ALGO": "ALGORITHME",
        "ALGORITHM": "ALGORITHME",
        "PROC": "PROCÉDURE",
        "PROCEDURE": "PROCÉDURE",
        "FUNC": "FONCTION",
        "FUNCTION": "FONCTION",
        "FN": "FONCTION",
        "CLASS": "CLASSE",
        "IS": "EST",
        "WITH": "AVEC",
    })
    END_TRANSLATIONS = AlgPseudocodeLexer.END_TRANSLATIONS.copy()
    END_TRANSLATIONS.update({
        "PROG": "FIN DE PROGRAMME",
        "PROGRAM": "FIN DE PROGRAMME",
        "ALGO": "FIN D'ALGORITHME",
        "ALGORITHM": "FIN D'ALGORITHME",
        "PROC": "FIN DE PROCÉDURE",
        "PROCEDURE": "FIN DE PROCÉDURE",
        "FUNC": "FIN DE FONCTION",
        "FUNCTION": "FIN DE FONCTION",
        "FN": "FIN DE FONCTION",
        "CLASS": "FIN DE CLASSE",
    })
    DEFAULT_END_PREFIX = "FIN DE "
author	Franz Glasner <fzglas.hg@dom66.de>
date	Thu, 30 Apr 2026 19:37:24 +0200
parents	cd79d2c76347
children	ae5e741d2a9b