# HG changeset patch # User Franz Glasner # Date 1776793208 -7200 # Node ID 1f741934205e0cce961694ede7b646c39d7dd91e # Parent db1bc740a2014ea221174b424207dc8f1aedc4eb Begin a new Pseudocode lexer using numbers and strings from Python diff -r db1bc740a201 -r 1f741934205e pygments_lexer_pseudocode2/bases.py --- a/pygments_lexer_pseudocode2/bases.py Tue Apr 21 12:33:17 2026 +0200 +++ b/pygments_lexer_pseudocode2/bases.py Tue Apr 21 19:40:08 2026 +0200 @@ -5,12 +5,11 @@ # :- r"""Some common bases for the lexers.""" - -__all__ = [] +__all__ = ["LexBase"] -from pygments.lexer import RegexLexer -from pygments.token import Number +from pygments.lexer import RegexLexer, combined, bygroups, include +from pygments.token import Number, String class LexBase(RegexLexer): @@ -21,13 +20,43 @@ """ - tokens = { -# -# This state is borrowed from Pygment's Python lexer. # # SPDX-SnippetBegin # SPDX-License-Identifier: BSD-2-Clause # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team +# + def py_innerstring_rules(ttype): + return [ + # the old style '%s' % (...) string formatting (still valid in Py3) + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[E-GXc-giorsaux%]', String.Interpol), + # the new style '{}'.format(...) string formatting + (r'\{' + r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + r'(\![sra])?' # conversion + r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' + r'\}', String.Interpol), + # + # backslashes, quotes and formatting signs must be parsed + # one at a time + # + (r'[^\\\'"%{\n]+', ttype), + (r'[\'"\\]', ttype), + # unhandled string formatting sign + (r'%|(\{{1,2})', ttype) + # newlines are an error (use "nl" state) + ] +# SPDX-SnippetEnd + + tokens = { +# +# These states are borrowed from Pygments' Python lexer. +# Their names have been prefixed with `py-'. 
+# +# SPDX-SnippetBegin +# SPDX-License-Identifier: BSD-2-Clause +# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team +# 'py-numbers': [ (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)' r'([eE][+-]?\d(?:_?\d)*)?', Number.Float), @@ -37,5 +66,55 @@ (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex), (r'\d(?:_?\d)*', Number.Integer), ], + 'py-strings': [ + # non-raw strings + ('([uU]?)(""")', bygroups(String.Affix, String.Double), + combined('py-stringescape', 'py-tdqs')), + ("([uU]?)(''')", bygroups(String.Affix, String.Single), + combined('py-stringescape', 'py-tsqs')), + ('([uU]?)(")', bygroups(String.Affix, String.Double), + combined('py-stringescape', 'py-dqs')), + ("([uU]?)(')", bygroups(String.Affix, String.Single), + combined('py-stringescape', 'py-sqs')), + # non-raw bytes + ('([bB])(""")', bygroups(String.Affix, String.Double), + combined('py-bytesescape', 'py-tdqs')), + ("([bB])(''')", bygroups(String.Affix, String.Single), + combined('py-bytesescape', 'py-tsqs')), + ('([bB])(")', bygroups(String.Affix, String.Double), + combined('py-bytesescape', 'py-dqs')), + ("([bB])(')", bygroups(String.Affix, String.Single), + combined('py-bytesescape', 'py-sqs')), + ], + 'py-stringescape': [ + (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape), + include('py-bytesescape') + ], + 'py-bytesescape': [ + (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', + String.Escape) + ], + 'py-dqs': [ + (r'"', String.Double, '#pop'), + (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings + include('py-strings-double') + ], + 'py-sqs': [ + (r"'", String.Single, '#pop'), + (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings + include('py-strings-single') + ], + 'py-tdqs': [ + (r'"""', String.Double, '#pop'), + include('py-strings-double'), + (r'\n', String.Double) + ], + 'py-tsqs': [ + (r"'''", String.Single, '#pop'), + include('py-strings-single'), + (r'\n', String.Single) + ], + 'py-strings-single': 
py_innerstring_rules(String.Single), + 'py-strings-double': py_innerstring_rules(String.Double), # SPDX-SnippetEnd } diff -r db1bc740a201 -r 1f741934205e pygments_lexer_pseudocode2/pseudocode.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pygments_lexer_pseudocode2/pseudocode.py Tue Apr 21 19:40:08 2026 +0200 @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# :- +# SPDX-FileCopyrightText: © 2026 Franz Glasner +# SPDX-License-Identifier: MIT +# :- +r"""A pseudocode lexer along the lines of CTAN's algpseudocode or +algpseudocodex. + +""" + +__all__ = ["PseudocodeLexer"] + + +import re + +from pygments.lexer import include +from pygments.token import (Text, Whitespace) + +# +# Relative imports do not work with pygments.lexers.load_lexer_from_file() +# in all of our supported Python releases. +# +from pygments_lexer_pseudocode2.bases import LexBase + + +class PseudocodeLexer(LexBase): + + """A pseudocode lexer along the lines of CTAN's algpseudocode or + algpseudocodex. + + Some ideas (e.g. strings) are borrowed from Pygments' Python lexer. 
+ + """ + + name = "Pseudocode" + aliases = ["pseudocode", "pseudo", "algorithm", "algo"] + filenames = ["*.algo", "*.pseudocode"] + mimetypes = [] + flags = re.MULTILINE + + tokens = { + "root": [ + (r"\n", Whitespace), + (r"\\\n", Text), + include("expr"), + ], + "expr": [ + include("py-strings"), + include("py-numbers"), + ] + } diff -r db1bc740a201 -r 1f741934205e pyproject.toml --- a/pyproject.toml Tue Apr 21 12:33:17 2026 +0200 +++ b/pyproject.toml Tue Apr 21 19:40:08 2026 +0200 @@ -40,6 +40,7 @@ [project.entry-points.'pygments.lexers'] # The mostly original and sonewhat extended Pseudocode lexer (fr) fr_pseudocodelexer = "pygments_lexer_pseudocode2.fr_pseudocode:FrPseudocodeLexer" +pseudocodelexer = "pygments_lexer_pseudocode2.pseudocode:PseudocodeLexer" [tool.setuptools] packages = [ diff -r db1bc740a201 -r 1f741934205e tests/_tsetup.py --- a/tests/_tsetup.py Tue Apr 21 12:33:17 2026 +0200 +++ b/tests/_tsetup.py Tue Apr 21 19:40:08 2026 +0200 @@ -8,6 +8,16 @@ """ +__all__ = [ + "PROJECTDIR", + "LEXERCLASS", + "FRLEXERCLASS", + "LEXERFILENAME", + "FRLEXERFILENAME", + "TESTSNIPPETSDIR", +] + + import logging import os import sys @@ -17,8 +27,13 @@ os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))) sys.path.insert(0, PROJECTDIR) FRLEXERFILENAME = os.path.join(PROJECTDIR, - "pygments_lexer_pseudocode2/fr_pseudocode.py") + "pygments_lexer_pseudocode2", + "fr_pseudocode.py") FRLEXERCLASS = "FrPseudocodeLexer" +LEXERFILENAME = os.path.join(PROJECTDIR, + "pygments_lexer_pseudocode2", + "pseudocode.py") +LEXERCLASS = "PseudocodeLexer" TESTSNIPPETSDIR = os.path.join( os.path.abspath(os.path.dirname(__file__)), "snippets") diff -r db1bc740a201 -r 1f741934205e tests/test_pseudo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_pseudo.py Tue Apr 21 19:40:08 2026 +0200 @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# :- +# SPDX-FileCopyrightText: © 2026 Franz Glasner +# SPDX-License-Identifier: MIT +# :- + +from _tsetup import LEXERFILENAME, 
LEXERCLASS + +import unittest + +import pygments +import pygments.lexers + +import _testhelper + + +class TestSnippets(unittest.TestCase, _testhelper.TokenAssertHelper): + + def setUp(self): + self.lexer = pygments.lexers.load_lexer_from_file( + LEXERFILENAME, LEXERCLASS) + + def test_lf(self): + self.assertTokenStreamEqualComplete( + [("Text.Whitespace", "\n")], + pygments.lex("\n", self.lexer)) + + def test_protected_lf(self): + self.assertTokenStreamEqualComplete( + [("Text", "\\\n")], + pygments.lex("\\\n", self.lexer)) + + def test_number_int(self): + self.assertTokenStreamEqualComplete( + [("Number.Integer", "10"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("10", self.lexer)) + + def test_number_float_1(self): + self.assertTokenStreamEqualComplete( + [("Number.Float", "3.1415926"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("3.1415926", self.lexer)) + + def test_number_float_2(self): + self.assertTokenStreamEqualComplete( + [("Number.Float", "3.14e-12"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("3.14e-12", self.lexer)) + + def test_string_s_1(self): + self.assertTokenStreamEqualComplete( + [("String.Single", "'"), + ("String.Single", "HU"), + ("String.Single", '"'), + ("String.Single", "HE HA"), + ("String.Escape", "\\'"), + ("String.Single", "HO"), + ("String.Single", "'"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("""'HU"HE HA\\'HO'""", self.lexer)) + + def test_string_s_2(self): + self.assertTokenStreamEqual( + [("String.Single", "'"), + ("String.Single", "HUHU"), + ("Text.Whitespace", "\n"), + ("Error", "H"), + ], + pygments.lex("'HUHU\nHEHE'", self.lexer)) + + def test_string_ts_1(self): + self.assertTokenStreamEqualComplete( + [("String.Single", "'''"), + ("String.Single", "HUHU HEHE"), + ("String.Single", "'''"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("'''HUHU HEHE'''", self.lexer)) + + def test_string_ts_2(self): + self.assertTokenStreamEqualComplete( + [("String.Single", "'''"), + ("String.Single", "HI"), + 
("String.Single", "'"), + ("String.Single", "HU"), + ("String.Single", "\n"), + ("String.Single", "HE"), + ("String.Single", '"'), + ("String.Single", "HA"), + ("String.Single", "'''"), + ("Text.Whitespace", "\n"), + ], + pygments.lex("""'''HI'HU\nHE"HA'''""", self.lexer)) + + def test_string_d_1(self): + self.assertTokenStreamEqualComplete( + [("String.Double", '"'), + ("String.Double", 'HU'), + ("String.Double", "'"), + ("String.Double", 'HE HA'), + ("String.Escape", '\\"'), + ("String.Double", 'HO'), + ("String.Double", '"'), + ("Text.Whitespace", "\n"), + ], + pygments.lex('''"HU'HE HA\\"HO"''', self.lexer)) + + def test_string_d_2(self): + self.assertTokenStreamEqual( + [("String.Double", '"'), + ("String.Double", "HUHU"), + ("Text.Whitespace", "\n"), + ("Error", "H"), + ], + pygments.lex('"HUHU\nHEHE"', self.lexer)) + + def test_string_td_1(self): + self.assertTokenStreamEqualComplete( + [("String.Double", '"""'), + ("String.Double", 'HUHU HAHA'), + ("String.Double", '"""'), + ("Text.Whitespace", "\n"), + ], + pygments.lex('"""HUHU HAHA"""', self.lexer)) + + def test_string_td_2(self): + self.assertTokenStreamEqualComplete( + [("String.Double", '"""'), + ("String.Double", 'HU'), + ("String.Double", '"'), + ("String.Double", "HO"), + ("String.Double", "\n"), + ("String.Double", "HE"), + ("String.Double", "'"), + ("String.Double", "HA"), + ("String.Double", '"""'), + ("Text.Whitespace", "\n"), + ], + pygments.lex('''"""HU"HO\nHE'HA"""''', self.lexer)) + + +if __name__ == "__main__": + unittest.main()