changeset 34:1f741934205e

Begin a new Pseudocode lexer using numbers and strings from Python
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 21 Apr 2026 19:40:08 +0200
parents db1bc740a201
children d9a3551a1038
files pygments_lexer_pseudocode2/bases.py pygments_lexer_pseudocode2/pseudocode.py pyproject.toml tests/_tsetup.py tests/test_pseudo.py
diffstat 5 files changed, 302 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/bases.py	Tue Apr 21 12:33:17 2026 +0200
+++ b/pygments_lexer_pseudocode2/bases.py	Tue Apr 21 19:40:08 2026 +0200
@@ -5,12 +5,11 @@
 # :-
 r"""Some common bases for the lexers."""
 
-
-__all__ = []
+__all__ = ["LexBase"]
 
 
-from pygments.lexer import RegexLexer
-from pygments.token import Number
+from pygments.lexer import RegexLexer, combined, bygroups, include
+from pygments.token import Number, String
 
 
 class LexBase(RegexLexer):
@@ -21,13 +20,43 @@
 
     """
 
-    tokens = {
-#
-# This state is borrowed from Pygment's Python lexer.
 #
 # SPDX-SnippetBegin
 # SPDX-License-Identifier: BSD-2-Clause
 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+#
+    def py_innerstring_rules(ttype):
+        return [
+            # the old style '%s' % (...) string formatting (still valid in Py3)
+            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
+            # the new style '{}'.format(...) string formatting
+            (r'\{'
+             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
+             r'(\![sra])?'                       # conversion
+             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
+             r'\}', String.Interpol),
+            #
+            # backslashes, quotes and formatting signs must be parsed
+            # one at a time
+            #
+            (r'[^\\\'"%{\n]+', ttype),
+            (r'[\'"\\]', ttype),
+            # unhandled string formatting sign
+            (r'%|(\{{1,2})', ttype)
+            # newlines are an error (use "nl" state)
+        ]
+# SPDX-SnippetEnd
+
+    tokens = {
+#
+# These states are borrowed from Pygments' Python lexer.
+# Their names have been prefixed with `py-'.
+#
+# SPDX-SnippetBegin
+# SPDX-License-Identifier: BSD-2-Clause
+# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+#
         'py-numbers': [
             (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
              r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
@@ -37,5 +66,55 @@
             (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
             (r'\d(?:_?\d)*', Number.Integer),
         ],
+        'py-strings': [
+            # non-raw strings
+            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-tdqs')),
+            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-tsqs')),
+            ('([uU]?)(")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-dqs')),
+            ("([uU]?)(')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-sqs')),
+            # non-raw bytes
+            ('([bB])(""")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-tdqs')),
+            ("([bB])(''')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-tsqs')),
+            ('([bB])(")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-dqs')),
+            ("([bB])(')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-sqs')),
+        ],
+        'py-stringescape': [
+            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
+            include('py-bytesescape')
+        ],
+        'py-bytesescape': [
+            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
+             String.Escape)
+        ],
+        'py-dqs': [
+            (r'"', String.Double, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
+            include('py-strings-double')
+        ],
+        'py-sqs': [
+            (r"'", String.Single, '#pop'),
+            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
+            include('py-strings-single')
+        ],
+        'py-tdqs': [
+            (r'"""', String.Double, '#pop'),
+            include('py-strings-double'),
+            (r'\n', String.Double)
+        ],
+        'py-tsqs': [
+            (r"'''", String.Single, '#pop'),
+            include('py-strings-single'),
+            (r'\n', String.Single)
+        ],
+        'py-strings-single': py_innerstring_rules(String.Single),
+        'py-strings-double': py_innerstring_rules(String.Double),
 # SPDX-SnippetEnd
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pygments_lexer_pseudocode2/pseudocode.py	Tue Apr 21 19:40:08 2026 +0200
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+r"""A pseudocode lexer along the lines of CTAN's algpseudocode or
+algpseudocodex.
+
+"""
+
+__all__ = ["PseudocodeLexer"]
+
+
+import re
+
+from pygments.lexer import include
+from pygments.token import (Text, Whitespace)
+
+#
+# Relative imports do not work with pygments.lexers.load_lexer_from_file()
+# in all of our supported Python releases.
+#
+from pygments_lexer_pseudocode2.bases import LexBase
+
+
+class PseudocodeLexer(LexBase):
+
+    """A pseudocode lexer along the lines of CTAN's algpseudocode or
+    algpseudocodex.
+
+    Some ideas (e.g. strings) are borrowed from Pygments' Python lexer.
+
+    """
+
+    name = "Pseudocode"
+    aliases = ["pseudocode", "pseudo", "algorithm", "algo"]
+    filenames = ["*.algo", "*.pseudocode"]
+    mimetypes = []
+    flags = re.MULTILINE
+
+    tokens = {
+        "root": [
+            (r"\n", Whitespace),
+            (r"\\\n", Text),
+            include("expr"),
+        ],
+        "expr": [
+            include("py-strings"),
+            include("py-numbers"),
+        ]
+    }
--- a/pyproject.toml	Tue Apr 21 12:33:17 2026 +0200
+++ b/pyproject.toml	Tue Apr 21 19:40:08 2026 +0200
@@ -40,6 +40,7 @@
 [project.entry-points.'pygments.lexers']
 # The mostly original and sonewhat extended Pseudocode lexer (fr)
 fr_pseudocodelexer = "pygments_lexer_pseudocode2.fr_pseudocode:FrPseudocodeLexer"
+pseudocodelexer = "pygments_lexer_pseudocode2.pseudocode:PseudocodeLexer"
 
 [tool.setuptools]
 packages = [
--- a/tests/_tsetup.py	Tue Apr 21 12:33:17 2026 +0200
+++ b/tests/_tsetup.py	Tue Apr 21 19:40:08 2026 +0200
@@ -8,6 +8,16 @@
 
 """
 
+__all__ = [
+    "PROJECTDIR",
+    "LEXERCLASS",
+    "FRLEXERCLASS",
+    "LEXERFILENAME",
+    "FRLEXERFILENAME",
+    "TESTSNIPPETSDIR",
+]
+
+
 import logging
 import os
 import sys
@@ -17,8 +27,13 @@
         os.path.normpath(os.path.join(os.path.dirname(__file__), "..")))
 sys.path.insert(0, PROJECTDIR)
 FRLEXERFILENAME = os.path.join(PROJECTDIR,
-                               "pygments_lexer_pseudocode2/fr_pseudocode.py")
+                               "pygments_lexer_pseudocode2",
+                               "fr_pseudocode.py")
 FRLEXERCLASS = "FrPseudocodeLexer"
+LEXERFILENAME = os.path.join(PROJECTDIR,
+                             "pygments_lexer_pseudocode2",
+                             "pseudocode.py")
+LEXERCLASS = "PseudocodeLexer"
 TESTSNIPPETSDIR = os.path.join(
     os.path.abspath(os.path.dirname(__file__)),
     "snippets")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_pseudo.py	Tue Apr 21 19:40:08 2026 +0200
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+
+from _tsetup import LEXERFILENAME, LEXERCLASS
+
+import unittest
+
+import pygments
+import pygments.lexers
+
+import _testhelper
+
+
+class TestSnippets(unittest.TestCase, _testhelper.TokenAssertHelper):
+
+    def setUp(self):
+        self.lexer = pygments.lexers.load_lexer_from_file(
+            LEXERFILENAME, LEXERCLASS)
+
+    def test_lf(self):
+        self.assertTokenStreamEqualComplete(
+            [("Text.Whitespace", "\n")],
+            pygments.lex("\n", self.lexer))
+
+    def test_protected_lf(self):
+        self.assertTokenStreamEqualComplete(
+            [("Text", "\\\n")],
+            pygments.lex("\\\n", self.lexer))
+
+    def test_number_int(self):
+        self.assertTokenStreamEqualComplete(
+            [("Number.Integer", "10"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("10", self.lexer))
+
+    def test_number_float_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Number.Float", "3.1415926"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("3.1415926", self.lexer))
+
+    def test_number_float_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Number.Float", "3.14e-12"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("3.14e-12", self.lexer))
+
+    def test_string_s_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Single", "'"),
+             ("String.Single", "HU"),
+             ("String.Single", '"'),
+             ("String.Single", "HE HA"),
+             ("String.Escape", "\\'"),
+             ("String.Single", "HO"),
+             ("String.Single", "'"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("""'HU"HE HA\\'HO'""", self.lexer))
+
+    def test_string_s_2(self):
+        self.assertTokenStreamEqual(
+            [("String.Single", "'"),
+             ("String.Single", "HUHU"),
+             ("Text.Whitespace", "\n"),
+             ("Error", "H"),
+             ],
+            pygments.lex("'HUHU\nHEHE'", self.lexer))
+
+    def test_string_ts_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Single", "'''"),
+             ("String.Single", "HUHU HEHE"),
+             ("String.Single", "'''"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("'''HUHU HEHE'''", self.lexer))
+
+    def test_string_ts_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Single", "'''"),
+             ("String.Single", "HI"),
+             ("String.Single", "'"),
+             ("String.Single", "HU"),
+             ("String.Single", "\n"),
+             ("String.Single", "HE"),
+             ("String.Single", '"'),
+             ("String.Single", "HA"),
+             ("String.Single", "'''"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("""'''HI'HU\nHE"HA'''""", self.lexer))
+
+    def test_string_d_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Double", '"'),
+             ("String.Double", 'HU'),
+             ("String.Double", "'"),
+             ("String.Double", 'HE HA'),
+             ("String.Escape", '\\"'),
+             ("String.Double", 'HO'),
+             ("String.Double", '"'),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex('''"HU'HE HA\\"HO"''', self.lexer))
+
+    def test_string_d_2(self):
+        self.assertTokenStreamEqual(
+            [("String.Double", '"'),
+             ("String.Double", "HUHU"),
+             ("Text.Whitespace", "\n"),
+             ("Error", "H"),
+             ],
+            pygments.lex('"HUHU\nHEHE"', self.lexer))
+
+    def test_string_td_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Double", '"""'),
+             ("String.Double", 'HUHU HAHA'),
+             ("String.Double", '"""'),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex('"""HUHU HAHA"""', self.lexer))
+
+    def test_string_td_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("String.Double", '"""'),
+             ("String.Double", 'HU'),
+             ("String.Double", '"'),
+             ("String.Double", "HO"),
+             ("String.Double", "\n"),
+             ("String.Double", "HE"),
+             ("String.Double", "'"),
+             ("String.Double", "HA"),
+             ("String.Double", '"""'),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex('''"""HU"HO\nHE'HA"""''', self.lexer))
+
+
+if __name__ == "__main__":
+    unittest.main()