changeset 39:a3151d837258

Some basic keywords for programs, algorithms, procedures and functions; also comments (single and multiline) and "remarks"
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 24 Apr 2026 09:44:34 +0200
parents 69522d4cafac
children df08226a6984
files pygments_lexer_pseudocode2/bases.py pygments_lexer_pseudocode2/pseudocode.py tests/test_pseudo.py
diffstat 3 files changed, 63 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/bases.py	Wed Apr 22 16:27:35 2026 +0200
+++ b/pygments_lexer_pseudocode2/bases.py	Fri Apr 24 09:44:34 2026 +0200
@@ -9,7 +9,7 @@
 
 
 from pygments.lexer import RegexLexer, combined, bygroups, include
-from pygments.token import Number, String
+from pygments.token import Number, String, Comment
 
 
 class LexBase(RegexLexer):
@@ -117,4 +117,10 @@
         'py-strings-single': py_innerstring_rules(String.Single),
         'py-strings-double': py_innerstring_rules(String.Double),
 # SPDX-SnippetEnd
+    'multiline-nested-comment': [
+            (r'[^*/]+', Comment.Multiline),
+            (r'/\*', Comment.Multiline, '#push'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[*/]', Comment.Multiline),
+        ]
     }
--- a/pygments_lexer_pseudocode2/pseudocode.py	Wed Apr 22 16:27:35 2026 +0200
+++ b/pygments_lexer_pseudocode2/pseudocode.py	Fri Apr 24 09:44:34 2026 +0200
@@ -13,8 +13,8 @@
 
 import re
 
-from pygments.lexer import include
-from pygments.token import (Keyword, Text, Whitespace)
+from pygments.lexer import include, bygroups
+from pygments.token import (Comment, Keyword, Text, Whitespace)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -51,6 +51,19 @@
         "FN": "FUNCTION",
         "CLASS": "CLASS",
     }
+    SYMBOLS = {
+        "REMARK": "▷",  # U+25B7: Unicode 1.0 (Geometric Shapes)
+        "Remark": "▷",
+        "remark": "▷",
+        "REM": "▷",
+        "Rem": "▷",
+        "rem": "▷",
+        "R": "▷",
+        "r": "▷",
+        "BLOCK": "┃",   # U+2503: Unicode 1.0 (Box Drawing)
+        "Block": "┃",
+        "block": "┃",
+    }
 
     def op_translate(toktype):
 
@@ -60,9 +73,23 @@
 
         return _op_translate
 
+    def op_symbol(toktype):
+
+        def _op_symbol(lexer, match, ctx=None):
+            kw = match.group()
+            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
+
+        return _op_symbol
+
     tokens = {
         "root": [
             (r"\n", Whitespace),
+            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
+            (r"//.*$", Comment.Single),
+            (r"(?:\\)(REMARK|Remark|remark|REM|Rem|rem|R|r)\b(.*)$",
+             bygroups(op_symbol(Comment.Single), Comment.Single)),
+            (r"(?:\\)(BLOCK|Block|block)\b(.*)$",
+             bygroups(op_symbol(Text), Text)),
             (r"\\\n", Text),
             (r"(?i)\\("
              r"(?:prog(?:ram)?)"
@@ -92,7 +119,7 @@
         "PROG": "PROGRAMM",
         "PROGRAM": "PROGRAMM",
         "ALGO": "ALGORITHMUS",
-        "ALGORITHM": "ALGORITHM",
+        "ALGORITHM": "ALGORITHMUS",
         "PROC": "PROZEDUR",
         "PROCEDURE": "PROZEDUR",
         "FUNC": "FUNKTION",
--- a/tests/test_pseudo.py	Wed Apr 22 16:27:35 2026 +0200
+++ b/tests/test_pseudo.py	Fri Apr 24 09:44:34 2026 +0200
@@ -180,6 +180,32 @@
              ],
             pygments.lex("\\FN", self.lexer))
 
+    def test_remark_1(self):
+        self.assertTokenStreamEqualComplete(
+            [("Comment.Single", "▷"),
+             ("Comment.Single", "  the remark"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex("\\REMARK  the remark\n", self.lexer))
+
+    def test_remark_2(self):
+        self.assertTokenStreamEqualComplete(
+            [("Comment.Single", "▷"),
+             ("Comment.Single", "  the remark 1"),
+             ("Text.Whitespace", "\n"),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", "  the remark 2"),
+             ("Text.Whitespace", "\n"),
+             ("Comment.Single", "▷"),
+             ("Comment.Single", " the remark 3"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                """\\REMARK  the remark 1
+\\Rem  the remark 2
+\\r the remark 3
+""", self.lexer))
+
 
 if __name__ == "__main__":
     unittest.main()