diff pygments_lexer_pseudocode2/pseudocode.py @ 39:a3151d837258

Some basic keywords for programs, algorithms, procedures and functions; also comments (single and multiline) and "remarks"
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 24 Apr 2026 09:44:34 +0200
parents 69522d4cafac
children df08226a6984
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/pseudocode.py	Wed Apr 22 16:27:35 2026 +0200
+++ b/pygments_lexer_pseudocode2/pseudocode.py	Fri Apr 24 09:44:34 2026 +0200
@@ -13,8 +13,8 @@
 
 import re
 
-from pygments.lexer import include
-from pygments.token import (Keyword, Text, Whitespace)
+from pygments.lexer import include, bygroups
+from pygments.token import (Comment, Keyword, Text, Whitespace)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -51,6 +51,19 @@
         "FN": "FUNCTION",
         "CLASS": "CLASS",
     }
+    SYMBOLS = {
+        "REMARK": "▷",  # U+25B7: Unicode 1.0 (Geometric Shapes)
+        "Remark": "▷",
+        "remark": "▷",
+        "REM": "▷",
+        "Rem": "▷",
+        "rem": "▷",
+        "R": "▷",
+        "r": "▷",
+        "BLOCK": "┃",   # U+2503: Unicode 1.0 (Box Drawing)
+        "Block": "┃",
+        "block": "┃",
+    }
 
     def op_translate(toktype):
 
@@ -60,9 +73,23 @@
 
         return _op_translate
 
+    def op_symbol(toktype):
+
+        def _op_symbol(lexer, match, ctx=None):
+            kw = match.group()
+            yield match.start(), toktype, lexer.SYMBOLS.get(kw, kw)
+
+        return _op_symbol
+
     tokens = {
         "root": [
             (r"\n", Whitespace),
+            (r"/\*", Comment.Multiline, "multiline-nested-comment"),
+            (r"//.*$", Comment.Single),
+            (r"(?:\\)(REMARK|Remark|remark|REM|Rem|rem|R|r)\b(.*)$",
+             bygroups(op_symbol(Comment.Single), Comment.Single)),
+            (r"(?:\\)(BLOCK|Block|block)\b(.*)$",
+             bygroups(op_symbol(Text), Text)),
             (r"\\\n", Text),
             (r"(?i)\\("
              r"(?:prog(?:ram)?)"
@@ -92,7 +119,7 @@
         "PROG": "PROGRAMM",
         "PROGRAM": "PROGRAMM",
         "ALGO": "ALGORITHMUS",
-        "ALGORITHM": "ALGORITHM",
+        "ALGORITHM": "ALGORITHMUS",
         "PROC": "PROZEDUR",
         "PROCEDURE": "PROZEDUR",
         "FUNC": "FUNKTION",