diff pygments_lexer_pseudocode2/bases.py @ 34:1f741934205e

Begin a new Pseudocode lexer using numbers and strings from Python
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 21 Apr 2026 19:40:08 +0200
parents db1bc740a201
children a3151d837258
line wrap: on
line diff
--- a/pygments_lexer_pseudocode2/bases.py	Tue Apr 21 12:33:17 2026 +0200
+++ b/pygments_lexer_pseudocode2/bases.py	Tue Apr 21 19:40:08 2026 +0200
@@ -5,12 +5,11 @@
 # :-
 r"""Some common bases for the lexers."""
 
-
-__all__ = []
+__all__ = ["LexBase"]
 
 
-from pygments.lexer import RegexLexer
-from pygments.token import Number
+from pygments.lexer import RegexLexer, combined, bygroups, include
+from pygments.token import Number, String
 
 
 class LexBase(RegexLexer):
@@ -21,13 +20,43 @@
 
     """
 
-    tokens = {
-#
-# This state is borrowed from Pygment's Python lexer.
 #
 # SPDX-SnippetBegin
 # SPDX-License-Identifier: BSD-2-Clause
 # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+#
+    def py_innerstring_rules(ttype):  # ttype: token emitted for plain text
+        return [  # rule list; used below for 'py-strings-single'/'-double'
+            # old-style '%s' % (...) format specifiers (still valid in Py3)
+            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
+            # new-style '{}'.format(...) replacement fields
+            (r'\{'
+             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
+             r'(\![sra])?'                       # conversion
+             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
+             r'\}', String.Interpol),
+            # Plain text: a run containing no backslash, quote, '%', '{'
+            # or newline is one token.  Backslashes, quotes and formatting
+            # signs must be parsed one at a time so that the escape and
+            # string-terminator rules of the including state can match.
+            (r'[^\\\'"%{\n]+', ttype),
+            (r'[\'"\\]', ttype),
+            # a '%', '{' or '{{' that is not part of a valid format spec
+            (r'%|(\{{1,2})', ttype)
+            # newlines are an error (use "nl" state)
+        ]
+# SPDX-SnippetEnd
+
+    tokens = {
+#
+# These states are borrowed from the Pygments Python lexer.
+# Their names have been prefixed with `py-'.
+#
+# SPDX-SnippetBegin
+# SPDX-License-Identifier: BSD-2-Clause
+# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+#
         'py-numbers': [
             (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
              r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
@@ -37,5 +66,55 @@
             (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
             (r'\d(?:_?\d)*', Number.Integer),
         ],
+        'py-strings': [
+            # non-raw strings
+            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-tdqs')),
+            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-tsqs')),
+            ('([uU]?)(")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-dqs')),
+            ("([uU]?)(')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-sqs')),
+            # non-raw bytes
+            ('([bB])(""")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-tdqs')),
+            ("([bB])(''')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-tsqs')),
+            ('([bB])(")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-dqs')),
+            ("([bB])(')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-sqs')),
+        ],
+        'py-stringescape': [
+            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
+            include('py-bytesescape')
+        ],
+        'py-bytesescape': [
+            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
+             String.Escape)
+        ],
+        'py-dqs': [
+            (r'"', String.Double, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
+            include('py-strings-double')
+        ],
+        'py-sqs': [
+            (r"'", String.Single, '#pop'),
+            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
+            include('py-strings-single')
+        ],
+        'py-tdqs': [
+            (r'"""', String.Double, '#pop'),
+            include('py-strings-double'),
+            (r'\n', String.Double)
+        ],
+        'py-tsqs': [
+            (r"'''", String.Single, '#pop'),
+            include('py-strings-single'),
+            (r'\n', String.Single)
+        ],
+        'py-strings-single': py_innerstring_rules(String.Single),
+        'py-strings-double': py_innerstring_rules(String.Double),
 # SPDX-SnippetEnd
     }