diff pygments_lexer_pseudocode2/lexers/bases.py @ 164:a4317957148b

Move all lexers into a subpackage pygments_lexer_pseudocode2.lexers. This is to prepare for a new subpackage with filters.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 08 May 2026 21:19:54 +0200
parents pygments_lexer_pseudocode2/bases.py@e1663ac707b0
children afbca50b7dc1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pygments_lexer_pseudocode2/lexers/bases.py	Fri May 08 21:19:54 2026 +0200
@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+r"""Some common bases for the lexers."""
+
+__all__ = ["LexBase", "uni_name", "py_innerstring_rules", "py_name_rules"]
+
+
+import sys
+
+from pygments import unistring
+from pygments.lexer import RegexLexer, combined, bygroups, include
+from pygments.token import (Comment, Error, Name, Number, Other, String)
+
+
+PY2 = sys.version_info[0] <= 2
+
+
+#
+# SPDX-SnippetBegin
+# SPDX-License-Identifier: BSD-2-Clause
+# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
+#
+
+uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue)
+
+
+"""PY3 works without @staticmethod (no-op decorator); PY2 needs it."""
+if PY2:
+    _staticmethod = staticmethod
+else:
+    def _staticmethod(fn):
+        return fn
+
+
+def py_innerstring_rules(ttype):
+    return [
+        # the old style '%s' % (...) string formatting (still valid in Py3)
+        (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
+         '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
+        # the new style '{}'.format(...) string formatting
+        (r'\{'
+         r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
+         r'(\![sra])?'                       # conversion
+         r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
+         r'\}', String.Interpol),
+        #
+        # backslashes, quotes and formatting signs must be parsed
+        # one at a time
+        #
+        (r'[^\\\'"%{\n]+', ttype),
+        (r'[\'"\\]', ttype),
+        # unhandled string formatting sign
+        (r'%|(\{{1,2})', ttype)
+        # newlines are an error (use "nl" state)
+    ]
+
+
+def py_name_rules(ttype, deco_ttype=Name.Decorator):
+    return [
+        # We recognize decorator syntax here
+        (r'@' + uni_name, deco_ttype),
+        #
+        # Python's new matrix multiplication operator:
+        # not used here in pseudocode
+        # (r'@', Operator),
+        (uni_name, ttype),
+    ]
+
+# SPDX-SnippetEnd
+
+
+class LexBase(RegexLexer):
+
+    """A base that defines some common lexer states.
+
+    Default flags are not important.
+
+    """
+
+    def op_ignore(lexer, match, ctx=None):
+        """Unconditionally ignore the match."""
+        if False:
+            yield match.start(), Other, ""
+        if ctx:
+            ctx.pos = match.end()
+
+    @_staticmethod
+    def op_fixed(toktype, value):
+        """Unconditionally yield a given token type and value."""
+
+        def _op_fixed(lexer, match, ctx=None):
+            yield match.start(), toktype, value
+            if ctx:
+                ctx.pos = match.end()
+
+        return _op_fixed
+
+    tokens = {
+#
+# These states are borrowed from Pygments' Python lexer.
+# Their names have been prefixed with `py-`.
+#
+# SPDX-SnippetBegin
+# SPDX-License-Identifier: BSD-2-Clause
+# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
+# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
+#
+        'py-numbers': [
+            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
+             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
+            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
+            (r'0[oO](?:_?[0-7])+', Number.Oct),
+            (r'0[bB](?:_?[01])+', Number.Bin),
+            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
+            (r'\d(?:_?\d)*', Number.Integer),
+        ],
+        'py-strings': [
+            # non-raw strings
+            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-tdqs')),
+            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-tsqs')),
+            ('([uU]?)(")', bygroups(String.Affix, String.Double),
+             combined('py-stringescape', 'py-dqs')),
+            ("([uU]?)(')", bygroups(String.Affix, String.Single),
+             combined('py-stringescape', 'py-sqs')),
+            # non-raw bytes
+            ('([bB])(""")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-tdqs')),
+            ("([bB])(''')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-tsqs')),
+            ('([bB])(")', bygroups(String.Affix, String.Double),
+             combined('py-bytesescape', 'py-dqs')),
+            ("([bB])(')", bygroups(String.Affix, String.Single),
+             combined('py-bytesescape', 'py-sqs')),
+        ],
+        'py-stringescape': [
+            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
+            include('py-bytesescape')
+        ],
+        'py-bytesescape': [
+            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
+             String.Escape)
+        ],
+        'py-dqs': [
+            (r'"', String.Double, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
+            include('py-strings-double'),
+            (r'\n', Error),    # added by fag
+        ],
+        'py-sqs': [
+            (r"'", String.Single, '#pop'),
+            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
+            include('py-strings-single'),
+            (r'\n', Error),    # added by fag
+        ],
+        'py-tdqs': [
+            (r'"""', String.Double, '#pop'),
+            include('py-strings-double'),
+            (r'\n', String.Double)
+        ],
+        'py-tsqs': [
+            (r"'''", String.Single, '#pop'),
+            include('py-strings-single'),
+            (r'\n', String.Single)
+        ],
+        'py-strings-single': py_innerstring_rules(String.Single),
+        'py-strings-double': py_innerstring_rules(String.Double),
+        'py-name': py_name_rules(Name.Entity),
+# SPDX-SnippetEnd
+    # This snippet is from the Pygments documentation ("Write your own lexer")
+    'multiline-nested-comment': [
+            (r'[^*/]+', Comment.Multiline),
+            (r'/\*', Comment.Multiline, '#push'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[*/]', Comment.Multiline),
+        ],
+    'multiline-nested-comment-alt': [
+            (r'[^*()]+', Comment.Multiline),
+            (r'\(\*', Comment.Multiline, '#push'),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            (r'[*()]', Comment.Multiline),
+        ]
+    }