changeset 166:6a7dace0141e

Also implement filters to replace tokens in a token stream: TokenReplaceFilter and ErrorToGenericErrorTokenFilter
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 08 May 2026 23:54:06 +0200
parents 88f872c50aae
children ddefcc20367c
files README.rst docs/conf.py docs/details-filter.rst docs/details.rst docs/filterlist.rst docs/intro.rst pygments_lexer_pseudocode2/filters/__init__.py pyproject.toml tests/test_filter.py
diffstat 9 files changed, 232 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Fri May 08 23:50:24 2026 +0200
+++ b/README.rst	Fri May 08 23:54:06 2026 +0200
@@ -13,6 +13,10 @@
 
 .. include:: lexerlist.rst
 
+It additionally contains the following filters:
+
+.. include:: filterlist.rst
+
 
 Installation
 ============
--- a/docs/conf.py	Fri May 08 23:50:24 2026 +0200
+++ b/docs/conf.py	Fri May 08 23:54:06 2026 +0200
@@ -96,3 +96,10 @@
     app.add_lexer("no-raiseonerror-algpseudocode",
                   functools.partial(AlgPseudocodeLexer,
                                     prohibit_raiseonerror_filter=True))
+    #
+    # To test with the custom filter that maps
+    # Token.Error to Token.Generic.Error
+    #
+    app.add_lexer("genericerror-algpseudocode",
+                  functools.partial(AlgPseudocodeLexer,
+                                    filters=["errortogenericerror"]))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/details-filter.rst	Fri May 08 23:54:06 2026 +0200
@@ -0,0 +1,31 @@
+.. -*- coding: utf-8; indent-tabs-mode: nil; -*-
+
+.. _details-filter:
+
+********
+ Filter
+********
+
+ErrorToGenericErrorTokenFilter
+==============================
+
+:Name: errortogenericerror
+:Filter Options: none
+
+Replace all :py:class:`pygments.token.Token.Error` tokens in a stream by
+:py:class:`pygments.token.Token.Generic.Error` tokens.
+
+
+TokenReplaceFilter
+==================
+
+:Name: tokenreplace
+:Required Filter Options:
+   **token_from**
+     **Type:** :py:class:`str` or :py:class:`pygments.token.Token`
+
+   **token_to**
+     **Type:** :py:class:`str` or :py:class:`pygments.token.Token`
+
+Replace all tokens of the type given in `token_from` by tokens of the
+type given in `token_to`.
--- a/docs/details.rst	Fri May 08 23:50:24 2026 +0200
+++ b/docs/details.rst	Fri May 08 23:54:06 2026 +0200
@@ -2,10 +2,10 @@
 
 *********
  Details
-********* 
+*********
 
 .. toctree::
 
    details-algpseudocode
    details-frpseudocode
-   
+   details-filter
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/filterlist.rst	Fri May 08 23:54:06 2026 +0200
@@ -0,0 +1,20 @@
+.. -*- coding: utf-8; indent-tabs-mode: nil; -*-
+
+.. to be included in some documentation pages
+
+.. list-table::
+   :align: left
+   :header-rows: 1
+
+   * - Filter Name
+     - Description
+     - Filter Class
+
+   * - errortogenericerror
+     - Replace all ``Error`` tokens in a stream by ``Generic.Error`` tokens
+     - ErrorToGenericErrorTokenFilter
+
+   * - tokenreplace
+     - A configurable token replacer: replace a given token type by another
+       given token type in a stream
+     - TokenReplaceFilter
--- a/docs/intro.rst	Fri May 08 23:50:24 2026 +0200
+++ b/docs/intro.rst	Fri May 08 23:54:06 2026 +0200
@@ -1,10 +1,13 @@
 .. -*- coding: utf-8; indent-tabs-mode: nil; -*-
 
 
-**************   
+**************
  Introduction
 **************
 
+Lexer
+=====
+
 The package contains the following lexers:
 
 .. include:: lexerlist.rst
@@ -129,6 +132,36 @@
 
 .. literalinclude:: examples/algorithm-edmonds-karp.pseudocode
    :language: algpseudocode-de
-   :lines: 2-           
+   :lines: 2-
+
+You will find more details :ref:`here <details-algpseudocode>`.
+
+
+Filter
+======
+
+The package contains the following filters:
+
+.. include:: filterlist.rst
+
+The AlgPseudocode lexer yields an error token for the following code block.
+`Sphinx`_ therefore suppresses highlighting completely:
+
+.. code-block:: none
 
-More details you will find :ref:`here <details-algpseudocode>`.          
+   \EXPR{TEST}
+
+With a custom AlgPseudocode lexer that has ``prohibit_raiseonerror_filter``
+activated, the output in `Sphinx`_ looks like this:
+
+.. code-block:: no-raiseonerror-algpseudocode
+
+   \EXPR{TEST}
+
+
+With the "errortogenericerror" filter the very same block is highlighted
+as:
+
+.. code-block:: genericerror-algpseudocode
+
+   \EXPR{TEST}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pygments_lexer_pseudocode2/filters/__init__.py	Fri May 08 23:54:06 2026 +0200
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+r"""Sub-package for filters."""
+
+__all__ = [
+    "TokenReplaceFilter",
+    "ErrorToGenericErrorTokenFilter",
+]
+
+
+from pygments.filter import Filter
+from pygments.token import (Error, Generic, Token,
+                            is_token_subtype, string_to_tokentype)
+
+
+class TokenReplaceFilter(Filter):
+
+    """Replace a given fixed token type with another token."""
+
+    def __init__(self, **options):
+        """Specify the replacement options:
+
+        :param token_from: the token type that is to be replaced
+        :type token_from: str or pygments.token.Token
+
+        :param token_to: the token type that is yielded instead
+        :type token_to: str or pygments.token.Token
+
+        Both these arguments are *required*!
+
+        """
+        Filter.__init__(self, **options)
+        # The option "token_from" is required!
+        self.token_from = options["token_from"]
+        if not is_token_subtype(self.token_from, Token):
+            self.token_from = string_to_tokentype(self.token_from)
+        # The option "token_to" is required!
+        self.token_to = options["token_to"]
+        if not is_token_subtype(self.token_to, Token):
+            self.token_to = string_to_tokentype(self.token_to)
+
+    def filter(self, lexer, stream):
+        for ttype, value in stream:
+            if ttype is self.token_from:
+                yield self.token_to, value
+            else:
+                yield ttype, value
+
+
+class ErrorToGenericErrorTokenFilter(TokenReplaceFilter):
+
+    """Convert all :py:class:`pygments.token.Token.Error` tokens to
+    :py:class:`pygments.token.Token.Generic.Error` tokens.
+
+    """
+
+    def __init__(self, **options):
+        options["token_from"] = Error
+        options["token_to"] = Generic.Error
+        TokenReplaceFilter.__init__(self, **options)
--- a/pyproject.toml	Fri May 08 23:50:24 2026 +0200
+++ b/pyproject.toml	Fri May 08 23:54:06 2026 +0200
@@ -44,16 +44,23 @@
 homepage = "https://github.com/svvac/pseudocode-pygments-lexer"
 
 [project.entry-points.'pygments.lexers']
+# The key is not significant here
 # The mostly original and somewhat extended Pseudocode lexer (fr)
 fr_pseudocodelexer = "pygments_lexer_pseudocode2.lexers.fr_pseudocode:FrPseudocodeLexer"
 algpseudocodelexer = "pygments_lexer_pseudocode2.lexers.algpseudocode:AlgPseudocodeLexer"
 algpseudocodelexer_de = "pygments_lexer_pseudocode2.lexers.algpseudocode:AlgPseudocodeLexer_DE"
 algpseudocodelexer_fr = "pygments_lexer_pseudocode2.lexers.algpseudocode:AlgPseudocodeLexer_FR"
 
+[project.entry-points."pygments.filters"]
+# The key *is* significant: it is the name the filter will be recognized as.
+tokenreplace = "pygments_lexer_pseudocode2.filters:TokenReplaceFilter"
+errortogenericerror = "pygments_lexer_pseudocode2.filters:ErrorToGenericErrorTokenFilter"
+
 [tool.setuptools]
 packages = [
     "pygments_lexer_pseudocode2",
-    "pygments_lexer_pseudocode2.lexers",    
+    "pygments_lexer_pseudocode2.lexers",
+    "pygments_lexer_pseudocode2.filters",
 ]
 platforms = ["any"]
 zip-safe = true
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_filter.py	Fri May 08 23:54:06 2026 +0200
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+
+from _tsetup import ALGLEXERFILENAME, ALGLEXERCLASS
+
+import unittest
+
+import pygments
+import pygments.lexers
+
+import _testhelper
+import pygments_lexer_pseudocode2.filters
+
+
+class TestFilters(unittest.TestCase, _testhelper.TokenAssertHelper):
+
+    def test_prohibit_raiseonerror_filter(self):
+        lexer = pygments.lexers.load_lexer_from_file(
+            ALGLEXERFILENAME, ALGLEXERCLASS,
+            prohibit_raiseonerror_filter=True)
+        lexer.add_filter("raiseonerror")
+        self.assertFalse(lexer.filters)
+
+    def test_sphinx_default(self):
+        lexer = pygments.lexers.load_lexer_from_file(
+            ALGLEXERFILENAME, ALGLEXERCLASS)
+        lexer.add_filter("raiseonerror")
+        self.assertTrue(lexer.filters)
+
+    def test_error(self):
+        lexer = pygments.lexers.load_lexer_from_file(
+            ALGLEXERFILENAME, ALGLEXERCLASS)
+        self.assertTokenStreamEqual(
+            [("Error", "\\"),
+             ],
+            pygments.lex(r"\EXPR{", lexer))
+
+    def test_error_to_genericerror_filter(self):
+        #
+        # Simulate the call sequence as done in Sphinx.
+        # Cannot use the filter name here because the tests are run
+        # without the package being installed.
+        #
+        filter = (pygments_lexer_pseudocode2.filters
+                  .ErrorToGenericErrorTokenFilter())
+        lexer = pygments.lexers.load_lexer_from_file(
+            ALGLEXERFILENAME, ALGLEXERCLASS,
+            filters=(filter,))
+        lexer.add_filter("raiseonerror")
+        self.assertEqual(2, len(lexer.filters))
+        self.assertTokenStreamEqual(
+            [("Generic.Error", "\\"),
+             ],
+            pygments.lex(r"\EXPR{", lexer))
+
+
+if __name__ == "__main__":
+    unittest.main()