changeset 286:051c8877ee22

Implement lexer option "strict_tokentype". It allows the \ttX command to synthesize not yet existing token types.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 21 May 2026 09:32:35 +0200
parents afbca50b7dc1
children f506d752e801
files docs/conf.py docs/lexer-algpseudocode.rst pygments_lexer_pseudocode2/lexers/algpseudocode.py tests/test_algpseudo.py
diffstat 4 files changed, 84 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/docs/conf.py	Wed May 20 20:35:37 2026 +0200
+++ b/docs/conf.py	Thu May 21 09:32:35 2026 +0200
@@ -230,6 +230,10 @@
     app.add_lexer("no-raiseonerror-algpseudocode",
                   functools.partial(AlgPseudocodeLexer,
                                     prohibit_raiseonerror_filter=True))
+    # Example for \ttX{Generic.Not.Yet.Existing}{...}
+    app.add_lexer("nonstrict-algpseudocode",
+                  functools.partial(AlgPseudocodeLexer,
+                                    strict_tokentype=False))
     #
     # To test with the custom filter that maps
     # Token.Error to Token.Generic.Error
--- a/docs/lexer-algpseudocode.rst	Wed May 20 20:35:37 2026 +0200
+++ b/docs/lexer-algpseudocode.rst	Thu May 21 09:32:35 2026 +0200
@@ -123,6 +123,19 @@
      If :py:obj:`True` all the ``\ENDxxx`` commands will be skipped and yield
      no output.
 
+  .. describe:: strict_tokentype
+
+     **Type:** :py:class:`bool`
+
+     **Default:** :py:obj:`True`
+
+     Control what `Explicit Token Types`_ yield for an unknown token type:
+     either :py:class:`pygments.token.Token.Generic.Error` tokens
+     (when ``True``, which is the default)
+     or a token type that is synthesized on the fly by
+     :py:func:`pygments.token.string_to_tokentype`
+     (when ``False``).
+
   .. describe:: gets
 
      **Type:** :py:class:`str` or :py:obj:`None`
@@ -476,6 +489,21 @@
    Its corresponding token type (the associated `key` in this dictionary)
    will be used as token type for the token.
 
+   If a corresponding token type is not found the lexer's behaviour depends
+   on the lexer option ``strict_tokentype`` (see `Lexer Options`_):
+
+     If ``True`` (the default) the command yields a
+     :py:class:`pygments.token.Token.Generic.Error` token type for the
+     given command's content.
+
+     If ``False`` then the `Pygments`_ function
+     :py:func:`pygments.token.string_to_tokentype` will be called.
+     This function yields either an existing token type or synthesizes
+     a new one on the fly.
+     The associated highlighting in the output may not be well defined.
+
+   For this argument escaping is neither needed nor supported.
+
 #. The content of the second argument will be given the token type of
    the first parameter.
 
@@ -517,11 +545,20 @@
       */
    \text{• \\ttX{s\}{Escaping brace \\\} and backslash \\\\!\}}   \ttX{s}{Escaping brace \} and backslash \\!}
      /*
-      * This is a non-existing token type: you get some generic error markup
-      * with a Generic.Error token and no expansion.
+      * This is a non-existing token type:
+      * by default you get some generic error markup with a Generic.Error
+      * token and no expansion.
+      * See also `Lexer Options` and `strict_tokentype`.
       */
    \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)}   \ttX{NON_EXISTING}{∈_∌}(p1, p2)
 
+An example with a lexer configured with ``strict_tokentype=False``
+(with the templates used the highlighting obviously looks like standard text):
+
+.. code-block:: nonstrict-algpseudocode
+
+   \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)}      \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2)
+
 
 
 Old Syntax (Deprecated)
--- a/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Wed May 20 20:35:37 2026 +0200
+++ b/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Thu May 21 09:32:35 2026 +0200
@@ -18,7 +18,8 @@
 import pygments.util
 from pygments.lexer import bygroups, include, words
 from pygments.token import (Comment, Error, Generic, Keyword, Name, Operator,
-                            Punctuation, Text, Whitespace)
+                            Punctuation, Text, Whitespace,
+                            string_to_tokentype)
 
 #
 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
@@ -237,12 +238,26 @@
 
     def op_explicit_tokentype_ex_start(lexer, match, ctx):
         needed_css = match.group("type")
-        ctx.explicit_token_type = REVERSED_STANDARD_TYPES.get(needed_css, None)
-        if ctx.explicit_token_type is None:
+        toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
+        if toktype is None:
+            if not lexer.strict_tokentype:
+                toktype = string_to_tokentype(needed_css)
+                if toktype is None:
+                    _logger.warning(
+                        "Unhandled explicit token type: %s", needed_css)
+                else:
+                    _logger.debug(
+                        "Synthesized new token type: %s", needed_css)
+            else:
+                _logger.warning(
+                    "Unhandled explicit token type: %s", needed_css)
+        if toktype is None:
             # Be more error friendly
             ctx.explicit_token_type = Generic.Error
-            _logger.warning("Unhandled explicit token type: %s", match.group())
             yield match.start(), ctx.explicit_token_type, match.group()
+        else:
+            ctx.explicit_token_type = toktype
+            # Nothing to yield: just record the required token type
         ctx.pos = match.end()
 
     def op_explicit_tokentype_ex_value(lexer, match, ctx):
@@ -555,6 +570,8 @@
     def __init__(self, **options):
         self.no_end = pygments.util.get_bool_opt(
             options, "no_end", default=False)
+        self.strict_tokentype = pygments.util.get_bool_opt(
+            options, "strict_tokentype", default=True)
         self.symbol_gets = options.get("gets", None)
         if self.symbol_gets is None:
             self.symbol_gets = self.SYMBOLS["<-"]   # Default: "⟵"  # U+27F5
--- a/tests/test_algpseudo.py	Wed May 20 20:35:37 2026 +0200
+++ b/tests/test_algpseudo.py	Thu May 21 09:32:35 2026 +0200
@@ -11,6 +11,7 @@
 import pygments
 import pygments.lexers
 import pygments.formatters
+from pygments.token import Token
 
 import _testhelper
 
@@ -18,6 +19,7 @@
 class TestSnippets(unittest.TestCase, _testhelper.TokenAssertHelper):
 
     def setUp(self):
+        # The standard lexer
         self.lexer = pygments.lexers.load_lexer_from_file(
             ALGLEXERFILENAME, ALGLEXERCLASS)
 
@@ -794,7 +796,7 @@
             pygments.lex(
                 """\\ttX{nv}{simple\\part 2}""", self.lexer))
 
-    def test_extended_explicit_tokentype_non_existing_type(self):
+    def test_extended_explicit_tokentype_non_existing_type_strict(self):
         self.assertTokenStreamEqualComplete(
             [("Generic.Error", u"\\ttX{NON_EXISTING}{"),
              ("Generic.Error", u"∈_∌"),
@@ -810,6 +812,23 @@
             pygments.lex(
                 u"\\ttX{NON_EXISTING}{∈_∌}(p1, p2)", self.lexer))
 
+    def test_extended_explicit_tokentype_non_existing_type_lenient(self):
+        lexer = pygments.lexers.load_lexer_from_file(
+            ALGLEXERFILENAME, ALGLEXERCLASS, strict_tokentype=False)
+        self.assertTokenStreamEqualComplete(
+            [(getattr(getattr(getattr(Token, "DOES"), "IT"), "NOT-EXIST"),
+              u"∈_∌"),
+             ("Punctuation", "("),
+             ("Name.Entity", "p1"),
+             ("Punctuation", ","),
+             ("Text", " "),
+             ("Name.Entity", "p2"),
+             ("Punctuation", ")"),
+             ("Text.Whitespace", "\n"),
+             ],
+            pygments.lex(
+                u"\\ttX{DOES.IT.NOT-EXIST}{∈_∌}(p1, p2)", lexer))
+
     def test_just_braces_in_expressions(self):
         self.assertTokenStreamEqualComplete(
             [("Punctuation", "{"),