changeset 288:298841bc4dee

Allow "normal" Pygments token names in "\ttX" ("Error", "Text.Whitespace", ...)
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 22 May 2026 12:32:38 +0200
parents f506d752e801
children 6fc7f9c1d89d
files docs/lexer-algpseudocode.rst pygments_lexer_pseudocode2/lexers/algpseudocode.py pygments_lexer_pseudocode2/utils.py tests/test_algpseudo.py
diffstat 4 files changed, 119 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/docs/lexer-algpseudocode.rst	Fri May 22 12:31:09 2026 +0200
+++ b/docs/lexer-algpseudocode.rst	Fri May 22 12:32:38 2026 +0200
@@ -484,10 +484,17 @@
 
 This command has two required parameters:
 
-#. The content of the first argument `ARG1` must be a `value` in the
-   :py:data:`pygments.token.STANDARD_TYPES` dict.
-   Its corresponding token type (the associated `key` in this dictionary)
-   will be used as token type for the token.
+#. The content of the first argument `ARG1` can be one of
+
+   - A `value` in the :py:data:`pygments.token.STANDARD_TYPES` dict.
+
+     Its corresponding token type (the associated `key` in this
+     dictionary) will be used as token type for the token.
+
+   - A string representation of an existing token type without the
+     ``Token.`` prefix
+     (e.g. ``String``, ``Generic``, ``Generic.EmphStrong``, ``Text``,
+     ``Text.Multiline``).
 
    If a corresponding token type is not found the lexer's behaviour depends
    on the lexer option ``strict_tokentype`` (see `Lexer Options`_):
@@ -498,9 +505,10 @@
 
      If ``False`` then the `Pygments`_ function
      :py:func:`pygments.token.string_to_tokentype` will be called.
-     This function yields either an existing token type or synthesizes
+     This function returns either an existing token type or synthesizes
      a new one on the fly.
-     The associated highlighting in the output may not be well defined.
+     The highlighting associated with freshly created token types in the
+     output may not be well defined.
 
    For this argument escaping is neither needed nor supported.
 
@@ -509,12 +517,16 @@
 
    Standard `Escaping Rules`_ apply to this argument!
 
-Examples:
+.. rubric:: Examples:
 
 .. code-block:: algpseudocode
 
-   \text{• \\ttX{kc\}{C\}}      \ttX{kc}{C}         \rem C as Keyword.Constant
-   \text{• \\ttX{ow\}{∈\}}      \ttX{ow}{∈}         \rem ∈ as Operator.Word
+   \text{• \\ttX{\}{token\}}                 \ttX{}{token}     \rem just a base "Token"
+
+   \text{• \\ttX{kc\}{C\}}                   \ttX{kc}{C}         \rem C as Keyword.Constant
+   \text{• \\ttX{Keyword.Constant\}{C\}}     \ttX{Keyword.Constant}{C}         \rem C as Keyword.Constant
+   \text{• \\ttX{ow\}{∈\}}                   \ttX{ow}{∈}         \rem ∈ as Operator.Word
+   \text{• \\ttX{Operator.Word\}{∈\}}        \ttX{Operator.Word}{∈}         \rem ∈ as Operator.Word
    \text{• \\ttX{kc\}{A Constant Keyword\}}  \ttX{kc}{A Constant Keyword}  \rem An explicit Keyword.Constant
    \text{• \\ttX{nv\}{A Variable Name\}}     \ttX{nv}{A Variable Name}     \rem An explicit Name.Variable
    \text{• \\ttX{ni\}{An Entity*Name\}}      \ttX{ni}{An Entity*Name}      \rem An explicit Name.Entity
@@ -524,21 +536,24 @@
       * The line below has ∈_∌ as (peculiar) function name.
       * Their params are automatic (i.e. a normal expression).
       */
-   \text{• \\ttX{nf\}{∈_∌\}(p1, p2)}             \ttX{nf}{∈_∌}(p1, p2)
+   \text{• \\ttX{nf\}{∈_∌\}(p1, p2)}               \ttX{nf}{∈_∌}(p1, p2)
+   \text{• \\ttX{Name.Function\}{∈_∌\}(p1, p2)}    \ttX{Name.Function}{∈_∌}(p1, p2)
      /*
       * The line below has ∈_∌ as (peculiar) decorator name (as used in Python).
       * Their params are automatic (i.e. a normal expression).
       */
-   \text{• \\ttX{nd\}{∈_∌\}(p1, p2)}             \ttX{nd}{∈_∌}(p1, p2)
+   \text{• \\ttX{nd\}{∈_∌\}(p1, p2)}               \ttX{nd}{∈_∌}(p1, p2)
+   \text{• \\ttX{Name.Decorator\}{∈_∌\}(p1, p2)}   \ttX{Name.Decorator}{∈_∌}(p1, p2)
      /*
       * Normal emphasis ("strong")
       */
-   \text{• \\ttX{gs\}{this is strong\}}          \ttX{gs}{this is strong}
+   \text{• \\ttX{gs\}{this is strong\}}                       \ttX{gs}{this is strong}
+   \text{• \\ttX{Generic.Strong\}{this is strong\}}           \ttX{Generic.Strong}{this is strong}
      /*
       * A strong emphasis.
-      * Note that the backslash is a valid delimiter!
       */
-   \text{• \\ttX{ges\}{A Strong Emphasis!\}}     \ttX{ges}{A Strong Emphasis!}
+   \text{• \\ttX{ges\}{A Strong Emphasis!\}}                  \ttX{ges}{A Strong Emphasis!}
+   \text{• \\ttX{Generic.EmphStrong\}{A Strong Emphasis!\}}   \ttX{Generic.EmphStrong}{A Strong Emphasis!}
      /*
       * Escaping is allowed and needed for the closing brace!
       * The example token type is a "String".
@@ -550,15 +565,14 @@
       * token and no expansion.
       * See also `Lexer Options` and `strict_tokentype`.
       */
-   \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)}   \ttX{NON_EXISTING}{∈_∌}(p1, p2)
+   \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)}     \ttX{NON-EXISTING}{∈_∌}(p1, p2)
 
 An example with a lexer and ``strict_tokentype=False``
 (highlighting obviously is like standard text with the templates used):
 
 .. code-block:: nonstrict-algpseudocode
 
-   \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)}      \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2)
-
+   \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)}     \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2)
 
 
 Old Syntax (Deprecated)
--- a/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Fri May 22 12:31:09 2026 +0200
+++ b/pygments_lexer_pseudocode2/lexers/algpseudocode.py	Fri May 22 12:32:38 2026 +0200
@@ -26,7 +26,8 @@
 # in all of our supported Python releases.
 #
 from pygments_lexer_pseudocode2.lexers.bases import LexBase
-from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES
+from pygments_lexer_pseudocode2.utils import (
+    REVERSED_STANDARD_TYPES, string_to_defined_tokentype)
 from pygments_lexer_pseudocode2 import uniprops
 
 #
@@ -240,17 +241,20 @@
         needed_css = match.group("type")
         toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
         if toktype is None:
-            if not lexer.strict_tokentype:
+            if lexer.strict_tokentype:
+                toktype = string_to_defined_tokentype(needed_css)
+                if toktype is None:
+                    _logger.warning(
+                        "Unhandled explicit token type: %s", needed_css)
+            else:
                 toktype = string_to_tokentype(needed_css)
                 if toktype is None:
                     _logger.warning(
                         "Unhandled explicit token type: %s", needed_css)
                 else:
                     _logger.debug(
-                        "Synthesized new token type: %s", needed_css)
-            else:
-                _logger.warning(
-                    "Unhandled explicit token type: %s", needed_css)
+                        "New token type may have been synthesized: %s",
+                        needed_css)
         if toktype is None:
             # Be more error friendly
             ctx.explicit_token_type = Generic.Error
@@ -532,7 +536,7 @@
             #
             # New extended (more flexible, allows escaping)
             #
-            (r"""\\ttX[ \t]*\{(?P<type>[^}]+)\}[ \t]*\{""",
+            (r"""\\ttX[ \t]*\{(?P<type>[^}]*)\}[ \t]*\{""",
              op_explicit_tokentype_ex_start,
              "extended-explicit-tokentype"),
 
--- a/pygments_lexer_pseudocode2/utils.py	Fri May 22 12:31:09 2026 +0200
+++ b/pygments_lexer_pseudocode2/utils.py	Fri May 22 12:32:38 2026 +0200
@@ -7,7 +7,10 @@
 
 """
 
-__all__ = ["REVERSED_STANDARD_TYPES"]
+__all__ = [
+    "REVERSED_STANDARD_TYPES",
+    "string_to_defined_tokentype",
+]
 
 
 import pygments.token
@@ -19,3 +22,39 @@
 for _toktype, _cssstyle in pygments.token.STANDARD_TYPES.items():
     REVERSED_STANDARD_TYPES[_cssstyle] = _toktype
 del _toktype, _cssstyle
+
+
+def string_to_defined_tokentype(s):
+    """Determine whether the token type `s` given as string is defined.
+
+    :param str s: A token type string as in
+                 :py:func:`pygments.token.string_to_tokentype`.
+    :returns: An **existing** token if
+              :py:func:`pygments.token.string_to_tokentype`
+              would return an already existing token type,
+              :py:obj:`None` otherwise.
+    :rtype: :py:class:`pygments.token._TokenType` or :py:obj:`None`
+
+    This implementation is needed because
+    :py:func:`pygments.token.string_to_tokentype` synthesizes a new token
+    type whenever the requested token type does not exist yet.
+    And :py:func:`is_token_subtype` works only on token instances.
+
+    """
+    ttype = pygments.token.Token
+    ttype_prefix = "Token."
+    if not s:
+        return ttype
+    for part in s.split("."):
+        for subtype in ttype.subtypes:
+            subtypename = str(subtype)
+            # Remove prefix
+            assert subtypename.startswith(ttype_prefix)
+            subtypename = subtypename[len(ttype_prefix):]
+            if subtypename == part:
+                ttype = subtype
+                ttype_prefix += "%s." % (part,)
+                break
+        else:
+            return None
+    return ttype
--- a/tests/test_algpseudo.py	Fri May 22 12:31:09 2026 +0200
+++ b/tests/test_algpseudo.py	Fri May 22 12:32:38 2026 +0200
@@ -13,6 +13,8 @@
 import pygments.formatters
 from pygments.token import Token
 
+from pygments_lexer_pseudocode2.utils import string_to_defined_tokentype
+
 import _testhelper
 
 
@@ -1243,5 +1245,40 @@
         self.assertTrue(highlighted.startswith(r"""\begin{Verbatim}"""))
 
 
+class TokenBehaviour(unittest.TestCase):
+
+    def test_string_to_defined_tokentype(self):
+        t = string_to_defined_tokentype("Generic")
+        self.assertIs(t, Token.Generic)
+
+    def test_string_to_defined_tokentype_2(self):
+        t = string_to_defined_tokentype("Generic.Error")
+        self.assertIs(t, Token.Generic.Error)
+
+    def test_string_to_defined_tokentype_empty(self):
+        t = string_to_defined_tokentype("")
+        self.assertIs(t, Token)
+
+    def test_string_to_undefined_tokentype(self):
+        self.assertIsNone(string_to_defined_tokentype("non-existing"))
+        # twice to assert that it is not created by the call
+        self.assertIsNone(string_to_defined_tokentype("non-existing"))
+
+    def test_string_to_undefined_tokentype_2(self):
+        self.assertIsNone(string_to_defined_tokentype("Generic.non-existing"))
+        # twice to assert that it is not created by the call
+        self.assertIsNone(string_to_defined_tokentype("Generic.non-existing"))
+
+    def test_string_to_undefined_tokentype_3(self):
+        self.assertIsNone(string_to_defined_tokentype("Not.Yet.Existing"))
+        # twice to assert that it is not created by the call
+        self.assertIsNone(string_to_defined_tokentype("Not.Yet.Existing"))
+
+    def test_string_to_undefined_tokentype_4(self):
+        self.assertIsNone(string_to_defined_tokentype("Generic..Error"))
+        # twice to assert that it is not created by the call
+        self.assertIsNone(string_to_defined_tokentype("Generic..Error"))
+
+
 if __name__ == "__main__":
     unittest.main()