# HG changeset patch # User Franz Glasner # Date 1779445958 -7200 # Node ID 298841bc4deeb32d560582300a82fb4fcb5b4713 # Parent f506d752e8012b3622bfde974106fa98dfd724c8 Allow "normal" Pygments token names in "\ttX" ("Error", "Text.Whitespace", ...) diff -r f506d752e801 -r 298841bc4dee docs/lexer-algpseudocode.rst --- a/docs/lexer-algpseudocode.rst Fri May 22 12:31:09 2026 +0200 +++ b/docs/lexer-algpseudocode.rst Fri May 22 12:32:38 2026 +0200 @@ -484,10 +484,17 @@ This command has two required parameters: -#. The content of the first argument `ARG1` must be a `value` in the - :py:data:`pygments.token.STANDARD_TYPES` dict. - Its corresponding token type (the associated `key` in this dictionary) - will be used as token type for the token. +#. The content of the first argument `ARG1` can be one of + + - A `value` in the :py:data:`pygments.token.STANDARD_TYPES` dict. + + Its corresponding token type (the associated `key` in this + dictionary) will be used as token type for the token. + + - A string representation of an existing token type without the + ``Token.`` prefix + (e.g. ``String``, ``Generic``, ``Generic.EmphStrong``, ``Text``, + ``Text.Multiline``). If a corresponding token type is not found the lexer's behaviour depends on the lexer option ``strict_tokentype`` (see `Lexer Options`_): @@ -498,9 +505,10 @@ If ``False`` then the `Pygments`_ function :py:func:`pygments.token.string_to_tokentype` will be called. - This function yields either an existing token type or synthesizes + This function returns either an existing token type or synthesizes a new one on the fly. - The associated highlighting in the output may not be well defined. + The associated highlighting with freshly created token types in the + output may not be well defined. For this argument escaping is neither needed nor supported. @@ -509,12 +517,16 @@ Standard `Escaping Rules`_ apply to this argument! -Examples: +.. rubric:: Examples: .. 
code-block:: algpseudocode - \text{• \\ttX{kc\}{C\}} \ttX{kc}{C} \rem C as Keyword.Constant - \text{• \\ttX{ow\}{∈\}} \ttX{ow}{∈} \rem ∈ as Operator.Word + \text{• \\ttX{\}{token\}} \ttX{}{token} \rem just a base "Token" + + \text{• \\ttX{kc\}{C\}} \ttX{kc}{C} \rem C as Keyword.Constant + \text{• \\ttX{Keyword.Constant\}{C\}} \ttX{Keyword.Constant}{C} \rem C as Keyword.Constant + \text{• \\ttX{ow\}{∈\}} \ttX{ow}{∈} \rem ∈ as Operator.Word + \text{• \\ttX{Operator.Word\}{∈\}} \ttX{Operator.Word}{∈} \rem ∈ as Operator.Word \text{• \\ttX{kc\}{A Constant Keyword\}} \ttX{kc}{A Constant Keyword} \rem An explicit Keyword.Constant \text{• \\ttX{nv\}{A Variable Name\}} \ttX{nv}{A Variable Name} \rem An explicit Name.Variable \text{• \\ttX{ni\}{An Entity*Name\}} \ttX{ni}{An Entity*Name} \rem An explicit Name.Entity @@ -524,21 +536,24 @@ * The line below has ∈_∌ as (peculiar) function name. * Their params are automatic (i.e. a normal expression). */ - \text{• \\ttX{nf\}{∈_∌\}(p1, p2)} \ttX{nf}{∈_∌}(p1, p2) + \text{• \\ttX{nf\}{∈_∌\}(p1, p2)} \ttX{nf}{∈_∌}(p1, p2) + \text{• \\ttX{Name.Function\}{∈_∌\}(p1, p2)} \ttX{Name.Function}{∈_∌}(p1, p2) /* * The line below has ∈_∌ as (peculiar) decorator name (as used in Python). * Their params are automatic (i.e. a normal expression). */ - \text{• \\ttX{nd\}{∈_∌\}(p1, p2)} \ttX{nd}{∈_∌}(p1, p2) + \text{• \\ttX{nd\}{∈_∌\}(p1, p2)} \ttX{nd}{∈_∌}(p1, p2) + \text{• \\ttX{Name.Decorator\}{∈_∌\}(p1, p2)} \ttX{Name.Decorator}{∈_∌}(p1, p2) /* * Normal emphasis ("strong") */ - \text{• \\ttX{gs\}{this is strong\}} \ttX{gs}{this is strong} + \text{• \\ttX{gs\}{this is strong\}} \ttX{gs}{this is strong} + \text{• \\ttX{Generic.Strong\}{this is strong\}} \ttX{Generic.Strong}{this is strong} /* * A strong emphasis. - * Note that the backslash is a valid delimiter! 
*/ - \text{• \\ttX{ges\}{A Strong Emphasis!\}} \ttX{ges}{A Strong Emphasis!} + \text{• \\ttX{ges\}{A Strong Emphasis!\}} \ttX{ges}{A Strong Emphasis!} + \text{• \\ttX{Generic.EmphStrong\}{A Strong Emphasis!\}} \ttX{Generic.EmphStrong}{A Strong Emphasis!} /* * Escaping is allowed and needed for the closing brace! * The example token type is a "String". @@ -550,15 +565,14 @@ * token and no expansion. * See also `Lexer Options` and `strict_tokentype`. */ - \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)} \ttX{NON_EXISTING}{∈_∌}(p1, p2) + \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)} \ttX{NON-EXISTING}{∈_∌}(p1, p2) An example with a lexer and ``strict_tokentype=False`` (highlighting obviously is like standard text with the templates used): .. code-block:: nonstrict-algpseudocode - \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)} \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2) - + \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)} \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2) Old Syntax (Deprecated) diff -r f506d752e801 -r 298841bc4dee pygments_lexer_pseudocode2/lexers/algpseudocode.py --- a/pygments_lexer_pseudocode2/lexers/algpseudocode.py Fri May 22 12:31:09 2026 +0200 +++ b/pygments_lexer_pseudocode2/lexers/algpseudocode.py Fri May 22 12:32:38 2026 +0200 @@ -26,7 +26,8 @@ # in all of our supported Python releases. 
# from pygments_lexer_pseudocode2.lexers.bases import LexBase -from pygments_lexer_pseudocode2.utils import REVERSED_STANDARD_TYPES +from pygments_lexer_pseudocode2.utils import ( + REVERSED_STANDARD_TYPES, string_to_defined_tokentype) from pygments_lexer_pseudocode2 import uniprops # @@ -240,17 +241,20 @@ needed_css = match.group("type") toktype = REVERSED_STANDARD_TYPES.get(needed_css, None) if toktype is None: - if not lexer.strict_tokentype: + if lexer.strict_tokentype: + toktype = string_to_defined_tokentype(needed_css) + if toktype is None: + _logger.warning( + "Unhandled explicit token type: %s", needed_css) + else: toktype = string_to_tokentype(needed_css) if toktype is None: _logger.warning( "Unhandled explicit token type: %s", needed_css) else: _logger.debug( - "Synthesized new token type: %s", needed_css) - else: - _logger.warning( - "Unhandled explicit token type: %s", needed_css) + "New token type may have been synthesized: %s", + needed_css) if toktype is None: # Be more error friendly ctx.explicit_token_type = Generic.Error @@ -532,7 +536,7 @@ # # New extended (more flexible, allows escaping) # - (r"""\\ttX[ \t]*\{(?P<type>[^}]+)\}[ \t]*\{""", + (r"""\\ttX[ \t]*\{(?P<type>[^}]*)\}[ \t]*\{""", op_explicit_tokentype_ex_start, "extended-explicit-tokentype"), diff -r f506d752e801 -r 298841bc4dee pygments_lexer_pseudocode2/utils.py --- a/pygments_lexer_pseudocode2/utils.py Fri May 22 12:31:09 2026 +0200 +++ b/pygments_lexer_pseudocode2/utils.py Fri May 22 12:32:38 2026 +0200 @@ -7,7 +7,10 @@ """ -__all__ = ["REVERSED_STANDARD_TYPES"] +__all__ = [ + "REVERSED_STANDARD_TYPES", + "string_to_defined_tokentype", +] import pygments.token @@ -19,3 +22,39 @@ for _toktype, _cssstyle in pygments.token.STANDARD_TYPES.items(): REVERSED_STANDARD_TYPES[_cssstyle] = _toktype del _toktype, _cssstyle + + +def string_to_defined_tokentype(s): + """Determine whether the token type `s` given as string is defined. 
+ + :param str s: A token type string as in + :py:func:`pygments.token.string_to_tokentype`. + :returns: An **existing** token if + :py:func:`pygments.token.string_to_tokentype` + would return an already existing token type, + :py:obj:`None` otherwise. + :rtype: :py:class:`pygments.token._TokenType` or :py:obj:`None` + + This implementation is needed because + :py:func:`pygments.token.string_to_tokentype` synthesizes a new token + on not yet existing token types. + And :py:func:`is_token_subtype` works only on token instances. + + """ + ttype = pygments.token.Token + ttype_prefix = "Token." + if not s: + return ttype + for part in s.split("."): + for subtype in ttype.subtypes: + subtypename = str(subtype) + # Remove prefix + assert subtypename.startswith(ttype_prefix) + subtypename = subtypename[len(ttype_prefix):] + if subtypename == part: + ttype = subtype + ttype_prefix += "%s." % (part,) + break + else: + return None + return ttype diff -r f506d752e801 -r 298841bc4dee tests/test_algpseudo.py --- a/tests/test_algpseudo.py Fri May 22 12:31:09 2026 +0200 +++ b/tests/test_algpseudo.py Fri May 22 12:32:38 2026 +0200 @@ -13,6 +13,8 @@ import pygments.formatters from pygments.token import Token +from pygments_lexer_pseudocode2.utils import string_to_defined_tokentype + import _testhelper @@ -1243,5 +1245,40 @@ self.assertTrue(highlighted.startswith(r"""\begin{Verbatim}""")) +class TokenBehaviour(unittest.TestCase): + + def test_string_to_defined_tokentype(self): + t = string_to_defined_tokentype("Generic") + self.assertIs(t, Token.Generic) + + def test_string_to_defined_tokentype_2(self): + t = string_to_defined_tokentype("Generic.Error") + self.assertIs(t, Token.Generic.Error) + + def test_string_to_defined_tokentype_empty(self): + t = string_to_defined_tokentype("") + self.assertIs(t, Token) + + def test_string_to_undefined_tokentype(self): + self.assertIsNone(string_to_defined_tokentype("non-existing")) + # twice to assert that it is not created by the call + 
self.assertIsNone(string_to_defined_tokentype("non-existing")) + + def test_string_to_undefined_tokentype_2(self): + self.assertIsNone(string_to_defined_tokentype("Generic.non-existing")) + # twice to assert that it is not created by the call + self.assertIsNone(string_to_defined_tokentype("Generic.non-existing")) + + def test_string_to_undefined_tokentype_3(self): + self.assertIsNone(string_to_defined_tokentype("Not.Yet.Existing")) + # twice to assert that it is not created by the call + self.assertIsNone(string_to_defined_tokentype("Not.Yet.Existing")) + + def test_string_to_undefined_tokentype_4(self): + self.assertIsNone(string_to_defined_tokentype("Generic..Error")) + # twice to assert that it is not created by the call + self.assertIsNone(string_to_defined_tokentype("Generic..Error")) + + if __name__ == "__main__": unittest.main()