# HG changeset patch # User Franz Glasner # Date 1779348755 -7200 # Node ID 051c8877ee220b8b7b3f3d0c011fa9ad975e4761 # Parent afbca50b7dc1205828f26dc6536d0910f622c8b1 Implement lexer option "strict_tokentype". It allows the \ttX command to synthesize not yet existing token types. diff -r afbca50b7dc1 -r 051c8877ee22 docs/conf.py --- a/docs/conf.py Wed May 20 20:35:37 2026 +0200 +++ b/docs/conf.py Thu May 21 09:32:35 2026 +0200 @@ -230,6 +230,10 @@ app.add_lexer("no-raiseonerror-algpseudocode", functools.partial(AlgPseudocodeLexer, prohibit_raiseonerror_filter=True)) + # Example for \ttX{Generic.Not.Yet.Existing}{...} + app.add_lexer("nonstrict-algpseudocode", + functools.partial(AlgPseudocodeLexer, + strict_tokentype=False)) # # To test with the custom filter that maps # Token.Error to Token.Generic.Error diff -r afbca50b7dc1 -r 051c8877ee22 docs/lexer-algpseudocode.rst --- a/docs/lexer-algpseudocode.rst Wed May 20 20:35:37 2026 +0200 +++ b/docs/lexer-algpseudocode.rst Thu May 21 09:32:35 2026 +0200 @@ -123,6 +123,19 @@ If :py:obj:`True` all the ``\ENDxxx`` commands will be skipped and yield no output. + .. describe:: strict_tokentype + + **Type:** :py:class:`bool` + + **Default:** :py:obj:`True` + + Control whether `Explicit Token Types`_ with an unknown token type yield + :py:class:`pygments.token.Token.Generic.Error` tokens + (when ``True``; this is the default) + or a token type that is synthesized on the fly by + :py:func:`pygments.token.string_to_tokentype` + (when ``False``). + .. describe:: gets **Type:** :py:class:`str` or :py:obj:`None` @@ -476,6 +489,21 @@ Its corresponding token type (the associated `key` in this dictionary) will be used as token type for the token. + If a corresponding token type is not found the lexer's behaviour depends + on the lexer option ``strict_tokentype`` (see `Lexer Options`_): + + If ``True`` (the default) the command yields a + :py:class:`pygments.token.Token.Generic.Error` token type for the + given command's content. 
+ + If ``False`` then the `Pygments`_ function + :py:func:`pygments.token.string_to_tokentype` will be called. + This function yields either an existing token type or synthesizes + a new one on the fly. + The associated highlighting in the output may not be well defined. + + For this argument escaping is neither needed nor supported. + #. The content of the second argument will given the token type of the first parameter. @@ -517,11 +545,20 @@ */ \text{• \\ttX{s\}{Escaping brace \\\} and backslash \\\\!\}} \ttX{s}{Escaping brace \} and backslash \\!} /* - * This is a non-existing token type: you get some generic error markup - * with a Generic.Error token and no expansion. + * This is a non-existing token type: + * by default you get some generic error markup with a Generic.Error + * token and no expansion. + * See also `Lexer Options` and `strict_tokentype`. */ \text{• \\ttX{NON-EXISTING\}{∈_∌\}(p1, p2)} \ttX{NON_EXISTING}{∈_∌}(p1, p2) +An example using a lexer with ``strict_tokentype=False`` +(with the templates used, the highlighting is obviously the same as for standard text): + +.. 
code-block:: nonstrict-algpseudocode + + \text{• \\ttX{Generic.Not.Yet.Existing\}{∈_∌\}(p1, p2)} \ttX{Generic.Not.Yet.Existing}{∈_∌}(p1, p2) + Old Syntax (Deprecated) diff -r afbca50b7dc1 -r 051c8877ee22 pygments_lexer_pseudocode2/lexers/algpseudocode.py --- a/pygments_lexer_pseudocode2/lexers/algpseudocode.py Wed May 20 20:35:37 2026 +0200 +++ b/pygments_lexer_pseudocode2/lexers/algpseudocode.py Thu May 21 09:32:35 2026 +0200 @@ -18,7 +18,8 @@ import pygments.util from pygments.lexer import bygroups, include, words from pygments.token import (Comment, Error, Generic, Keyword, Name, Operator, - Punctuation, Text, Whitespace) + Punctuation, Text, Whitespace, + string_to_tokentype) # # Relative imports do not work with pygments.lexers.load_lexer_from_file() @@ -237,12 +238,26 @@ def op_explicit_tokentype_ex_start(lexer, match, ctx): needed_css = match.group("type") - ctx.explicit_token_type = REVERSED_STANDARD_TYPES.get(needed_css, None) - if ctx.explicit_token_type is None: + toktype = REVERSED_STANDARD_TYPES.get(needed_css, None) + if toktype is None: + if not lexer.strict_tokentype: + toktype = string_to_tokentype(needed_css) + if toktype is None: + _logger.warning( + "Unhandled explicit token type: %s", needed_css) + else: + _logger.debug( + "Synthesized new token type: %s", needed_css) + else: + _logger.warning( + "Unhandled explicit token type: %s", needed_css) + if toktype is None: # Be more error friendly ctx.explicit_token_type = Generic.Error - _logger.warning("Unhandled explicit token type: %s", match.group()) yield match.start(), ctx.explicit_token_type, match.group() + else: + ctx.explicit_token_type = toktype + # Nothing to yield: just record the required token type ctx.pos = match.end() def op_explicit_tokentype_ex_value(lexer, match, ctx): @@ -555,6 +570,8 @@ def __init__(self, **options): self.no_end = pygments.util.get_bool_opt( options, "no_end", default=False) + self.strict_tokentype = pygments.util.get_bool_opt( + options, "strict_tokentype", 
default=True) self.symbol_gets = options.get("gets", None) if self.symbol_gets is None: self.symbol_gets = self.SYMBOLS["<-"] # Default: "⟵" # U+27F5 diff -r afbca50b7dc1 -r 051c8877ee22 tests/test_algpseudo.py --- a/tests/test_algpseudo.py Wed May 20 20:35:37 2026 +0200 +++ b/tests/test_algpseudo.py Thu May 21 09:32:35 2026 +0200 @@ -11,6 +11,7 @@ import pygments import pygments.lexers import pygments.formatters +from pygments.token import Token import _testhelper @@ -18,6 +19,7 @@ class TestSnippets(unittest.TestCase, _testhelper.TokenAssertHelper): def setUp(self): + # The standard lexer self.lexer = pygments.lexers.load_lexer_from_file( ALGLEXERFILENAME, ALGLEXERCLASS) @@ -794,7 +796,7 @@ pygments.lex( """\\ttX{nv}{simple\\part 2}""", self.lexer)) - def test_extended_explicit_tokentype_non_existing_type(self): + def test_extended_explicit_tokentype_non_existing_type_strict(self): self.assertTokenStreamEqualComplete( [("Generic.Error", u"\\ttX{NON_EXISTING}{"), ("Generic.Error", u"∈_∌"), @@ -810,6 +812,23 @@ pygments.lex( u"\\ttX{NON_EXISTING}{∈_∌}(p1, p2)", self.lexer)) + def test_extended_explicit_tokentype_non_existing_type_lenient(self): + lexer = pygments.lexers.load_lexer_from_file( + ALGLEXERFILENAME, ALGLEXERCLASS, strict_tokentype=False) + self.assertTokenStreamEqualComplete( + [(getattr(getattr(getattr(Token, "DOES"), "IT"), "NOT-EXIST"), + u"∈_∌"), + ("Punctuation", "("), + ("Name.Entity", "p1"), + ("Punctuation", ","), + ("Text", " "), + ("Name.Entity", "p2"), + ("Punctuation", ")"), + ("Text.Whitespace", "\n"), + ], + pygments.lex( + u"\\ttX{DOES.IT.NOT-EXIST}{∈_∌}(p1, p2)", lexer)) + def test_just_braces_in_expressions(self): self.assertTokenStreamEqualComplete( [("Punctuation", "{"),