comparison pygments_lexer_pseudocode2/algpseudocode.py @ 87:d8ca835c74ea

FIX: Erroneous parsing of \tt-XXX and \ttx-XXX: the token-type group must be restricted, because otherwise it matches too much when different kinds of delimiters (braces, parentheses, angle or square brackets) are mixed on a single line.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 02 May 2026 10:07:59 +0200
parents ae5e741d2a9b
children 7414eed7b275
comparison
equal deleted inserted replaced
86:0c05dc09c9e2 87:d8ca835c74ea
14 14
15 import re 15 import re
16 16
17 import pygments.util 17 import pygments.util
18 from pygments.lexer import bygroups, include, words 18 from pygments.lexer import bygroups, include, words
19 from pygments.token import (Comment, Keyword, Name, Operator, Punctuation, 19 from pygments.token import (Comment, Generic, Keyword, Name, Operator,
20 Text, Whitespace) 20 Punctuation, Text, Whitespace)
21 21
22 # 22 #
23 # Relative imports do not work with pygments.lexers.load_lexer_from_file() 23 # Relative imports do not work with pygments.lexers.load_lexer_from_file()
24 # in all of our supported Python releases. 24 # in all of our supported Python releases.
25 # 25 #
153 153
154 return _op_symbol 154 return _op_symbol
155 155
156 def op_explicit_tokentype(lexer, match, ctx=None): 156 def op_explicit_tokentype(lexer, match, ctx=None):
157 needed_css = match.group("type") 157 needed_css = match.group("type")
158 toktype = REVERSED_STANDARD_TYPES.get(needed_css, Text) 158 toktype = REVERSED_STANDARD_TYPES.get(needed_css, None)
159 yield match.start(), toktype, match.group("character") 159 if toktype is None:
160 # Be more error friendly
161 toktype = Generic.Error
162 val = match.group()
163 else:
164 val = match.group("character")
165 yield match.start(), toktype, val
160 if ctx: 166 if ctx:
161 ctx.pos = match.end() 167 ctx.pos = match.end()
162 168
163 tokens = { 169 tokens = {
164 "root": [ 170 "root": [
325 ], 331 ],
326 "explicit-tokentype": [ 332 "explicit-tokentype": [
327 # All these REs are CASE-SENSITIVE! 333 # All these REs are CASE-SENSITIVE!
328 334
329 # Multiple characters possible, but no escaping! 335 # Multiple characters possible, but no escaping!
330 (r"\\ttx\-(?P<type>[^/:|=*+!\$~]+)(?P<sep>[/:|=*+!\$~])" 336 (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)(?P<sep>[/:|=*+!\$~])"
331 r"(?P<character>(.|\n)+?)(?P=sep)", 337 r"(?P<character>(.|\n)+?)(?P=sep)",
332 op_explicit_tokentype), 338 op_explicit_tokentype),
333 (r"\\ttx\-(?P<type>[^{]+)\{" 339 (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\{(?P<character>[^}]+?)\}",
334 r"(?P<character>[^}]+?)\}", 340 op_explicit_tokentype),
335 op_explicit_tokentype), 341 (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\((?P<character>[^)]+?)\)",
336 (r"\\ttx\-(?P<type>[^\(]+)\(" 342 op_explicit_tokentype),
337 r"(?P<character>[^\)]+?)\)", 343 (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)<(?P<character>[^>]+?)>",
338 op_explicit_tokentype), 344 op_explicit_tokentype),
339 (r"\\ttx\-(?P<type>[^<]+)<" 345 (r"\\ttx\-(?P<type>[a-zA-Z0-9_-]+?)\[(?P<character>[^\]]+?)\]",
340 r"(?P<character>[^>]+?)>",
341 op_explicit_tokentype), 346 op_explicit_tokentype),
342 347
343 # Every character is possible: no escaping needed! 348 # Every character is possible: no escaping needed!
344 (r"\\tt-(?P<type>[^/]+)/(?P<character>(?:.|\n))", 349 (r"\\tt-(?P<type>[^/]+?)/(?P<character>(?:.|\n))",
345 op_explicit_tokentype), 350 op_explicit_tokentype),
346 ], 351 ],
347 } 352 }
348 353
349 def __init__(self, **options): 354 def __init__(self, **options):