Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
view pygments_lexer_pseudocode2/bases.py @ 160:b4028838e0c8
Implement lexer option "prohibit_raiseonerror_filter".
Sphinx raises by default when an Error token is seen (by means of the
"raiseonerror" filter that is applied by default to lexers in Sphinx).
This option skips this and allows error locations to be seen and highlighted
properly.
While there convert most Generic.Error tokens to Error tokens because now
they can be handled by a lexer with "prohibit_raiseonerror_filter=True".
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 08 May 2026 17:46:28 +0200 |
| parents | e1663ac707b0 |
| children |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""Some common bases for the lexers."""

__all__ = ["LexBase",
           "uni_name", "py_innerstring_rules", "py_name_rules"]


import sys

from pygments import unistring
from pygments.lexer import RegexLexer, combined, bygroups, include
from pygments.token import (Comment, Error, Name, Number, Other, String)


# True when running on a Python 2 (or older) interpreter.
PY2 = sys.version_info[0] <= 2


#
# SPDX-SnippetBegin
# SPDX-License-Identifier: BSD-2-Clause
# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
#

# Regular expression source for an identifier: one "xid_start" character
# followed by any number of "xid_continue" characters.
uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue)


# PY3 allows no @staticmethod but PY2 needs it.
#
# `_staticmethod` is therefore the real `staticmethod` on PY2 and an
# identity decorator otherwise (the decorated function is returned
# unchanged and stays a plain function).
if PY2:
    _staticmethod = staticmethod
else:
    def _staticmethod(fn):
        """Return `fn` unchanged (no-op stand-in for `staticmethod`)."""
        return fn


def py_innerstring_rules(ttype):
    """Return the lexer rules for the inside of a Python-like string.

    Interpolation placeholders (old ``%``-style and new ``{}``-style)
    are emitted as `String.Interpol`; all other matched string content
    is emitted as `ttype`.

    None of the rules matches a newline; the state that includes these
    rules has to handle it.

    :param ttype: the token type to use for ordinary string content
    :return: a new list of rule tuples

    """
    return [
        # the old style '%s' % (...) string formatting (still valid in Py3)
        (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
         '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
        # the new style '{}'.format(...) string formatting
        (r'\{'
         r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
         r'(\![sra])?'                       # conversion
         r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
         r'\}', String.Interpol),

        #
        # backslashes, quotes and formatting signs must be parsed
        # one at a time
        #
        (r'[^\\\'"%{\n]+', ttype),
        (r'[\'"\\]', ttype),
        # unhandled string formatting sign
        (r'%|(\{{1,2})', ttype)
        # newlines are an error (use "nl" state)
    ]


def py_name_rules(ttype, deco_ttype=Name.Decorator):
    """Return the lexer rules for names.

    :param ttype: the token type for a plain identifier (`uni_name`)
    :param deco_ttype: the token type for an identifier that is
                       directly prefixed with ``@``
    :return: a new list of rule tuples; the decorator rule comes first

    """
    return [
        # We recognize decorator syntax here
        (r'@' + uni_name, deco_ttype),
        #
        # Python's new matrix multiplication operator:
        # not used here in pseudocode
        #   (r'@', Operator),
        (uni_name, ttype),
    ]

# SPDX-SnippetEnd


class LexBase(RegexLexer):

    """A base that defines some common lexer states.

    Default flags are not important.

    Besides the shared states in `tokens` it provides the rule
    callbacks `op_ignore` and `op_fixed`.

    """

    def op_ignore(lexer, match, ctx=None):
        """Unconditionally ignore the match.

        Rule callback that yields no token at all.

        :param lexer: the lexer that calls the callback (unused)
        :param match: the regular expression match object
        :param ctx: an optional context object; if given, its `pos`
                    attribute is set to the end of the match

        """
        # Never executed: the mere presence of `yield` makes this function
        # a generator, so calling it produces an empty token stream.
        if False:
            yield match.start(), Other, ""
        # Test for presence, not truthiness: a context object must be
        # advanced even if it happens to evaluate as false.
        if ctx is not None:
            ctx.pos = match.end()

    @_staticmethod
    def op_fixed(toktype, value):
        """Unconditionally yield a given token type and value.

        :param toktype: the token type to emit
        :param value: the token text to emit instead of the matched text
        :return: a rule callback that yields exactly one token
                 ``(match.start(), toktype, value)`` and, if it is called
                 with a context object, sets its `pos` attribute to the
                 end of the match

        """

        def _op_fixed(lexer, match, ctx=None):
            yield match.start(), toktype, value
            # Presence check, see `op_ignore`.
            if ctx is not None:
                ctx.pos = match.end()

        return _op_fixed

    tokens = {
        #
        # These states are borrowed from Pygments' Python lexer.
        # Their names have been prefixed with `py-'.
        #
        # SPDX-SnippetBegin
        # SPDX-License-Identifier: BSD-2-Clause
        # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
        # SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
        #

        # Numeric literals; the float rules are listed before the
        # integer rule.
        'py-numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],

        # String openers: each one enters a combined state made of the
        # matching escape rules and the matching string body state.
        # The triple-quoted openers are listed before the single-quoted
        # ones.
        'py-strings': [
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('py-stringescape', 'py-tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('py-stringescape', 'py-tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('py-stringescape', 'py-dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('py-stringescape', 'py-sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('py-bytesescape', 'py-tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('py-bytesescape', 'py-tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('py-bytesescape', 'py-dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('py-bytesescape', 'py-sqs')),
        ],
        # Escapes valid in text strings: the named/unicode escapes plus
        # everything from `py-bytesescape`.
        'py-stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('py-bytesescape')
        ],
        'py-bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
             String.Escape)
        ],
        # Body of a "..." string; a bare newline is flagged as Error and
        # the state is not left.
        'py-dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('py-strings-double'),
            (r'\n', Error),     # added by fag
        ],
        # Body of a '...' string; a bare newline is flagged as Error and
        # the state is not left.
        'py-sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('py-strings-single'),
            (r'\n', Error),     # added by fag
        ],
        # Body of a """...""" string; newlines are part of the string.
        'py-tdqs': [
            (r'"""', String.Double, '#pop'),
            include('py-strings-double'),
            (r'\n', String.Double)
        ],
        # Body of a '''...''' string; newlines are part of the string.
        'py-tsqs': [
            (r"'''", String.Single, '#pop'),
            include('py-strings-single'),
            (r'\n', String.Single)
        ],
        'py-strings-single': py_innerstring_rules(String.Single),
        'py-strings-double': py_innerstring_rules(String.Double),

        'py-name': py_name_rules(Name.Entity),
        # SPDX-SnippetEnd

        # This snippet is from the Pygments' documentation "Write your own lexer"
        # Nested /* ... */ comments: every opener pushes the state again
        # and every closer pops one level.
        'multiline-nested-comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # Same as above for nested (* ... *) comments.
        'multiline-nested-comment-alt': [
            (r'[^*()]+', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*()]', Comment.Multiline),
        ]
    }
