Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
view pygments_lexer_pseudocode2/bases.py @ 56:661461fb4dfc
Make the "py-name" rules parameterized: allow the token type to be provided.
For this to work the implementing function must be global, because the
access to the class is not yet possible at construction time.
So consistently make some previous LexBase members module globals.
Make some LexBase members module globals consistently.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 27 Apr 2026 12:37:27 +0200 |
| parents | 5bfa9113d3c4 |
| children | 7153e945a3d6 |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""Some common bases for the lexers."""

__all__ = [
    "LexBase",
    "uni_name",
    "py_innerstring_rules",
    "py_name_rules",
]


from pygments import unistring
from pygments.lexer import RegexLexer, combined, bygroups, include
from pygments.token import Error, Name, Number, String, Comment


#
# SPDX-SnippetBegin
# SPDX-License-Identifier: BSD-2-Clause
# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
#

# Regex for an identifier: one XID_Start character followed by any number
# of XID_Continue characters.
uni_name = "[{}][{}]*".format(unistring.xid_start, unistring.xid_continue)


def py_innerstring_rules(ttype):
    """Return the lexer rules for the inside of a string literal.

    :param ttype: token type emitted for ordinary string content
                  (formatting placeholders always get `String.Interpol`)
    :return: a new list of rule tuples; order matters because the first
             matching rule wins

    Newlines are deliberately not matched here; the including state has
    to handle them.

    """
    # the old style '%s' % (...) string formatting (still valid in Py3)
    percent_placeholder = (
        r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
        '[hlL]?[E-GXc-giorsaux%]'
    )
    # the new style '{}'.format(...) string formatting
    brace_placeholder = (
        r'\{'
        r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'    # field name
        r'(\![sra])?'                         # conversion
        r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
        r'\}'
    )
    # backslashes, quotes and formatting signs must be parsed one at a time
    plain_run = r'[^\\\'"%{\n]+'
    quote_or_backslash = r'[\'"\\]'
    # a formatting sign that did not start a valid placeholder
    stray_format_sign = r'%|(\{{1,2})'

    return [
        (percent_placeholder, String.Interpol),
        (brace_placeholder, String.Interpol),
        (plain_run, ttype),
        (quote_or_backslash, ttype),
        (stray_format_sign, ttype),
    ]


def py_name_rules(ttype, deco_ttype=Name.Decorator):
    """Return the lexer rules for names and decorator-like names.

    :param ttype: token type emitted for a plain name
    :param deco_ttype: token type emitted for ``@name``

    A bare ``@`` (Python's matrix multiplication operator) is not
    recognized because it is not used in pseudocode.

    """
    # Decorator syntax must be tried before the plain name.
    decorator_rule = (r'@' + uni_name, deco_ttype)
    plain_name_rule = (uni_name, ttype)
    return [decorator_rule, plain_name_rule]


def _py_string_start_rules(affix, escape_state):
    """Return the four rules that open a string with the given prefix.

    :param affix: regex group matching the string prefix
    :param escape_state: name of the state with the escape rules; it is
                         combined with the state for the string body

    Triple-quoted forms come first so that they are not mistaken for an
    empty single-quoted string.

    """
    openers = (
        ('"""', String.Double, 'py-tdqs'),
        ("'''", String.Single, 'py-tsqs'),
        ('"', String.Double, 'py-dqs'),
        ("'", String.Single, 'py-sqs'),
    )
    return [
        (affix + '(' + quote + ')',
         bygroups(String.Affix, quote_ttype),
         combined(escape_state, body_state))
        for quote, quote_ttype, body_state in openers
    ]

# SPDX-SnippetEnd


class LexBase(RegexLexer):

    """A base that defines some common lexer states.

    Default flags are not important.

    The states are meant to be pulled into the states of a concrete
    lexer; there is no "root" state here.

    """

    tokens = {
        #
        # These states are borrowed from Pygment's Python lexer.
        # Their names have been prefixed with `py-'.
        #
        # SPDX-SnippetBegin
        # SPDX-License-Identifier: BSD-2-Clause
        # SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
        # SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
        #
        'py-numbers': [
            # floats with a decimal point and an optional exponent
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            # floats with an exponent but without a decimal point
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'py-strings': (
            # non-raw strings
            _py_string_start_rules('([uU]?)', 'py-stringescape')
            # non-raw bytes
            + _py_string_start_rules('([bB])', 'py-bytesescape')
        ),
        'py-stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('py-bytesescape'),
        ],
        'py-bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
             String.Escape),
        ],
        'py-dqs': [
            (r'"', String.Double, '#pop'),
            # included here for raw strings
            (r'\\\\|\\"|\\\n', String.Escape),
            include('py-strings-double'),
            # added by fag: a bare newline in a single-line string
            (r'\n', Error),
        ],
        'py-sqs': [
            (r"'", String.Single, '#pop'),
            # included here for raw strings
            (r"\\\\|\\'|\\\n", String.Escape),
            include('py-strings-single'),
            # added by fag: a bare newline in a single-line string
            (r'\n', Error),
        ],
        'py-tdqs': [
            (r'"""', String.Double, '#pop'),
            include('py-strings-double'),
            (r'\n', String.Double),
        ],
        'py-tsqs': [
            (r"'''", String.Single, '#pop'),
            include('py-strings-single'),
            (r'\n', String.Single),
        ],
        'py-strings-single': py_innerstring_rules(String.Single),
        'py-strings-double': py_innerstring_rules(String.Double),
        'py-name': py_name_rules(Name.Entity),
        # SPDX-SnippetEnd

        # This snippet is from the Pygments' documentation
        # "Write your own lexer"
        'multiline-nested-comment': [
            (r'[^*/]+', Comment.Multiline),
            # a nested opener pushes this state again ...
            (r'/\*', Comment.Multiline, '#push'),
            # ... and every closer leaves one nesting level
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
    }
