Mercurial > hgrepos > Python > libs > pygments-lexer-pseudocode2
diff pygments_lexer_pseudocode2/lexers/bases.py @ 164:a4317957148b
Move all lexers into a subpackage pygments_lexer_pseudocode2.lexers.
This is to prepare for a new subpackage with filters.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 08 May 2026 21:19:54 +0200 |
| parents | pygments_lexer_pseudocode2/bases.py@e1663ac707b0 |
| children | afbca50b7dc1 |
line wrap: on
line diff
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2026 Franz Glasner
# SPDX-License-Identifier: MIT
# :-
r"""Some common bases for the lexers."""

__all__ = ["LexBase", "uni_name", "py_innerstring_rules", "py_name_rules"]


import sys

from pygments import unistring
from pygments.lexer import RegexLexer, combined, bygroups, include
from pygments.token import (Comment, Error, Name, Number, Other, String)


PY2 = sys.version_info[0] <= 2


#
# SPDX-SnippetBegin
# SPDX-License-Identifier: BSD-2-Clause
# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
#

# Regular expression source for an identifier: one XID_Start character
# followed by any number of XID_Continue characters.
uni_name = "[%s][%s]*" % (unistring.xid_start, unistring.xid_continue)


# PY3 allows no @staticmethod but PY2 needs it.
# On PY2 `_staticmethod` is the builtin `staticmethod`; on any other
# version it is an identity decorator that leaves the function untouched.
if PY2:
    _staticmethod = staticmethod
else:
    def _staticmethod(fn):
        """Return `fn` unchanged."""
        return fn


def py_innerstring_rules(ttype):
    """Return the lexer rules for the inside of a string literal.

    Old-style (``%``) and new-style (``{}``) format specifiers are
    tokenized as `String.Interpol`; every other piece of string content
    matched here is tokenized as `ttype`.

    No rule in the returned list matches a newline: the state that
    includes these rules has to handle newlines itself.

    """
    return [
        # the old style '%s' % (...) string formatting (still valid in Py3)
        (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
         '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
        # the new style '{}'.format(...) string formatting
        (r'\{'
         r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
         r'(\![sra])?'                       # conversion
         r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
         r'\}', String.Interpol),
        #
        # backslashes, quotes and formatting signs must be parsed
        # one at a time
        #
        (r'[^\\\'"%{\n]+', ttype),
        (r'[\'"\\]', ttype),
        # unhandled string formatting sign
        (r'%|(\{{1,2})', ttype)
        # newlines are an error (use "nl" state)
    ]


def py_name_rules(ttype, deco_ttype=Name.Decorator):
    """Return the lexer rules for names.

    An ``@`` immediately followed by an identifier is tokenized as
    `deco_ttype`; a bare identifier is tokenized as `ttype`.

    """
    return [
        # We recognize decorator syntax here
        (r'@' + uni_name, deco_ttype),
        #
        # Python's new matrix multiplication operator:
        # not used here in pseudocode
        # (r'@', Operator),
        (uni_name, ttype),
    ]

# SPDX-SnippetEnd


class LexBase(RegexLexer):

    """A base that defines some common lexer states.

    Default flags are not important.

    The `tokens` table only holds building-block states (numbers,
    strings, names, nested comments); it defines no ``root`` state.

    """

    def op_ignore(lexer, match, ctx=None):
        """Unconditionally ignore the match.

        Lexer callback that yields no token at all.  If a context `ctx`
        is given its position is advanced to the end of `match`.

        """
        # Never executed: the mere presence of `yield` turns this
        # function into a generator, so calling it returns an (empty)
        # iterable instead of None.
        if False:
            yield match.start(), Other, ""
        if ctx:
            ctx.pos = match.end()

    @_staticmethod
    def op_fixed(toktype, value):
        """Unconditionally yield a given token type and value.

        Return a lexer callback that yields exactly one token
        ``(match.start(), toktype, value)``, regardless of the text
        that has been matched.

        """

        def _op_fixed(lexer, match, ctx=None):
            """Yield the fixed token and advance `ctx` if it is given."""
            yield match.start(), toktype, value
            if ctx:
                ctx.pos = match.end()

        return _op_fixed

    tokens = {
#
# These states are borrowed from Pygments' Python lexer.
# Their names have been prefixed with `py-'.
#
# SPDX-SnippetBegin
# SPDX-License-Identifier: BSD-2-Clause
# SPDX-SnippetCopyrightText: Copyright 2006-2023 by the Pygments team
# SPDX-SnippetCopyrightText: Copyright 2026 by Franz Glasner
#
        'py-numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'py-strings': [
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('py-stringescape', 'py-tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('py-stringescape', 'py-tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('py-stringescape', 'py-dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('py-stringescape', 'py-sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('py-bytesescape', 'py-tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('py-bytesescape', 'py-tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('py-bytesescape', 'py-dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('py-bytesescape', 'py-sqs')),
        ],
        'py-stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('py-bytesescape')
        ],
        'py-bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})',
             String.Escape)
        ],
        'py-dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('py-strings-double'),
            (r'\n', Error),     # added by fag
        ],
        'py-sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('py-strings-single'),
            (r'\n', Error),     # added by fag
        ],
        'py-tdqs': [
            (r'"""', String.Double, '#pop'),
            include('py-strings-double'),
            (r'\n', String.Double)
        ],
        'py-tsqs': [
            (r"'''", String.Single, '#pop'),
            include('py-strings-single'),
            (r'\n', String.Single)
        ],
        'py-strings-single': py_innerstring_rules(String.Single),
        'py-strings-double': py_innerstring_rules(String.Double),
        'py-name': py_name_rules(Name.Entity),
# SPDX-SnippetEnd
        # This snippet is from the Pygments' documentation "Write your own lexer"
        # Nested comments delimited by `/*' and `*/'
        'multiline-nested-comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # The same scheme for comments delimited by `(*' and `*)'
        'multiline-nested-comment-alt': [
            (r'[^*()]+', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*()]', Comment.Multiline),
        ]
    }
