diff pygments_lexer_pseudocode2/uniprops.py @ 105:cec52d83869a

Handle many more characters from the Unicode codeset in expressions. While there: FIX: Add forgotten Punctuation characters `?' and `@'. While there: Allow the escaping of single and double quotes that normally start a string (e.g. for expressions like f' is the first derivative of f).
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 04 May 2026 16:30:36 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pygments_lexer_pseudocode2/uniprops.py	Mon May 04 16:30:36 2026 +0200
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# :-
+# SPDX-FileCopyrightText: © 2026 Franz Glasner
+# SPDX-License-Identifier: MIT
+# :-
+r"""A somewhat changed variant of :mod:`pygments.unistring`.
+
+We handle ASCII characters mostly ourselves.
+
+"""
+
+__all__ = []
+
+
+import pygments.unistring
+
+
+def _remove_ascii(s):
+    """Remove the leading ASCII characters from `s` and return the rest.
+
+    Assumes that in `s` the ASCII chars are sorted before the Unicode
+    codepoints as in :mod:`pygments.unistring`.  Only the leading run of
+    characters below U+0080 is dropped; an ASCII character that appears
+    after the first non-ASCII character is kept.
+
+    An empty or an all-ASCII `s` yields the empty string.
+
+    """
+    idx = 0
+    length = len(s)
+    # Bound the scan by the string length: without this check an empty
+    # or all-ASCII string would be indexed past its end (IndexError).
+    while idx < length and ord(s[idx]) < 0x80:
+        idx += 1
+    # s[0:] is just `s`, so no special case for "nothing changed" is needed
+    return s[idx:]
+
+
+# Punctuation (P*), symbol (S*) and separator (Z*) character classes
+# taken from pygments.unistring, each with its leading ASCII part removed.
+# The names on the left pair up positionally with the sources below.
+(
+    Pc, Pd, Pe, Ps, Pi, Pf, Po,
+    Sc, So, Sm,
+    Zl, Zp, Zs,
+) = map(
+    _remove_ascii,
+    (
+        pygments.unistring.Pc,
+        pygments.unistring.Pd,
+        pygments.unistring.Pe,
+        pygments.unistring.Ps,
+        pygments.unistring.Pi,
+        pygments.unistring.Pf,
+        pygments.unistring.Po,
+        pygments.unistring.Sc,
+        pygments.unistring.So,
+        pygments.unistring.Sm,
+        pygments.unistring.Zl,
+        pygments.unistring.Zp,
+        pygments.unistring.Zs,
+    ),
+)