changeset 389:fa2e44e749ab

Handle all special and function characters in the timestamps database. Escape all characters in the range \x00-\x1f and \x7f and all characters in ",@#<|=>". Also do not normalize CR-LF combinations within an escaped field.
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 04 Aug 2019 09:08:35 +0200
parents 423ec8fad8d7
children ad291947fae8
files extensions/timestamps.py
diffstat 1 files changed, 14 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/extensions/timestamps.py	Sat Aug 03 21:54:09 2019 +0200
+++ b/extensions/timestamps.py	Sun Aug 04 09:08:35 2019 +0200
@@ -34,8 +34,11 @@
 
   * field separator: COMMA (0x2c)
 
-    If fields contain a ``,`` or ``#`` or ``@``: put them into ``@``;
+    If fields contain any of ``,``, ``#``, ``@``, ``<``, ``|``, ``=`` or ``>``:
+    enclose them in ``@``;
     eventually duplicate ``@`` characters within.
+    This also applies if the field contains a control character in the range
+    ``\x00``--``\x1f`` or ``\x7f``.
 
     Leading white space is discarded from fields. If needed use ``@``.
 
@@ -674,6 +677,13 @@
     return matcher
 
 
+#: Special characters to be escaped in a database field (includes all
+#: function characters)
+_SPECIAL_CHARS = [pycompat.bytechr(i) for i in range(0, 32)]
+_SPECIAL_CHARS.append(pycompat.bytechr(0x7f))
+_SPECIAL_CHARS = b",@#<|=>" + b"".join(_SPECIAL_CHARS)
+
+
 def _escape_str_field(fn):
     """Escape the string `fn` for database output if needed
 
@@ -683,7 +693,7 @@
     if fn.startswith(b' ') or fn.startswith(b'\t') \
        or fn.endswith(b' ') or fn.endswith(b'\t'):
         return b"@%s@" % fn.replace(b'@', b"@@")
-    for c in b",@#<|=>":
+    for c in _SPECIAL_CHARS:
         if c in fn:
             return b"@%s@" % fn.replace(b'@', b"@@")
     return fn
@@ -736,7 +746,8 @@
             sf = []
             #
             # text field with special chars:
-            # non-terminal '@' have been duplicated
+            # non-terminal '@' have been duplicated, all other characters
+            # including CR-LF combinations are read unchanged
             #
             c2 = fp.read(1)
             while c2:
@@ -755,18 +766,6 @@
                         # the next character will be read below:
                         # just stay at the trailing `@'
                         #
-                elif c2 in b"\r\n":
-                    if c2 == b'\r':
-                        #
-                        # handle CR-LF ('\r\n') combination: skip a '\n' that
-                        # follows directly
-                        #
-                        c3 = fp.peek(1)
-                        if c3 and c3[0] == b"\n":
-                            fp.read(1)
-                    lineno += 1
-                    sf.append(b'\n')
-                    c2 = fp.read(1)
                 else:
                     sf.append(c2)
                     c2 = fp.read(1)