changeset 120:ba5970a2dcef

The default file encoding when reading INI style files with configmix.ini.load() is now "UTF-8". Added unit tests for proper Unicode handling.
author Franz Glasner <f.glasner@feldmann-mg.com>
date Thu, 29 Mar 2018 12:38:52 +0200
parents eefde3288fb8
children 0d378dcc018b
files CHANGES.txt configmix/ini.py doc/changes.rst tests/data/conf1.ini tests/data/conf1.py tests/data/conf1.yml tests/test.py
diffstat 7 files changed, 23 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGES.txt	Thu Mar 29 12:37:20 2018 +0200
+++ b/CHANGES.txt	Thu Mar 29 12:38:52 2018 +0200
@@ -23,6 +23,15 @@
       shallow copy.
 
    .. change::
+      :tags: breaking, feature
+
+      The default file encoding when reading INI style files with
+      :py:func:`configmix.ini.load` is now "UTF-8". Previously it was
+      undefined and therefore dependent on the user's locale.
+
+      An `encoding` keyword argument can be specified explicitly now.
+
+   .. change::
       :tags: doc
 
       Begin the documentation with `Sphinx <http://www.sphinx-doc.org>`_
--- a/configmix/ini.py	Thu Mar 29 12:37:20 2018 +0200
+++ b/configmix/ini.py	Thu Mar 29 12:38:52 2018 +0200
@@ -143,7 +143,8 @@
         return DictImpl(self.itemsx(section, options))
 
 
-def load(filename, extract=["config"]):
+def load(filename, extract=["config"],
+         encoding="utf-8"):
     """Load a single INI file and read/interpolate the sections given in
     `extract`.
 
@@ -152,9 +153,11 @@
     Then build a tree out of sections which start with any of the `extract`
     content value and a point ``.``.
 
+    The encoding of the file is given in `encoding`.
+
     """
     conf = DictImpl()
-    ini = INIConfigParser(filename)
+    ini = INIConfigParser(filename, encoding=encoding)
     for sect in extract:
         try:
             cfg = ini.options(sect)
--- a/doc/changes.rst	Thu Mar 29 12:37:20 2018 +0200
+++ b/doc/changes.rst	Thu Mar 29 12:38:52 2018 +0200
@@ -25,3 +25,7 @@
 
   The public signature of :py:func:`configmix.safe_merge` has *not*
   changed.
+
+- The default file encoding when reading INI style files with
+  :py:func:`configmix.ini.load` is now "UTF-8". Previously it was undefined
+  and therefore dependent on the user's locale.
--- a/tests/data/conf1.ini	Thu Mar 29 12:37:20 2018 +0200
+++ b/tests/data/conf1.ini	Thu Mar 29 12:38:52 2018 +0200
@@ -7,3 +7,4 @@
 key4 = :bool:yes
 key5 = :bool:off
 key6 = :int:0o377
+key7 = Umlaute: ÄÖÜäöüß
--- a/tests/data/conf1.py	Thu Mar 29 12:37:20 2018 +0200
+++ b/tests/data/conf1.py	Thu Mar 29 12:38:52 2018 +0200
@@ -6,3 +6,4 @@
 key4 = True
 key5 = False
 key6 = 0o377
+key7 = u"Umlaute: ÄÖÜäöüß"
--- a/tests/data/conf1.yml	Thu Mar 29 12:37:20 2018 +0200
+++ b/tests/data/conf1.yml	Thu Mar 29 12:38:52 2018 +0200
@@ -8,3 +8,4 @@
 key4: true
 key5: false
 key6: 0377
+key7: 'Umlaute: ÄÖÜäöüß'
--- a/tests/test.py	Thu Mar 29 12:37:20 2018 +0200
+++ b/tests/test.py	Thu Mar 29 12:38:52 2018 +0200
@@ -39,6 +39,8 @@
         self.assertFalse(cfg.get("key5"))
         self.assertTrue(isinstance(cfg.get("key5"), bool))
         self.assertEqual(255, cfg.get("key6"))
+        self.assertEqual(u("Umlaute: ÄÖÜäöüß"),
+                         cfg.get("key7"))
 
     def __check_tree(self, cfg):
         self.assertEqual(u("in the root namespace"),