diff cutils/util/walk.py @ 162:29dd5528174c

Implement walk._walk() using os.listdir() also. Use it if no scandir is available.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 09 Jan 2025 13:36:41 +0100
parents 481cc9b26861
children a813094ae4f5
line wrap: on
line diff
--- a/cutils/util/walk.py	Tue Jan 07 19:20:32 2025 +0100
+++ b/cutils/util/walk.py	Thu Jan 09 13:36:41 2025 +0100
@@ -113,47 +113,74 @@
         w._stat_result = entry.stat(follow_symlinks=True)
         return w
 
+    @classmethod
+    def from_path_name(cls_, path, name):
+        w = cls_(name)
+        w._path = os.path.join(path, name)
+        try:
+            w._is_dir = os.path.isdir(w._path)
+        except OSError:
+            #
+            # If the check raises an OSError, consider that the entry
+            # is not a directory, same behaviour as os.path.isdir().
+            #
+            w._is_dir = False
+        try:
+            w._is_symlink = os.path.islink(w._path)
+        except OSError:
+            #
+            # If the check raises an OSError, consider that the entry
+            # is not a symbolic link, same behaviour as os.path.islink().
+            #
+            w._is_symlink = False
+        w._stat_result = os.stat(w._path)
+        return w
+
     @staticmethod
     def sort_key(entry):
         return entry._fsname
 
 
+def walk(root, follow_symlinks=False):
+    """A heavily customized :func:`os.walk` alike that differs from the
+    original:
+
+    - optimized for use in :command:`treesum`
+    - most errors are not suppressed
+    - the `root` is never part of the returned data
+    - the returned directory in "top" is not a string form but a tuple of
+      individual path segments
+    - there is only one yielded list
+
+      * contains :class:`WalkDirEntry`
+      * sorted by its fsname
+
+      The caller can easily get the old dirs and nondirs by filtering
+      the yielded list using "entry.is_dir".
+
+    - recurse into sub-directories first ("topdown=False")
+    - sort consistently all yielded lists by the filesystem encoding
+
+    .. note:: The implementation is based on Python 3.11. It uses
+              :func:`os.scandir` or :func:`scandir.scandir` when one is
+              available and falls back to :func:`os.listdir` otherwise.
+              It intentionally follows the logic in Python 3.11 while it
+              could be simplified because we are not implementing some of
+              the original flags (e.g. like `topdown`).
+
+    """
+    normed_root = os.path.normpath(root)
+    yield from _walk(normed_root, tuple(), follow_symlinks=follow_symlinks)
+
+
 if scandir:
 
-    def walk(root, follow_symlinks=False):
-        """A heyvily customized :func:`os.walk` alike that differs from the
-        original:
-
-        - optimized for use in :command:`treesum`
-        - most errors are not suppressed
-        - the `root` is never part of the returned data
-        - the returned directory in "top" is not a string form but a list of
-          individual path segments
-        - there is only one yielded list
+    def _walk(root, top, follow_symlinks):
+        """:func:`walk` helper.
 
-          * contains :class:`WalkDirEntry`
-          * sorted by its fsname
-
-          The caller can easily get the old dirs and nondirs by filtering
-          the yielded list using "entry.is_dir".
-
-        - recurse into sub-directories first ("topdown=False")
-        - sort consistently all yielded lists by the filesystem encoding
-
-        .. note:: The implementation is based on Python 3.11 and needs a
-                  functional :func:`os.scandir` or :func:`scandir.scandir`
-                  implementation. It intentionally follows the logic in
-                  Python 3.11 while it could be simplified because we are not
-                  implementing some of the original flags (e.g. like
-                  `topdown`).
+        Implemented using :func:`os.scandir`.
 
         """
-        normed_root = os.path.normpath(root)
-        yield from _walk(normed_root, tuple(), follow_symlinks=follow_symlinks)
-
-
-    def _walk(root, top, follow_symlinks):  # noqa: E303  too many empty lines
-        """:func:`walk` helper"""
         if top:
             path = os.path.join(root, *top)
         else:
@@ -195,4 +222,41 @@
 
 else:
 
-    raise ImportError("no `scandir()' module available")
+    def _walk(root, top, follow_symlinks):
+        """:func:`walk` helper.
+
+        Implemented using :func:`os.listdir`.
+
+        """
+        if top:
+            path = os.path.join(root, *top)
+        else:
+            path = root
+
+        fsobjects, walk_dirs = [], []
+
+        names = os.listdir(path)
+        for name in names:
+            entry = WalkDirEntry.from_path_name(path, name)
+            fsobjects.append(entry)
+            #
+            # Always bottom-up: recurse into sub-directories, but exclude
+            # symlinks to directories if follow_symlinks is False
+            #
+            if entry.is_dir:
+                if follow_symlinks:
+                    walk_into = True
+                else:
+                    walk_into = not entry.is_symlink
+                if walk_into:
+                    walk_dirs.append(entry)
+
+        # Sort by low-level filesystem encoding
+        walk_dirs.sort(key=WalkDirEntry.sort_key)
+        fsobjects.sort(key=WalkDirEntry.sort_key)
+
+        # Recurse into sub-directories
+        for wd in walk_dirs:
+            yield from _walk(root, top + (wd.name,), follow_symlinks)
+        # Yield after recursion if going bottom up
+        yield top, fsobjects