changeset 779:0bb535e50271

farray.sh: Implement Heapsort in the "bottom-up" implementation. BUGS: A little bit slower than the "standard" implementation.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 26 Oct 2024 13:52:25 +0200
parents 84527b00d29a
children e6af933f475e
files share/local-bsdtools/farray.sh tests/farray-array.t tests/sort-perf/sort-perf.sh
diffstat 3 files changed, 223 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/share/local-bsdtools/farray.sh	Fri Oct 25 20:29:04 2024 +0200
+++ b/share/local-bsdtools/farray.sh	Sat Oct 26 13:52:25 2024 +0200
@@ -1899,12 +1899,193 @@
 #        echo "==== AFTER: START: $__farr_start,   END: $__farr_end"
         #farray_debug "$__farr_name"
     done
-
-
+}
+
+
+#:
+#: Sort an array using Heapsort.
+#:
+#: Args:
+#:   $1: The array to sort to
+#:
+#: See Also:
+#:   `_farr_array_heapsort`
+#:
+farray_heapsort_bottomup() {
+    local __farr_name
+
+    local __farr_token __farr_gvrname __farr_len \
+          __farr_pos __farr_val __farr_val_1
+
+    _farr_array_get_meta "$@"
+
+    _farr_array_heapsort_bottomup
+}
+
+
+#:
+#: Internal sort implementation using Heapsort.
+#:
+#: Stable: no
+#:
+#: See: https://en.wikipedia.org/wiki/Heapsort
+#:
+#: Here the bottom-up implementation.
+#:
+#: Input (Globals):
+#:   All the "Output (Globals)" from `farr_array_get_meta`:
+#:   __farr_name, __farr_token, __farr_gvrname and __farr_len.
+#:   These variables are declared local in this function but the shell
+#:   initializes them from the same variables in the nearest dynamic scope.
+#:   So they are really input variables and not also output variables.
+#:
+_farr_array_heapsort_bottomup() {
+    #
+
+    local __farr_end __farr_tmpitem
+
+    _farr_array_hsbu_heapify
+
+    #
+    # The following loop maintains the invariants that a[1:endβˆ’1] is a
+    # heap, and every element a[end:count] beyond end is greater
+    # than everything before it, i.e. a[end:count] is in sorted
+    # order.)
+
+    __farr_end=$((__farr_len + 1))
+    while [ "${__farr_end}" -gt 2 ]; do
+        # The heap size is reduced by one)
+        __farr_end=$((__farr_end - 1))
+        #
+        # a[1] is the root and largest value. The swap moves it in
+        # front of the sorted elements.)
+        #
+        #swap(a[end], a[1])
+        eval __farr_tmpitem=\"\$\{"${__farr_gvrname}"_1\}\"
+        eval "${__farr_gvrname}"_1=\"\$\{"${__farr_gvrname}"_"${__farr_end}"\}\"
+        setvar "${__farr_gvrname}"_"${__farr_end}" "${__farr_tmpitem}"
+        # the swap ruined the heap property, so restore it)
+        #siftDown(a, 1, end)
+        _farr_array_hsbu_sift_down 1 "${__farr_end}"
+    done
 }
 
 
 #:
+#: Input (Globals):
+#:   All the "Output (Globals)" from `farr_array_get_meta`:
+#:   __farr_name, __farr_token, __farr_gvrname and __farr_len.
+#:   These variables are declared local in this function but the shell
+#:   initializes them from the same variables in the nearest dynamic scope.
+#:   So they are really input variables and not also output variables.
+#:
+_farr_array_hsbu_heapify() {
+    #
+
+    local __farr_start
+    #
+    # start is initialized to the first leaf node
+    #
+    # The last element in a 1-based array is at index count; find the
+    # parent of that element.
+    #
+    __farr_start=$(( (__farr_len / 2) + 1))
+    while [ "${__farr_start}" -gt 1 ]; do
+        # go to the last non-heap node
+        __farr_start=$((__farr_start - 1))
+        #
+        # Sift down the node at index 'start' to the proper place such
+        # that all nodes below the start index are in heap order.
+        #
+        _farr_array_hsbu_sift_down "${__farr_start}" $((__farr_len + 1))
+    done
+    # after sifting down the root all nodes/elements are in heap order
+}
+
+
+#:
+#: Args:
+#:   $1 (int): Index i
+#:   $2(int) : end
+#:
+#: Input (Globals):
+#:   All the "Output (Globals)" from `farr_array_get_meta`:
+#:   __farr_name, __farr_token, __farr_gvrname and __farr_len.
+#:   These variables are declared local in this function but the shell
+#:   initializes them from the same variables in the nearest dynamic scope.
+#:   So they are really input variables and not also output variables.
+#:
+_farr_array_hsbu_sift_down() {
+    # $1 $2
+
+    local __farr_j __farr_item_i __farr_item_j __farr_tmpitem
+
+    _farr_array_hsbu_leaf_search __farr_j "$1" "$2"
+
+    eval __farr_item_i=\"\$\{"${__farr_gvrname}"_"${1}"\}\"
+    eval __farr_item_j=\"\$\{"${__farr_gvrname}"_"${__farr_j}"\}\"
+    while [ "${__farr_item_i}" '>' "${__farr_item_j}" ]; do
+        #j ← iParent(j)
+        __farr_j=$((__farr_j / 2))
+        eval __farr_item_j=\"\$\{"${__farr_gvrname}"_"${__farr_j}"\}\"
+    done
+    while [ "${__farr_j}" -gt "${1}" ]; do
+        #swap(a[i], a[j])
+        eval __farr_tmpitem=\"\$\{"${__farr_gvrname}"_"${1}"\}\"
+        eval "${__farr_gvrname}"_"${1}"=\"\$\{"${__farr_gvrname}"_"${__farr_j}"\}\"
+        setvar "${__farr_gvrname}"_"${__farr_j}" "${__farr_tmpitem}"
+        #j ← iParent(j)
+        __farr_j=$((__farr_j / 2))
+    done
+}
+
+
+#:
+#: Args:
+#:   $1 (str): The variable name where to store the return value into
+#:   $2 (int): Index i
+#:   $3 (int): end
+#:
+#: Input (Globals):
+#:   All the "Output (Globals)" from `farr_array_get_meta`:
+#:   __farr_name, __farr_token, __farr_gvrname and __farr_len.
+#:   These variables are declared local in this function but the shell
+#:   initializes them from the same variables in the nearest dynamic scope.
+#:   So they are really input variables and not also output variables.
+#:
+_farr_array_hsbu_leaf_search() {
+    # $1 $2 $3
+
+    local __farr_ls_j __farr_ls_rightchild_j __farr_ls_leftchild_j \
+          __farr_ls_tmp_rightchild __farr_ls_tmp_leftchild
+
+    __farr_ls_j="${2}"
+
+    __farr_ls_rightchild_j=$((2 * __farr_ls_j + 1))
+    __farr_ls_leftchild_j=$((2 * __farr_ls_j))
+    while [ "${__farr_ls_rightchild_j}" -lt "${3}" ]; do
+        # Determine which of j's two children is the greater
+        eval __farr_ls_tmp_rightchild=\"\$\{"${__farr_gvrname}"_"${__farr_ls_rightchild_j}"\}\"
+        eval __farr_ls_tmp_leftchild=\"\$\{"${__farr_gvrname}"_"${__farr_ls_leftchild_j}"\}\"
+        if [ "${__farr_ls_tmp_rightchild}" '>' "${__farr_ls_tmp_leftchild}" ];  then
+            __farr_ls_j="${__farr_ls_rightchild_j}"
+        else
+            __farr_ls_j="${__farr_ls_leftchild_j}"
+        fi
+        __farr_ls_rightchild_j=$((2 * __farr_ls_j + 1))
+        __farr_ls_leftchild_j=$((2 * __farr_ls_j))
+    done
+    # At the last level, there might be only one child
+    if [ "${__farr_ls_leftchild_j}" -lt "${3}" ]; then
+        # j ← iLeftChild(j)
+        __farr_ls_j=$((2 * __farr_ls_j))
+    fi
+    setvar "${1}" "${__farr_ls_j}"
+}
+
+
+
+#:
 #: Try to find the index of a given value in an existing array.
 #:
 #: It assumes that the array is sorted and uses a binary search algorithm
--- a/tests/farray-array.t	Fri Oct 25 20:29:04 2024 +0200
+++ b/tests/farray-array.t	Sat Oct 26 13:52:25 2024 +0200
@@ -1409,6 +1409,19 @@
   $ farray_release TEST
   $ check_no_array_artifacts
 
+  $ farray_create TEST 5 3 2 4
+  $ farray_heapsort_bottomup TEST
+  $ check_array_is_sorted "$TEST"
+  $ farray_debug TEST
+  DEBUG: array `TEST' has length 4
+  DEBUG:   the items:
+  DEBUG:     1: `2'
+  DEBUG:     2: `3'
+  DEBUG:     3: `4'
+  DEBUG:     4: `5'
+  $ farray_release TEST
+  $ check_no_array_artifacts
+
   $ create_random_array UNSORTED 1000
   $ check_array_is_sorted "$UNSORTED"
   [1]
@@ -1437,18 +1450,33 @@
   $ check_array_is_sorted "$TEST"
   $ farray_release TEST
 
+  $ farray_create TEST
+  $ farray_splice "" TEST 1 "" UNSORTED
+  $ check_array_is_sorted "$TEST"
+  [1]
+  $ farray_heapsort_bottomup TEST
+  $ check_array_is_sorted "$TEST"
+  $ farray_release TEST
+
   $ farray_release UNSORTED
   $ check_no_array_artifacts
 
 # Extra checks for Heapsort
 
   $ farray_create UNSORTED '189548216' '544226607' '690563482' '224884577' '843524724' '922143089' '917031008' '602352555' '397442038' '350475285'
+
   $ farray_create TEST
   $ farray_splice "" TEST 1 "" UNSORTED
   $ farray_heapsort TEST
   $ check_array_is_sorted "$TEST"
+  $ farray_release TEST
 
+  $ farray_create TEST
+  $ farray_splice "" TEST 1 "" UNSORTED
+  $ farray_heapsort_bottomup TEST
+  $ check_array_is_sorted "$TEST"
   $ farray_release TEST
+
   $ farray_release UNSORTED
   $ check_no_array_artifacts
 
@@ -1917,3 +1945,5 @@
 =========
 
   $ check_no_local_artifacts
+
+  $ set
--- a/tests/sort-perf/sort-perf.sh	Fri Oct 25 20:29:04 2024 +0200
+++ b/tests/sort-perf/sort-perf.sh	Sat Oct 26 13:52:25 2024 +0200
@@ -65,13 +65,22 @@
 
     farray_create TEST
     farray_splice "" TEST 1 "" UNSORTED
-    echo "===> Heap Sort with ${1} items"
+    echo "===> Heap Sort (standard) with ${1} items"
     get_ts
     farray_heapsort TEST
     get_ts
     check_array_is_sorted "$TEST" || fatal "sort failed"
     farray_release TEST
 
+    farray_create TEST
+    farray_splice "" TEST 1 "" UNSORTED
+    echo "===> Heap Sort (bottom-up) with ${1} items"
+    get_ts
+    farray_heapsort_bottomup TEST
+    get_ts
+    check_array_is_sorted "$TEST" || fatal "sort failed"
+    farray_release TEST    
+
     farray_release UNSORTED
 }