changeset 778:84527b00d29a

farray.sh: Implement Heapsort in the "standard" implementation. BUGS: Performance here equals Shellsort.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 25 Oct 2024 20:29:04 +0200
parents 3f9b22ddacb8
children 0bb535e50271
files share/local-bsdtools/farray.sh tests/farray-array.t tests/sort-perf/sort-perf.sh
diffstat 3 files changed, 163 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/share/local-bsdtools/farray.sh	Thu Oct 24 11:44:35 2024 +0200
+++ b/share/local-bsdtools/farray.sh	Fri Oct 25 20:29:04 2024 +0200
@@ -1792,6 +1792,119 @@
 
 
 #:
+#: Sort an array using Heapsort.
+#:
+#: Args:
+#:   $1: The array to sort to
+#:
+#: See Also:
+#:   `_farr_array_heapsort`
+#:
+farray_heapsort() {
+    local __farr_name
+
+    local __farr_token __farr_gvrname __farr_len \
+          __farr_pos __farr_val __farr_val_1
+
+    _farr_array_get_meta "$@"
+
+    _farr_array_heapsort
+}
+
+
+#:
+#: Internal sort implementation using Heapsort.
+#:
+#: Stable: no
+#:
+#: See: https://en.wikipedia.org/wiki/Heapsort
+#:
+#: Here the "standard" implementation is used.
+#:
+#: Input (Globals):
+#:   All the "Output (Globals)" from `farr_array_get_meta`:
+#:   __farr_name, __farr_token, __farr_gvrname and __farr_len.
+#:   These variables are declared local in this function but the shell
+#:   initializes them from the same variables in the nearest dynamic scope.
+#:   So they are really input variables and not also output variables.
+#:
+_farr_array_heapsort() {
+
+    local __farr_name __farr_token __farr_gvrname __farr_len \
+          __farr_start __farr_end __farr_root __farr_childp1 \
+          __farr_tmpitem __farr_rootitem __farr_childitem __farr_childp1item
+
+    __farr_start=$(( (__farr_len / 2) + 1))    # aka: iParent(of the last element)
+    __farr_end=$((__farr_len + 1))
+    while [ "${__farr_end}" -gt 2 ]; do
+#        echo  "== WHILE: START: $__farr_start,   END: $__farr_end"
+        if [ "${__farr_start}" -gt 1 ]; then
+#            echo "==== HEAP CONSTRUCTION"
+            # Heap construction
+            __farr_start=$((__farr_start - 1))
+        else
+#            echo "==== HEAP EXTRACTION"
+            # Heap extraction
+            __farr_end=$((__farr_end - 1))
+            # swap(a[end], a[1])
+            eval __farr_tmpitem=\"\$\{"${__farr_gvrname}"_1\}\"
+            eval "${__farr_gvrname}"_1=\"\$\{"${__farr_gvrname}"_"${__farr_end}"\}\"
+            setvar "${__farr_gvrname}"_"${__farr_end}" "${__farr_tmpitem}"
+        fi
+#        echo "==== BEFORE: START: $__farr_start,   END: $__farr_end"
+        #farray_debug "$__farr_name"
+        # The following is siftDown(a, start, end)
+        __farr_root="${__farr_start}"
+        eval __farr_rootitem=\"\$\{"${__farr_gvrname}"_"${__farr_root}"\}\"
+#        echo "==== NEW ROOT: ${__farr_root},   a[root]: ${__farr_rootitem}"
+        while [ $((2 * __farr_root)) -lt "${__farr_end}" ]; do
+            child=$((2 * __farr_root))
+            eval __farr_childitem=\"\$\{"${__farr_gvrname}"_"${child}"\}\"
+#            echo "CHILD: ${child},   ROOT: ${__farr_root},   END: ${__farr_end},   a[child]: ${__farr_childitem}"
+            # If there is a right child and that child is greater
+            __farr_childp1=$((child + 1))
+            if [ "${__farr_childp1}" -lt "${__farr_end}" ]; then
+                eval __farr_childp1item=\"\$\{"${__farr_gvrname}"_"${__farr_childp1}"\}\"
+#                echo "===== a[child+1]: ${__farr_childp1item}"
+                if [ "${__farr_childitem}" '<' "${__farr_childp1item}" ]; then
+                    #child=$((child + 1))
+                    child="${__farr_childp1}"
+                    eval __farr_childitem=\"\$\{"${__farr_gvrname}"_"${child}"\}\"
+#                    echo "====== INCR CHILD TO: $child"
+                fi
+            fi
+#            echo "====== a[root]: ${__farr_rootitem},   a[child]: ${__farr_childitem}"
+            if [ "${__farr_rootitem}" '<' "${__farr_childitem}" ]; then
+#                echo "====== SWAPPING: root: ${__farr_root}, child: ${child}, a[root]: $__farr_rootitem, a[child]: $__farr_childitem"
+                # swap(a[root], a[child])
+                setvar "${__farr_gvrname}"_"${__farr_root}" "${__farr_childitem}"
+                setvar "${__farr_gvrname}"_"${child}" "${__farr_rootitem}"
+                # repeat to continue sifting down
+                __farr_root="${child}"
+
+                #
+                # XXX FIXME: Why are these lines not needed?
+                #
+                # This gives the *wrong* result
+                #__farr_rootitem="${__farr_childitem}"
+                # This does not harm but seems not needed
+                #eval __farr_rootitem=\"\$\{"${__farr_gvrname}"_"${__farr_root}"\}\"
+            else
+                # return to outer loop
+#                echo "====== BREAK"
+                break
+            fi
+#            echo "====== LOOPING ..."
+        done
+#        echo "==== AFTER: START: $__farr_start,   END: $__farr_end"
+        #farray_debug "$__farr_name"
+    done
+
+
+}
+
+
+#:
 #: Try to find the index of a given value in an existing array.
 #:
 #: It assumes that the array is sorted and uses a binary search algorithm
--- a/tests/farray-array.t	Thu Oct 24 11:44:35 2024 +0200
+++ b/tests/farray-array.t	Fri Oct 25 20:29:04 2024 +0200
@@ -1396,6 +1396,19 @@
   $ farray_release TEST
   $ check_no_array_artifacts
 
+  $ farray_create TEST 5 3 2 4
+  $ farray_heapsort TEST
+  $ check_array_is_sorted "$TEST"
+  $ farray_debug TEST
+  DEBUG: array `TEST' has length 4
+  DEBUG:   the items:
+  DEBUG:     1: `2'
+  DEBUG:     2: `3'
+  DEBUG:     3: `4'
+  DEBUG:     4: `5'
+  $ farray_release TEST
+  $ check_no_array_artifacts
+
   $ create_random_array UNSORTED 1000
   $ check_array_is_sorted "$UNSORTED"
   [1]
@@ -1416,6 +1429,26 @@
   $ check_array_is_sorted "$TEST"
   $ farray_release TEST
 
+  $ farray_create TEST
+  $ farray_splice "" TEST 1 "" UNSORTED
+  $ check_array_is_sorted "$TEST"
+  [1]
+  $ farray_heapsort TEST
+  $ check_array_is_sorted "$TEST"
+  $ farray_release TEST
+
+  $ farray_release UNSORTED
+  $ check_no_array_artifacts
+
+# Extra checks for Heapsort
+
+  $ farray_create UNSORTED '189548216' '544226607' '690563482' '224884577' '843524724' '922143089' '917031008' '602352555' '397442038' '350475285'
+  $ farray_create TEST
+  $ farray_splice "" TEST 1 "" UNSORTED
+  $ farray_heapsort TEST
+  $ check_array_is_sorted "$TEST"
+
+  $ farray_release TEST
   $ farray_release UNSORTED
   $ check_no_array_artifacts
 
--- a/tests/sort-perf/sort-perf.sh	Thu Oct 24 11:44:35 2024 +0200
+++ b/tests/sort-perf/sort-perf.sh	Fri Oct 25 20:29:04 2024 +0200
@@ -12,6 +12,11 @@
 . "${_p_datadir}/farray.sh"
 
 
+fatal() {
+    exit 1 "ERROR: $@" 1>&2
+}
+
+
 get_ts() {
     /usr/bin/ntpq -c "rv 0 clock" localhost
 }
@@ -39,23 +44,34 @@
 	get_ts
 	farray_gnomesort TEST
 	get_ts
+	check_array_is_sorted "$TEST" || fatal "sort failed"
 	farray_release TEST
     else
 	echo "===> SKIPPED: Gnome Sort with ${1} items"
     fi
 
-    if [ "${1}" -le 20000 ]; then
+    if [ "${1}" -le 50000 ]; then
 	farray_create TEST
 	farray_splice "" TEST 1 "" UNSORTED
 	echo "===> Shell Sort with ${1} items"
 	get_ts
 	farray_shellsort TEST
 	get_ts
+	check_array_is_sorted "$TEST" || fatal "sort failed"
 	farray_release TEST
     else
 	echo "===> SKIPPED: Shell Sort with ${1} items"
     fi
 
+    farray_create TEST
+    farray_splice "" TEST 1 "" UNSORTED
+    echo "===> Heap Sort with ${1} items"
+    get_ts
+    farray_heapsort TEST
+    get_ts
+    check_array_is_sorted "$TEST" || fatal "sort failed"
+    farray_release TEST
+
     farray_release UNSORTED
 }