/* Copyright (C) 1991-2024 Free Software Foundation, Inc.
   This file is part of libctf (imported from Gnulib).
   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* If you consider tuning this algorithm, you should consult first:
   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */

#ifndef _LIBC
# include <config.h>
#endif

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "ctf-decls.h"

#ifndef _LIBC
# define _quicksort ctf_qsort_r
# define __compar_d_fn_t compar_d_fn_t
/* Comparison callback: receives two element pointers plus the caller's
   opaque ARG, which is handed through unchanged.  A negative result
   means the first element orders before the second.  */
typedef int (*compar_d_fn_t) (const void *, const void *, void *);
#endif

/* Byte-wise swap two items of size SIZE.  SIZE must be nonzero: the
   inner loop body runs once before the remaining count is tested.  */
#define SWAP(a, b, size)                                                      \
  do                                                                          \
    {                                                                         \
      size_t __size = (size);                                                 \
      char *__a = (a), *__b = (b);                                            \
      do                                                                      \
        {                                                                     \
          char __tmp = *__a;                                                  \
          *__a++ = *__b;                                                      \
          *__b++ = __tmp;                                                     \
        } while (--__size > 0);                                               \
    } while (0)

/* Discontinue quicksort algorithm when partition gets below this size.
   This particular magic number was chosen to work best on a Sun 4/260.  */
#define MAX_THRESH 4

/* Stack node declarations used to store unfulfilled partition obligations.
   LO and HI point at the first and the last element of a partition (HI is
   *not* one past the end).  */
typedef struct
  {
    char *lo;
    char *hi;
  } stack_node;

/* The next 4 #defines implement a very fast in-line stack abstraction.
   They operate on local variables named `stack' and `top' that must be in
   scope at the point of use.  */
/* The stack needs log (total_elements) entries (we could even subtract
   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
   upper bound for log (total_elements):
   bits per byte (CHAR_BIT) * sizeof(size_t).  */
#define STACK_SIZE      (CHAR_BIT * sizeof (size_t))
#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
#define POP(low, high)  ((void) (--top, (low = top->lo), (high = top->hi)))
#define STACK_NOT_EMPTY (stack < top)


/* Sort the TOTAL_ELEMS elements of SIZE bytes each that start at PBASE
   into the ascending order defined by CMP.  CMP is called as
   CMP (a, b, ARG) and must return a negative value when A orders before
   B.  The array is sorted in place; nothing is allocated on the heap.
   Nothing is done when TOTAL_ELEMS is less than 2 or SIZE is 0.

   Every scan below is bounded by the partition (or array) it works on,
   so a comparison function that is not a consistent ordering can only
   yield a badly ordered array, never an access outside of it.

   This implementation incorporates four optimizations discussed in
   Sedgewick:

   1. Non-recursive, using an explicit stack of pointers that store the
      next array partition to sort.  To save time, the maximum amount
      of space required to store an array of SIZE_MAX is allocated on the
      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
      Pretty cheap, actually.

   2. Choose the pivot element using a median-of-three decision tree.
      This reduces the probability of selecting a bad pivot value and
      eliminates certain extraneous comparisons.

   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
      insertion sort to order the MAX_THRESH items within each partition.
      This is a big win, since insertion sort is faster for small, mostly
      sorted array segments.

   4. The larger of the two sub-partitions is always pushed onto the
      stack first, with the algorithm then concentrating on the
      smaller partition.  This *guarantees* no more than log (total_elems)
      stack size is needed (actually O(1) in this case)!  */

void
_quicksort (void *const pbase, size_t total_elems, size_t size,
            __compar_d_fn_t cmp, void *arg)
{
  char *base_ptr = (char *) pbase;

  const size_t max_thresh = MAX_THRESH * size;

  if (total_elems <= 1 || size == 0)
    /* Nothing to reorder.  Returning here also avoids lossage with the
       unsigned arithmetic below; a zero SIZE would in addition divide
       by zero when locating MID and would never advance the scan
       pointers.  */
    return;

  if (total_elems > MAX_THRESH)
    {
      char *lo = base_ptr;
      char *hi = &lo[size * (total_elems - 1)];
      stack_node stack[STACK_SIZE];
      stack_node *top = stack;

      /* Sentinel entry: the POP that removes it leaves the stack empty,
         which is what terminates the loop below.  */
      PUSH (NULL, NULL);

      while (STACK_NOT_EMPTY)
        {
          char *left_ptr;
          char *right_ptr;

          /* Select median value from among LO, MID, and HI.  Rearrange
             LO and HI so the three values are sorted.  This lowers the
             probability of picking a pathological pivot value and
             skips a comparison for both the LEFT_PTR and RIGHT_PTR in
             the while loops.  */

          char *mid = lo + size * ((hi - lo) / size >> 1);

          if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
            SWAP (mid, lo, size);
          if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
            SWAP (mid, hi, size);
          else
            goto jump_over;
          if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
            SWAP (mid, lo, size);
        jump_over:;

          left_ptr  = lo + size;
          right_ptr = hi - size;

          /* Here's the famous ``collapse the walls'' section of quicksort.
             Gotta like those tight inner loops!  They are the main reason
             that this algorithm runs much faster than others.

             *LO and *HI were ordered around the pivot above and are not
             moved by this loop, so with a consistent CMP each scan stops
             there by itself; the explicit bounds change nothing in that
             case and only keep an inconsistent CMP inside [LO, HI].  */
          do
            {
              while (left_ptr < hi
                     && (*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
                left_ptr += size;

              while (right_ptr > lo
                     && (*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
                right_ptr -= size;

              if (left_ptr < right_ptr)
                {
                  SWAP (left_ptr, right_ptr, size);
                  /* Keep MID pointing at the pivot if it just moved.  */
                  if (mid == left_ptr)
                    mid = right_ptr;
                  else if (mid == right_ptr)
                    mid = left_ptr;
                  left_ptr += size;
                  right_ptr -= size;
                }
              else if (left_ptr == right_ptr)
                {
                  left_ptr += size;
                  right_ptr -= size;
                  break;
                }
            }
          while (left_ptr <= right_ptr);

          /* Set up pointers for next iteration.  First determine whether
             left and right partitions are below the threshold size.  If so,
             ignore one or both.  Otherwise, push the larger partition's
             bounds on the stack and continue sorting the smaller one.  */

          if ((size_t) (right_ptr - lo) <= max_thresh)
            {
              if ((size_t) (hi - left_ptr) <= max_thresh)
                /* Ignore both small partitions.  */
                POP (lo, hi);
              else
                /* Ignore small left partition.  */
                lo = left_ptr;
            }
          else if ((size_t) (hi - left_ptr) <= max_thresh)
            /* Ignore small right partition.  */
            hi = right_ptr;
          else if ((right_ptr - lo) > (hi - left_ptr))
            {
              /* Push larger left partition indices.  */
              PUSH (lo, right_ptr);
              lo = left_ptr;
            }
          else
            {
              /* Push larger right partition indices.  */
              PUSH (left_ptr, hi);
              hi = right_ptr;
            }
        }
    }

  /* Once the BASE_PTR array is partially sorted by quicksort the rest
     is completely sorted using insertion sort, since this is efficient
     for partitions below MAX_THRESH size.  BASE_PTR points to the beginning
     of the array to sort, and END_PTR points at the very last element in
     the array (*not* one beyond it!).  */

#define min(x, y) ((x) < (y) ? (x) : (y))

  {
    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
    char *tmp_ptr = base_ptr;
    char *thresh = min (end_ptr, base_ptr + max_thresh);
    char *run_ptr;

    /* Find smallest element in first threshold and place it at the
       array's beginning.  This is the smallest array element,
       and the operation speeds up insertion sort's inner loop.  */

    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
        tmp_ptr = run_ptr;

    if (tmp_ptr != base_ptr)
      SWAP (tmp_ptr, base_ptr, size);

    /* Insertion sort, running from left-hand-side up to right-hand-side.  */

    run_ptr = base_ptr + size;
    while ((run_ptr += size) <= end_ptr)
      {
        /* Walk TMP_PTR down to the slot where *RUN_PTR belongs.  With a
           consistent CMP the minimum parked at BASE_PTR stops the walk
           by itself; the explicit bound keeps the walk inside the array
           even when CMP is inconsistent.  */
        tmp_ptr = run_ptr;
        while (tmp_ptr > base_ptr
               && (*cmp) ((void *) run_ptr, (void *) (tmp_ptr - size),
                          arg) < 0)
          tmp_ptr -= size;

        if (tmp_ptr != run_ptr)
          {
            char *trav;

            /* Rotate the element at RUN_PTR down to TMP_PTR one byte
               lane at a time: for each byte offset within the element,
               shift that byte of every element in [TMP_PTR, RUN_PTR)
               up by one slot, then drop the saved byte into the gap.  */
            trav = run_ptr + size;
            while (--trav >= run_ptr)
              {
                char c = *trav;
                char *hi, *lo;

                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
                  *hi = *lo;
                *hi = c;
              }
          }
      }
  }
}