xref: /netbsd-src/external/gpl3/binutils/dist/libctf/ctf-qsort_r.c (revision cb63e24e8d6aae7ddac1859a9015f48b1d8bd90e)
1*cb63e24eSchristos /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
26f4ced0bSchristos    This file is part of libctf (imported from Gnulib).
36f4ced0bSchristos    Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
46f4ced0bSchristos 
56f4ced0bSchristos    The GNU C Library is free software; you can redistribute it and/or
66f4ced0bSchristos    modify it under the terms of the GNU Lesser General Public
76f4ced0bSchristos    License as published by the Free Software Foundation; either
86f4ced0bSchristos    version 2.1 of the License, or (at your option) any later version.
96f4ced0bSchristos 
106f4ced0bSchristos    The GNU C Library is distributed in the hope that it will be useful,
116f4ced0bSchristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
126f4ced0bSchristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
136f4ced0bSchristos    Lesser General Public License for more details.
146f4ced0bSchristos 
156f4ced0bSchristos    You should have received a copy of the GNU Lesser General Public
166f4ced0bSchristos    License along with the GNU C Library; if not, see
176f4ced0bSchristos    <https://www.gnu.org/licenses/>.  */
186f4ced0bSchristos 
196f4ced0bSchristos /* If you consider tuning this algorithm, you should consult first:
206f4ced0bSchristos    Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
216f4ced0bSchristos    Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */
226f4ced0bSchristos 
236f4ced0bSchristos #ifndef _LIBC
246f4ced0bSchristos # include <config.h>
256f4ced0bSchristos #endif
266f4ced0bSchristos 
276f4ced0bSchristos #include <limits.h>
286f4ced0bSchristos #include <stdlib.h>
296f4ced0bSchristos #include <string.h>
306f4ced0bSchristos #include "ctf-decls.h"
316f4ced0bSchristos 
#ifndef _LIBC
/* Comparator used by the sort below: receives pointers to two elements
   plus the caller-supplied argument, and returns an int whose sign is
   tested by the sort.  */
typedef int (*compar_d_fn_t) (const void *left, const void *right,
                              void *arg);

/* When not built as part of the C library, the sort function is
   compiled under the name ctf_qsort_r and the library-internal
   comparator type name resolves to the typedef above.  */
# define __compar_d_fn_t compar_d_fn_t
# define _quicksort ctf_qsort_r
#endif
376f4ced0bSchristos 
/* Byte-wise swap two items of size SIZE.

   A and B must be convertible to `char *' and each address at least
   SIZE bytes.  Every argument is evaluated exactly once.  A SIZE of
   zero swaps nothing: the loop tests the remaining count before it
   touches a byte, so the counter cannot wrap around.  The temporaries
   carry a `swap_' prefix and trailing underscore rather than a leading
   double underscore, which is reserved for the implementation.  */
#define SWAP(a, b, size)						      \
  do									      \
    {									      \
      size_t swap_left_ = (size);					      \
      char *swap_a_ = (a), *swap_b_ = (b);				      \
      while (swap_left_-- > 0)						      \
	{								      \
	  char swap_tmp_ = *swap_a_;					      \
	  *swap_a_++ = *swap_b_;					      \
	  *swap_b_++ = swap_tmp_;					      \
	}								      \
    } while (0)
516f4ced0bSchristos 
/* Partitions at or below this many elements are no longer split by
   quicksort; they are left for the closing insertion sort.  This
   particular magic number was chosen to work best on a Sun 4/260.  */
#define MAX_THRESH 4

/* One pending partition: pointers to its first (LO) and last (HI)
   elements.  */
typedef struct
  {
    char *lo, *hi;
  } stack_node;

/* A minimal in-line stack of pending partitions.  PUSH, POP and
   STACK_NOT_EMPTY operate on variables named `stack' (the backing
   array) and `top' (the next free slot), which must be in scope at the
   point of use.

   The stack needs log (total_elements) entries (we could even subtract
   log (MAX_THRESH)).  Since total_elements has type size_t, an upper
   bound for log (total_elements) is the number of bits in a size_t:
   CHAR_BIT * sizeof (size_t).  */
#define STACK_SIZE	(sizeof (size_t) * CHAR_BIT)
#define PUSH(low, high)	((void) ((top->lo = (low)), ((top++)->hi = (high))))
#define POP(low, high)	((void) ((low = (--top)->lo), (high = top->hi)))
#define STACK_NOT_EMPTY	(top > stack)
726f4ced0bSchristos 
736f4ced0bSchristos 
746f4ced0bSchristos /* Order size using quicksort.  This implementation incorporates
756f4ced0bSchristos    four optimizations discussed in Sedgewick:
766f4ced0bSchristos 
776f4ced0bSchristos    1. Non-recursive, using an explicit stack of pointer that store the
786f4ced0bSchristos       next array partition to sort.  To save time, this maximum amount
796f4ced0bSchristos       of space required to store an array of SIZE_MAX is allocated on the
806f4ced0bSchristos       stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
816f4ced0bSchristos       only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
826f4ced0bSchristos       Pretty cheap, actually.
836f4ced0bSchristos 
846f4ced0bSchristos    2. Chose the pivot element using a median-of-three decision tree.
856f4ced0bSchristos       This reduces the probability of selecting a bad pivot value and
866f4ced0bSchristos       eliminates certain extraneous comparisons.
876f4ced0bSchristos 
886f4ced0bSchristos    3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
896f4ced0bSchristos       insertion sort to order the MAX_THRESH items within each partition.
906f4ced0bSchristos       This is a big win, since insertion sort is faster for small, mostly
916f4ced0bSchristos       sorted array segments.
926f4ced0bSchristos 
936f4ced0bSchristos    4. The larger of the two sub-partitions is always pushed onto the
946f4ced0bSchristos       stack first, with the algorithm then concentrating on the
956f4ced0bSchristos       smaller partition.  This *guarantees* no more than log (total_elems)
966f4ced0bSchristos       stack size is needed (actually O(1) in this case)!  */
976f4ced0bSchristos 
986f4ced0bSchristos void
_quicksort(void * const pbase,size_t total_elems,size_t size,__compar_d_fn_t cmp,void * arg)996f4ced0bSchristos _quicksort (void *const pbase, size_t total_elems, size_t size,
1006f4ced0bSchristos 	    __compar_d_fn_t cmp, void *arg)
1016f4ced0bSchristos {
1026f4ced0bSchristos   char *base_ptr = (char *) pbase;
1036f4ced0bSchristos 
1046f4ced0bSchristos   const size_t max_thresh = MAX_THRESH * size;
1056f4ced0bSchristos 
1066f4ced0bSchristos   if (total_elems == 0)
1076f4ced0bSchristos     /* Avoid lossage with unsigned arithmetic below.  */
1086f4ced0bSchristos     return;
1096f4ced0bSchristos 
1106f4ced0bSchristos   if (total_elems > MAX_THRESH)
1116f4ced0bSchristos     {
1126f4ced0bSchristos       char *lo = base_ptr;
1136f4ced0bSchristos       char *hi = &lo[size * (total_elems - 1)];
1146f4ced0bSchristos       stack_node stack[STACK_SIZE];
1156f4ced0bSchristos       stack_node *top = stack;
1166f4ced0bSchristos 
1176f4ced0bSchristos       PUSH (NULL, NULL);
1186f4ced0bSchristos 
1196f4ced0bSchristos       while (STACK_NOT_EMPTY)
1206f4ced0bSchristos         {
1216f4ced0bSchristos           char *left_ptr;
1226f4ced0bSchristos           char *right_ptr;
1236f4ced0bSchristos 
1246f4ced0bSchristos 	  /* Select median value from among LO, MID, and HI. Rearrange
1256f4ced0bSchristos 	     LO and HI so the three values are sorted. This lowers the
1266f4ced0bSchristos 	     probability of picking a pathological pivot value and
1276f4ced0bSchristos 	     skips a comparison for both the LEFT_PTR and RIGHT_PTR in
1286f4ced0bSchristos 	     the while loops. */
1296f4ced0bSchristos 
1306f4ced0bSchristos 	  char *mid = lo + size * ((hi - lo) / size >> 1);
1316f4ced0bSchristos 
1326f4ced0bSchristos 	  if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
1336f4ced0bSchristos 	    SWAP (mid, lo, size);
1346f4ced0bSchristos 	  if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
1356f4ced0bSchristos 	    SWAP (mid, hi, size);
1366f4ced0bSchristos 	  else
1376f4ced0bSchristos 	    goto jump_over;
1386f4ced0bSchristos 	  if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
1396f4ced0bSchristos 	    SWAP (mid, lo, size);
1406f4ced0bSchristos 	jump_over:;
1416f4ced0bSchristos 
1426f4ced0bSchristos 	  left_ptr  = lo + size;
1436f4ced0bSchristos 	  right_ptr = hi - size;
1446f4ced0bSchristos 
1456f4ced0bSchristos 	  /* Here's the famous ``collapse the walls'' section of quicksort.
1466f4ced0bSchristos 	     Gotta like those tight inner loops!  They are the main reason
1476f4ced0bSchristos 	     that this algorithm runs much faster than others. */
1486f4ced0bSchristos 	  do
1496f4ced0bSchristos 	    {
1506f4ced0bSchristos 	      while ((*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
1516f4ced0bSchristos 		left_ptr += size;
1526f4ced0bSchristos 
1536f4ced0bSchristos 	      while ((*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
1546f4ced0bSchristos 		right_ptr -= size;
1556f4ced0bSchristos 
1566f4ced0bSchristos 	      if (left_ptr < right_ptr)
1576f4ced0bSchristos 		{
1586f4ced0bSchristos 		  SWAP (left_ptr, right_ptr, size);
1596f4ced0bSchristos 		  if (mid == left_ptr)
1606f4ced0bSchristos 		    mid = right_ptr;
1616f4ced0bSchristos 		  else if (mid == right_ptr)
1626f4ced0bSchristos 		    mid = left_ptr;
1636f4ced0bSchristos 		  left_ptr += size;
1646f4ced0bSchristos 		  right_ptr -= size;
1656f4ced0bSchristos 		}
1666f4ced0bSchristos 	      else if (left_ptr == right_ptr)
1676f4ced0bSchristos 		{
1686f4ced0bSchristos 		  left_ptr += size;
1696f4ced0bSchristos 		  right_ptr -= size;
1706f4ced0bSchristos 		  break;
1716f4ced0bSchristos 		}
1726f4ced0bSchristos 	    }
1736f4ced0bSchristos 	  while (left_ptr <= right_ptr);
1746f4ced0bSchristos 
1756f4ced0bSchristos           /* Set up pointers for next iteration.  First determine whether
1766f4ced0bSchristos              left and right partitions are below the threshold size.  If so,
1776f4ced0bSchristos              ignore one or both.  Otherwise, push the larger partition's
1786f4ced0bSchristos              bounds on the stack and continue sorting the smaller one. */
1796f4ced0bSchristos 
1806f4ced0bSchristos           if ((size_t) (right_ptr - lo) <= max_thresh)
1816f4ced0bSchristos             {
1826f4ced0bSchristos               if ((size_t) (hi - left_ptr) <= max_thresh)
1836f4ced0bSchristos 		/* Ignore both small partitions. */
1846f4ced0bSchristos                 POP (lo, hi);
1856f4ced0bSchristos               else
1866f4ced0bSchristos 		/* Ignore small left partition. */
1876f4ced0bSchristos                 lo = left_ptr;
1886f4ced0bSchristos             }
1896f4ced0bSchristos           else if ((size_t) (hi - left_ptr) <= max_thresh)
1906f4ced0bSchristos 	    /* Ignore small right partition. */
1916f4ced0bSchristos             hi = right_ptr;
1926f4ced0bSchristos           else if ((right_ptr - lo) > (hi - left_ptr))
1936f4ced0bSchristos             {
1946f4ced0bSchristos 	      /* Push larger left partition indices. */
1956f4ced0bSchristos               PUSH (lo, right_ptr);
1966f4ced0bSchristos               lo = left_ptr;
1976f4ced0bSchristos             }
1986f4ced0bSchristos           else
1996f4ced0bSchristos             {
2006f4ced0bSchristos 	      /* Push larger right partition indices. */
2016f4ced0bSchristos               PUSH (left_ptr, hi);
2026f4ced0bSchristos               hi = right_ptr;
2036f4ced0bSchristos             }
2046f4ced0bSchristos         }
2056f4ced0bSchristos     }
2066f4ced0bSchristos 
2076f4ced0bSchristos   /* Once the BASE_PTR array is partially sorted by quicksort the rest
2086f4ced0bSchristos      is completely sorted using insertion sort, since this is efficient
2096f4ced0bSchristos      for partitions below MAX_THRESH size. BASE_PTR points to the beginning
2106f4ced0bSchristos      of the array to sort, and END_PTR points at the very last element in
2116f4ced0bSchristos      the array (*not* one beyond it!). */
2126f4ced0bSchristos 
2136f4ced0bSchristos #define min(x, y) ((x) < (y) ? (x) : (y))
2146f4ced0bSchristos 
2156f4ced0bSchristos   {
2166f4ced0bSchristos     char *const end_ptr = &base_ptr[size * (total_elems - 1)];
2176f4ced0bSchristos     char *tmp_ptr = base_ptr;
2186f4ced0bSchristos     char *thresh = min(end_ptr, base_ptr + max_thresh);
2196f4ced0bSchristos     char *run_ptr;
2206f4ced0bSchristos 
2216f4ced0bSchristos     /* Find smallest element in first threshold and place it at the
2226f4ced0bSchristos        array's beginning.  This is the smallest array element,
2236f4ced0bSchristos        and the operation speeds up insertion sort's inner loop. */
2246f4ced0bSchristos 
2256f4ced0bSchristos     for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
2266f4ced0bSchristos       if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
2276f4ced0bSchristos         tmp_ptr = run_ptr;
2286f4ced0bSchristos 
2296f4ced0bSchristos     if (tmp_ptr != base_ptr)
2306f4ced0bSchristos       SWAP (tmp_ptr, base_ptr, size);
2316f4ced0bSchristos 
2326f4ced0bSchristos     /* Insertion sort, running from left-hand-side up to right-hand-side.  */
2336f4ced0bSchristos 
2346f4ced0bSchristos     run_ptr = base_ptr + size;
2356f4ced0bSchristos     while ((run_ptr += size) <= end_ptr)
2366f4ced0bSchristos       {
2376f4ced0bSchristos 	tmp_ptr = run_ptr - size;
2386f4ced0bSchristos 	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
2396f4ced0bSchristos 	  tmp_ptr -= size;
2406f4ced0bSchristos 
2416f4ced0bSchristos 	tmp_ptr += size;
2426f4ced0bSchristos         if (tmp_ptr != run_ptr)
2436f4ced0bSchristos           {
2446f4ced0bSchristos             char *trav;
2456f4ced0bSchristos 
2466f4ced0bSchristos 	    trav = run_ptr + size;
2476f4ced0bSchristos 	    while (--trav >= run_ptr)
2486f4ced0bSchristos               {
2496f4ced0bSchristos                 char c = *trav;
2506f4ced0bSchristos                 char *hi, *lo;
2516f4ced0bSchristos 
2526f4ced0bSchristos                 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
2536f4ced0bSchristos                   *hi = *lo;
2546f4ced0bSchristos                 *hi = c;
2556f4ced0bSchristos               }
2566f4ced0bSchristos           }
2576f4ced0bSchristos       }
2586f4ced0bSchristos   }
2596f4ced0bSchristos }
260