xref: /netbsd-src/external/mit/isl/dist/isl_sort.c (revision 5971e316fdea024efff6be8f03536623db06833e)
1*5971e316Smrg /*
2*5971e316Smrg  * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3*5971e316Smrg  * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4*5971e316Smrg  * The MIT license text is as follows:
5*5971e316Smrg  *
6*5971e316Smrg  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*5971e316Smrg  * of this software and associated documentation files (the "Software"), to
8*5971e316Smrg  * deal in the Software without restriction, including without limitation the
9*5971e316Smrg  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*5971e316Smrg  * sell copies of the Software, and to permit persons to whom the Software is
11*5971e316Smrg  * furnished to do so, subject to the following conditions:
12*5971e316Smrg  *
13*5971e316Smrg  * The above copyright notice and this permission notice shall be included in
14*5971e316Smrg  * all copies or substantial portions of the Software.
15*5971e316Smrg  *
16*5971e316Smrg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*5971e316Smrg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*5971e316Smrg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*5971e316Smrg  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*5971e316Smrg  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21*5971e316Smrg  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22*5971e316Smrg  * IN THE SOFTWARE.
23*5971e316Smrg  */
24*5971e316Smrg 
25*5971e316Smrg #include <errno.h>
26*5971e316Smrg #include <string.h>
27*5971e316Smrg #include <stdlib.h>
28*5971e316Smrg #include <isl_sort.h>
29*5971e316Smrg 
30*5971e316Smrg #define MID(lo, hi) (lo + ((hi - lo) >> 1))
31*5971e316Smrg 
32*5971e316Smrg /* The code here is an optimized merge sort. Starting from a generic merge sort
33*5971e316Smrg  * the following optimizations were applied:
34*5971e316Smrg  *
35*5971e316Smrg  * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36*5971e316Smrg  *   every element into a temporary buffer, blocks of elements are copied
37*5971e316Smrg  *   at a time.
38*5971e316Smrg  *
39*5971e316Smrg  * o To reduce the number of memcpy() calls further, copying leading
40*5971e316Smrg  *   and trailing elements into our temporary buffer is avoided, in case it is
41*5971e316Smrg  *   not necessary to merge them.
42*5971e316Smrg  *
43*5971e316Smrg  * A further optimization could be to specialize memcpy calls based on the
44*5971e316Smrg  * size of the types we compare. For now, this code does not include the
45*5971e316Smrg  * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46*5971e316Smrg  * implementation. It is not clear, that the specialized version as provided in
47*5971e316Smrg  * the blog post, is really superior to the one that will be inlined by
48*5971e316Smrg  * default. So we decided to keep the code simple until this optimization was
49*5971e316Smrg  * proven to be beneficial.
50*5971e316Smrg  */
51*5971e316Smrg 
52*5971e316Smrg static void
msort(void * array,void * buf,size_t low,size_t high,size_t size,int (* compare)(const void *,const void *,void *),void * arg)53*5971e316Smrg msort (void *array, void *buf, size_t low, size_t high, size_t size,
54*5971e316Smrg        int (* compare) (const void *, const void *, void *), void *arg)
55*5971e316Smrg {
56*5971e316Smrg     char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57*5971e316Smrg     size_t copied = 0;
58*5971e316Smrg     size_t mid;
59*5971e316Smrg 
60*5971e316Smrg     mid = MID (low, high);
61*5971e316Smrg 
62*5971e316Smrg     if (mid + 1 < high)
63*5971e316Smrg         msort (array, buf, mid + 1, high, size, compare, arg);
64*5971e316Smrg 
65*5971e316Smrg     if (mid > low)
66*5971e316Smrg         msort (array, buf, low, mid, size, compare, arg);
67*5971e316Smrg 
68*5971e316Smrg     ah = ((char *) array) + ((high + 1) * size);
69*5971e316Smrg     am = ((char *) array) + ((mid + 1) * size);
70*5971e316Smrg     a1 = al = ((char *) array) + (low * size);
71*5971e316Smrg 
72*5971e316Smrg     b = (char *) buf;
73*5971e316Smrg     lo = al;
74*5971e316Smrg     hi = am;
75*5971e316Smrg 
76*5971e316Smrg     do {
77*5971e316Smrg         ls = lo;
78*5971e316Smrg         hs = hi;
79*5971e316Smrg 
80*5971e316Smrg         if (lo > al || hi > am) {
81*5971e316Smrg             /* our last loop already compared lo & hi and found lo <= hi */
82*5971e316Smrg             lo += size;
83*5971e316Smrg         }
84*5971e316Smrg 
85*5971e316Smrg         while (lo < am && compare (lo, hi, arg) <= 0)
86*5971e316Smrg             lo += size;
87*5971e316Smrg 
88*5971e316Smrg         if (lo < am) {
89*5971e316Smrg             if (copied == 0) {
90*5971e316Smrg                 /* avoid copying the leading items */
91*5971e316Smrg                 a1 = lo;
92*5971e316Smrg                 ls = lo;
93*5971e316Smrg             }
94*5971e316Smrg 
95*5971e316Smrg             /* our last compare tells us hi < lo */
96*5971e316Smrg             hi += size;
97*5971e316Smrg 
98*5971e316Smrg             while (hi < ah && compare (hi, lo, arg) < 0)
99*5971e316Smrg                 hi += size;
100*5971e316Smrg 
101*5971e316Smrg             if (lo > ls) {
102*5971e316Smrg                 memcpy (b, ls, lo - ls);
103*5971e316Smrg                 copied += (lo - ls);
104*5971e316Smrg                 b += (lo - ls);
105*5971e316Smrg             }
106*5971e316Smrg 
107*5971e316Smrg             memcpy (b, hs, hi - hs);
108*5971e316Smrg             copied += (hi - hs);
109*5971e316Smrg             b += (hi - hs);
110*5971e316Smrg         } else if (copied) {
111*5971e316Smrg             memcpy (b, ls, lo - ls);
112*5971e316Smrg             copied += (lo - ls);
113*5971e316Smrg             b += (lo - ls);
114*5971e316Smrg 
115*5971e316Smrg             /* copy everything we needed to re-order back into array */
116*5971e316Smrg             memcpy (a1, buf, copied);
117*5971e316Smrg             return;
118*5971e316Smrg         } else {
119*5971e316Smrg             /* everything already in order */
120*5971e316Smrg             return;
121*5971e316Smrg         }
122*5971e316Smrg     } while (hi < ah);
123*5971e316Smrg 
124*5971e316Smrg     if (lo < am) {
125*5971e316Smrg         memcpy (b, lo, am - lo);
126*5971e316Smrg         copied += (am - lo);
127*5971e316Smrg     }
128*5971e316Smrg 
129*5971e316Smrg     memcpy (a1, buf, copied);
130*5971e316Smrg }
131*5971e316Smrg 
132*5971e316Smrg static int
MergeSort(void * base,size_t nmemb,size_t size,int (* compare)(const void *,const void *,void *),void * arg)133*5971e316Smrg MergeSort (void *base, size_t nmemb, size_t size,
134*5971e316Smrg            int (* compare) (const void *, const void *, void *), void *arg)
135*5971e316Smrg {
136*5971e316Smrg     void *tmp;
137*5971e316Smrg 
138*5971e316Smrg     if (nmemb < 2)
139*5971e316Smrg         return 0;
140*5971e316Smrg 
141*5971e316Smrg     if (!(tmp = malloc (nmemb * size))) {
142*5971e316Smrg         errno = ENOMEM;
143*5971e316Smrg         return -1;
144*5971e316Smrg     }
145*5971e316Smrg 
146*5971e316Smrg     msort (base, tmp, 0, nmemb - 1, size, compare, arg);
147*5971e316Smrg 
148*5971e316Smrg     free (tmp);
149*5971e316Smrg 
150*5971e316Smrg     return 0;
151*5971e316Smrg }
152*5971e316Smrg 
isl_sort(void * const pbase,size_t total_elems,size_t size,int (* cmp)(const void *,const void *,void * arg),void * arg)153*5971e316Smrg int isl_sort(void *const pbase, size_t total_elems, size_t size,
154*5971e316Smrg 	int (*cmp)(const void *, const void *, void *arg), void *arg)
155*5971e316Smrg {
156*5971e316Smrg     return MergeSort (pbase, total_elems, size, cmp, arg);
157*5971e316Smrg }
158