1*5971e316Smrg /*
2*5971e316Smrg * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3*5971e316Smrg * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4*5971e316Smrg * The MIT license text is as follows:
5*5971e316Smrg *
6*5971e316Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy
7*5971e316Smrg * of this software and associated documentation files (the "Software"), to
8*5971e316Smrg * deal in the Software without restriction, including without limitation the
9*5971e316Smrg * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*5971e316Smrg * sell copies of the Software, and to permit persons to whom the Software is
11*5971e316Smrg * furnished to do so, subject to the following conditions:
12*5971e316Smrg *
13*5971e316Smrg * The above copyright notice and this permission notice shall be included in
14*5971e316Smrg * all copies or substantial portions of the Software.
15*5971e316Smrg *
16*5971e316Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*5971e316Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*5971e316Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*5971e316Smrg * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*5971e316Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21*5971e316Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22*5971e316Smrg * IN THE SOFTWARE.
23*5971e316Smrg */
24*5971e316Smrg
25*5971e316Smrg #include <errno.h>
26*5971e316Smrg #include <string.h>
27*5971e316Smrg #include <stdlib.h>
28*5971e316Smrg #include <isl_sort.h>
29*5971e316Smrg
30*5971e316Smrg #define MID(lo, hi) (lo + ((hi - lo) >> 1))
31*5971e316Smrg
32*5971e316Smrg /* The code here is an optimized merge sort. Starting from a generic merge sort
33*5971e316Smrg * the following optimizations were applied:
34*5971e316Smrg *
35*5971e316Smrg * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36*5971e316Smrg * every element into a temporary buffer, blocks of elements are copied
37*5971e316Smrg * at a time.
38*5971e316Smrg *
39*5971e316Smrg * o To reduce the number of memcpy() calls further, copying leading
40*5971e316Smrg * and trailing elements into our temporary buffer is avoided, in case it is
41*5971e316Smrg * not necessary to merge them.
42*5971e316Smrg *
43*5971e316Smrg * A further optimization could be to specialize memcpy calls based on the
44*5971e316Smrg * size of the types we compare. For now, this code does not include the
45*5971e316Smrg * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46*5971e316Smrg * implementation. It is not clear, that the specialized version as provided in
47*5971e316Smrg * the blog post, is really superior to the one that will be inlined by
48*5971e316Smrg * default. So we decided to keep the code simple until this optimization was
49*5971e316Smrg * proven to be beneficial.
50*5971e316Smrg */
51*5971e316Smrg
52*5971e316Smrg static void
msort(void * array,void * buf,size_t low,size_t high,size_t size,int (* compare)(const void *,const void *,void *),void * arg)53*5971e316Smrg msort (void *array, void *buf, size_t low, size_t high, size_t size,
54*5971e316Smrg int (* compare) (const void *, const void *, void *), void *arg)
55*5971e316Smrg {
56*5971e316Smrg char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57*5971e316Smrg size_t copied = 0;
58*5971e316Smrg size_t mid;
59*5971e316Smrg
60*5971e316Smrg mid = MID (low, high);
61*5971e316Smrg
62*5971e316Smrg if (mid + 1 < high)
63*5971e316Smrg msort (array, buf, mid + 1, high, size, compare, arg);
64*5971e316Smrg
65*5971e316Smrg if (mid > low)
66*5971e316Smrg msort (array, buf, low, mid, size, compare, arg);
67*5971e316Smrg
68*5971e316Smrg ah = ((char *) array) + ((high + 1) * size);
69*5971e316Smrg am = ((char *) array) + ((mid + 1) * size);
70*5971e316Smrg a1 = al = ((char *) array) + (low * size);
71*5971e316Smrg
72*5971e316Smrg b = (char *) buf;
73*5971e316Smrg lo = al;
74*5971e316Smrg hi = am;
75*5971e316Smrg
76*5971e316Smrg do {
77*5971e316Smrg ls = lo;
78*5971e316Smrg hs = hi;
79*5971e316Smrg
80*5971e316Smrg if (lo > al || hi > am) {
81*5971e316Smrg /* our last loop already compared lo & hi and found lo <= hi */
82*5971e316Smrg lo += size;
83*5971e316Smrg }
84*5971e316Smrg
85*5971e316Smrg while (lo < am && compare (lo, hi, arg) <= 0)
86*5971e316Smrg lo += size;
87*5971e316Smrg
88*5971e316Smrg if (lo < am) {
89*5971e316Smrg if (copied == 0) {
90*5971e316Smrg /* avoid copying the leading items */
91*5971e316Smrg a1 = lo;
92*5971e316Smrg ls = lo;
93*5971e316Smrg }
94*5971e316Smrg
95*5971e316Smrg /* our last compare tells us hi < lo */
96*5971e316Smrg hi += size;
97*5971e316Smrg
98*5971e316Smrg while (hi < ah && compare (hi, lo, arg) < 0)
99*5971e316Smrg hi += size;
100*5971e316Smrg
101*5971e316Smrg if (lo > ls) {
102*5971e316Smrg memcpy (b, ls, lo - ls);
103*5971e316Smrg copied += (lo - ls);
104*5971e316Smrg b += (lo - ls);
105*5971e316Smrg }
106*5971e316Smrg
107*5971e316Smrg memcpy (b, hs, hi - hs);
108*5971e316Smrg copied += (hi - hs);
109*5971e316Smrg b += (hi - hs);
110*5971e316Smrg } else if (copied) {
111*5971e316Smrg memcpy (b, ls, lo - ls);
112*5971e316Smrg copied += (lo - ls);
113*5971e316Smrg b += (lo - ls);
114*5971e316Smrg
115*5971e316Smrg /* copy everything we needed to re-order back into array */
116*5971e316Smrg memcpy (a1, buf, copied);
117*5971e316Smrg return;
118*5971e316Smrg } else {
119*5971e316Smrg /* everything already in order */
120*5971e316Smrg return;
121*5971e316Smrg }
122*5971e316Smrg } while (hi < ah);
123*5971e316Smrg
124*5971e316Smrg if (lo < am) {
125*5971e316Smrg memcpy (b, lo, am - lo);
126*5971e316Smrg copied += (am - lo);
127*5971e316Smrg }
128*5971e316Smrg
129*5971e316Smrg memcpy (a1, buf, copied);
130*5971e316Smrg }
131*5971e316Smrg
132*5971e316Smrg static int
MergeSort(void * base,size_t nmemb,size_t size,int (* compare)(const void *,const void *,void *),void * arg)133*5971e316Smrg MergeSort (void *base, size_t nmemb, size_t size,
134*5971e316Smrg int (* compare) (const void *, const void *, void *), void *arg)
135*5971e316Smrg {
136*5971e316Smrg void *tmp;
137*5971e316Smrg
138*5971e316Smrg if (nmemb < 2)
139*5971e316Smrg return 0;
140*5971e316Smrg
141*5971e316Smrg if (!(tmp = malloc (nmemb * size))) {
142*5971e316Smrg errno = ENOMEM;
143*5971e316Smrg return -1;
144*5971e316Smrg }
145*5971e316Smrg
146*5971e316Smrg msort (base, tmp, 0, nmemb - 1, size, compare, arg);
147*5971e316Smrg
148*5971e316Smrg free (tmp);
149*5971e316Smrg
150*5971e316Smrg return 0;
151*5971e316Smrg }
152*5971e316Smrg
isl_sort(void * const pbase,size_t total_elems,size_t size,int (* cmp)(const void *,const void *,void * arg),void * arg)153*5971e316Smrg int isl_sort(void *const pbase, size_t total_elems, size_t size,
154*5971e316Smrg int (*cmp)(const void *, const void *, void *arg), void *arg)
155*5971e316Smrg {
156*5971e316Smrg return MergeSort (pbase, total_elems, size, cmp, arg);
157*5971e316Smrg }
158