xref: /csrg-svn/lib/libc/stdlib/merge.c (revision 56962)
1*56962Sbostic /*-
2*56962Sbostic  * Copyright (c) 1992 The Regents of the University of California.
3*56962Sbostic  * All rights reserved.
4*56962Sbostic  *
5*56962Sbostic  * This code is derived from software contributed to Berkeley by
6*56962Sbostic  * Peter McIlroy.
7*56962Sbostic  *
8*56962Sbostic  * %sccs.include.redist.c%
9*56962Sbostic  */
10*56962Sbostic 
11*56962Sbostic #if defined(LIBC_SCCS) && !defined(lint)
12*56962Sbostic static char sccsid[] = "@(#)merge.c	5.1 (Berkeley) 12/02/92";
13*56962Sbostic #endif /* LIBC_SCCS and not lint */
14*56962Sbostic 
15*56962Sbostic /*
16*56962Sbostic  * Hybrid exponential search/linear search merge sort with hybrid
17*56962Sbostic  * natural/pairwise first pass.  Requires about .3% more comparisons
18*56962Sbostic  * for random data than LSMS with pairwise first pass alone.
19*56962Sbostic  * It works for objects as small as two bytes.
20*56962Sbostic  */
21*56962Sbostic 
22*56962Sbostic #define NATURAL
23*56962Sbostic #define THRESHOLD 16	/* Best choice for natural merge cut-off. */
24*56962Sbostic 
25*56962Sbostic /* #define NATURAL to get hybrid natural merge.
26*56962Sbostic  * (The default is pairwise merging.)
27*56962Sbostic  */
28*56962Sbostic 
29*56962Sbostic #include <sys/types.h>
30*56962Sbostic 
31*56962Sbostic #include <errno.h>
32*56962Sbostic #include <stdlib.h>
33*56962Sbostic #include <string.h>
34*56962Sbostic 
35*56962Sbostic static void setup __P((u_char *, u_char *, size_t, size_t, int (*)()));
36*56962Sbostic static void insertionsort __P((u_char *, size_t, size_t, int (*)()));
37*56962Sbostic 
38*56962Sbostic #define ISIZE sizeof(int)
39*56962Sbostic #define PSIZE sizeof(u_char *)
40*56962Sbostic #define ICOPY_LIST(src, dst, last)				\
41*56962Sbostic 	do							\
42*56962Sbostic 	*(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE;	\
43*56962Sbostic 	while(src < last)
44*56962Sbostic #define ICOPY_ELT(src, dst, i)					\
45*56962Sbostic 	do							\
46*56962Sbostic 	*(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE;	\
47*56962Sbostic 	while (i -= ISIZE)
48*56962Sbostic 
49*56962Sbostic #define CCOPY_LIST(src, dst, last)		\
50*56962Sbostic 	do					\
51*56962Sbostic 		*dst++ = *src++;		\
52*56962Sbostic 	while (src < last)
53*56962Sbostic #define CCOPY_ELT(src, dst, i)			\
54*56962Sbostic 	do					\
55*56962Sbostic 		*dst++ = *src++;		\
56*56962Sbostic 	while (i -= 1)
57*56962Sbostic 
58*56962Sbostic /*
59*56962Sbostic  * Find the next possible pointer head.  (Trickery for forcing an array
60*56962Sbostic  * to do double duty as a linked list when objects do not align with word
61*56962Sbostic  * boundaries.
62*56962Sbostic  */
63*56962Sbostic /* Assumption: PSIZE is a power of 2. */
64*56962Sbostic #define EVAL(p) (u_char **)						\
65*56962Sbostic 	((u_char *)0 +							\
66*56962Sbostic 	    (((u_char *)p + PSIZE - 1 - (u_char *) 0) & ~(PSIZE - 1)))
67*56962Sbostic 
68*56962Sbostic /*
69*56962Sbostic  * Arguments are as for qsort.
70*56962Sbostic  */
71*56962Sbostic int
72*56962Sbostic mergesort(base, nmemb, size, cmp)
73*56962Sbostic 	void *base;
74*56962Sbostic 	size_t nmemb;
75*56962Sbostic 	register size_t size;
76*56962Sbostic 	int (*cmp) __P((const void *, const void *));
77*56962Sbostic {
78*56962Sbostic 	register int i, sense;
79*56962Sbostic 	int big, iflag;
80*56962Sbostic 	register u_char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2;
81*56962Sbostic 	u_char *list2, *list1, *p2, *p, *last, **p1;
82*56962Sbostic 
83*56962Sbostic 	if (size < PSIZE/2) {	/* pointers must fit into 2*size */
84*56962Sbostic 		errno = EINVAL;
85*56962Sbostic 		return (-1);
86*56962Sbostic 	}
87*56962Sbostic 
88*56962Sbostic 	/*
89*56962Sbostic 	 * XXX
90*56962Sbostic 	 * Stupid subtraction for the Cray.
91*56962Sbostic 	 */
92*56962Sbostic 	iflag = 0;
93*56962Sbostic 	if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE))
94*56962Sbostic 		iflag = 1;
95*56962Sbostic 
96*56962Sbostic 	if ((list2 = malloc(nmemb * size + PSIZE)) == NULL)
97*56962Sbostic 		return (-1);
98*56962Sbostic 
99*56962Sbostic 	list1 = base;
100*56962Sbostic 	setup(list1, list2, nmemb, size, cmp);
101*56962Sbostic 	last = list2 + nmemb * size;
102*56962Sbostic 	i = big = 0;
103*56962Sbostic 	while (*EVAL(list2) != last) {
104*56962Sbostic 	    l2 = list1;
105*56962Sbostic 	    p1 = EVAL(list1);
106*56962Sbostic 	    for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) {
107*56962Sbostic 	    	p2 = *EVAL(p2);
108*56962Sbostic 	    	f1 = l2;
109*56962Sbostic 	    	f2 = l1 = list1 + (p2 - list2);
110*56962Sbostic 	    	if (p2 != last)
111*56962Sbostic 	    		p2 = *EVAL(p2);
112*56962Sbostic 	    	l2 = list1 + (p2 - list2);
113*56962Sbostic 	    	while (f1 < l1 && f2 < l2) {
114*56962Sbostic 	    		if ((*cmp)(f1, f2) <= 0) {
115*56962Sbostic 	    			q = f2;
116*56962Sbostic 	    			b = f1, t = l1;
117*56962Sbostic 	    			sense = -1;
118*56962Sbostic 	    		} else {
119*56962Sbostic 	    			q = f1;
120*56962Sbostic 	    			b = f2, t = l2;
121*56962Sbostic 	    			sense = 0;
122*56962Sbostic 	    		}
123*56962Sbostic 	    		if (!big) {	/* here i = 0 */
124*56962Sbostic LINEAR:	    			while ((b += size) < t && cmp(q, b) >sense)
125*56962Sbostic 	    				if (++i == 6) {
126*56962Sbostic 	    					big = 1;
127*56962Sbostic 	    					goto EXPONENTIAL;
128*56962Sbostic 	    				}
129*56962Sbostic 	    		} else {
130*56962Sbostic EXPONENTIAL:	    		for (i = size; ; i <<= 1)
131*56962Sbostic 	    				if ((p = (b + i)) >= t) {
132*56962Sbostic 	    					if ((p = t - size) > b &&
133*56962Sbostic 						    (*cmp)(q, p) <= sense)
134*56962Sbostic 	    						t = p;
135*56962Sbostic 	    					else
136*56962Sbostic 	    						b = p;
137*56962Sbostic 	    					break;
138*56962Sbostic 	    				} else if ((*cmp)(q, p) <= sense) {
139*56962Sbostic 	    					t = p;
140*56962Sbostic 	    					if (i == size)
141*56962Sbostic 	    						big = 0;
142*56962Sbostic 	    					goto FASTCASE;
143*56962Sbostic 	    				} else
144*56962Sbostic 	    					b = p;
145*56962Sbostic SLOWCASE:	    		while (t > b+size) {
146*56962Sbostic 	    				i = (((t - b) / size) >> 1) * size;
147*56962Sbostic 	    				if ((*cmp)(q, p = b + i) <= sense)
148*56962Sbostic 	    					t = p;
149*56962Sbostic 	    				else
150*56962Sbostic 	    					b = p;
151*56962Sbostic 	    			}
152*56962Sbostic 	    			goto COPY;
153*56962Sbostic FASTCASE:	    		while (i > size)
154*56962Sbostic 	    				if ((*cmp)(q,
155*56962Sbostic 	    					p = b + (i >>= 1)) <= sense)
156*56962Sbostic 	    					t = p;
157*56962Sbostic 	    				else
158*56962Sbostic 	    					b = p;
159*56962Sbostic COPY:	    			b = t;
160*56962Sbostic 	    		}
161*56962Sbostic 	    		i = size;
162*56962Sbostic 	    		if (q == f1) {
163*56962Sbostic 	    			if (iflag) {
164*56962Sbostic 	    				ICOPY_LIST(f2, tp2, b);
165*56962Sbostic 	    				ICOPY_ELT(f1, tp2, i);
166*56962Sbostic 	    			} else {
167*56962Sbostic 	    				CCOPY_LIST(f2, tp2, b);
168*56962Sbostic 	    				CCOPY_ELT(f1, tp2, i);
169*56962Sbostic 	    			}
170*56962Sbostic 	    		} else {
171*56962Sbostic 	    			if (iflag) {
172*56962Sbostic 	    				ICOPY_LIST(f1, tp2, b);
173*56962Sbostic 	    				ICOPY_ELT(f2, tp2, i);
174*56962Sbostic 	    			} else {
175*56962Sbostic 	    				CCOPY_LIST(f1, tp2, b);
176*56962Sbostic 	    				CCOPY_ELT(f2, tp2, i);
177*56962Sbostic 	    			}
178*56962Sbostic 	    		}
179*56962Sbostic 	    	}
180*56962Sbostic 	    	if (f2 < l2) {
181*56962Sbostic 	    		if (iflag)
182*56962Sbostic 	    			ICOPY_LIST(f2, tp2, l2);
183*56962Sbostic 	    		else
184*56962Sbostic 	    			CCOPY_LIST(f2, tp2, l2);
185*56962Sbostic 	    	} else if (f1 < l1) {
186*56962Sbostic 	    		if (iflag)
187*56962Sbostic 	    			ICOPY_LIST(f1, tp2, l1);
188*56962Sbostic 	    		else
189*56962Sbostic 	    			CCOPY_LIST(f1, tp2, l1);
190*56962Sbostic 	    	}
191*56962Sbostic 	    	*p1 = l2;
192*56962Sbostic 	    }
193*56962Sbostic 	    tp2 = list1;	/* swap list1, list2 */
194*56962Sbostic 	    list1 = list2;
195*56962Sbostic 	    list2 = tp2;
196*56962Sbostic 	    last = list2 + nmemb*size;
197*56962Sbostic 	}
198*56962Sbostic 	if (base == list2) {
199*56962Sbostic 		memmove(list2, list1, nmemb*size);
200*56962Sbostic 		list2 = list1;
201*56962Sbostic 	}
202*56962Sbostic 	free(list2);
203*56962Sbostic 	return (0);
204*56962Sbostic }
205*56962Sbostic 
206*56962Sbostic #define	swap(a, b) {					\
207*56962Sbostic 		s = b;					\
208*56962Sbostic 		i = size;				\
209*56962Sbostic 		do {					\
210*56962Sbostic 			tmp = *a; *a++ = *s; *s++ = tmp; \
211*56962Sbostic 		} while (--i);				\
212*56962Sbostic 		a -= size;				\
213*56962Sbostic 	}
214*56962Sbostic #define reverse(bot, top) {				\
215*56962Sbostic 	s = top;					\
216*56962Sbostic 	do {						\
217*56962Sbostic 		i = size;				\
218*56962Sbostic 		do {					\
219*56962Sbostic 			tmp = *bot; *bot++ = *s; *s++ = tmp; \
220*56962Sbostic 		} while (--i);				\
221*56962Sbostic 		s -= size2;				\
222*56962Sbostic 	} while(bot < s);				\
223*56962Sbostic }
224*56962Sbostic 
225*56962Sbostic /*
226*56962Sbostic  * Optional hybrid natural/pairwise first pass.  Eats up list1 in runs of
227*56962Sbostic  * increasing order, list2 in a corresponding linked list.  Checks for runs
228*56962Sbostic  * when THRESHOLD/2 pairs compare with same sense.  (Only used when NATURAL
229*56962Sbostic  * is defined.  Otherwise simple pairwise merging is used.)
230*56962Sbostic  */
231*56962Sbostic void
232*56962Sbostic setup(list1, list2, n, size, cmp)
233*56962Sbostic 	size_t n, size;
234*56962Sbostic 	int (*cmp) __P((const void *, const void *));
235*56962Sbostic 	u_char *list1, *list2;
236*56962Sbostic {
237*56962Sbostic 	int i, length, size2, tmp, sense;
238*56962Sbostic 	u_char *f1, *f2, *s, *l2, *last, *p2;
239*56962Sbostic 
240*56962Sbostic 	size2 = size*2;
241*56962Sbostic 	if (n <= 5) {
242*56962Sbostic 		insertionsort(list1, n, size, cmp);
243*56962Sbostic 		*EVAL(list2) = (u_char*) list2 + n*size;
244*56962Sbostic 		return;
245*56962Sbostic 	}
246*56962Sbostic 	/*
247*56962Sbostic 	 * Avoid running pointers out of bounds; limit n to evens
248*56962Sbostic 	 * for simplicity.
249*56962Sbostic 	 */
250*56962Sbostic 	i = 4 + (n & 1);
251*56962Sbostic 	insertionsort(list1 + (n - i) * size, i, size, cmp);
252*56962Sbostic 	last = list1 + size * (n - i);
253*56962Sbostic 	*EVAL(list2 + (last - list1)) = list2 + n * size;
254*56962Sbostic 
255*56962Sbostic #ifdef NATURAL
256*56962Sbostic 	p2 = list2;
257*56962Sbostic 	f1 = list1;
258*56962Sbostic 	sense = (cmp(f1, f1 + size) > 0);
259*56962Sbostic 	for (; f1 < last; sense = !sense) {
260*56962Sbostic 		length = 2;
261*56962Sbostic 					/* Find pairs with same sense. */
262*56962Sbostic 		for (f2 = f1 + size2; f2 < last; f2 += size2) {
263*56962Sbostic 			if ((cmp(f2, f2+ size) > 0) != sense)
264*56962Sbostic 				break;
265*56962Sbostic 			length += 2;
266*56962Sbostic 		}
267*56962Sbostic 		if (length < THRESHOLD) {		/* Pairwise merge */
268*56962Sbostic 			do {
269*56962Sbostic 				p2 = *EVAL(p2) = f1 + size2 - list1 + list2;
270*56962Sbostic 				if (sense > 0)
271*56962Sbostic 					swap (f1, f1 + size);
272*56962Sbostic 			} while ((f1 += size2) < f2);
273*56962Sbostic 		} else {				/* Natural merge */
274*56962Sbostic 			l2 = f2;
275*56962Sbostic 			for (f2 = f1 + size2; f2 < l2; f2 += size2) {
276*56962Sbostic 				if ((cmp(f2-size, f2) > 0) != sense) {
277*56962Sbostic 					p2 = *EVAL(p2) = f2 - list1 + list2;
278*56962Sbostic 					if (sense > 0)
279*56962Sbostic 						reverse(f1, f2-size);
280*56962Sbostic 					f1 = f2;
281*56962Sbostic 				}
282*56962Sbostic 			}
283*56962Sbostic 			if (sense > 0)
284*56962Sbostic 				reverse (f1, f2-size);
285*56962Sbostic 			f1 = f2;
286*56962Sbostic 			if (f2 < last || cmp(f2, f2 + size) > 0)
287*56962Sbostic 				p2 = *EVAL(p2) = f2 - list1 + list2;
288*56962Sbostic 			else
289*56962Sbostic 				p2 = *EVAL(p2) = list2 + n*size;
290*56962Sbostic 		}
291*56962Sbostic 	}
292*56962Sbostic #else		/* pairwise merge only. */
293*56962Sbostic 	for (f1 = list1, p2 = list2; f1 < last; f1 += size2) {
294*56962Sbostic 		p2 = *EVAL(p2) = p2 + size2;
295*56962Sbostic 		if (cmp (f1, f1 + size) > 0)
296*56962Sbostic 			swap(f1, f1 + size);
297*56962Sbostic 	}
298*56962Sbostic #endif /* NATURAL */
299*56962Sbostic }
300*56962Sbostic 
301*56962Sbostic /*
302*56962Sbostic  * This is to avoid out-of-bounds addresses in sorting the
303*56962Sbostic  * last 4 elements.
304*56962Sbostic  */
305*56962Sbostic static void
306*56962Sbostic insertionsort(a, n, size, cmp)
307*56962Sbostic 	u_char *a;
308*56962Sbostic 	size_t n, size;
309*56962Sbostic 	int (*cmp) __P((const void *, const void *));
310*56962Sbostic {
311*56962Sbostic 	u_char *ai, *s, *t, *u, tmp;
312*56962Sbostic 	int i;
313*56962Sbostic 
314*56962Sbostic 	for (ai = a+size; --n >= 1; ai += size)
315*56962Sbostic 		for (t = ai; t > a; t -= size) {
316*56962Sbostic 			u = t - size;
317*56962Sbostic 			if (cmp(u, t) <= 0)
318*56962Sbostic 				break;
319*56962Sbostic 			swap(u, t);
320*56962Sbostic 		}
321*56962Sbostic }
322