xref: /csrg-svn/lib/libc/stdlib/qsort.c (revision 54201)
142126Sbostic /*-
245643Sbostic  * Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
334434Sbostic  * All rights reserved.
434434Sbostic  *
542126Sbostic  * %sccs.include.redist.c%
622102Smckusick  */
71975Swnj 
826577Sdonn #if defined(LIBC_SCCS) && !defined(lint)
9*54201Sbostic static char sccsid[] = "@(#)qsort.c	5.10 (Berkeley) 06/22/92";
1034434Sbostic #endif /* LIBC_SCCS and not lint */
1122102Smckusick 
1245643Sbostic #include <sys/types.h>
1346599Sdonn #include <stdlib.h>
1442179Sbostic 
1511449Smckusick /*
16*54201Sbostic  * XXX
17*54201Sbostic  * Report from John Bentley at ATT:  the BSD qsort goes quadratic on random
18*54201Sbostic  * 0 and 1  (N/2 of each.)  This is because it does not keep = elements
19*54201Sbostic  * together.   (<= go before test element; > go after, > so everything gets
20*54201Sbostic  * shoved before the test element, which is qsort's worst case.)
21*54201Sbostic  */
22*54201Sbostic 
23*54201Sbostic /*
2445643Sbostic  * MTHRESH is the smallest partition for which we compare for a median
2545643Sbostic  * value instead of using the middle value.
2611449Smckusick  */
2745643Sbostic #define	MTHRESH	6
281975Swnj 
2945643Sbostic /*
3045643Sbostic  * THRESH is the minimum number of entries in a partition for continued
3145643Sbostic  * partitioning.
3245643Sbostic  */
3345643Sbostic #define	THRESH	4
341975Swnj 
3545643Sbostic void
3645643Sbostic qsort(bot, nmemb, size, compar)
3746599Sdonn 	void *bot;
3846599Sdonn 	size_t nmemb, size;
3946599Sdonn 	int (*compar) __P((const void *, const void *));
4045643Sbostic {
4146599Sdonn 	static void insertion_sort(), quick_sort();
421975Swnj 
4345643Sbostic 	if (nmemb <= 1)
4445643Sbostic 		return;
4545643Sbostic 
4645643Sbostic 	if (nmemb >= THRESH)
4745643Sbostic 		quick_sort(bot, nmemb, size, compar);
4845643Sbostic 	else
4945643Sbostic 		insertion_sort(bot, nmemb, size, compar);
5045643Sbostic }
5145643Sbostic 
5211449Smckusick /*
5345643Sbostic  * Swap two areas of size number of bytes.  Although qsort(3) permits random
5445643Sbostic  * blocks of memory to be sorted, sorting pointers is almost certainly the
5545643Sbostic  * common case (and, were it not, could easily be made so).  Regardless, it
5645643Sbostic  * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
5745643Sbostic  * arithmetic gets lost in the time required for comparison function calls.
5811449Smckusick  */
5945643Sbostic #define	SWAP(a, b) { \
6045643Sbostic 	cnt = size; \
6145643Sbostic 	do { \
6245643Sbostic 		ch = *a; \
6345643Sbostic 		*a++ = *b; \
6445643Sbostic 		*b++ = ch; \
6545643Sbostic 	} while (--cnt); \
6645643Sbostic }
671975Swnj 
6845643Sbostic /*
6945643Sbostic  * Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
7045643Sbostic  * of straight insertion sort after partitioning is complete is better than
7145643Sbostic  * sorting each small partition as it is created.  This isn't correct in this
7245643Sbostic  * implementation because comparisons require at least one (and often two)
7345643Sbostic  * function calls and are likely to be the dominating expense of the sort.
7445643Sbostic  * Doing a final insertion sort does more comparisons than are necessary
7545643Sbostic  * because it compares the "edges" and medians of the partitions which are
7645643Sbostic  * known to be already sorted.
7745643Sbostic  *
7845643Sbostic  * This is also the reasoning behind selecting a small THRESH value (see
7945643Sbostic  * Knuth, page 122, equation 26), since the quicksort algorithm does less
8045643Sbostic  * comparisons than the insertion sort.
8145643Sbostic  */
8245643Sbostic #define	SORT(bot, n) { \
8345643Sbostic 	if (n > 1) \
8445643Sbostic 		if (n == 2) { \
8545643Sbostic 			t1 = bot + size; \
8645643Sbostic 			if (compar(t1, bot) < 0) \
8745643Sbostic 				SWAP(t1, bot); \
8845643Sbostic 		} else \
8945643Sbostic 			insertion_sort(bot, n, size, compar); \
9045643Sbostic }
9145643Sbostic 
9245643Sbostic static void
9345643Sbostic quick_sort(bot, nmemb, size, compar)
9445643Sbostic 	register char *bot;
9545643Sbostic 	register int size;
9645643Sbostic 	int nmemb, (*compar)();
9711449Smckusick {
9845643Sbostic 	register int cnt;
9945643Sbostic 	register u_char ch;
10045643Sbostic 	register char *top, *mid, *t1, *t2;
10145643Sbostic 	register int n1, n2;
10245643Sbostic 	char *bsv;
10346599Sdonn 	static void insertion_sort();
1041975Swnj 
10545643Sbostic 	/* bot and nmemb must already be set. */
10645643Sbostic partition:
10745643Sbostic 
10845643Sbostic 	/* find mid and top elements */
10945643Sbostic 	mid = bot + size * (nmemb >> 1);
11045643Sbostic 	top = bot + (nmemb - 1) * size;
11145643Sbostic 
11211449Smckusick 	/*
11345643Sbostic 	 * Find the median of the first, last and middle element (see Knuth,
11445643Sbostic 	 * Vol. 3, page 123, Eq. 28).  This test order gets the equalities
11545643Sbostic 	 * right.
11611449Smckusick 	 */
11745643Sbostic 	if (nmemb >= MTHRESH) {
11845643Sbostic 		n1 = compar(bot, mid);
11945643Sbostic 		n2 = compar(mid, top);
12045643Sbostic 		if (n1 < 0 && n2 > 0)
12145643Sbostic 			t1 = compar(bot, top) < 0 ? top : bot;
12245643Sbostic 		else if (n1 > 0 && n2 < 0)
12345643Sbostic 			t1 = compar(bot, top) > 0 ? top : bot;
12445643Sbostic 		else
12545643Sbostic 			t1 = mid;
12645643Sbostic 
12745643Sbostic 		/* if mid element not selected, swap selection there */
12845643Sbostic 		if (t1 != mid) {
12945643Sbostic 			SWAP(t1, mid);
13045643Sbostic 			mid -= size;
13111449Smckusick 		}
13211449Smckusick 	}
13345643Sbostic 
13445643Sbostic 	/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
13545643Sbostic #define	didswap	n1
13645643Sbostic #define	newbot	t1
13745643Sbostic #define	replace	t2
13845643Sbostic 	didswap = 0;
13945643Sbostic 	for (bsv = bot;;) {
14045643Sbostic 		for (; bot < mid && compar(bot, mid) <= 0; bot += size);
14145643Sbostic 		while (top > mid) {
14245643Sbostic 			if (compar(mid, top) <= 0) {
14345643Sbostic 				top -= size;
14445643Sbostic 				continue;
1451975Swnj 			}
14645643Sbostic 			newbot = bot + size;	/* value of bot after swap */
14745643Sbostic 			if (bot == mid)		/* top <-> mid, mid == top */
14845643Sbostic 				replace = mid = top;
14945643Sbostic 			else {			/* bot <-> top */
15045643Sbostic 				replace = top;
15145643Sbostic 				top -= size;
15245643Sbostic 			}
15345643Sbostic 			goto swap;
1541975Swnj 		}
15545643Sbostic 		if (bot == mid)
15645643Sbostic 			break;
15745643Sbostic 
15845643Sbostic 		/* bot <-> mid, mid == bot */
15945643Sbostic 		replace = mid;
16045643Sbostic 		newbot = mid = bot;		/* value of bot after swap */
16145643Sbostic 		top -= size;
16245643Sbostic 
16345643Sbostic swap:		SWAP(bot, replace);
16445643Sbostic 		bot = newbot;
16545643Sbostic 		didswap = 1;
16611449Smckusick 	}
1671975Swnj 
16845643Sbostic 	/*
16945643Sbostic 	 * Quicksort behaves badly in the presence of data which is already
17045643Sbostic 	 * sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
17145643Sbostic 	 * To avoid this worst case behavior, if a re-partitioning occurs
17245643Sbostic 	 * without swapping any elements, it is not further partitioned and
17345643Sbostic 	 * is insert sorted.  This wins big with almost sorted data sets and
17445643Sbostic 	 * only loses if the data set is very strangely partitioned.  A fix
17545643Sbostic 	 * for those data sets would be to return prematurely if the insertion
17645643Sbostic 	 * sort routine is forced to make an excessive number of swaps, and
17745643Sbostic 	 * continue the partitioning.
17845643Sbostic 	 */
17945643Sbostic 	if (!didswap) {
18045643Sbostic 		insertion_sort(bsv, nmemb, size, compar);
18145643Sbostic 		return;
18245643Sbostic 	}
18311449Smckusick 
18445643Sbostic 	/*
18545643Sbostic 	 * Re-partition or sort as necessary.  Note that the mid element
18645643Sbostic 	 * itself is correctly positioned and can be ignored.
18745643Sbostic 	 */
18845643Sbostic #define	nlower	n1
18945643Sbostic #define	nupper	n2
19045643Sbostic 	bot = bsv;
19145643Sbostic 	nlower = (mid - bot) / size;	/* size of lower partition */
19245643Sbostic 	mid += size;
19345643Sbostic 	nupper = nmemb - nlower - 1;	/* size of upper partition */
19411449Smckusick 
19511449Smckusick 	/*
19645643Sbostic 	 * If must call recursively, do it on the smaller partition; this
19745643Sbostic 	 * bounds the stack to lg N entries.
19811449Smckusick 	 */
19945643Sbostic 	if (nlower > nupper) {
20045643Sbostic 		if (nupper >= THRESH)
20145643Sbostic 			quick_sort(mid, nupper, size, compar);
20245643Sbostic 		else {
20345643Sbostic 			SORT(mid, nupper);
20445643Sbostic 			if (nlower < THRESH) {
20545643Sbostic 				SORT(bot, nlower);
20645643Sbostic 				return;
2071975Swnj 			}
20811449Smckusick 		}
20945643Sbostic 		nmemb = nlower;
21045643Sbostic 	} else {
21145643Sbostic 		if (nlower >= THRESH)
21245643Sbostic 			quick_sort(bot, nlower, size, compar);
21345643Sbostic 		else {
21445643Sbostic 			SORT(bot, nlower);
21545643Sbostic 			if (nupper < THRESH) {
21645643Sbostic 				SORT(mid, nupper);
21745643Sbostic 				return;
2181975Swnj 			}
2191975Swnj 		}
22045643Sbostic 		bot = mid;
22145643Sbostic 		nmemb = nupper;
22245643Sbostic 	}
22345643Sbostic 	goto partition;
22445643Sbostic 	/* NOTREACHED */
2251975Swnj }
22645643Sbostic 
22745643Sbostic static void
22845643Sbostic insertion_sort(bot, nmemb, size, compar)
22945643Sbostic 	char *bot;
23045643Sbostic 	register int size;
23145643Sbostic 	int nmemb, (*compar)();
23245643Sbostic {
23345643Sbostic 	register int cnt;
23445643Sbostic 	register u_char ch;
23545643Sbostic 	register char *s1, *s2, *t1, *t2, *top;
23645643Sbostic 
23745643Sbostic 	/*
23845643Sbostic 	 * A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
23945643Sbostic 	 * S).  Insertion sort has the same worst case as most simple sorts
24045643Sbostic 	 * (O N^2).  It gets used here because it is (O N) in the case of
24145643Sbostic 	 * sorted data.
24245643Sbostic 	 */
24345643Sbostic 	top = bot + nmemb * size;
24445643Sbostic 	for (t1 = bot + size; t1 < top;) {
24545643Sbostic 		for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
24645643Sbostic 		if (t1 != (t2 += size)) {
24745643Sbostic 			/* Bubble bytes up through each element. */
24845643Sbostic 			for (cnt = size; cnt--; ++t1) {
24945643Sbostic 				ch = *t1;
25045643Sbostic 				for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
25145643Sbostic 					*s1 = *s2;
25245643Sbostic 				*s1 = ch;
25345643Sbostic 			}
25445643Sbostic 		} else
25545643Sbostic 			t1 += size;
25645643Sbostic 	}
25745643Sbostic }
258