xref: /minix3/lib/libc/stdlib/radixsort.c (revision 2fe8fb192fe7e8720e3e7a77f928da545e872a6a)
1*2fe8fb19SBen Gras /*	$NetBSD: radixsort.c,v 1.19 2009/09/05 08:53:06 dsl Exp $	*/
2*2fe8fb19SBen Gras 
3*2fe8fb19SBen Gras /*-
4*2fe8fb19SBen Gras  * Copyright (c) 1990, 1993
5*2fe8fb19SBen Gras  *	The Regents of the University of California.  All rights reserved.
6*2fe8fb19SBen Gras  *
7*2fe8fb19SBen Gras  * This code is derived from software contributed to Berkeley by
8*2fe8fb19SBen Gras  * Peter McIlroy and by Dan Bernstein at New York University,
9*2fe8fb19SBen Gras  *
10*2fe8fb19SBen Gras  * Redistribution and use in source and binary forms, with or without
11*2fe8fb19SBen Gras  * modification, are permitted provided that the following conditions
12*2fe8fb19SBen Gras  * are met:
13*2fe8fb19SBen Gras  * 1. Redistributions of source code must retain the above copyright
14*2fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer.
15*2fe8fb19SBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
16*2fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer in the
17*2fe8fb19SBen Gras  *    documentation and/or other materials provided with the distribution.
18*2fe8fb19SBen Gras  * 3. Neither the name of the University nor the names of its contributors
19*2fe8fb19SBen Gras  *    may be used to endorse or promote products derived from this software
20*2fe8fb19SBen Gras  *    without specific prior written permission.
21*2fe8fb19SBen Gras  *
22*2fe8fb19SBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*2fe8fb19SBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*2fe8fb19SBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*2fe8fb19SBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*2fe8fb19SBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*2fe8fb19SBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*2fe8fb19SBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*2fe8fb19SBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*2fe8fb19SBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*2fe8fb19SBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*2fe8fb19SBen Gras  * SUCH DAMAGE.
33*2fe8fb19SBen Gras  */
34*2fe8fb19SBen Gras 
35*2fe8fb19SBen Gras #include <sys/cdefs.h>
36*2fe8fb19SBen Gras #if defined(LIBC_SCCS) && !defined(lint)
37*2fe8fb19SBen Gras #if 0
38*2fe8fb19SBen Gras static char sccsid[] = "@(#)radixsort.c	8.2 (Berkeley) 4/28/95";
39*2fe8fb19SBen Gras #else
40*2fe8fb19SBen Gras __RCSID("$NetBSD: radixsort.c,v 1.19 2009/09/05 08:53:06 dsl Exp $");
41*2fe8fb19SBen Gras #endif
42*2fe8fb19SBen Gras #endif /* LIBC_SCCS and not lint */
43*2fe8fb19SBen Gras 
44*2fe8fb19SBen Gras /*
45*2fe8fb19SBen Gras  * Radixsort routines.
46*2fe8fb19SBen Gras  *
47*2fe8fb19SBen Gras  * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
48*2fe8fb19SBen Gras  * Use radixsort(a, n, trace, endchar) for this case.
49*2fe8fb19SBen Gras  *
50*2fe8fb19SBen Gras  * For stable sorting (using N extra pointers) use sradixsort(), which calls
51*2fe8fb19SBen Gras  * r_sort_b().
52*2fe8fb19SBen Gras  *
53*2fe8fb19SBen Gras  * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
54*2fe8fb19SBen Gras  * "Engineering Radix Sort".
55*2fe8fb19SBen Gras  */
56*2fe8fb19SBen Gras 
57*2fe8fb19SBen Gras #include "namespace.h"
58*2fe8fb19SBen Gras #include <sys/types.h>
59*2fe8fb19SBen Gras 
60*2fe8fb19SBen Gras #include <assert.h>
61*2fe8fb19SBen Gras #include <errno.h>
62*2fe8fb19SBen Gras #include <stdlib.h>
63*2fe8fb19SBen Gras 
64*2fe8fb19SBen Gras #ifdef __weak_alias
65*2fe8fb19SBen Gras __weak_alias(radixsort,_radixsort)
66*2fe8fb19SBen Gras __weak_alias(sradixsort,_sradixsort)
67*2fe8fb19SBen Gras #endif
68*2fe8fb19SBen Gras 
/*
 * One pending unit of work on the explicit stack used by r_sort_a() and
 * r_sort_b(): a sub-array of string pointers that still has to be sorted,
 * together with the byte position at which sorting resumes.
 */
typedef struct {
	const u_char **sa;	/* first element of the sub-array */
	int sn;			/* number of elements in the sub-array */
	int si;			/* index of the byte to sort on next */
} stack;
73*2fe8fb19SBen Gras 
/* Insertion sort used for small sub-arrays. */
static inline void simplesort(const u_char **a, int n, int b,
	    const u_char *tr, u_int endch);
/* Unstable, in-place radix sort. */
static void r_sort_a(const u_char **a, int n, int i,
	    const u_char *tr, u_int endch);
/* Stable radix sort; ta is scratch space for n pointers. */
static void r_sort_b(const u_char **a, const u_char **ta, int n, int i,
	    const u_char *tr, u_int endch);

/* Sub-arrays with fewer elements than this go to simplesort(). */
#define	THRESHOLD	20
/* Number of entries in the explicit work stack of r_sort_a()/r_sort_b(). */
#define	SIZE		512
81*2fe8fb19SBen Gras 
/*
 * SETUP --
 *	Common prologue of radixsort() and sradixsort().  It expects these
 *	names to be in scope in the expanding function: tab (caller's
 *	translation table or NULL), endch (u_int), tr (table to use), tr0[256]
 *	(storage for the default table) and c (scratch counter).
 *
 *	endch is used as an index into 256-entry tables, so any value above
 *	255 is rejected up front with EINVAL instead of indexing out of
 *	bounds.
 *
 *	tab == NULL: build the default table in tr0[] -- bytes below endch map
 *	to their value plus one, endch maps to 0, bytes above endch map to
 *	themselves -- then point tr at it and set endch to 0 (the translated
 *	terminator).
 *
 *	tab != NULL: use the caller's table and replace endch by its
 *	translation, which must be 0 or 255; otherwise fail with EINVAL.
 *
 *	On failure the macro makes the enclosing function return -1.
 */
#define SETUP {								\
	if (endch > 255) {						\
		errno = EINVAL;						\
		return (-1);						\
	}								\
	if (tab == NULL) {						\
		tr = tr0;						\
		for (c = 0; c < endch; c++)				\
			tr0[c] = c + 1;					\
		tr0[c] = 0;						\
		for (c++; c < 256; c++)					\
			tr0[c] = c;					\
		endch = 0;						\
	} else {							\
		endch = tab[endch];					\
		tr = tab;						\
		if (endch != 0 && endch != 255) {			\
			errno = EINVAL;					\
			return (-1);					\
		}							\
	}								\
}
100*2fe8fb19SBen Gras 
101*2fe8fb19SBen Gras int
radixsort(const u_char ** a,int n,const u_char * tab,u_int endch)102*2fe8fb19SBen Gras radixsort(const u_char **a, int n, const u_char *tab, u_int endch)
103*2fe8fb19SBen Gras {
104*2fe8fb19SBen Gras 	const u_char *tr;
105*2fe8fb19SBen Gras 	u_int c;
106*2fe8fb19SBen Gras 	u_char tr0[256];
107*2fe8fb19SBen Gras 
108*2fe8fb19SBen Gras 	_DIAGASSERT(a != NULL);
109*2fe8fb19SBen Gras 
110*2fe8fb19SBen Gras 	SETUP;
111*2fe8fb19SBen Gras 	r_sort_a(a, n, 0, tr, endch);
112*2fe8fb19SBen Gras 	return (0);
113*2fe8fb19SBen Gras }
114*2fe8fb19SBen Gras 
115*2fe8fb19SBen Gras int
sradixsort(const u_char ** a,int n,const u_char * tab,u_int endch)116*2fe8fb19SBen Gras sradixsort(const u_char **a, int n, const u_char *tab, u_int endch)
117*2fe8fb19SBen Gras {
118*2fe8fb19SBen Gras 	const u_char *tr, **ta;
119*2fe8fb19SBen Gras 	u_int c;
120*2fe8fb19SBen Gras 	u_char tr0[256];
121*2fe8fb19SBen Gras 
122*2fe8fb19SBen Gras 	_DIAGASSERT(a != NULL);
123*2fe8fb19SBen Gras 	if (a == NULL) {
124*2fe8fb19SBen Gras 		errno = EFAULT;
125*2fe8fb19SBen Gras 		return (-1);
126*2fe8fb19SBen Gras 	}
127*2fe8fb19SBen Gras 
128*2fe8fb19SBen Gras 	SETUP;
129*2fe8fb19SBen Gras 	if (n < THRESHOLD)
130*2fe8fb19SBen Gras 		simplesort(a, n, 0, tr, endch);
131*2fe8fb19SBen Gras 	else {
132*2fe8fb19SBen Gras 		if ((ta = malloc(n * sizeof(a))) == NULL)
133*2fe8fb19SBen Gras 			return (-1);
134*2fe8fb19SBen Gras 		r_sort_b(a, ta, n, 0, tr, endch);
135*2fe8fb19SBen Gras 		free(ta);
136*2fe8fb19SBen Gras 	}
137*2fe8fb19SBen Gras 	return (0);
138*2fe8fb19SBen Gras }
139*2fe8fb19SBen Gras 
/*
 * Helpers for the explicit work stack.  All of them operate on a local
 * stack pointer that must be named sp in the expanding function.
 *
 *	empty(s)	true when sp has dropped back to the stack base s
 *	pop(a, n, i)	step sp down and load the entry into a, n, i
 *	push(a, n, i)	store a, n, i at sp and step sp up
 *	swap(a, b, t)	exchange a and b using t as temporary
 *
 * Arguments and whole expansions are parenthesized so that expression
 * arguments such as "i + 1" or "*cp" expand safely.
 */
#define empty(s)	((s) >= sp)
#define pop(a, n, i)	((a) = (--sp)->sa, (n) = sp->sn, (i) = sp->si)
#define push(a, n, i)	(sp->sa = (a), sp->sn = (n), (sp++)->si = (i))
#define swap(a, b, t)	((t) = (a), (a) = (b), (b) = (t))
144*2fe8fb19SBen Gras 
145*2fe8fb19SBen Gras /* Unstable, in-place sort. */
146*2fe8fb19SBen Gras static void
r_sort_a(const u_char ** a,int n,int i,const u_char * tr,u_int endch)147*2fe8fb19SBen Gras r_sort_a(const u_char **a, int n, int i, const u_char *tr, u_int endch)
148*2fe8fb19SBen Gras {
149*2fe8fb19SBen Gras 	static u_int count[256], nc, bmin;
150*2fe8fb19SBen Gras 	u_int c;
151*2fe8fb19SBen Gras 	const u_char **ak, *r;
152*2fe8fb19SBen Gras 	stack s[SIZE], *sp, *sp0, *sp1, temp;
153*2fe8fb19SBen Gras 	u_int *cp, bigc;
154*2fe8fb19SBen Gras 	const u_char **an, *t, **aj, **top[256];
155*2fe8fb19SBen Gras 
156*2fe8fb19SBen Gras 	_DIAGASSERT(a != NULL);
157*2fe8fb19SBen Gras 	_DIAGASSERT(tr != NULL);
158*2fe8fb19SBen Gras 
159*2fe8fb19SBen Gras 	/* Set up stack. */
160*2fe8fb19SBen Gras 	sp = s;
161*2fe8fb19SBen Gras 	push(a, n, i);
162*2fe8fb19SBen Gras 	while (!empty(s)) {
163*2fe8fb19SBen Gras 		pop(a, n, i);
164*2fe8fb19SBen Gras 		if (n < THRESHOLD) {
165*2fe8fb19SBen Gras 			simplesort(a, n, i, tr, endch);
166*2fe8fb19SBen Gras 			continue;
167*2fe8fb19SBen Gras 		}
168*2fe8fb19SBen Gras 		an = a + n;
169*2fe8fb19SBen Gras 
170*2fe8fb19SBen Gras 		/* Make character histogram. */
171*2fe8fb19SBen Gras 		if (nc == 0) {
172*2fe8fb19SBen Gras 			bmin = 255;	/* First occupied bin, excluding eos. */
173*2fe8fb19SBen Gras 			for (ak = a; ak < an;) {
174*2fe8fb19SBen Gras 				c = tr[(*ak++)[i]];
175*2fe8fb19SBen Gras 				if (++count[c] == 1 && c != endch) {
176*2fe8fb19SBen Gras 					if (c < bmin)
177*2fe8fb19SBen Gras 						bmin = c;
178*2fe8fb19SBen Gras 					nc++;
179*2fe8fb19SBen Gras 				}
180*2fe8fb19SBen Gras 			}
181*2fe8fb19SBen Gras 			if (sp + nc > s + SIZE) {	/* Get more stack. */
182*2fe8fb19SBen Gras 				r_sort_a(a, n, i, tr, endch);
183*2fe8fb19SBen Gras 				continue;
184*2fe8fb19SBen Gras 			}
185*2fe8fb19SBen Gras 		}
186*2fe8fb19SBen Gras 
187*2fe8fb19SBen Gras 		/*
188*2fe8fb19SBen Gras 		 * Set top[]; push incompletely sorted bins onto stack.
189*2fe8fb19SBen Gras 		 * top[] = pointers to last out-of-place element in bins.
190*2fe8fb19SBen Gras 		 * count[] = counts of elements in bins.
191*2fe8fb19SBen Gras 		 * Before permuting: top[c-1] + count[c] = top[c];
192*2fe8fb19SBen Gras 		 * during deal: top[c] counts down to top[c-1].
193*2fe8fb19SBen Gras 		 */
194*2fe8fb19SBen Gras 		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
195*2fe8fb19SBen Gras 		bigc = 2;		/* Size of biggest bin. */
196*2fe8fb19SBen Gras 		if (endch == 0)		/* Special case: set top[eos]. */
197*2fe8fb19SBen Gras 			top[0] = ak = a + count[0];
198*2fe8fb19SBen Gras 		else {
199*2fe8fb19SBen Gras 			ak = a;
200*2fe8fb19SBen Gras 			top[255] = an;
201*2fe8fb19SBen Gras 		}
202*2fe8fb19SBen Gras 		for (cp = count + bmin; nc > 0; cp++) {
203*2fe8fb19SBen Gras 			while (*cp == 0)	/* Find next non-empty pile. */
204*2fe8fb19SBen Gras 				cp++;
205*2fe8fb19SBen Gras 			if (*cp > 1) {
206*2fe8fb19SBen Gras 				if (*cp > bigc) {
207*2fe8fb19SBen Gras 					bigc = *cp;
208*2fe8fb19SBen Gras 					sp1 = sp;
209*2fe8fb19SBen Gras 				}
210*2fe8fb19SBen Gras 				push(ak, *cp, i+1);
211*2fe8fb19SBen Gras 			}
212*2fe8fb19SBen Gras 			top[cp-count] = ak += *cp;
213*2fe8fb19SBen Gras 			nc--;
214*2fe8fb19SBen Gras 		}
215*2fe8fb19SBen Gras 		swap(*sp0, *sp1, temp);	/* Play it safe -- biggest bin last. */
216*2fe8fb19SBen Gras 
217*2fe8fb19SBen Gras 		/*
218*2fe8fb19SBen Gras 		 * Permute misplacements home.  Already home: everything
219*2fe8fb19SBen Gras 		 * before aj, and in bin[c], items from top[c] on.
220*2fe8fb19SBen Gras 		 * Inner loop:
221*2fe8fb19SBen Gras 		 *	r = next element to put in place;
222*2fe8fb19SBen Gras 		 *	ak = top[r[i]] = location to put the next element.
223*2fe8fb19SBen Gras 		 *	aj = bottom of 1st disordered bin.
224*2fe8fb19SBen Gras 		 * Outer loop:
225*2fe8fb19SBen Gras 		 *	Once the 1st disordered bin is done, ie. aj >= ak,
226*2fe8fb19SBen Gras 		 *	aj<-aj + count[c] connects the bins in a linked list;
227*2fe8fb19SBen Gras 		 *	reset count[c].
228*2fe8fb19SBen Gras 		 */
229*2fe8fb19SBen Gras 		for (aj = a; aj < an;  *aj = r, aj += count[c], count[c] = 0)
230*2fe8fb19SBen Gras 			for (r = *aj;  aj < (ak = --top[c = tr[r[i]]]);)
231*2fe8fb19SBen Gras 				swap(*ak, r, t);
232*2fe8fb19SBen Gras 	}
233*2fe8fb19SBen Gras }
234*2fe8fb19SBen Gras 
235*2fe8fb19SBen Gras /* Stable sort, requiring additional memory. */
236*2fe8fb19SBen Gras static void
r_sort_b(const u_char ** a,const u_char ** ta,int n,int i,const u_char * tr,u_int endch)237*2fe8fb19SBen Gras r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
238*2fe8fb19SBen Gras     u_int endch)
239*2fe8fb19SBen Gras {
240*2fe8fb19SBen Gras 	static u_int count[256], nc, bmin;
241*2fe8fb19SBen Gras 	u_int c;
242*2fe8fb19SBen Gras 	const u_char **ak, **ai;
243*2fe8fb19SBen Gras 	stack s[512], *sp, *sp0, *sp1, temp;
244*2fe8fb19SBen Gras 	const u_char **top[256];
245*2fe8fb19SBen Gras 	u_int *cp, bigc;
246*2fe8fb19SBen Gras 
247*2fe8fb19SBen Gras 	_DIAGASSERT(a != NULL);
248*2fe8fb19SBen Gras 	_DIAGASSERT(ta != NULL);
249*2fe8fb19SBen Gras 	_DIAGASSERT(tr != NULL);
250*2fe8fb19SBen Gras 
251*2fe8fb19SBen Gras 	sp = s;
252*2fe8fb19SBen Gras 	push(a, n, i);
253*2fe8fb19SBen Gras 	while (!empty(s)) {
254*2fe8fb19SBen Gras 		pop(a, n, i);
255*2fe8fb19SBen Gras 		if (n < THRESHOLD) {
256*2fe8fb19SBen Gras 			simplesort(a, n, i, tr, endch);
257*2fe8fb19SBen Gras 			continue;
258*2fe8fb19SBen Gras 		}
259*2fe8fb19SBen Gras 
260*2fe8fb19SBen Gras 		if (nc == 0) {
261*2fe8fb19SBen Gras 			bmin = 255;
262*2fe8fb19SBen Gras 			for (ak = a + n; --ak >= a;) {
263*2fe8fb19SBen Gras 				c = tr[(*ak)[i]];
264*2fe8fb19SBen Gras 				if (++count[c] == 1 && c != endch) {
265*2fe8fb19SBen Gras 					if (c < bmin)
266*2fe8fb19SBen Gras 						bmin = c;
267*2fe8fb19SBen Gras 					nc++;
268*2fe8fb19SBen Gras 				}
269*2fe8fb19SBen Gras 			}
270*2fe8fb19SBen Gras 			if (sp + nc > s + SIZE) {
271*2fe8fb19SBen Gras 				r_sort_b(a, ta, n, i, tr, endch);
272*2fe8fb19SBen Gras 				continue;
273*2fe8fb19SBen Gras 			}
274*2fe8fb19SBen Gras 		}
275*2fe8fb19SBen Gras 
276*2fe8fb19SBen Gras 		sp0 = sp1 = sp;
277*2fe8fb19SBen Gras 		bigc = 2;
278*2fe8fb19SBen Gras 		if (endch == 0) {
279*2fe8fb19SBen Gras 			top[0] = ak = a + count[0];
280*2fe8fb19SBen Gras 			count[0] = 0;
281*2fe8fb19SBen Gras 		} else {
282*2fe8fb19SBen Gras 			ak = a;
283*2fe8fb19SBen Gras 			top[255] = a + n;
284*2fe8fb19SBen Gras 			count[255] = 0;
285*2fe8fb19SBen Gras 		}
286*2fe8fb19SBen Gras 		for (cp = count + bmin; nc > 0; cp++) {
287*2fe8fb19SBen Gras 			while (*cp == 0)
288*2fe8fb19SBen Gras 				cp++;
289*2fe8fb19SBen Gras 			if ((c = *cp) > 1) {
290*2fe8fb19SBen Gras 				if (c > bigc) {
291*2fe8fb19SBen Gras 					bigc = c;
292*2fe8fb19SBen Gras 					sp1 = sp;
293*2fe8fb19SBen Gras 				}
294*2fe8fb19SBen Gras 				push(ak, c, i+1);
295*2fe8fb19SBen Gras 			}
296*2fe8fb19SBen Gras 			top[cp-count] = ak += c;
297*2fe8fb19SBen Gras 			*cp = 0;			/* Reset count[]. */
298*2fe8fb19SBen Gras 			nc--;
299*2fe8fb19SBen Gras 		}
300*2fe8fb19SBen Gras 		swap(*sp0, *sp1, temp);
301*2fe8fb19SBen Gras 
302*2fe8fb19SBen Gras 		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
303*2fe8fb19SBen Gras 			*--ak = *--ai;
304*2fe8fb19SBen Gras 		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
305*2fe8fb19SBen Gras 			*--top[tr[(*ak)[i]]] = *ak;
306*2fe8fb19SBen Gras 	}
307*2fe8fb19SBen Gras }
308*2fe8fb19SBen Gras 
/*
 * simplesort --
 *	Insertion sort of the n string pointers in a[], comparing the strings
 *	from byte offset b onward.  Bytes are compared after translation
 *	through tr; a translated byte equal to endch ends the scan of the
 *	string being inserted.  Strings that compare equal are not exchanged.
 */
static inline void
simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
{
	const u_char **next, **slot;
	const u_char *cur, *prev, *tmp;
	u_char key;

	_DIAGASSERT(a != NULL);
	_DIAGASSERT(tr != NULL);

	next = a + 1;
	while (--n >= 1) {
		/* Sink *next toward the front until it is in order. */
		for (slot = next; slot > a; slot--) {
			cur = slot[0] + b;
			prev = slot[-1] + b;

			/* Skip the common prefix of the two neighbours. */
			while ((key = tr[*cur]) != endch && key == tr[*prev]) {
				cur++;
				prev++;
			}

			/* Already in order with its predecessor: done. */
			if (key >= tr[*prev])
				break;

			tmp = slot[0];
			slot[0] = slot[-1];
			slot[-1] = tmp;
		}
		next++;
	}
}
330