xref: /minix3/usr.bin/sort/radix_sort.c (revision 0fbbaa43e914d38ef3af549125d31574117d1ebf)
1*0fbbaa43SLionel Sambuc /*	$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $	*/
2*0fbbaa43SLionel Sambuc 
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc  * Copyright (c) 1990, 1993
5*0fbbaa43SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
6*0fbbaa43SLionel Sambuc  *
7*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to Berkeley by
8*0fbbaa43SLionel Sambuc  * Peter McIlroy and by Dan Bernstein at New York University,
9*0fbbaa43SLionel Sambuc  *
10*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc  * are met:
13*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
19*0fbbaa43SLionel Sambuc  *    may be used to endorse or promote products derived from this software
20*0fbbaa43SLionel Sambuc  *    without specific prior written permission.
21*0fbbaa43SLionel Sambuc  *
22*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*0fbbaa43SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*0fbbaa43SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*0fbbaa43SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*0fbbaa43SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*0fbbaa43SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*0fbbaa43SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*0fbbaa43SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*0fbbaa43SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*0fbbaa43SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*0fbbaa43SLionel Sambuc  * SUCH DAMAGE.
33*0fbbaa43SLionel Sambuc  */
34*0fbbaa43SLionel Sambuc 
35*0fbbaa43SLionel Sambuc #include <sys/cdefs.h>
36*0fbbaa43SLionel Sambuc #if defined(LIBC_SCCS) && !defined(lint)
37*0fbbaa43SLionel Sambuc #if 0
38*0fbbaa43SLionel Sambuc static char sccsid[] = "@(#)radixsort.c	8.2 (Berkeley) 4/28/95";
39*0fbbaa43SLionel Sambuc #else
40*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
41*0fbbaa43SLionel Sambuc #endif
42*0fbbaa43SLionel Sambuc #endif /* LIBC_SCCS and not lint */
43*0fbbaa43SLionel Sambuc 
44*0fbbaa43SLionel Sambuc /*
45*0fbbaa43SLionel Sambuc  * 'stable' radix sort initially from libc/stdlib/radixsort.c
46*0fbbaa43SLionel Sambuc  */
47*0fbbaa43SLionel Sambuc 
48*0fbbaa43SLionel Sambuc #include <sys/types.h>
49*0fbbaa43SLionel Sambuc 
50*0fbbaa43SLionel Sambuc #include <assert.h>
51*0fbbaa43SLionel Sambuc #include <errno.h>
52*0fbbaa43SLionel Sambuc #include <util.h>
53*0fbbaa43SLionel Sambuc #include "sort.h"
54*0fbbaa43SLionel Sambuc 
55*0fbbaa43SLionel Sambuc typedef struct {
56*0fbbaa43SLionel Sambuc 	RECHEADER **sa;		/* Base of saved area */
57*0fbbaa43SLionel Sambuc 	int sn;			/* Number of entries */
58*0fbbaa43SLionel Sambuc 	int si;			/* index into data for compare */
59*0fbbaa43SLionel Sambuc } stack;
60*0fbbaa43SLionel Sambuc 
61*0fbbaa43SLionel Sambuc static void simplesort(RECHEADER **, int, int);
62*0fbbaa43SLionel Sambuc 
63*0fbbaa43SLionel Sambuc #define	THRESHOLD	20		/* Divert to simplesort(). */
64*0fbbaa43SLionel Sambuc 
65*0fbbaa43SLionel Sambuc #define empty(s)	(s >= sp)
66*0fbbaa43SLionel Sambuc #define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
67*0fbbaa43SLionel Sambuc #define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
68*0fbbaa43SLionel Sambuc #define swap(a, b, t)	t = a, a = b, b = t
69*0fbbaa43SLionel Sambuc 
70*0fbbaa43SLionel Sambuc void
radix_sort(RECHEADER ** a,RECHEADER ** ta,int n)71*0fbbaa43SLionel Sambuc radix_sort(RECHEADER **a, RECHEADER **ta, int n)
72*0fbbaa43SLionel Sambuc {
73*0fbbaa43SLionel Sambuc 	u_int count[256], nc, bmin;
74*0fbbaa43SLionel Sambuc 	u_int c;
75*0fbbaa43SLionel Sambuc 	RECHEADER **ak, **tai, **lim;
76*0fbbaa43SLionel Sambuc 	RECHEADER *hdr;
77*0fbbaa43SLionel Sambuc 	int stack_size = 512;
78*0fbbaa43SLionel Sambuc 	stack *s, *sp, *sp0, *sp1, temp;
79*0fbbaa43SLionel Sambuc 	RECHEADER **top[256];
80*0fbbaa43SLionel Sambuc 	u_int *cp, bigc;
81*0fbbaa43SLionel Sambuc 	int data_index = 0;
82*0fbbaa43SLionel Sambuc 
83*0fbbaa43SLionel Sambuc 	if (n < THRESHOLD && !DEBUG('r')) {
84*0fbbaa43SLionel Sambuc 		simplesort(a, n, 0);
85*0fbbaa43SLionel Sambuc 		return;
86*0fbbaa43SLionel Sambuc 	}
87*0fbbaa43SLionel Sambuc 
88*0fbbaa43SLionel Sambuc 	s = emalloc(stack_size * sizeof *s);
89*0fbbaa43SLionel Sambuc 	memset(&count, 0, sizeof count);
90*0fbbaa43SLionel Sambuc 	/* Technically 'top' doesn't need zeroing */
91*0fbbaa43SLionel Sambuc 	memset(&top, 0, sizeof top);
92*0fbbaa43SLionel Sambuc 
93*0fbbaa43SLionel Sambuc 	sp = s;
94*0fbbaa43SLionel Sambuc 	push(a, n, data_index);
95*0fbbaa43SLionel Sambuc 	while (!empty(s)) {
96*0fbbaa43SLionel Sambuc 		pop(a, n, data_index);
97*0fbbaa43SLionel Sambuc 		if (n < THRESHOLD && !DEBUG('r')) {
98*0fbbaa43SLionel Sambuc 			simplesort(a, n, data_index);
99*0fbbaa43SLionel Sambuc 			continue;
100*0fbbaa43SLionel Sambuc 		}
101*0fbbaa43SLionel Sambuc 
102*0fbbaa43SLionel Sambuc 		/* Count number of times each 'next byte' occurs */
103*0fbbaa43SLionel Sambuc 		nc = 0;
104*0fbbaa43SLionel Sambuc 		bmin = 255;
105*0fbbaa43SLionel Sambuc 		lim = a + n;
106*0fbbaa43SLionel Sambuc 		for (ak = a, tai = ta; ak < lim; ak++) {
107*0fbbaa43SLionel Sambuc 			hdr = *ak;
108*0fbbaa43SLionel Sambuc 			if (data_index >= hdr->keylen) {
109*0fbbaa43SLionel Sambuc 				/* Short key, copy to start of output */
110*0fbbaa43SLionel Sambuc 				if (UNIQUE && a != sp->sa)
111*0fbbaa43SLionel Sambuc 					/* Stop duplicate being written out */
112*0fbbaa43SLionel Sambuc 					hdr->keylen = -1;
113*0fbbaa43SLionel Sambuc 				*a++ = hdr;
114*0fbbaa43SLionel Sambuc 				n--;
115*0fbbaa43SLionel Sambuc 				continue;
116*0fbbaa43SLionel Sambuc 			}
117*0fbbaa43SLionel Sambuc 			/* Save in temp buffer for distribute */
118*0fbbaa43SLionel Sambuc 			*tai++ = hdr;
119*0fbbaa43SLionel Sambuc 			c = hdr->data[data_index];
120*0fbbaa43SLionel Sambuc 			if (++count[c] == 1) {
121*0fbbaa43SLionel Sambuc 				if (c < bmin)
122*0fbbaa43SLionel Sambuc 					bmin = c;
123*0fbbaa43SLionel Sambuc 				nc++;
124*0fbbaa43SLionel Sambuc 			}
125*0fbbaa43SLionel Sambuc 		}
126*0fbbaa43SLionel Sambuc 		/*
127*0fbbaa43SLionel Sambuc 		 * We need save the bounds for each 'next byte' that
128*0fbbaa43SLionel Sambuc 		 * occurs more so we can sort each block.
129*0fbbaa43SLionel Sambuc 		 */
130*0fbbaa43SLionel Sambuc 		if (sp + nc > s + stack_size) {
131*0fbbaa43SLionel Sambuc 			stack_size *= 2;
132*0fbbaa43SLionel Sambuc 			sp1 = erealloc(s, stack_size * sizeof *s);
133*0fbbaa43SLionel Sambuc 			sp = sp1 + (sp - s);
134*0fbbaa43SLionel Sambuc 			s = sp1;
135*0fbbaa43SLionel Sambuc 		}
136*0fbbaa43SLionel Sambuc 
137*0fbbaa43SLionel Sambuc 		/* Minor optimisation to do the largest set last */
138*0fbbaa43SLionel Sambuc 		sp0 = sp1 = sp;
139*0fbbaa43SLionel Sambuc 		bigc = 2;
140*0fbbaa43SLionel Sambuc 		/* Convert 'counts' positions, saving bounds for later sorts */
141*0fbbaa43SLionel Sambuc 		ak = a;
142*0fbbaa43SLionel Sambuc 		for (cp = count + bmin; nc > 0; cp++) {
143*0fbbaa43SLionel Sambuc 			while (*cp == 0)
144*0fbbaa43SLionel Sambuc 				cp++;
145*0fbbaa43SLionel Sambuc 			if ((c = *cp) > 1) {
146*0fbbaa43SLionel Sambuc 				if (c > bigc) {
147*0fbbaa43SLionel Sambuc 					bigc = c;
148*0fbbaa43SLionel Sambuc 					sp1 = sp;
149*0fbbaa43SLionel Sambuc 				}
150*0fbbaa43SLionel Sambuc 				push(ak, c, data_index+1);
151*0fbbaa43SLionel Sambuc 			}
152*0fbbaa43SLionel Sambuc 			ak += c;
153*0fbbaa43SLionel Sambuc 			top[cp-count] = ak;
154*0fbbaa43SLionel Sambuc 			*cp = 0;			/* Reset count[]. */
155*0fbbaa43SLionel Sambuc 			nc--;
156*0fbbaa43SLionel Sambuc 		}
157*0fbbaa43SLionel Sambuc 		swap(*sp0, *sp1, temp);
158*0fbbaa43SLionel Sambuc 
159*0fbbaa43SLionel Sambuc 		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
160*0fbbaa43SLionel Sambuc 			*--top[(*ak)->data[data_index]] = *ak;
161*0fbbaa43SLionel Sambuc 	}
162*0fbbaa43SLionel Sambuc 
163*0fbbaa43SLionel Sambuc 	free(s);
164*0fbbaa43SLionel Sambuc }
165*0fbbaa43SLionel Sambuc 
166*0fbbaa43SLionel Sambuc /* insertion sort, short records are sorted before long ones */
167*0fbbaa43SLionel Sambuc static void
simplesort(RECHEADER ** a,int n,int data_index)168*0fbbaa43SLionel Sambuc simplesort(RECHEADER **a, int n, int data_index)
169*0fbbaa43SLionel Sambuc {
170*0fbbaa43SLionel Sambuc 	RECHEADER **ak, **ai;
171*0fbbaa43SLionel Sambuc 	RECHEADER *akh;
172*0fbbaa43SLionel Sambuc 	RECHEADER **lim = a + n;
173*0fbbaa43SLionel Sambuc 	const u_char *s, *t;
174*0fbbaa43SLionel Sambuc 	int s_len, t_len;
175*0fbbaa43SLionel Sambuc 	int i;
176*0fbbaa43SLionel Sambuc 	int r;
177*0fbbaa43SLionel Sambuc 
178*0fbbaa43SLionel Sambuc 	if (n <= 1)
179*0fbbaa43SLionel Sambuc 		return;
180*0fbbaa43SLionel Sambuc 
181*0fbbaa43SLionel Sambuc 	for (ak = a+1; ak < lim; ak++) {
182*0fbbaa43SLionel Sambuc 		akh = *ak;
183*0fbbaa43SLionel Sambuc 		s = akh->data;
184*0fbbaa43SLionel Sambuc 		s_len = akh->keylen;
185*0fbbaa43SLionel Sambuc 		for (ai = ak; ;) {
186*0fbbaa43SLionel Sambuc 			ai--;
187*0fbbaa43SLionel Sambuc 			t_len = (*ai)->keylen;
188*0fbbaa43SLionel Sambuc 			if (t_len != -1) {
189*0fbbaa43SLionel Sambuc 				t = (*ai)->data;
190*0fbbaa43SLionel Sambuc 				for (i = data_index; ; i++) {
191*0fbbaa43SLionel Sambuc 					if (i >= s_len || i >= t_len) {
192*0fbbaa43SLionel Sambuc 						r = s_len - t_len;
193*0fbbaa43SLionel Sambuc 						break;
194*0fbbaa43SLionel Sambuc 					}
195*0fbbaa43SLionel Sambuc 					r =  s[i]  - t[i];
196*0fbbaa43SLionel Sambuc 					if (r != 0)
197*0fbbaa43SLionel Sambuc 						break;
198*0fbbaa43SLionel Sambuc 				}
199*0fbbaa43SLionel Sambuc 				if (r >= 0) {
200*0fbbaa43SLionel Sambuc 					if (r == 0 && UNIQUE) {
201*0fbbaa43SLionel Sambuc 						/* Put record below existing */
202*0fbbaa43SLionel Sambuc 						ai[1] = ai[0];
203*0fbbaa43SLionel Sambuc 						/* Mark as duplicate - ignore */
204*0fbbaa43SLionel Sambuc 						akh->keylen = -1;
205*0fbbaa43SLionel Sambuc 					} else {
206*0fbbaa43SLionel Sambuc 						ai++;
207*0fbbaa43SLionel Sambuc 					}
208*0fbbaa43SLionel Sambuc 					break;
209*0fbbaa43SLionel Sambuc 				}
210*0fbbaa43SLionel Sambuc 			}
211*0fbbaa43SLionel Sambuc 			ai[1] = ai[0];
212*0fbbaa43SLionel Sambuc 			if (ai == a)
213*0fbbaa43SLionel Sambuc 				break;
214*0fbbaa43SLionel Sambuc 		}
215*0fbbaa43SLionel Sambuc 		ai[0] = akh;
216*0fbbaa43SLionel Sambuc 	}
217*0fbbaa43SLionel Sambuc }
218