/*	$NetBSD: radixsort.c,v 1.19 2009/09/05 08:53:06 dsl Exp $	*/

/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy and by Dan Bernstein at New York University,
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/* Version identification; only emitted for LIBC_SCCS builds that are not lint runs. */
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
#else
__RCSID("$NetBSD: radixsort.c,v 1.19 2009/09/05 08:53:06 dsl Exp $");
#endif
#endif /* LIBC_SCCS and not lint */

/*
 * Radixsort routines.
 *
 * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
 * Use radixsort(a, n, tab, endch) for this case.
 *
 * For stable sorting (using N extra pointers) use sradixsort(), which calls
 * r_sort_b().
 *
 * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
 * "Engineering Radix Sort".
 */

#include "namespace.h"
#include <sys/types.h>

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * When the platform provides __weak_alias, tie the public names radixsort
 * and sradixsort to the underscore-prefixed symbols.
 * NOTE(review): presumably "namespace.h" renames the definitions below to
 * _radixsort/_sradixsort -- confirm against that header.
 */
#ifdef __weak_alias
__weak_alias(radixsort,_radixsort)
__weak_alias(sradixsort,_sradixsort)
#endif

/*
 * One pending unit of work for the explicit stacks used by r_sort_a() and
 * r_sort_b(): a run of string pointers that still has to be sorted.
 */
typedef struct {
	const u_char **sa;	/* first element of the run */
	int sn, si;		/* sn: run length; si: byte index to sort on */
} stack;

/* Forward declarations of the file-local sorting helpers. */
static inline void simplesort(const u_char **a, int n, int b,
    const u_char *tr, u_int endch);
static void r_sort_a(const u_char **a, int n, int i, const u_char *tr,
    u_int endch);
static void r_sort_b(const u_char **a, const u_char **ta, int n, int i,
    const u_char *tr, u_int endch);

#define	THRESHOLD	20		/* Divert to simplesort(). */
#define	SIZE		512		/* Default stack size. */

/*
 * SETUP -- shared prologue of radixsort() and sradixsort().
 *
 * Relies on these names in the expanding scope: the arguments tab and endch,
 * plus locals tr (const u_char *), c (u_int) and tr0 (u_char[256]).
 *
 * endch must be a byte value: it indexes a 256-entry table, so anything
 * above 255 is rejected with EINVAL before any table is touched.
 *
 * tab == NULL: build a translation table in tr0 in which bytes below endch
 * map to their value plus one, endch maps to 0, and bytes above endch map to
 * themselves; tr points at tr0 and endch becomes 0.
 *
 * tab != NULL: use the caller's table as tr and replace endch with its
 * translation, which must be 0 or 255; otherwise fail with EINVAL.
 *
 * On failure the macro sets errno and returns -1 from the enclosing function.
 */
#define SETUP do {							\
	if (endch > 255) {						\
		errno = EINVAL;						\
		return (-1);						\
	}								\
	if (tab == NULL) {						\
		tr = tr0;						\
		for (c = 0; c < endch; c++)				\
			tr0[c] = c + 1;					\
		tr0[c] = 0;						\
		for (c++; c < 256; c++)					\
			tr0[c] = c;					\
		endch = 0;						\
	} else {							\
		endch = tab[endch];					\
		tr = tab;						\
		if (endch != 0 && endch != 255) {			\
			errno = EINVAL;					\
			return (-1);					\
		}							\
	}								\
} while (0)

101*2fe8fb19SBen Gras int
radixsort(const u_char ** a,int n,const u_char * tab,u_int endch)102*2fe8fb19SBen Gras radixsort(const u_char **a, int n, const u_char *tab, u_int endch)
103*2fe8fb19SBen Gras {
104*2fe8fb19SBen Gras const u_char *tr;
105*2fe8fb19SBen Gras u_int c;
106*2fe8fb19SBen Gras u_char tr0[256];
107*2fe8fb19SBen Gras
108*2fe8fb19SBen Gras _DIAGASSERT(a != NULL);
109*2fe8fb19SBen Gras
110*2fe8fb19SBen Gras SETUP;
111*2fe8fb19SBen Gras r_sort_a(a, n, 0, tr, endch);
112*2fe8fb19SBen Gras return (0);
113*2fe8fb19SBen Gras }
114*2fe8fb19SBen Gras
115*2fe8fb19SBen Gras int
sradixsort(const u_char ** a,int n,const u_char * tab,u_int endch)116*2fe8fb19SBen Gras sradixsort(const u_char **a, int n, const u_char *tab, u_int endch)
117*2fe8fb19SBen Gras {
118*2fe8fb19SBen Gras const u_char *tr, **ta;
119*2fe8fb19SBen Gras u_int c;
120*2fe8fb19SBen Gras u_char tr0[256];
121*2fe8fb19SBen Gras
122*2fe8fb19SBen Gras _DIAGASSERT(a != NULL);
123*2fe8fb19SBen Gras if (a == NULL) {
124*2fe8fb19SBen Gras errno = EFAULT;
125*2fe8fb19SBen Gras return (-1);
126*2fe8fb19SBen Gras }
127*2fe8fb19SBen Gras
128*2fe8fb19SBen Gras SETUP;
129*2fe8fb19SBen Gras if (n < THRESHOLD)
130*2fe8fb19SBen Gras simplesort(a, n, 0, tr, endch);
131*2fe8fb19SBen Gras else {
132*2fe8fb19SBen Gras if ((ta = malloc(n * sizeof(a))) == NULL)
133*2fe8fb19SBen Gras return (-1);
134*2fe8fb19SBen Gras r_sort_b(a, ta, n, 0, tr, endch);
135*2fe8fb19SBen Gras free(ta);
136*2fe8fb19SBen Gras }
137*2fe8fb19SBen Gras return (0);
138*2fe8fb19SBen Gras }
139*2fe8fb19SBen Gras
/*
 * Helpers for the explicit work stacks.  empty/pop/push all operate on a
 * stack pointer named `sp' in the expanding scope; `sp' points one past the
 * most recently pushed entry.
 *
 *	empty(s)	true when sp is back at (or below) the stack base s
 *	pop(a, n, i)	step sp back and load the entry's fields into a, n, i
 *	push(a, n, i)	store a, n, i in the entry at sp and advance sp
 *	swap(a, b, t)	exchange a and b using t as scratch
 *
 * Arguments are evaluated more than once in swap(); avoid side effects.
 */
#define	empty(s)	((s) >= sp)
#define	pop(a, n, i)	((a) = (--sp)->sa, (n) = sp->sn, (i) = sp->si)
#define	push(a, n, i)	(sp->sa = (a), sp->sn = (n), (sp++)->si = (i))
#define	swap(a, b, t)	((t) = (a), (a) = (b), (b) = (t))

145*2fe8fb19SBen Gras /* Unstable, in-place sort. */
146*2fe8fb19SBen Gras static void
r_sort_a(const u_char ** a,int n,int i,const u_char * tr,u_int endch)147*2fe8fb19SBen Gras r_sort_a(const u_char **a, int n, int i, const u_char *tr, u_int endch)
148*2fe8fb19SBen Gras {
149*2fe8fb19SBen Gras static u_int count[256], nc, bmin;
150*2fe8fb19SBen Gras u_int c;
151*2fe8fb19SBen Gras const u_char **ak, *r;
152*2fe8fb19SBen Gras stack s[SIZE], *sp, *sp0, *sp1, temp;
153*2fe8fb19SBen Gras u_int *cp, bigc;
154*2fe8fb19SBen Gras const u_char **an, *t, **aj, **top[256];
155*2fe8fb19SBen Gras
156*2fe8fb19SBen Gras _DIAGASSERT(a != NULL);
157*2fe8fb19SBen Gras _DIAGASSERT(tr != NULL);
158*2fe8fb19SBen Gras
159*2fe8fb19SBen Gras /* Set up stack. */
160*2fe8fb19SBen Gras sp = s;
161*2fe8fb19SBen Gras push(a, n, i);
162*2fe8fb19SBen Gras while (!empty(s)) {
163*2fe8fb19SBen Gras pop(a, n, i);
164*2fe8fb19SBen Gras if (n < THRESHOLD) {
165*2fe8fb19SBen Gras simplesort(a, n, i, tr, endch);
166*2fe8fb19SBen Gras continue;
167*2fe8fb19SBen Gras }
168*2fe8fb19SBen Gras an = a + n;
169*2fe8fb19SBen Gras
170*2fe8fb19SBen Gras /* Make character histogram. */
171*2fe8fb19SBen Gras if (nc == 0) {
172*2fe8fb19SBen Gras bmin = 255; /* First occupied bin, excluding eos. */
173*2fe8fb19SBen Gras for (ak = a; ak < an;) {
174*2fe8fb19SBen Gras c = tr[(*ak++)[i]];
175*2fe8fb19SBen Gras if (++count[c] == 1 && c != endch) {
176*2fe8fb19SBen Gras if (c < bmin)
177*2fe8fb19SBen Gras bmin = c;
178*2fe8fb19SBen Gras nc++;
179*2fe8fb19SBen Gras }
180*2fe8fb19SBen Gras }
181*2fe8fb19SBen Gras if (sp + nc > s + SIZE) { /* Get more stack. */
182*2fe8fb19SBen Gras r_sort_a(a, n, i, tr, endch);
183*2fe8fb19SBen Gras continue;
184*2fe8fb19SBen Gras }
185*2fe8fb19SBen Gras }
186*2fe8fb19SBen Gras
187*2fe8fb19SBen Gras /*
188*2fe8fb19SBen Gras * Set top[]; push incompletely sorted bins onto stack.
189*2fe8fb19SBen Gras * top[] = pointers to last out-of-place element in bins.
190*2fe8fb19SBen Gras * count[] = counts of elements in bins.
191*2fe8fb19SBen Gras * Before permuting: top[c-1] + count[c] = top[c];
192*2fe8fb19SBen Gras * during deal: top[c] counts down to top[c-1].
193*2fe8fb19SBen Gras */
194*2fe8fb19SBen Gras sp0 = sp1 = sp; /* Stack position of biggest bin. */
195*2fe8fb19SBen Gras bigc = 2; /* Size of biggest bin. */
196*2fe8fb19SBen Gras if (endch == 0) /* Special case: set top[eos]. */
197*2fe8fb19SBen Gras top[0] = ak = a + count[0];
198*2fe8fb19SBen Gras else {
199*2fe8fb19SBen Gras ak = a;
200*2fe8fb19SBen Gras top[255] = an;
201*2fe8fb19SBen Gras }
202*2fe8fb19SBen Gras for (cp = count + bmin; nc > 0; cp++) {
203*2fe8fb19SBen Gras while (*cp == 0) /* Find next non-empty pile. */
204*2fe8fb19SBen Gras cp++;
205*2fe8fb19SBen Gras if (*cp > 1) {
206*2fe8fb19SBen Gras if (*cp > bigc) {
207*2fe8fb19SBen Gras bigc = *cp;
208*2fe8fb19SBen Gras sp1 = sp;
209*2fe8fb19SBen Gras }
210*2fe8fb19SBen Gras push(ak, *cp, i+1);
211*2fe8fb19SBen Gras }
212*2fe8fb19SBen Gras top[cp-count] = ak += *cp;
213*2fe8fb19SBen Gras nc--;
214*2fe8fb19SBen Gras }
215*2fe8fb19SBen Gras swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */
216*2fe8fb19SBen Gras
217*2fe8fb19SBen Gras /*
218*2fe8fb19SBen Gras * Permute misplacements home. Already home: everything
219*2fe8fb19SBen Gras * before aj, and in bin[c], items from top[c] on.
220*2fe8fb19SBen Gras * Inner loop:
221*2fe8fb19SBen Gras * r = next element to put in place;
222*2fe8fb19SBen Gras * ak = top[r[i]] = location to put the next element.
223*2fe8fb19SBen Gras * aj = bottom of 1st disordered bin.
224*2fe8fb19SBen Gras * Outer loop:
225*2fe8fb19SBen Gras * Once the 1st disordered bin is done, ie. aj >= ak,
226*2fe8fb19SBen Gras * aj<-aj + count[c] connects the bins in a linked list;
227*2fe8fb19SBen Gras * reset count[c].
228*2fe8fb19SBen Gras */
229*2fe8fb19SBen Gras for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0)
230*2fe8fb19SBen Gras for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);)
231*2fe8fb19SBen Gras swap(*ak, r, t);
232*2fe8fb19SBen Gras }
233*2fe8fb19SBen Gras }
234*2fe8fb19SBen Gras
235*2fe8fb19SBen Gras /* Stable sort, requiring additional memory. */
236*2fe8fb19SBen Gras static void
r_sort_b(const u_char ** a,const u_char ** ta,int n,int i,const u_char * tr,u_int endch)237*2fe8fb19SBen Gras r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
238*2fe8fb19SBen Gras u_int endch)
239*2fe8fb19SBen Gras {
240*2fe8fb19SBen Gras static u_int count[256], nc, bmin;
241*2fe8fb19SBen Gras u_int c;
242*2fe8fb19SBen Gras const u_char **ak, **ai;
243*2fe8fb19SBen Gras stack s[512], *sp, *sp0, *sp1, temp;
244*2fe8fb19SBen Gras const u_char **top[256];
245*2fe8fb19SBen Gras u_int *cp, bigc;
246*2fe8fb19SBen Gras
247*2fe8fb19SBen Gras _DIAGASSERT(a != NULL);
248*2fe8fb19SBen Gras _DIAGASSERT(ta != NULL);
249*2fe8fb19SBen Gras _DIAGASSERT(tr != NULL);
250*2fe8fb19SBen Gras
251*2fe8fb19SBen Gras sp = s;
252*2fe8fb19SBen Gras push(a, n, i);
253*2fe8fb19SBen Gras while (!empty(s)) {
254*2fe8fb19SBen Gras pop(a, n, i);
255*2fe8fb19SBen Gras if (n < THRESHOLD) {
256*2fe8fb19SBen Gras simplesort(a, n, i, tr, endch);
257*2fe8fb19SBen Gras continue;
258*2fe8fb19SBen Gras }
259*2fe8fb19SBen Gras
260*2fe8fb19SBen Gras if (nc == 0) {
261*2fe8fb19SBen Gras bmin = 255;
262*2fe8fb19SBen Gras for (ak = a + n; --ak >= a;) {
263*2fe8fb19SBen Gras c = tr[(*ak)[i]];
264*2fe8fb19SBen Gras if (++count[c] == 1 && c != endch) {
265*2fe8fb19SBen Gras if (c < bmin)
266*2fe8fb19SBen Gras bmin = c;
267*2fe8fb19SBen Gras nc++;
268*2fe8fb19SBen Gras }
269*2fe8fb19SBen Gras }
270*2fe8fb19SBen Gras if (sp + nc > s + SIZE) {
271*2fe8fb19SBen Gras r_sort_b(a, ta, n, i, tr, endch);
272*2fe8fb19SBen Gras continue;
273*2fe8fb19SBen Gras }
274*2fe8fb19SBen Gras }
275*2fe8fb19SBen Gras
276*2fe8fb19SBen Gras sp0 = sp1 = sp;
277*2fe8fb19SBen Gras bigc = 2;
278*2fe8fb19SBen Gras if (endch == 0) {
279*2fe8fb19SBen Gras top[0] = ak = a + count[0];
280*2fe8fb19SBen Gras count[0] = 0;
281*2fe8fb19SBen Gras } else {
282*2fe8fb19SBen Gras ak = a;
283*2fe8fb19SBen Gras top[255] = a + n;
284*2fe8fb19SBen Gras count[255] = 0;
285*2fe8fb19SBen Gras }
286*2fe8fb19SBen Gras for (cp = count + bmin; nc > 0; cp++) {
287*2fe8fb19SBen Gras while (*cp == 0)
288*2fe8fb19SBen Gras cp++;
289*2fe8fb19SBen Gras if ((c = *cp) > 1) {
290*2fe8fb19SBen Gras if (c > bigc) {
291*2fe8fb19SBen Gras bigc = c;
292*2fe8fb19SBen Gras sp1 = sp;
293*2fe8fb19SBen Gras }
294*2fe8fb19SBen Gras push(ak, c, i+1);
295*2fe8fb19SBen Gras }
296*2fe8fb19SBen Gras top[cp-count] = ak += c;
297*2fe8fb19SBen Gras *cp = 0; /* Reset count[]. */
298*2fe8fb19SBen Gras nc--;
299*2fe8fb19SBen Gras }
300*2fe8fb19SBen Gras swap(*sp0, *sp1, temp);
301*2fe8fb19SBen Gras
302*2fe8fb19SBen Gras for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */
303*2fe8fb19SBen Gras *--ak = *--ai;
304*2fe8fb19SBen Gras for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
305*2fe8fb19SBen Gras *--top[tr[(*ak)[i]]] = *ak;
306*2fe8fb19SBen Gras }
307*2fe8fb19SBen Gras }
308*2fe8fb19SBen Gras
/*
 * Insertion sort.
 *
 * Orders the n string pointers at a, comparing the strings from byte offset
 * b onward through the translation table tr.  A comparison stops at the
 * first byte whose translation is endch or differs between the two strings.
 * An element is moved down only while it compares strictly less than its
 * predecessor, so elements that compare equal keep their relative order.
 * Does nothing when n < 2.
 */
static inline void
simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
{
	u_char ch;
	const u_char **ak, **ai, *s, *t, *tmp;

	_DIAGASSERT(a != NULL);
	_DIAGASSERT(tr != NULL);

	for (ak = a+1; --n >= 1; ak++)
		for (ai = ak; ai > a; ai--) {
			/* Skip the prefix that ai[0] and ai[-1] share. */
			for (s = ai[0] + b, t = ai[-1] + b;
			    (ch = tr[*s]) != endch; s++, t++)
				if (ch != tr[*t])
					break;
			/* Already in order relative to its predecessor. */
			if (ch >= tr[*t])
				break;
			tmp = ai[0];
			ai[0] = ai[-1];
			ai[-1] = tmp;
		}
}
