xref: /csrg-svn/contrib/sort/fsort.c (revision 60905)
1*60905Sbostic /*-
2*60905Sbostic  * Copyright (c) 1993 The Regents of the University of California.
3*60905Sbostic  * All rights reserved.
4*60905Sbostic  *
5*60905Sbostic  * This code is derived from software contributed to Berkeley by
6*60905Sbostic  * Peter McIlroy.
7*60905Sbostic  *
8*60905Sbostic  * %sccs.include.redist.c%
9*60905Sbostic  */
10*60905Sbostic 
11*60905Sbostic #ifndef lint
12*60905Sbostic static char sccsid[] = "@(#)fsort.c	5.1 (Berkeley) 06/01/93";
13*60905Sbostic #endif /* not lint */
14*60905Sbostic 
15*60905Sbostic /*
16*60905Sbostic  * Read in the next bin.  If it fits in one segment sort it;
17*60905Sbostic  * otherwise refine it by segment deeper by one character,
18*60905Sbostic  * and try again on smaller bins.  Sort the final bin at this level
19*60905Sbostic  * of recursion to keep the head of fstack at 0.
20*60905Sbostic  * After PANIC passes, abort to merge sort.
21*60905Sbostic */
22*60905Sbostic #include "sort.h"
23*60905Sbostic #include "fsort.h"
24*60905Sbostic 
25*60905Sbostic #include <stdlib.h>
26*60905Sbostic #include <string.h>
27*60905Sbostic 
28*60905Sbostic u_char **keylist = 0, *buffer = 0, *linebuf = 0;
29*60905Sbostic struct tempfile fstack[MAXFCT];
30*60905Sbostic extern char *toutpath;
31*60905Sbostic #define FSORTMAX 4
32*60905Sbostic int PANIC = FSORTMAX;
33*60905Sbostic 
34*60905Sbostic void
35*60905Sbostic fsort(binno, depth, infiles, nfiles, outfd, ftbl)
36*60905Sbostic 	register int binno, depth, nfiles;
37*60905Sbostic 	register union f_handle infiles;
38*60905Sbostic 	FILE *outfd;
39*60905Sbostic 	register struct field *ftbl;
40*60905Sbostic {
41*60905Sbostic 	register u_char *bufend, **keypos, *tmpbuf;
42*60905Sbostic 	u_char *weights;
43*60905Sbostic 	int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
44*60905Sbostic 	register int c, nelem;
45*60905Sbostic 	long sizes [NBINS+1];
46*60905Sbostic 	union f_handle tfiles, mstart = {MAXFCT-16};
47*60905Sbostic 	register int (*get)(int, union f_handle, int, RECHEADER *,
48*60905Sbostic 		u_char *, struct field *);
49*60905Sbostic 	register struct recheader *crec;
50*60905Sbostic 	struct field tfield[2];
51*60905Sbostic 	FILE *prevfd, *tailfd[FSORTMAX+1];
52*60905Sbostic 
53*60905Sbostic 	memset(tailfd, 0, sizeof(tailfd));
54*60905Sbostic 	prevfd = outfd;
55*60905Sbostic 	memset(tfield, 0, sizeof(tfield));
56*60905Sbostic 	if (ftbl[0].flags & R)
57*60905Sbostic 		tfield[0].weights = Rascii;
58*60905Sbostic 	else
59*60905Sbostic 		tfield[0].weights = ascii;
60*60905Sbostic 	tfield[0].icol.num = 1;
61*60905Sbostic 	weights = ftbl[0].weights;
62*60905Sbostic 	if (!buffer) {
63*60905Sbostic 		buffer = malloc(BUFSIZE);
64*60905Sbostic 		keylist = malloc(MAXNUM * sizeof(u_char *));
65*60905Sbostic 		if (!SINGL_FLD)
66*60905Sbostic 			linebuf = malloc(MAXLLEN);
67*60905Sbostic 	}
68*60905Sbostic 	bufend = buffer + BUFSIZE;
69*60905Sbostic 	if (binno >= 0) {
70*60905Sbostic 		tfiles.top = infiles.top + nfiles;
71*60905Sbostic 		get = getnext;
72*60905Sbostic 	} else {
73*60905Sbostic 		tfiles.top = 0;
74*60905Sbostic 		if (SINGL_FLD)
75*60905Sbostic 			get = makeline;
76*60905Sbostic 		else
77*60905Sbostic 			get = makekey;
78*60905Sbostic 	}
79*60905Sbostic 	for (;;) {
80*60905Sbostic 		memset(sizes, 0, sizeof(sizes));
81*60905Sbostic 		c = ntfiles = 0;
82*60905Sbostic 		if (binno == weights[REC_D] &&
83*60905Sbostic 		    !(SINGL_FLD && ftbl[0].flags & F)) {	/* pop */
84*60905Sbostic 			rd_append(weights[REC_D],
85*60905Sbostic 			    infiles, nfiles, prevfd, buffer, bufend);
86*60905Sbostic 			break;
87*60905Sbostic 		} else if (binno == weights[REC_D]) {
88*60905Sbostic 			depth = 0;		/* start over on flat weights */
89*60905Sbostic 			ftbl = tfield;
90*60905Sbostic 			weights = ftbl[0].weights;
91*60905Sbostic 		}
92*60905Sbostic 		while (c != EOF) {
93*60905Sbostic 			keypos = keylist;
94*60905Sbostic 			nelem = 0;
95*60905Sbostic 			crec = (RECHEADER *) buffer;
96*60905Sbostic 			while((c = get(binno, infiles, nfiles, crec, bufend,
97*60905Sbostic 			    ftbl)) == 0) {
98*60905Sbostic 				*keypos++ = crec->data + depth;
99*60905Sbostic 				if (++nelem == MAXNUM) {
100*60905Sbostic 					c = BUFFEND;
101*60905Sbostic 					break;
102*60905Sbostic 				}
103*60905Sbostic 				crec =(RECHEADER *)	((char *) crec +
104*60905Sbostic 				SALIGN(crec->length) + sizeof(TRECHEADER));
105*60905Sbostic 			}
106*60905Sbostic 			if (c == BUFFEND || ntfiles || mfct) {	/* push */
107*60905Sbostic 				if (panic >= PANIC) {
108*60905Sbostic 					fstack[MAXFCT-16+mfct].fd = ftmp();
109*60905Sbostic 					if (radixsort(keylist, nelem, weights,
110*60905Sbostic 					    REC_D))
111*60905Sbostic 						err(2, NULL);
112*60905Sbostic 					append(keylist, nelem, depth, fstack[
113*60905Sbostic 					 MAXFCT-16+mfct].fd, putrec, ftbl);
114*60905Sbostic 					mfct++;
115*60905Sbostic 					/* reduce number of open files */
116*60905Sbostic 					if (mfct == 16 ||(c == EOF && ntfiles)) {
117*60905Sbostic 						tmpbuf = malloc(bufend -
118*60905Sbostic 						    crec->data);
119*60905Sbostic 						memmove(tmpbuf, crec->data,
120*60905Sbostic 						    bufend - crec->data);
121*60905Sbostic 						fstack[tfiles.top + ntfiles].fd
122*60905Sbostic 						    = ftmp();
123*60905Sbostic 						fmerge(0, mstart, mfct, geteasy,
124*60905Sbostic 						  fstack[tfiles.top+ntfiles].fd,
125*60905Sbostic 						  putrec, ftbl);
126*60905Sbostic 						++ntfiles;
127*60905Sbostic 						mfct = 0;
128*60905Sbostic 						memmove(crec->data, tmpbuf,
129*60905Sbostic 						    bufend - crec->data);
130*60905Sbostic 						free(tmpbuf);
131*60905Sbostic 					}
132*60905Sbostic 				} else {
133*60905Sbostic 					fstack[tfiles.top + ntfiles].fd= ftmp();
134*60905Sbostic 					onepass(keylist, depth, nelem, sizes,
135*60905Sbostic 					weights, fstack[tfiles.top+ntfiles].fd);
136*60905Sbostic 					++ntfiles;
137*60905Sbostic 				}
138*60905Sbostic 			}
139*60905Sbostic 		}
140*60905Sbostic 		get = getnext;
141*60905Sbostic 		if (!ntfiles && !mfct) {	/* everything in memory--pop */
142*60905Sbostic 			if (nelem > 1)
143*60905Sbostic 			   if (radixsort(keylist, nelem, weights, REC_D))
144*60905Sbostic 				err(2, NULL);
145*60905Sbostic 			append(keylist, nelem, depth, outfd, putline, ftbl);
146*60905Sbostic 			break;					/* pop */
147*60905Sbostic 		}
148*60905Sbostic 		if (panic >= PANIC) {
149*60905Sbostic 			if (!ntfiles)
150*60905Sbostic 				fmerge(0, mstart, mfct, geteasy,
151*60905Sbostic 				    outfd, putline, ftbl);
152*60905Sbostic 			else
153*60905Sbostic 				fmerge(0, tfiles, ntfiles, geteasy,
154*60905Sbostic 				    outfd, putline, ftbl);
155*60905Sbostic 			break;
156*60905Sbostic 
157*60905Sbostic 		}
158*60905Sbostic 		total = maxb = lastb = 0;	/* find if one bin dominates */
159*60905Sbostic 		for (i = 0; i < NBINS; i++)
160*60905Sbostic 		  if (sizes[i]) {
161*60905Sbostic 			if (sizes[i] > sizes[maxb])
162*60905Sbostic 				maxb = i;
163*60905Sbostic 			lastb = i;
164*60905Sbostic 			total += sizes[i];
165*60905Sbostic 		}
166*60905Sbostic 		if (sizes[maxb] < max((total / 2) , BUFSIZE))
167*60905Sbostic 			maxb = lastb;	/* otherwise pop after last bin */
168*60905Sbostic 		fstack[tfiles.top].lastb = lastb;
169*60905Sbostic 		fstack[tfiles.top].maxb = maxb;
170*60905Sbostic 
171*60905Sbostic 			/* start refining next level. */
172*60905Sbostic 		get(-1, tfiles, ntfiles, crec, bufend, 0);	/* rewind */
173*60905Sbostic 		for (i = 0; i < maxb; i++) {
174*60905Sbostic 			if (!sizes[i])	/* bin empty; step ahead file offset */
175*60905Sbostic 				get(i, tfiles, ntfiles, crec, bufend, 0);
176*60905Sbostic 			else
177*60905Sbostic 				fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
178*60905Sbostic 		}
179*60905Sbostic 		if (lastb != maxb) {
180*60905Sbostic 			if (prevfd != outfd)
181*60905Sbostic 				tailfd[panic] = prevfd;
182*60905Sbostic 			prevfd = ftmp();
183*60905Sbostic 			for (i = maxb+1; i <= lastb; i++)
184*60905Sbostic 				if (!sizes[i])
185*60905Sbostic 					get(i, tfiles, ntfiles, crec, bufend,0);
186*60905Sbostic 				else
187*60905Sbostic 					fsort(i, depth+1, tfiles, ntfiles,
188*60905Sbostic 					    prevfd, ftbl);
189*60905Sbostic 		}
190*60905Sbostic 
191*60905Sbostic 		/* sort biggest (or last) bin at this level */
192*60905Sbostic 		depth++;
193*60905Sbostic 		panic++;
194*60905Sbostic 		binno = maxb;
195*60905Sbostic 		infiles.top = tfiles.top;	/* getnext will free tfiles, */
196*60905Sbostic 		nfiles = ntfiles;		/* so overwrite them */
197*60905Sbostic 	}
198*60905Sbostic 	if (prevfd != outfd) {
199*60905Sbostic 		concat(outfd, prevfd);
200*60905Sbostic 		fclose(prevfd);
201*60905Sbostic 	}
202*60905Sbostic 	for (i = panic; i >= 0; --i)
203*60905Sbostic 		if (tailfd[i]) {
204*60905Sbostic 			concat(outfd, tailfd[i]);
205*60905Sbostic 			fclose(tailfd[i]);
206*60905Sbostic 		}
207*60905Sbostic }
208*60905Sbostic 
209*60905Sbostic /*
210*60905Sbostic  This is one pass of radix exchange, dumping the bins to disk.
211*60905Sbostic  */
212*60905Sbostic #define swap(a, b, t) t = a, a = b, b = t
213*60905Sbostic void
214*60905Sbostic onepass(a, depth, n, sizes, tr, fd)
215*60905Sbostic 	u_char **a;
216*60905Sbostic 	int depth;
217*60905Sbostic 	long n, sizes[];
218*60905Sbostic 	u_char *tr;
219*60905Sbostic 	FILE *fd;
220*60905Sbostic {
221*60905Sbostic 	long tsizes[NBINS+1];
222*60905Sbostic 	u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
223*60905Sbostic 	static histo[256];
224*60905Sbostic 	int *hp;
225*60905Sbostic 	register int c;
226*60905Sbostic 	u_char **an, *t, **aj;
227*60905Sbostic 	register u_char **ak, *r;
228*60905Sbostic 
229*60905Sbostic 	memset(tsizes, 0, sizeof(tsizes));
230*60905Sbostic 	depth += sizeof(TRECHEADER);
231*60905Sbostic 	an = a + n;
232*60905Sbostic 	for (ak = a; ak < an; ak++) {
233*60905Sbostic 		histo[c = tr[**ak]]++;
234*60905Sbostic 		tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
235*60905Sbostic 	}
236*60905Sbostic 
237*60905Sbostic 	bin[0] = a;
238*60905Sbostic 	bpmax = bin + 256;
239*60905Sbostic 	tp = top, hp = histo;
240*60905Sbostic 	for (bp = bin; bp < bpmax; bp++) {
241*60905Sbostic 		*tp++ = *(bp+1) = *bp + (c = *hp);
242*60905Sbostic 		*hp++ = 0;
243*60905Sbostic 		if (c <= 1)
244*60905Sbostic 			continue;
245*60905Sbostic 	}
246*60905Sbostic 	for(aj = a; aj < an; *aj = r, aj = bin[c+1])
247*60905Sbostic 		for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
248*60905Sbostic 			swap(*ak, r, t);
249*60905Sbostic 
250*60905Sbostic 	for (ak = a, c = 0; c < 256; c++) {
251*60905Sbostic 		an = bin[c+1];
252*60905Sbostic 		n = an - ak;
253*60905Sbostic 		tsizes[c] += n * sizeof(TRECHEADER);
254*60905Sbostic 		/* tell getnext how many elements in this bin, this segment. */
255*60905Sbostic 		EWRITE(tsizes+c, sizeof(long), 1, fd);
256*60905Sbostic 		sizes[c] += tsizes[c];
257*60905Sbostic 		for (; ak < an; ++ak)
258*60905Sbostic 			putrec((RECHEADER *) *ak, fd);
259*60905Sbostic 	}
260*60905Sbostic }
261