xref: /csrg-svn/contrib/sort/fsort.c (revision 62247)
160905Sbostic /*-
2*62247Sbostic  * Copyright (c) 1993
3*62247Sbostic  *	The Regents of the University of California.  All rights reserved.
460905Sbostic  *
560905Sbostic  * This code is derived from software contributed to Berkeley by
660905Sbostic  * Peter McIlroy.
760905Sbostic  *
860905Sbostic  * %sccs.include.redist.c%
960905Sbostic  */
1060905Sbostic 
1160905Sbostic #ifndef lint
12*62247Sbostic static char sccsid[] = "@(#)fsort.c	8.1 (Berkeley) 06/06/93";
1360905Sbostic #endif /* not lint */
1460905Sbostic 
1560905Sbostic /*
1660905Sbostic  * Read in the next bin.  If it fits in one segment sort it;
1760905Sbostic  * otherwise refine it by segment deeper by one character,
1860905Sbostic  * and try again on smaller bins.  Sort the final bin at this level
1960905Sbostic  * of recursion to keep the head of fstack at 0.
2060905Sbostic  * After PANIC passes, abort to merge sort.
2160905Sbostic */
2260905Sbostic #include "sort.h"
2360905Sbostic #include "fsort.h"
2460905Sbostic 
2560905Sbostic #include <stdlib.h>
2660905Sbostic #include <string.h>
2760905Sbostic 
2860905Sbostic u_char **keylist = 0, *buffer = 0, *linebuf = 0;
2960905Sbostic struct tempfile fstack[MAXFCT];
3060905Sbostic extern char *toutpath;
3160905Sbostic #define FSORTMAX 4
3260905Sbostic int PANIC = FSORTMAX;
3360905Sbostic 
3460905Sbostic void
fsort(binno,depth,infiles,nfiles,outfd,ftbl)3560905Sbostic fsort(binno, depth, infiles, nfiles, outfd, ftbl)
3660905Sbostic 	register int binno, depth, nfiles;
3760905Sbostic 	register union f_handle infiles;
3860905Sbostic 	FILE *outfd;
3960905Sbostic 	register struct field *ftbl;
4060905Sbostic {
4160905Sbostic 	register u_char *bufend, **keypos, *tmpbuf;
4260905Sbostic 	u_char *weights;
4360905Sbostic 	int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
4460905Sbostic 	register int c, nelem;
4560905Sbostic 	long sizes [NBINS+1];
4660905Sbostic 	union f_handle tfiles, mstart = {MAXFCT-16};
4760905Sbostic 	register int (*get)(int, union f_handle, int, RECHEADER *,
4860905Sbostic 		u_char *, struct field *);
4960905Sbostic 	register struct recheader *crec;
5060905Sbostic 	struct field tfield[2];
5160905Sbostic 	FILE *prevfd, *tailfd[FSORTMAX+1];
5260905Sbostic 
5360905Sbostic 	memset(tailfd, 0, sizeof(tailfd));
5460905Sbostic 	prevfd = outfd;
5560905Sbostic 	memset(tfield, 0, sizeof(tfield));
5660905Sbostic 	if (ftbl[0].flags & R)
5760905Sbostic 		tfield[0].weights = Rascii;
5860905Sbostic 	else
5960905Sbostic 		tfield[0].weights = ascii;
6060905Sbostic 	tfield[0].icol.num = 1;
6160905Sbostic 	weights = ftbl[0].weights;
6260905Sbostic 	if (!buffer) {
6360905Sbostic 		buffer = malloc(BUFSIZE);
6460905Sbostic 		keylist = malloc(MAXNUM * sizeof(u_char *));
6560905Sbostic 		if (!SINGL_FLD)
6660905Sbostic 			linebuf = malloc(MAXLLEN);
6760905Sbostic 	}
6860905Sbostic 	bufend = buffer + BUFSIZE;
6960905Sbostic 	if (binno >= 0) {
7060905Sbostic 		tfiles.top = infiles.top + nfiles;
7160905Sbostic 		get = getnext;
7260905Sbostic 	} else {
7360905Sbostic 		tfiles.top = 0;
7460905Sbostic 		if (SINGL_FLD)
7560905Sbostic 			get = makeline;
7660905Sbostic 		else
7760905Sbostic 			get = makekey;
7860905Sbostic 	}
7960905Sbostic 	for (;;) {
8060905Sbostic 		memset(sizes, 0, sizeof(sizes));
8160905Sbostic 		c = ntfiles = 0;
8260905Sbostic 		if (binno == weights[REC_D] &&
8360905Sbostic 		    !(SINGL_FLD && ftbl[0].flags & F)) {	/* pop */
8460905Sbostic 			rd_append(weights[REC_D],
8560905Sbostic 			    infiles, nfiles, prevfd, buffer, bufend);
8660905Sbostic 			break;
8760905Sbostic 		} else if (binno == weights[REC_D]) {
8860905Sbostic 			depth = 0;		/* start over on flat weights */
8960905Sbostic 			ftbl = tfield;
9060905Sbostic 			weights = ftbl[0].weights;
9160905Sbostic 		}
9260905Sbostic 		while (c != EOF) {
9360905Sbostic 			keypos = keylist;
9460905Sbostic 			nelem = 0;
9560905Sbostic 			crec = (RECHEADER *) buffer;
9660905Sbostic 			while((c = get(binno, infiles, nfiles, crec, bufend,
9760905Sbostic 			    ftbl)) == 0) {
9860905Sbostic 				*keypos++ = crec->data + depth;
9960905Sbostic 				if (++nelem == MAXNUM) {
10060905Sbostic 					c = BUFFEND;
10160905Sbostic 					break;
10260905Sbostic 				}
10360905Sbostic 				crec =(RECHEADER *)	((char *) crec +
10460905Sbostic 				SALIGN(crec->length) + sizeof(TRECHEADER));
10560905Sbostic 			}
10660905Sbostic 			if (c == BUFFEND || ntfiles || mfct) {	/* push */
10760905Sbostic 				if (panic >= PANIC) {
10860905Sbostic 					fstack[MAXFCT-16+mfct].fd = ftmp();
10960905Sbostic 					if (radixsort(keylist, nelem, weights,
11060905Sbostic 					    REC_D))
11160905Sbostic 						err(2, NULL);
11260905Sbostic 					append(keylist, nelem, depth, fstack[
11360905Sbostic 					 MAXFCT-16+mfct].fd, putrec, ftbl);
11460905Sbostic 					mfct++;
11560905Sbostic 					/* reduce number of open files */
11660905Sbostic 					if (mfct == 16 ||(c == EOF && ntfiles)) {
11760905Sbostic 						tmpbuf = malloc(bufend -
11860905Sbostic 						    crec->data);
11960905Sbostic 						memmove(tmpbuf, crec->data,
12060905Sbostic 						    bufend - crec->data);
12160905Sbostic 						fstack[tfiles.top + ntfiles].fd
12260905Sbostic 						    = ftmp();
12360905Sbostic 						fmerge(0, mstart, mfct, geteasy,
12460905Sbostic 						  fstack[tfiles.top+ntfiles].fd,
12560905Sbostic 						  putrec, ftbl);
12660905Sbostic 						++ntfiles;
12760905Sbostic 						mfct = 0;
12860905Sbostic 						memmove(crec->data, tmpbuf,
12960905Sbostic 						    bufend - crec->data);
13060905Sbostic 						free(tmpbuf);
13160905Sbostic 					}
13260905Sbostic 				} else {
13360905Sbostic 					fstack[tfiles.top + ntfiles].fd= ftmp();
13460905Sbostic 					onepass(keylist, depth, nelem, sizes,
13560905Sbostic 					weights, fstack[tfiles.top+ntfiles].fd);
13660905Sbostic 					++ntfiles;
13760905Sbostic 				}
13860905Sbostic 			}
13960905Sbostic 		}
14060905Sbostic 		get = getnext;
14160905Sbostic 		if (!ntfiles && !mfct) {	/* everything in memory--pop */
14260905Sbostic 			if (nelem > 1)
14360905Sbostic 			   if (radixsort(keylist, nelem, weights, REC_D))
14460905Sbostic 				err(2, NULL);
14560905Sbostic 			append(keylist, nelem, depth, outfd, putline, ftbl);
14660905Sbostic 			break;					/* pop */
14760905Sbostic 		}
14860905Sbostic 		if (panic >= PANIC) {
14960905Sbostic 			if (!ntfiles)
15060905Sbostic 				fmerge(0, mstart, mfct, geteasy,
15160905Sbostic 				    outfd, putline, ftbl);
15260905Sbostic 			else
15360905Sbostic 				fmerge(0, tfiles, ntfiles, geteasy,
15460905Sbostic 				    outfd, putline, ftbl);
15560905Sbostic 			break;
15660905Sbostic 
15760905Sbostic 		}
15860905Sbostic 		total = maxb = lastb = 0;	/* find if one bin dominates */
15960905Sbostic 		for (i = 0; i < NBINS; i++)
16060905Sbostic 		  if (sizes[i]) {
16160905Sbostic 			if (sizes[i] > sizes[maxb])
16260905Sbostic 				maxb = i;
16360905Sbostic 			lastb = i;
16460905Sbostic 			total += sizes[i];
16560905Sbostic 		}
16660905Sbostic 		if (sizes[maxb] < max((total / 2) , BUFSIZE))
16760905Sbostic 			maxb = lastb;	/* otherwise pop after last bin */
16860905Sbostic 		fstack[tfiles.top].lastb = lastb;
16960905Sbostic 		fstack[tfiles.top].maxb = maxb;
17060905Sbostic 
17160905Sbostic 			/* start refining next level. */
17260905Sbostic 		get(-1, tfiles, ntfiles, crec, bufend, 0);	/* rewind */
17360905Sbostic 		for (i = 0; i < maxb; i++) {
17460905Sbostic 			if (!sizes[i])	/* bin empty; step ahead file offset */
17560905Sbostic 				get(i, tfiles, ntfiles, crec, bufend, 0);
17660905Sbostic 			else
17760905Sbostic 				fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
17860905Sbostic 		}
17960905Sbostic 		if (lastb != maxb) {
18060905Sbostic 			if (prevfd != outfd)
18160905Sbostic 				tailfd[panic] = prevfd;
18260905Sbostic 			prevfd = ftmp();
18360905Sbostic 			for (i = maxb+1; i <= lastb; i++)
18460905Sbostic 				if (!sizes[i])
18560905Sbostic 					get(i, tfiles, ntfiles, crec, bufend,0);
18660905Sbostic 				else
18760905Sbostic 					fsort(i, depth+1, tfiles, ntfiles,
18860905Sbostic 					    prevfd, ftbl);
18960905Sbostic 		}
19060905Sbostic 
19160905Sbostic 		/* sort biggest (or last) bin at this level */
19260905Sbostic 		depth++;
19360905Sbostic 		panic++;
19460905Sbostic 		binno = maxb;
19560905Sbostic 		infiles.top = tfiles.top;	/* getnext will free tfiles, */
19660905Sbostic 		nfiles = ntfiles;		/* so overwrite them */
19760905Sbostic 	}
19860905Sbostic 	if (prevfd != outfd) {
19960905Sbostic 		concat(outfd, prevfd);
20060905Sbostic 		fclose(prevfd);
20160905Sbostic 	}
20260905Sbostic 	for (i = panic; i >= 0; --i)
20360905Sbostic 		if (tailfd[i]) {
20460905Sbostic 			concat(outfd, tailfd[i]);
20560905Sbostic 			fclose(tailfd[i]);
20660905Sbostic 		}
20760905Sbostic }
20860905Sbostic 
20960905Sbostic /*
21060905Sbostic  This is one pass of radix exchange, dumping the bins to disk.
21160905Sbostic  */
21260905Sbostic #define swap(a, b, t) t = a, a = b, b = t
21360905Sbostic void
onepass(a,depth,n,sizes,tr,fd)21460905Sbostic onepass(a, depth, n, sizes, tr, fd)
21560905Sbostic 	u_char **a;
21660905Sbostic 	int depth;
21760905Sbostic 	long n, sizes[];
21860905Sbostic 	u_char *tr;
21960905Sbostic 	FILE *fd;
22060905Sbostic {
22160905Sbostic 	long tsizes[NBINS+1];
22260905Sbostic 	u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
22360905Sbostic 	static histo[256];
22460905Sbostic 	int *hp;
22560905Sbostic 	register int c;
22660905Sbostic 	u_char **an, *t, **aj;
22760905Sbostic 	register u_char **ak, *r;
22860905Sbostic 
22960905Sbostic 	memset(tsizes, 0, sizeof(tsizes));
23060905Sbostic 	depth += sizeof(TRECHEADER);
23160905Sbostic 	an = a + n;
23260905Sbostic 	for (ak = a; ak < an; ak++) {
23360905Sbostic 		histo[c = tr[**ak]]++;
23460905Sbostic 		tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
23560905Sbostic 	}
23660905Sbostic 
23760905Sbostic 	bin[0] = a;
23860905Sbostic 	bpmax = bin + 256;
23960905Sbostic 	tp = top, hp = histo;
24060905Sbostic 	for (bp = bin; bp < bpmax; bp++) {
24160905Sbostic 		*tp++ = *(bp+1) = *bp + (c = *hp);
24260905Sbostic 		*hp++ = 0;
24360905Sbostic 		if (c <= 1)
24460905Sbostic 			continue;
24560905Sbostic 	}
24660905Sbostic 	for(aj = a; aj < an; *aj = r, aj = bin[c+1])
24760905Sbostic 		for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
24860905Sbostic 			swap(*ak, r, t);
24960905Sbostic 
25060905Sbostic 	for (ak = a, c = 0; c < 256; c++) {
25160905Sbostic 		an = bin[c+1];
25260905Sbostic 		n = an - ak;
25360905Sbostic 		tsizes[c] += n * sizeof(TRECHEADER);
25460905Sbostic 		/* tell getnext how many elements in this bin, this segment. */
25560905Sbostic 		EWRITE(tsizes+c, sizeof(long), 1, fd);
25660905Sbostic 		sizes[c] += tsizes[c];
25760905Sbostic 		for (; ak < an; ++ak)
25860905Sbostic 			putrec((RECHEADER *) *ak, fd);
25960905Sbostic 	}
26060905Sbostic }
261