xref: /csrg-svn/contrib/sort/msort.c (revision 62247)
160908Sbostic /*-
2*62247Sbostic  * Copyright (c) 1993
3*62247Sbostic  *	The Regents of the University of California.  All rights reserved.
460908Sbostic  *
560908Sbostic  * This code is derived from software contributed to Berkeley by
660908Sbostic  * Peter McIlroy.
760908Sbostic  *
860908Sbostic  * %sccs.include.redist.c%
960908Sbostic  */
1060908Sbostic 
1160908Sbostic #ifndef lint
12*62247Sbostic static char sccsid[] = "@(#)msort.c	8.1 (Berkeley) 06/06/93";
1360908Sbostic #endif /* not lint */
1460908Sbostic 
1560908Sbostic #include "sort.h"
1660908Sbostic #include "fsort.h"
1760908Sbostic 
1860908Sbostic #include <stdlib.h>
1960908Sbostic #include <string.h>
2060908Sbostic #include <unistd.h>
2160908Sbostic 
2260908Sbostic /* Subroutines using comparisons: merge sort and check order */
2360908Sbostic #define DELETE (1)
2460908Sbostic #define LALIGN(n) ((n+3) & ~3)
2560908Sbostic 
2660908Sbostic typedef struct mfile {
2760908Sbostic 	u_char *end;
2860908Sbostic 	short flno;
2960908Sbostic 	struct recheader rec[1];
3060908Sbostic } MFILE;
3160908Sbostic typedef struct tmfile {
3260908Sbostic 	u_char *end;
3360908Sbostic 	short flno;
3460908Sbostic 	struct trecheader rec[1];
3560908Sbostic } TMFILE;
3660908Sbostic u_char *wts, *wts1 = 0;
3760908Sbostic struct mfile *cfilebuf;
3860908Sbostic 
3960908Sbostic static int cmp __P((struct recheader *, struct recheader *));
4060908Sbostic static int insert __P((struct mfile **, struct mfile **, int, int));
4160908Sbostic 
4260908Sbostic void
fmerge(binno,files,nfiles,get,outfd,fput,ftbl)4360908Sbostic fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
4460908Sbostic 	union f_handle files;
4560908Sbostic 	int binno, nfiles;
4660908Sbostic 	int (*get)();
4760908Sbostic 	FILE *outfd;
4860908Sbostic 	void (*fput)();
4960908Sbostic 	struct field *ftbl;
5060908Sbostic {
5160908Sbostic 	FILE *tout;
5260908Sbostic 	int i, j, last;
5360908Sbostic 	void (*put)(struct recheader *, FILE *);
5460908Sbostic 	extern int geteasy();
5560908Sbostic 	struct tempfile *l_fstack;
5660908Sbostic 
5760908Sbostic 	wts = ftbl->weights;
5860908Sbostic 	if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
5960908Sbostic 		wts1 = (ftbl->flags & R) ? Rascii : ascii;
6060908Sbostic 	if (!cfilebuf)
6160908Sbostic 		cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
6260908Sbostic 
6360908Sbostic 	i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
6460908Sbostic 	if (!buffer || i > BUFSIZE) {
6560908Sbostic 		buffer = buffer ? realloc(buffer, i) : malloc(i);
6660908Sbostic 		if (!buffer)
6760908Sbostic 			err(2, NULL);
6860908Sbostic 		if (!SINGL_FLD)
6960908Sbostic 			linebuf = malloc(MAXLLEN);
7060908Sbostic 	}
7160908Sbostic 
7260908Sbostic 	if (binno >= 0)
7360908Sbostic 		l_fstack = fstack + files.top;
7460908Sbostic 	else
7560908Sbostic 		l_fstack = fstack;
7660908Sbostic 	while (nfiles) {
7760908Sbostic 		put = putrec;
7860908Sbostic 		for (j = 0; j < nfiles; j += 16) {
7960908Sbostic 			if (nfiles <= 16) {
8060908Sbostic 				tout = outfd;
8160908Sbostic 				put = fput;
8260908Sbostic 			}
8360908Sbostic 			else
8460908Sbostic 				tout = ftmp();
8560908Sbostic 			last = min(16, nfiles - j);
8660908Sbostic 			if (binno < 0) {
8760908Sbostic 				for (i = 0; i < last; i++)
8860908Sbostic 					if (!(l_fstack[i+MAXFCT-1-16].fd =
8960908Sbostic 					    fopen(files.names[j + i], "r")))
9060908Sbostic 						err(2, "%s", files.names[j+i]);
9160908Sbostic 				merge(MAXFCT-1-16, last, get, tout, put, ftbl);
9260908Sbostic 			}
9360908Sbostic 			else {
9460908Sbostic 				for (i = 0; i< last; i++)
9560908Sbostic 					rewind(l_fstack[i+j].fd);
9660908Sbostic 				merge(files.top+j, last, get, tout, put, ftbl);
9760908Sbostic 			}
9860908Sbostic 			if (nfiles > 16) l_fstack[j/16].fd = tout;
9960908Sbostic 		}
10060908Sbostic 		nfiles = (nfiles + 15) / 16;
10160908Sbostic 		if (nfiles == 1)
10260908Sbostic 			nfiles = 0;
10360908Sbostic 		if (binno < 0) {
10460908Sbostic 			binno = 0;
10560908Sbostic 			get = geteasy;
10660908Sbostic 			files.top = 0;
10760908Sbostic 		}
10860908Sbostic 	}
10960908Sbostic }
11060908Sbostic 
11160908Sbostic void
merge(infl0,nfiles,get,outfd,put,ftbl)11260908Sbostic merge(infl0, nfiles, get, outfd, put, ftbl)
11360908Sbostic 	int infl0, nfiles;
11460908Sbostic 	int (*get)();
11560908Sbostic 	void (*put)(struct recheader *, FILE *);
11660908Sbostic 	FILE *outfd;
11760908Sbostic 	struct field *ftbl;
11860908Sbostic {
11960908Sbostic 	int c, i, j;
12060908Sbostic 	union f_handle dummy = {0};
12160908Sbostic 	struct mfile *flist[16], *cfile;
12260908Sbostic 	for (i = j = 0; i < nfiles; i++) {
12360908Sbostic 		cfile = (MFILE *) (buffer +
12460908Sbostic 		    i * LALIGN(MAXLLEN + sizeof(TMFILE)));
12560908Sbostic 		cfile->flno = j + infl0;
12660908Sbostic 		cfile->end = cfile->rec->data + MAXLLEN;
12760908Sbostic 		for (c = 1; c == 1;) {
12860908Sbostic 			if (EOF == (c = get(j+infl0, dummy, nfiles,
12960908Sbostic 			   cfile->rec, cfile->end, ftbl))) {
13060908Sbostic 				--i;
13160908Sbostic 				--nfiles;
13260908Sbostic 				break;
13360908Sbostic 			}
13460908Sbostic 			if (i)
13560908Sbostic 				c = insert(flist, &cfile, i, !DELETE);
13660908Sbostic 			else
13760908Sbostic 				flist[0] = cfile;
13860908Sbostic 		}
13960908Sbostic 		j++;
14060908Sbostic 	}
14160908Sbostic 	cfile = cfilebuf;
14260908Sbostic 	cfile->flno = flist[0]->flno;
14360908Sbostic 	cfile->end = cfile->rec->data + MAXLLEN;
14460908Sbostic 	while (nfiles) {
14560908Sbostic 		for (c = 1; c == 1;) {
14660908Sbostic 			if (EOF == (c = get(cfile->flno, dummy, nfiles,
14760908Sbostic 			   cfile->rec, cfile->end, ftbl))) {
14860908Sbostic 				put(flist[0]->rec, outfd);
14960908Sbostic 				memmove(flist, flist + 1,
15060908Sbostic 				    sizeof(MFILE *) * (--nfiles));
15160908Sbostic 				cfile->flno = flist[0]->flno;
15260908Sbostic 				break;
15360908Sbostic 			}
15460908Sbostic 			if (!(c = insert(flist, &cfile, nfiles, DELETE)))
15560908Sbostic 				put(cfile->rec, outfd);
15660908Sbostic 		}
15760908Sbostic 	}
15860908Sbostic }
15960908Sbostic 
16060908Sbostic /*
16160908Sbostic  * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
16260908Sbostic  * otherwise just inserts *rec in flist.
16360908Sbostic */
16460908Sbostic static int
insert(flist,rec,ttop,delete)16560908Sbostic insert(flist, rec, ttop, delete)
16660908Sbostic 	struct mfile **flist, **rec;
16760908Sbostic 	int delete, ttop;			/* delete = 0 or 1 */
16860908Sbostic {
16960908Sbostic 	register struct mfile *tmprec;
17060908Sbostic 	register int top, mid, bot = 0, cmpv = 1;
17160908Sbostic 	tmprec = *rec;
17260908Sbostic 	top = ttop;
17360908Sbostic 	for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
17460908Sbostic 		cmpv = cmp(tmprec->rec, flist[mid]->rec);
17560908Sbostic 		if (cmpv < 0)
17660908Sbostic 			top = mid;
17760908Sbostic 		else if (cmpv > 0)
17860908Sbostic 			bot = mid;
17960908Sbostic 		else {
18060908Sbostic 			if (!UNIQUE)
18160908Sbostic 				bot = mid - 1;
18260908Sbostic 			break;
18360908Sbostic 		}
18460908Sbostic 	}
18560908Sbostic 	if (delete) {
18660908Sbostic 		if (UNIQUE) {
18760908Sbostic 			if (!bot && cmpv)
18860908Sbostic 				cmpv = cmp(tmprec->rec, flist[0]->rec);
18960908Sbostic 			if (!cmpv)
19060908Sbostic 				return(1);
19160908Sbostic 		}
19260908Sbostic 		tmprec = flist[0];
19360908Sbostic 		if (bot)
19460908Sbostic 			memmove(flist, flist+1, bot * sizeof(MFILE **));
19560908Sbostic 		flist[bot] = *rec;
19660908Sbostic 		*rec = tmprec;
19760908Sbostic 		(*rec)->flno = (*flist)->flno;
19860908Sbostic 		return (0);
19960908Sbostic 	}
20060908Sbostic 	else {
20160908Sbostic 		if (!bot && !(UNIQUE && !cmpv)) {
20260908Sbostic 			cmpv = cmp(tmprec->rec, flist[0]->rec);
20360908Sbostic 			if (cmpv < 0)
20460908Sbostic 				bot = -1;
20560908Sbostic 		}
20660908Sbostic 		if (UNIQUE && !cmpv)
20760908Sbostic 			return (1);
20860908Sbostic 		bot++;
20960908Sbostic 		memmove(flist + bot+1, flist + bot,
21060908Sbostic 		    (ttop - bot) * sizeof(MFILE **));
21160908Sbostic 		flist[bot] = *rec;
21260908Sbostic 		return (0);
21360908Sbostic 	}
21460908Sbostic }
21560908Sbostic 
21660908Sbostic /*
21760908Sbostic  * check order on one file
21860908Sbostic  */
21960908Sbostic void
order(infile,get,ftbl)22060908Sbostic order(infile, get, ftbl)
22160908Sbostic 	union f_handle infile;
22260908Sbostic 	int (*get)();
22360908Sbostic 	struct field *ftbl;
22460908Sbostic {
22560908Sbostic 	u_char *end;
22660908Sbostic 	int c;
22760908Sbostic 	struct recheader *crec, *prec, *trec;
22860908Sbostic 
22960908Sbostic 	if (!SINGL_FLD)
23060908Sbostic 		linebuf = malloc(MAXLLEN);
23160908Sbostic 	buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
23260908Sbostic 	end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
23360908Sbostic 	crec = (RECHEADER *) buffer;
23460908Sbostic 	prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
23560908Sbostic 	wts = ftbl->weights;
23660908Sbostic 	if (SINGL_FLD && ftbl->flags & F)
23760908Sbostic 		wts1 = ftbl->flags & R ? Rascii : ascii;
23860908Sbostic 	else
23960908Sbostic 		wts1 = 0;
24060908Sbostic 	if (0 == get(-1, infile, 1, prec, end, ftbl))
24160908Sbostic 	while (0 == get(-1, infile, 1, crec, end, ftbl)) {
24260908Sbostic 		if (0 < (c = cmp(prec, crec))) {
24360908Sbostic 			crec->data[crec->length-1] = 0;
24460908Sbostic 			errx(1, "found disorder: %s", crec->data+crec->offset);
24560908Sbostic 		}
24660908Sbostic 		if (UNIQUE && !c) {
24760908Sbostic 			crec->data[crec->length-1] = 0;
24860908Sbostic 			errx(1, "found non-uniqueness: %s",
24960908Sbostic 			    crec->data+crec->offset);
25060908Sbostic 		}
25160908Sbostic 		trec = prec;
25260908Sbostic 		prec = crec;
25360908Sbostic 		crec = trec;
25460908Sbostic 	}
25560908Sbostic 	exit(0);
25660908Sbostic }
25760908Sbostic 
25860908Sbostic static int
cmp(rec1,rec2)25960908Sbostic cmp(rec1, rec2)
26060908Sbostic 	struct recheader *rec1, *rec2;
26160908Sbostic {
26260908Sbostic 	register r;
26360908Sbostic 	register u_char *pos1, *pos2, *end;
26460908Sbostic 	register u_char *cwts;
26560908Sbostic 	for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
26660908Sbostic 		pos1 = rec1->data;
26760908Sbostic 		pos2 = rec2->data;
26860908Sbostic 		if (!SINGL_FLD && UNIQUE)
26960908Sbostic 			end = pos1 + min(rec1->offset, rec2->offset);
27060908Sbostic 		else
27160908Sbostic 			end = pos1 + min(rec1->length, rec2->length);
27260908Sbostic 		for (; pos1 < end; ) {
27360908Sbostic 			if (r = cwts[*pos1++] - cwts[*pos2++])
27460908Sbostic 				return (r);
27560908Sbostic 		}
27660908Sbostic 	}
27760908Sbostic 	return (0);
27860908Sbostic }
279