xref: /csrg-svn/contrib/sort/files.c (revision 62247)
/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy.
 *
 * %sccs.include.redist.c%
 */
1060904Sbostic 
#ifndef lint
/*
 * SCCS identification string for this file.  It is never written to,
 * so it is declared const.
 */
static const char sccsid[] = "@(#)files.c	8.1 (Berkeley) 06/06/93";
#endif /* not lint */
1460904Sbostic 
1560904Sbostic #include "sort.h"
1660904Sbostic #include "fsort.h"
1760904Sbostic 
1860904Sbostic #include <string.h>
1960904Sbostic 
2060904Sbostic /*
2160904Sbostic  * this is the subroutine for file management for fsort().
2260904Sbostic  * It keeps the buffers for all temporary files.
2360904Sbostic  */
2460904Sbostic int
getnext(binno,infl0,nfiles,pos,end,dummy)2560904Sbostic getnext(binno, infl0, nfiles, pos, end, dummy)
2660904Sbostic 	int binno, nfiles;
2760904Sbostic 	union f_handle infl0;
2860904Sbostic 	register struct recheader *pos;
2960904Sbostic 	register u_char *end;
3060904Sbostic 	struct field *dummy;
3160904Sbostic {
3260904Sbostic 	register int i;
3360904Sbostic 	register u_char *hp;
3460904Sbostic 	static long nleft = 0;
3560904Sbostic 	static int cnt = 0, flag = -1;
3660904Sbostic 	static u_char maxb = 0;
3760904Sbostic 	static FILE *fd;
3860904Sbostic 
3960904Sbostic 	if (nleft == 0) {
4060904Sbostic 		if (binno < 0)	/* reset files. */ {
4160904Sbostic 			for (i = 0; i < nfiles; i++) {
4260904Sbostic 				rewind(fstack[infl0.top + i].fd);
4360904Sbostic 				fstack[infl0.top + i].max_o = 0;
4460904Sbostic 			}
4560904Sbostic 			flag = -1;
4660904Sbostic 			nleft = cnt = 0;
4760904Sbostic 			return(-1);
4860904Sbostic 		}
4960904Sbostic 		maxb = fstack[infl0.top].maxb;
5060904Sbostic 		for (; nleft == 0; cnt++) {
5160904Sbostic 			if (cnt >= nfiles) {
5260904Sbostic 				cnt = 0;
5360904Sbostic 				return (EOF);
5460904Sbostic 			}
5560904Sbostic 			fd = fstack[infl0.top + cnt].fd;
5660904Sbostic 			hp = (u_char *) &nleft;
5760904Sbostic 			for (i = sizeof(TRECHEADER); i; --i)
5860904Sbostic 				*hp++ = getc(fd);
5960904Sbostic 			if (binno < maxb)
6060904Sbostic 				fstack[infl0.top+cnt].max_o
6160904Sbostic 					+= sizeof(nleft) + nleft;
6260904Sbostic 			else if (binno == maxb) {
6360904Sbostic 				if (binno != fstack[infl0.top].lastb) {
6460904Sbostic 					fseek(fd, fstack[infl0.top+
6560904Sbostic 						cnt].max_o, SEEK_SET);
6660904Sbostic 					fread(&nleft, sizeof(nleft), 1, fd);
6760904Sbostic 				}
6860904Sbostic 				if (nleft == 0)
6960904Sbostic 					fclose(fd);
7060904Sbostic 			} else if (binno == maxb + 1) {		/* skip a bin */
7160904Sbostic 				fseek(fd, nleft, SEEK_CUR);
7260904Sbostic 				fread(&nleft, sizeof(nleft), 1, fd);
7360904Sbostic 				flag = cnt;
7460904Sbostic 			}
7560904Sbostic 		}
7660904Sbostic 	}
7760904Sbostic 	if ((u_char *) pos > end - sizeof(TRECHEADER))
7860904Sbostic 		return (BUFFEND);
7960904Sbostic 	hp = (u_char *) pos;
8060904Sbostic 	for (i = sizeof(TRECHEADER); i ; --i)
8160904Sbostic 		*hp++ = (u_char) getc(fd);
8260904Sbostic 	if (end - pos->data < pos->length) {
8360904Sbostic 		for (i = sizeof(TRECHEADER); i ;  i--)
8460904Sbostic 			ungetc(*--hp, fd);
8560904Sbostic 		return (BUFFEND);
8660904Sbostic 	}
8760904Sbostic 	fread(pos->data, pos->length, 1, fd);
8860904Sbostic 	nleft -= pos->length + sizeof(TRECHEADER);
8960904Sbostic 	if (nleft == 0 && binno == fstack[infl0.top].maxb)
9060904Sbostic 		fclose(fd);
9160904Sbostic 	return (0);
9260904Sbostic }
9360904Sbostic 
9460904Sbostic /*
9560904Sbostic  * this is called when there is no special key. It's only called
9660904Sbostic  * in the first fsort pass.
9760904Sbostic  */
9860904Sbostic int
makeline(flno,filelist,nfiles,buffer,bufend,dummy2)9960904Sbostic makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
10060904Sbostic 	int flno, nfiles;
10160904Sbostic 	union f_handle filelist;
10260904Sbostic 	struct recheader *buffer;
10360904Sbostic 	u_char *bufend;
10460904Sbostic 	struct field *dummy2;
10560904Sbostic {
10660904Sbostic 	static char *opos;
10760904Sbostic 	register char *end, *pos;
10860904Sbostic 	static int fileno = 0, overflow = 0;
10960904Sbostic 	static FILE *fd = 0;
11060904Sbostic 	register int c;
11160904Sbostic 
11260904Sbostic 	pos = (char *) buffer->data;
11360904Sbostic 	end = min((char *) bufend, pos + MAXLLEN);
11460904Sbostic 	if (overflow) {
11560904Sbostic 		memmove(pos, opos, bufend - (u_char *) opos);
11660904Sbostic 		pos += ((char *) bufend - opos);
11760904Sbostic 		overflow = 0;
11860904Sbostic 	}
11960904Sbostic 	for (;;) {
12060904Sbostic 		if (flno >= 0) {
12160904Sbostic 			if (!(fd = fstack[flno].fd))
12260904Sbostic 				return (EOF);
12360904Sbostic 		} else if (!fd) {
12460904Sbostic 			if (fileno  >= nfiles) return(EOF);
12560904Sbostic 			if (!(fd = fopen(filelist.names[fileno], "r")))
12660904Sbostic 				err(2, "%s", filelist.names[fileno]);
12760904Sbostic 			++fileno;
12860904Sbostic 		}
12960904Sbostic 		while ((pos < end) && ((c = getc(fd)) != EOF)) {
13060904Sbostic 			if ((*pos++ = c) == REC_D) {
13160904Sbostic 				buffer->offset = 0;
13260904Sbostic 				buffer->length = pos - (char *) buffer->data;
13360904Sbostic 				return (0);
13460904Sbostic 			}
13560904Sbostic 		}
13660904Sbostic 		if (pos >= end && end == (char *) bufend) {
13760904Sbostic 			if ((char *) buffer->data < end) {
13860904Sbostic 				overflow = 1;
13960904Sbostic 				opos = (char *) buffer->data;
14060904Sbostic 			}
14160904Sbostic 			return (BUFFEND);
14260904Sbostic 		} else if (c == EOF) {
14360904Sbostic 			if (buffer->data != (u_char *) pos) {
14460904Sbostic 				warnx("last character not record delimiter");
14560904Sbostic 				*pos++ = REC_D;
14660904Sbostic 				buffer->offset = 0;
14760904Sbostic 				buffer->length = pos - (char *) buffer->data;
14860904Sbostic 				return(0);
14960904Sbostic 			}
15060904Sbostic 			FCLOSE(fd);
15160904Sbostic 			fd = 0;
15260904Sbostic 			if(flno >= 0) fstack[flno].fd = 0;
15360904Sbostic 		} else {
15460904Sbostic 			buffer->data[100] = '\000';
15560904Sbostic 			warnx("line too long:ignoring %s...", buffer->data);
15660904Sbostic 		}
15760904Sbostic 	}
15860904Sbostic }
15960904Sbostic 
16060904Sbostic /*
16160904Sbostic  * This generates keys. It's only called in the first fsort pass
16260904Sbostic  */
16360904Sbostic int
makekey(flno,filelist,nfiles,buffer,bufend,ftbl)16460904Sbostic makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
16560904Sbostic 	int flno, nfiles;
16660904Sbostic 	union f_handle filelist;
16760904Sbostic 	struct recheader *buffer;
16860904Sbostic 	u_char *bufend;
16960904Sbostic 	struct field *ftbl;
17060904Sbostic {
17160904Sbostic 	static int (*get)();
17260904Sbostic 	static int fileno = 0;
17360904Sbostic 	static FILE *dbdesc = 0;
17460904Sbostic 	static DBT dbkey[1], line[1];
17560904Sbostic 	static int overflow = 0;
17660904Sbostic 	int c;
17760904Sbostic 	if (overflow) {
17860904Sbostic 		overflow = 0;
17960904Sbostic 		enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
18060904Sbostic 		return (0);
18160904Sbostic 	}
18260904Sbostic 	for (;;) {
18360904Sbostic 		if (flno >= 0) {
18460904Sbostic 			get = seq;
18560904Sbostic 			if (!(dbdesc = fstack[flno].fd))
18660904Sbostic 				return(EOF);
18760904Sbostic 		} else if (!dbdesc) {
18860904Sbostic 			if (fileno  >= nfiles)
18960904Sbostic 				return (EOF);
19060904Sbostic 			dbdesc = fopen(filelist.names[fileno], "r");
19160904Sbostic 			if (!dbdesc)
19260904Sbostic 				err(2, "%s", filelist.names[fileno]);
19360904Sbostic 			++fileno;
19460904Sbostic 			get = seq;
19560904Sbostic 		}
19660904Sbostic 		if (!(c = get(dbdesc, line, dbkey))) {
19760904Sbostic 			if ((signed)line->size > bufend - buffer->data)
19860904Sbostic 				overflow = 1;
19960904Sbostic 			else
20060904Sbostic 				overflow = enterkey(buffer, line,
20160904Sbostic 				    bufend - (u_char *) buffer, ftbl);
20260904Sbostic 			if (overflow)
20360904Sbostic 				return (BUFFEND);
20460904Sbostic 			else
20560904Sbostic 				return (0);
20660904Sbostic 		}
20760904Sbostic 		if (c == EOF) {
20860904Sbostic 			FCLOSE(dbdesc);
20960904Sbostic 			dbdesc = 0;
21060904Sbostic 			if (flno >= 0) fstack[flno].fd = 0;
21160904Sbostic 		} else {
21260904Sbostic 
21360904Sbostic 			((char *) line->data)[60] = '\000';
21460904Sbostic 			warnx("line too long: ignoring %.100s...",
21560904Sbostic 			    (char *)line->data);
21660904Sbostic 		}
21760904Sbostic 
21860904Sbostic 	}
21960904Sbostic }
22060904Sbostic 
22160904Sbostic /*
22260904Sbostic  * get a key/line pair from fd
22360904Sbostic  */
22460904Sbostic int
seq(fd,line,key)22560904Sbostic seq(fd, line, key)
22660904Sbostic 	FILE *fd;
22760904Sbostic 	DBT *key, *line;
22860904Sbostic {
22960904Sbostic 	static char *buf, flag = 1;
23060904Sbostic 	register char *end, *pos;
23160904Sbostic 	register int c;
23260904Sbostic 	if (flag) {
23360904Sbostic 		flag = 0;
23460904Sbostic 		buf = (char *) linebuf;
23560904Sbostic 		end = buf + MAXLLEN;
23660904Sbostic 		line->data = buf;
23760904Sbostic 	}
23860904Sbostic 	pos = buf;
23960904Sbostic 	while ((c = getc(fd)) != EOF) {
24060904Sbostic 		if ((*pos++ = c) == REC_D) {
24160904Sbostic 			line->size = pos - buf;
24260904Sbostic 			return (0);
24360904Sbostic 		}
24460904Sbostic 		if (pos == end) {
24560904Sbostic 			line->size = MAXLLEN;
24660904Sbostic 			*--pos = REC_D;
24760904Sbostic 			while ((c = getc(fd)) != EOF) {
24860904Sbostic 				if (c == REC_D)
24960904Sbostic 					return (BUFFEND);
25060904Sbostic 			}
25160904Sbostic 		}
25260904Sbostic 	}
25360904Sbostic 	if (pos != buf) {
25460904Sbostic 		warnx("last character not record delimiter");
25560904Sbostic 		*pos++ = REC_D;
25660904Sbostic 		line->size = pos - buf;
25760904Sbostic 		return (0);
25860904Sbostic 	} else
25960904Sbostic 		return (EOF);
26060904Sbostic }
26160904Sbostic 
26260904Sbostic /*
26360904Sbostic  * write a key/line pair to a temporary file
26460904Sbostic  */
26560904Sbostic void
putrec(rec,fd)26660904Sbostic putrec(rec, fd)
26760904Sbostic 	register struct recheader *rec;
26860904Sbostic 	register FILE *fd;
26960904Sbostic {
27060904Sbostic 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fd);
27160904Sbostic }
27260904Sbostic 
27360904Sbostic /*
27460904Sbostic  * write a line to output
27560904Sbostic  */
27660904Sbostic void
putline(rec,fd)27760904Sbostic putline(rec, fd)
27860904Sbostic 	register struct recheader *rec;
27960904Sbostic 	register FILE *fd;
28060904Sbostic {
28160904Sbostic 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fd);
28260904Sbostic }
28360904Sbostic 
28460904Sbostic /*
28560904Sbostic  * get a record from a temporary file. (Used by merge sort.)
28660904Sbostic  */
28760904Sbostic int
geteasy(flno,filelist,nfiles,rec,end,dummy2)28860904Sbostic geteasy(flno, filelist, nfiles, rec, end, dummy2)
28960904Sbostic 	int flno, nfiles;
29060904Sbostic 	union f_handle filelist;
29160904Sbostic 	register struct recheader *rec;
29260904Sbostic 	register u_char *end;
29360904Sbostic 	struct field *dummy2;
29460904Sbostic {
29560904Sbostic 	int i;
29660904Sbostic 	FILE *fd;
29760904Sbostic 	fd = fstack[flno].fd;
29860904Sbostic 	if ((u_char *) rec > end - sizeof(TRECHEADER))
29960904Sbostic 		return (BUFFEND);
30060904Sbostic 	if (!fread(rec, 1, sizeof(TRECHEADER), fd)) {
30160904Sbostic 		fclose(fd);
30260904Sbostic 		fstack[flno].fd = 0;
30360904Sbostic 		return (EOF);
30460904Sbostic 	}
30560904Sbostic 	if (end - rec->data < rec->length) {
30660904Sbostic 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
30760904Sbostic 			ungetc(*((char *) rec + i), fd);
30860904Sbostic 		return (BUFFEND);
30960904Sbostic 	}
31060904Sbostic 	fread(rec->data, rec->length, 1, fd);
31160904Sbostic 	return (0);
31260904Sbostic }
313