xref: /csrg-svn/contrib/sort/files.c (revision 60904)
1*60904Sbostic /*-
2*60904Sbostic  * Copyright (c) 1993 The Regents of the University of California.
3*60904Sbostic  * All rights reserved.
4*60904Sbostic  *
5*60904Sbostic  * This code is derived from software contributed to Berkeley by
6*60904Sbostic  * Peter McIlroy.
7*60904Sbostic  *
8*60904Sbostic  * %sccs.include.redist.c%
9*60904Sbostic  */
10*60904Sbostic 
11*60904Sbostic #ifndef lint
12*60904Sbostic static char sccsid[] = "@(#)files.c	5.1 (Berkeley) 06/01/93";
13*60904Sbostic #endif /* not lint */
14*60904Sbostic 
15*60904Sbostic #include "sort.h"
16*60904Sbostic #include "fsort.h"
17*60904Sbostic 
18*60904Sbostic #include <string.h>
19*60904Sbostic 
20*60904Sbostic /*
21*60904Sbostic  * this is the subroutine for file management for fsort().
22*60904Sbostic  * It keeps the buffers for all temporary files.
23*60904Sbostic  */
24*60904Sbostic int
25*60904Sbostic getnext(binno, infl0, nfiles, pos, end, dummy)
26*60904Sbostic 	int binno, nfiles;
27*60904Sbostic 	union f_handle infl0;
28*60904Sbostic 	register struct recheader *pos;
29*60904Sbostic 	register u_char *end;
30*60904Sbostic 	struct field *dummy;
31*60904Sbostic {
32*60904Sbostic 	register int i;
33*60904Sbostic 	register u_char *hp;
34*60904Sbostic 	static long nleft = 0;
35*60904Sbostic 	static int cnt = 0, flag = -1;
36*60904Sbostic 	static u_char maxb = 0;
37*60904Sbostic 	static FILE *fd;
38*60904Sbostic 
39*60904Sbostic 	if (nleft == 0) {
40*60904Sbostic 		if (binno < 0)	/* reset files. */ {
41*60904Sbostic 			for (i = 0; i < nfiles; i++) {
42*60904Sbostic 				rewind(fstack[infl0.top + i].fd);
43*60904Sbostic 				fstack[infl0.top + i].max_o = 0;
44*60904Sbostic 			}
45*60904Sbostic 			flag = -1;
46*60904Sbostic 			nleft = cnt = 0;
47*60904Sbostic 			return(-1);
48*60904Sbostic 		}
49*60904Sbostic 		maxb = fstack[infl0.top].maxb;
50*60904Sbostic 		for (; nleft == 0; cnt++) {
51*60904Sbostic 			if (cnt >= nfiles) {
52*60904Sbostic 				cnt = 0;
53*60904Sbostic 				return (EOF);
54*60904Sbostic 			}
55*60904Sbostic 			fd = fstack[infl0.top + cnt].fd;
56*60904Sbostic 			hp = (u_char *) &nleft;
57*60904Sbostic 			for (i = sizeof(TRECHEADER); i; --i)
58*60904Sbostic 				*hp++ = getc(fd);
59*60904Sbostic 			if (binno < maxb)
60*60904Sbostic 				fstack[infl0.top+cnt].max_o
61*60904Sbostic 					+= sizeof(nleft) + nleft;
62*60904Sbostic 			else if (binno == maxb) {
63*60904Sbostic 				if (binno != fstack[infl0.top].lastb) {
64*60904Sbostic 					fseek(fd, fstack[infl0.top+
65*60904Sbostic 						cnt].max_o, SEEK_SET);
66*60904Sbostic 					fread(&nleft, sizeof(nleft), 1, fd);
67*60904Sbostic 				}
68*60904Sbostic 				if (nleft == 0)
69*60904Sbostic 					fclose(fd);
70*60904Sbostic 			} else if (binno == maxb + 1) {		/* skip a bin */
71*60904Sbostic 				fseek(fd, nleft, SEEK_CUR);
72*60904Sbostic 				fread(&nleft, sizeof(nleft), 1, fd);
73*60904Sbostic 				flag = cnt;
74*60904Sbostic 			}
75*60904Sbostic 		}
76*60904Sbostic 	}
77*60904Sbostic 	if ((u_char *) pos > end - sizeof(TRECHEADER))
78*60904Sbostic 		return (BUFFEND);
79*60904Sbostic 	hp = (u_char *) pos;
80*60904Sbostic 	for (i = sizeof(TRECHEADER); i ; --i)
81*60904Sbostic 		*hp++ = (u_char) getc(fd);
82*60904Sbostic 	if (end - pos->data < pos->length) {
83*60904Sbostic 		for (i = sizeof(TRECHEADER); i ;  i--)
84*60904Sbostic 			ungetc(*--hp, fd);
85*60904Sbostic 		return (BUFFEND);
86*60904Sbostic 	}
87*60904Sbostic 	fread(pos->data, pos->length, 1, fd);
88*60904Sbostic 	nleft -= pos->length + sizeof(TRECHEADER);
89*60904Sbostic 	if (nleft == 0 && binno == fstack[infl0.top].maxb)
90*60904Sbostic 		fclose(fd);
91*60904Sbostic 	return (0);
92*60904Sbostic }
93*60904Sbostic 
94*60904Sbostic /*
95*60904Sbostic  * this is called when there is no special key. It's only called
96*60904Sbostic  * in the first fsort pass.
97*60904Sbostic  */
98*60904Sbostic int
99*60904Sbostic makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
100*60904Sbostic 	int flno, nfiles;
101*60904Sbostic 	union f_handle filelist;
102*60904Sbostic 	struct recheader *buffer;
103*60904Sbostic 	u_char *bufend;
104*60904Sbostic 	struct field *dummy2;
105*60904Sbostic {
106*60904Sbostic 	static char *opos;
107*60904Sbostic 	register char *end, *pos;
108*60904Sbostic 	static int fileno = 0, overflow = 0;
109*60904Sbostic 	static FILE *fd = 0;
110*60904Sbostic 	register int c;
111*60904Sbostic 
112*60904Sbostic 	pos = (char *) buffer->data;
113*60904Sbostic 	end = min((char *) bufend, pos + MAXLLEN);
114*60904Sbostic 	if (overflow) {
115*60904Sbostic 		memmove(pos, opos, bufend - (u_char *) opos);
116*60904Sbostic 		pos += ((char *) bufend - opos);
117*60904Sbostic 		overflow = 0;
118*60904Sbostic 	}
119*60904Sbostic 	for (;;) {
120*60904Sbostic 		if (flno >= 0) {
121*60904Sbostic 			if (!(fd = fstack[flno].fd))
122*60904Sbostic 				return (EOF);
123*60904Sbostic 		} else if (!fd) {
124*60904Sbostic 			if (fileno  >= nfiles) return(EOF);
125*60904Sbostic 			if (!(fd = fopen(filelist.names[fileno], "r")))
126*60904Sbostic 				err(2, "%s", filelist.names[fileno]);
127*60904Sbostic 			++fileno;
128*60904Sbostic 		}
129*60904Sbostic 		while ((pos < end) && ((c = getc(fd)) != EOF)) {
130*60904Sbostic 			if ((*pos++ = c) == REC_D) {
131*60904Sbostic 				buffer->offset = 0;
132*60904Sbostic 				buffer->length = pos - (char *) buffer->data;
133*60904Sbostic 				return (0);
134*60904Sbostic 			}
135*60904Sbostic 		}
136*60904Sbostic 		if (pos >= end && end == (char *) bufend) {
137*60904Sbostic 			if ((char *) buffer->data < end) {
138*60904Sbostic 				overflow = 1;
139*60904Sbostic 				opos = (char *) buffer->data;
140*60904Sbostic 			}
141*60904Sbostic 			return (BUFFEND);
142*60904Sbostic 		} else if (c == EOF) {
143*60904Sbostic 			if (buffer->data != (u_char *) pos) {
144*60904Sbostic 				warnx("last character not record delimiter");
145*60904Sbostic 				*pos++ = REC_D;
146*60904Sbostic 				buffer->offset = 0;
147*60904Sbostic 				buffer->length = pos - (char *) buffer->data;
148*60904Sbostic 				return(0);
149*60904Sbostic 			}
150*60904Sbostic 			FCLOSE(fd);
151*60904Sbostic 			fd = 0;
152*60904Sbostic 			if(flno >= 0) fstack[flno].fd = 0;
153*60904Sbostic 		} else {
154*60904Sbostic 			buffer->data[100] = '\000';
155*60904Sbostic 			warnx("line too long:ignoring %s...", buffer->data);
156*60904Sbostic 		}
157*60904Sbostic 	}
158*60904Sbostic }
159*60904Sbostic 
160*60904Sbostic /*
161*60904Sbostic  * This generates keys. It's only called in the first fsort pass
162*60904Sbostic  */
163*60904Sbostic int
164*60904Sbostic makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
165*60904Sbostic 	int flno, nfiles;
166*60904Sbostic 	union f_handle filelist;
167*60904Sbostic 	struct recheader *buffer;
168*60904Sbostic 	u_char *bufend;
169*60904Sbostic 	struct field *ftbl;
170*60904Sbostic {
171*60904Sbostic 	static int (*get)();
172*60904Sbostic 	static int fileno = 0;
173*60904Sbostic 	static FILE *dbdesc = 0;
174*60904Sbostic 	static DBT dbkey[1], line[1];
175*60904Sbostic 	static int overflow = 0;
176*60904Sbostic 	int c;
177*60904Sbostic 	if (overflow) {
178*60904Sbostic 		overflow = 0;
179*60904Sbostic 		enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
180*60904Sbostic 		return (0);
181*60904Sbostic 	}
182*60904Sbostic 	for (;;) {
183*60904Sbostic 		if (flno >= 0) {
184*60904Sbostic 			get = seq;
185*60904Sbostic 			if (!(dbdesc = fstack[flno].fd))
186*60904Sbostic 				return(EOF);
187*60904Sbostic 		} else if (!dbdesc) {
188*60904Sbostic 			if (fileno  >= nfiles)
189*60904Sbostic 				return (EOF);
190*60904Sbostic 			dbdesc = fopen(filelist.names[fileno], "r");
191*60904Sbostic 			if (!dbdesc)
192*60904Sbostic 				err(2, "%s", filelist.names[fileno]);
193*60904Sbostic 			++fileno;
194*60904Sbostic 			get = seq;
195*60904Sbostic 		}
196*60904Sbostic 		if (!(c = get(dbdesc, line, dbkey))) {
197*60904Sbostic 			if ((signed)line->size > bufend - buffer->data)
198*60904Sbostic 				overflow = 1;
199*60904Sbostic 			else
200*60904Sbostic 				overflow = enterkey(buffer, line,
201*60904Sbostic 				    bufend - (u_char *) buffer, ftbl);
202*60904Sbostic 			if (overflow)
203*60904Sbostic 				return (BUFFEND);
204*60904Sbostic 			else
205*60904Sbostic 				return (0);
206*60904Sbostic 		}
207*60904Sbostic 		if (c == EOF) {
208*60904Sbostic 			FCLOSE(dbdesc);
209*60904Sbostic 			dbdesc = 0;
210*60904Sbostic 			if (flno >= 0) fstack[flno].fd = 0;
211*60904Sbostic 		} else {
212*60904Sbostic 
213*60904Sbostic 			((char *) line->data)[60] = '\000';
214*60904Sbostic 			warnx("line too long: ignoring %.100s...",
215*60904Sbostic 			    (char *)line->data);
216*60904Sbostic 		}
217*60904Sbostic 
218*60904Sbostic 	}
219*60904Sbostic }
220*60904Sbostic 
221*60904Sbostic /*
222*60904Sbostic  * get a key/line pair from fd
223*60904Sbostic  */
224*60904Sbostic int
225*60904Sbostic seq(fd, line, key)
226*60904Sbostic 	FILE *fd;
227*60904Sbostic 	DBT *key, *line;
228*60904Sbostic {
229*60904Sbostic 	static char *buf, flag = 1;
230*60904Sbostic 	register char *end, *pos;
231*60904Sbostic 	register int c;
232*60904Sbostic 	if (flag) {
233*60904Sbostic 		flag = 0;
234*60904Sbostic 		buf = (char *) linebuf;
235*60904Sbostic 		end = buf + MAXLLEN;
236*60904Sbostic 		line->data = buf;
237*60904Sbostic 	}
238*60904Sbostic 	pos = buf;
239*60904Sbostic 	while ((c = getc(fd)) != EOF) {
240*60904Sbostic 		if ((*pos++ = c) == REC_D) {
241*60904Sbostic 			line->size = pos - buf;
242*60904Sbostic 			return (0);
243*60904Sbostic 		}
244*60904Sbostic 		if (pos == end) {
245*60904Sbostic 			line->size = MAXLLEN;
246*60904Sbostic 			*--pos = REC_D;
247*60904Sbostic 			while ((c = getc(fd)) != EOF) {
248*60904Sbostic 				if (c == REC_D)
249*60904Sbostic 					return (BUFFEND);
250*60904Sbostic 			}
251*60904Sbostic 		}
252*60904Sbostic 	}
253*60904Sbostic 	if (pos != buf) {
254*60904Sbostic 		warnx("last character not record delimiter");
255*60904Sbostic 		*pos++ = REC_D;
256*60904Sbostic 		line->size = pos - buf;
257*60904Sbostic 		return (0);
258*60904Sbostic 	} else
259*60904Sbostic 		return (EOF);
260*60904Sbostic }
261*60904Sbostic 
262*60904Sbostic /*
263*60904Sbostic  * write a key/line pair to a temporary file
264*60904Sbostic  */
265*60904Sbostic void
266*60904Sbostic putrec(rec, fd)
267*60904Sbostic 	register struct recheader *rec;
268*60904Sbostic 	register FILE *fd;
269*60904Sbostic {
270*60904Sbostic 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fd);
271*60904Sbostic }
272*60904Sbostic 
273*60904Sbostic /*
274*60904Sbostic  * write a line to output
275*60904Sbostic  */
276*60904Sbostic void
277*60904Sbostic putline(rec, fd)
278*60904Sbostic 	register struct recheader *rec;
279*60904Sbostic 	register FILE *fd;
280*60904Sbostic {
281*60904Sbostic 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fd);
282*60904Sbostic }
283*60904Sbostic 
284*60904Sbostic /*
285*60904Sbostic  * get a record from a temporary file. (Used by merge sort.)
286*60904Sbostic  */
287*60904Sbostic int
288*60904Sbostic geteasy(flno, filelist, nfiles, rec, end, dummy2)
289*60904Sbostic 	int flno, nfiles;
290*60904Sbostic 	union f_handle filelist;
291*60904Sbostic 	register struct recheader *rec;
292*60904Sbostic 	register u_char *end;
293*60904Sbostic 	struct field *dummy2;
294*60904Sbostic {
295*60904Sbostic 	int i;
296*60904Sbostic 	FILE *fd;
297*60904Sbostic 	fd = fstack[flno].fd;
298*60904Sbostic 	if ((u_char *) rec > end - sizeof(TRECHEADER))
299*60904Sbostic 		return (BUFFEND);
300*60904Sbostic 	if (!fread(rec, 1, sizeof(TRECHEADER), fd)) {
301*60904Sbostic 		fclose(fd);
302*60904Sbostic 		fstack[flno].fd = 0;
303*60904Sbostic 		return (EOF);
304*60904Sbostic 	}
305*60904Sbostic 	if (end - rec->data < rec->length) {
306*60904Sbostic 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
307*60904Sbostic 			ungetc(*((char *) rec + i), fd);
308*60904Sbostic 		return (BUFFEND);
309*60904Sbostic 	}
310*60904Sbostic 	fread(rec->data, rec->length, 1, fd);
311*60904Sbostic 	return (0);
312*60904Sbostic }
313