xref: /netbsd-src/usr.bin/sort/files.c (revision 326b2259b73e878289ebd80cd9d20bc5aee35e99)
1 /*	$NetBSD: files.c,v 1.19 2003/08/07 11:15:53 agc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "sort.h"
36 #include "fsort.h"
37 
38 #ifndef lint
39 __RCSID("$NetBSD: files.c,v 1.19 2003/08/07 11:15:53 agc Exp $");
40 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
41 #endif /* not lint */
42 
43 #include <string.h>
44 
45 static int	seq __P((FILE *, DBT *, DBT *));
46 
47 /*
48  * this is the subroutine for file management for fsort().
49  * It keeps the buffers for all temporary files.
50  */
51 int
52 getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
53 	int binno, infl0;
54 	struct filelist *filelist;
55 	int nfiles;
56 	RECHEADER *pos;
57 	u_char *end;
58 	struct field *dummy;
59 {
60 	int i;
61 	u_char *hp;
62 	static size_t nleft = 0;
63 	static int cnt = 0, flag = -1;
64 	static u_char maxb = 0;
65 	static FILE *fp;
66 
67 	if (nleft == 0) {
68 		if (binno < 0)	/* reset files. */ {
69 			for (i = 0; i < nfiles; i++) {
70 				rewind(fstack[infl0 + i].fp);
71 				fstack[infl0 + i].max_o = 0;
72 			}
73 			flag = -1;
74 			nleft = cnt = 0;
75 			return (-1);
76 		}
77 		maxb = fstack[infl0].maxb;
78 		for (; nleft == 0; cnt++) {
79 			if (cnt >= nfiles) {
80 				cnt = 0;
81 				return (EOF);
82 			}
83 			fp = fstack[infl0 + cnt].fp;
84 			fread(&nleft, sizeof(nleft), 1, fp);
85 			if (binno < maxb)
86 				fstack[infl0+cnt].max_o
87 					+= sizeof(nleft) + nleft;
88 			else if (binno == maxb) {
89 				if (binno != fstack[infl0].lastb) {
90 					fseeko(fp, fstack[infl0+
91 						cnt].max_o, SEEK_SET);
92 					fread(&nleft, sizeof(nleft), 1, fp);
93 				}
94 				if (nleft == 0)
95 					fclose(fp);
96 			} else if (binno == maxb + 1) {		/* skip a bin */
97 				fseek(fp, nleft, SEEK_CUR);
98 				fread(&nleft, sizeof(nleft), 1, fp);
99 				flag = cnt;
100 			}
101 		}
102 	}
103 	if ((u_char *) pos > end - sizeof(TRECHEADER))
104 		return (BUFFEND);
105 	fread(pos, sizeof(TRECHEADER), 1, fp);
106 	if (end - pos->data < pos->length) {
107 		hp = ((u_char *)pos) + sizeof(TRECHEADER);
108 		for (i = sizeof(TRECHEADER); i ;  i--)
109 			ungetc(*--hp, fp);
110 		return (BUFFEND);
111 	}
112 	fread(pos->data, pos->length, 1, fp);
113 	nleft -= pos->length + sizeof(TRECHEADER);
114 	if (nleft == 0 && binno == fstack[infl0].maxb)
115 		fclose(fp);
116 	return (0);
117 }
118 
119 /*
120  * this is called when there is no special key. It's only called
121  * in the first fsort pass.
122  */
123 int
124 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
125 	int flno, top;
126 	struct filelist *filelist;
127 	int nfiles;
128 	RECHEADER *recbuf;
129 	u_char *bufend;
130 	struct field *dummy2;
131 {
132 	static u_char *obufend;
133 	static size_t osz;
134 	char *pos;
135 	static int filenum = 0, overflow = 0;
136 	static FILE *fp = 0;
137 	int c;
138 
139 	pos = (char *) recbuf->data;
140 	if (overflow) {
141 		/*
142 		 * Buffer shortage is solved by either of two ways:
143 		 * o flush previous buffered data and start using the
144 		 *   buffer from start (see fsort())
145 		 * o realloc buffer and bump bufend
146 		 *
147 		 * The former is preferred, realloc is only done when
148 		 * there is exactly one item in buffer which does not fit.
149 		 */
150 		if (bufend == obufend)
151 			memmove(pos, bufend - osz, osz);
152 
153 		pos += osz;
154 		overflow = 0;
155 	}
156 	for (;;) {
157 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
158 			return (EOF);
159 		else if (fp == NULL) {
160 			if (filenum  >= nfiles)
161 				return (EOF);
162 			if (!(fp = fopen(filelist->names[filenum], "r")))
163 				err(2, "%s", filelist->names[filenum]);
164 			filenum++;
165 		}
166 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
167 			if ((*pos++ = c) == REC_D) {
168 				recbuf->offset = 0;
169 				recbuf->length = pos - (char *) recbuf->data;
170 				return (0);
171 			}
172 		}
173 		if (pos >= (char *)bufend) {
174 			if (recbuf->data < bufend) {
175 				overflow = 1;
176 				obufend = bufend;
177 				osz = (pos - (char *) recbuf->data);
178 			}
179 			return (BUFFEND);
180 		} else if (c == EOF) {
181 			if (recbuf->data != (u_char *) pos) {
182 				*pos++ = REC_D;
183 				recbuf->offset = 0;
184 				recbuf->length = pos - (char *) recbuf->data;
185 				return (0);
186 			}
187 			FCLOSE(fp);
188 			fp = 0;
189 			if (flno >= 0)
190 				fstack[flno].fp = 0;
191 		} else {
192 
193 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
194 
195 			/* Consume the rest of line from input */
196 			while((c = getc(fp)) != REC_D && c != EOF)
197 				;
198 
199 			recbuf->offset = 0;
200 			recbuf->length = 0;
201 
202 			return (BUFFEND);
203 		}
204 	}
205 }
206 
207 /*
208  * This generates keys. It's only called in the first fsort pass
209  */
210 int
211 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
212 	int flno, top;
213 	struct filelist *filelist;
214 	int nfiles;
215 	RECHEADER *recbuf;
216 	u_char *bufend;
217 	struct field *ftbl;
218 {
219 	static int filenum = 0;
220 	static FILE *dbdesc = 0;
221 	static DBT dbkey[1], line[1];
222 	static int overflow = 0;
223 	int c;
224 
225 	if (overflow) {
226 		overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
227 									ftbl);
228 		if (overflow)
229 			return (BUFFEND);
230 		else
231 			return (0);
232 	}
233 
234 	for (;;) {
235 		if (flno >= 0) {
236 			if (!(dbdesc = fstack[flno].fp))
237 				return (EOF);
238 		} else if (!dbdesc) {
239 			if (filenum  >= nfiles)
240 				return (EOF);
241 			dbdesc = fopen(filelist->names[filenum], "r");
242 			if (!dbdesc)
243 				err(2, "%s", filelist->names[filenum]);
244 			filenum++;
245 		}
246 		if (!(c = seq(dbdesc, line, dbkey))) {
247 			if ((signed)line->size > bufend - recbuf->data) {
248 				overflow = 1;
249 			} else {
250 				overflow = enterkey(recbuf, line,
251 				    bufend - (u_char *) recbuf, ftbl);
252 			}
253 			if (overflow)
254 				return (BUFFEND);
255 			else
256 				return (0);
257 		}
258 		if (c == EOF) {
259 			FCLOSE(dbdesc);
260 			dbdesc = 0;
261 			if (flno >= 0)
262 				fstack[flno].fp = 0;
263 		} else {
264 			((char *) line->data)[60] = '\000';
265 			warnx("makekey: line too long: ignoring %.100s...",
266 			    (char *)line->data);
267 		}
268 	}
269 }
270 
271 /*
272  * get a key/line pair from fp
273  */
274 static int
275 seq(fp, line, key)
276 	FILE *fp;
277 	DBT *key, *line;
278 {
279 	static char *buf, flag = 1;
280 	char *end, *pos;
281 	int c;
282 
283 	if (flag) {
284 		flag = 0;
285 		buf = (char *) linebuf;
286 		end = buf + linebuf_size;
287 		line->data = buf;
288 	}
289 	pos = buf;
290 	while ((c = getc(fp)) != EOF) {
291 		if ((*pos++ = c) == REC_D) {
292 			line->size = pos - buf;
293 			return (0);
294 		}
295 		if (pos == end) {
296 			linebuf_size *= 2;
297 			linebuf = realloc(linebuf, linebuf_size);
298 			if (!linebuf)
299 				err(2, "realloc of linebuf to %lu bytes failed",
300 					(unsigned long)linebuf_size);
301 
302 			end = linebuf + linebuf_size;
303 			pos = linebuf + (pos - buf);
304 			line->data = buf = (char *)linebuf;
305 			continue;
306 		}
307 	}
308 	if (pos != buf) {
309 		*pos++ = REC_D;
310 		line->size = pos - buf;
311 		return (0);
312 	} else
313 		return (EOF);
314 }
315 
316 /*
317  * write a key/line pair to a temporary file
318  */
319 void
320 putrec(rec, fp)
321 	const RECHEADER *rec;
322 	FILE *fp;
323 {
324 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
325 }
326 
327 /*
328  * write a line to output
329  */
330 void
331 putline(rec, fp)
332 	const RECHEADER *rec;
333 	FILE *fp;
334 {
335 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
336 }
337 
338 /*
339  * get a record from a temporary file. (Used by merge sort.)
340  */
341 int
342 geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
343 	int flno, top;
344 	struct filelist *filelist;
345 	int nfiles;
346 	RECHEADER *rec;
347 	u_char *end;
348 	struct field *dummy2;
349 {
350 	int i;
351 	FILE *fp;
352 
353 	fp = fstack[flno].fp;
354 	if ((u_char *) rec > end - sizeof(TRECHEADER))
355 		return (BUFFEND);
356 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
357 		fclose(fp);
358 		fstack[flno].fp = 0;
359 		return (EOF);
360 	}
361 	if (end - rec->data < rec->length) {
362 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
363 			ungetc(*((char *) rec + i), fp);
364 		return (BUFFEND);
365 	}
366 	fread(rec->data, rec->length, 1, fp);
367 	return (0);
368 }
369