xref: /netbsd-src/usr.bin/sort/files.c (revision aaf4ece63a859a04e37cf3a7229b5fab0157cc06)
1 /*	$NetBSD: files.c,v 1.24 2005/06/07 09:51:34 he Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Peter McIlroy.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include "sort.h"
72 #include "fsort.h"
73 
74 #ifndef lint
75 __RCSID("$NetBSD: files.c,v 1.24 2005/06/07 09:51:34 he Exp $");
76 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
77 #endif /* not lint */
78 
79 #include <string.h>
80 
81 static int	seq __P((FILE *, DBT *, DBT *));
82 
83 /*
84  * this is the subroutine for file management for fsort().
85  * It keeps the buffers for all temporary files.
86  */
87 int
88 getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
89 	int binno, infl0;
90 	struct filelist *filelist;
91 	int nfiles;
92 	RECHEADER *pos;
93 	u_char *end;
94 	struct field *dummy;
95 {
96 	int i;
97 	u_char *hp;
98 	static size_t nleft = 0;
99 	static int cnt = 0, flag = -1;
100 	static u_char maxb = 0;
101 	static FILE *fp;
102 
103 	if (nleft == 0) {
104 		if (binno < 0)	/* reset files. */ {
105 			for (i = 0; i < nfiles; i++) {
106 				rewind(fstack[infl0 + i].fp);
107 				fstack[infl0 + i].max_o = 0;
108 			}
109 			flag = -1;
110 			nleft = cnt = 0;
111 			return (-1);
112 		}
113 		maxb = fstack[infl0].maxb;
114 		for (; nleft == 0; cnt++) {
115 			if (cnt >= nfiles) {
116 				cnt = 0;
117 				return (EOF);
118 			}
119 			fp = fstack[infl0 + cnt].fp;
120 			fread(&nleft, sizeof(nleft), 1, fp);
121 			if (binno < maxb)
122 				fstack[infl0+cnt].max_o
123 					+= sizeof(nleft) + nleft;
124 			else if (binno == maxb) {
125 				if (binno != fstack[infl0].lastb) {
126 					fseeko(fp, fstack[infl0+
127 						cnt].max_o, SEEK_SET);
128 					fread(&nleft, sizeof(nleft), 1, fp);
129 				}
130 				if (nleft == 0)
131 					fclose(fp);
132 			} else if (binno == maxb + 1) {		/* skip a bin */
133 				fseek(fp, nleft, SEEK_CUR);
134 				fread(&nleft, sizeof(nleft), 1, fp);
135 				flag = cnt;
136 			}
137 		}
138 	}
139 	if ((u_char *) pos > end - sizeof(TRECHEADER))
140 		return (BUFFEND);
141 	fread(pos, sizeof(TRECHEADER), 1, fp);
142 	if (end - pos->data < pos->length) {
143 		hp = ((u_char *)pos) + sizeof(TRECHEADER);
144 		for (i = sizeof(TRECHEADER); i ;  i--)
145 			ungetc(*--hp, fp);
146 		return (BUFFEND);
147 	}
148 	fread(pos->data, pos->length, 1, fp);
149 	nleft -= pos->length + sizeof(TRECHEADER);
150 	if (nleft == 0 && binno == fstack[infl0].maxb)
151 		fclose(fp);
152 	return (0);
153 }
154 
155 /*
156  * this is called when there is no special key. It's only called
157  * in the first fsort pass.
158  */
159 int
160 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
161 	int flno, top;
162 	struct filelist *filelist;
163 	int nfiles;
164 	RECHEADER *recbuf;
165 	u_char *bufend;
166 	struct field *dummy2;
167 {
168 	static u_char *obufend;
169 	static size_t osz;
170 	char *pos;
171 	static int filenum = 0, overflow = 0;
172 	static FILE *fp = 0;
173 	int c;
174 
175 	c = 0;		/* XXXGCC -Wuninitialized [pmppc] */
176 
177 	pos = (char *) recbuf->data;
178 	if (overflow) {
179 		/*
180 		 * Buffer shortage is solved by either of two ways:
181 		 * o flush previous buffered data and start using the
182 		 *   buffer from start (see fsort())
183 		 * o realloc buffer and bump bufend
184 		 *
185 		 * The former is preferred, realloc is only done when
186 		 * there is exactly one item in buffer which does not fit.
187 		 */
188 		if (bufend == obufend)
189 			memmove(pos, bufend - osz, osz);
190 
191 		pos += osz;
192 		overflow = 0;
193 	}
194 	for (;;) {
195 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
196 			return (EOF);
197 		else if (fp == NULL) {
198 			if (filenum  >= nfiles)
199 				return (EOF);
200 			if (!(fp = fopen(filelist->names[filenum], "r")))
201 				err(2, "%s", filelist->names[filenum]);
202 			filenum++;
203 		}
204 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
205 			if ((*pos++ = c) == REC_D) {
206 				recbuf->offset = 0;
207 				recbuf->length = pos - (char *) recbuf->data;
208 				return (0);
209 			}
210 		}
211 		if (pos >= (char *)bufend) {
212 			if (recbuf->data < bufend) {
213 				overflow = 1;
214 				obufend = bufend;
215 				osz = (pos - (char *) recbuf->data);
216 			}
217 			return (BUFFEND);
218 		} else if (c == EOF) {
219 			if (recbuf->data != (u_char *) pos) {
220 				*pos++ = REC_D;
221 				recbuf->offset = 0;
222 				recbuf->length = pos - (char *) recbuf->data;
223 				return (0);
224 			}
225 			FCLOSE(fp);
226 			fp = 0;
227 			if (flno >= 0)
228 				fstack[flno].fp = 0;
229 		} else {
230 
231 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
232 
233 			/* Consume the rest of line from input */
234 			while ((c = getc(fp)) != REC_D && c != EOF)
235 				;
236 
237 			recbuf->offset = 0;
238 			recbuf->length = 0;
239 
240 			return (BUFFEND);
241 		}
242 	}
243 }
244 
245 /*
246  * This generates keys. It's only called in the first fsort pass
247  */
248 int
249 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
250 	int flno, top;
251 	struct filelist *filelist;
252 	int nfiles;
253 	RECHEADER *recbuf;
254 	u_char *bufend;
255 	struct field *ftbl;
256 {
257 	static int filenum = 0;
258 	static FILE *dbdesc = 0;
259 	static DBT dbkey[1], line[1];
260 	static int overflow = 0;
261 	int c;
262 
263 	if (overflow) {
264 		overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
265 									ftbl);
266 		if (overflow)
267 			return (BUFFEND);
268 		else
269 			return (0);
270 	}
271 
272 	for (;;) {
273 		if (flno >= 0) {
274 			if (!(dbdesc = fstack[flno].fp))
275 				return (EOF);
276 		} else if (!dbdesc) {
277 			if (filenum  >= nfiles)
278 				return (EOF);
279 			dbdesc = fopen(filelist->names[filenum], "r");
280 			if (!dbdesc)
281 				err(2, "%s", filelist->names[filenum]);
282 			filenum++;
283 		}
284 		if (!(c = seq(dbdesc, line, dbkey))) {
285 			if ((signed)line->size > bufend - recbuf->data) {
286 				overflow = 1;
287 			} else {
288 				overflow = enterkey(recbuf, line,
289 				    bufend - (u_char *) recbuf, ftbl);
290 			}
291 			if (overflow)
292 				return (BUFFEND);
293 			else
294 				return (0);
295 		}
296 		if (c == EOF) {
297 			FCLOSE(dbdesc);
298 			dbdesc = 0;
299 			if (flno >= 0)
300 				fstack[flno].fp = 0;
301 		} else {
302 			((char *) line->data)[60] = '\000';
303 			warnx("makekey: line too long: ignoring %.100s...",
304 			    (char *)line->data);
305 		}
306 	}
307 }
308 
309 /*
310  * get a key/line pair from fp
311  */
312 static int
313 seq(fp, line, key)
314 	FILE *fp;
315 	DBT *key, *line;
316 {
317 	static char *buf, flag = 1;
318 	char *end, *pos;
319 	int c;
320 	u_char *nlinebuf;
321 
322 	if (flag) {
323 		/* one-time initialization */
324 		flag = 0;
325 		buf = (char *) linebuf;
326 		line->data = buf;
327 	}
328 	end = buf + linebuf_size;
329 	pos = buf;
330 	while ((c = getc(fp)) != EOF) {
331 		if ((*pos++ = c) == REC_D) {
332 			line->size = pos - buf;
333 			return (0);
334 		}
335 		if (pos == end) {
336 			nlinebuf = realloc(linebuf, linebuf_size * 2);
337 			if (!nlinebuf)
338 				err(2, "realloc of linebuf to %lu bytes failed",
339 					(unsigned long)linebuf_size * 2);
340 			linebuf = nlinebuf;
341 			linebuf_size *= 2;
342 
343 			end = linebuf + linebuf_size;
344 			pos = linebuf + (pos - buf);
345 			line->data = buf = (char *)linebuf;
346 			continue;
347 		}
348 	}
349 	if (pos != buf) {
350 		*pos++ = REC_D;
351 		line->size = pos - buf;
352 		return (0);
353 	} else
354 		return (EOF);
355 }
356 
357 /*
358  * write a key/line pair to a temporary file
359  */
360 void
361 putrec(rec, fp)
362 	const RECHEADER *rec;
363 	FILE *fp;
364 {
365 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
366 }
367 
368 /*
369  * write a line to output
370  */
371 void
372 putline(rec, fp)
373 	const RECHEADER *rec;
374 	FILE *fp;
375 {
376 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
377 }
378 
379 /*
380  * get a record from a temporary file. (Used by merge sort.)
381  */
382 int
383 geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
384 	int flno, top;
385 	struct filelist *filelist;
386 	int nfiles;
387 	RECHEADER *rec;
388 	u_char *end;
389 	struct field *dummy2;
390 {
391 	int i;
392 	FILE *fp;
393 
394 	fp = fstack[flno].fp;
395 	if ((u_char *) rec > end - sizeof(TRECHEADER))
396 		return (BUFFEND);
397 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
398 		fclose(fp);
399 		fstack[flno].fp = 0;
400 		return (EOF);
401 	}
402 	if (end - rec->data < rec->length) {
403 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
404 			ungetc(*((char *) rec + i), fp);
405 		return (BUFFEND);
406 	}
407 	fread(rec->data, rec->length, 1, fp);
408 	return (0);
409 }
410