xref: /netbsd-src/usr.bin/sort/files.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: files.c,v 1.23 2004/02/15 11:52:12 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Peter McIlroy.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include "sort.h"
72 #include "fsort.h"
73 
74 #ifndef lint
75 __RCSID("$NetBSD: files.c,v 1.23 2004/02/15 11:52:12 jdolecek Exp $");
76 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
77 #endif /* not lint */
78 
79 #include <string.h>
80 
81 static int	seq __P((FILE *, DBT *, DBT *));
82 
83 /*
84  * this is the subroutine for file management for fsort().
85  * It keeps the buffers for all temporary files.
86  */
87 int
88 getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
89 	int binno, infl0;
90 	struct filelist *filelist;
91 	int nfiles;
92 	RECHEADER *pos;
93 	u_char *end;
94 	struct field *dummy;
95 {
96 	int i;
97 	u_char *hp;
98 	static size_t nleft = 0;
99 	static int cnt = 0, flag = -1;
100 	static u_char maxb = 0;
101 	static FILE *fp;
102 
103 	if (nleft == 0) {
104 		if (binno < 0)	/* reset files. */ {
105 			for (i = 0; i < nfiles; i++) {
106 				rewind(fstack[infl0 + i].fp);
107 				fstack[infl0 + i].max_o = 0;
108 			}
109 			flag = -1;
110 			nleft = cnt = 0;
111 			return (-1);
112 		}
113 		maxb = fstack[infl0].maxb;
114 		for (; nleft == 0; cnt++) {
115 			if (cnt >= nfiles) {
116 				cnt = 0;
117 				return (EOF);
118 			}
119 			fp = fstack[infl0 + cnt].fp;
120 			fread(&nleft, sizeof(nleft), 1, fp);
121 			if (binno < maxb)
122 				fstack[infl0+cnt].max_o
123 					+= sizeof(nleft) + nleft;
124 			else if (binno == maxb) {
125 				if (binno != fstack[infl0].lastb) {
126 					fseeko(fp, fstack[infl0+
127 						cnt].max_o, SEEK_SET);
128 					fread(&nleft, sizeof(nleft), 1, fp);
129 				}
130 				if (nleft == 0)
131 					fclose(fp);
132 			} else if (binno == maxb + 1) {		/* skip a bin */
133 				fseek(fp, nleft, SEEK_CUR);
134 				fread(&nleft, sizeof(nleft), 1, fp);
135 				flag = cnt;
136 			}
137 		}
138 	}
139 	if ((u_char *) pos > end - sizeof(TRECHEADER))
140 		return (BUFFEND);
141 	fread(pos, sizeof(TRECHEADER), 1, fp);
142 	if (end - pos->data < pos->length) {
143 		hp = ((u_char *)pos) + sizeof(TRECHEADER);
144 		for (i = sizeof(TRECHEADER); i ;  i--)
145 			ungetc(*--hp, fp);
146 		return (BUFFEND);
147 	}
148 	fread(pos->data, pos->length, 1, fp);
149 	nleft -= pos->length + sizeof(TRECHEADER);
150 	if (nleft == 0 && binno == fstack[infl0].maxb)
151 		fclose(fp);
152 	return (0);
153 }
154 
155 /*
156  * this is called when there is no special key. It's only called
157  * in the first fsort pass.
158  */
159 int
160 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
161 	int flno, top;
162 	struct filelist *filelist;
163 	int nfiles;
164 	RECHEADER *recbuf;
165 	u_char *bufend;
166 	struct field *dummy2;
167 {
168 	static u_char *obufend;
169 	static size_t osz;
170 	char *pos;
171 	static int filenum = 0, overflow = 0;
172 	static FILE *fp = 0;
173 	int c;
174 
175 	pos = (char *) recbuf->data;
176 	if (overflow) {
177 		/*
178 		 * Buffer shortage is solved by either of two ways:
179 		 * o flush previous buffered data and start using the
180 		 *   buffer from start (see fsort())
181 		 * o realloc buffer and bump bufend
182 		 *
183 		 * The former is preferred, realloc is only done when
184 		 * there is exactly one item in buffer which does not fit.
185 		 */
186 		if (bufend == obufend)
187 			memmove(pos, bufend - osz, osz);
188 
189 		pos += osz;
190 		overflow = 0;
191 	}
192 	for (;;) {
193 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
194 			return (EOF);
195 		else if (fp == NULL) {
196 			if (filenum  >= nfiles)
197 				return (EOF);
198 			if (!(fp = fopen(filelist->names[filenum], "r")))
199 				err(2, "%s", filelist->names[filenum]);
200 			filenum++;
201 		}
202 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
203 			if ((*pos++ = c) == REC_D) {
204 				recbuf->offset = 0;
205 				recbuf->length = pos - (char *) recbuf->data;
206 				return (0);
207 			}
208 		}
209 		if (pos >= (char *)bufend) {
210 			if (recbuf->data < bufend) {
211 				overflow = 1;
212 				obufend = bufend;
213 				osz = (pos - (char *) recbuf->data);
214 			}
215 			return (BUFFEND);
216 		} else if (c == EOF) {
217 			if (recbuf->data != (u_char *) pos) {
218 				*pos++ = REC_D;
219 				recbuf->offset = 0;
220 				recbuf->length = pos - (char *) recbuf->data;
221 				return (0);
222 			}
223 			FCLOSE(fp);
224 			fp = 0;
225 			if (flno >= 0)
226 				fstack[flno].fp = 0;
227 		} else {
228 
229 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
230 
231 			/* Consume the rest of line from input */
232 			while ((c = getc(fp)) != REC_D && c != EOF)
233 				;
234 
235 			recbuf->offset = 0;
236 			recbuf->length = 0;
237 
238 			return (BUFFEND);
239 		}
240 	}
241 }
242 
243 /*
244  * This generates keys. It's only called in the first fsort pass
245  */
246 int
247 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
248 	int flno, top;
249 	struct filelist *filelist;
250 	int nfiles;
251 	RECHEADER *recbuf;
252 	u_char *bufend;
253 	struct field *ftbl;
254 {
255 	static int filenum = 0;
256 	static FILE *dbdesc = 0;
257 	static DBT dbkey[1], line[1];
258 	static int overflow = 0;
259 	int c;
260 
261 	if (overflow) {
262 		overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
263 									ftbl);
264 		if (overflow)
265 			return (BUFFEND);
266 		else
267 			return (0);
268 	}
269 
270 	for (;;) {
271 		if (flno >= 0) {
272 			if (!(dbdesc = fstack[flno].fp))
273 				return (EOF);
274 		} else if (!dbdesc) {
275 			if (filenum  >= nfiles)
276 				return (EOF);
277 			dbdesc = fopen(filelist->names[filenum], "r");
278 			if (!dbdesc)
279 				err(2, "%s", filelist->names[filenum]);
280 			filenum++;
281 		}
282 		if (!(c = seq(dbdesc, line, dbkey))) {
283 			if ((signed)line->size > bufend - recbuf->data) {
284 				overflow = 1;
285 			} else {
286 				overflow = enterkey(recbuf, line,
287 				    bufend - (u_char *) recbuf, ftbl);
288 			}
289 			if (overflow)
290 				return (BUFFEND);
291 			else
292 				return (0);
293 		}
294 		if (c == EOF) {
295 			FCLOSE(dbdesc);
296 			dbdesc = 0;
297 			if (flno >= 0)
298 				fstack[flno].fp = 0;
299 		} else {
300 			((char *) line->data)[60] = '\000';
301 			warnx("makekey: line too long: ignoring %.100s...",
302 			    (char *)line->data);
303 		}
304 	}
305 }
306 
307 /*
308  * get a key/line pair from fp
309  */
310 static int
311 seq(fp, line, key)
312 	FILE *fp;
313 	DBT *key, *line;
314 {
315 	static char *buf, flag = 1;
316 	char *end, *pos;
317 	int c;
318 	u_char *nlinebuf;
319 
320 	if (flag) {
321 		/* one-time initialization */
322 		flag = 0;
323 		buf = (char *) linebuf;
324 		line->data = buf;
325 	}
326 	end = buf + linebuf_size;
327 	pos = buf;
328 	while ((c = getc(fp)) != EOF) {
329 		if ((*pos++ = c) == REC_D) {
330 			line->size = pos - buf;
331 			return (0);
332 		}
333 		if (pos == end) {
334 			nlinebuf = realloc(linebuf, linebuf_size * 2);
335 			if (!nlinebuf)
336 				err(2, "realloc of linebuf to %lu bytes failed",
337 					(unsigned long)linebuf_size * 2);
338 			linebuf = nlinebuf;
339 			linebuf_size *= 2;
340 
341 			end = linebuf + linebuf_size;
342 			pos = linebuf + (pos - buf);
343 			line->data = buf = (char *)linebuf;
344 			continue;
345 		}
346 	}
347 	if (pos != buf) {
348 		*pos++ = REC_D;
349 		line->size = pos - buf;
350 		return (0);
351 	} else
352 		return (EOF);
353 }
354 
355 /*
356  * write a key/line pair to a temporary file
357  */
358 void
359 putrec(rec, fp)
360 	const RECHEADER *rec;
361 	FILE *fp;
362 {
363 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
364 }
365 
366 /*
367  * write a line to output
368  */
369 void
370 putline(rec, fp)
371 	const RECHEADER *rec;
372 	FILE *fp;
373 {
374 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
375 }
376 
377 /*
378  * get a record from a temporary file. (Used by merge sort.)
379  */
380 int
381 geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
382 	int flno, top;
383 	struct filelist *filelist;
384 	int nfiles;
385 	RECHEADER *rec;
386 	u_char *end;
387 	struct field *dummy2;
388 {
389 	int i;
390 	FILE *fp;
391 
392 	fp = fstack[flno].fp;
393 	if ((u_char *) rec > end - sizeof(TRECHEADER))
394 		return (BUFFEND);
395 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
396 		fclose(fp);
397 		fstack[flno].fp = 0;
398 		return (EOF);
399 	}
400 	if (end - rec->data < rec->length) {
401 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
402 			ungetc(*((char *) rec + i), fp);
403 		return (BUFFEND);
404 	}
405 	fread(rec->data, rec->length, 1, fp);
406 	return (0);
407 }
408