xref: /netbsd-src/usr.bin/sort/files.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: files.c,v 1.26 2008/04/28 20:24:15 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 #include "fsort.h"
66 
67 #ifndef lint
68 __RCSID("$NetBSD: files.c,v 1.26 2008/04/28 20:24:15 martin Exp $");
69 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
70 #endif /* not lint */
71 
72 #include <string.h>
73 
74 static int	seq __P((FILE *, DBT *, DBT *));
75 
76 /*
77  * this is the subroutine for file management for fsort().
78  * It keeps the buffers for all temporary files.
79  */
80 int
81 getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
82 	int binno, infl0;
83 	struct filelist *filelist;
84 	int nfiles;
85 	RECHEADER *pos;
86 	u_char *end;
87 	struct field *dummy;
88 {
89 	int i;
90 	u_char *hp;
91 	static size_t nleft = 0;
92 	static int cnt = 0, flag = -1;
93 	static u_char maxb = 0;
94 	static FILE *fp;
95 
96 	if (nleft == 0) {
97 		if (binno < 0)	/* reset files. */ {
98 			for (i = 0; i < nfiles; i++) {
99 				rewind(fstack[infl0 + i].fp);
100 				fstack[infl0 + i].max_o = 0;
101 			}
102 			flag = -1;
103 			nleft = cnt = 0;
104 			return (-1);
105 		}
106 		maxb = fstack[infl0].maxb;
107 		for (; nleft == 0; cnt++) {
108 			if (cnt >= nfiles) {
109 				cnt = 0;
110 				return (EOF);
111 			}
112 			fp = fstack[infl0 + cnt].fp;
113 			fread(&nleft, sizeof(nleft), 1, fp);
114 			if (binno < maxb)
115 				fstack[infl0+cnt].max_o
116 					+= sizeof(nleft) + nleft;
117 			else if (binno == maxb) {
118 				if (binno != fstack[infl0].lastb) {
119 					fseeko(fp, fstack[infl0+
120 						cnt].max_o, SEEK_SET);
121 					fread(&nleft, sizeof(nleft), 1, fp);
122 				}
123 				if (nleft == 0)
124 					fclose(fp);
125 			} else if (binno == maxb + 1) {		/* skip a bin */
126 				fseek(fp, nleft, SEEK_CUR);
127 				fread(&nleft, sizeof(nleft), 1, fp);
128 				flag = cnt;
129 			}
130 		}
131 	}
132 	if ((u_char *) pos > end - sizeof(TRECHEADER))
133 		return (BUFFEND);
134 	fread(pos, sizeof(TRECHEADER), 1, fp);
135 	if (end - pos->data < pos->length) {
136 		hp = ((u_char *)pos) + sizeof(TRECHEADER);
137 		for (i = sizeof(TRECHEADER); i ;  i--)
138 			ungetc(*--hp, fp);
139 		return (BUFFEND);
140 	}
141 	fread(pos->data, pos->length, 1, fp);
142 	nleft -= pos->length + sizeof(TRECHEADER);
143 	if (nleft == 0 && binno == fstack[infl0].maxb)
144 		fclose(fp);
145 	return (0);
146 }
147 
148 /*
149  * this is called when there is no special key. It's only called
150  * in the first fsort pass.
151  */
152 int
153 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
154 	int flno, top;
155 	struct filelist *filelist;
156 	int nfiles;
157 	RECHEADER *recbuf;
158 	u_char *bufend;
159 	struct field *dummy2;
160 {
161 	static u_char *obufend;
162 	static size_t osz;
163 	char *pos;
164 	static int filenum = 0, overflow = 0;
165 	static FILE *fp = 0;
166 	int c;
167 
168 	c = 0;		/* XXXGCC -Wuninitialized [pmppc] */
169 
170 	pos = (char *) recbuf->data;
171 	if (overflow) {
172 		/*
173 		 * Buffer shortage is solved by either of two ways:
174 		 * o flush previous buffered data and start using the
175 		 *   buffer from start (see fsort())
176 		 * o realloc buffer and bump bufend
177 		 *
178 		 * The former is preferred, realloc is only done when
179 		 * there is exactly one item in buffer which does not fit.
180 		 */
181 		if (bufend == obufend)
182 			memmove(pos, bufend - osz, osz);
183 
184 		pos += osz;
185 		overflow = 0;
186 	}
187 	for (;;) {
188 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
189 			return (EOF);
190 		else if (fp == NULL) {
191 			if (filenum  >= nfiles)
192 				return (EOF);
193 			if (!(fp = fopen(filelist->names[filenum], "r")))
194 				err(2, "%s", filelist->names[filenum]);
195 			filenum++;
196 		}
197 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
198 			if ((*pos++ = c) == REC_D) {
199 				recbuf->offset = 0;
200 				recbuf->length = pos - (char *) recbuf->data;
201 				return (0);
202 			}
203 		}
204 		if (pos >= (char *)bufend) {
205 			if (recbuf->data < bufend) {
206 				overflow = 1;
207 				obufend = bufend;
208 				osz = (pos - (char *) recbuf->data);
209 			}
210 			return (BUFFEND);
211 		} else if (c == EOF) {
212 			if (recbuf->data != (u_char *) pos) {
213 				*pos++ = REC_D;
214 				recbuf->offset = 0;
215 				recbuf->length = pos - (char *) recbuf->data;
216 				return (0);
217 			}
218 			FCLOSE(fp);
219 			fp = 0;
220 			if (flno >= 0)
221 				fstack[flno].fp = 0;
222 		} else {
223 
224 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
225 
226 			/* Consume the rest of line from input */
227 			while ((c = getc(fp)) != REC_D && c != EOF)
228 				;
229 
230 			recbuf->offset = 0;
231 			recbuf->length = 0;
232 
233 			return (BUFFEND);
234 		}
235 	}
236 }
237 
238 /*
239  * This generates keys. It's only called in the first fsort pass
240  */
241 int
242 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
243 	int flno, top;
244 	struct filelist *filelist;
245 	int nfiles;
246 	RECHEADER *recbuf;
247 	u_char *bufend;
248 	struct field *ftbl;
249 {
250 	static int filenum = 0;
251 	static FILE *dbdesc = 0;
252 	static DBT dbkey[1], line[1];
253 	static int overflow = 0;
254 	int c;
255 
256 	if (overflow) {
257 		overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
258 									ftbl);
259 		if (overflow)
260 			return (BUFFEND);
261 		else
262 			return (0);
263 	}
264 
265 	for (;;) {
266 		if (flno >= 0) {
267 			if (!(dbdesc = fstack[flno].fp))
268 				return (EOF);
269 		} else if (!dbdesc) {
270 			if (filenum  >= nfiles)
271 				return (EOF);
272 			dbdesc = fopen(filelist->names[filenum], "r");
273 			if (!dbdesc)
274 				err(2, "%s", filelist->names[filenum]);
275 			filenum++;
276 		}
277 		if (!(c = seq(dbdesc, line, dbkey))) {
278 			if ((signed)line->size > bufend - recbuf->data) {
279 				overflow = 1;
280 			} else {
281 				overflow = enterkey(recbuf, line,
282 				    bufend - (u_char *) recbuf, ftbl);
283 			}
284 			if (overflow)
285 				return (BUFFEND);
286 			else
287 				return (0);
288 		}
289 		if (c == EOF) {
290 			FCLOSE(dbdesc);
291 			dbdesc = 0;
292 			if (flno >= 0)
293 				fstack[flno].fp = 0;
294 		} else {
295 			((char *) line->data)[60] = '\000';
296 			warnx("makekey: line too long: ignoring %.100s...",
297 			    (char *)line->data);
298 		}
299 	}
300 }
301 
302 /*
303  * get a key/line pair from fp
304  */
305 static int
306 seq(fp, line, key)
307 	FILE *fp;
308 	DBT *key, *line;
309 {
310 	static u_char *buf, flag = 1;
311 	u_char *end, *pos;
312 	int c;
313 	u_char *nlinebuf;
314 
315 	if (flag) {
316 		/* one-time initialization */
317 		flag = 0;
318 		buf = linebuf;
319 		line->data = buf;
320 	}
321 	end = buf + linebuf_size;
322 	pos = buf;
323 	while ((c = getc(fp)) != EOF) {
324 		if ((*pos++ = c) == REC_D) {
325 			line->size = pos - buf;
326 			return (0);
327 		}
328 		if (pos == end) {
329 			nlinebuf = realloc(linebuf, linebuf_size * 2);
330 			if (!nlinebuf)
331 				err(2, "realloc of linebuf to %lu bytes failed",
332 					(unsigned long)linebuf_size * 2);
333 			linebuf = nlinebuf;
334 			linebuf_size *= 2;
335 
336 			end = linebuf + linebuf_size;
337 			pos = linebuf + (pos - buf);
338 			line->data = buf = linebuf;
339 			continue;
340 		}
341 	}
342 	if (pos != buf) {
343 		*pos++ = REC_D;
344 		line->size = pos - buf;
345 		return (0);
346 	} else
347 		return (EOF);
348 }
349 
350 /*
351  * write a key/line pair to a temporary file
352  */
353 void
354 putrec(rec, fp)
355 	const RECHEADER *rec;
356 	FILE *fp;
357 {
358 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
359 }
360 
361 /*
362  * write a line to output
363  */
364 void
365 putline(rec, fp)
366 	const RECHEADER *rec;
367 	FILE *fp;
368 {
369 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
370 }
371 
372 /*
373  * get a record from a temporary file. (Used by merge sort.)
374  */
375 int
376 geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
377 	int flno, top;
378 	struct filelist *filelist;
379 	int nfiles;
380 	RECHEADER *rec;
381 	u_char *end;
382 	struct field *dummy2;
383 {
384 	int i;
385 	FILE *fp;
386 
387 	fp = fstack[flno].fp;
388 	if ((u_char *) rec > end - sizeof(TRECHEADER))
389 		return (BUFFEND);
390 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
391 		fclose(fp);
392 		fstack[flno].fp = 0;
393 		return (EOF);
394 	}
395 	if (end - rec->data < rec->length) {
396 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
397 			ungetc(*((char *) rec + i), fp);
398 		return (BUFFEND);
399 	}
400 	fread(rec->data, rec->length, 1, fp);
401 	return (0);
402 }
403