xref: /netbsd-src/usr.bin/sort/files.c (revision 2d48ac808c43ea6701ba8f33cfc3645685301f79)
1 /*	$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 #include "fsort.h"
66 
67 #ifndef lint
68 __RCSID("$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $");
69 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
70 #endif /* not lint */
71 
72 #include <string.h>
73 
74 static ssize_t	seq(FILE *, u_char **);
75 
76 /*
77  * this is called when there is no special key. It's only called
78  * in the first fsort pass.
79  */
80 int
81 makeline(int flno, int top, struct filelist *filelist, int nfiles,
82     RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
83 {
84 	static u_char *obufend;
85 	static size_t osz;
86 	u_char *pos;
87 	static int filenum = 0, overflow = 0;
88 	static FILE *fp = 0;
89 	int c;
90 
91 	c = 0;		/* XXXGCC -Wuninitialized [pmppc] */
92 
93 	pos = recbuf->data;
94 	if (overflow) {
95 		/*
96 		 * Buffer shortage is solved by either of two ways:
97 		 * o flush previous buffered data and start using the
98 		 *   buffer from start (see fsort())
99 		 * o realloc buffer and bump bufend
100 		 *
101 		 * The former is preferred, realloc is only done when
102 		 * there is exactly one item in buffer which does not fit.
103 		 */
104 		if (bufend == obufend)
105 			memmove(pos, bufend - osz, osz);
106 
107 		pos += osz;
108 		overflow = 0;
109 	}
110 
111 	for (;;) {
112 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
113 			return (EOF);
114 		else if (fp == NULL) {
115 			if (filenum  >= nfiles)
116 				return (EOF);
117 			if (!(fp = fopen(filelist->names[filenum], "r")))
118 				err(2, "%s", filelist->names[filenum]);
119 			filenum++;
120 		}
121 		while ((pos < bufend) && ((c = getc(fp)) != EOF)) {
122 			*pos++ = c;
123 			if (c == REC_D) {
124 				recbuf->offset = 0;
125 				recbuf->length = pos - recbuf->data;
126 				recbuf->keylen = recbuf->length - 1;
127 				return (0);
128 			}
129 		}
130 		if (pos >= bufend) {
131 			if (recbuf->data < bufend) {
132 				overflow = 1;
133 				obufend = bufend;
134 				osz = (pos - recbuf->data);
135 			}
136 			return (BUFFEND);
137 		} else if (c == EOF) {
138 			if (recbuf->data != pos) {
139 				*pos++ = REC_D;
140 				recbuf->offset = 0;
141 				recbuf->length = pos - recbuf->data;
142 				recbuf->keylen = recbuf->length - 1;
143 				return (0);
144 			}
145 			FCLOSE(fp);
146 			fp = 0;
147 			if (flno >= 0)
148 				fstack[flno].fp = 0;
149 		} else {
150 
151 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
152 
153 			/* Consume the rest of line from input */
154 			while ((c = getc(fp)) != REC_D && c != EOF)
155 				;
156 
157 			recbuf->offset = 0;
158 			recbuf->length = 0;
159 			recbuf->keylen = 0;
160 
161 			return (BUFFEND);
162 		}
163 	}
164 }
165 
166 /*
167  * This generates keys. It's only called in the first fsort pass
168  */
169 int
170 makekey(int flno, int top, struct filelist *filelist, int nfiles,
171     RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
172 {
173 	static int filenum = 0;
174 	static FILE *dbdesc = 0;
175 	static u_char *line_data;
176 	static ssize_t line_size;
177 	static int overflow = 0;
178 
179 	/* We get re-entered after returning BUFFEND - save old data */
180 	if (overflow) {
181 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
182 		return overflow ? BUFFEND : 0;
183 	}
184 
185 	/* Loop through files until we find a line of input */
186 	for (;;) {
187 		if (flno >= 0) {
188 			if (!(dbdesc = fstack[flno].fp))
189 				return (EOF);
190 		} else if (!dbdesc) {
191 			if (filenum  >= nfiles)
192 				return (EOF);
193 			dbdesc = fopen(filelist->names[filenum], "r");
194 			if (!dbdesc)
195 				err(2, "%s", filelist->names[filenum]);
196 			filenum++;
197 		}
198 		line_size = seq(dbdesc, &line_data);
199 		if (line_size != 0)
200 			/* Got a line */
201 			break;
202 
203 		/* End of file ... */
204 		FCLOSE(dbdesc);
205 		dbdesc = 0;
206 		if (flno >= 0)
207 			fstack[flno].fp = 0;
208 	}
209 
210 	if (line_size > bufend - recbuf->data) {
211 		overflow = 1;
212 	} else {
213 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
214 	}
215 	return overflow ? BUFFEND : 0;
216 }
217 
218 /*
219  * get a line of input from fp
220  */
221 static ssize_t
222 seq(FILE *fp, u_char **line)
223 {
224 	static u_char *buf;
225 	static size_t buf_size = DEFLLEN;
226 	u_char *end, *pos;
227 	int c;
228 	u_char *new_buf;
229 
230 	if (!buf) {
231 		/* one-time initialization */
232 		buf = malloc(buf_size);
233 		if (!buf)
234 		    err(2, "malloc of linebuf for %zu bytes failed",
235 			    buf_size);
236 	}
237 
238 	end = buf + buf_size;
239 	pos = buf;
240 	while ((c = getc(fp)) != EOF) {
241 		*pos++ = c;
242 		if (c == REC_D) {
243 			*line = buf;
244 			return pos - buf;
245 		}
246 		if (pos == end) {
247 			/* Long line - double size of buffer */
248 			/* XXX: Check here for stupidly long lines */
249 			buf_size *= 2;
250 			new_buf = realloc(buf, buf_size);
251 			if (!new_buf)
252 				err(2, "realloc of linebuf to %zu bytes failed",
253 					buf_size);
254 
255 			end = new_buf + buf_size;
256 			pos = new_buf + (pos - buf);
257 			buf = new_buf;
258 		}
259 	}
260 
261 	if (pos != buf) {
262 		/* EOF part way through line - add line terminator */
263 		*pos++ = REC_D;
264 		*line = buf;
265 		return pos - buf;
266 	}
267 
268 	return 0;
269 }
270 
271 /*
272  * write a key/line pair to a temporary file
273  */
274 void
275 putrec(const RECHEADER *rec, FILE *fp)
276 {
277 	EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp);
278 }
279 
280 /*
281  * write a line to output
282  */
283 void
284 putline(const RECHEADER *rec, FILE *fp)
285 {
286 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
287 }
288 
289 /*
290  * write dump of key to output (for -Dk)
291  */
292 void
293 putkeydump(const RECHEADER *rec, FILE *fp)
294 {
295 	EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp);
296 }
297 
298 /*
299  * get a record from a temporary file. (Used by merge sort.)
300  */
301 int
302 geteasy(int flno, int top, struct filelist *filelist, int nfiles,
303     RECHEADER *rec, u_char *end, struct field *dummy2)
304 {
305 	int i;
306 	FILE *fp;
307 
308 	fp = fstack[flno].fp;
309 	if ((u_char *)(rec + 1) > end)
310 		return (BUFFEND);
311 	if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
312 		fclose(fp);
313 		fstack[flno].fp = 0;
314 		return (EOF);
315 	}
316 	if (end - rec->data < (ptrdiff_t)rec->length) {
317 		for (i = offsetof(RECHEADER, data) - 1; i >= 0;  i--)
318 			ungetc(*((char *) rec + i), fp);
319 		return (BUFFEND);
320 	}
321 	fread(rec->data, rec->length, 1, fp);
322 	return (0);
323 }
324