xref: /netbsd-src/usr.bin/sort/files.c (revision de4fa6c51a9708fc05f88b618fa6fad87c9508ec)
1 /*	$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 #include "fsort.h"
66 
67 #ifndef lint
68 __RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $");
69 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
70 #endif /* not lint */
71 
72 #include <string.h>
73 
74 static ssize_t	seq(FILE *, u_char **);
75 
76 /*
77  * this is called when there is no special key. It's only called
78  * in the first fsort pass.
79  */
80 int
81 makeline(int flno, int top, struct filelist *filelist, int nfiles,
82     RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
83 {
84 	static u_char *obufend;
85 	static size_t osz;
86 	u_char *pos;
87 	static int filenum = 0, overflow = 0;
88 	static FILE *fp = 0;
89 	int c;
90 
91 	c = 0;		/* XXXGCC -Wuninitialized [pmppc] */
92 
93 	pos = recbuf->data;
94 	if (overflow) {
95 		/*
96 		 * Buffer shortage is solved by either of two ways:
97 		 * o flush previous buffered data and start using the
98 		 *   buffer from start (see fsort())
99 		 * o realloc buffer and bump bufend
100 		 *
101 		 * The former is preferred, realloc is only done when
102 		 * there is exactly one item in buffer which does not fit.
103 		 */
104 		if (bufend == obufend)
105 			memmove(pos, bufend - osz, osz);
106 
107 		pos += osz;
108 		overflow = 0;
109 	}
110 
111 	for (;;) {
112 		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
113 			return (EOF);
114 		else if (fp == NULL) {
115 			if (filenum  >= nfiles)
116 				return (EOF);
117 			if (!(fp = fopen(filelist->names[filenum], "r")))
118 				err(2, "%s", filelist->names[filenum]);
119 			filenum++;
120 		}
121 		while ((pos < bufend) && ((c = getc(fp)) != EOF)) {
122 			*pos++ = c;
123 			if (c == REC_D) {
124 				recbuf->offset = 0;
125 				recbuf->length = pos - recbuf->data;
126 				return (0);
127 			}
128 		}
129 		if (pos >= bufend) {
130 			if (recbuf->data < bufend) {
131 				overflow = 1;
132 				obufend = bufend;
133 				osz = (pos - recbuf->data);
134 			}
135 			return (BUFFEND);
136 		} else if (c == EOF) {
137 			if (recbuf->data != pos) {
138 				*pos++ = REC_D;
139 				recbuf->offset = 0;
140 				recbuf->length = pos - recbuf->data;
141 				return (0);
142 			}
143 			FCLOSE(fp);
144 			fp = 0;
145 			if (flno >= 0)
146 				fstack[flno].fp = 0;
147 		} else {
148 
149 			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
150 
151 			/* Consume the rest of line from input */
152 			while ((c = getc(fp)) != REC_D && c != EOF)
153 				;
154 
155 			recbuf->offset = 0;
156 			recbuf->length = 0;
157 
158 			return (BUFFEND);
159 		}
160 	}
161 }
162 
163 /*
164  * This generates keys. It's only called in the first fsort pass
165  */
166 int
167 makekey(int flno, int top, struct filelist *filelist, int nfiles,
168     RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
169 {
170 	static int filenum = 0;
171 	static FILE *dbdesc = 0;
172 	static u_char *line_data;
173 	static ssize_t line_size;
174 	static int overflow = 0;
175 
176 	/* We get re-entered after returning BUFFEND - save old data */
177 	if (overflow) {
178 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
179 		return overflow ? BUFFEND : 0;
180 	}
181 
182 	/* Loop through files until we find a line of input */
183 	for (;;) {
184 		if (flno >= 0) {
185 			if (!(dbdesc = fstack[flno].fp))
186 				return (EOF);
187 		} else if (!dbdesc) {
188 			if (filenum  >= nfiles)
189 				return (EOF);
190 			dbdesc = fopen(filelist->names[filenum], "r");
191 			if (!dbdesc)
192 				err(2, "%s", filelist->names[filenum]);
193 			filenum++;
194 		}
195 		line_size = seq(dbdesc, &line_data);
196 		if (line_size != 0)
197 			/* Got a line */
198 			break;
199 
200 		/* End of file ... */
201 		FCLOSE(dbdesc);
202 		dbdesc = 0;
203 		if (flno >= 0)
204 			fstack[flno].fp = 0;
205 	}
206 
207 	if (line_size > bufend - recbuf->data) {
208 		overflow = 1;
209 	} else {
210 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
211 	}
212 	return overflow ? BUFFEND : 0;
213 }
214 
215 /*
216  * get a line of input from fp
217  */
218 static ssize_t
219 seq(FILE *fp, u_char **line)
220 {
221 	static u_char *buf;
222 	static size_t buf_size = DEFLLEN;
223 	u_char *end, *pos;
224 	int c;
225 	u_char *new_buf;
226 
227 	if (!buf) {
228 		/* one-time initialization */
229 		buf = malloc(buf_size);
230 		if (!buf)
231 		    err(2, "malloc of linebuf for %zu bytes failed",
232 			    buf_size);
233 	}
234 
235 	end = buf + buf_size;
236 	pos = buf;
237 	while ((c = getc(fp)) != EOF) {
238 		*pos++ = c;
239 		if (c == REC_D) {
240 			*line = buf;
241 			return pos - buf;
242 		}
243 		if (pos == end) {
244 			/* Long line - double size of buffer */
245 			/* XXX: Check here for stupidly long lines */
246 			buf_size *= 2;
247 			new_buf = realloc(buf, buf_size);
248 			if (!new_buf)
249 				err(2, "realloc of linebuf to %zu bytes failed",
250 					buf_size);
251 
252 			end = new_buf + buf_size;
253 			pos = new_buf + (pos - buf);
254 			buf = new_buf;
255 		}
256 	}
257 
258 	if (pos != buf) {
259 		/* EOF part way through line - add line terminator */
260 		*pos++ = REC_D;
261 		*line = buf;
262 		return pos - buf;
263 	}
264 
265 	return 0;
266 }
267 
268 /*
269  * write a key/line pair to a temporary file
270  */
271 void
272 putrec(const RECHEADER *rec, FILE *fp)
273 {
274 	EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp);
275 }
276 
277 /*
278  * write a line to output
279  */
280 void
281 putline(const RECHEADER *rec, FILE *fp)
282 {
283 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
284 }
285 
286 /*
287  * write dump of key to output (for -Dk)
288  */
289 void
290 putkeydump(const RECHEADER *rec, FILE *fp)
291 {
292 	EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp);
293 }
294 
295 /*
296  * get a record from a temporary file. (Used by merge sort.)
297  */
298 int
299 geteasy(int flno, int top, struct filelist *filelist, int nfiles,
300     RECHEADER *rec, u_char *end, struct field *dummy2)
301 {
302 	int i;
303 	FILE *fp;
304 
305 	fp = fstack[flno].fp;
306 	if ((u_char *) rec > end - REC_DATA_OFFSET)
307 		return (BUFFEND);
308 	if (!fread(rec, 1, REC_DATA_OFFSET, fp)) {
309 		fclose(fp);
310 		fstack[flno].fp = 0;
311 		return (EOF);
312 	}
313 	if (end - rec->data < (ptrdiff_t)rec->length) {
314 		for (i = REC_DATA_OFFSET - 1; i >= 0;  i--)
315 			ungetc(*((char *) rec + i), fp);
316 		return (BUFFEND);
317 	}
318 	fread(rec->data, rec->length, 1, fp);
319 	return (0);
320 }
321