xref: /netbsd-src/usr.bin/sort/files.c (revision 271138cb3a6de667c600b54c1d0896e7b41a929b)
1 /*	$NetBSD: files.c,v 1.40 2009/10/07 21:03:29 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 #include "fsort.h"
66 
67 #ifndef lint
68 __RCSID("$NetBSD: files.c,v 1.40 2009/10/07 21:03:29 dsl Exp $");
69 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
70 #endif /* not lint */
71 
72 #include <string.h>
73 
/*
 * Round a byte count up to the next multiple of sizeof (long), so that
 * records in temporary files stay aligned and misaligned copies are
 * avoided.  The mask form relies on sizeof (long) being a power of two.
 */
#define REC_ROUNDUP(n) \
	(((n) + (sizeof (long) - 1)) & ~(sizeof (long) - 1))
76 
77 static ssize_t	seq(FILE *, u_char **);
78 
79 /*
80  * this is called when there is no special key. It's only called
81  * in the first fsort pass.
82  */
83 
84 static u_char *opos;
85 static size_t osz;
86 
87 void
88 makeline_copydown(RECHEADER *recbuf)
89 {
90 	memmove(recbuf->data, opos, osz);
91 }
92 
93 int
94 makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
95 {
96 	u_char *pos;
97 	int c;
98 
99 	pos = recbuf->data;
100 	if (osz != 0) {
101 		/*
102 		 * Buffer shortage is solved by either of two ways:
103 		 * o flush previous buffered data and start using the
104 		 *   buffer from start.
105 		 *   makeline_copydown() above must be called.
106 		 * o realloc buffer
107 		 *
108 		 * This code has relied on realloc changing 'bufend',
109 		 * but that isn't necessarily true.
110 		 */
111 		pos += osz;
112 		osz = 0;
113 	}
114 
115 	while (pos < bufend) {
116 		c = getc(fp);
117 		if (c == EOF) {
118 			if (pos == recbuf->data) {
119 				FCLOSE(fp);
120 				return EOF;
121 			}
122 			/* Add terminator to partial line */
123 			c = REC_D;
124 		}
125 		*pos++ = c;
126 		if (c == REC_D) {
127 			recbuf->offset = 0;
128 			recbuf->length = pos - recbuf->data;
129 			recbuf->keylen = recbuf->length - 1;
130 			return (0);
131 		}
132 	}
133 
134 	/* Ran out of buffer space... */
135 	if (recbuf->data < bufend) {
136 		/* Remember where the partial record is */
137 		osz = pos - recbuf->data;
138 		opos = recbuf->data;
139 	}
140 	return (BUFFEND);
141 }
142 
143 /*
144  * This generates keys. It's only called in the first fsort pass
145  */
146 int
147 makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
148 {
149 	static u_char *line_data;
150 	static ssize_t line_size;
151 	static int overflow = 0;
152 
153 	/* We get re-entered after returning BUFFEND - save old data */
154 	if (overflow) {
155 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
156 		return overflow ? BUFFEND : 0;
157 	}
158 
159 	line_size = seq(fp, &line_data);
160 	if (line_size == 0) {
161 		FCLOSE(fp);
162 		return EOF;
163 	}
164 
165 	if (line_size > bufend - recbuf->data) {
166 		overflow = 1;
167 	} else {
168 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
169 	}
170 	return overflow ? BUFFEND : 0;
171 }
172 
173 /*
174  * get a line of input from fp
175  */
176 static ssize_t
177 seq(FILE *fp, u_char **line)
178 {
179 	static u_char *buf;
180 	static size_t buf_size = DEFLLEN;
181 	u_char *end, *pos;
182 	int c;
183 	u_char *new_buf;
184 
185 	if (!buf) {
186 		/* one-time initialization */
187 		buf = malloc(buf_size);
188 		if (!buf)
189 		    err(2, "malloc of linebuf for %zu bytes failed",
190 			    buf_size);
191 	}
192 
193 	end = buf + buf_size;
194 	pos = buf;
195 	while ((c = getc(fp)) != EOF) {
196 		*pos++ = c;
197 		if (c == REC_D) {
198 			*line = buf;
199 			return pos - buf;
200 		}
201 		if (pos == end) {
202 			/* Long line - double size of buffer */
203 			/* XXX: Check here for stupidly long lines */
204 			buf_size *= 2;
205 			new_buf = realloc(buf, buf_size);
206 			if (!new_buf)
207 				err(2, "realloc of linebuf to %zu bytes failed",
208 					buf_size);
209 
210 			end = new_buf + buf_size;
211 			pos = new_buf + (pos - buf);
212 			buf = new_buf;
213 		}
214 	}
215 
216 	if (pos != buf) {
217 		/* EOF part way through line - add line terminator */
218 		*pos++ = REC_D;
219 		*line = buf;
220 		return pos - buf;
221 	}
222 
223 	return 0;
224 }
225 
226 /*
227  * write a key/line pair to a temporary file
228  */
229 void
230 putrec(const RECHEADER *rec, FILE *fp)
231 {
232 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp);
233 }
234 
235 /*
236  * write a line to output
237  */
238 void
239 putline(const RECHEADER *rec, FILE *fp)
240 {
241 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
242 }
243 
244 /*
245  * write dump of key to output (for -Dk)
246  */
247 void
248 putkeydump(const RECHEADER *rec, FILE *fp)
249 {
250 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp);
251 }
252 
253 /*
254  * get a record from a temporary file. (Used by merge sort.)
255  */
256 int
257 geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
258 {
259 	length_t file_len;
260 	int i;
261 
262 	(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
263 
264 	if ((u_char *)(rec + 1) > end)
265 		return (BUFFEND);
266 	if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
267 		fclose(fp);
268 		return (EOF);
269 	}
270 	file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
271 	if (end - rec->data < (ptrdiff_t)file_len) {
272 		for (i = sizeof rec->length - 1; i >= 0;  i--)
273 			ungetc(*((char *) rec + i), fp);
274 		return (BUFFEND);
275 	}
276 
277 	fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
278 	return (0);
279 }
280