xref: /netbsd-src/usr.bin/sort/files.c (revision 5aefcfdc06931dd97e76246d2fe0302f7b3fe094)
1 /*	$NetBSD: files.c,v 1.6 2000/10/17 15:22:57 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include "sort.h"
40 #include "fsort.h"
41 
42 #ifndef lint
43 __RCSID("$NetBSD: files.c,v 1.6 2000/10/17 15:22:57 jdolecek Exp $");
44 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
45 #endif /* not lint */
46 
47 #include <string.h>
48 
49 static int seq __P((FILE *, DBT *, DBT *));
50 
51 /*
52  * this is the subroutine for file management for fsort().
53  * It keeps the buffers for all temporary files.
54  */
55 int
56 getnext(binno, infl0, nfiles, pos, end, dummy)
57 	int binno, nfiles;
58 	union f_handle infl0;
59 	struct recheader *pos;
60 	u_char *end;
61 	struct field *dummy;
62 {
63 	int i;
64 	u_char *hp;
65 	static size_t nleft = 0;
66 	static int cnt = 0, flag = -1;
67 	static u_char maxb = 0;
68 	static FILE *fp;
69 
70 	if (nleft == 0) {
71 		if (binno < 0)	/* reset files. */ {
72 			for (i = 0; i < nfiles; i++) {
73 				rewind(fstack[infl0.top + i].fp);
74 				fstack[infl0.top + i].max_o = 0;
75 			}
76 			flag = -1;
77 			nleft = cnt = 0;
78 			return(-1);
79 		}
80 		maxb = fstack[infl0.top].maxb;
81 		for (; nleft == 0; cnt++) {
82 			if (cnt >= nfiles) {
83 				cnt = 0;
84 				return (EOF);
85 			}
86 			fp = fstack[infl0.top + cnt].fp;
87 			fread(&nleft, sizeof(nleft), 1, fp);
88 			if (binno < maxb)
89 				fstack[infl0.top+cnt].max_o
90 					+= sizeof(nleft) + nleft;
91 			else if (binno == maxb) {
92 				if (binno != fstack[infl0.top].lastb) {
93 					fseek(fp, fstack[infl0.top+
94 						cnt].max_o, SEEK_SET);
95 					fread(&nleft, sizeof(nleft), 1, fp);
96 				}
97 				if (nleft == 0)
98 					fclose(fp);
99 			} else if (binno == maxb + 1) {		/* skip a bin */
100 				fseek(fp, nleft, SEEK_CUR);
101 				fread(&nleft, sizeof(nleft), 1, fp);
102 				flag = cnt;
103 			}
104 		}
105 	}
106 	if ((u_char *) pos > end - sizeof(TRECHEADER))
107 		return (BUFFEND);
108 	fread(pos, sizeof(TRECHEADER), 1, fp);
109 	if (end - pos->data < pos->length) {
110 		hp = ((u_char *) pos) + sizeof(TRECHEADER);
111 		for (i = sizeof(TRECHEADER); i ;  i--)
112 			ungetc(*--hp, fp);
113 		return (BUFFEND);
114 	}
115 	fread(pos->data, pos->length, 1, fp);
116 	nleft -= pos->length + sizeof(TRECHEADER);
117 	if (nleft == 0 && binno == fstack[infl0.top].maxb)
118 		fclose(fp);
119 	return (0);
120 }
121 
122 /*
123  * this is called when there is no special key. It's only called
124  * in the first fsort pass.
125  */
126 int
127 makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
128 	int flno, nfiles;
129 	union f_handle filelist;
130 	struct recheader *buffer;
131 	u_char *bufend;
132 	struct field *dummy2;
133 {
134 	static size_t olen;
135 	char *pos;
136 	static int fileno = 0, overflow = 0;
137 	static FILE *fp = 0;
138 	int c;
139 
140 	pos = (char *) buffer->data;
141 	if (overflow) {
142 		pos += olen;
143 		overflow = 0;
144 	}
145 	for (;;) {
146 		if (flno >= 0) {
147 			if (!(fp = fstack[flno].fp))
148 				return (EOF);
149 		} else if (!fp) {
150 			if (fileno  >= nfiles) return(EOF);
151 			if (!(fp = fopen(filelist.names[fileno], "r")))
152 				err(2, "%s", filelist.names[fileno]);
153 			++fileno;
154 		}
155 		while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
156 			if ((*pos++ = c) == REC_D) {
157 				buffer->offset = 0;
158 				buffer->length = pos - (char *) buffer->data;
159 				return (0);
160 			}
161 		}
162 		if (pos >= (char *)bufend) {
163 			if (buffer->data < bufend) {
164 				overflow = 1;
165 				olen = (size_t) (((char *) bufend) -
166 						((char *) buffer->data));
167 			}
168 			return (BUFFEND);
169 		} else if (c == EOF) {
170 			if (buffer->data != (u_char *) pos) {
171 				warnx("last character not record delimiter");
172 				*pos++ = REC_D;
173 				buffer->offset = 0;
174 				buffer->length = pos - (char *) buffer->data;
175 				return(0);
176 			}
177 			FCLOSE(fp);
178 			fp = 0;
179 			if(flno >= 0) fstack[flno].fp = 0;
180 		} else {
181 
182 			warnx("makeline: line too long: ignoring '%.100s...'", buffer->data);
183 
184 			/* consume rest of line from input */
185 			while((c = getc(fp)) != REC_D && c != EOF);
186 
187 			buffer->offset = 0;
188 			buffer->length = 0;
189 			return BUFFEND;
190 		}
191 	}
192 }
193 
194 /*
195  * This generates keys. It's only called in the first fsort pass
196  */
197 int
198 makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
199 	int flno, nfiles;
200 	union f_handle filelist;
201 	struct recheader *buffer;
202 	u_char *bufend;
203 	struct field *ftbl;
204 {
205 	static int fileno = 0;
206 	static FILE *dbdesc = 0;
207 	static DBT dbkey[1], line[1];
208 	static int overflow = 0;
209 	int c;
210 	if (overflow) {
211 		overflow =
212 		  enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
213 		if (overflow)
214 			return (BUFFEND);
215 		else
216 			return (0);
217 	}
218 	for (;;) {
219 		if (flno >= 0) {
220 			if (!(dbdesc = fstack[flno].fp))
221 				return(EOF);
222 		} else if (!dbdesc) {
223 			if (fileno  >= nfiles)
224 				return (EOF);
225 			dbdesc = fopen(filelist.names[fileno], "r");
226 			if (!dbdesc)
227 				err(2, "%s", filelist.names[fileno]);
228 			++fileno;
229 		}
230 		if (!(c = seq(dbdesc, line, dbkey))) {
231 			if ((signed)line->size > bufend - buffer->data) {
232 				overflow = 1;
233 			} else {
234 				overflow = enterkey(buffer, line,
235 				    bufend - (u_char *) buffer, ftbl);
236 			}
237 			if (overflow)
238 				return (BUFFEND);
239 			else
240 				return (0);
241 		}
242 		if (c == EOF) {
243 			FCLOSE(dbdesc);
244 			dbdesc = 0;
245 			if (flno >= 0) fstack[flno].fp = 0;
246 		} else {
247 			((char *) line->data)[60] = '\000';
248 			warnx("makekey: line too long: ignoring %.100s...",
249 			    (char *)line->data);
250 		}
251 
252 	}
253 }
254 
255 /*
256  * get a key/line pair from fp
257  */
258 static int
259 seq(fp, line, key)
260 	FILE *fp;
261 	DBT *key, *line;
262 {
263 	static char *buf, flag = 1;
264 	char *end, *pos;
265 	int c;
266 	if (flag) {
267 		flag = 0;
268 		buf = (char *) linebuf;
269 		end = buf + linebuf_size;
270 		line->data = buf;
271 	}
272 	pos = buf;
273 	while ((c = getc(fp)) != EOF) {
274 		if ((*pos++ = c) == REC_D) {
275 			line->size = pos - buf;
276 			return (0);
277 		}
278 		if (pos == end) {
279 			linebuf_size *= 2;
280 			linebuf = realloc(linebuf, linebuf_size);
281 			if (!linebuf)
282 				err(2, "realloc for linebuf to %lu bytes failed", (unsigned long) linebuf_size);
283 
284 			end = linebuf + linebuf_size;
285 			pos = linebuf + (pos - buf);
286 			line->data = buf = (char *)linebuf;
287 			continue;
288 #if 0
289 			line->size = DEFLLEN;
290 			*--pos = REC_D;
291 			while ((c = getc(fp)) != EOF) {
292 				if (c == REC_D)
293 					return (BUFFEND);
294 			}
295 			break;
296 #endif
297 		}
298 	}
299 	if (pos != buf) {
300 		warnx("last character not record delimiter");
301 		*pos++ = REC_D;
302 		line->size = pos - buf;
303 		return (0);
304 	} else
305 		return (EOF);
306 }
307 
308 /*
309  * write a key/line pair to a temporary file
310  */
311 void
312 putrec(rec, fp)
313 	const struct recheader *rec;
314 	FILE *fp;
315 {
316 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
317 }
318 
319 /*
320  * write a line to output
321  */
322 void
323 putline(rec, fp)
324 	const struct recheader *rec;
325 	FILE *fp;
326 {
327 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
328 }
329 
330 /*
331  * get a record from a temporary file. (Used by merge sort.)
332  */
333 int
334 geteasy(flno, filelist, nfiles, rec, end, dummy2)
335 	int flno, nfiles;
336 	union f_handle filelist;
337 	struct recheader *rec;
338 	u_char *end;
339 	struct field *dummy2;
340 {
341 	int i;
342 	FILE *fp;
343 	fp = fstack[flno].fp;
344 	if ((u_char *) rec > end - sizeof(TRECHEADER))
345 		return (BUFFEND);
346 	if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
347 		fclose(fp);
348 		fstack[flno].fp = 0;
349 		return (EOF);
350 	}
351 	if (end - rec->data < rec->length) {
352 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
353 			ungetc(*((char *) rec + i), fp);
354 		return (BUFFEND);
355 	}
356 	fread(rec->data, rec->length, 1, fp);
357 	return (0);
358 }
359