xref: /minix3/usr.bin/sort/files.c (revision 0fbbaa43e914d38ef3af549125d31574117d1ebf)
1*0fbbaa43SLionel Sambuc /*	$NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $	*/
2*0fbbaa43SLionel Sambuc 
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5*0fbbaa43SLionel Sambuc  * All rights reserved.
6*0fbbaa43SLionel Sambuc  *
7*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to The NetBSD Foundation
8*0fbbaa43SLionel Sambuc  * by Ben Harris and Jaromir Dolecek.
9*0fbbaa43SLionel Sambuc  *
10*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc  * are met:
13*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc  *
19*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*0fbbaa43SLionel Sambuc  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*0fbbaa43SLionel Sambuc  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*0fbbaa43SLionel Sambuc  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*0fbbaa43SLionel Sambuc  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*0fbbaa43SLionel Sambuc  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*0fbbaa43SLionel Sambuc  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*0fbbaa43SLionel Sambuc  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*0fbbaa43SLionel Sambuc  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*0fbbaa43SLionel Sambuc  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*0fbbaa43SLionel Sambuc  * POSSIBILITY OF SUCH DAMAGE.
30*0fbbaa43SLionel Sambuc  */
31*0fbbaa43SLionel Sambuc 
32*0fbbaa43SLionel Sambuc /*-
33*0fbbaa43SLionel Sambuc  * Copyright (c) 1993
34*0fbbaa43SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
35*0fbbaa43SLionel Sambuc  *
36*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to Berkeley by
37*0fbbaa43SLionel Sambuc  * Peter McIlroy.
38*0fbbaa43SLionel Sambuc  *
39*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
40*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
41*0fbbaa43SLionel Sambuc  * are met:
42*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
43*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
44*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
45*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
46*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
47*0fbbaa43SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
48*0fbbaa43SLionel Sambuc  *    may be used to endorse or promote products derived from this software
49*0fbbaa43SLionel Sambuc  *    without specific prior written permission.
50*0fbbaa43SLionel Sambuc  *
51*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52*0fbbaa43SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53*0fbbaa43SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54*0fbbaa43SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55*0fbbaa43SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56*0fbbaa43SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57*0fbbaa43SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58*0fbbaa43SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59*0fbbaa43SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60*0fbbaa43SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61*0fbbaa43SLionel Sambuc  * SUCH DAMAGE.
62*0fbbaa43SLionel Sambuc  */
63*0fbbaa43SLionel Sambuc 
64*0fbbaa43SLionel Sambuc #include "sort.h"
65*0fbbaa43SLionel Sambuc #include "fsort.h"
66*0fbbaa43SLionel Sambuc 
67*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $");
68*0fbbaa43SLionel Sambuc 
69*0fbbaa43SLionel Sambuc #include <string.h>
70*0fbbaa43SLionel Sambuc 
71*0fbbaa43SLionel Sambuc /* Align records in temporary files to avoid misaligned copies */
72*0fbbaa43SLionel Sambuc #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
73*0fbbaa43SLionel Sambuc 
74*0fbbaa43SLionel Sambuc static ssize_t	seq(FILE *, u_char **);
75*0fbbaa43SLionel Sambuc 
76*0fbbaa43SLionel Sambuc /*
77*0fbbaa43SLionel Sambuc  * this is called when there is no special key. It's only called
78*0fbbaa43SLionel Sambuc  * in the first fsort pass.
79*0fbbaa43SLionel Sambuc  */
80*0fbbaa43SLionel Sambuc 
81*0fbbaa43SLionel Sambuc static u_char *opos;
82*0fbbaa43SLionel Sambuc static size_t osz;
83*0fbbaa43SLionel Sambuc 
84*0fbbaa43SLionel Sambuc void
85*0fbbaa43SLionel Sambuc makeline_copydown(RECHEADER *recbuf)
86*0fbbaa43SLionel Sambuc {
87*0fbbaa43SLionel Sambuc 	memmove(recbuf->data, opos, osz);
88*0fbbaa43SLionel Sambuc }
89*0fbbaa43SLionel Sambuc 
90*0fbbaa43SLionel Sambuc int
91*0fbbaa43SLionel Sambuc makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
92*0fbbaa43SLionel Sambuc {
93*0fbbaa43SLionel Sambuc 	u_char *pos;
94*0fbbaa43SLionel Sambuc 	int c;
95*0fbbaa43SLionel Sambuc 
96*0fbbaa43SLionel Sambuc 	pos = recbuf->data;
97*0fbbaa43SLionel Sambuc 	if (osz != 0) {
98*0fbbaa43SLionel Sambuc 		/*
99*0fbbaa43SLionel Sambuc 		 * Buffer shortage is solved by either of two ways:
100*0fbbaa43SLionel Sambuc 		 * o flush previous buffered data and start using the
101*0fbbaa43SLionel Sambuc 		 *   buffer from start.
102*0fbbaa43SLionel Sambuc 		 *   makeline_copydown() above must be called.
103*0fbbaa43SLionel Sambuc 		 * o realloc buffer
104*0fbbaa43SLionel Sambuc 		 *
105*0fbbaa43SLionel Sambuc 		 * This code has relied on realloc changing 'bufend',
106*0fbbaa43SLionel Sambuc 		 * but that isn't necessarily true.
107*0fbbaa43SLionel Sambuc 		 */
108*0fbbaa43SLionel Sambuc 		pos += osz;
109*0fbbaa43SLionel Sambuc 		osz = 0;
110*0fbbaa43SLionel Sambuc 	}
111*0fbbaa43SLionel Sambuc 
112*0fbbaa43SLionel Sambuc 	while (pos < bufend) {
113*0fbbaa43SLionel Sambuc 		c = getc(fp);
114*0fbbaa43SLionel Sambuc 		if (c == EOF) {
115*0fbbaa43SLionel Sambuc 			if (pos == recbuf->data) {
116*0fbbaa43SLionel Sambuc 				FCLOSE(fp);
117*0fbbaa43SLionel Sambuc 				return EOF;
118*0fbbaa43SLionel Sambuc 			}
119*0fbbaa43SLionel Sambuc 			/* Add terminator to partial line */
120*0fbbaa43SLionel Sambuc 			c = REC_D;
121*0fbbaa43SLionel Sambuc 		}
122*0fbbaa43SLionel Sambuc 		*pos++ = c;
123*0fbbaa43SLionel Sambuc 		if (c == REC_D) {
124*0fbbaa43SLionel Sambuc 			recbuf->offset = 0;
125*0fbbaa43SLionel Sambuc 			recbuf->length = pos - recbuf->data;
126*0fbbaa43SLionel Sambuc 			recbuf->keylen = recbuf->length - 1;
127*0fbbaa43SLionel Sambuc 			return (0);
128*0fbbaa43SLionel Sambuc 		}
129*0fbbaa43SLionel Sambuc 	}
130*0fbbaa43SLionel Sambuc 
131*0fbbaa43SLionel Sambuc 	/* Ran out of buffer space... */
132*0fbbaa43SLionel Sambuc 	if (recbuf->data < bufend) {
133*0fbbaa43SLionel Sambuc 		/* Remember where the partial record is */
134*0fbbaa43SLionel Sambuc 		osz = pos - recbuf->data;
135*0fbbaa43SLionel Sambuc 		opos = recbuf->data;
136*0fbbaa43SLionel Sambuc 	}
137*0fbbaa43SLionel Sambuc 	return (BUFFEND);
138*0fbbaa43SLionel Sambuc }
139*0fbbaa43SLionel Sambuc 
140*0fbbaa43SLionel Sambuc /*
141*0fbbaa43SLionel Sambuc  * This generates keys. It's only called in the first fsort pass
142*0fbbaa43SLionel Sambuc  */
143*0fbbaa43SLionel Sambuc int
144*0fbbaa43SLionel Sambuc makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
145*0fbbaa43SLionel Sambuc {
146*0fbbaa43SLionel Sambuc 	static u_char *line_data;
147*0fbbaa43SLionel Sambuc 	static ssize_t line_size;
148*0fbbaa43SLionel Sambuc 	static int overflow = 0;
149*0fbbaa43SLionel Sambuc 
150*0fbbaa43SLionel Sambuc 	/* We get re-entered after returning BUFFEND - save old data */
151*0fbbaa43SLionel Sambuc 	if (overflow) {
152*0fbbaa43SLionel Sambuc 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
153*0fbbaa43SLionel Sambuc 		return overflow ? BUFFEND : 0;
154*0fbbaa43SLionel Sambuc 	}
155*0fbbaa43SLionel Sambuc 
156*0fbbaa43SLionel Sambuc 	line_size = seq(fp, &line_data);
157*0fbbaa43SLionel Sambuc 	if (line_size == 0) {
158*0fbbaa43SLionel Sambuc 		FCLOSE(fp);
159*0fbbaa43SLionel Sambuc 		return EOF;
160*0fbbaa43SLionel Sambuc 	}
161*0fbbaa43SLionel Sambuc 
162*0fbbaa43SLionel Sambuc 	if (line_size > bufend - recbuf->data) {
163*0fbbaa43SLionel Sambuc 		overflow = 1;
164*0fbbaa43SLionel Sambuc 	} else {
165*0fbbaa43SLionel Sambuc 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
166*0fbbaa43SLionel Sambuc 	}
167*0fbbaa43SLionel Sambuc 	return overflow ? BUFFEND : 0;
168*0fbbaa43SLionel Sambuc }
169*0fbbaa43SLionel Sambuc 
170*0fbbaa43SLionel Sambuc /*
171*0fbbaa43SLionel Sambuc  * get a line of input from fp
172*0fbbaa43SLionel Sambuc  */
173*0fbbaa43SLionel Sambuc static ssize_t
174*0fbbaa43SLionel Sambuc seq(FILE *fp, u_char **line)
175*0fbbaa43SLionel Sambuc {
176*0fbbaa43SLionel Sambuc 	static u_char *buf;
177*0fbbaa43SLionel Sambuc 	static size_t buf_size = DEFLLEN;
178*0fbbaa43SLionel Sambuc 	u_char *end, *pos;
179*0fbbaa43SLionel Sambuc 	int c;
180*0fbbaa43SLionel Sambuc 	u_char *new_buf;
181*0fbbaa43SLionel Sambuc 
182*0fbbaa43SLionel Sambuc 	if (!buf) {
183*0fbbaa43SLionel Sambuc 		/* one-time initialization */
184*0fbbaa43SLionel Sambuc 		buf = malloc(buf_size);
185*0fbbaa43SLionel Sambuc 		if (!buf)
186*0fbbaa43SLionel Sambuc 		    err(2, "malloc of linebuf for %zu bytes failed",
187*0fbbaa43SLionel Sambuc 			    buf_size);
188*0fbbaa43SLionel Sambuc 	}
189*0fbbaa43SLionel Sambuc 
190*0fbbaa43SLionel Sambuc 	end = buf + buf_size;
191*0fbbaa43SLionel Sambuc 	pos = buf;
192*0fbbaa43SLionel Sambuc 	while ((c = getc(fp)) != EOF) {
193*0fbbaa43SLionel Sambuc 		*pos++ = c;
194*0fbbaa43SLionel Sambuc 		if (c == REC_D) {
195*0fbbaa43SLionel Sambuc 			*line = buf;
196*0fbbaa43SLionel Sambuc 			return pos - buf;
197*0fbbaa43SLionel Sambuc 		}
198*0fbbaa43SLionel Sambuc 		if (pos == end) {
199*0fbbaa43SLionel Sambuc 			/* Long line - double size of buffer */
200*0fbbaa43SLionel Sambuc 			/* XXX: Check here for stupidly long lines */
201*0fbbaa43SLionel Sambuc 			buf_size *= 2;
202*0fbbaa43SLionel Sambuc 			new_buf = realloc(buf, buf_size);
203*0fbbaa43SLionel Sambuc 			if (!new_buf)
204*0fbbaa43SLionel Sambuc 				err(2, "realloc of linebuf to %zu bytes failed",
205*0fbbaa43SLionel Sambuc 					buf_size);
206*0fbbaa43SLionel Sambuc 
207*0fbbaa43SLionel Sambuc 			end = new_buf + buf_size;
208*0fbbaa43SLionel Sambuc 			pos = new_buf + (pos - buf);
209*0fbbaa43SLionel Sambuc 			buf = new_buf;
210*0fbbaa43SLionel Sambuc 		}
211*0fbbaa43SLionel Sambuc 	}
212*0fbbaa43SLionel Sambuc 
213*0fbbaa43SLionel Sambuc 	if (pos != buf) {
214*0fbbaa43SLionel Sambuc 		/* EOF part way through line - add line terminator */
215*0fbbaa43SLionel Sambuc 		*pos++ = REC_D;
216*0fbbaa43SLionel Sambuc 		*line = buf;
217*0fbbaa43SLionel Sambuc 		return pos - buf;
218*0fbbaa43SLionel Sambuc 	}
219*0fbbaa43SLionel Sambuc 
220*0fbbaa43SLionel Sambuc 	return 0;
221*0fbbaa43SLionel Sambuc }
222*0fbbaa43SLionel Sambuc 
223*0fbbaa43SLionel Sambuc /*
224*0fbbaa43SLionel Sambuc  * write a key/line pair to a temporary file
225*0fbbaa43SLionel Sambuc  */
226*0fbbaa43SLionel Sambuc void
227*0fbbaa43SLionel Sambuc putrec(const RECHEADER *rec, FILE *fp)
228*0fbbaa43SLionel Sambuc {
229*0fbbaa43SLionel Sambuc 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp);
230*0fbbaa43SLionel Sambuc }
231*0fbbaa43SLionel Sambuc 
232*0fbbaa43SLionel Sambuc /*
233*0fbbaa43SLionel Sambuc  * write a line to output
234*0fbbaa43SLionel Sambuc  */
235*0fbbaa43SLionel Sambuc void
236*0fbbaa43SLionel Sambuc putline(const RECHEADER *rec, FILE *fp)
237*0fbbaa43SLionel Sambuc {
238*0fbbaa43SLionel Sambuc 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
239*0fbbaa43SLionel Sambuc }
240*0fbbaa43SLionel Sambuc 
241*0fbbaa43SLionel Sambuc /*
242*0fbbaa43SLionel Sambuc  * write dump of key to output (for -Dk)
243*0fbbaa43SLionel Sambuc  */
244*0fbbaa43SLionel Sambuc void
245*0fbbaa43SLionel Sambuc putkeydump(const RECHEADER *rec, FILE *fp)
246*0fbbaa43SLionel Sambuc {
247*0fbbaa43SLionel Sambuc 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp);
248*0fbbaa43SLionel Sambuc }
249*0fbbaa43SLionel Sambuc 
250*0fbbaa43SLionel Sambuc /*
251*0fbbaa43SLionel Sambuc  * get a record from a temporary file. (Used by merge sort.)
252*0fbbaa43SLionel Sambuc  */
253*0fbbaa43SLionel Sambuc int
254*0fbbaa43SLionel Sambuc geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
255*0fbbaa43SLionel Sambuc {
256*0fbbaa43SLionel Sambuc 	length_t file_len;
257*0fbbaa43SLionel Sambuc 	int i;
258*0fbbaa43SLionel Sambuc 
259*0fbbaa43SLionel Sambuc 	(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
260*0fbbaa43SLionel Sambuc 
261*0fbbaa43SLionel Sambuc 	if ((u_char *)(rec + 1) > end)
262*0fbbaa43SLionel Sambuc 		return (BUFFEND);
263*0fbbaa43SLionel Sambuc 	if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
264*0fbbaa43SLionel Sambuc 		fclose(fp);
265*0fbbaa43SLionel Sambuc 		return (EOF);
266*0fbbaa43SLionel Sambuc 	}
267*0fbbaa43SLionel Sambuc 	file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
268*0fbbaa43SLionel Sambuc 	if (end - rec->data < (ptrdiff_t)file_len) {
269*0fbbaa43SLionel Sambuc 		for (i = sizeof rec->length - 1; i >= 0;  i--)
270*0fbbaa43SLionel Sambuc 			ungetc(*((char *) rec + i), fp);
271*0fbbaa43SLionel Sambuc 		return (BUFFEND);
272*0fbbaa43SLionel Sambuc 	}
273*0fbbaa43SLionel Sambuc 
274*0fbbaa43SLionel Sambuc 	fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
275*0fbbaa43SLionel Sambuc 	return (0);
276*0fbbaa43SLionel Sambuc }
277