xref: /minix3/usr.bin/sort/files.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $	*/
20fbbaa43SLionel Sambuc 
30fbbaa43SLionel Sambuc /*-
40fbbaa43SLionel Sambuc  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
50fbbaa43SLionel Sambuc  * All rights reserved.
60fbbaa43SLionel Sambuc  *
70fbbaa43SLionel Sambuc  * This code is derived from software contributed to The NetBSD Foundation
80fbbaa43SLionel Sambuc  * by Ben Harris and Jaromir Dolecek.
90fbbaa43SLionel Sambuc  *
100fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
110fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
120fbbaa43SLionel Sambuc  * are met:
130fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
140fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
150fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
160fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
170fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
180fbbaa43SLionel Sambuc  *
190fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
200fbbaa43SLionel Sambuc  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
210fbbaa43SLionel Sambuc  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
220fbbaa43SLionel Sambuc  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
230fbbaa43SLionel Sambuc  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
240fbbaa43SLionel Sambuc  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
250fbbaa43SLionel Sambuc  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
260fbbaa43SLionel Sambuc  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
270fbbaa43SLionel Sambuc  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
280fbbaa43SLionel Sambuc  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
290fbbaa43SLionel Sambuc  * POSSIBILITY OF SUCH DAMAGE.
300fbbaa43SLionel Sambuc  */
310fbbaa43SLionel Sambuc 
320fbbaa43SLionel Sambuc /*-
330fbbaa43SLionel Sambuc  * Copyright (c) 1993
340fbbaa43SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
350fbbaa43SLionel Sambuc  *
360fbbaa43SLionel Sambuc  * This code is derived from software contributed to Berkeley by
370fbbaa43SLionel Sambuc  * Peter McIlroy.
380fbbaa43SLionel Sambuc  *
390fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
400fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
410fbbaa43SLionel Sambuc  * are met:
420fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
430fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
440fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
450fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
460fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
470fbbaa43SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
480fbbaa43SLionel Sambuc  *    may be used to endorse or promote products derived from this software
490fbbaa43SLionel Sambuc  *    without specific prior written permission.
500fbbaa43SLionel Sambuc  *
510fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
520fbbaa43SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
530fbbaa43SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
540fbbaa43SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
550fbbaa43SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
560fbbaa43SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
570fbbaa43SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
580fbbaa43SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
590fbbaa43SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
600fbbaa43SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
610fbbaa43SLionel Sambuc  * SUCH DAMAGE.
620fbbaa43SLionel Sambuc  */
630fbbaa43SLionel Sambuc 
640fbbaa43SLionel Sambuc #include "sort.h"
650fbbaa43SLionel Sambuc #include "fsort.h"
660fbbaa43SLionel Sambuc 
67*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $");
680fbbaa43SLionel Sambuc 
690fbbaa43SLionel Sambuc #include <string.h>
700fbbaa43SLionel Sambuc 
710fbbaa43SLionel Sambuc /* Align records in temporary files to avoid misaligned copies */
720fbbaa43SLionel Sambuc #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
730fbbaa43SLionel Sambuc 
740fbbaa43SLionel Sambuc static ssize_t	seq(FILE *, u_char **);
750fbbaa43SLionel Sambuc 
760fbbaa43SLionel Sambuc /*
770fbbaa43SLionel Sambuc  * this is called when there is no special key. It's only called
780fbbaa43SLionel Sambuc  * in the first fsort pass.
790fbbaa43SLionel Sambuc  */
800fbbaa43SLionel Sambuc 
810fbbaa43SLionel Sambuc static u_char *opos;
820fbbaa43SLionel Sambuc static size_t osz;
830fbbaa43SLionel Sambuc 
840fbbaa43SLionel Sambuc void
makeline_copydown(RECHEADER * recbuf)850fbbaa43SLionel Sambuc makeline_copydown(RECHEADER *recbuf)
860fbbaa43SLionel Sambuc {
870fbbaa43SLionel Sambuc 	memmove(recbuf->data, opos, osz);
880fbbaa43SLionel Sambuc }
890fbbaa43SLionel Sambuc 
900fbbaa43SLionel Sambuc int
makeline(FILE * fp,RECHEADER * recbuf,u_char * bufend,struct field * dummy2)910fbbaa43SLionel Sambuc makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
920fbbaa43SLionel Sambuc {
930fbbaa43SLionel Sambuc 	u_char *pos;
940fbbaa43SLionel Sambuc 	int c;
950fbbaa43SLionel Sambuc 
960fbbaa43SLionel Sambuc 	pos = recbuf->data;
970fbbaa43SLionel Sambuc 	if (osz != 0) {
980fbbaa43SLionel Sambuc 		/*
990fbbaa43SLionel Sambuc 		 * Buffer shortage is solved by either of two ways:
1000fbbaa43SLionel Sambuc 		 * o flush previous buffered data and start using the
1010fbbaa43SLionel Sambuc 		 *   buffer from start.
1020fbbaa43SLionel Sambuc 		 *   makeline_copydown() above must be called.
1030fbbaa43SLionel Sambuc 		 * o realloc buffer
1040fbbaa43SLionel Sambuc 		 *
1050fbbaa43SLionel Sambuc 		 * This code has relied on realloc changing 'bufend',
1060fbbaa43SLionel Sambuc 		 * but that isn't necessarily true.
1070fbbaa43SLionel Sambuc 		 */
1080fbbaa43SLionel Sambuc 		pos += osz;
1090fbbaa43SLionel Sambuc 		osz = 0;
1100fbbaa43SLionel Sambuc 	}
1110fbbaa43SLionel Sambuc 
1120fbbaa43SLionel Sambuc 	while (pos < bufend) {
1130fbbaa43SLionel Sambuc 		c = getc(fp);
1140fbbaa43SLionel Sambuc 		if (c == EOF) {
1150fbbaa43SLionel Sambuc 			if (pos == recbuf->data) {
1160fbbaa43SLionel Sambuc 				FCLOSE(fp);
1170fbbaa43SLionel Sambuc 				return EOF;
1180fbbaa43SLionel Sambuc 			}
1190fbbaa43SLionel Sambuc 			/* Add terminator to partial line */
1200fbbaa43SLionel Sambuc 			c = REC_D;
1210fbbaa43SLionel Sambuc 		}
1220fbbaa43SLionel Sambuc 		*pos++ = c;
1230fbbaa43SLionel Sambuc 		if (c == REC_D) {
1240fbbaa43SLionel Sambuc 			recbuf->offset = 0;
1250fbbaa43SLionel Sambuc 			recbuf->length = pos - recbuf->data;
1260fbbaa43SLionel Sambuc 			recbuf->keylen = recbuf->length - 1;
1270fbbaa43SLionel Sambuc 			return (0);
1280fbbaa43SLionel Sambuc 		}
1290fbbaa43SLionel Sambuc 	}
1300fbbaa43SLionel Sambuc 
1310fbbaa43SLionel Sambuc 	/* Ran out of buffer space... */
1320fbbaa43SLionel Sambuc 	if (recbuf->data < bufend) {
1330fbbaa43SLionel Sambuc 		/* Remember where the partial record is */
1340fbbaa43SLionel Sambuc 		osz = pos - recbuf->data;
1350fbbaa43SLionel Sambuc 		opos = recbuf->data;
1360fbbaa43SLionel Sambuc 	}
1370fbbaa43SLionel Sambuc 	return (BUFFEND);
1380fbbaa43SLionel Sambuc }
1390fbbaa43SLionel Sambuc 
1400fbbaa43SLionel Sambuc /*
1410fbbaa43SLionel Sambuc  * This generates keys. It's only called in the first fsort pass
1420fbbaa43SLionel Sambuc  */
1430fbbaa43SLionel Sambuc int
makekey(FILE * fp,RECHEADER * recbuf,u_char * bufend,struct field * ftbl)1440fbbaa43SLionel Sambuc makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
1450fbbaa43SLionel Sambuc {
1460fbbaa43SLionel Sambuc 	static u_char *line_data;
1470fbbaa43SLionel Sambuc 	static ssize_t line_size;
1480fbbaa43SLionel Sambuc 	static int overflow = 0;
1490fbbaa43SLionel Sambuc 
1500fbbaa43SLionel Sambuc 	/* We get re-entered after returning BUFFEND - save old data */
1510fbbaa43SLionel Sambuc 	if (overflow) {
1520fbbaa43SLionel Sambuc 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
1530fbbaa43SLionel Sambuc 		return overflow ? BUFFEND : 0;
1540fbbaa43SLionel Sambuc 	}
1550fbbaa43SLionel Sambuc 
1560fbbaa43SLionel Sambuc 	line_size = seq(fp, &line_data);
1570fbbaa43SLionel Sambuc 	if (line_size == 0) {
1580fbbaa43SLionel Sambuc 		FCLOSE(fp);
1590fbbaa43SLionel Sambuc 		return EOF;
1600fbbaa43SLionel Sambuc 	}
1610fbbaa43SLionel Sambuc 
1620fbbaa43SLionel Sambuc 	if (line_size > bufend - recbuf->data) {
1630fbbaa43SLionel Sambuc 		overflow = 1;
1640fbbaa43SLionel Sambuc 	} else {
1650fbbaa43SLionel Sambuc 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
1660fbbaa43SLionel Sambuc 	}
1670fbbaa43SLionel Sambuc 	return overflow ? BUFFEND : 0;
1680fbbaa43SLionel Sambuc }
1690fbbaa43SLionel Sambuc 
1700fbbaa43SLionel Sambuc /*
1710fbbaa43SLionel Sambuc  * get a line of input from fp
1720fbbaa43SLionel Sambuc  */
1730fbbaa43SLionel Sambuc static ssize_t
seq(FILE * fp,u_char ** line)1740fbbaa43SLionel Sambuc seq(FILE *fp, u_char **line)
1750fbbaa43SLionel Sambuc {
1760fbbaa43SLionel Sambuc 	static u_char *buf;
1770fbbaa43SLionel Sambuc 	static size_t buf_size = DEFLLEN;
1780fbbaa43SLionel Sambuc 	u_char *end, *pos;
1790fbbaa43SLionel Sambuc 	int c;
1800fbbaa43SLionel Sambuc 	u_char *new_buf;
1810fbbaa43SLionel Sambuc 
1820fbbaa43SLionel Sambuc 	if (!buf) {
1830fbbaa43SLionel Sambuc 		/* one-time initialization */
1840fbbaa43SLionel Sambuc 		buf = malloc(buf_size);
1850fbbaa43SLionel Sambuc 		if (!buf)
1860fbbaa43SLionel Sambuc 		    err(2, "malloc of linebuf for %zu bytes failed",
1870fbbaa43SLionel Sambuc 			    buf_size);
1880fbbaa43SLionel Sambuc 	}
1890fbbaa43SLionel Sambuc 
1900fbbaa43SLionel Sambuc 	end = buf + buf_size;
1910fbbaa43SLionel Sambuc 	pos = buf;
1920fbbaa43SLionel Sambuc 	while ((c = getc(fp)) != EOF) {
1930fbbaa43SLionel Sambuc 		*pos++ = c;
1940fbbaa43SLionel Sambuc 		if (c == REC_D) {
1950fbbaa43SLionel Sambuc 			*line = buf;
1960fbbaa43SLionel Sambuc 			return pos - buf;
1970fbbaa43SLionel Sambuc 		}
1980fbbaa43SLionel Sambuc 		if (pos == end) {
1990fbbaa43SLionel Sambuc 			/* Long line - double size of buffer */
2000fbbaa43SLionel Sambuc 			/* XXX: Check here for stupidly long lines */
2010fbbaa43SLionel Sambuc 			buf_size *= 2;
2020fbbaa43SLionel Sambuc 			new_buf = realloc(buf, buf_size);
2030fbbaa43SLionel Sambuc 			if (!new_buf)
2040fbbaa43SLionel Sambuc 				err(2, "realloc of linebuf to %zu bytes failed",
2050fbbaa43SLionel Sambuc 					buf_size);
2060fbbaa43SLionel Sambuc 
2070fbbaa43SLionel Sambuc 			end = new_buf + buf_size;
2080fbbaa43SLionel Sambuc 			pos = new_buf + (pos - buf);
2090fbbaa43SLionel Sambuc 			buf = new_buf;
2100fbbaa43SLionel Sambuc 		}
2110fbbaa43SLionel Sambuc 	}
2120fbbaa43SLionel Sambuc 
2130fbbaa43SLionel Sambuc 	if (pos != buf) {
2140fbbaa43SLionel Sambuc 		/* EOF part way through line - add line terminator */
2150fbbaa43SLionel Sambuc 		*pos++ = REC_D;
2160fbbaa43SLionel Sambuc 		*line = buf;
2170fbbaa43SLionel Sambuc 		return pos - buf;
2180fbbaa43SLionel Sambuc 	}
2190fbbaa43SLionel Sambuc 
2200fbbaa43SLionel Sambuc 	return 0;
2210fbbaa43SLionel Sambuc }
2220fbbaa43SLionel Sambuc 
2230fbbaa43SLionel Sambuc /*
2240fbbaa43SLionel Sambuc  * write a key/line pair to a temporary file
2250fbbaa43SLionel Sambuc  */
2260fbbaa43SLionel Sambuc void
putrec(const RECHEADER * rec,FILE * fp)2270fbbaa43SLionel Sambuc putrec(const RECHEADER *rec, FILE *fp)
2280fbbaa43SLionel Sambuc {
229*0a6a1f1dSLionel Sambuc 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp,
230*0a6a1f1dSLionel Sambuc 	       "failed to write temp file");
2310fbbaa43SLionel Sambuc }
2320fbbaa43SLionel Sambuc 
2330fbbaa43SLionel Sambuc /*
2340fbbaa43SLionel Sambuc  * write a line to output
2350fbbaa43SLionel Sambuc  */
2360fbbaa43SLionel Sambuc void
putline(const RECHEADER * rec,FILE * fp)2370fbbaa43SLionel Sambuc putline(const RECHEADER *rec, FILE *fp)
2380fbbaa43SLionel Sambuc {
239*0a6a1f1dSLionel Sambuc 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp,
240*0a6a1f1dSLionel Sambuc 	       "failed to write");
2410fbbaa43SLionel Sambuc }
2420fbbaa43SLionel Sambuc 
2430fbbaa43SLionel Sambuc /*
2440fbbaa43SLionel Sambuc  * write dump of key to output (for -Dk)
2450fbbaa43SLionel Sambuc  */
2460fbbaa43SLionel Sambuc void
putkeydump(const RECHEADER * rec,FILE * fp)2470fbbaa43SLionel Sambuc putkeydump(const RECHEADER *rec, FILE *fp)
2480fbbaa43SLionel Sambuc {
249*0a6a1f1dSLionel Sambuc 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp,
250*0a6a1f1dSLionel Sambuc 	       "failed to write debug key");
2510fbbaa43SLionel Sambuc }
2520fbbaa43SLionel Sambuc 
2530fbbaa43SLionel Sambuc /*
2540fbbaa43SLionel Sambuc  * get a record from a temporary file. (Used by merge sort.)
2550fbbaa43SLionel Sambuc  */
2560fbbaa43SLionel Sambuc int
geteasy(FILE * fp,RECHEADER * rec,u_char * end,struct field * dummy2)2570fbbaa43SLionel Sambuc geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
2580fbbaa43SLionel Sambuc {
2590fbbaa43SLionel Sambuc 	length_t file_len;
2600fbbaa43SLionel Sambuc 	int i;
2610fbbaa43SLionel Sambuc 
2620fbbaa43SLionel Sambuc 	(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
2630fbbaa43SLionel Sambuc 
2640fbbaa43SLionel Sambuc 	if ((u_char *)(rec + 1) > end)
2650fbbaa43SLionel Sambuc 		return (BUFFEND);
2660fbbaa43SLionel Sambuc 	if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
2670fbbaa43SLionel Sambuc 		fclose(fp);
2680fbbaa43SLionel Sambuc 		return (EOF);
2690fbbaa43SLionel Sambuc 	}
2700fbbaa43SLionel Sambuc 	file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
2710fbbaa43SLionel Sambuc 	if (end - rec->data < (ptrdiff_t)file_len) {
2720fbbaa43SLionel Sambuc 		for (i = sizeof rec->length - 1; i >= 0;  i--)
2730fbbaa43SLionel Sambuc 			ungetc(*((char *) rec + i), fp);
2740fbbaa43SLionel Sambuc 		return (BUFFEND);
2750fbbaa43SLionel Sambuc 	}
2760fbbaa43SLionel Sambuc 
2770fbbaa43SLionel Sambuc 	fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
2780fbbaa43SLionel Sambuc 	return (0);
2790fbbaa43SLionel Sambuc }
280