xref: /minix3/usr.bin/sort/fsort.c (revision 0fbbaa43e914d38ef3af549125d31574117d1ebf)
1*0fbbaa43SLionel Sambuc /*	$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $	*/
2*0fbbaa43SLionel Sambuc 
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5*0fbbaa43SLionel Sambuc  * All rights reserved.
6*0fbbaa43SLionel Sambuc  *
7*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to The NetBSD Foundation
8*0fbbaa43SLionel Sambuc  * by Ben Harris and Jaromir Dolecek.
9*0fbbaa43SLionel Sambuc  *
10*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc  * are met:
13*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc  *
19*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*0fbbaa43SLionel Sambuc  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*0fbbaa43SLionel Sambuc  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*0fbbaa43SLionel Sambuc  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*0fbbaa43SLionel Sambuc  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*0fbbaa43SLionel Sambuc  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*0fbbaa43SLionel Sambuc  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*0fbbaa43SLionel Sambuc  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*0fbbaa43SLionel Sambuc  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*0fbbaa43SLionel Sambuc  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*0fbbaa43SLionel Sambuc  * POSSIBILITY OF SUCH DAMAGE.
30*0fbbaa43SLionel Sambuc  */
31*0fbbaa43SLionel Sambuc 
32*0fbbaa43SLionel Sambuc /*-
33*0fbbaa43SLionel Sambuc  * Copyright (c) 1993
34*0fbbaa43SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
35*0fbbaa43SLionel Sambuc  *
36*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to Berkeley by
37*0fbbaa43SLionel Sambuc  * Peter McIlroy.
38*0fbbaa43SLionel Sambuc  *
39*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
40*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
41*0fbbaa43SLionel Sambuc  * are met:
42*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
43*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
44*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
45*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
46*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
47*0fbbaa43SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
48*0fbbaa43SLionel Sambuc  *    may be used to endorse or promote products derived from this software
49*0fbbaa43SLionel Sambuc  *    without specific prior written permission.
50*0fbbaa43SLionel Sambuc  *
51*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52*0fbbaa43SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53*0fbbaa43SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54*0fbbaa43SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55*0fbbaa43SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56*0fbbaa43SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57*0fbbaa43SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58*0fbbaa43SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59*0fbbaa43SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60*0fbbaa43SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61*0fbbaa43SLionel Sambuc  * SUCH DAMAGE.
62*0fbbaa43SLionel Sambuc  */
63*0fbbaa43SLionel Sambuc 
64*0fbbaa43SLionel Sambuc /*
65*0fbbaa43SLionel Sambuc  * Read in a block of records (until 'enough').
66*0fbbaa43SLionel Sambuc  * sort, write to temp file.
67*0fbbaa43SLionel Sambuc  * Merge sort temp files into output file
68*0fbbaa43SLionel Sambuc  * Small files miss out the temp file stage.
69*0fbbaa43SLionel Sambuc  * Large files might get multiple merges.
70*0fbbaa43SLionel Sambuc  */
71*0fbbaa43SLionel Sambuc #include "sort.h"
72*0fbbaa43SLionel Sambuc #include "fsort.h"
73*0fbbaa43SLionel Sambuc 
74*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
75*0fbbaa43SLionel Sambuc 
76*0fbbaa43SLionel Sambuc #include <stdlib.h>
77*0fbbaa43SLionel Sambuc #include <string.h>
78*0fbbaa43SLionel Sambuc 
79*0fbbaa43SLionel Sambuc #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
80*0fbbaa43SLionel Sambuc 
81*0fbbaa43SLionel Sambuc void
fsort(struct filelist * filelist,int nfiles,FILE * outfp,struct field * ftbl)82*0fbbaa43SLionel Sambuc fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
83*0fbbaa43SLionel Sambuc {
84*0fbbaa43SLionel Sambuc 	RECHEADER **keylist;
85*0fbbaa43SLionel Sambuc 	RECHEADER **keypos, **keyp;
86*0fbbaa43SLionel Sambuc 	RECHEADER *buffer;
87*0fbbaa43SLionel Sambuc 	size_t bufsize = DEFBUFSIZE;
88*0fbbaa43SLionel Sambuc 	u_char *bufend;
89*0fbbaa43SLionel Sambuc 	int mfct = 0;
90*0fbbaa43SLionel Sambuc 	int c, nelem;
91*0fbbaa43SLionel Sambuc 	get_func_t get;
92*0fbbaa43SLionel Sambuc 	RECHEADER *crec;
93*0fbbaa43SLionel Sambuc 	RECHEADER *nbuffer;
94*0fbbaa43SLionel Sambuc 	FILE *fp, *tmp_fp;
95*0fbbaa43SLionel Sambuc 	int file_no;
96*0fbbaa43SLionel Sambuc 	int max_recs = DEBUG('m') ? 16 : MAXNUM;
97*0fbbaa43SLionel Sambuc 
98*0fbbaa43SLionel Sambuc 	buffer = allocrec(NULL, bufsize);
99*0fbbaa43SLionel Sambuc 	bufend = (u_char *)buffer + bufsize;
100*0fbbaa43SLionel Sambuc 	/* Allocate double length keymap for radix_sort */
101*0fbbaa43SLionel Sambuc 	keylist = malloc(2 * max_recs * sizeof(*keylist));
102*0fbbaa43SLionel Sambuc 	if (buffer == NULL || keylist == NULL)
103*0fbbaa43SLionel Sambuc 		err(2, "failed to malloc initial buffer or keylist");
104*0fbbaa43SLionel Sambuc 
105*0fbbaa43SLionel Sambuc 	if (SINGL_FLD)
106*0fbbaa43SLionel Sambuc 		/* Key and data are one! */
107*0fbbaa43SLionel Sambuc 		get = makeline;
108*0fbbaa43SLionel Sambuc 	else
109*0fbbaa43SLionel Sambuc 		/* Key (merged key fields) added before data */
110*0fbbaa43SLionel Sambuc 		get = makekey;
111*0fbbaa43SLionel Sambuc 
112*0fbbaa43SLionel Sambuc 	file_no = 0;
113*0fbbaa43SLionel Sambuc #if defined(__minix)
114*0fbbaa43SLionel Sambuc 	/* LSC FIXME: Not very pretty, but reduce the diff */
115*0fbbaa43SLionel Sambuc #include "pathnames.h"
116*0fbbaa43SLionel Sambuc 	if (!strcmp(filelist->names[0], _PATH_STDIN))
117*0fbbaa43SLionel Sambuc 		fp = stdin;
118*0fbbaa43SLionel Sambuc 	else
119*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
120*0fbbaa43SLionel Sambuc 	fp = fopen(filelist->names[0], "r");
121*0fbbaa43SLionel Sambuc 	if (fp == NULL)
122*0fbbaa43SLionel Sambuc 		err(2, "%s", filelist->names[0]);
123*0fbbaa43SLionel Sambuc 
124*0fbbaa43SLionel Sambuc 	/* Loop through reads of chunk of input files that get sorted
125*0fbbaa43SLionel Sambuc 	 * and then merged together. */
126*0fbbaa43SLionel Sambuc 	for (;;) {
127*0fbbaa43SLionel Sambuc 		keypos = keylist;
128*0fbbaa43SLionel Sambuc 		nelem = 0;
129*0fbbaa43SLionel Sambuc 		crec = buffer;
130*0fbbaa43SLionel Sambuc 		makeline_copydown(crec);
131*0fbbaa43SLionel Sambuc 
132*0fbbaa43SLionel Sambuc 		/* Loop reading records */
133*0fbbaa43SLionel Sambuc 		for (;;) {
134*0fbbaa43SLionel Sambuc 			c = get(fp, crec, bufend, ftbl);
135*0fbbaa43SLionel Sambuc 			/* 'c' is 0, EOF or BUFFEND */
136*0fbbaa43SLionel Sambuc 			if (c == 0) {
137*0fbbaa43SLionel Sambuc 				/* Save start of key in input buffer */
138*0fbbaa43SLionel Sambuc 				*keypos++ = crec;
139*0fbbaa43SLionel Sambuc 				if (++nelem == max_recs) {
140*0fbbaa43SLionel Sambuc 					c = BUFFEND;
141*0fbbaa43SLionel Sambuc 					break;
142*0fbbaa43SLionel Sambuc 				}
143*0fbbaa43SLionel Sambuc 				crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
144*0fbbaa43SLionel Sambuc 				continue;
145*0fbbaa43SLionel Sambuc 			}
146*0fbbaa43SLionel Sambuc 			if (c == EOF) {
147*0fbbaa43SLionel Sambuc 				/* try next file */
148*0fbbaa43SLionel Sambuc 				if (++file_no >= nfiles)
149*0fbbaa43SLionel Sambuc 					/* no more files */
150*0fbbaa43SLionel Sambuc 					break;
151*0fbbaa43SLionel Sambuc #if defined(__minix)
152*0fbbaa43SLionel Sambuc 				if (!strcmp(filelist->names[0], _PATH_STDIN))
153*0fbbaa43SLionel Sambuc 					fp = stdin;
154*0fbbaa43SLionel Sambuc 				else
155*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
156*0fbbaa43SLionel Sambuc 				fp = fopen(filelist->names[file_no], "r");
157*0fbbaa43SLionel Sambuc 				if (fp == NULL)
158*0fbbaa43SLionel Sambuc 					err(2, "%s", filelist->names[file_no]);
159*0fbbaa43SLionel Sambuc 				continue;
160*0fbbaa43SLionel Sambuc 			}
161*0fbbaa43SLionel Sambuc 			if (nelem >= max_recs
162*0fbbaa43SLionel Sambuc 			    || (bufsize >= MAXBUFSIZE && nelem > 8))
163*0fbbaa43SLionel Sambuc 				/* Need to sort and save this lot of data */
164*0fbbaa43SLionel Sambuc 				break;
165*0fbbaa43SLionel Sambuc 
166*0fbbaa43SLionel Sambuc 			/* c == BUFFEND, and we can process more data */
167*0fbbaa43SLionel Sambuc 			/* Allocate a larger buffer for this lot of data */
168*0fbbaa43SLionel Sambuc 			bufsize *= 2;
169*0fbbaa43SLionel Sambuc 			nbuffer = allocrec(buffer, bufsize);
170*0fbbaa43SLionel Sambuc 			if (!nbuffer) {
171*0fbbaa43SLionel Sambuc 				err(2, "failed to realloc buffer to %zu bytes",
172*0fbbaa43SLionel Sambuc 					bufsize);
173*0fbbaa43SLionel Sambuc 			}
174*0fbbaa43SLionel Sambuc 
175*0fbbaa43SLionel Sambuc 			/* patch up keylist[] */
176*0fbbaa43SLionel Sambuc 			for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
177*0fbbaa43SLionel Sambuc 				*keyp = nbuffer + (*keyp - buffer);
178*0fbbaa43SLionel Sambuc 
179*0fbbaa43SLionel Sambuc 			crec = nbuffer + (crec - buffer);
180*0fbbaa43SLionel Sambuc 			buffer = nbuffer;
181*0fbbaa43SLionel Sambuc 			bufend = (u_char *)buffer + bufsize;
182*0fbbaa43SLionel Sambuc 		}
183*0fbbaa43SLionel Sambuc 
184*0fbbaa43SLionel Sambuc 		/* Sort this set of records */
185*0fbbaa43SLionel Sambuc 		radix_sort(keylist, keylist + max_recs, nelem);
186*0fbbaa43SLionel Sambuc 
187*0fbbaa43SLionel Sambuc 		if (c == EOF && mfct == 0) {
188*0fbbaa43SLionel Sambuc 			/* all the data is (sorted) in the buffer */
189*0fbbaa43SLionel Sambuc 			append(keylist, nelem, outfp,
190*0fbbaa43SLionel Sambuc 			    DEBUG('k') ? putkeydump : putline);
191*0fbbaa43SLionel Sambuc 			break;
192*0fbbaa43SLionel Sambuc 		}
193*0fbbaa43SLionel Sambuc 
194*0fbbaa43SLionel Sambuc 		/* Save current data to a temporary file for a later merge */
195*0fbbaa43SLionel Sambuc 		if (nelem != 0) {
196*0fbbaa43SLionel Sambuc 			tmp_fp = ftmp();
197*0fbbaa43SLionel Sambuc 			append(keylist, nelem, tmp_fp, putrec);
198*0fbbaa43SLionel Sambuc 			save_for_merge(tmp_fp, geteasy, ftbl);
199*0fbbaa43SLionel Sambuc 		}
200*0fbbaa43SLionel Sambuc 		mfct = 1;
201*0fbbaa43SLionel Sambuc 
202*0fbbaa43SLionel Sambuc 		if (c == EOF) {
203*0fbbaa43SLionel Sambuc 			/* merge to output file */
204*0fbbaa43SLionel Sambuc 			merge_sort(outfp,
205*0fbbaa43SLionel Sambuc 			    DEBUG('k') ? putkeydump : putline, ftbl);
206*0fbbaa43SLionel Sambuc 			break;
207*0fbbaa43SLionel Sambuc 		}
208*0fbbaa43SLionel Sambuc 	}
209*0fbbaa43SLionel Sambuc 
210*0fbbaa43SLionel Sambuc 	free(keylist);
211*0fbbaa43SLionel Sambuc 	keylist = NULL;
212*0fbbaa43SLionel Sambuc 	free(buffer);
213*0fbbaa43SLionel Sambuc 	buffer = NULL;
214*0fbbaa43SLionel Sambuc }
215