1*0fbbaa43SLionel Sambuc /* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */
2*0fbbaa43SLionel Sambuc
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5*0fbbaa43SLionel Sambuc * All rights reserved.
6*0fbbaa43SLionel Sambuc *
7*0fbbaa43SLionel Sambuc * This code is derived from software contributed to The NetBSD Foundation
8*0fbbaa43SLionel Sambuc * by Ben Harris and Jaromir Dolecek.
9*0fbbaa43SLionel Sambuc *
10*0fbbaa43SLionel Sambuc * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc * are met:
13*0fbbaa43SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc * documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc *
19*0fbbaa43SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*0fbbaa43SLionel Sambuc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*0fbbaa43SLionel Sambuc * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*0fbbaa43SLionel Sambuc * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*0fbbaa43SLionel Sambuc * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*0fbbaa43SLionel Sambuc * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*0fbbaa43SLionel Sambuc * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*0fbbaa43SLionel Sambuc * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*0fbbaa43SLionel Sambuc * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*0fbbaa43SLionel Sambuc * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*0fbbaa43SLionel Sambuc * POSSIBILITY OF SUCH DAMAGE.
30*0fbbaa43SLionel Sambuc */
31*0fbbaa43SLionel Sambuc
32*0fbbaa43SLionel Sambuc /*-
33*0fbbaa43SLionel Sambuc * Copyright (c) 1993
34*0fbbaa43SLionel Sambuc * The Regents of the University of California. All rights reserved.
35*0fbbaa43SLionel Sambuc *
36*0fbbaa43SLionel Sambuc * This code is derived from software contributed to Berkeley by
37*0fbbaa43SLionel Sambuc * Peter McIlroy.
38*0fbbaa43SLionel Sambuc *
39*0fbbaa43SLionel Sambuc * Redistribution and use in source and binary forms, with or without
40*0fbbaa43SLionel Sambuc * modification, are permitted provided that the following conditions
41*0fbbaa43SLionel Sambuc * are met:
42*0fbbaa43SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
43*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer.
44*0fbbaa43SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
45*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer in the
46*0fbbaa43SLionel Sambuc * documentation and/or other materials provided with the distribution.
47*0fbbaa43SLionel Sambuc * 3. Neither the name of the University nor the names of its contributors
48*0fbbaa43SLionel Sambuc * may be used to endorse or promote products derived from this software
49*0fbbaa43SLionel Sambuc * without specific prior written permission.
50*0fbbaa43SLionel Sambuc *
51*0fbbaa43SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52*0fbbaa43SLionel Sambuc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53*0fbbaa43SLionel Sambuc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54*0fbbaa43SLionel Sambuc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55*0fbbaa43SLionel Sambuc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56*0fbbaa43SLionel Sambuc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57*0fbbaa43SLionel Sambuc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58*0fbbaa43SLionel Sambuc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59*0fbbaa43SLionel Sambuc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60*0fbbaa43SLionel Sambuc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61*0fbbaa43SLionel Sambuc * SUCH DAMAGE.
62*0fbbaa43SLionel Sambuc */
63*0fbbaa43SLionel Sambuc
64*0fbbaa43SLionel Sambuc /*
65*0fbbaa43SLionel Sambuc * Read in a block of records (until 'enough').
66*0fbbaa43SLionel Sambuc * sort, write to temp file.
67*0fbbaa43SLionel Sambuc * Merge sort temp files into output file
68*0fbbaa43SLionel Sambuc * Small files miss out the temp file stage.
69*0fbbaa43SLionel Sambuc * Large files might get multiple merges.
70*0fbbaa43SLionel Sambuc */
71*0fbbaa43SLionel Sambuc #include "sort.h"
72*0fbbaa43SLionel Sambuc #include "fsort.h"
73*0fbbaa43SLionel Sambuc
74*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
75*0fbbaa43SLionel Sambuc
76*0fbbaa43SLionel Sambuc #include <stdlib.h>
77*0fbbaa43SLionel Sambuc #include <string.h>
78*0fbbaa43SLionel Sambuc
79*0fbbaa43SLionel Sambuc #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
80*0fbbaa43SLionel Sambuc
81*0fbbaa43SLionel Sambuc void
fsort(struct filelist * filelist,int nfiles,FILE * outfp,struct field * ftbl)82*0fbbaa43SLionel Sambuc fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
83*0fbbaa43SLionel Sambuc {
84*0fbbaa43SLionel Sambuc RECHEADER **keylist;
85*0fbbaa43SLionel Sambuc RECHEADER **keypos, **keyp;
86*0fbbaa43SLionel Sambuc RECHEADER *buffer;
87*0fbbaa43SLionel Sambuc size_t bufsize = DEFBUFSIZE;
88*0fbbaa43SLionel Sambuc u_char *bufend;
89*0fbbaa43SLionel Sambuc int mfct = 0;
90*0fbbaa43SLionel Sambuc int c, nelem;
91*0fbbaa43SLionel Sambuc get_func_t get;
92*0fbbaa43SLionel Sambuc RECHEADER *crec;
93*0fbbaa43SLionel Sambuc RECHEADER *nbuffer;
94*0fbbaa43SLionel Sambuc FILE *fp, *tmp_fp;
95*0fbbaa43SLionel Sambuc int file_no;
96*0fbbaa43SLionel Sambuc int max_recs = DEBUG('m') ? 16 : MAXNUM;
97*0fbbaa43SLionel Sambuc
98*0fbbaa43SLionel Sambuc buffer = allocrec(NULL, bufsize);
99*0fbbaa43SLionel Sambuc bufend = (u_char *)buffer + bufsize;
100*0fbbaa43SLionel Sambuc /* Allocate double length keymap for radix_sort */
101*0fbbaa43SLionel Sambuc keylist = malloc(2 * max_recs * sizeof(*keylist));
102*0fbbaa43SLionel Sambuc if (buffer == NULL || keylist == NULL)
103*0fbbaa43SLionel Sambuc err(2, "failed to malloc initial buffer or keylist");
104*0fbbaa43SLionel Sambuc
105*0fbbaa43SLionel Sambuc if (SINGL_FLD)
106*0fbbaa43SLionel Sambuc /* Key and data are one! */
107*0fbbaa43SLionel Sambuc get = makeline;
108*0fbbaa43SLionel Sambuc else
109*0fbbaa43SLionel Sambuc /* Key (merged key fields) added before data */
110*0fbbaa43SLionel Sambuc get = makekey;
111*0fbbaa43SLionel Sambuc
112*0fbbaa43SLionel Sambuc file_no = 0;
113*0fbbaa43SLionel Sambuc #if defined(__minix)
114*0fbbaa43SLionel Sambuc /* LSC FIXME: Not very pretty, but reduce the diff */
115*0fbbaa43SLionel Sambuc #include "pathnames.h"
116*0fbbaa43SLionel Sambuc if (!strcmp(filelist->names[0], _PATH_STDIN))
117*0fbbaa43SLionel Sambuc fp = stdin;
118*0fbbaa43SLionel Sambuc else
119*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
120*0fbbaa43SLionel Sambuc fp = fopen(filelist->names[0], "r");
121*0fbbaa43SLionel Sambuc if (fp == NULL)
122*0fbbaa43SLionel Sambuc err(2, "%s", filelist->names[0]);
123*0fbbaa43SLionel Sambuc
124*0fbbaa43SLionel Sambuc /* Loop through reads of chunk of input files that get sorted
125*0fbbaa43SLionel Sambuc * and then merged together. */
126*0fbbaa43SLionel Sambuc for (;;) {
127*0fbbaa43SLionel Sambuc keypos = keylist;
128*0fbbaa43SLionel Sambuc nelem = 0;
129*0fbbaa43SLionel Sambuc crec = buffer;
130*0fbbaa43SLionel Sambuc makeline_copydown(crec);
131*0fbbaa43SLionel Sambuc
132*0fbbaa43SLionel Sambuc /* Loop reading records */
133*0fbbaa43SLionel Sambuc for (;;) {
134*0fbbaa43SLionel Sambuc c = get(fp, crec, bufend, ftbl);
135*0fbbaa43SLionel Sambuc /* 'c' is 0, EOF or BUFFEND */
136*0fbbaa43SLionel Sambuc if (c == 0) {
137*0fbbaa43SLionel Sambuc /* Save start of key in input buffer */
138*0fbbaa43SLionel Sambuc *keypos++ = crec;
139*0fbbaa43SLionel Sambuc if (++nelem == max_recs) {
140*0fbbaa43SLionel Sambuc c = BUFFEND;
141*0fbbaa43SLionel Sambuc break;
142*0fbbaa43SLionel Sambuc }
143*0fbbaa43SLionel Sambuc crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
144*0fbbaa43SLionel Sambuc continue;
145*0fbbaa43SLionel Sambuc }
146*0fbbaa43SLionel Sambuc if (c == EOF) {
147*0fbbaa43SLionel Sambuc /* try next file */
148*0fbbaa43SLionel Sambuc if (++file_no >= nfiles)
149*0fbbaa43SLionel Sambuc /* no more files */
150*0fbbaa43SLionel Sambuc break;
151*0fbbaa43SLionel Sambuc #if defined(__minix)
152*0fbbaa43SLionel Sambuc if (!strcmp(filelist->names[0], _PATH_STDIN))
153*0fbbaa43SLionel Sambuc fp = stdin;
154*0fbbaa43SLionel Sambuc else
155*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
156*0fbbaa43SLionel Sambuc fp = fopen(filelist->names[file_no], "r");
157*0fbbaa43SLionel Sambuc if (fp == NULL)
158*0fbbaa43SLionel Sambuc err(2, "%s", filelist->names[file_no]);
159*0fbbaa43SLionel Sambuc continue;
160*0fbbaa43SLionel Sambuc }
161*0fbbaa43SLionel Sambuc if (nelem >= max_recs
162*0fbbaa43SLionel Sambuc || (bufsize >= MAXBUFSIZE && nelem > 8))
163*0fbbaa43SLionel Sambuc /* Need to sort and save this lot of data */
164*0fbbaa43SLionel Sambuc break;
165*0fbbaa43SLionel Sambuc
166*0fbbaa43SLionel Sambuc /* c == BUFFEND, and we can process more data */
167*0fbbaa43SLionel Sambuc /* Allocate a larger buffer for this lot of data */
168*0fbbaa43SLionel Sambuc bufsize *= 2;
169*0fbbaa43SLionel Sambuc nbuffer = allocrec(buffer, bufsize);
170*0fbbaa43SLionel Sambuc if (!nbuffer) {
171*0fbbaa43SLionel Sambuc err(2, "failed to realloc buffer to %zu bytes",
172*0fbbaa43SLionel Sambuc bufsize);
173*0fbbaa43SLionel Sambuc }
174*0fbbaa43SLionel Sambuc
175*0fbbaa43SLionel Sambuc /* patch up keylist[] */
176*0fbbaa43SLionel Sambuc for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
177*0fbbaa43SLionel Sambuc *keyp = nbuffer + (*keyp - buffer);
178*0fbbaa43SLionel Sambuc
179*0fbbaa43SLionel Sambuc crec = nbuffer + (crec - buffer);
180*0fbbaa43SLionel Sambuc buffer = nbuffer;
181*0fbbaa43SLionel Sambuc bufend = (u_char *)buffer + bufsize;
182*0fbbaa43SLionel Sambuc }
183*0fbbaa43SLionel Sambuc
184*0fbbaa43SLionel Sambuc /* Sort this set of records */
185*0fbbaa43SLionel Sambuc radix_sort(keylist, keylist + max_recs, nelem);
186*0fbbaa43SLionel Sambuc
187*0fbbaa43SLionel Sambuc if (c == EOF && mfct == 0) {
188*0fbbaa43SLionel Sambuc /* all the data is (sorted) in the buffer */
189*0fbbaa43SLionel Sambuc append(keylist, nelem, outfp,
190*0fbbaa43SLionel Sambuc DEBUG('k') ? putkeydump : putline);
191*0fbbaa43SLionel Sambuc break;
192*0fbbaa43SLionel Sambuc }
193*0fbbaa43SLionel Sambuc
194*0fbbaa43SLionel Sambuc /* Save current data to a temporary file for a later merge */
195*0fbbaa43SLionel Sambuc if (nelem != 0) {
196*0fbbaa43SLionel Sambuc tmp_fp = ftmp();
197*0fbbaa43SLionel Sambuc append(keylist, nelem, tmp_fp, putrec);
198*0fbbaa43SLionel Sambuc save_for_merge(tmp_fp, geteasy, ftbl);
199*0fbbaa43SLionel Sambuc }
200*0fbbaa43SLionel Sambuc mfct = 1;
201*0fbbaa43SLionel Sambuc
202*0fbbaa43SLionel Sambuc if (c == EOF) {
203*0fbbaa43SLionel Sambuc /* merge to output file */
204*0fbbaa43SLionel Sambuc merge_sort(outfp,
205*0fbbaa43SLionel Sambuc DEBUG('k') ? putkeydump : putline, ftbl);
206*0fbbaa43SLionel Sambuc break;
207*0fbbaa43SLionel Sambuc }
208*0fbbaa43SLionel Sambuc }
209*0fbbaa43SLionel Sambuc
210*0fbbaa43SLionel Sambuc free(keylist);
211*0fbbaa43SLionel Sambuc keylist = NULL;
212*0fbbaa43SLionel Sambuc free(buffer);
213*0fbbaa43SLionel Sambuc buffer = NULL;
214*0fbbaa43SLionel Sambuc }
215