1*0fbbaa43SLionel Sambuc /* $NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $ */
2*0fbbaa43SLionel Sambuc
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5*0fbbaa43SLionel Sambuc * All rights reserved.
6*0fbbaa43SLionel Sambuc *
7*0fbbaa43SLionel Sambuc * This code is derived from software contributed to The NetBSD Foundation
8*0fbbaa43SLionel Sambuc * by Ben Harris and Jaromir Dolecek.
9*0fbbaa43SLionel Sambuc *
10*0fbbaa43SLionel Sambuc * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc * are met:
13*0fbbaa43SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc * documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc *
19*0fbbaa43SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*0fbbaa43SLionel Sambuc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*0fbbaa43SLionel Sambuc * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*0fbbaa43SLionel Sambuc * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*0fbbaa43SLionel Sambuc * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*0fbbaa43SLionel Sambuc * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*0fbbaa43SLionel Sambuc * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*0fbbaa43SLionel Sambuc * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*0fbbaa43SLionel Sambuc * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*0fbbaa43SLionel Sambuc * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*0fbbaa43SLionel Sambuc * POSSIBILITY OF SUCH DAMAGE.
30*0fbbaa43SLionel Sambuc */
31*0fbbaa43SLionel Sambuc
32*0fbbaa43SLionel Sambuc /*-
33*0fbbaa43SLionel Sambuc * Copyright (c) 1993
34*0fbbaa43SLionel Sambuc * The Regents of the University of California. All rights reserved.
35*0fbbaa43SLionel Sambuc *
36*0fbbaa43SLionel Sambuc * This code is derived from software contributed to Berkeley by
37*0fbbaa43SLionel Sambuc * Peter McIlroy.
38*0fbbaa43SLionel Sambuc *
39*0fbbaa43SLionel Sambuc * Redistribution and use in source and binary forms, with or without
40*0fbbaa43SLionel Sambuc * modification, are permitted provided that the following conditions
41*0fbbaa43SLionel Sambuc * are met:
42*0fbbaa43SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
43*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer.
44*0fbbaa43SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
45*0fbbaa43SLionel Sambuc * notice, this list of conditions and the following disclaimer in the
46*0fbbaa43SLionel Sambuc * documentation and/or other materials provided with the distribution.
47*0fbbaa43SLionel Sambuc * 3. Neither the name of the University nor the names of its contributors
48*0fbbaa43SLionel Sambuc * may be used to endorse or promote products derived from this software
49*0fbbaa43SLionel Sambuc * without specific prior written permission.
50*0fbbaa43SLionel Sambuc *
51*0fbbaa43SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52*0fbbaa43SLionel Sambuc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53*0fbbaa43SLionel Sambuc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54*0fbbaa43SLionel Sambuc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55*0fbbaa43SLionel Sambuc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56*0fbbaa43SLionel Sambuc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57*0fbbaa43SLionel Sambuc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58*0fbbaa43SLionel Sambuc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59*0fbbaa43SLionel Sambuc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60*0fbbaa43SLionel Sambuc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61*0fbbaa43SLionel Sambuc * SUCH DAMAGE.
62*0fbbaa43SLionel Sambuc */
63*0fbbaa43SLionel Sambuc
64*0fbbaa43SLionel Sambuc #include "sort.h"
65*0fbbaa43SLionel Sambuc #include "fsort.h"
66*0fbbaa43SLionel Sambuc
67*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
68*0fbbaa43SLionel Sambuc
69*0fbbaa43SLionel Sambuc #include <stdlib.h>
70*0fbbaa43SLionel Sambuc #include <string.h>
71*0fbbaa43SLionel Sambuc #include <unistd.h>
72*0fbbaa43SLionel Sambuc #include <util.h>
73*0fbbaa43SLionel Sambuc
74*0fbbaa43SLionel Sambuc /* Subroutines using comparisons: merge sort and check order */
75*0fbbaa43SLionel Sambuc #define DELETE (1)
76*0fbbaa43SLionel Sambuc
77*0fbbaa43SLionel Sambuc typedef struct mfile {
78*0fbbaa43SLionel Sambuc FILE *fp;
79*0fbbaa43SLionel Sambuc get_func_t get;
80*0fbbaa43SLionel Sambuc RECHEADER *rec;
81*0fbbaa43SLionel Sambuc u_char *end;
82*0fbbaa43SLionel Sambuc } MFILE;
83*0fbbaa43SLionel Sambuc
84*0fbbaa43SLionel Sambuc static int cmp(RECHEADER *, RECHEADER *);
85*0fbbaa43SLionel Sambuc static int insert(struct mfile **, struct mfile *, int, int);
86*0fbbaa43SLionel Sambuc static void merge_sort_fstack(FILE *, put_func_t, struct field *);
87*0fbbaa43SLionel Sambuc
88*0fbbaa43SLionel Sambuc /*
89*0fbbaa43SLionel Sambuc * Number of files merge() can merge in one pass.
90*0fbbaa43SLionel Sambuc */
91*0fbbaa43SLionel Sambuc #define MERGE_FNUM 16
92*0fbbaa43SLionel Sambuc
93*0fbbaa43SLionel Sambuc static struct mfile fstack[MERGE_FNUM];
94*0fbbaa43SLionel Sambuc static struct mfile fstack_1[MERGE_FNUM];
95*0fbbaa43SLionel Sambuc static struct mfile fstack_2[MERGE_FNUM];
96*0fbbaa43SLionel Sambuc static int fstack_count, fstack_1_count, fstack_2_count;
97*0fbbaa43SLionel Sambuc
98*0fbbaa43SLionel Sambuc void
save_for_merge(FILE * fp,get_func_t get,struct field * ftbl)99*0fbbaa43SLionel Sambuc save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
100*0fbbaa43SLionel Sambuc {
101*0fbbaa43SLionel Sambuc FILE *mfp, *mfp1, *mfp2;
102*0fbbaa43SLionel Sambuc
103*0fbbaa43SLionel Sambuc if (fstack_count == MERGE_FNUM) {
104*0fbbaa43SLionel Sambuc /* Must reduce the number of temporary files */
105*0fbbaa43SLionel Sambuc mfp = ftmp();
106*0fbbaa43SLionel Sambuc merge_sort_fstack(mfp, putrec, ftbl);
107*0fbbaa43SLionel Sambuc /* Save output in next layer */
108*0fbbaa43SLionel Sambuc if (fstack_1_count == MERGE_FNUM) {
109*0fbbaa43SLionel Sambuc mfp1 = ftmp();
110*0fbbaa43SLionel Sambuc memcpy(fstack, fstack_1, sizeof fstack);
111*0fbbaa43SLionel Sambuc merge_sort_fstack(mfp1, putrec, ftbl);
112*0fbbaa43SLionel Sambuc if (fstack_2_count == MERGE_FNUM) {
113*0fbbaa43SLionel Sambuc /* More than 4096 files! */
114*0fbbaa43SLionel Sambuc mfp2 = ftmp();
115*0fbbaa43SLionel Sambuc memcpy(fstack, fstack_2, sizeof fstack);
116*0fbbaa43SLionel Sambuc merge_sort_fstack(mfp2, putrec, ftbl);
117*0fbbaa43SLionel Sambuc fstack_2[0].fp = mfp2;
118*0fbbaa43SLionel Sambuc fstack_2_count = 1;
119*0fbbaa43SLionel Sambuc }
120*0fbbaa43SLionel Sambuc fstack_2[fstack_2_count].fp = mfp1;
121*0fbbaa43SLionel Sambuc fstack_2[fstack_2_count].get = geteasy;
122*0fbbaa43SLionel Sambuc fstack_2_count++;
123*0fbbaa43SLionel Sambuc fstack_1_count = 0;
124*0fbbaa43SLionel Sambuc }
125*0fbbaa43SLionel Sambuc fstack_1[fstack_1_count].fp = mfp;
126*0fbbaa43SLionel Sambuc fstack_1[fstack_1_count].get = geteasy;
127*0fbbaa43SLionel Sambuc fstack_1_count++;
128*0fbbaa43SLionel Sambuc fstack_count = 0;
129*0fbbaa43SLionel Sambuc }
130*0fbbaa43SLionel Sambuc
131*0fbbaa43SLionel Sambuc fstack[fstack_count].fp = fp;
132*0fbbaa43SLionel Sambuc fstack[fstack_count++].get = get;
133*0fbbaa43SLionel Sambuc }
134*0fbbaa43SLionel Sambuc
135*0fbbaa43SLionel Sambuc void
fmerge(struct filelist * filelist,int nfiles,FILE * outfp,struct field * ftbl)136*0fbbaa43SLionel Sambuc fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
137*0fbbaa43SLionel Sambuc {
138*0fbbaa43SLionel Sambuc get_func_t get = SINGL_FLD ? makeline : makekey;
139*0fbbaa43SLionel Sambuc FILE *fp;
140*0fbbaa43SLionel Sambuc int i;
141*0fbbaa43SLionel Sambuc
142*0fbbaa43SLionel Sambuc for (i = 0; i < nfiles; i++) {
143*0fbbaa43SLionel Sambuc #if defined(__minix)
144*0fbbaa43SLionel Sambuc /* LSC FIXME: Not very pretty, but reduce the diff */
145*0fbbaa43SLionel Sambuc #include "pathnames.h"
146*0fbbaa43SLionel Sambuc if (!strcmp(filelist->names[0], _PATH_STDIN))
147*0fbbaa43SLionel Sambuc fp = stdin;
148*0fbbaa43SLionel Sambuc else
149*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
150*0fbbaa43SLionel Sambuc fp = fopen(filelist->names[i], "r");
151*0fbbaa43SLionel Sambuc if (fp == NULL)
152*0fbbaa43SLionel Sambuc err(2, "%s", filelist->names[i]);
153*0fbbaa43SLionel Sambuc save_for_merge(fp, get, ftbl);
154*0fbbaa43SLionel Sambuc }
155*0fbbaa43SLionel Sambuc
156*0fbbaa43SLionel Sambuc merge_sort(outfp, putline, ftbl);
157*0fbbaa43SLionel Sambuc }
158*0fbbaa43SLionel Sambuc
159*0fbbaa43SLionel Sambuc void
merge_sort(FILE * outfp,put_func_t put,struct field * ftbl)160*0fbbaa43SLionel Sambuc merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
161*0fbbaa43SLionel Sambuc {
162*0fbbaa43SLionel Sambuc int count = fstack_1_count + fstack_2_count;
163*0fbbaa43SLionel Sambuc FILE *mfp;
164*0fbbaa43SLionel Sambuc int i;
165*0fbbaa43SLionel Sambuc
166*0fbbaa43SLionel Sambuc if (count == 0) {
167*0fbbaa43SLionel Sambuc /* All files in initial array */
168*0fbbaa43SLionel Sambuc merge_sort_fstack(outfp, put, ftbl);
169*0fbbaa43SLionel Sambuc return;
170*0fbbaa43SLionel Sambuc }
171*0fbbaa43SLionel Sambuc
172*0fbbaa43SLionel Sambuc count += fstack_count;
173*0fbbaa43SLionel Sambuc
174*0fbbaa43SLionel Sambuc /* Too many files for one merge sort */
175*0fbbaa43SLionel Sambuc for (;;) {
176*0fbbaa43SLionel Sambuc /* Sort latest 16 files */
177*0fbbaa43SLionel Sambuc i = count;
178*0fbbaa43SLionel Sambuc if (i > MERGE_FNUM)
179*0fbbaa43SLionel Sambuc i = MERGE_FNUM;
180*0fbbaa43SLionel Sambuc while (fstack_count > 0)
181*0fbbaa43SLionel Sambuc fstack[--i] = fstack[--fstack_count];
182*0fbbaa43SLionel Sambuc while (i > 0 && fstack_1_count > 0)
183*0fbbaa43SLionel Sambuc fstack[--i] = fstack_1[--fstack_1_count];
184*0fbbaa43SLionel Sambuc while (i > 0)
185*0fbbaa43SLionel Sambuc fstack[--i] = fstack_2[--fstack_2_count];
186*0fbbaa43SLionel Sambuc if (count <= MERGE_FNUM) {
187*0fbbaa43SLionel Sambuc /* Got all the data */
188*0fbbaa43SLionel Sambuc fstack_count = count;
189*0fbbaa43SLionel Sambuc merge_sort_fstack(outfp, put, ftbl);
190*0fbbaa43SLionel Sambuc return;
191*0fbbaa43SLionel Sambuc }
192*0fbbaa43SLionel Sambuc mfp = ftmp();
193*0fbbaa43SLionel Sambuc fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
194*0fbbaa43SLionel Sambuc merge_sort_fstack(mfp, putrec, ftbl);
195*0fbbaa43SLionel Sambuc fstack[0].fp = mfp;
196*0fbbaa43SLionel Sambuc fstack[0].get = geteasy;
197*0fbbaa43SLionel Sambuc fstack_count = 1;
198*0fbbaa43SLionel Sambuc count -= MERGE_FNUM - 1;
199*0fbbaa43SLionel Sambuc }
200*0fbbaa43SLionel Sambuc }
201*0fbbaa43SLionel Sambuc
202*0fbbaa43SLionel Sambuc static void
merge_sort_fstack(FILE * outfp,put_func_t put,struct field * ftbl)203*0fbbaa43SLionel Sambuc merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
204*0fbbaa43SLionel Sambuc {
205*0fbbaa43SLionel Sambuc struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
206*0fbbaa43SLionel Sambuc RECHEADER *new_rec;
207*0fbbaa43SLionel Sambuc u_char *new_end;
208*0fbbaa43SLionel Sambuc void *tmp;
209*0fbbaa43SLionel Sambuc int c, i, nfiles;
210*0fbbaa43SLionel Sambuc size_t sz;
211*0fbbaa43SLionel Sambuc
212*0fbbaa43SLionel Sambuc /* Read one record from each file (read again if a duplicate) */
213*0fbbaa43SLionel Sambuc for (nfiles = i = 0; i < fstack_count; i++) {
214*0fbbaa43SLionel Sambuc cfile = &fstack[i];
215*0fbbaa43SLionel Sambuc if (cfile->rec == NULL) {
216*0fbbaa43SLionel Sambuc cfile->rec = allocrec(NULL, DEFLLEN);
217*0fbbaa43SLionel Sambuc cfile->end = (u_char *)cfile->rec + DEFLLEN;
218*0fbbaa43SLionel Sambuc }
219*0fbbaa43SLionel Sambuc rewind(cfile->fp);
220*0fbbaa43SLionel Sambuc
221*0fbbaa43SLionel Sambuc for (;;) {
222*0fbbaa43SLionel Sambuc c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
223*0fbbaa43SLionel Sambuc if (c == EOF)
224*0fbbaa43SLionel Sambuc break;
225*0fbbaa43SLionel Sambuc
226*0fbbaa43SLionel Sambuc if (c == BUFFEND) {
227*0fbbaa43SLionel Sambuc /* Double buffer size */
228*0fbbaa43SLionel Sambuc sz = (cfile->end - (u_char *)cfile->rec) * 2;
229*0fbbaa43SLionel Sambuc cfile->rec = allocrec(cfile->rec, sz);
230*0fbbaa43SLionel Sambuc cfile->end = (u_char *)cfile->rec + sz;
231*0fbbaa43SLionel Sambuc continue;
232*0fbbaa43SLionel Sambuc }
233*0fbbaa43SLionel Sambuc
234*0fbbaa43SLionel Sambuc if (nfiles != 0) {
235*0fbbaa43SLionel Sambuc if (insert(flist, cfile, nfiles, !DELETE))
236*0fbbaa43SLionel Sambuc /* Duplicate removed */
237*0fbbaa43SLionel Sambuc continue;
238*0fbbaa43SLionel Sambuc } else
239*0fbbaa43SLionel Sambuc flist[0] = cfile;
240*0fbbaa43SLionel Sambuc nfiles++;
241*0fbbaa43SLionel Sambuc break;
242*0fbbaa43SLionel Sambuc }
243*0fbbaa43SLionel Sambuc }
244*0fbbaa43SLionel Sambuc
245*0fbbaa43SLionel Sambuc if (nfiles == 0)
246*0fbbaa43SLionel Sambuc return;
247*0fbbaa43SLionel Sambuc
248*0fbbaa43SLionel Sambuc /*
249*0fbbaa43SLionel Sambuc * We now loop reading a new record from the file with the
250*0fbbaa43SLionel Sambuc * 'sorted first' existing record.
251*0fbbaa43SLionel Sambuc * As each record is added, the 'first' record is written to the
252*0fbbaa43SLionel Sambuc * output file - maintaining one record from each file in the sorted
253*0fbbaa43SLionel Sambuc * list.
254*0fbbaa43SLionel Sambuc */
255*0fbbaa43SLionel Sambuc new_rec = allocrec(NULL, DEFLLEN);
256*0fbbaa43SLionel Sambuc new_end = (u_char *)new_rec + DEFLLEN;
257*0fbbaa43SLionel Sambuc for (;;) {
258*0fbbaa43SLionel Sambuc cfile = flist[0];
259*0fbbaa43SLionel Sambuc c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
260*0fbbaa43SLionel Sambuc if (c == EOF) {
261*0fbbaa43SLionel Sambuc /* Write out last record from now-empty input */
262*0fbbaa43SLionel Sambuc put(cfile->rec, outfp);
263*0fbbaa43SLionel Sambuc if (--nfiles == 0)
264*0fbbaa43SLionel Sambuc break;
265*0fbbaa43SLionel Sambuc /* Replace from file with now-first sorted record. */
266*0fbbaa43SLionel Sambuc /* (Moving base 'flist' saves copying everything!) */
267*0fbbaa43SLionel Sambuc flist++;
268*0fbbaa43SLionel Sambuc continue;
269*0fbbaa43SLionel Sambuc }
270*0fbbaa43SLionel Sambuc if (c == BUFFEND) {
271*0fbbaa43SLionel Sambuc /* Buffer not large enough - double in size */
272*0fbbaa43SLionel Sambuc sz = (new_end - (u_char *)new_rec) * 2;
273*0fbbaa43SLionel Sambuc new_rec = allocrec(new_rec, sz);
274*0fbbaa43SLionel Sambuc new_end = (u_char *)new_rec +sz;
275*0fbbaa43SLionel Sambuc continue;
276*0fbbaa43SLionel Sambuc }
277*0fbbaa43SLionel Sambuc
278*0fbbaa43SLionel Sambuc /* Swap in new buffer, saving old */
279*0fbbaa43SLionel Sambuc tmp = cfile->rec;
280*0fbbaa43SLionel Sambuc cfile->rec = new_rec;
281*0fbbaa43SLionel Sambuc new_rec = tmp;
282*0fbbaa43SLionel Sambuc tmp = cfile->end;
283*0fbbaa43SLionel Sambuc cfile->end = new_end;
284*0fbbaa43SLionel Sambuc new_end = tmp;
285*0fbbaa43SLionel Sambuc
286*0fbbaa43SLionel Sambuc /* Add into sort, removing the original first entry */
287*0fbbaa43SLionel Sambuc c = insert(flist, cfile, nfiles, DELETE);
288*0fbbaa43SLionel Sambuc if (c != 0 || (UNIQUE && cfile == flist[0]
289*0fbbaa43SLionel Sambuc && cmp(new_rec, cfile->rec) == 0)) {
290*0fbbaa43SLionel Sambuc /* Was an unwanted duplicate, restore buffer */
291*0fbbaa43SLionel Sambuc tmp = cfile->rec;
292*0fbbaa43SLionel Sambuc cfile->rec = new_rec;
293*0fbbaa43SLionel Sambuc new_rec = tmp;
294*0fbbaa43SLionel Sambuc tmp = cfile->end;
295*0fbbaa43SLionel Sambuc cfile->end = new_end;
296*0fbbaa43SLionel Sambuc new_end = tmp;
297*0fbbaa43SLionel Sambuc continue;
298*0fbbaa43SLionel Sambuc }
299*0fbbaa43SLionel Sambuc
300*0fbbaa43SLionel Sambuc /* Write out 'old' record */
301*0fbbaa43SLionel Sambuc put(new_rec, outfp);
302*0fbbaa43SLionel Sambuc }
303*0fbbaa43SLionel Sambuc
304*0fbbaa43SLionel Sambuc free(new_rec);
305*0fbbaa43SLionel Sambuc }
306*0fbbaa43SLionel Sambuc
307*0fbbaa43SLionel Sambuc /*
308*0fbbaa43SLionel Sambuc * if delete: inserts rec in flist, deletes flist[0];
309*0fbbaa43SLionel Sambuc * otherwise just inserts *rec in flist.
310*0fbbaa43SLionel Sambuc * Returns 1 if record is a duplicate to be ignored.
311*0fbbaa43SLionel Sambuc */
312*0fbbaa43SLionel Sambuc static int
insert(struct mfile ** flist,struct mfile * rec,int ttop,int delete)313*0fbbaa43SLionel Sambuc insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
314*0fbbaa43SLionel Sambuc {
315*0fbbaa43SLionel Sambuc int mid, top = ttop, bot = 0, cmpv = 1;
316*0fbbaa43SLionel Sambuc
317*0fbbaa43SLionel Sambuc for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
318*0fbbaa43SLionel Sambuc cmpv = cmp(rec->rec, flist[mid]->rec);
319*0fbbaa43SLionel Sambuc if (cmpv == 0 ) {
320*0fbbaa43SLionel Sambuc if (UNIQUE)
321*0fbbaa43SLionel Sambuc /* Duplicate key, read another record */
322*0fbbaa43SLionel Sambuc /* NB: This doesn't guarantee to keep any
323*0fbbaa43SLionel Sambuc * particular record. */
324*0fbbaa43SLionel Sambuc return 1;
325*0fbbaa43SLionel Sambuc /*
326*0fbbaa43SLionel Sambuc * Apply sort by input file order.
327*0fbbaa43SLionel Sambuc * We could truncate the sort is the fileno are
328*0fbbaa43SLionel Sambuc * adjacent - but that is all too hard!
329*0fbbaa43SLionel Sambuc * The fileno cannot be equal, since we only have one
330*0fbbaa43SLionel Sambuc * record from each file (+ flist[0] which never
331*0fbbaa43SLionel Sambuc * comes here).
332*0fbbaa43SLionel Sambuc */
333*0fbbaa43SLionel Sambuc cmpv = rec < flist[mid] ? -1 : 1;
334*0fbbaa43SLionel Sambuc if (REVERSE)
335*0fbbaa43SLionel Sambuc cmpv = -cmpv;
336*0fbbaa43SLionel Sambuc }
337*0fbbaa43SLionel Sambuc if (cmpv < 0)
338*0fbbaa43SLionel Sambuc top = mid;
339*0fbbaa43SLionel Sambuc else
340*0fbbaa43SLionel Sambuc bot = mid;
341*0fbbaa43SLionel Sambuc }
342*0fbbaa43SLionel Sambuc
343*0fbbaa43SLionel Sambuc /* At this point we haven't yet compared against flist[0] */
344*0fbbaa43SLionel Sambuc
345*0fbbaa43SLionel Sambuc if (delete) {
346*0fbbaa43SLionel Sambuc /* flist[0] is ourselves, only the caller knows the old data */
347*0fbbaa43SLionel Sambuc if (bot != 0) {
348*0fbbaa43SLionel Sambuc memmove(flist, flist + 1, bot * sizeof(MFILE *));
349*0fbbaa43SLionel Sambuc flist[bot] = rec;
350*0fbbaa43SLionel Sambuc }
351*0fbbaa43SLionel Sambuc return 0;
352*0fbbaa43SLionel Sambuc }
353*0fbbaa43SLionel Sambuc
354*0fbbaa43SLionel Sambuc /* Inserting original set of records */
355*0fbbaa43SLionel Sambuc
356*0fbbaa43SLionel Sambuc if (bot == 0 && cmpv != 0) {
357*0fbbaa43SLionel Sambuc /* Doesn't match flist[1], must compare with flist[0] */
358*0fbbaa43SLionel Sambuc cmpv = cmp(rec->rec, flist[0]->rec);
359*0fbbaa43SLionel Sambuc if (cmpv == 0 && UNIQUE)
360*0fbbaa43SLionel Sambuc return 1;
361*0fbbaa43SLionel Sambuc /* Add matching keys in file order (ie new is later) */
362*0fbbaa43SLionel Sambuc if (cmpv < 0)
363*0fbbaa43SLionel Sambuc bot = -1;
364*0fbbaa43SLionel Sambuc }
365*0fbbaa43SLionel Sambuc bot++;
366*0fbbaa43SLionel Sambuc memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
367*0fbbaa43SLionel Sambuc flist[bot] = rec;
368*0fbbaa43SLionel Sambuc return 0;
369*0fbbaa43SLionel Sambuc }
370*0fbbaa43SLionel Sambuc
371*0fbbaa43SLionel Sambuc /*
372*0fbbaa43SLionel Sambuc * check order on one file
373*0fbbaa43SLionel Sambuc */
374*0fbbaa43SLionel Sambuc void
order(struct filelist * filelist,struct field * ftbl)375*0fbbaa43SLionel Sambuc order(struct filelist *filelist, struct field *ftbl)
376*0fbbaa43SLionel Sambuc {
377*0fbbaa43SLionel Sambuc get_func_t get = SINGL_FLD ? makeline : makekey;
378*0fbbaa43SLionel Sambuc RECHEADER *crec, *prec, *trec;
379*0fbbaa43SLionel Sambuc u_char *crec_end, *prec_end, *trec_end;
380*0fbbaa43SLionel Sambuc FILE *fp;
381*0fbbaa43SLionel Sambuc int c;
382*0fbbaa43SLionel Sambuc
383*0fbbaa43SLionel Sambuc #if defined(__minix)
384*0fbbaa43SLionel Sambuc if (!strcmp(filelist->names[0], _PATH_STDIN))
385*0fbbaa43SLionel Sambuc fp = stdin;
386*0fbbaa43SLionel Sambuc else
387*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
388*0fbbaa43SLionel Sambuc fp = fopen(filelist->names[0], "r");
389*0fbbaa43SLionel Sambuc if (fp == NULL)
390*0fbbaa43SLionel Sambuc err(2, "%s", filelist->names[0]);
391*0fbbaa43SLionel Sambuc
392*0fbbaa43SLionel Sambuc crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
393*0fbbaa43SLionel Sambuc crec_end = crec->data + DEFLLEN;
394*0fbbaa43SLionel Sambuc prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
395*0fbbaa43SLionel Sambuc prec_end = prec->data + DEFLLEN;
396*0fbbaa43SLionel Sambuc
397*0fbbaa43SLionel Sambuc /* XXX this does exit(0) for overlong lines */
398*0fbbaa43SLionel Sambuc if (get(fp, prec, prec_end, ftbl) != 0)
399*0fbbaa43SLionel Sambuc exit(0);
400*0fbbaa43SLionel Sambuc while (get(fp, crec, crec_end, ftbl) == 0) {
401*0fbbaa43SLionel Sambuc if (0 < (c = cmp(prec, crec))) {
402*0fbbaa43SLionel Sambuc crec->data[crec->length-1] = 0;
403*0fbbaa43SLionel Sambuc errx(1, "found disorder: %s", crec->data+crec->offset);
404*0fbbaa43SLionel Sambuc }
405*0fbbaa43SLionel Sambuc if (UNIQUE && !c) {
406*0fbbaa43SLionel Sambuc crec->data[crec->length-1] = 0;
407*0fbbaa43SLionel Sambuc errx(1, "found non-uniqueness: %s",
408*0fbbaa43SLionel Sambuc crec->data+crec->offset);
409*0fbbaa43SLionel Sambuc }
410*0fbbaa43SLionel Sambuc /*
411*0fbbaa43SLionel Sambuc * Swap pointers so that this record is on place pointed
412*0fbbaa43SLionel Sambuc * to by prec and new record is read to place pointed to by
413*0fbbaa43SLionel Sambuc * crec.
414*0fbbaa43SLionel Sambuc */
415*0fbbaa43SLionel Sambuc trec = prec;
416*0fbbaa43SLionel Sambuc prec = crec;
417*0fbbaa43SLionel Sambuc crec = trec;
418*0fbbaa43SLionel Sambuc trec_end = prec_end;
419*0fbbaa43SLionel Sambuc prec_end = crec_end;
420*0fbbaa43SLionel Sambuc crec_end = trec_end;
421*0fbbaa43SLionel Sambuc }
422*0fbbaa43SLionel Sambuc exit(0);
423*0fbbaa43SLionel Sambuc }
424*0fbbaa43SLionel Sambuc
425*0fbbaa43SLionel Sambuc static int
cmp(RECHEADER * rec1,RECHEADER * rec2)426*0fbbaa43SLionel Sambuc cmp(RECHEADER *rec1, RECHEADER *rec2)
427*0fbbaa43SLionel Sambuc {
428*0fbbaa43SLionel Sambuc int len;
429*0fbbaa43SLionel Sambuc int r;
430*0fbbaa43SLionel Sambuc
431*0fbbaa43SLionel Sambuc /* key is weights */
432*0fbbaa43SLionel Sambuc len = min(rec1->keylen, rec2->keylen);
433*0fbbaa43SLionel Sambuc r = memcmp(rec1->data, rec2->data, len);
434*0fbbaa43SLionel Sambuc if (r == 0)
435*0fbbaa43SLionel Sambuc r = rec1->keylen - rec2->keylen;
436*0fbbaa43SLionel Sambuc if (REVERSE)
437*0fbbaa43SLionel Sambuc r = -r;
438*0fbbaa43SLionel Sambuc return r;
439*0fbbaa43SLionel Sambuc }
440