xref: /minix3/usr.bin/sort/msort.c (revision 0fbbaa43e914d38ef3af549125d31574117d1ebf)
1*0fbbaa43SLionel Sambuc /*	$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $	*/
2*0fbbaa43SLionel Sambuc 
3*0fbbaa43SLionel Sambuc /*-
4*0fbbaa43SLionel Sambuc  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5*0fbbaa43SLionel Sambuc  * All rights reserved.
6*0fbbaa43SLionel Sambuc  *
7*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to The NetBSD Foundation
8*0fbbaa43SLionel Sambuc  * by Ben Harris and Jaromir Dolecek.
9*0fbbaa43SLionel Sambuc  *
10*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
11*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
12*0fbbaa43SLionel Sambuc  * are met:
13*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
14*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
15*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
16*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
17*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
18*0fbbaa43SLionel Sambuc  *
19*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*0fbbaa43SLionel Sambuc  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*0fbbaa43SLionel Sambuc  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*0fbbaa43SLionel Sambuc  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*0fbbaa43SLionel Sambuc  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*0fbbaa43SLionel Sambuc  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*0fbbaa43SLionel Sambuc  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*0fbbaa43SLionel Sambuc  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*0fbbaa43SLionel Sambuc  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*0fbbaa43SLionel Sambuc  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*0fbbaa43SLionel Sambuc  * POSSIBILITY OF SUCH DAMAGE.
30*0fbbaa43SLionel Sambuc  */
31*0fbbaa43SLionel Sambuc 
32*0fbbaa43SLionel Sambuc /*-
33*0fbbaa43SLionel Sambuc  * Copyright (c) 1993
34*0fbbaa43SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
35*0fbbaa43SLionel Sambuc  *
36*0fbbaa43SLionel Sambuc  * This code is derived from software contributed to Berkeley by
37*0fbbaa43SLionel Sambuc  * Peter McIlroy.
38*0fbbaa43SLionel Sambuc  *
39*0fbbaa43SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
40*0fbbaa43SLionel Sambuc  * modification, are permitted provided that the following conditions
41*0fbbaa43SLionel Sambuc  * are met:
42*0fbbaa43SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
43*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
44*0fbbaa43SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
45*0fbbaa43SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
46*0fbbaa43SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
47*0fbbaa43SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
48*0fbbaa43SLionel Sambuc  *    may be used to endorse or promote products derived from this software
49*0fbbaa43SLionel Sambuc  *    without specific prior written permission.
50*0fbbaa43SLionel Sambuc  *
51*0fbbaa43SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52*0fbbaa43SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53*0fbbaa43SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54*0fbbaa43SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55*0fbbaa43SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56*0fbbaa43SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57*0fbbaa43SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58*0fbbaa43SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59*0fbbaa43SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60*0fbbaa43SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61*0fbbaa43SLionel Sambuc  * SUCH DAMAGE.
62*0fbbaa43SLionel Sambuc  */
63*0fbbaa43SLionel Sambuc 
64*0fbbaa43SLionel Sambuc #include "sort.h"
65*0fbbaa43SLionel Sambuc #include "fsort.h"
66*0fbbaa43SLionel Sambuc 
67*0fbbaa43SLionel Sambuc __RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
68*0fbbaa43SLionel Sambuc 
69*0fbbaa43SLionel Sambuc #include <stdlib.h>
70*0fbbaa43SLionel Sambuc #include <string.h>
71*0fbbaa43SLionel Sambuc #include <unistd.h>
72*0fbbaa43SLionel Sambuc #include <util.h>
73*0fbbaa43SLionel Sambuc 
74*0fbbaa43SLionel Sambuc /* Subroutines using comparisons: merge sort and check order */
75*0fbbaa43SLionel Sambuc #define DELETE (1)
76*0fbbaa43SLionel Sambuc 
77*0fbbaa43SLionel Sambuc typedef struct mfile {
78*0fbbaa43SLionel Sambuc 	FILE         *fp;
79*0fbbaa43SLionel Sambuc 	get_func_t   get;
80*0fbbaa43SLionel Sambuc 	RECHEADER    *rec;
81*0fbbaa43SLionel Sambuc 	u_char       *end;
82*0fbbaa43SLionel Sambuc } MFILE;
83*0fbbaa43SLionel Sambuc 
84*0fbbaa43SLionel Sambuc static int cmp(RECHEADER *, RECHEADER *);
85*0fbbaa43SLionel Sambuc static int insert(struct mfile **, struct mfile *, int, int);
86*0fbbaa43SLionel Sambuc static void merge_sort_fstack(FILE *, put_func_t, struct field *);
87*0fbbaa43SLionel Sambuc 
88*0fbbaa43SLionel Sambuc /*
89*0fbbaa43SLionel Sambuc  * Number of files merge() can merge in one pass.
90*0fbbaa43SLionel Sambuc  */
91*0fbbaa43SLionel Sambuc #define MERGE_FNUM      16
92*0fbbaa43SLionel Sambuc 
93*0fbbaa43SLionel Sambuc static struct mfile fstack[MERGE_FNUM];
94*0fbbaa43SLionel Sambuc static struct mfile fstack_1[MERGE_FNUM];
95*0fbbaa43SLionel Sambuc static struct mfile fstack_2[MERGE_FNUM];
96*0fbbaa43SLionel Sambuc static int fstack_count, fstack_1_count, fstack_2_count;
97*0fbbaa43SLionel Sambuc 
98*0fbbaa43SLionel Sambuc void
save_for_merge(FILE * fp,get_func_t get,struct field * ftbl)99*0fbbaa43SLionel Sambuc save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
100*0fbbaa43SLionel Sambuc {
101*0fbbaa43SLionel Sambuc 	FILE *mfp, *mfp1, *mfp2;
102*0fbbaa43SLionel Sambuc 
103*0fbbaa43SLionel Sambuc 	if (fstack_count == MERGE_FNUM) {
104*0fbbaa43SLionel Sambuc 		/* Must reduce the number of temporary files */
105*0fbbaa43SLionel Sambuc 		mfp = ftmp();
106*0fbbaa43SLionel Sambuc 		merge_sort_fstack(mfp, putrec, ftbl);
107*0fbbaa43SLionel Sambuc 		/* Save output in next layer */
108*0fbbaa43SLionel Sambuc 		if (fstack_1_count == MERGE_FNUM) {
109*0fbbaa43SLionel Sambuc 			mfp1 = ftmp();
110*0fbbaa43SLionel Sambuc 			memcpy(fstack, fstack_1, sizeof fstack);
111*0fbbaa43SLionel Sambuc 			merge_sort_fstack(mfp1, putrec, ftbl);
112*0fbbaa43SLionel Sambuc 			if (fstack_2_count == MERGE_FNUM) {
113*0fbbaa43SLionel Sambuc 				/* More than 4096 files! */
114*0fbbaa43SLionel Sambuc 				mfp2 = ftmp();
115*0fbbaa43SLionel Sambuc 				memcpy(fstack, fstack_2, sizeof fstack);
116*0fbbaa43SLionel Sambuc 				merge_sort_fstack(mfp2, putrec, ftbl);
117*0fbbaa43SLionel Sambuc 				fstack_2[0].fp = mfp2;
118*0fbbaa43SLionel Sambuc 				fstack_2_count = 1;
119*0fbbaa43SLionel Sambuc 			}
120*0fbbaa43SLionel Sambuc 			fstack_2[fstack_2_count].fp = mfp1;
121*0fbbaa43SLionel Sambuc 			fstack_2[fstack_2_count].get = geteasy;
122*0fbbaa43SLionel Sambuc 			fstack_2_count++;
123*0fbbaa43SLionel Sambuc 			fstack_1_count = 0;
124*0fbbaa43SLionel Sambuc 		}
125*0fbbaa43SLionel Sambuc 		fstack_1[fstack_1_count].fp = mfp;
126*0fbbaa43SLionel Sambuc 		fstack_1[fstack_1_count].get = geteasy;
127*0fbbaa43SLionel Sambuc 		fstack_1_count++;
128*0fbbaa43SLionel Sambuc 		fstack_count = 0;
129*0fbbaa43SLionel Sambuc 	}
130*0fbbaa43SLionel Sambuc 
131*0fbbaa43SLionel Sambuc 	fstack[fstack_count].fp = fp;
132*0fbbaa43SLionel Sambuc 	fstack[fstack_count++].get = get;
133*0fbbaa43SLionel Sambuc }
134*0fbbaa43SLionel Sambuc 
135*0fbbaa43SLionel Sambuc void
fmerge(struct filelist * filelist,int nfiles,FILE * outfp,struct field * ftbl)136*0fbbaa43SLionel Sambuc fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
137*0fbbaa43SLionel Sambuc {
138*0fbbaa43SLionel Sambuc 	get_func_t get = SINGL_FLD ? makeline : makekey;
139*0fbbaa43SLionel Sambuc 	FILE *fp;
140*0fbbaa43SLionel Sambuc 	int i;
141*0fbbaa43SLionel Sambuc 
142*0fbbaa43SLionel Sambuc 	for (i = 0; i < nfiles; i++) {
143*0fbbaa43SLionel Sambuc #if defined(__minix)
144*0fbbaa43SLionel Sambuc 	/* LSC FIXME: Not very pretty, but reduce the diff */
145*0fbbaa43SLionel Sambuc #include "pathnames.h"
146*0fbbaa43SLionel Sambuc 		if (!strcmp(filelist->names[0], _PATH_STDIN))
147*0fbbaa43SLionel Sambuc 			fp = stdin;
148*0fbbaa43SLionel Sambuc 		else
149*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
150*0fbbaa43SLionel Sambuc 		fp = fopen(filelist->names[i], "r");
151*0fbbaa43SLionel Sambuc 		if (fp == NULL)
152*0fbbaa43SLionel Sambuc 			err(2, "%s", filelist->names[i]);
153*0fbbaa43SLionel Sambuc 		save_for_merge(fp, get, ftbl);
154*0fbbaa43SLionel Sambuc 	}
155*0fbbaa43SLionel Sambuc 
156*0fbbaa43SLionel Sambuc 	merge_sort(outfp, putline, ftbl);
157*0fbbaa43SLionel Sambuc }
158*0fbbaa43SLionel Sambuc 
159*0fbbaa43SLionel Sambuc void
merge_sort(FILE * outfp,put_func_t put,struct field * ftbl)160*0fbbaa43SLionel Sambuc merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
161*0fbbaa43SLionel Sambuc {
162*0fbbaa43SLionel Sambuc 	int count = fstack_1_count + fstack_2_count;
163*0fbbaa43SLionel Sambuc 	FILE *mfp;
164*0fbbaa43SLionel Sambuc 	int i;
165*0fbbaa43SLionel Sambuc 
166*0fbbaa43SLionel Sambuc 	if (count == 0) {
167*0fbbaa43SLionel Sambuc 		/* All files in initial array */
168*0fbbaa43SLionel Sambuc 		merge_sort_fstack(outfp, put, ftbl);
169*0fbbaa43SLionel Sambuc 		return;
170*0fbbaa43SLionel Sambuc 	}
171*0fbbaa43SLionel Sambuc 
172*0fbbaa43SLionel Sambuc 	count += fstack_count;
173*0fbbaa43SLionel Sambuc 
174*0fbbaa43SLionel Sambuc 	/* Too many files for one merge sort */
175*0fbbaa43SLionel Sambuc 	for (;;) {
176*0fbbaa43SLionel Sambuc 		/* Sort latest 16 files */
177*0fbbaa43SLionel Sambuc 		i = count;
178*0fbbaa43SLionel Sambuc 		if (i > MERGE_FNUM)
179*0fbbaa43SLionel Sambuc 			i = MERGE_FNUM;
180*0fbbaa43SLionel Sambuc 		while (fstack_count > 0)
181*0fbbaa43SLionel Sambuc 			fstack[--i] = fstack[--fstack_count];
182*0fbbaa43SLionel Sambuc 		while (i > 0 && fstack_1_count > 0)
183*0fbbaa43SLionel Sambuc 			fstack[--i] = fstack_1[--fstack_1_count];
184*0fbbaa43SLionel Sambuc 		while (i > 0)
185*0fbbaa43SLionel Sambuc 			fstack[--i] = fstack_2[--fstack_2_count];
186*0fbbaa43SLionel Sambuc 		if (count <= MERGE_FNUM) {
187*0fbbaa43SLionel Sambuc 			/* Got all the data */
188*0fbbaa43SLionel Sambuc 			fstack_count = count;
189*0fbbaa43SLionel Sambuc 			merge_sort_fstack(outfp, put, ftbl);
190*0fbbaa43SLionel Sambuc 			return;
191*0fbbaa43SLionel Sambuc 		}
192*0fbbaa43SLionel Sambuc 		mfp = ftmp();
193*0fbbaa43SLionel Sambuc 		fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
194*0fbbaa43SLionel Sambuc 		merge_sort_fstack(mfp, putrec, ftbl);
195*0fbbaa43SLionel Sambuc 		fstack[0].fp = mfp;
196*0fbbaa43SLionel Sambuc 		fstack[0].get = geteasy;
197*0fbbaa43SLionel Sambuc 		fstack_count = 1;
198*0fbbaa43SLionel Sambuc 		count -= MERGE_FNUM - 1;
199*0fbbaa43SLionel Sambuc 	}
200*0fbbaa43SLionel Sambuc }
201*0fbbaa43SLionel Sambuc 
202*0fbbaa43SLionel Sambuc static void
merge_sort_fstack(FILE * outfp,put_func_t put,struct field * ftbl)203*0fbbaa43SLionel Sambuc merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
204*0fbbaa43SLionel Sambuc {
205*0fbbaa43SLionel Sambuc 	struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
206*0fbbaa43SLionel Sambuc 	RECHEADER *new_rec;
207*0fbbaa43SLionel Sambuc 	u_char *new_end;
208*0fbbaa43SLionel Sambuc 	void *tmp;
209*0fbbaa43SLionel Sambuc 	int c, i, nfiles;
210*0fbbaa43SLionel Sambuc 	size_t sz;
211*0fbbaa43SLionel Sambuc 
212*0fbbaa43SLionel Sambuc 	/* Read one record from each file (read again if a duplicate) */
213*0fbbaa43SLionel Sambuc 	for (nfiles = i = 0; i < fstack_count; i++) {
214*0fbbaa43SLionel Sambuc 		cfile = &fstack[i];
215*0fbbaa43SLionel Sambuc 		if (cfile->rec == NULL) {
216*0fbbaa43SLionel Sambuc 			cfile->rec = allocrec(NULL, DEFLLEN);
217*0fbbaa43SLionel Sambuc 			cfile->end = (u_char *)cfile->rec + DEFLLEN;
218*0fbbaa43SLionel Sambuc 		}
219*0fbbaa43SLionel Sambuc 		rewind(cfile->fp);
220*0fbbaa43SLionel Sambuc 
221*0fbbaa43SLionel Sambuc 		for (;;) {
222*0fbbaa43SLionel Sambuc 			c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
223*0fbbaa43SLionel Sambuc 			if (c == EOF)
224*0fbbaa43SLionel Sambuc 				break;
225*0fbbaa43SLionel Sambuc 
226*0fbbaa43SLionel Sambuc 			if (c == BUFFEND) {
227*0fbbaa43SLionel Sambuc 				/* Double buffer size */
228*0fbbaa43SLionel Sambuc 				sz = (cfile->end - (u_char *)cfile->rec) * 2;
229*0fbbaa43SLionel Sambuc 				cfile->rec = allocrec(cfile->rec, sz);
230*0fbbaa43SLionel Sambuc 				cfile->end = (u_char *)cfile->rec + sz;
231*0fbbaa43SLionel Sambuc 				continue;
232*0fbbaa43SLionel Sambuc 			}
233*0fbbaa43SLionel Sambuc 
234*0fbbaa43SLionel Sambuc 			if (nfiles != 0) {
235*0fbbaa43SLionel Sambuc 				if (insert(flist, cfile, nfiles, !DELETE))
236*0fbbaa43SLionel Sambuc 					/* Duplicate removed */
237*0fbbaa43SLionel Sambuc 					continue;
238*0fbbaa43SLionel Sambuc 			} else
239*0fbbaa43SLionel Sambuc 				flist[0] = cfile;
240*0fbbaa43SLionel Sambuc 			nfiles++;
241*0fbbaa43SLionel Sambuc 			break;
242*0fbbaa43SLionel Sambuc 		}
243*0fbbaa43SLionel Sambuc 	}
244*0fbbaa43SLionel Sambuc 
245*0fbbaa43SLionel Sambuc 	if (nfiles == 0)
246*0fbbaa43SLionel Sambuc 		return;
247*0fbbaa43SLionel Sambuc 
248*0fbbaa43SLionel Sambuc 	/*
249*0fbbaa43SLionel Sambuc 	 * We now loop reading a new record from the file with the
250*0fbbaa43SLionel Sambuc 	 * 'sorted first' existing record.
251*0fbbaa43SLionel Sambuc 	 * As each record is added, the 'first' record is written to the
252*0fbbaa43SLionel Sambuc 	 * output file - maintaining one record from each file in the sorted
253*0fbbaa43SLionel Sambuc 	 * list.
254*0fbbaa43SLionel Sambuc 	 */
255*0fbbaa43SLionel Sambuc 	new_rec = allocrec(NULL, DEFLLEN);
256*0fbbaa43SLionel Sambuc 	new_end = (u_char *)new_rec + DEFLLEN;
257*0fbbaa43SLionel Sambuc 	for (;;) {
258*0fbbaa43SLionel Sambuc 		cfile = flist[0];
259*0fbbaa43SLionel Sambuc 		c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
260*0fbbaa43SLionel Sambuc 		if (c == EOF) {
261*0fbbaa43SLionel Sambuc 			/* Write out last record from now-empty input */
262*0fbbaa43SLionel Sambuc 			put(cfile->rec, outfp);
263*0fbbaa43SLionel Sambuc 			if (--nfiles == 0)
264*0fbbaa43SLionel Sambuc 				break;
265*0fbbaa43SLionel Sambuc 			/* Replace from file with now-first sorted record. */
266*0fbbaa43SLionel Sambuc 			/* (Moving base 'flist' saves copying everything!) */
267*0fbbaa43SLionel Sambuc 			flist++;
268*0fbbaa43SLionel Sambuc 			continue;
269*0fbbaa43SLionel Sambuc 		}
270*0fbbaa43SLionel Sambuc 		if (c == BUFFEND) {
271*0fbbaa43SLionel Sambuc 			/* Buffer not large enough - double in size */
272*0fbbaa43SLionel Sambuc 			sz = (new_end - (u_char *)new_rec) * 2;
273*0fbbaa43SLionel Sambuc 			new_rec = allocrec(new_rec, sz);
274*0fbbaa43SLionel Sambuc 			new_end = (u_char *)new_rec +sz;
275*0fbbaa43SLionel Sambuc 			continue;
276*0fbbaa43SLionel Sambuc 		}
277*0fbbaa43SLionel Sambuc 
278*0fbbaa43SLionel Sambuc 		/* Swap in new buffer, saving old */
279*0fbbaa43SLionel Sambuc 		tmp = cfile->rec;
280*0fbbaa43SLionel Sambuc 		cfile->rec = new_rec;
281*0fbbaa43SLionel Sambuc 		new_rec = tmp;
282*0fbbaa43SLionel Sambuc 		tmp = cfile->end;
283*0fbbaa43SLionel Sambuc 		cfile->end = new_end;
284*0fbbaa43SLionel Sambuc 		new_end = tmp;
285*0fbbaa43SLionel Sambuc 
286*0fbbaa43SLionel Sambuc 		/* Add into sort, removing the original first entry */
287*0fbbaa43SLionel Sambuc 		c = insert(flist, cfile, nfiles, DELETE);
288*0fbbaa43SLionel Sambuc 		if (c != 0 || (UNIQUE && cfile == flist[0]
289*0fbbaa43SLionel Sambuc 			    && cmp(new_rec, cfile->rec) == 0)) {
290*0fbbaa43SLionel Sambuc 			/* Was an unwanted duplicate, restore buffer */
291*0fbbaa43SLionel Sambuc 			tmp = cfile->rec;
292*0fbbaa43SLionel Sambuc 			cfile->rec = new_rec;
293*0fbbaa43SLionel Sambuc 			new_rec = tmp;
294*0fbbaa43SLionel Sambuc 			tmp = cfile->end;
295*0fbbaa43SLionel Sambuc 			cfile->end = new_end;
296*0fbbaa43SLionel Sambuc 			new_end = tmp;
297*0fbbaa43SLionel Sambuc 			continue;
298*0fbbaa43SLionel Sambuc 		}
299*0fbbaa43SLionel Sambuc 
300*0fbbaa43SLionel Sambuc 		/* Write out 'old' record */
301*0fbbaa43SLionel Sambuc 		put(new_rec, outfp);
302*0fbbaa43SLionel Sambuc 	}
303*0fbbaa43SLionel Sambuc 
304*0fbbaa43SLionel Sambuc 	free(new_rec);
305*0fbbaa43SLionel Sambuc }
306*0fbbaa43SLionel Sambuc 
307*0fbbaa43SLionel Sambuc /*
308*0fbbaa43SLionel Sambuc  * if delete: inserts rec in flist, deletes flist[0];
309*0fbbaa43SLionel Sambuc  * otherwise just inserts *rec in flist.
310*0fbbaa43SLionel Sambuc  * Returns 1 if record is a duplicate to be ignored.
311*0fbbaa43SLionel Sambuc  */
312*0fbbaa43SLionel Sambuc static int
insert(struct mfile ** flist,struct mfile * rec,int ttop,int delete)313*0fbbaa43SLionel Sambuc insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
314*0fbbaa43SLionel Sambuc {
315*0fbbaa43SLionel Sambuc 	int mid, top = ttop, bot = 0, cmpv = 1;
316*0fbbaa43SLionel Sambuc 
317*0fbbaa43SLionel Sambuc 	for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
318*0fbbaa43SLionel Sambuc 		cmpv = cmp(rec->rec, flist[mid]->rec);
319*0fbbaa43SLionel Sambuc 		if (cmpv == 0 ) {
320*0fbbaa43SLionel Sambuc 			if (UNIQUE)
321*0fbbaa43SLionel Sambuc 				/* Duplicate key, read another record */
322*0fbbaa43SLionel Sambuc 				/* NB: This doesn't guarantee to keep any
323*0fbbaa43SLionel Sambuc 				 * particular record. */
324*0fbbaa43SLionel Sambuc 				return 1;
325*0fbbaa43SLionel Sambuc 			/*
326*0fbbaa43SLionel Sambuc 			 * Apply sort by input file order.
327*0fbbaa43SLionel Sambuc 			 * We could truncate the sort is the fileno are
328*0fbbaa43SLionel Sambuc 			 * adjacent - but that is all too hard!
329*0fbbaa43SLionel Sambuc 			 * The fileno cannot be equal, since we only have one
330*0fbbaa43SLionel Sambuc 			 * record from each file (+ flist[0] which never
331*0fbbaa43SLionel Sambuc 			 * comes here).
332*0fbbaa43SLionel Sambuc 			 */
333*0fbbaa43SLionel Sambuc 			cmpv = rec < flist[mid] ? -1 : 1;
334*0fbbaa43SLionel Sambuc 			if (REVERSE)
335*0fbbaa43SLionel Sambuc 				cmpv = -cmpv;
336*0fbbaa43SLionel Sambuc 		}
337*0fbbaa43SLionel Sambuc 		if (cmpv < 0)
338*0fbbaa43SLionel Sambuc 			top = mid;
339*0fbbaa43SLionel Sambuc 		else
340*0fbbaa43SLionel Sambuc 			bot = mid;
341*0fbbaa43SLionel Sambuc 	}
342*0fbbaa43SLionel Sambuc 
343*0fbbaa43SLionel Sambuc 	/* At this point we haven't yet compared against flist[0] */
344*0fbbaa43SLionel Sambuc 
345*0fbbaa43SLionel Sambuc 	if (delete) {
346*0fbbaa43SLionel Sambuc 		/* flist[0] is ourselves, only the caller knows the old data */
347*0fbbaa43SLionel Sambuc 		if (bot != 0) {
348*0fbbaa43SLionel Sambuc 			memmove(flist, flist + 1, bot * sizeof(MFILE *));
349*0fbbaa43SLionel Sambuc 			flist[bot] = rec;
350*0fbbaa43SLionel Sambuc 		}
351*0fbbaa43SLionel Sambuc 		return 0;
352*0fbbaa43SLionel Sambuc 	}
353*0fbbaa43SLionel Sambuc 
354*0fbbaa43SLionel Sambuc 	/* Inserting original set of records */
355*0fbbaa43SLionel Sambuc 
356*0fbbaa43SLionel Sambuc 	if (bot == 0 && cmpv != 0) {
357*0fbbaa43SLionel Sambuc 		/* Doesn't match flist[1], must compare with flist[0] */
358*0fbbaa43SLionel Sambuc 		cmpv = cmp(rec->rec, flist[0]->rec);
359*0fbbaa43SLionel Sambuc 		if (cmpv == 0 && UNIQUE)
360*0fbbaa43SLionel Sambuc 			return 1;
361*0fbbaa43SLionel Sambuc 		/* Add matching keys in file order (ie new is later) */
362*0fbbaa43SLionel Sambuc 		if (cmpv < 0)
363*0fbbaa43SLionel Sambuc 			bot = -1;
364*0fbbaa43SLionel Sambuc 	}
365*0fbbaa43SLionel Sambuc 	bot++;
366*0fbbaa43SLionel Sambuc 	memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
367*0fbbaa43SLionel Sambuc 	flist[bot] = rec;
368*0fbbaa43SLionel Sambuc 	return 0;
369*0fbbaa43SLionel Sambuc }
370*0fbbaa43SLionel Sambuc 
371*0fbbaa43SLionel Sambuc /*
372*0fbbaa43SLionel Sambuc  * check order on one file
373*0fbbaa43SLionel Sambuc  */
374*0fbbaa43SLionel Sambuc void
order(struct filelist * filelist,struct field * ftbl)375*0fbbaa43SLionel Sambuc order(struct filelist *filelist, struct field *ftbl)
376*0fbbaa43SLionel Sambuc {
377*0fbbaa43SLionel Sambuc 	get_func_t get = SINGL_FLD ? makeline : makekey;
378*0fbbaa43SLionel Sambuc 	RECHEADER *crec, *prec, *trec;
379*0fbbaa43SLionel Sambuc 	u_char *crec_end, *prec_end, *trec_end;
380*0fbbaa43SLionel Sambuc 	FILE *fp;
381*0fbbaa43SLionel Sambuc 	int c;
382*0fbbaa43SLionel Sambuc 
383*0fbbaa43SLionel Sambuc #if defined(__minix)
384*0fbbaa43SLionel Sambuc 	if (!strcmp(filelist->names[0], _PATH_STDIN))
385*0fbbaa43SLionel Sambuc 		fp = stdin;
386*0fbbaa43SLionel Sambuc 	else
387*0fbbaa43SLionel Sambuc #endif /* defined(__minix) */
388*0fbbaa43SLionel Sambuc 	fp = fopen(filelist->names[0], "r");
389*0fbbaa43SLionel Sambuc 	if (fp == NULL)
390*0fbbaa43SLionel Sambuc 		err(2, "%s", filelist->names[0]);
391*0fbbaa43SLionel Sambuc 
392*0fbbaa43SLionel Sambuc 	crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
393*0fbbaa43SLionel Sambuc 	crec_end = crec->data + DEFLLEN;
394*0fbbaa43SLionel Sambuc 	prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
395*0fbbaa43SLionel Sambuc 	prec_end = prec->data + DEFLLEN;
396*0fbbaa43SLionel Sambuc 
397*0fbbaa43SLionel Sambuc 	/* XXX this does exit(0) for overlong lines */
398*0fbbaa43SLionel Sambuc 	if (get(fp, prec, prec_end, ftbl) != 0)
399*0fbbaa43SLionel Sambuc 		exit(0);
400*0fbbaa43SLionel Sambuc 	while (get(fp, crec, crec_end, ftbl) == 0) {
401*0fbbaa43SLionel Sambuc 		if (0 < (c = cmp(prec, crec))) {
402*0fbbaa43SLionel Sambuc 			crec->data[crec->length-1] = 0;
403*0fbbaa43SLionel Sambuc 			errx(1, "found disorder: %s", crec->data+crec->offset);
404*0fbbaa43SLionel Sambuc 		}
405*0fbbaa43SLionel Sambuc 		if (UNIQUE && !c) {
406*0fbbaa43SLionel Sambuc 			crec->data[crec->length-1] = 0;
407*0fbbaa43SLionel Sambuc 			errx(1, "found non-uniqueness: %s",
408*0fbbaa43SLionel Sambuc 			    crec->data+crec->offset);
409*0fbbaa43SLionel Sambuc 		}
410*0fbbaa43SLionel Sambuc 		/*
411*0fbbaa43SLionel Sambuc 		 * Swap pointers so that this record is on place pointed
412*0fbbaa43SLionel Sambuc 		 * to by prec and new record is read to place pointed to by
413*0fbbaa43SLionel Sambuc 		 * crec.
414*0fbbaa43SLionel Sambuc 		 */
415*0fbbaa43SLionel Sambuc 		trec = prec;
416*0fbbaa43SLionel Sambuc 		prec = crec;
417*0fbbaa43SLionel Sambuc 		crec = trec;
418*0fbbaa43SLionel Sambuc 		trec_end = prec_end;
419*0fbbaa43SLionel Sambuc 		prec_end = crec_end;
420*0fbbaa43SLionel Sambuc 		crec_end = trec_end;
421*0fbbaa43SLionel Sambuc 	}
422*0fbbaa43SLionel Sambuc 	exit(0);
423*0fbbaa43SLionel Sambuc }
424*0fbbaa43SLionel Sambuc 
425*0fbbaa43SLionel Sambuc static int
cmp(RECHEADER * rec1,RECHEADER * rec2)426*0fbbaa43SLionel Sambuc cmp(RECHEADER *rec1, RECHEADER *rec2)
427*0fbbaa43SLionel Sambuc {
428*0fbbaa43SLionel Sambuc 	int len;
429*0fbbaa43SLionel Sambuc 	int r;
430*0fbbaa43SLionel Sambuc 
431*0fbbaa43SLionel Sambuc 	/* key is weights */
432*0fbbaa43SLionel Sambuc 	len = min(rec1->keylen, rec2->keylen);
433*0fbbaa43SLionel Sambuc 	r = memcmp(rec1->data, rec2->data, len);
434*0fbbaa43SLionel Sambuc 	if (r == 0)
435*0fbbaa43SLionel Sambuc 		r = rec1->keylen - rec2->keylen;
436*0fbbaa43SLionel Sambuc 	if (REVERSE)
437*0fbbaa43SLionel Sambuc 		r = -r;
438*0fbbaa43SLionel Sambuc 	return r;
439*0fbbaa43SLionel Sambuc }
440