xref: /netbsd-src/usr.bin/sort/append.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: append.c,v 1.14 2008/04/28 20:24:15 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 
66 #ifndef lint
67 __RCSID("$NetBSD: append.c,v 1.14 2008/04/28 20:24:15 martin Exp $");
68 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
69 #endif /* not lint */
70 
71 #include <stdlib.h>
72 #include <string.h>
73 
74 #define OUTPUT {							\
75 	if ((n = cpos - ppos) > 1) {					\
76 		for (; ppos < cpos; ++ppos)				\
77 			*ppos -= odepth;				\
78 		ppos -= n;						\
79 		if (stable_sort)					\
80 			sradixsort(ppos, n, wts1, REC_D);		\
81 		else							\
82 			radixsort(ppos, n, wts1, REC_D);		\
83 		for (; ppos < cpos; ppos++) {				\
84 			prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
85 			put(prec, fp);					\
86 		}							\
87 	} else put(prec, fp);						\
88 }
89 
90 /*
91  * copy sorted lines to output; check for uniqueness
92  */
93 void
94 append(keylist, nelem, depth, fp, put, ftbl)
95 	const u_char **keylist;
96 	int nelem;
97 	int depth;
98 	FILE *fp;
99 	put_func_t put;
100 	struct field *ftbl;
101 {
102 	u_char *wts, *wts1;
103 	int n, odepth = depth;
104 	const u_char **cpos, **ppos, **lastkey;
105 	const u_char *cend, *pend, *start;
106 	const struct recheader *crec, *prec;
107 
108 	if (*keylist == '\0' && UNIQUE)
109 		return;
110 	wts1 = wts = ftbl[0].weights;
111 	if ((!UNIQUE) && SINGL_FLD) {
112 		if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
113 			wts1 = Rascii;
114 		else if (ftbl[0].flags & F)
115 			wts1 = ascii;
116 	}
117 	lastkey = keylist + nelem;
118 	depth += sizeof(TRECHEADER);
119 	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
120 		ppos = keylist;
121 		prec = (const RECHEADER *) (*ppos - depth);
122 		if (UNIQUE)
123 			put(prec, fp);
124 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
125 			crec = (const RECHEADER *) (*cpos - depth);
126 			if (crec->length  == prec->length) {
127 				/*
128 				 * Set pend and cend so that trailing NUL and
129 				 * record separator is ignored.
130 				 */
131 				pend = (const u_char *) &prec->data + prec->length - 2;
132 				cend = (const u_char *) &crec->data + crec->length - 2;
133 				for (start = *cpos; cend >= start; cend--) {
134 					if (wts[*cend] != wts[*pend])
135 						break;
136 					pend--;
137 				}
138 				if (pend + 1 != *ppos) {
139 					if (!UNIQUE) {
140 						OUTPUT;
141 					} else
142 						put(crec, fp);
143 					ppos = cpos;
144 					prec = crec;
145 				}
146 			} else {
147 				if (!UNIQUE) {
148 					OUTPUT;
149 				} else
150 					put(crec, fp);
151 				ppos = cpos;
152 				prec = crec;
153 			}
154 		}
155 		if (!UNIQUE)  { OUTPUT; }
156 	} else if (UNIQUE) {
157 		ppos = keylist;
158 		prec = (const RECHEADER *) (*ppos - depth);
159 		put(prec, fp);
160 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
161 			crec = (const RECHEADER *) (*cpos - depth);
162 			if (crec->offset == prec->offset) {
163 				/*
164 				 * Set pend and cend so that trailing NUL and
165 				 * record separator is ignored.
166 				 */
167 				pend = (const u_char *) &prec->data + prec->offset - 2;
168 				cend = (const u_char *) &crec->data + crec->offset - 2;
169 				for (start = *cpos; cend >= start; cend--) {
170 					if (wts[*cend] != wts[*pend])
171 						break;
172 					pend--;
173 				}
174 				if (pend + 1 != *ppos) {
175 					ppos = cpos;
176 					prec = crec;
177 					put(prec, fp);
178 				}
179 			} else {
180 				ppos = cpos;
181 				prec = crec;
182 				put(prec, fp);
183 			}
184 		}
185 	} else for (cpos = keylist; cpos < lastkey; cpos++) {
186 		crec = (const RECHEADER *) (*cpos - depth);
187 		put(crec, fp);
188 	}
189 }
190 
191 /*
192  * output the already sorted eol bin.
193  */
194 void
195 rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
196 	u_char *buffer;
197 	int infl0;
198 	int binno, nfiles;
199 	FILE *outfp;
200 	u_char *bufend;
201 {
202 	RECHEADER *rec;
203 
204 	rec = (RECHEADER *) buffer;
205 	if (!getnext(binno, infl0, NULL, nfiles,
206 			(RECHEADER *) buffer, bufend, 0)) {
207 		putline(rec, outfp);
208 		while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
209 			bufend, 0) == 0) {
210 			if (!UNIQUE)
211 				putline(rec, outfp);
212 		}
213 	}
214 }
215 
216 /*
217  * append plain text--used after sorting the biggest bin.
218  */
219 void
220 concat(a, b)
221 	FILE *a, *b;
222 {
223         int nread;
224         char buffer[4096];
225 
226 	rewind(b);
227         while ((nread = fread(buffer, 1, 4096, b)) > 0)
228                 EWRITE(buffer, 1, nread, a);
229 }
230