xref: /netbsd-src/usr.bin/sort/append.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: append.c,v 1.13 2004/02/15 11:52:12 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Peter McIlroy.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include "sort.h"
72 
73 #ifndef lint
74 __RCSID("$NetBSD: append.c,v 1.13 2004/02/15 11:52:12 jdolecek Exp $");
75 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
76 #endif /* not lint */
77 
78 #include <stdlib.h>
79 #include <string.h>
80 
81 #define OUTPUT {							\
82 	if ((n = cpos - ppos) > 1) {					\
83 		for (; ppos < cpos; ++ppos)				\
84 			*ppos -= odepth;				\
85 		ppos -= n;						\
86 		if (stable_sort)					\
87 			sradixsort(ppos, n, wts1, REC_D);		\
88 		else							\
89 			radixsort(ppos, n, wts1, REC_D);		\
90 		for (; ppos < cpos; ppos++) {				\
91 			prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
92 			put(prec, fp);					\
93 		}							\
94 	} else put(prec, fp);						\
95 }
96 
97 /*
98  * copy sorted lines to output; check for uniqueness
99  */
100 void
101 append(keylist, nelem, depth, fp, put, ftbl)
102 	const u_char **keylist;
103 	int nelem;
104 	int depth;
105 	FILE *fp;
106 	put_func_t put;
107 	struct field *ftbl;
108 {
109 	u_char *wts, *wts1;
110 	int n, odepth = depth;
111 	const u_char **cpos, **ppos, **lastkey;
112 	const u_char *cend, *pend, *start;
113 	const struct recheader *crec, *prec;
114 
115 	if (*keylist == '\0' && UNIQUE)
116 		return;
117 	wts1 = wts = ftbl[0].weights;
118 	if ((!UNIQUE) && SINGL_FLD) {
119 		if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
120 			wts1 = Rascii;
121 		else if (ftbl[0].flags & F)
122 			wts1 = ascii;
123 	}
124 	lastkey = keylist + nelem;
125 	depth += sizeof(TRECHEADER);
126 	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
127 		ppos = keylist;
128 		prec = (const RECHEADER *) (*ppos - depth);
129 		if (UNIQUE)
130 			put(prec, fp);
131 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
132 			crec = (const RECHEADER *) (*cpos - depth);
133 			if (crec->length  == prec->length) {
134 				/*
135 				 * Set pend and cend so that trailing NUL and
136 				 * record separator is ignored.
137 				 */
138 				pend = (const u_char *) &prec->data + prec->length - 2;
139 				cend = (const u_char *) &crec->data + crec->length - 2;
140 				for (start = *cpos; cend >= start; cend--) {
141 					if (wts[*cend] != wts[*pend])
142 						break;
143 					pend--;
144 				}
145 				if (pend + 1 != *ppos) {
146 					if (!UNIQUE) {
147 						OUTPUT;
148 					} else
149 						put(crec, fp);
150 					ppos = cpos;
151 					prec = crec;
152 				}
153 			} else {
154 				if (!UNIQUE) {
155 					OUTPUT;
156 				} else
157 					put(crec, fp);
158 				ppos = cpos;
159 				prec = crec;
160 			}
161 		}
162 		if (!UNIQUE)  { OUTPUT; }
163 	} else if (UNIQUE) {
164 		ppos = keylist;
165 		prec = (const RECHEADER *) (*ppos - depth);
166 		put(prec, fp);
167 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
168 			crec = (const RECHEADER *) (*cpos - depth);
169 			if (crec->offset == prec->offset) {
170 				/*
171 				 * Set pend and cend so that trailing NUL and
172 				 * record separator is ignored.
173 				 */
174 				pend = (const u_char *) &prec->data + prec->offset - 2;
175 				cend = (const u_char *) &crec->data + crec->offset - 2;
176 				for (start = *cpos; cend >= start; cend--) {
177 					if (wts[*cend] != wts[*pend])
178 						break;
179 					pend--;
180 				}
181 				if (pend + 1 != *ppos) {
182 					ppos = cpos;
183 					prec = crec;
184 					put(prec, fp);
185 				}
186 			} else {
187 				ppos = cpos;
188 				prec = crec;
189 				put(prec, fp);
190 			}
191 		}
192 	} else for (cpos = keylist; cpos < lastkey; cpos++) {
193 		crec = (const RECHEADER *) (*cpos - depth);
194 		put(crec, fp);
195 	}
196 }
197 
198 /*
199  * output the already sorted eol bin.
200  */
201 void
202 rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
203 	u_char *buffer;
204 	int infl0;
205 	int binno, nfiles;
206 	FILE *outfp;
207 	u_char *bufend;
208 {
209 	RECHEADER *rec;
210 
211 	rec = (RECHEADER *) buffer;
212 	if (!getnext(binno, infl0, NULL, nfiles,
213 			(RECHEADER *) buffer, bufend, 0)) {
214 		putline(rec, outfp);
215 		while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
216 			bufend, 0) == 0) {
217 			if (!UNIQUE)
218 				putline(rec, outfp);
219 		}
220 	}
221 }
222 
223 /*
224  * append plain text--used after sorting the biggest bin.
225  */
226 void
227 concat(a, b)
228 	FILE *a, *b;
229 {
230         int nread;
231         char buffer[4096];
232 
233 	rewind(b);
234         while ((nread = fread(buffer, 1, 4096, b)) > 0)
235                 EWRITE(buffer, 1, nread, a);
236 }
237