1 /* $NetBSD: append.c,v 1.13 2004/02/15 11:52:12 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include "sort.h" 72 73 #ifndef lint 74 __RCSID("$NetBSD: append.c,v 1.13 2004/02/15 11:52:12 jdolecek Exp $"); 75 __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93"); 76 #endif /* not lint */ 77 78 #include <stdlib.h> 79 #include <string.h> 80 81 #define OUTPUT { \ 82 if ((n = cpos - ppos) > 1) { \ 83 for (; ppos < cpos; ++ppos) \ 84 *ppos -= odepth; \ 85 ppos -= n; \ 86 if (stable_sort) \ 87 sradixsort(ppos, n, wts1, REC_D); \ 88 else \ 89 radixsort(ppos, n, wts1, REC_D); \ 90 for (; ppos < cpos; ppos++) { \ 91 prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\ 92 put(prec, fp); \ 93 } \ 94 } else put(prec, fp); \ 95 } 96 97 /* 98 * copy sorted lines to output; check for uniqueness 99 */ 100 void 101 append(keylist, nelem, depth, fp, put, ftbl) 102 const u_char **keylist; 103 int nelem; 104 int depth; 105 FILE *fp; 106 put_func_t put; 107 struct field *ftbl; 108 { 109 u_char *wts, *wts1; 110 int n, odepth = depth; 111 const u_char **cpos, **ppos, **lastkey; 112 const u_char *cend, *pend, *start; 113 const struct recheader *crec, *prec; 114 115 if (*keylist == '\0' && UNIQUE) 116 return; 117 wts1 = wts = ftbl[0].weights; 118 if ((!UNIQUE) && SINGL_FLD) { 119 if ((ftbl[0].flags & F) && (ftbl[0].flags & R)) 120 wts1 = Rascii; 121 else if (ftbl[0].flags & F) 122 wts1 = ascii; 123 } 124 lastkey = keylist + nelem; 125 depth += sizeof(TRECHEADER); 126 if (SINGL_FLD && (UNIQUE || wts1 != wts)) { 127 ppos = keylist; 128 prec = (const RECHEADER *) (*ppos - depth); 129 if (UNIQUE) 130 put(prec, fp); 131 for (cpos = &keylist[1]; cpos < lastkey; cpos++) { 132 crec = (const RECHEADER *) (*cpos - depth); 133 if (crec->length == prec->length) { 134 /* 135 * Set pend and cend so that trailing NUL and 136 * record separator is ignored. 137 */ 138 pend = (const u_char *) &prec->data + prec->length - 2; 139 cend = (const u_char *) &crec->data + crec->length - 2; 140 for (start = *cpos; cend >= start; cend--) { 141 if (wts[*cend] != wts[*pend]) 142 break; 143 pend--; 144 } 145 if (pend + 1 != *ppos) { 146 if (!UNIQUE) { 147 OUTPUT; 148 } else 149 put(crec, fp); 150 ppos = cpos; 151 prec = crec; 152 } 153 } else { 154 if (!UNIQUE) { 155 OUTPUT; 156 } else 157 put(crec, fp); 158 ppos = cpos; 159 prec = crec; 160 } 161 } 162 if (!UNIQUE) { OUTPUT; } 163 } else if (UNIQUE) { 164 ppos = keylist; 165 prec = (const RECHEADER *) (*ppos - depth); 166 put(prec, fp); 167 for (cpos = &keylist[1]; cpos < lastkey; cpos++) { 168 crec = (const RECHEADER *) (*cpos - depth); 169 if (crec->offset == prec->offset) { 170 /* 171 * Set pend and cend so that trailing NUL and 172 * record separator is ignored. 173 */ 174 pend = (const u_char *) &prec->data + prec->offset - 2; 175 cend = (const u_char *) &crec->data + crec->offset - 2; 176 for (start = *cpos; cend >= start; cend--) { 177 if (wts[*cend] != wts[*pend]) 178 break; 179 pend--; 180 } 181 if (pend + 1 != *ppos) { 182 ppos = cpos; 183 prec = crec; 184 put(prec, fp); 185 } 186 } else { 187 ppos = cpos; 188 prec = crec; 189 put(prec, fp); 190 } 191 } 192 } else for (cpos = keylist; cpos < lastkey; cpos++) { 193 crec = (const RECHEADER *) (*cpos - depth); 194 put(crec, fp); 195 } 196 } 197 198 /* 199 * output the already sorted eol bin. 200 */ 201 void 202 rd_append(binno, infl0, nfiles, outfp, buffer, bufend) 203 u_char *buffer; 204 int infl0; 205 int binno, nfiles; 206 FILE *outfp; 207 u_char *bufend; 208 { 209 RECHEADER *rec; 210 211 rec = (RECHEADER *) buffer; 212 if (!getnext(binno, infl0, NULL, nfiles, 213 (RECHEADER *) buffer, bufend, 0)) { 214 putline(rec, outfp); 215 while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer, 216 bufend, 0) == 0) { 217 if (!UNIQUE) 218 putline(rec, outfp); 219 } 220 } 221 } 222 223 /* 224 * append plain text--used after sorting the biggest bin. 225 */ 226 void 227 concat(a, b) 228 FILE *a, *b; 229 { 230 int nread; 231 char buffer[4096]; 232 233 rewind(b); 234 while ((nread = fread(buffer, 1, 4096, b)) > 0) 235 EWRITE(buffer, 1, nread, a); 236 } 237