1 /* $NetBSD: append.c,v 1.14 2008/04/28 20:24:15 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 66 #ifndef lint 67 __RCSID("$NetBSD: append.c,v 1.14 2008/04/28 20:24:15 martin Exp $"); 68 __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93"); 69 #endif /* not lint */ 70 71 #include <stdlib.h> 72 #include <string.h> 73 74 #define OUTPUT { \ 75 if ((n = cpos - ppos) > 1) { \ 76 for (; ppos < cpos; ++ppos) \ 77 *ppos -= odepth; \ 78 ppos -= n; \ 79 if (stable_sort) \ 80 sradixsort(ppos, n, wts1, REC_D); \ 81 else \ 82 radixsort(ppos, n, wts1, REC_D); \ 83 for (; ppos < cpos; ppos++) { \ 84 prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\ 85 put(prec, fp); \ 86 } \ 87 } else put(prec, fp); \ 88 } 89 90 /* 91 * copy sorted lines to output; check for uniqueness 92 */ 93 void 94 append(keylist, nelem, depth, fp, put, ftbl) 95 const u_char **keylist; 96 int nelem; 97 int depth; 98 FILE *fp; 99 put_func_t put; 100 struct field *ftbl; 101 { 102 u_char *wts, *wts1; 103 int n, odepth = depth; 104 const u_char **cpos, **ppos, **lastkey; 105 const u_char *cend, *pend, *start; 106 const struct recheader *crec, *prec; 107 108 if (*keylist == '\0' && UNIQUE) 109 return; 110 wts1 = wts = ftbl[0].weights; 111 if ((!UNIQUE) && SINGL_FLD) { 112 if ((ftbl[0].flags & F) && (ftbl[0].flags & R)) 113 wts1 = Rascii; 114 else if (ftbl[0].flags & F) 115 wts1 = ascii; 116 } 117 lastkey = keylist + nelem; 118 depth += sizeof(TRECHEADER); 119 if (SINGL_FLD && (UNIQUE || wts1 != wts)) { 120 ppos = keylist; 121 prec = (const RECHEADER *) (*ppos - depth); 122 if (UNIQUE) 123 put(prec, fp); 124 for (cpos = &keylist[1]; cpos < lastkey; cpos++) { 125 crec = (const RECHEADER *) (*cpos - depth); 126 if (crec->length == prec->length) { 127 /* 128 * Set pend and cend so that trailing NUL and 129 * record separator is ignored. 130 */ 131 pend = (const u_char *) &prec->data + prec->length - 2; 132 cend = (const u_char *) &crec->data + crec->length - 2; 133 for (start = *cpos; cend >= start; cend--) { 134 if (wts[*cend] != wts[*pend]) 135 break; 136 pend--; 137 } 138 if (pend + 1 != *ppos) { 139 if (!UNIQUE) { 140 OUTPUT; 141 } else 142 put(crec, fp); 143 ppos = cpos; 144 prec = crec; 145 } 146 } else { 147 if (!UNIQUE) { 148 OUTPUT; 149 } else 150 put(crec, fp); 151 ppos = cpos; 152 prec = crec; 153 } 154 } 155 if (!UNIQUE) { OUTPUT; } 156 } else if (UNIQUE) { 157 ppos = keylist; 158 prec = (const RECHEADER *) (*ppos - depth); 159 put(prec, fp); 160 for (cpos = &keylist[1]; cpos < lastkey; cpos++) { 161 crec = (const RECHEADER *) (*cpos - depth); 162 if (crec->offset == prec->offset) { 163 /* 164 * Set pend and cend so that trailing NUL and 165 * record separator is ignored. 166 */ 167 pend = (const u_char *) &prec->data + prec->offset - 2; 168 cend = (const u_char *) &crec->data + crec->offset - 2; 169 for (start = *cpos; cend >= start; cend--) { 170 if (wts[*cend] != wts[*pend]) 171 break; 172 pend--; 173 } 174 if (pend + 1 != *ppos) { 175 ppos = cpos; 176 prec = crec; 177 put(prec, fp); 178 } 179 } else { 180 ppos = cpos; 181 prec = crec; 182 put(prec, fp); 183 } 184 } 185 } else for (cpos = keylist; cpos < lastkey; cpos++) { 186 crec = (const RECHEADER *) (*cpos - depth); 187 put(crec, fp); 188 } 189 } 190 191 /* 192 * output the already sorted eol bin. 193 */ 194 void 195 rd_append(binno, infl0, nfiles, outfp, buffer, bufend) 196 u_char *buffer; 197 int infl0; 198 int binno, nfiles; 199 FILE *outfp; 200 u_char *bufend; 201 { 202 RECHEADER *rec; 203 204 rec = (RECHEADER *) buffer; 205 if (!getnext(binno, infl0, NULL, nfiles, 206 (RECHEADER *) buffer, bufend, 0)) { 207 putline(rec, outfp); 208 while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer, 209 bufend, 0) == 0) { 210 if (!UNIQUE) 211 putline(rec, outfp); 212 } 213 } 214 } 215 216 /* 217 * append plain text--used after sorting the biggest bin. 218 */ 219 void 220 concat(a, b) 221 FILE *a, *b; 222 { 223 int nread; 224 char buffer[4096]; 225 226 rewind(b); 227 while ((nread = fread(buffer, 1, 4096, b)) > 0) 228 EWRITE(buffer, 1, nread, a); 229 } 230