1 /* $NetBSD: msort.c,v 1.17 2004/02/17 19:09:36 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include "sort.h" 72 #include "fsort.h" 73 74 #ifndef lint 75 __RCSID("$NetBSD: msort.c,v 1.17 2004/02/17 19:09:36 jdolecek Exp $"); 76 __SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93"); 77 #endif /* not lint */ 78 79 #include <stdlib.h> 80 #include <string.h> 81 #include <unistd.h> 82 83 /* Subroutines using comparisons: merge sort and check order */ 84 #define DELETE (1) 85 86 typedef struct mfile { 87 u_char *end; 88 short flno; 89 struct recheader rec[1]; 90 } MFILE; 91 92 static u_char *wts, *wts1 = NULL; 93 94 static int cmp __P((RECHEADER *, RECHEADER *)); 95 static int insert __P((struct mfile **, struct mfile **, int, int)); 96 static void merge(int, int, get_func_t, FILE *, put_func_t, struct field *); 97 98 void 99 fmerge(binno, top, filelist, nfiles, get, outfp, fput, ftbl) 100 int binno, top; 101 struct filelist *filelist; 102 int nfiles; 103 get_func_t get; 104 FILE *outfp; 105 put_func_t fput; 106 struct field *ftbl; 107 { 108 FILE *tout; 109 int i, j, last; 110 put_func_t put; 111 struct tempfile *l_fstack; 112 113 wts = ftbl->weights; 114 if (!UNIQUE && SINGL_FLD && ftbl->flags & F) 115 wts1 = (ftbl->flags & R) ? Rascii : ascii; 116 117 if (!buffer) { 118 buffer = malloc(bufsize); 119 if (!buffer) 120 err(2, "fmerge(): malloc"); 121 memset(buffer, 0, bufsize); 122 123 if (!linebuf && !SINGL_FLD) { 124 linebuf_size = DEFLLEN; 125 linebuf = malloc(linebuf_size); 126 memset(linebuf, 0, linebuf_size); 127 } 128 } 129 130 if (binno >= 0) 131 l_fstack = fstack + top; 132 else 133 l_fstack = fstack; 134 135 while (nfiles) { 136 put = putrec; 137 for (j = 0; j < nfiles; j += MERGE_FNUM) { 138 if (nfiles <= MERGE_FNUM) { 139 tout = outfp; 140 put = fput; 141 } 142 else 143 tout = ftmp(); 144 last = min(MERGE_FNUM, nfiles - j); 145 if (binno < 0) { 146 for (i = 0; i < last; i++) 147 if (!(l_fstack[i+MAXFCT-1-MERGE_FNUM].fp = 148 fopen(filelist->names[j+i], "r"))) 149 err(2, "%s", 150 filelist->names[j+i]); 151 merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl); 152 } else { 153 for (i = 0; i< last; i++) 154 rewind(l_fstack[i+j].fp); 155 merge(top+j, last, get, tout, put, ftbl); 156 } 157 if (nfiles > MERGE_FNUM) 158 l_fstack[j/MERGE_FNUM].fp = tout; 159 } 160 nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM; 161 if (nfiles == 1) 162 nfiles = 0; 163 if (binno < 0) { 164 binno = 0; 165 get = geteasy; 166 top = 0; 167 } 168 } 169 } 170 171 static void 172 merge(infl0, nfiles, get, outfp, put, ftbl) 173 int infl0, nfiles; 174 get_func_t get; 175 put_func_t put; 176 FILE *outfp; 177 struct field *ftbl; 178 { 179 int c, i, j, nf = nfiles; 180 struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile; 181 size_t availsz = bufsize; 182 static void *bufs[MERGE_FNUM + 1]; 183 static size_t bufs_sz[MERGE_FNUM + 1]; 184 185 /* 186 * We need nfiles + 1 buffers. One is 'buffer', the 187 * rest needs to be allocated. 188 */ 189 bufs[0] = buffer; 190 bufs_sz[0] = bufsize; 191 for (i = 1; i < nfiles + 1; i++) { 192 if (bufs[i]) 193 continue; 194 195 bufs[i] = malloc(DEFLLEN); 196 if (!bufs[i]) 197 err(2, "merge: malloc"); 198 memset(bufs[i], 0, DEFLLEN); 199 bufs_sz[i] = DEFLLEN; 200 } 201 202 for (i = j = 0; i < nfiles; i++, j++) { 203 cfile = (struct mfile *) bufs[j]; 204 cfile->flno = infl0 + j; 205 cfile->end = (u_char *) bufs[j] + bufs_sz[j]; 206 for (c = 1; c == 1;) { 207 if (EOF == (c = get(cfile->flno, 0, NULL, nfiles, 208 cfile->rec, cfile->end, ftbl))) { 209 --i; 210 --nfiles; 211 break; 212 } 213 214 if (c == BUFFEND) { 215 cfile = realloc(bufs[j], bufs_sz[j]); 216 if (!cfile) 217 err(2, "merge: realloc"); 218 219 bufs[j] = (void *) cfile; 220 bufs_sz[j] *= 2; 221 cfile->end = (u_char *)cfile + bufs_sz[j]; 222 223 c = 1; 224 continue; 225 } 226 227 if (i) 228 c = insert(flist, &cfile, i, !DELETE); 229 else 230 flist[0] = cfile; 231 } 232 } 233 234 cfile = (struct mfile *) bufs[nf]; 235 cfile->flno = flist[0]->flno; 236 cfile->end = (u_char *) cfile + bufs_sz[nf]; 237 while (nfiles) { 238 for (c = 1; c == 1;) { 239 if (EOF == (c = get(cfile->flno, 0, NULL, nfiles, 240 cfile->rec, cfile->end, ftbl))) { 241 put(flist[0]->rec, outfp); 242 if (--nfiles > 0) { 243 flist++; 244 cfile->flno = flist[0]->flno; 245 } 246 break; 247 } 248 if (c == BUFFEND) { 249 char *oldbuf = (char *) cfile; 250 availsz = (char *) cfile->end - oldbuf; 251 availsz *= 2; 252 cfile = realloc(oldbuf, availsz); 253 if (!cfile) 254 err(2, "merge: realloc"); 255 256 for (i = 0; i < nf + 1; i++) { 257 if (bufs[i] == oldbuf) { 258 bufs[i] = (char *)cfile; 259 bufs_sz[i] = availsz; 260 break; 261 } 262 } 263 264 cfile->end = (u_char *)cfile + availsz; 265 c = 1; 266 continue; 267 } 268 269 if (!(c = insert(flist, &cfile, nfiles, DELETE))) 270 put(cfile->rec, outfp); 271 } 272 } 273 274 if (bufs_sz[0] > bufsize) { 275 buffer = bufs[0]; 276 bufsize = bufs_sz[0]; 277 } 278 } 279 280 /* 281 * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec; 282 * otherwise just inserts *rec in flist. 283 */ 284 static int 285 insert(flist, rec, ttop, delete) 286 struct mfile **flist, **rec; 287 int delete, ttop; /* delete = 0 or 1 */ 288 { 289 struct mfile *tmprec = *rec; 290 int mid, top = ttop, bot = 0, cmpv = 1; 291 292 for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) { 293 cmpv = cmp(tmprec->rec, flist[mid]->rec); 294 if (cmpv < 0) 295 top = mid; 296 else if (cmpv > 0) 297 bot = mid; 298 else { 299 if (UNIQUE) 300 break; 301 302 if (stable_sort) { 303 /* 304 * Apply sort by fileno, to give priority 305 * to earlier specified files, hence providing 306 * more stable sort. 307 * If fileno is same, the new record should 308 * be put _after_ the previous entry. 309 */ 310 cmpv = tmprec->flno - flist[mid]->flno; 311 if (cmpv >= 0) 312 bot = mid; 313 else /* cmpv == 0 */ 314 bot = mid - 1; 315 } else { 316 /* non-stable sort */ 317 bot = mid - 1; 318 } 319 320 break; 321 } 322 } 323 324 if (delete) { 325 if (UNIQUE) { 326 if (!bot && cmpv) 327 cmpv = cmp(tmprec->rec, flist[0]->rec); 328 if (!cmpv) 329 return (1); 330 } 331 tmprec = flist[0]; 332 if (bot) 333 memmove(flist, flist + 1, bot * sizeof(MFILE **)); 334 flist[bot] = *rec; 335 *rec = tmprec; 336 (*rec)->flno = flist[0]->flno; 337 return (0); 338 } else { 339 if (!bot && !(UNIQUE && !cmpv)) { 340 cmpv = cmp(tmprec->rec, flist[0]->rec); 341 if (cmpv < 0) 342 bot = -1; 343 } 344 if (UNIQUE && !cmpv) 345 return (1); 346 bot++; 347 memmove(flist + bot + 1, flist + bot, 348 (ttop - bot) * sizeof(MFILE **)); 349 flist[bot] = *rec; 350 return (0); 351 } 352 } 353 354 /* 355 * check order on one file 356 */ 357 void 358 order(filelist, get, ftbl) 359 struct filelist *filelist; 360 get_func_t get; 361 struct field *ftbl; 362 { 363 u_char *crec_end, *prec_end, *trec_end; 364 int c; 365 RECHEADER *crec, *prec, *trec; 366 367 if (!SINGL_FLD) 368 linebuf = malloc(DEFLLEN); 369 buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER))); 370 crec = (RECHEADER *) buffer; 371 crec_end = buffer + DEFLLEN + sizeof(TRECHEADER); 372 prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER)); 373 prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER)); 374 wts = ftbl->weights; 375 if (SINGL_FLD && (ftbl->flags & F)) 376 wts1 = (ftbl->flags & R) ? Rascii : ascii; 377 else 378 wts1 = NULL; 379 if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl)) 380 while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) { 381 if (0 < (c = cmp(prec, crec))) { 382 crec->data[crec->length-1] = 0; 383 errx(1, "found disorder: %s", crec->data+crec->offset); 384 } 385 if (UNIQUE && !c) { 386 crec->data[crec->length-1] = 0; 387 errx(1, "found non-uniqueness: %s", 388 crec->data+crec->offset); 389 } 390 /* 391 * Swap pointers so that this record is on place pointed 392 * to by prec and new record is read to place pointed to by 393 * crec. 394 */ 395 trec = prec; 396 prec = crec; 397 crec = trec; 398 trec_end = prec_end; 399 prec_end = crec_end; 400 crec_end = trec_end; 401 } 402 exit(0); 403 } 404 405 static int 406 cmp(rec1, rec2) 407 RECHEADER *rec1, *rec2; 408 { 409 int r; 410 u_char *pos1, *pos2, *end; 411 u_char *cwts; 412 for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) { 413 pos1 = rec1->data; 414 pos2 = rec2->data; 415 if (!SINGL_FLD && (UNIQUE || stable_sort)) 416 end = pos1 + min(rec1->offset, rec2->offset); 417 else 418 end = pos1 + min(rec1->length, rec2->length); 419 420 for (; pos1 < end; ) { 421 if ((r = cwts[*pos1++] - cwts[*pos2++])) 422 return (r); 423 } 424 } 425 return (0); 426 } 427