1 /* $NetBSD: files.c,v 1.26 2008/04/28 20:24:15 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 #include "fsort.h" 66 67 #ifndef lint 68 __RCSID("$NetBSD: files.c,v 1.26 2008/04/28 20:24:15 martin Exp $"); 69 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 70 #endif /* not lint */ 71 72 #include <string.h> 73 74 static int seq __P((FILE *, DBT *, DBT *)); 75 76 /* 77 * this is the subroutine for file management for fsort(). 78 * It keeps the buffers for all temporary files. 79 */ 80 int 81 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 82 int binno, infl0; 83 struct filelist *filelist; 84 int nfiles; 85 RECHEADER *pos; 86 u_char *end; 87 struct field *dummy; 88 { 89 int i; 90 u_char *hp; 91 static size_t nleft = 0; 92 static int cnt = 0, flag = -1; 93 static u_char maxb = 0; 94 static FILE *fp; 95 96 if (nleft == 0) { 97 if (binno < 0) /* reset files. */ { 98 for (i = 0; i < nfiles; i++) { 99 rewind(fstack[infl0 + i].fp); 100 fstack[infl0 + i].max_o = 0; 101 } 102 flag = -1; 103 nleft = cnt = 0; 104 return (-1); 105 } 106 maxb = fstack[infl0].maxb; 107 for (; nleft == 0; cnt++) { 108 if (cnt >= nfiles) { 109 cnt = 0; 110 return (EOF); 111 } 112 fp = fstack[infl0 + cnt].fp; 113 fread(&nleft, sizeof(nleft), 1, fp); 114 if (binno < maxb) 115 fstack[infl0+cnt].max_o 116 += sizeof(nleft) + nleft; 117 else if (binno == maxb) { 118 if (binno != fstack[infl0].lastb) { 119 fseeko(fp, fstack[infl0+ 120 cnt].max_o, SEEK_SET); 121 fread(&nleft, sizeof(nleft), 1, fp); 122 } 123 if (nleft == 0) 124 fclose(fp); 125 } else if (binno == maxb + 1) { /* skip a bin */ 126 fseek(fp, nleft, SEEK_CUR); 127 fread(&nleft, sizeof(nleft), 1, fp); 128 flag = cnt; 129 } 130 } 131 } 132 if ((u_char *) pos > end - sizeof(TRECHEADER)) 133 return (BUFFEND); 134 fread(pos, sizeof(TRECHEADER), 1, fp); 135 if (end - pos->data < pos->length) { 136 hp = ((u_char *)pos) + sizeof(TRECHEADER); 137 for (i = sizeof(TRECHEADER); i ; i--) 138 ungetc(*--hp, fp); 139 return (BUFFEND); 140 } 141 fread(pos->data, pos->length, 1, fp); 142 nleft -= pos->length + sizeof(TRECHEADER); 143 if (nleft == 0 && binno == fstack[infl0].maxb) 144 fclose(fp); 145 return (0); 146 } 147 148 /* 149 * this is called when there is no special key. It's only called 150 * in the first fsort pass. 151 */ 152 int 153 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 154 int flno, top; 155 struct filelist *filelist; 156 int nfiles; 157 RECHEADER *recbuf; 158 u_char *bufend; 159 struct field *dummy2; 160 { 161 static u_char *obufend; 162 static size_t osz; 163 char *pos; 164 static int filenum = 0, overflow = 0; 165 static FILE *fp = 0; 166 int c; 167 168 c = 0; /* XXXGCC -Wuninitialized [pmppc] */ 169 170 pos = (char *) recbuf->data; 171 if (overflow) { 172 /* 173 * Buffer shortage is solved by either of two ways: 174 * o flush previous buffered data and start using the 175 * buffer from start (see fsort()) 176 * o realloc buffer and bump bufend 177 * 178 * The former is preferred, realloc is only done when 179 * there is exactly one item in buffer which does not fit. 180 */ 181 if (bufend == obufend) 182 memmove(pos, bufend - osz, osz); 183 184 pos += osz; 185 overflow = 0; 186 } 187 for (;;) { 188 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 189 return (EOF); 190 else if (fp == NULL) { 191 if (filenum >= nfiles) 192 return (EOF); 193 if (!(fp = fopen(filelist->names[filenum], "r"))) 194 err(2, "%s", filelist->names[filenum]); 195 filenum++; 196 } 197 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 198 if ((*pos++ = c) == REC_D) { 199 recbuf->offset = 0; 200 recbuf->length = pos - (char *) recbuf->data; 201 return (0); 202 } 203 } 204 if (pos >= (char *)bufend) { 205 if (recbuf->data < bufend) { 206 overflow = 1; 207 obufend = bufend; 208 osz = (pos - (char *) recbuf->data); 209 } 210 return (BUFFEND); 211 } else if (c == EOF) { 212 if (recbuf->data != (u_char *) pos) { 213 *pos++ = REC_D; 214 recbuf->offset = 0; 215 recbuf->length = pos - (char *) recbuf->data; 216 return (0); 217 } 218 FCLOSE(fp); 219 fp = 0; 220 if (flno >= 0) 221 fstack[flno].fp = 0; 222 } else { 223 224 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 225 226 /* Consume the rest of line from input */ 227 while ((c = getc(fp)) != REC_D && c != EOF) 228 ; 229 230 recbuf->offset = 0; 231 recbuf->length = 0; 232 233 return (BUFFEND); 234 } 235 } 236 } 237 238 /* 239 * This generates keys. It's only called in the first fsort pass 240 */ 241 int 242 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 243 int flno, top; 244 struct filelist *filelist; 245 int nfiles; 246 RECHEADER *recbuf; 247 u_char *bufend; 248 struct field *ftbl; 249 { 250 static int filenum = 0; 251 static FILE *dbdesc = 0; 252 static DBT dbkey[1], line[1]; 253 static int overflow = 0; 254 int c; 255 256 if (overflow) { 257 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 258 ftbl); 259 if (overflow) 260 return (BUFFEND); 261 else 262 return (0); 263 } 264 265 for (;;) { 266 if (flno >= 0) { 267 if (!(dbdesc = fstack[flno].fp)) 268 return (EOF); 269 } else if (!dbdesc) { 270 if (filenum >= nfiles) 271 return (EOF); 272 dbdesc = fopen(filelist->names[filenum], "r"); 273 if (!dbdesc) 274 err(2, "%s", filelist->names[filenum]); 275 filenum++; 276 } 277 if (!(c = seq(dbdesc, line, dbkey))) { 278 if ((signed)line->size > bufend - recbuf->data) { 279 overflow = 1; 280 } else { 281 overflow = enterkey(recbuf, line, 282 bufend - (u_char *) recbuf, ftbl); 283 } 284 if (overflow) 285 return (BUFFEND); 286 else 287 return (0); 288 } 289 if (c == EOF) { 290 FCLOSE(dbdesc); 291 dbdesc = 0; 292 if (flno >= 0) 293 fstack[flno].fp = 0; 294 } else { 295 ((char *) line->data)[60] = '\000'; 296 warnx("makekey: line too long: ignoring %.100s...", 297 (char *)line->data); 298 } 299 } 300 } 301 302 /* 303 * get a key/line pair from fp 304 */ 305 static int 306 seq(fp, line, key) 307 FILE *fp; 308 DBT *key, *line; 309 { 310 static u_char *buf, flag = 1; 311 u_char *end, *pos; 312 int c; 313 u_char *nlinebuf; 314 315 if (flag) { 316 /* one-time initialization */ 317 flag = 0; 318 buf = linebuf; 319 line->data = buf; 320 } 321 end = buf + linebuf_size; 322 pos = buf; 323 while ((c = getc(fp)) != EOF) { 324 if ((*pos++ = c) == REC_D) { 325 line->size = pos - buf; 326 return (0); 327 } 328 if (pos == end) { 329 nlinebuf = realloc(linebuf, linebuf_size * 2); 330 if (!nlinebuf) 331 err(2, "realloc of linebuf to %lu bytes failed", 332 (unsigned long)linebuf_size * 2); 333 linebuf = nlinebuf; 334 linebuf_size *= 2; 335 336 end = linebuf + linebuf_size; 337 pos = linebuf + (pos - buf); 338 line->data = buf = linebuf; 339 continue; 340 } 341 } 342 if (pos != buf) { 343 *pos++ = REC_D; 344 line->size = pos - buf; 345 return (0); 346 } else 347 return (EOF); 348 } 349 350 /* 351 * write a key/line pair to a temporary file 352 */ 353 void 354 putrec(rec, fp) 355 const RECHEADER *rec; 356 FILE *fp; 357 { 358 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 359 } 360 361 /* 362 * write a line to output 363 */ 364 void 365 putline(rec, fp) 366 const RECHEADER *rec; 367 FILE *fp; 368 { 369 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 370 } 371 372 /* 373 * get a record from a temporary file. (Used by merge sort.) 374 */ 375 int 376 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 377 int flno, top; 378 struct filelist *filelist; 379 int nfiles; 380 RECHEADER *rec; 381 u_char *end; 382 struct field *dummy2; 383 { 384 int i; 385 FILE *fp; 386 387 fp = fstack[flno].fp; 388 if ((u_char *) rec > end - sizeof(TRECHEADER)) 389 return (BUFFEND); 390 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 391 fclose(fp); 392 fstack[flno].fp = 0; 393 return (EOF); 394 } 395 if (end - rec->data < rec->length) { 396 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 397 ungetc(*((char *) rec + i), fp); 398 return (BUFFEND); 399 } 400 fread(rec->data, rec->length, 1, fp); 401 return (0); 402 } 403