1 /* $NetBSD: files.c,v 1.24 2005/06/07 09:51:34 he Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include "sort.h" 72 #include "fsort.h" 73 74 #ifndef lint 75 __RCSID("$NetBSD: files.c,v 1.24 2005/06/07 09:51:34 he Exp $"); 76 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 77 #endif /* not lint */ 78 79 #include <string.h> 80 81 static int seq __P((FILE *, DBT *, DBT *)); 82 83 /* 84 * this is the subroutine for file management for fsort(). 85 * It keeps the buffers for all temporary files. 86 */ 87 int 88 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 89 int binno, infl0; 90 struct filelist *filelist; 91 int nfiles; 92 RECHEADER *pos; 93 u_char *end; 94 struct field *dummy; 95 { 96 int i; 97 u_char *hp; 98 static size_t nleft = 0; 99 static int cnt = 0, flag = -1; 100 static u_char maxb = 0; 101 static FILE *fp; 102 103 if (nleft == 0) { 104 if (binno < 0) /* reset files. */ { 105 for (i = 0; i < nfiles; i++) { 106 rewind(fstack[infl0 + i].fp); 107 fstack[infl0 + i].max_o = 0; 108 } 109 flag = -1; 110 nleft = cnt = 0; 111 return (-1); 112 } 113 maxb = fstack[infl0].maxb; 114 for (; nleft == 0; cnt++) { 115 if (cnt >= nfiles) { 116 cnt = 0; 117 return (EOF); 118 } 119 fp = fstack[infl0 + cnt].fp; 120 fread(&nleft, sizeof(nleft), 1, fp); 121 if (binno < maxb) 122 fstack[infl0+cnt].max_o 123 += sizeof(nleft) + nleft; 124 else if (binno == maxb) { 125 if (binno != fstack[infl0].lastb) { 126 fseeko(fp, fstack[infl0+ 127 cnt].max_o, SEEK_SET); 128 fread(&nleft, sizeof(nleft), 1, fp); 129 } 130 if (nleft == 0) 131 fclose(fp); 132 } else if (binno == maxb + 1) { /* skip a bin */ 133 fseek(fp, nleft, SEEK_CUR); 134 fread(&nleft, sizeof(nleft), 1, fp); 135 flag = cnt; 136 } 137 } 138 } 139 if ((u_char *) pos > end - sizeof(TRECHEADER)) 140 return (BUFFEND); 141 fread(pos, sizeof(TRECHEADER), 1, fp); 142 if (end - pos->data < pos->length) { 143 hp = ((u_char *)pos) + sizeof(TRECHEADER); 144 for (i = sizeof(TRECHEADER); i ; i--) 145 ungetc(*--hp, fp); 146 return (BUFFEND); 147 } 148 fread(pos->data, pos->length, 1, fp); 149 nleft -= pos->length + sizeof(TRECHEADER); 150 if (nleft == 0 && binno == fstack[infl0].maxb) 151 fclose(fp); 152 return (0); 153 } 154 155 /* 156 * this is called when there is no special key. It's only called 157 * in the first fsort pass. 158 */ 159 int 160 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 161 int flno, top; 162 struct filelist *filelist; 163 int nfiles; 164 RECHEADER *recbuf; 165 u_char *bufend; 166 struct field *dummy2; 167 { 168 static u_char *obufend; 169 static size_t osz; 170 char *pos; 171 static int filenum = 0, overflow = 0; 172 static FILE *fp = 0; 173 int c; 174 175 c = 0; /* XXXGCC -Wuninitialized [pmppc] */ 176 177 pos = (char *) recbuf->data; 178 if (overflow) { 179 /* 180 * Buffer shortage is solved by either of two ways: 181 * o flush previous buffered data and start using the 182 * buffer from start (see fsort()) 183 * o realloc buffer and bump bufend 184 * 185 * The former is preferred, realloc is only done when 186 * there is exactly one item in buffer which does not fit. 187 */ 188 if (bufend == obufend) 189 memmove(pos, bufend - osz, osz); 190 191 pos += osz; 192 overflow = 0; 193 } 194 for (;;) { 195 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 196 return (EOF); 197 else if (fp == NULL) { 198 if (filenum >= nfiles) 199 return (EOF); 200 if (!(fp = fopen(filelist->names[filenum], "r"))) 201 err(2, "%s", filelist->names[filenum]); 202 filenum++; 203 } 204 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 205 if ((*pos++ = c) == REC_D) { 206 recbuf->offset = 0; 207 recbuf->length = pos - (char *) recbuf->data; 208 return (0); 209 } 210 } 211 if (pos >= (char *)bufend) { 212 if (recbuf->data < bufend) { 213 overflow = 1; 214 obufend = bufend; 215 osz = (pos - (char *) recbuf->data); 216 } 217 return (BUFFEND); 218 } else if (c == EOF) { 219 if (recbuf->data != (u_char *) pos) { 220 *pos++ = REC_D; 221 recbuf->offset = 0; 222 recbuf->length = pos - (char *) recbuf->data; 223 return (0); 224 } 225 FCLOSE(fp); 226 fp = 0; 227 if (flno >= 0) 228 fstack[flno].fp = 0; 229 } else { 230 231 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 232 233 /* Consume the rest of line from input */ 234 while ((c = getc(fp)) != REC_D && c != EOF) 235 ; 236 237 recbuf->offset = 0; 238 recbuf->length = 0; 239 240 return (BUFFEND); 241 } 242 } 243 } 244 245 /* 246 * This generates keys. It's only called in the first fsort pass 247 */ 248 int 249 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 250 int flno, top; 251 struct filelist *filelist; 252 int nfiles; 253 RECHEADER *recbuf; 254 u_char *bufend; 255 struct field *ftbl; 256 { 257 static int filenum = 0; 258 static FILE *dbdesc = 0; 259 static DBT dbkey[1], line[1]; 260 static int overflow = 0; 261 int c; 262 263 if (overflow) { 264 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 265 ftbl); 266 if (overflow) 267 return (BUFFEND); 268 else 269 return (0); 270 } 271 272 for (;;) { 273 if (flno >= 0) { 274 if (!(dbdesc = fstack[flno].fp)) 275 return (EOF); 276 } else if (!dbdesc) { 277 if (filenum >= nfiles) 278 return (EOF); 279 dbdesc = fopen(filelist->names[filenum], "r"); 280 if (!dbdesc) 281 err(2, "%s", filelist->names[filenum]); 282 filenum++; 283 } 284 if (!(c = seq(dbdesc, line, dbkey))) { 285 if ((signed)line->size > bufend - recbuf->data) { 286 overflow = 1; 287 } else { 288 overflow = enterkey(recbuf, line, 289 bufend - (u_char *) recbuf, ftbl); 290 } 291 if (overflow) 292 return (BUFFEND); 293 else 294 return (0); 295 } 296 if (c == EOF) { 297 FCLOSE(dbdesc); 298 dbdesc = 0; 299 if (flno >= 0) 300 fstack[flno].fp = 0; 301 } else { 302 ((char *) line->data)[60] = '\000'; 303 warnx("makekey: line too long: ignoring %.100s...", 304 (char *)line->data); 305 } 306 } 307 } 308 309 /* 310 * get a key/line pair from fp 311 */ 312 static int 313 seq(fp, line, key) 314 FILE *fp; 315 DBT *key, *line; 316 { 317 static char *buf, flag = 1; 318 char *end, *pos; 319 int c; 320 u_char *nlinebuf; 321 322 if (flag) { 323 /* one-time initialization */ 324 flag = 0; 325 buf = (char *) linebuf; 326 line->data = buf; 327 } 328 end = buf + linebuf_size; 329 pos = buf; 330 while ((c = getc(fp)) != EOF) { 331 if ((*pos++ = c) == REC_D) { 332 line->size = pos - buf; 333 return (0); 334 } 335 if (pos == end) { 336 nlinebuf = realloc(linebuf, linebuf_size * 2); 337 if (!nlinebuf) 338 err(2, "realloc of linebuf to %lu bytes failed", 339 (unsigned long)linebuf_size * 2); 340 linebuf = nlinebuf; 341 linebuf_size *= 2; 342 343 end = linebuf + linebuf_size; 344 pos = linebuf + (pos - buf); 345 line->data = buf = (char *)linebuf; 346 continue; 347 } 348 } 349 if (pos != buf) { 350 *pos++ = REC_D; 351 line->size = pos - buf; 352 return (0); 353 } else 354 return (EOF); 355 } 356 357 /* 358 * write a key/line pair to a temporary file 359 */ 360 void 361 putrec(rec, fp) 362 const RECHEADER *rec; 363 FILE *fp; 364 { 365 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 366 } 367 368 /* 369 * write a line to output 370 */ 371 void 372 putline(rec, fp) 373 const RECHEADER *rec; 374 FILE *fp; 375 { 376 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 377 } 378 379 /* 380 * get a record from a temporary file. (Used by merge sort.) 381 */ 382 int 383 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 384 int flno, top; 385 struct filelist *filelist; 386 int nfiles; 387 RECHEADER *rec; 388 u_char *end; 389 struct field *dummy2; 390 { 391 int i; 392 FILE *fp; 393 394 fp = fstack[flno].fp; 395 if ((u_char *) rec > end - sizeof(TRECHEADER)) 396 return (BUFFEND); 397 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 398 fclose(fp); 399 fstack[flno].fp = 0; 400 return (EOF); 401 } 402 if (end - rec->data < rec->length) { 403 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 404 ungetc(*((char *) rec + i), fp); 405 return (BUFFEND); 406 } 407 fread(rec->data, rec->length, 1, fp); 408 return (0); 409 } 410