1 /* $NetBSD: files.c,v 1.22 2003/10/18 03:03:20 itojun Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include "sort.h" 72 #include "fsort.h" 73 74 #ifndef lint 75 __RCSID("$NetBSD: files.c,v 1.22 2003/10/18 03:03:20 itojun Exp $"); 76 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 77 #endif /* not lint */ 78 79 #include <string.h> 80 81 static int seq __P((FILE *, DBT *, DBT *)); 82 83 /* 84 * this is the subroutine for file management for fsort(). 85 * It keeps the buffers for all temporary files. 86 */ 87 int 88 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 89 int binno, infl0; 90 struct filelist *filelist; 91 int nfiles; 92 RECHEADER *pos; 93 u_char *end; 94 struct field *dummy; 95 { 96 int i; 97 u_char *hp; 98 static size_t nleft = 0; 99 static int cnt = 0, flag = -1; 100 static u_char maxb = 0; 101 static FILE *fp; 102 103 if (nleft == 0) { 104 if (binno < 0) /* reset files. */ { 105 for (i = 0; i < nfiles; i++) { 106 rewind(fstack[infl0 + i].fp); 107 fstack[infl0 + i].max_o = 0; 108 } 109 flag = -1; 110 nleft = cnt = 0; 111 return (-1); 112 } 113 maxb = fstack[infl0].maxb; 114 for (; nleft == 0; cnt++) { 115 if (cnt >= nfiles) { 116 cnt = 0; 117 return (EOF); 118 } 119 fp = fstack[infl0 + cnt].fp; 120 fread(&nleft, sizeof(nleft), 1, fp); 121 if (binno < maxb) 122 fstack[infl0+cnt].max_o 123 += sizeof(nleft) + nleft; 124 else if (binno == maxb) { 125 if (binno != fstack[infl0].lastb) { 126 fseeko(fp, fstack[infl0+ 127 cnt].max_o, SEEK_SET); 128 fread(&nleft, sizeof(nleft), 1, fp); 129 } 130 if (nleft == 0) 131 fclose(fp); 132 } else if (binno == maxb + 1) { /* skip a bin */ 133 fseek(fp, nleft, SEEK_CUR); 134 fread(&nleft, sizeof(nleft), 1, fp); 135 flag = cnt; 136 } 137 } 138 } 139 if ((u_char *) pos > end - sizeof(TRECHEADER)) 140 return (BUFFEND); 141 fread(pos, sizeof(TRECHEADER), 1, fp); 142 if (end - pos->data < pos->length) { 143 hp = ((u_char *)pos) + sizeof(TRECHEADER); 144 for (i = sizeof(TRECHEADER); i ; i--) 145 ungetc(*--hp, fp); 146 return (BUFFEND); 147 } 148 fread(pos->data, pos->length, 1, fp); 149 nleft -= pos->length + sizeof(TRECHEADER); 150 if (nleft == 0 && binno == fstack[infl0].maxb) 151 fclose(fp); 152 return (0); 153 } 154 155 /* 156 * this is called when there is no special key. It's only called 157 * in the first fsort pass. 158 */ 159 int 160 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 161 int flno, top; 162 struct filelist *filelist; 163 int nfiles; 164 RECHEADER *recbuf; 165 u_char *bufend; 166 struct field *dummy2; 167 { 168 static u_char *obufend; 169 static size_t osz; 170 char *pos; 171 static int filenum = 0, overflow = 0; 172 static FILE *fp = 0; 173 int c; 174 175 pos = (char *) recbuf->data; 176 if (overflow) { 177 /* 178 * Buffer shortage is solved by either of two ways: 179 * o flush previous buffered data and start using the 180 * buffer from start (see fsort()) 181 * o realloc buffer and bump bufend 182 * 183 * The former is preferred, realloc is only done when 184 * there is exactly one item in buffer which does not fit. 185 */ 186 if (bufend == obufend) 187 memmove(pos, bufend - osz, osz); 188 189 pos += osz; 190 overflow = 0; 191 } 192 for (;;) { 193 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 194 return (EOF); 195 else if (fp == NULL) { 196 if (filenum >= nfiles) 197 return (EOF); 198 if (!(fp = fopen(filelist->names[filenum], "r"))) 199 err(2, "%s", filelist->names[filenum]); 200 filenum++; 201 } 202 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 203 if ((*pos++ = c) == REC_D) { 204 recbuf->offset = 0; 205 recbuf->length = pos - (char *) recbuf->data; 206 return (0); 207 } 208 } 209 if (pos >= (char *)bufend) { 210 if (recbuf->data < bufend) { 211 overflow = 1; 212 obufend = bufend; 213 osz = (pos - (char *) recbuf->data); 214 } 215 return (BUFFEND); 216 } else if (c == EOF) { 217 if (recbuf->data != (u_char *) pos) { 218 *pos++ = REC_D; 219 recbuf->offset = 0; 220 recbuf->length = pos - (char *) recbuf->data; 221 return (0); 222 } 223 FCLOSE(fp); 224 fp = 0; 225 if (flno >= 0) 226 fstack[flno].fp = 0; 227 } else { 228 229 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 230 231 /* Consume the rest of line from input */ 232 while ((c = getc(fp)) != REC_D && c != EOF) 233 ; 234 235 recbuf->offset = 0; 236 recbuf->length = 0; 237 238 return (BUFFEND); 239 } 240 } 241 } 242 243 /* 244 * This generates keys. It's only called in the first fsort pass 245 */ 246 int 247 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 248 int flno, top; 249 struct filelist *filelist; 250 int nfiles; 251 RECHEADER *recbuf; 252 u_char *bufend; 253 struct field *ftbl; 254 { 255 static int filenum = 0; 256 static FILE *dbdesc = 0; 257 static DBT dbkey[1], line[1]; 258 static int overflow = 0; 259 int c; 260 261 if (overflow) { 262 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 263 ftbl); 264 if (overflow) 265 return (BUFFEND); 266 else 267 return (0); 268 } 269 270 for (;;) { 271 if (flno >= 0) { 272 if (!(dbdesc = fstack[flno].fp)) 273 return (EOF); 274 } else if (!dbdesc) { 275 if (filenum >= nfiles) 276 return (EOF); 277 dbdesc = fopen(filelist->names[filenum], "r"); 278 if (!dbdesc) 279 err(2, "%s", filelist->names[filenum]); 280 filenum++; 281 } 282 if (!(c = seq(dbdesc, line, dbkey))) { 283 if ((signed)line->size > bufend - recbuf->data) { 284 overflow = 1; 285 } else { 286 overflow = enterkey(recbuf, line, 287 bufend - (u_char *) recbuf, ftbl); 288 } 289 if (overflow) 290 return (BUFFEND); 291 else 292 return (0); 293 } 294 if (c == EOF) { 295 FCLOSE(dbdesc); 296 dbdesc = 0; 297 if (flno >= 0) 298 fstack[flno].fp = 0; 299 } else { 300 ((char *) line->data)[60] = '\000'; 301 warnx("makekey: line too long: ignoring %.100s...", 302 (char *)line->data); 303 } 304 } 305 } 306 307 /* 308 * get a key/line pair from fp 309 */ 310 static int 311 seq(fp, line, key) 312 FILE *fp; 313 DBT *key, *line; 314 { 315 static char *buf, flag = 1; 316 char *end, *pos; 317 int c; 318 u_char *nlinebuf; 319 320 if (flag) { 321 flag = 0; 322 buf = (char *) linebuf; 323 end = buf + linebuf_size; 324 line->data = buf; 325 } 326 pos = buf; 327 while ((c = getc(fp)) != EOF) { 328 if ((*pos++ = c) == REC_D) { 329 line->size = pos - buf; 330 return (0); 331 } 332 if (pos == end) { 333 nlinebuf = realloc(linebuf, linebuf_size * 2); 334 if (!nlinebuf) 335 err(2, "realloc of linebuf to %lu bytes failed", 336 (unsigned long)linebuf_size * 2); 337 linebuf = nlinebuf; 338 linebuf_size *= 2; 339 340 end = linebuf + linebuf_size; 341 pos = linebuf + (pos - buf); 342 line->data = buf = (char *)linebuf; 343 continue; 344 } 345 } 346 if (pos != buf) { 347 *pos++ = REC_D; 348 line->size = pos - buf; 349 return (0); 350 } else 351 return (EOF); 352 } 353 354 /* 355 * write a key/line pair to a temporary file 356 */ 357 void 358 putrec(rec, fp) 359 const RECHEADER *rec; 360 FILE *fp; 361 { 362 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 363 } 364 365 /* 366 * write a line to output 367 */ 368 void 369 putline(rec, fp) 370 const RECHEADER *rec; 371 FILE *fp; 372 { 373 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 374 } 375 376 /* 377 * get a record from a temporary file. (Used by merge sort.) 378 */ 379 int 380 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 381 int flno, top; 382 struct filelist *filelist; 383 int nfiles; 384 RECHEADER *rec; 385 u_char *end; 386 struct field *dummy2; 387 { 388 int i; 389 FILE *fp; 390 391 fp = fstack[flno].fp; 392 if ((u_char *) rec > end - sizeof(TRECHEADER)) 393 return (BUFFEND); 394 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 395 fclose(fp); 396 fstack[flno].fp = 0; 397 return (EOF); 398 } 399 if (end - rec->data < rec->length) { 400 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 401 ungetc(*((char *) rec + i), fp); 402 return (BUFFEND); 403 } 404 fread(rec->data, rec->length, 1, fp); 405 return (0); 406 } 407