1 /* $NetBSD: files.c,v 1.23 2004/02/15 11:52:12 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include "sort.h" 72 #include "fsort.h" 73 74 #ifndef lint 75 __RCSID("$NetBSD: files.c,v 1.23 2004/02/15 11:52:12 jdolecek Exp $"); 76 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 77 #endif /* not lint */ 78 79 #include <string.h> 80 81 static int seq __P((FILE *, DBT *, DBT *)); 82 83 /* 84 * this is the subroutine for file management for fsort(). 85 * It keeps the buffers for all temporary files. 86 */ 87 int 88 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 89 int binno, infl0; 90 struct filelist *filelist; 91 int nfiles; 92 RECHEADER *pos; 93 u_char *end; 94 struct field *dummy; 95 { 96 int i; 97 u_char *hp; 98 static size_t nleft = 0; 99 static int cnt = 0, flag = -1; 100 static u_char maxb = 0; 101 static FILE *fp; 102 103 if (nleft == 0) { 104 if (binno < 0) /* reset files. */ { 105 for (i = 0; i < nfiles; i++) { 106 rewind(fstack[infl0 + i].fp); 107 fstack[infl0 + i].max_o = 0; 108 } 109 flag = -1; 110 nleft = cnt = 0; 111 return (-1); 112 } 113 maxb = fstack[infl0].maxb; 114 for (; nleft == 0; cnt++) { 115 if (cnt >= nfiles) { 116 cnt = 0; 117 return (EOF); 118 } 119 fp = fstack[infl0 + cnt].fp; 120 fread(&nleft, sizeof(nleft), 1, fp); 121 if (binno < maxb) 122 fstack[infl0+cnt].max_o 123 += sizeof(nleft) + nleft; 124 else if (binno == maxb) { 125 if (binno != fstack[infl0].lastb) { 126 fseeko(fp, fstack[infl0+ 127 cnt].max_o, SEEK_SET); 128 fread(&nleft, sizeof(nleft), 1, fp); 129 } 130 if (nleft == 0) 131 fclose(fp); 132 } else if (binno == maxb + 1) { /* skip a bin */ 133 fseek(fp, nleft, SEEK_CUR); 134 fread(&nleft, sizeof(nleft), 1, fp); 135 flag = cnt; 136 } 137 } 138 } 139 if ((u_char *) pos > end - sizeof(TRECHEADER)) 140 return (BUFFEND); 141 fread(pos, sizeof(TRECHEADER), 1, fp); 142 if (end - pos->data < pos->length) { 143 hp = ((u_char *)pos) + sizeof(TRECHEADER); 144 for (i = sizeof(TRECHEADER); i ; i--) 145 ungetc(*--hp, fp); 146 return (BUFFEND); 147 } 148 fread(pos->data, pos->length, 1, fp); 149 nleft -= pos->length + sizeof(TRECHEADER); 150 if (nleft == 0 && binno == fstack[infl0].maxb) 151 fclose(fp); 152 return (0); 153 } 154 155 /* 156 * this is called when there is no special key. It's only called 157 * in the first fsort pass. 158 */ 159 int 160 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 161 int flno, top; 162 struct filelist *filelist; 163 int nfiles; 164 RECHEADER *recbuf; 165 u_char *bufend; 166 struct field *dummy2; 167 { 168 static u_char *obufend; 169 static size_t osz; 170 char *pos; 171 static int filenum = 0, overflow = 0; 172 static FILE *fp = 0; 173 int c; 174 175 pos = (char *) recbuf->data; 176 if (overflow) { 177 /* 178 * Buffer shortage is solved by either of two ways: 179 * o flush previous buffered data and start using the 180 * buffer from start (see fsort()) 181 * o realloc buffer and bump bufend 182 * 183 * The former is preferred, realloc is only done when 184 * there is exactly one item in buffer which does not fit. 185 */ 186 if (bufend == obufend) 187 memmove(pos, bufend - osz, osz); 188 189 pos += osz; 190 overflow = 0; 191 } 192 for (;;) { 193 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 194 return (EOF); 195 else if (fp == NULL) { 196 if (filenum >= nfiles) 197 return (EOF); 198 if (!(fp = fopen(filelist->names[filenum], "r"))) 199 err(2, "%s", filelist->names[filenum]); 200 filenum++; 201 } 202 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 203 if ((*pos++ = c) == REC_D) { 204 recbuf->offset = 0; 205 recbuf->length = pos - (char *) recbuf->data; 206 return (0); 207 } 208 } 209 if (pos >= (char *)bufend) { 210 if (recbuf->data < bufend) { 211 overflow = 1; 212 obufend = bufend; 213 osz = (pos - (char *) recbuf->data); 214 } 215 return (BUFFEND); 216 } else if (c == EOF) { 217 if (recbuf->data != (u_char *) pos) { 218 *pos++ = REC_D; 219 recbuf->offset = 0; 220 recbuf->length = pos - (char *) recbuf->data; 221 return (0); 222 } 223 FCLOSE(fp); 224 fp = 0; 225 if (flno >= 0) 226 fstack[flno].fp = 0; 227 } else { 228 229 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 230 231 /* Consume the rest of line from input */ 232 while ((c = getc(fp)) != REC_D && c != EOF) 233 ; 234 235 recbuf->offset = 0; 236 recbuf->length = 0; 237 238 return (BUFFEND); 239 } 240 } 241 } 242 243 /* 244 * This generates keys. It's only called in the first fsort pass 245 */ 246 int 247 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 248 int flno, top; 249 struct filelist *filelist; 250 int nfiles; 251 RECHEADER *recbuf; 252 u_char *bufend; 253 struct field *ftbl; 254 { 255 static int filenum = 0; 256 static FILE *dbdesc = 0; 257 static DBT dbkey[1], line[1]; 258 static int overflow = 0; 259 int c; 260 261 if (overflow) { 262 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 263 ftbl); 264 if (overflow) 265 return (BUFFEND); 266 else 267 return (0); 268 } 269 270 for (;;) { 271 if (flno >= 0) { 272 if (!(dbdesc = fstack[flno].fp)) 273 return (EOF); 274 } else if (!dbdesc) { 275 if (filenum >= nfiles) 276 return (EOF); 277 dbdesc = fopen(filelist->names[filenum], "r"); 278 if (!dbdesc) 279 err(2, "%s", filelist->names[filenum]); 280 filenum++; 281 } 282 if (!(c = seq(dbdesc, line, dbkey))) { 283 if ((signed)line->size > bufend - recbuf->data) { 284 overflow = 1; 285 } else { 286 overflow = enterkey(recbuf, line, 287 bufend - (u_char *) recbuf, ftbl); 288 } 289 if (overflow) 290 return (BUFFEND); 291 else 292 return (0); 293 } 294 if (c == EOF) { 295 FCLOSE(dbdesc); 296 dbdesc = 0; 297 if (flno >= 0) 298 fstack[flno].fp = 0; 299 } else { 300 ((char *) line->data)[60] = '\000'; 301 warnx("makekey: line too long: ignoring %.100s...", 302 (char *)line->data); 303 } 304 } 305 } 306 307 /* 308 * get a key/line pair from fp 309 */ 310 static int 311 seq(fp, line, key) 312 FILE *fp; 313 DBT *key, *line; 314 { 315 static char *buf, flag = 1; 316 char *end, *pos; 317 int c; 318 u_char *nlinebuf; 319 320 if (flag) { 321 /* one-time initialization */ 322 flag = 0; 323 buf = (char *) linebuf; 324 line->data = buf; 325 } 326 end = buf + linebuf_size; 327 pos = buf; 328 while ((c = getc(fp)) != EOF) { 329 if ((*pos++ = c) == REC_D) { 330 line->size = pos - buf; 331 return (0); 332 } 333 if (pos == end) { 334 nlinebuf = realloc(linebuf, linebuf_size * 2); 335 if (!nlinebuf) 336 err(2, "realloc of linebuf to %lu bytes failed", 337 (unsigned long)linebuf_size * 2); 338 linebuf = nlinebuf; 339 linebuf_size *= 2; 340 341 end = linebuf + linebuf_size; 342 pos = linebuf + (pos - buf); 343 line->data = buf = (char *)linebuf; 344 continue; 345 } 346 } 347 if (pos != buf) { 348 *pos++ = REC_D; 349 line->size = pos - buf; 350 return (0); 351 } else 352 return (EOF); 353 } 354 355 /* 356 * write a key/line pair to a temporary file 357 */ 358 void 359 putrec(rec, fp) 360 const RECHEADER *rec; 361 FILE *fp; 362 { 363 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 364 } 365 366 /* 367 * write a line to output 368 */ 369 void 370 putline(rec, fp) 371 const RECHEADER *rec; 372 FILE *fp; 373 { 374 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 375 } 376 377 /* 378 * get a record from a temporary file. (Used by merge sort.) 379 */ 380 int 381 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 382 int flno, top; 383 struct filelist *filelist; 384 int nfiles; 385 RECHEADER *rec; 386 u_char *end; 387 struct field *dummy2; 388 { 389 int i; 390 FILE *fp; 391 392 fp = fstack[flno].fp; 393 if ((u_char *) rec > end - sizeof(TRECHEADER)) 394 return (BUFFEND); 395 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 396 fclose(fp); 397 fstack[flno].fp = 0; 398 return (EOF); 399 } 400 if (end - rec->data < rec->length) { 401 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 402 ungetc(*((char *) rec + i), fp); 403 return (BUFFEND); 404 } 405 fread(rec->data, rec->length, 1, fp); 406 return (0); 407 } 408