1 /* $NetBSD: files.c,v 1.19 2003/08/07 11:15:53 agc Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "sort.h" 36 #include "fsort.h" 37 38 #ifndef lint 39 __RCSID("$NetBSD: files.c,v 1.19 2003/08/07 11:15:53 agc Exp $"); 40 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 41 #endif /* not lint */ 42 43 #include <string.h> 44 45 static int seq __P((FILE *, DBT *, DBT *)); 46 47 /* 48 * this is the subroutine for file management for fsort(). 49 * It keeps the buffers for all temporary files. 50 */ 51 int 52 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 53 int binno, infl0; 54 struct filelist *filelist; 55 int nfiles; 56 RECHEADER *pos; 57 u_char *end; 58 struct field *dummy; 59 { 60 int i; 61 u_char *hp; 62 static size_t nleft = 0; 63 static int cnt = 0, flag = -1; 64 static u_char maxb = 0; 65 static FILE *fp; 66 67 if (nleft == 0) { 68 if (binno < 0) /* reset files. */ { 69 for (i = 0; i < nfiles; i++) { 70 rewind(fstack[infl0 + i].fp); 71 fstack[infl0 + i].max_o = 0; 72 } 73 flag = -1; 74 nleft = cnt = 0; 75 return (-1); 76 } 77 maxb = fstack[infl0].maxb; 78 for (; nleft == 0; cnt++) { 79 if (cnt >= nfiles) { 80 cnt = 0; 81 return (EOF); 82 } 83 fp = fstack[infl0 + cnt].fp; 84 fread(&nleft, sizeof(nleft), 1, fp); 85 if (binno < maxb) 86 fstack[infl0+cnt].max_o 87 += sizeof(nleft) + nleft; 88 else if (binno == maxb) { 89 if (binno != fstack[infl0].lastb) { 90 fseeko(fp, fstack[infl0+ 91 cnt].max_o, SEEK_SET); 92 fread(&nleft, sizeof(nleft), 1, fp); 93 } 94 if (nleft == 0) 95 fclose(fp); 96 } else if (binno == maxb + 1) { /* skip a bin */ 97 fseek(fp, nleft, SEEK_CUR); 98 fread(&nleft, sizeof(nleft), 1, fp); 99 flag = cnt; 100 } 101 } 102 } 103 if ((u_char *) pos > end - sizeof(TRECHEADER)) 104 return (BUFFEND); 105 fread(pos, sizeof(TRECHEADER), 1, fp); 106 if (end - pos->data < pos->length) { 107 hp = ((u_char *)pos) + sizeof(TRECHEADER); 108 for (i = sizeof(TRECHEADER); i ; i--) 109 ungetc(*--hp, fp); 110 return (BUFFEND); 111 } 112 fread(pos->data, pos->length, 1, fp); 113 nleft -= pos->length + sizeof(TRECHEADER); 114 if (nleft == 0 && binno == fstack[infl0].maxb) 115 fclose(fp); 116 return (0); 117 } 118 119 /* 120 * this is called when there is no special key. It's only called 121 * in the first fsort pass. 122 */ 123 int 124 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 125 int flno, top; 126 struct filelist *filelist; 127 int nfiles; 128 RECHEADER *recbuf; 129 u_char *bufend; 130 struct field *dummy2; 131 { 132 static u_char *obufend; 133 static size_t osz; 134 char *pos; 135 static int filenum = 0, overflow = 0; 136 static FILE *fp = 0; 137 int c; 138 139 pos = (char *) recbuf->data; 140 if (overflow) { 141 /* 142 * Buffer shortage is solved by either of two ways: 143 * o flush previous buffered data and start using the 144 * buffer from start (see fsort()) 145 * o realloc buffer and bump bufend 146 * 147 * The former is preferred, realloc is only done when 148 * there is exactly one item in buffer which does not fit. 149 */ 150 if (bufend == obufend) 151 memmove(pos, bufend - osz, osz); 152 153 pos += osz; 154 overflow = 0; 155 } 156 for (;;) { 157 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 158 return (EOF); 159 else if (fp == NULL) { 160 if (filenum >= nfiles) 161 return (EOF); 162 if (!(fp = fopen(filelist->names[filenum], "r"))) 163 err(2, "%s", filelist->names[filenum]); 164 filenum++; 165 } 166 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 167 if ((*pos++ = c) == REC_D) { 168 recbuf->offset = 0; 169 recbuf->length = pos - (char *) recbuf->data; 170 return (0); 171 } 172 } 173 if (pos >= (char *)bufend) { 174 if (recbuf->data < bufend) { 175 overflow = 1; 176 obufend = bufend; 177 osz = (pos - (char *) recbuf->data); 178 } 179 return (BUFFEND); 180 } else if (c == EOF) { 181 if (recbuf->data != (u_char *) pos) { 182 *pos++ = REC_D; 183 recbuf->offset = 0; 184 recbuf->length = pos - (char *) recbuf->data; 185 return (0); 186 } 187 FCLOSE(fp); 188 fp = 0; 189 if (flno >= 0) 190 fstack[flno].fp = 0; 191 } else { 192 193 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 194 195 /* Consume the rest of line from input */ 196 while((c = getc(fp)) != REC_D && c != EOF) 197 ; 198 199 recbuf->offset = 0; 200 recbuf->length = 0; 201 202 return (BUFFEND); 203 } 204 } 205 } 206 207 /* 208 * This generates keys. It's only called in the first fsort pass 209 */ 210 int 211 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 212 int flno, top; 213 struct filelist *filelist; 214 int nfiles; 215 RECHEADER *recbuf; 216 u_char *bufend; 217 struct field *ftbl; 218 { 219 static int filenum = 0; 220 static FILE *dbdesc = 0; 221 static DBT dbkey[1], line[1]; 222 static int overflow = 0; 223 int c; 224 225 if (overflow) { 226 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 227 ftbl); 228 if (overflow) 229 return (BUFFEND); 230 else 231 return (0); 232 } 233 234 for (;;) { 235 if (flno >= 0) { 236 if (!(dbdesc = fstack[flno].fp)) 237 return (EOF); 238 } else if (!dbdesc) { 239 if (filenum >= nfiles) 240 return (EOF); 241 dbdesc = fopen(filelist->names[filenum], "r"); 242 if (!dbdesc) 243 err(2, "%s", filelist->names[filenum]); 244 filenum++; 245 } 246 if (!(c = seq(dbdesc, line, dbkey))) { 247 if ((signed)line->size > bufend - recbuf->data) { 248 overflow = 1; 249 } else { 250 overflow = enterkey(recbuf, line, 251 bufend - (u_char *) recbuf, ftbl); 252 } 253 if (overflow) 254 return (BUFFEND); 255 else 256 return (0); 257 } 258 if (c == EOF) { 259 FCLOSE(dbdesc); 260 dbdesc = 0; 261 if (flno >= 0) 262 fstack[flno].fp = 0; 263 } else { 264 ((char *) line->data)[60] = '\000'; 265 warnx("makekey: line too long: ignoring %.100s...", 266 (char *)line->data); 267 } 268 } 269 } 270 271 /* 272 * get a key/line pair from fp 273 */ 274 static int 275 seq(fp, line, key) 276 FILE *fp; 277 DBT *key, *line; 278 { 279 static char *buf, flag = 1; 280 char *end, *pos; 281 int c; 282 283 if (flag) { 284 flag = 0; 285 buf = (char *) linebuf; 286 end = buf + linebuf_size; 287 line->data = buf; 288 } 289 pos = buf; 290 while ((c = getc(fp)) != EOF) { 291 if ((*pos++ = c) == REC_D) { 292 line->size = pos - buf; 293 return (0); 294 } 295 if (pos == end) { 296 linebuf_size *= 2; 297 linebuf = realloc(linebuf, linebuf_size); 298 if (!linebuf) 299 err(2, "realloc of linebuf to %lu bytes failed", 300 (unsigned long)linebuf_size); 301 302 end = linebuf + linebuf_size; 303 pos = linebuf + (pos - buf); 304 line->data = buf = (char *)linebuf; 305 continue; 306 } 307 } 308 if (pos != buf) { 309 *pos++ = REC_D; 310 line->size = pos - buf; 311 return (0); 312 } else 313 return (EOF); 314 } 315 316 /* 317 * write a key/line pair to a temporary file 318 */ 319 void 320 putrec(rec, fp) 321 const RECHEADER *rec; 322 FILE *fp; 323 { 324 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 325 } 326 327 /* 328 * write a line to output 329 */ 330 void 331 putline(rec, fp) 332 const RECHEADER *rec; 333 FILE *fp; 334 { 335 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 336 } 337 338 /* 339 * get a record from a temporary file. (Used by merge sort.) 340 */ 341 int 342 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 343 int flno, top; 344 struct filelist *filelist; 345 int nfiles; 346 RECHEADER *rec; 347 u_char *end; 348 struct field *dummy2; 349 { 350 int i; 351 FILE *fp; 352 353 fp = fstack[flno].fp; 354 if ((u_char *) rec > end - sizeof(TRECHEADER)) 355 return (BUFFEND); 356 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 357 fclose(fp); 358 fstack[flno].fp = 0; 359 return (EOF); 360 } 361 if (end - rec->data < rec->length) { 362 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 363 ungetc(*((char *) rec + i), fp); 364 return (BUFFEND); 365 } 366 fread(rec->data, rec->length, 1, fp); 367 return (0); 368 } 369