/* $NetBSD: files.c,v 1.13 2001/01/13 20:10:52 jdolecek Exp $ */

/*-
 * Copyright (c) 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include "sort.h" 40 #include "fsort.h" 41 42 #ifndef lint 43 __RCSID("$NetBSD: files.c,v 1.13 2001/01/13 20:10:52 jdolecek Exp $"); 44 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 45 #endif /* not lint */ 46 47 #include <string.h> 48 49 static int seq __P((FILE *, DBT *, DBT *)); 50 51 /* 52 * this is the subroutine for file management for fsort(). 53 * It keeps the buffers for all temporary files. 54 */ 55 int 56 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 57 int binno, infl0; 58 struct filelist *filelist; 59 int nfiles; 60 struct recheader *pos; 61 u_char *end; 62 struct field *dummy; 63 { 64 int i; 65 u_char *hp; 66 static size_t nleft = 0; 67 static int cnt = 0, flag = -1; 68 static u_char maxb = 0; 69 static FILE *fp; 70 71 if (nleft == 0) { 72 if (binno < 0) /* reset files. 
*/ { 73 for (i = 0; i < nfiles; i++) { 74 rewind(fstack[infl0 + i].fp); 75 fstack[infl0 + i].max_o = 0; 76 } 77 flag = -1; 78 nleft = cnt = 0; 79 return(-1); 80 } 81 maxb = fstack[infl0].maxb; 82 for (; nleft == 0; cnt++) { 83 if (cnt >= nfiles) { 84 cnt = 0; 85 return (EOF); 86 } 87 fp = fstack[infl0 + cnt].fp; 88 fread(&nleft, sizeof(nleft), 1, fp); 89 if (binno < maxb) 90 fstack[infl0+cnt].max_o 91 += sizeof(nleft) + nleft; 92 else if (binno == maxb) { 93 if (binno != fstack[infl0].lastb) { 94 fseek(fp, fstack[infl0+ 95 cnt].max_o, SEEK_SET); 96 fread(&nleft, sizeof(nleft), 1, fp); 97 } 98 if (nleft == 0) 99 fclose(fp); 100 } else if (binno == maxb + 1) { /* skip a bin */ 101 fseek(fp, nleft, SEEK_CUR); 102 fread(&nleft, sizeof(nleft), 1, fp); 103 flag = cnt; 104 } 105 } 106 } 107 if ((u_char *) pos > end - sizeof(TRECHEADER)) 108 return (BUFFEND); 109 fread(pos, sizeof(TRECHEADER), 1, fp); 110 if (end - pos->data < pos->length) { 111 hp = ((u_char *) pos) + sizeof(TRECHEADER); 112 for (i = sizeof(TRECHEADER); i ; i--) 113 ungetc(*--hp, fp); 114 return (BUFFEND); 115 } 116 fread(pos->data, pos->length, 1, fp); 117 nleft -= pos->length + sizeof(TRECHEADER); 118 if (nleft == 0 && binno == fstack[infl0].maxb) 119 fclose(fp); 120 return (0); 121 } 122 123 /* 124 * this is called when there is no special key. It's only called 125 * in the first fsort pass. 
126 */ 127 int 128 makeline(flno, top, filelist, nfiles, buffer, bufend, dummy2) 129 int flno, top; 130 struct filelist *filelist; 131 int nfiles; 132 struct recheader *buffer; 133 u_char *bufend; 134 struct field *dummy2; 135 { 136 static char *opos; 137 char *pos; 138 static int fileno = 0, overflow = 0; 139 static FILE *fp = 0; 140 int c; 141 142 pos = (char *) buffer->data; 143 if (overflow) { 144 memmove(pos, opos, bufend - (u_char *) opos); 145 pos += ((char *)bufend - opos); 146 overflow = 0; 147 } 148 for (;;) { 149 if (flno >= 0) { 150 if (!(fp = fstack[flno].fp)) 151 return (EOF); 152 } else if (!fp) { 153 if (fileno >= nfiles) return(EOF); 154 if (!(fp = fopen(filelist->names[fileno], "r"))) 155 err(2, "%s", filelist->names[fileno]); 156 ++fileno; 157 } 158 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 159 if ((*pos++ = c) == REC_D) { 160 buffer->offset = 0; 161 buffer->length = pos - (char *) buffer->data; 162 return (0); 163 } 164 } 165 if (pos >= (char *)bufend) { 166 if (buffer->data < bufend) { 167 overflow = 1; 168 opos = (char *)buffer->data; 169 } 170 return (BUFFEND); 171 } else if (c == EOF) { 172 if (buffer->data != (u_char *) pos) { 173 *pos++ = REC_D; 174 buffer->offset = 0; 175 buffer->length = pos - (char *) buffer->data; 176 return(0); 177 } 178 FCLOSE(fp); 179 fp = 0; 180 if(flno >= 0) fstack[flno].fp = 0; 181 } else { 182 183 warnx("makeline: line too long: ignoring '%.100s...'", buffer->data); 184 185 /* consume rest of line from input */ 186 while((c = getc(fp)) != REC_D && c != EOF); 187 188 buffer->offset = 0; 189 buffer->length = 0; 190 return BUFFEND; 191 } 192 } 193 } 194 195 /* 196 * This generates keys. 
It's only called in the first fsort pass 197 */ 198 int 199 makekey(flno, top, filelist, nfiles, buffer, bufend, ftbl) 200 int flno, top; 201 struct filelist *filelist; 202 int nfiles; 203 struct recheader *buffer; 204 u_char *bufend; 205 struct field *ftbl; 206 { 207 static int fileno = 0; 208 static FILE *dbdesc = 0; 209 static DBT dbkey[1], line[1]; 210 static int overflow = 0; 211 int c; 212 if (overflow) { 213 overflow = enterkey(buffer, line, bufend - (u_char *) buffer, 214 ftbl); 215 if (overflow) 216 return (BUFFEND); 217 else 218 return (0); 219 } 220 for (;;) { 221 if (flno >= 0) { 222 if (!(dbdesc = fstack[flno].fp)) 223 return(EOF); 224 } else if (!dbdesc) { 225 if (fileno >= nfiles) 226 return (EOF); 227 dbdesc = fopen(filelist->names[fileno], "r"); 228 if (!dbdesc) 229 err(2, "%s", filelist->names[fileno]); 230 ++fileno; 231 } 232 if (!(c = seq(dbdesc, line, dbkey))) { 233 if ((signed)line->size > bufend - buffer->data) { 234 overflow = 1; 235 } else { 236 overflow = enterkey(buffer, line, 237 bufend - (u_char *) buffer, ftbl); 238 } 239 if (overflow) 240 return (BUFFEND); 241 else 242 return (0); 243 } 244 if (c == EOF) { 245 FCLOSE(dbdesc); 246 dbdesc = 0; 247 if (flno >= 0) fstack[flno].fp = 0; 248 } else { 249 ((char *) line->data)[60] = '\000'; 250 warnx("makekey: line too long: ignoring %.100s...", 251 (char *)line->data); 252 } 253 254 } 255 } 256 257 /* 258 * get a key/line pair from fp 259 */ 260 static int 261 seq(fp, line, key) 262 FILE *fp; 263 DBT *key, *line; 264 { 265 static char *buf, flag = 1; 266 char *end, *pos; 267 int c; 268 if (flag) { 269 flag = 0; 270 buf = (char *) linebuf; 271 end = buf + linebuf_size; 272 line->data = buf; 273 } 274 pos = buf; 275 while ((c = getc(fp)) != EOF) { 276 if ((*pos++ = c) == REC_D) { 277 line->size = pos - buf; 278 return (0); 279 } 280 if (pos == end) { 281 linebuf_size *= 2; 282 linebuf = realloc(linebuf, linebuf_size); 283 if (!linebuf) 284 err(2, "realloc for linebuf to %lu bytes failed", 
(unsigned long) linebuf_size); 285 286 end = linebuf + linebuf_size; 287 pos = linebuf + (pos - buf); 288 line->data = buf = (char *)linebuf; 289 continue; 290 } 291 } 292 if (pos != buf) { 293 *pos++ = REC_D; 294 line->size = pos - buf; 295 return (0); 296 } else 297 return (EOF); 298 } 299 300 /* 301 * write a key/line pair to a temporary file 302 */ 303 void 304 putrec(rec, fp) 305 const struct recheader *rec; 306 FILE *fp; 307 { 308 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 309 } 310 311 /* 312 * write a line to output 313 */ 314 void 315 putline(rec, fp) 316 const struct recheader *rec; 317 FILE *fp; 318 { 319 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 320 } 321 322 /* 323 * get a record from a temporary file. (Used by merge sort.) 324 */ 325 int 326 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 327 int flno, top; 328 struct filelist *filelist; 329 int nfiles; 330 struct recheader *rec; 331 u_char *end; 332 struct field *dummy2; 333 { 334 int i; 335 FILE *fp; 336 fp = fstack[flno].fp; 337 if ((u_char *) rec > end - sizeof(TRECHEADER)) 338 return (BUFFEND); 339 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 340 fclose(fp); 341 fstack[flno].fp = 0; 342 return (EOF); 343 } 344 if (end - rec->data < rec->length) { 345 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 346 ungetc(*((char *) rec + i), fp); 347 return (BUFFEND); 348 } 349 fread(rec->data, rec->length, 1, fp); 350 return (0); 351 } 352 353 /* 354 * Return pointer to an open file stream, err out on failure. 355 */ 356 FILE * 357 ftmp(void) 358 { 359 FILE *fp = tmpfile(); 360 361 if (fp == NULL) 362 err(2, "tmpfile()"); 363 364 return fp; 365 } 366