1 /* $NetBSD: files.c,v 1.6 2000/10/17 15:22:57 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include "sort.h" 40 #include "fsort.h" 41 42 #ifndef lint 43 __RCSID("$NetBSD: files.c,v 1.6 2000/10/17 15:22:57 jdolecek Exp $"); 44 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 45 #endif /* not lint */ 46 47 #include <string.h> 48 49 static int seq __P((FILE *, DBT *, DBT *)); 50 51 /* 52 * this is the subroutine for file management for fsort(). 53 * It keeps the buffers for all temporary files. 54 */ 55 int 56 getnext(binno, infl0, nfiles, pos, end, dummy) 57 int binno, nfiles; 58 union f_handle infl0; 59 struct recheader *pos; 60 u_char *end; 61 struct field *dummy; 62 { 63 int i; 64 u_char *hp; 65 static size_t nleft = 0; 66 static int cnt = 0, flag = -1; 67 static u_char maxb = 0; 68 static FILE *fp; 69 70 if (nleft == 0) { 71 if (binno < 0) /* reset files. */ { 72 for (i = 0; i < nfiles; i++) { 73 rewind(fstack[infl0.top + i].fp); 74 fstack[infl0.top + i].max_o = 0; 75 } 76 flag = -1; 77 nleft = cnt = 0; 78 return(-1); 79 } 80 maxb = fstack[infl0.top].maxb; 81 for (; nleft == 0; cnt++) { 82 if (cnt >= nfiles) { 83 cnt = 0; 84 return (EOF); 85 } 86 fp = fstack[infl0.top + cnt].fp; 87 fread(&nleft, sizeof(nleft), 1, fp); 88 if (binno < maxb) 89 fstack[infl0.top+cnt].max_o 90 += sizeof(nleft) + nleft; 91 else if (binno == maxb) { 92 if (binno != fstack[infl0.top].lastb) { 93 fseek(fp, fstack[infl0.top+ 94 cnt].max_o, SEEK_SET); 95 fread(&nleft, sizeof(nleft), 1, fp); 96 } 97 if (nleft == 0) 98 fclose(fp); 99 } else if (binno == maxb + 1) { /* skip a bin */ 100 fseek(fp, nleft, SEEK_CUR); 101 fread(&nleft, sizeof(nleft), 1, fp); 102 flag = cnt; 103 } 104 } 105 } 106 if ((u_char *) pos > end - sizeof(TRECHEADER)) 107 return (BUFFEND); 108 fread(pos, sizeof(TRECHEADER), 1, fp); 109 if (end - pos->data < pos->length) { 110 hp = ((u_char *) pos) + sizeof(TRECHEADER); 111 for (i = sizeof(TRECHEADER); i ; i--) 112 ungetc(*--hp, fp); 113 return (BUFFEND); 114 } 115 fread(pos->data, pos->length, 1, fp); 116 nleft -= pos->length + sizeof(TRECHEADER); 117 if (nleft == 0 && binno == fstack[infl0.top].maxb) 118 fclose(fp); 119 return (0); 120 } 121 122 /* 123 * this is called when there is no special key. It's only called 124 * in the first fsort pass. 125 */ 126 int 127 makeline(flno, filelist, nfiles, buffer, bufend, dummy2) 128 int flno, nfiles; 129 union f_handle filelist; 130 struct recheader *buffer; 131 u_char *bufend; 132 struct field *dummy2; 133 { 134 static size_t olen; 135 char *pos; 136 static int fileno = 0, overflow = 0; 137 static FILE *fp = 0; 138 int c; 139 140 pos = (char *) buffer->data; 141 if (overflow) { 142 pos += olen; 143 overflow = 0; 144 } 145 for (;;) { 146 if (flno >= 0) { 147 if (!(fp = fstack[flno].fp)) 148 return (EOF); 149 } else if (!fp) { 150 if (fileno >= nfiles) return(EOF); 151 if (!(fp = fopen(filelist.names[fileno], "r"))) 152 err(2, "%s", filelist.names[fileno]); 153 ++fileno; 154 } 155 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 156 if ((*pos++ = c) == REC_D) { 157 buffer->offset = 0; 158 buffer->length = pos - (char *) buffer->data; 159 return (0); 160 } 161 } 162 if (pos >= (char *)bufend) { 163 if (buffer->data < bufend) { 164 overflow = 1; 165 olen = (size_t) (((char *) bufend) - 166 ((char *) buffer->data)); 167 } 168 return (BUFFEND); 169 } else if (c == EOF) { 170 if (buffer->data != (u_char *) pos) { 171 warnx("last character not record delimiter"); 172 *pos++ = REC_D; 173 buffer->offset = 0; 174 buffer->length = pos - (char *) buffer->data; 175 return(0); 176 } 177 FCLOSE(fp); 178 fp = 0; 179 if(flno >= 0) fstack[flno].fp = 0; 180 } else { 181 182 warnx("makeline: line too long: ignoring '%.100s...'", buffer->data); 183 184 /* consume rest of line from input */ 185 while((c = getc(fp)) != REC_D && c != EOF); 186 187 buffer->offset = 0; 188 buffer->length = 0; 189 return BUFFEND; 190 } 191 } 192 } 193 194 /* 195 * This generates keys. It's only called in the first fsort pass 196 */ 197 int 198 makekey(flno, filelist, nfiles, buffer, bufend, ftbl) 199 int flno, nfiles; 200 union f_handle filelist; 201 struct recheader *buffer; 202 u_char *bufend; 203 struct field *ftbl; 204 { 205 static int fileno = 0; 206 static FILE *dbdesc = 0; 207 static DBT dbkey[1], line[1]; 208 static int overflow = 0; 209 int c; 210 if (overflow) { 211 overflow = 212 enterkey(buffer, line, bufend - (u_char *) buffer, ftbl); 213 if (overflow) 214 return (BUFFEND); 215 else 216 return (0); 217 } 218 for (;;) { 219 if (flno >= 0) { 220 if (!(dbdesc = fstack[flno].fp)) 221 return(EOF); 222 } else if (!dbdesc) { 223 if (fileno >= nfiles) 224 return (EOF); 225 dbdesc = fopen(filelist.names[fileno], "r"); 226 if (!dbdesc) 227 err(2, "%s", filelist.names[fileno]); 228 ++fileno; 229 } 230 if (!(c = seq(dbdesc, line, dbkey))) { 231 if ((signed)line->size > bufend - buffer->data) { 232 overflow = 1; 233 } else { 234 overflow = enterkey(buffer, line, 235 bufend - (u_char *) buffer, ftbl); 236 } 237 if (overflow) 238 return (BUFFEND); 239 else 240 return (0); 241 } 242 if (c == EOF) { 243 FCLOSE(dbdesc); 244 dbdesc = 0; 245 if (flno >= 0) fstack[flno].fp = 0; 246 } else { 247 ((char *) line->data)[60] = '\000'; 248 warnx("makekey: line too long: ignoring %.100s...", 249 (char *)line->data); 250 } 251 252 } 253 } 254 255 /* 256 * get a key/line pair from fp 257 */ 258 static int 259 seq(fp, line, key) 260 FILE *fp; 261 DBT *key, *line; 262 { 263 static char *buf, flag = 1; 264 char *end, *pos; 265 int c; 266 if (flag) { 267 flag = 0; 268 buf = (char *) linebuf; 269 end = buf + linebuf_size; 270 line->data = buf; 271 } 272 pos = buf; 273 while ((c = getc(fp)) != EOF) { 274 if ((*pos++ = c) == REC_D) { 275 line->size = pos - buf; 276 return (0); 277 } 278 if (pos == end) { 279 linebuf_size *= 2; 280 linebuf = realloc(linebuf, linebuf_size); 281 if (!linebuf) 282 err(2, "realloc for linebuf to %lu bytes failed", (unsigned long) linebuf_size); 283 284 end = linebuf + linebuf_size; 285 pos = linebuf + (pos - buf); 286 line->data = buf = (char *)linebuf; 287 continue; 288 #if 0 289 line->size = DEFLLEN; 290 *--pos = REC_D; 291 while ((c = getc(fp)) != EOF) { 292 if (c == REC_D) 293 return (BUFFEND); 294 } 295 break; 296 #endif 297 } 298 } 299 if (pos != buf) { 300 warnx("last character not record delimiter"); 301 *pos++ = REC_D; 302 line->size = pos - buf; 303 return (0); 304 } else 305 return (EOF); 306 } 307 308 /* 309 * write a key/line pair to a temporary file 310 */ 311 void 312 putrec(rec, fp) 313 const struct recheader *rec; 314 FILE *fp; 315 { 316 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 317 } 318 319 /* 320 * write a line to output 321 */ 322 void 323 putline(rec, fp) 324 const struct recheader *rec; 325 FILE *fp; 326 { 327 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 328 } 329 330 /* 331 * get a record from a temporary file. (Used by merge sort.) 332 */ 333 int 334 geteasy(flno, filelist, nfiles, rec, end, dummy2) 335 int flno, nfiles; 336 union f_handle filelist; 337 struct recheader *rec; 338 u_char *end; 339 struct field *dummy2; 340 { 341 int i; 342 FILE *fp; 343 fp = fstack[flno].fp; 344 if ((u_char *) rec > end - sizeof(TRECHEADER)) 345 return (BUFFEND); 346 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 347 fclose(fp); 348 fstack[flno].fp = 0; 349 return (EOF); 350 } 351 if (end - rec->data < rec->length) { 352 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 353 ungetc(*((char *) rec + i), fp); 354 return (BUFFEND); 355 } 356 fread(rec->data, rec->length, 1, fp); 357 return (0); 358 } 359