1 /* $NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 #include "fsort.h" 66 67 #ifndef lint 68 __RCSID("$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $"); 69 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 70 #endif /* not lint */ 71 72 #include <string.h> 73 74 static ssize_t seq(FILE *, u_char **); 75 76 /* 77 * this is called when there is no special key. It's only called 78 * in the first fsort pass. 79 */ 80 int 81 makeline(int flno, int top, struct filelist *filelist, int nfiles, 82 RECHEADER *recbuf, u_char *bufend, struct field *dummy2) 83 { 84 static u_char *obufend; 85 static size_t osz; 86 u_char *pos; 87 static int filenum = 0, overflow = 0; 88 static FILE *fp = 0; 89 int c; 90 91 c = 0; /* XXXGCC -Wuninitialized [pmppc] */ 92 93 pos = recbuf->data; 94 if (overflow) { 95 /* 96 * Buffer shortage is solved by either of two ways: 97 * o flush previous buffered data and start using the 98 * buffer from start (see fsort()) 99 * o realloc buffer and bump bufend 100 * 101 * The former is preferred, realloc is only done when 102 * there is exactly one item in buffer which does not fit. 103 */ 104 if (bufend == obufend) 105 memmove(pos, bufend - osz, osz); 106 107 pos += osz; 108 overflow = 0; 109 } 110 111 for (;;) { 112 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 113 return (EOF); 114 else if (fp == NULL) { 115 if (filenum >= nfiles) 116 return (EOF); 117 if (!(fp = fopen(filelist->names[filenum], "r"))) 118 err(2, "%s", filelist->names[filenum]); 119 filenum++; 120 } 121 while ((pos < bufend) && ((c = getc(fp)) != EOF)) { 122 *pos++ = c; 123 if (c == REC_D) { 124 recbuf->offset = 0; 125 recbuf->length = pos - recbuf->data; 126 recbuf->keylen = recbuf->length - 1; 127 return (0); 128 } 129 } 130 if (pos >= bufend) { 131 if (recbuf->data < bufend) { 132 overflow = 1; 133 obufend = bufend; 134 osz = (pos - recbuf->data); 135 } 136 return (BUFFEND); 137 } else if (c == EOF) { 138 if (recbuf->data != pos) { 139 *pos++ = REC_D; 140 recbuf->offset = 0; 141 recbuf->length = pos - recbuf->data; 142 recbuf->keylen = recbuf->length - 1; 143 return (0); 144 } 145 FCLOSE(fp); 146 fp = 0; 147 if (flno >= 0) 148 fstack[flno].fp = 0; 149 } else { 150 151 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 152 153 /* Consume the rest of line from input */ 154 while ((c = getc(fp)) != REC_D && c != EOF) 155 ; 156 157 recbuf->offset = 0; 158 recbuf->length = 0; 159 recbuf->keylen = 0; 160 161 return (BUFFEND); 162 } 163 } 164 } 165 166 /* 167 * This generates keys. It's only called in the first fsort pass 168 */ 169 int 170 makekey(int flno, int top, struct filelist *filelist, int nfiles, 171 RECHEADER *recbuf, u_char *bufend, struct field *ftbl) 172 { 173 static int filenum = 0; 174 static FILE *dbdesc = 0; 175 static u_char *line_data; 176 static ssize_t line_size; 177 static int overflow = 0; 178 179 /* We get re-entered after returning BUFFEND - save old data */ 180 if (overflow) { 181 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 182 return overflow ? BUFFEND : 0; 183 } 184 185 /* Loop through files until we find a line of input */ 186 for (;;) { 187 if (flno >= 0) { 188 if (!(dbdesc = fstack[flno].fp)) 189 return (EOF); 190 } else if (!dbdesc) { 191 if (filenum >= nfiles) 192 return (EOF); 193 dbdesc = fopen(filelist->names[filenum], "r"); 194 if (!dbdesc) 195 err(2, "%s", filelist->names[filenum]); 196 filenum++; 197 } 198 line_size = seq(dbdesc, &line_data); 199 if (line_size != 0) 200 /* Got a line */ 201 break; 202 203 /* End of file ... */ 204 FCLOSE(dbdesc); 205 dbdesc = 0; 206 if (flno >= 0) 207 fstack[flno].fp = 0; 208 } 209 210 if (line_size > bufend - recbuf->data) { 211 overflow = 1; 212 } else { 213 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 214 } 215 return overflow ? BUFFEND : 0; 216 } 217 218 /* 219 * get a line of input from fp 220 */ 221 static ssize_t 222 seq(FILE *fp, u_char **line) 223 { 224 static u_char *buf; 225 static size_t buf_size = DEFLLEN; 226 u_char *end, *pos; 227 int c; 228 u_char *new_buf; 229 230 if (!buf) { 231 /* one-time initialization */ 232 buf = malloc(buf_size); 233 if (!buf) 234 err(2, "malloc of linebuf for %zu bytes failed", 235 buf_size); 236 } 237 238 end = buf + buf_size; 239 pos = buf; 240 while ((c = getc(fp)) != EOF) { 241 *pos++ = c; 242 if (c == REC_D) { 243 *line = buf; 244 return pos - buf; 245 } 246 if (pos == end) { 247 /* Long line - double size of buffer */ 248 /* XXX: Check here for stupidly long lines */ 249 buf_size *= 2; 250 new_buf = realloc(buf, buf_size); 251 if (!new_buf) 252 err(2, "realloc of linebuf to %zu bytes failed", 253 buf_size); 254 255 end = new_buf + buf_size; 256 pos = new_buf + (pos - buf); 257 buf = new_buf; 258 } 259 } 260 261 if (pos != buf) { 262 /* EOF part way through line - add line terminator */ 263 *pos++ = REC_D; 264 *line = buf; 265 return pos - buf; 266 } 267 268 return 0; 269 } 270 271 /* 272 * write a key/line pair to a temporary file 273 */ 274 void 275 putrec(const RECHEADER *rec, FILE *fp) 276 { 277 EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp); 278 } 279 280 /* 281 * write a line to output 282 */ 283 void 284 putline(const RECHEADER *rec, FILE *fp) 285 { 286 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 287 } 288 289 /* 290 * write dump of key to output (for -Dk) 291 */ 292 void 293 putkeydump(const RECHEADER *rec, FILE *fp) 294 { 295 EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp); 296 } 297 298 /* 299 * get a record from a temporary file. (Used by merge sort.) 300 */ 301 int 302 geteasy(int flno, int top, struct filelist *filelist, int nfiles, 303 RECHEADER *rec, u_char *end, struct field *dummy2) 304 { 305 int i; 306 FILE *fp; 307 308 fp = fstack[flno].fp; 309 if ((u_char *)(rec + 1) > end) 310 return (BUFFEND); 311 if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) { 312 fclose(fp); 313 fstack[flno].fp = 0; 314 return (EOF); 315 } 316 if (end - rec->data < (ptrdiff_t)rec->length) { 317 for (i = offsetof(RECHEADER, data) - 1; i >= 0; i--) 318 ungetc(*((char *) rec + i), fp); 319 return (BUFFEND); 320 } 321 fread(rec->data, rec->length, 1, fp); 322 return (0); 323 } 324