1 /* $NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 #include "fsort.h" 66 67 #ifndef lint 68 __RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $"); 69 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 70 #endif /* not lint */ 71 72 #include <string.h> 73 74 static ssize_t seq(FILE *, u_char **); 75 76 /* 77 * this is called when there is no special key. It's only called 78 * in the first fsort pass. 79 */ 80 int 81 makeline(int flno, int top, struct filelist *filelist, int nfiles, 82 RECHEADER *recbuf, u_char *bufend, struct field *dummy2) 83 { 84 static u_char *obufend; 85 static size_t osz; 86 u_char *pos; 87 static int filenum = 0, overflow = 0; 88 static FILE *fp = 0; 89 int c; 90 91 c = 0; /* XXXGCC -Wuninitialized [pmppc] */ 92 93 pos = recbuf->data; 94 if (overflow) { 95 /* 96 * Buffer shortage is solved by either of two ways: 97 * o flush previous buffered data and start using the 98 * buffer from start (see fsort()) 99 * o realloc buffer and bump bufend 100 * 101 * The former is preferred, realloc is only done when 102 * there is exactly one item in buffer which does not fit. 103 */ 104 if (bufend == obufend) 105 memmove(pos, bufend - osz, osz); 106 107 pos += osz; 108 overflow = 0; 109 } 110 111 for (;;) { 112 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 113 return (EOF); 114 else if (fp == NULL) { 115 if (filenum >= nfiles) 116 return (EOF); 117 if (!(fp = fopen(filelist->names[filenum], "r"))) 118 err(2, "%s", filelist->names[filenum]); 119 filenum++; 120 } 121 while ((pos < bufend) && ((c = getc(fp)) != EOF)) { 122 *pos++ = c; 123 if (c == REC_D) { 124 recbuf->offset = 0; 125 recbuf->length = pos - recbuf->data; 126 return (0); 127 } 128 } 129 if (pos >= bufend) { 130 if (recbuf->data < bufend) { 131 overflow = 1; 132 obufend = bufend; 133 osz = (pos - recbuf->data); 134 } 135 return (BUFFEND); 136 } else if (c == EOF) { 137 if (recbuf->data != pos) { 138 *pos++ = REC_D; 139 recbuf->offset = 0; 140 recbuf->length = pos - recbuf->data; 141 return (0); 142 } 143 FCLOSE(fp); 144 fp = 0; 145 if (flno >= 0) 146 fstack[flno].fp = 0; 147 } else { 148 149 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 150 151 /* Consume the rest of line from input */ 152 while ((c = getc(fp)) != REC_D && c != EOF) 153 ; 154 155 recbuf->offset = 0; 156 recbuf->length = 0; 157 158 return (BUFFEND); 159 } 160 } 161 } 162 163 /* 164 * This generates keys. It's only called in the first fsort pass 165 */ 166 int 167 makekey(int flno, int top, struct filelist *filelist, int nfiles, 168 RECHEADER *recbuf, u_char *bufend, struct field *ftbl) 169 { 170 static int filenum = 0; 171 static FILE *dbdesc = 0; 172 static u_char *line_data; 173 static ssize_t line_size; 174 static int overflow = 0; 175 176 /* We get re-entered after returning BUFFEND - save old data */ 177 if (overflow) { 178 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 179 return overflow ? BUFFEND : 0; 180 } 181 182 /* Loop through files until we find a line of input */ 183 for (;;) { 184 if (flno >= 0) { 185 if (!(dbdesc = fstack[flno].fp)) 186 return (EOF); 187 } else if (!dbdesc) { 188 if (filenum >= nfiles) 189 return (EOF); 190 dbdesc = fopen(filelist->names[filenum], "r"); 191 if (!dbdesc) 192 err(2, "%s", filelist->names[filenum]); 193 filenum++; 194 } 195 line_size = seq(dbdesc, &line_data); 196 if (line_size != 0) 197 /* Got a line */ 198 break; 199 200 /* End of file ... */ 201 FCLOSE(dbdesc); 202 dbdesc = 0; 203 if (flno >= 0) 204 fstack[flno].fp = 0; 205 } 206 207 if (line_size > bufend - recbuf->data) { 208 overflow = 1; 209 } else { 210 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 211 } 212 return overflow ? BUFFEND : 0; 213 } 214 215 /* 216 * get a line of input from fp 217 */ 218 static ssize_t 219 seq(FILE *fp, u_char **line) 220 { 221 static u_char *buf; 222 static size_t buf_size = DEFLLEN; 223 u_char *end, *pos; 224 int c; 225 u_char *new_buf; 226 227 if (!buf) { 228 /* one-time initialization */ 229 buf = malloc(buf_size); 230 if (!buf) 231 err(2, "malloc of linebuf for %zu bytes failed", 232 buf_size); 233 } 234 235 end = buf + buf_size; 236 pos = buf; 237 while ((c = getc(fp)) != EOF) { 238 *pos++ = c; 239 if (c == REC_D) { 240 *line = buf; 241 return pos - buf; 242 } 243 if (pos == end) { 244 /* Long line - double size of buffer */ 245 /* XXX: Check here for stupidly long lines */ 246 buf_size *= 2; 247 new_buf = realloc(buf, buf_size); 248 if (!new_buf) 249 err(2, "realloc of linebuf to %zu bytes failed", 250 buf_size); 251 252 end = new_buf + buf_size; 253 pos = new_buf + (pos - buf); 254 buf = new_buf; 255 } 256 } 257 258 if (pos != buf) { 259 /* EOF part way through line - add line terminator */ 260 *pos++ = REC_D; 261 *line = buf; 262 return pos - buf; 263 } 264 265 return 0; 266 } 267 268 /* 269 * write a key/line pair to a temporary file 270 */ 271 void 272 putrec(const RECHEADER *rec, FILE *fp) 273 { 274 EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp); 275 } 276 277 /* 278 * write a line to output 279 */ 280 void 281 putline(const RECHEADER *rec, FILE *fp) 282 { 283 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 284 } 285 286 /* 287 * write dump of key to output (for -Dk) 288 */ 289 void 290 putkeydump(const RECHEADER *rec, FILE *fp) 291 { 292 EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp); 293 } 294 295 /* 296 * get a record from a temporary file. (Used by merge sort.) 297 */ 298 int 299 geteasy(int flno, int top, struct filelist *filelist, int nfiles, 300 RECHEADER *rec, u_char *end, struct field *dummy2) 301 { 302 int i; 303 FILE *fp; 304 305 fp = fstack[flno].fp; 306 if ((u_char *) rec > end - REC_DATA_OFFSET) 307 return (BUFFEND); 308 if (!fread(rec, 1, REC_DATA_OFFSET, fp)) { 309 fclose(fp); 310 fstack[flno].fp = 0; 311 return (EOF); 312 } 313 if (end - rec->data < (ptrdiff_t)rec->length) { 314 for (i = REC_DATA_OFFSET - 1; i >= 0; i--) 315 ungetc(*((char *) rec + i), fp); 316 return (BUFFEND); 317 } 318 fread(rec->data, rec->length, 1, fp); 319 return (0); 320 } 321