1 /* $NetBSD: files.c,v 1.40 2009/10/07 21:03:29 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 #include "fsort.h" 66 67 #ifndef lint 68 __RCSID("$NetBSD: files.c,v 1.40 2009/10/07 21:03:29 dsl Exp $"); 69 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 70 #endif /* not lint */ 71 72 #include <string.h> 73 74 /* Align records in temporary files to avoid misaligned copies */ 75 #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1)) 76 77 static ssize_t seq(FILE *, u_char **); 78 79 /* 80 * this is called when there is no special key. It's only called 81 * in the first fsort pass. 82 */ 83 84 static u_char *opos; 85 static size_t osz; 86 87 void 88 makeline_copydown(RECHEADER *recbuf) 89 { 90 memmove(recbuf->data, opos, osz); 91 } 92 93 int 94 makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2) 95 { 96 u_char *pos; 97 int c; 98 99 pos = recbuf->data; 100 if (osz != 0) { 101 /* 102 * Buffer shortage is solved by either of two ways: 103 * o flush previous buffered data and start using the 104 * buffer from start. 105 * makeline_copydown() above must be called. 106 * o realloc buffer 107 * 108 * This code has relied on realloc changing 'bufend', 109 * but that isn't necessarily true. 110 */ 111 pos += osz; 112 osz = 0; 113 } 114 115 while (pos < bufend) { 116 c = getc(fp); 117 if (c == EOF) { 118 if (pos == recbuf->data) { 119 FCLOSE(fp); 120 return EOF; 121 } 122 /* Add terminator to partial line */ 123 c = REC_D; 124 } 125 *pos++ = c; 126 if (c == REC_D) { 127 recbuf->offset = 0; 128 recbuf->length = pos - recbuf->data; 129 recbuf->keylen = recbuf->length - 1; 130 return (0); 131 } 132 } 133 134 /* Ran out of buffer space... */ 135 if (recbuf->data < bufend) { 136 /* Remember where the partial record is */ 137 osz = pos - recbuf->data; 138 opos = recbuf->data; 139 } 140 return (BUFFEND); 141 } 142 143 /* 144 * This generates keys. It's only called in the first fsort pass 145 */ 146 int 147 makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl) 148 { 149 static u_char *line_data; 150 static ssize_t line_size; 151 static int overflow = 0; 152 153 /* We get re-entered after returning BUFFEND - save old data */ 154 if (overflow) { 155 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 156 return overflow ? BUFFEND : 0; 157 } 158 159 line_size = seq(fp, &line_data); 160 if (line_size == 0) { 161 FCLOSE(fp); 162 return EOF; 163 } 164 165 if (line_size > bufend - recbuf->data) { 166 overflow = 1; 167 } else { 168 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 169 } 170 return overflow ? BUFFEND : 0; 171 } 172 173 /* 174 * get a line of input from fp 175 */ 176 static ssize_t 177 seq(FILE *fp, u_char **line) 178 { 179 static u_char *buf; 180 static size_t buf_size = DEFLLEN; 181 u_char *end, *pos; 182 int c; 183 u_char *new_buf; 184 185 if (!buf) { 186 /* one-time initialization */ 187 buf = malloc(buf_size); 188 if (!buf) 189 err(2, "malloc of linebuf for %zu bytes failed", 190 buf_size); 191 } 192 193 end = buf + buf_size; 194 pos = buf; 195 while ((c = getc(fp)) != EOF) { 196 *pos++ = c; 197 if (c == REC_D) { 198 *line = buf; 199 return pos - buf; 200 } 201 if (pos == end) { 202 /* Long line - double size of buffer */ 203 /* XXX: Check here for stupidly long lines */ 204 buf_size *= 2; 205 new_buf = realloc(buf, buf_size); 206 if (!new_buf) 207 err(2, "realloc of linebuf to %zu bytes failed", 208 buf_size); 209 210 end = new_buf + buf_size; 211 pos = new_buf + (pos - buf); 212 buf = new_buf; 213 } 214 } 215 216 if (pos != buf) { 217 /* EOF part way through line - add line terminator */ 218 *pos++ = REC_D; 219 *line = buf; 220 return pos - buf; 221 } 222 223 return 0; 224 } 225 226 /* 227 * write a key/line pair to a temporary file 228 */ 229 void 230 putrec(const RECHEADER *rec, FILE *fp) 231 { 232 EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp); 233 } 234 235 /* 236 * write a line to output 237 */ 238 void 239 putline(const RECHEADER *rec, FILE *fp) 240 { 241 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 242 } 243 244 /* 245 * write dump of key to output (for -Dk) 246 */ 247 void 248 putkeydump(const RECHEADER *rec, FILE *fp) 249 { 250 EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp); 251 } 252 253 /* 254 * get a record from a temporary file. (Used by merge sort.) 255 */ 256 int 257 geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2) 258 { 259 length_t file_len; 260 int i; 261 262 (void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]); 263 264 if ((u_char *)(rec + 1) > end) 265 return (BUFFEND); 266 if (!fread(&rec->length, 1, sizeof rec->length, fp)) { 267 fclose(fp); 268 return (EOF); 269 } 270 file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length); 271 if (end - rec->data < (ptrdiff_t)file_len) { 272 for (i = sizeof rec->length - 1; i >= 0; i--) 273 ungetc(*((char *) rec + i), fp); 274 return (BUFFEND); 275 } 276 277 fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp); 278 return (0); 279 } 280