1 /* $NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 #include "sort.h" 65 #include "fsort.h" 66 67 __RCSID("$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $"); 68 69 #include <string.h> 70 71 /* Align records in temporary files to avoid misaligned copies */ 72 #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1)) 73 74 static ssize_t seq(FILE *, u_char **); 75 76 /* 77 * this is called when there is no special key. It's only called 78 * in the first fsort pass. 79 */ 80 81 static u_char *opos; 82 static size_t osz; 83 84 void 85 makeline_copydown(RECHEADER *recbuf) 86 { 87 memmove(recbuf->data, opos, osz); 88 } 89 90 int 91 makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2) 92 { 93 u_char *pos; 94 int c; 95 96 pos = recbuf->data; 97 if (osz != 0) { 98 /* 99 * Buffer shortage is solved by either of two ways: 100 * o flush previous buffered data and start using the 101 * buffer from start. 102 * makeline_copydown() above must be called. 103 * o realloc buffer 104 * 105 * This code has relied on realloc changing 'bufend', 106 * but that isn't necessarily true. 107 */ 108 pos += osz; 109 osz = 0; 110 } 111 112 while (pos < bufend) { 113 c = getc(fp); 114 if (c == EOF) { 115 if (pos == recbuf->data) { 116 FCLOSE(fp); 117 return EOF; 118 } 119 /* Add terminator to partial line */ 120 c = REC_D; 121 } 122 *pos++ = c; 123 if (c == REC_D) { 124 recbuf->offset = 0; 125 recbuf->length = pos - recbuf->data; 126 recbuf->keylen = recbuf->length - 1; 127 return (0); 128 } 129 } 130 131 /* Ran out of buffer space... */ 132 if (recbuf->data < bufend) { 133 /* Remember where the partial record is */ 134 osz = pos - recbuf->data; 135 opos = recbuf->data; 136 } 137 return (BUFFEND); 138 } 139 140 /* 141 * This generates keys. It's only called in the first fsort pass 142 */ 143 int 144 makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl) 145 { 146 static u_char *line_data; 147 static ssize_t line_size; 148 static int overflow = 0; 149 150 /* We get re-entered after returning BUFFEND - save old data */ 151 if (overflow) { 152 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 153 return overflow ? BUFFEND : 0; 154 } 155 156 line_size = seq(fp, &line_data); 157 if (line_size == 0) { 158 FCLOSE(fp); 159 return EOF; 160 } 161 162 if (line_size > bufend - recbuf->data) { 163 overflow = 1; 164 } else { 165 overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); 166 } 167 return overflow ? BUFFEND : 0; 168 } 169 170 /* 171 * get a line of input from fp 172 */ 173 static ssize_t 174 seq(FILE *fp, u_char **line) 175 { 176 static u_char *buf; 177 static size_t buf_size = DEFLLEN; 178 u_char *end, *pos; 179 int c; 180 u_char *new_buf; 181 182 if (!buf) { 183 /* one-time initialization */ 184 buf = malloc(buf_size); 185 if (!buf) 186 err(2, "malloc of linebuf for %zu bytes failed", 187 buf_size); 188 } 189 190 end = buf + buf_size; 191 pos = buf; 192 while ((c = getc(fp)) != EOF) { 193 *pos++ = c; 194 if (c == REC_D) { 195 *line = buf; 196 return pos - buf; 197 } 198 if (pos == end) { 199 /* Long line - double size of buffer */ 200 /* XXX: Check here for stupidly long lines */ 201 buf_size *= 2; 202 new_buf = realloc(buf, buf_size); 203 if (!new_buf) 204 err(2, "realloc of linebuf to %zu bytes failed", 205 buf_size); 206 207 end = new_buf + buf_size; 208 pos = new_buf + (pos - buf); 209 buf = new_buf; 210 } 211 } 212 213 if (pos != buf) { 214 /* EOF part way through line - add line terminator */ 215 *pos++ = REC_D; 216 *line = buf; 217 return pos - buf; 218 } 219 220 return 0; 221 } 222 223 /* 224 * write a key/line pair to a temporary file 225 */ 226 void 227 putrec(const RECHEADER *rec, FILE *fp) 228 { 229 EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp, 230 "failed to write temp file"); 231 } 232 233 /* 234 * write a line to output 235 */ 236 void 237 putline(const RECHEADER *rec, FILE *fp) 238 { 239 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp, 240 "failed to write"); 241 } 242 243 /* 244 * write dump of key to output (for -Dk) 245 */ 246 void 247 putkeydump(const RECHEADER *rec, FILE *fp) 248 { 249 EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp, 250 "failed to write debug key"); 251 } 252 253 /* 254 * get a record from a temporary file. (Used by merge sort.) 255 */ 256 int 257 geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2) 258 { 259 length_t file_len; 260 int i; 261 262 (void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]); 263 264 if ((u_char *)(rec + 1) > end) 265 return (BUFFEND); 266 if (!fread(&rec->length, 1, sizeof rec->length, fp)) { 267 fclose(fp); 268 return (EOF); 269 } 270 file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length); 271 if (end - rec->data < (ptrdiff_t)file_len) { 272 for (i = sizeof rec->length - 1; i >= 0; i--) 273 ungetc(*((char *) rec + i), fp); 274 return (BUFFEND); 275 } 276 277 fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp); 278 return (0); 279 } 280