1 /* $NetBSD: fsort.c,v 1.45 2009/10/09 20:32:57 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* 65 * Read in a block of records (until 'enough'). 66 * sort, write to temp file. 67 * Merge sort temp files into output file 68 * Small files miss out the temp file stage. 69 * Large files might get multiple merges. 70 */ 71 #include "sort.h" 72 #include "fsort.h" 73 74 #ifndef lint 75 __RCSID("$NetBSD: fsort.c,v 1.45 2009/10/09 20:32:57 dsl Exp $"); 76 __SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93"); 77 #endif /* not lint */ 78 79 #include <stdlib.h> 80 #include <string.h> 81 82 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) 83 84 void 85 fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) 86 { 87 RECHEADER **keylist; 88 RECHEADER **keypos, **keyp; 89 RECHEADER *buffer; 90 size_t bufsize = DEFBUFSIZE; 91 u_char *bufend; 92 int mfct = 0; 93 int c, nelem; 94 get_func_t get; 95 RECHEADER *crec; 96 RECHEADER *nbuffer; 97 FILE *fp, *tmp_fp; 98 int file_no; 99 int max_recs = DEBUG('m') ? 16 : MAXNUM; 100 101 buffer = malloc(bufsize); 102 bufend = (u_char *)buffer + bufsize; 103 /* Allocate double length keymap for radix_sort */ 104 keylist = malloc(2 * max_recs * sizeof(*keylist)); 105 if (buffer == NULL || keylist == NULL) 106 err(2, "failed to malloc initial buffer or keylist"); 107 108 if (SINGL_FLD) 109 /* Key and data are one! */ 110 get = makeline; 111 else 112 /* Key (merged key fields) added before data */ 113 get = makekey; 114 115 file_no = 0; 116 fp = fopen(filelist->names[0], "r"); 117 if (fp == NULL) 118 err(2, "%s", filelist->names[0]); 119 120 /* Loop through reads of chunk of input files that get sorted 121 * and then merged together. */ 122 for (;;) { 123 keypos = keylist; 124 nelem = 0; 125 crec = buffer; 126 makeline_copydown(crec); 127 128 /* Loop reading records */ 129 for (;;) { 130 c = get(fp, crec, bufend, ftbl); 131 /* 'c' is 0, EOF or BUFFEND */ 132 if (c == 0) { 133 /* Save start of key in input buffer */ 134 *keypos++ = crec; 135 if (++nelem == max_recs) { 136 c = BUFFEND; 137 break; 138 } 139 crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); 140 continue; 141 } 142 if (c == EOF) { 143 /* try next file */ 144 if (++file_no >= nfiles) 145 /* no more files */ 146 break; 147 fp = fopen(filelist->names[file_no], "r"); 148 if (fp == NULL) 149 err(2, "%s", filelist->names[file_no]); 150 continue; 151 } 152 if (nelem >= max_recs 153 || (bufsize >= MAXBUFSIZE && nelem > 8)) 154 /* Need to sort and save this lot of data */ 155 break; 156 157 /* c == BUFFEND, and we can process more data */ 158 /* Allocate a larger buffer for this lot of data */ 159 bufsize *= 2; 160 nbuffer = realloc(buffer, bufsize); 161 if (!nbuffer) { 162 err(2, "failed to realloc buffer to %zu bytes", 163 bufsize); 164 } 165 166 /* patch up keylist[] */ 167 for (keyp = &keypos[-1]; keyp >= keylist; keyp--) 168 *keyp = nbuffer + (*keyp - buffer); 169 170 crec = nbuffer + (crec - buffer); 171 buffer = nbuffer; 172 bufend = (u_char *)buffer + bufsize; 173 } 174 175 /* Sort this set of records */ 176 radix_sort(keylist, keylist + max_recs, nelem); 177 178 if (c == EOF && mfct == 0) { 179 /* all the data is (sorted) in the buffer */ 180 append(keylist, nelem, outfp, 181 DEBUG('k') ? putkeydump : putline); 182 break; 183 } 184 185 /* Save current data to a temporary file for a later merge */ 186 if (nelem != 0) { 187 tmp_fp = ftmp(); 188 append(keylist, nelem, tmp_fp, putrec); 189 save_for_merge(tmp_fp, geteasy, ftbl); 190 } 191 mfct = 1; 192 193 if (c == EOF) { 194 /* merge to output file */ 195 merge_sort(outfp, 196 DEBUG('k') ? putkeydump : putline, ftbl); 197 break; 198 } 199 } 200 201 free(keylist); 202 keylist = NULL; 203 free(buffer); 204 buffer = NULL; 205 } 206