1 /* $NetBSD: sort.c,v 1.41 2004/07/23 13:26:11 wiz Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 /* Sort sorts a file using an optional user-defined key. 72 * Sort uses radix sort for internal sorting, and allows 73 * a choice of merge sort and radix sort for external sorting. 74 */ 75 76 #include "sort.h" 77 #include "fsort.h" 78 #include "pathnames.h" 79 80 #ifndef lint 81 __COPYRIGHT("@(#) Copyright (c) 1993\n\ 82 The Regents of the University of California. All rights reserved.\n"); 83 #endif /* not lint */ 84 85 #ifndef lint 86 __RCSID("$NetBSD: sort.c,v 1.41 2004/07/23 13:26:11 wiz Exp $"); 87 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 88 #endif /* not lint */ 89 90 #include <sys/types.h> 91 #include <sys/time.h> 92 #include <sys/resource.h> 93 94 #include <paths.h> 95 #include <signal.h> 96 #include <stdlib.h> 97 #include <string.h> 98 #include <unistd.h> 99 #include <locale.h> 100 101 int REC_D = '\n'; 102 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 103 104 /* 105 * weight tables. Gweights is one of ascii, Rascii.. 106 * modified to weight rec_d = 0 (or 255) 107 */ 108 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 109 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 110 111 /* 112 * Default to stable sort. 113 */ 114 int stable_sort = 1; 115 116 static char toutpath[MAXPATHLEN]; 117 118 const char *tmpdir; /* where temporary files should be put */ 119 120 static void cleanup __P((void)); 121 static void onsignal __P((int)); 122 static void usage __P((const char *)); 123 124 int main __P((int argc, char **argv)); 125 126 int 127 main(argc, argv) 128 int argc; 129 char *argv[]; 130 { 131 get_func_t get; 132 int ch, i, stdinflag = 0, tmp = 0; 133 char cflag = 0, mflag = 0; 134 char *outfile, *outpath = 0; 135 struct field *fldtab, *p; 136 size_t fldtab_sz = 3, fidx = 0; 137 struct filelist filelist; 138 FILE *outfp = NULL; 139 struct rlimit rl; 140 141 setlocale(LC_ALL, ""); 142 143 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 144 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 145 err(2, "getrlimit"); 146 rl.rlim_cur = rl.rlim_max; 147 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 148 err(2, "setrlimit"); 149 150 d_mask[REC_D = '\n'] = REC_D_F; 151 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 152 153 fldtab = malloc(fldtab_sz * sizeof(*fldtab)); 154 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 155 156 fixit(&argc, argv); 157 158 if (!(tmpdir = getenv("TMPDIR"))) 159 tmpdir = _PATH_TMP; 160 161 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 162 switch (ch) { 163 case 'b': 164 fldtab->flags |= BI | BT; 165 break; 166 case 'c': 167 cflag = 1; 168 break; 169 case 'd': case 'f': case 'i': case 'n': case 'r': 170 tmp |= optval(ch, 0); 171 if ((tmp & R) && (tmp & F)) 172 fldtab->weights = RFtable; 173 else if (tmp & F) 174 fldtab->weights = Ftable; 175 else if (tmp & R) 176 fldtab->weights = Rascii; 177 fldtab->flags |= tmp; 178 break; 179 case 'H': 180 PANIC = 0; 181 break; 182 case 'k': 183 p = realloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 184 if (!p) 185 err(1, "realloc"); 186 fldtab = p; 187 memset(&fldtab[fldtab_sz], 0, 188 sizeof(fldtab[fldtab_sz])); 189 fldtab_sz++; 190 191 setfield(optarg, &fldtab[++fidx], fldtab->flags); 192 break; 193 case 'm': 194 mflag = 1; 195 break; 196 case 'o': 197 outpath = optarg; 198 break; 199 case 's': 200 /* for GNU sort compatibility (this is our default) */ 201 stable_sort = 1; 202 break; 203 case 'S': 204 stable_sort = 0; 205 break; 206 case 't': 207 if (SEP_FLAG) 208 usage("multiple field delimiters"); 209 SEP_FLAG = 1; 210 d_mask[' '] &= ~FLD_D; 211 d_mask['\t'] &= ~FLD_D; 212 d_mask[(u_char)*optarg] |= FLD_D; 213 if (d_mask[(u_char)*optarg] & REC_D_F) 214 errx(2, "record/field delimiter clash"); 215 break; 216 case 'R': 217 if (REC_D != '\n') 218 usage("multiple record delimiters"); 219 if ('\n' == (REC_D = *optarg)) 220 break; 221 d_mask['\n'] = d_mask[' ']; 222 d_mask[REC_D] = REC_D_F; 223 break; 224 case 'T': 225 /* -T tmpdir */ 226 tmpdir = optarg; 227 break; 228 case 'u': 229 UNIQUE = 1; 230 break; 231 case '?': 232 default: 233 usage(NULL); 234 } 235 } 236 if (cflag && argc > optind+1) 237 errx(2, "too many input files for -c option"); 238 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 239 outpath = argv[argc-1]; 240 argc -= 2; 241 } 242 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 243 errx(2, "too many input files for -m option"); 244 for (i = optind; i < argc; i++) { 245 /* allow one occurrence of /dev/stdin */ 246 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 247 if (stdinflag) 248 warnx("ignoring extra \"%s\" in file list", 249 argv[i]); 250 else 251 stdinflag = 1; 252 253 /* change to /dev/stdin if '-' */ 254 if (argv[i][0] == '-') 255 argv[i] = _PATH_STDIN; 256 257 } else if ((ch = access(argv[i], R_OK))) 258 err(2, "%s", argv[i]); 259 } 260 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 261 SINGL_FLD = 1; 262 fldtab[0].icol.num = 1; 263 } else { 264 if (!fldtab[1].icol.num) { 265 fldtab[0].flags &= ~(BI|BT); 266 setfield("1", &fldtab[++fidx], fldtab->flags); 267 } 268 fldreset(fldtab); 269 fldtab[0].flags &= ~F; 270 } 271 settables(fldtab[0].flags); 272 num_init(); 273 fldtab->weights = gweights; 274 if (optind == argc) { 275 static const char * const names[] = { _PATH_STDIN, NULL }; 276 277 filelist.names = names; 278 optind--; 279 } else 280 filelist.names = (const char * const *) &argv[optind]; 281 282 if (SINGL_FLD) 283 get = makeline; 284 else 285 get = makekey; 286 287 if (cflag) { 288 order(&filelist, get, fldtab); 289 /* NOT REACHED */ 290 } 291 if (!outpath) { 292 (void)snprintf(toutpath, 293 sizeof(toutpath), "%sstdout", _PATH_DEV); 294 outfile = outpath = toutpath; 295 outfp = stdout; 296 } else if (!(ch = access(outpath, 0)) && 297 strncmp(_PATH_DEV, outpath, 5)) { 298 struct sigaction act; 299 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 300 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 301 int outfd; 302 errno = 0; 303 if (access(outpath, W_OK)) 304 err(2, "%s", outpath); 305 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 306 outpath); 307 if ((outfd = mkstemp(toutpath)) == -1) 308 err(2, "Cannot create temporary file `%s'", toutpath); 309 if ((outfp = fdopen(outfd, "w")) == NULL) 310 err(2, "Cannot open temporary file `%s'", toutpath); 311 outfile = toutpath; 312 (void)atexit(cleanup); 313 act.sa_handler = onsignal; 314 (void) sigemptyset(&act.sa_mask); 315 act.sa_flags = SA_RESTART | SA_RESETHAND; 316 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 317 sigaction(sigtable[i], &act, 0); 318 } else 319 outfile = outpath; 320 321 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 322 err(2, "output file %s", outfile); 323 324 if (mflag) { 325 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 326 fldtab); 327 } else 328 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 329 330 if (outfile != outpath) { 331 if (access(outfile, 0)) 332 err(2, "%s", outfile); 333 (void)unlink(outpath); 334 if (link(outfile, outpath)) 335 err(2, "cannot link %s: output left in %s", 336 outpath, outfile); 337 (void)unlink(outfile); 338 } 339 exit(0); 340 } 341 342 static void 343 onsignal(sig) 344 int sig; 345 { 346 cleanup(); 347 } 348 349 static void 350 cleanup() 351 { 352 if (toutpath[0]) 353 (void)unlink(toutpath); 354 } 355 356 static void 357 usage(msg) 358 const char *msg; 359 { 360 if (msg != NULL) 361 (void)fprintf(stderr, "sort: %s\n", msg); 362 (void)fprintf(stderr, 363 "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]" 364 " [-R char] [-T dir]", getprogname()); 365 (void)fprintf(stderr, 366 " [-t char] [file ...]\n"); 367 exit(2); 368 } 369