1 /* $NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 #include <sys/cdefs.h> 64 #ifndef lint 65 __COPYRIGHT("@(#) Copyright (c) 1993\ 66 The Regents of the University of California. All rights reserved."); 67 #endif /* not lint */ 68 __RCSID("$NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $"); 69 70 /* Sort sorts a file using an optional user-defined key. 71 * Sort uses radix sort for internal sorting, and allows 72 * a choice of merge sort and radix sort for external sorting. 73 */ 74 75 #include <sys/types.h> 76 #include <sys/time.h> 77 #include <sys/stat.h> 78 #include <sys/resource.h> 79 80 #include <locale.h> 81 #include <paths.h> 82 #include <signal.h> 83 #include <stdlib.h> 84 #include <string.h> 85 #include <unistd.h> 86 #include <util.h> 87 88 #include "sort.h" 89 #include "fsort.h" 90 #include "pathnames.h" 91 92 int REC_D = '\n'; 93 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 94 95 /* 96 * weight tables. Gweights is one of ascii, Rascii.. 97 * modified to weight rec_d = 0 (or 255) 98 */ 99 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 100 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 101 102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 int REVERSE = 0; 104 int posix_sort; 105 106 unsigned int debug_flags = 0; 107 108 static char toutpath[MAXPATHLEN]; 109 110 const char *tmpdir; /* where temporary files should be put */ 111 112 static void cleanup(void); 113 static void onsignal(int); 114 __dead static void usage(const char *); 115 116 int 117 main(int argc, char *argv[]) 118 { 119 int ch, i, stdinflag = 0; 120 char mode = 0; 121 char *outfile, *outpath = 0; 122 struct field *fldtab; 123 size_t fldtab_sz, fld_cnt; 124 struct filelist filelist; 125 int num_input_files; 126 FILE *outfp = NULL; 127 struct rlimit rl; 128 struct stat st; 129 130 setlocale(LC_ALL, ""); 131 132 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 133 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 134 err(2, "getrlimit"); 135 rl.rlim_cur = rl.rlim_max; 136 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 137 err(2, "setrlimit"); 138 139 d_mask[REC_D = '\n'] = REC_D_F; 140 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 141 142 /* fldtab[0] is the global options. */ 143 fldtab_sz = 3; 144 fld_cnt = 0; 145 fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); 146 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 147 148 #define SORT_OPTS "bcCdD:fHik:lmno:rR:sSt:T:u" 149 150 /* Convert "+field" args to -k format */ 151 fixit(&argc, argv, SORT_OPTS); 152 153 if (!(tmpdir = getenv("TMPDIR"))) 154 tmpdir = _PATH_TMP; 155 156 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 157 switch (ch) { 158 case 'b': 159 fldtab[0].flags |= BI | BT; 160 break; 161 case 'c': case 'C': case 'm': 162 if (mode) 163 usage("Incompatible operation modes"); 164 mode = ch; 165 break; 166 case 'D': /* Debug flags */ 167 for (i = 0; optarg[i]; i++) 168 debug_flags |= 1 << (optarg[i] & 31); 169 break; 170 case 'd': case 'f': case 'i': case 'n': case 'l': 171 fldtab[0].flags |= optval(ch, 0); 172 break; 173 case 'H': 174 /* -H was ; use merge sort for blocks of large files' */ 175 /* That is now the default. */ 176 break; 177 case 'k': 178 fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 179 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 180 fldtab_sz++; 181 182 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 183 break; 184 case 'o': 185 outpath = optarg; 186 break; 187 case 'r': 188 REVERSE = 1; 189 break; 190 case 'R': 191 if (REC_D != '\n') 192 usage("multiple record delimiters"); 193 REC_D = *optarg; 194 if (optarg[1] != '\0') { 195 char *ep; 196 int t = 0; 197 198 if (optarg[0] == '\\') 199 optarg++, t = 8; 200 REC_D = (int)strtol(optarg, &ep, t); 201 if (*ep != '\0' || REC_D < 0 || 202 REC_D >= (int)__arraycount(d_mask)) 203 errx(2, "invalid record delimiter %s", 204 optarg); 205 } 206 if (REC_D == '\n') 207 break; 208 d_mask['\n'] = d_mask[' ']; 209 d_mask[REC_D] = REC_D_F; 210 break; 211 case 's': 212 /* 213 * Nominally 'stable sort', keep lines with equal keys 214 * in input file order. (Default for NetBSD) 215 * (-s for GNU sort compatibility.) 216 */ 217 posix_sort = 0; 218 break; 219 case 'S': 220 /* 221 * Reverse of -s! 222 * This needs to enforce a POSIX sort where records 223 * with equal keys are then sorted by the raw data. 224 * Currently not implemented! 225 * (using libc radixsort() v sradixsort() doesn't 226 * have the desired effect.) 227 */ 228 posix_sort = 1; 229 break; 230 case 't': 231 if (SEP_FLAG) 232 usage("multiple field delimiters"); 233 SEP_FLAG = 1; 234 d_mask[' '] &= ~FLD_D; 235 d_mask['\t'] &= ~FLD_D; 236 d_mask['\n'] &= ~FLD_D; 237 d_mask[(u_char)*optarg] |= FLD_D; 238 if (d_mask[(u_char)*optarg] & REC_D_F) 239 errx(2, "record/field delimiter clash"); 240 break; 241 case 'T': 242 /* -T tmpdir */ 243 tmpdir = optarg; 244 break; 245 case 'u': 246 UNIQUE = 1; 247 break; 248 case '?': 249 default: 250 usage(NULL); 251 } 252 } 253 254 if (UNIQUE) 255 /* Don't sort on raw record if keys match */ 256 posix_sort = 0; 257 258 if ((mode == 'c' || mode == 'C') && argc > optind+1) 259 errx(2, "too many input files for -c option"); 260 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 261 outpath = argv[argc-1]; 262 argc -= 2; 263 } 264 if (mode == 'm' && argc - optind > (MAXFCT - (16+1))*16) 265 errx(2, "too many input files for -m option"); 266 267 for (i = optind; i < argc; i++) { 268 /* allow one occurrence of /dev/stdin */ 269 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 270 if (stdinflag) 271 warnx("ignoring extra \"%s\" in file list", 272 argv[i]); 273 else 274 stdinflag = 1; 275 276 /* change to /dev/stdin if '-' */ 277 if (argv[i][0] == '-') { 278 static char path_stdin[] = _PATH_STDIN; 279 argv[i] = path_stdin; 280 } 281 282 } else if ((ch = access(argv[i], R_OK))) 283 err(2, "%s", argv[i]); 284 } 285 286 if (fldtab[1].icol.num == 0) { 287 /* No sort key specified */ 288 if (fldtab[0].flags & (I|D|F|N|L)) { 289 /* Modified - generate a key that covers the line */ 290 fldtab[0].flags &= ~(BI|BT); 291 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 292 fldreset(fldtab); 293 } else { 294 /* Unmodified, just compare the line */ 295 SINGL_FLD = 1; 296 fldtab[0].icol.num = 1; 297 } 298 } else { 299 fldreset(fldtab); 300 } 301 302 settables(); 303 304 if (optind == argc) { 305 static const char * const names[] = { _PATH_STDIN, NULL }; 306 filelist.names = names; 307 num_input_files = 1; 308 } else { 309 filelist.names = (const char * const *) &argv[optind]; 310 num_input_files = argc - optind; 311 } 312 313 if (mode == 'c' || mode == 'C') { 314 order(&filelist, fldtab, mode == 'C'); 315 /* NOT REACHED */ 316 } 317 318 if (!outpath) { 319 toutpath[0] = '\0'; /* path not used in this case */ 320 outfile = outpath = toutpath; 321 outfp = stdout; 322 } else if (lstat(outpath, &st) == 0 323 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 324 /* output file exists and isn't character or block device */ 325 struct sigaction act; 326 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 327 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 328 int outfd; 329 errno = 0; 330 if (access(outpath, W_OK)) 331 err(2, "%s", outpath); 332 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 333 outpath); 334 if ((outfd = mkstemp(toutpath)) == -1) 335 err(2, "Cannot create temporary file `%s'", toutpath); 336 (void)atexit(cleanup); 337 act.sa_handler = onsignal; 338 (void) sigemptyset(&act.sa_mask); 339 act.sa_flags = SA_RESTART | SA_RESETHAND; 340 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 341 sigaction(sigtable[i], &act, 0); 342 outfile = toutpath; 343 if ((outfp = fdopen(outfd, "w")) == NULL) 344 err(2, "Cannot open temporary file `%s'", toutpath); 345 } else { 346 outfile = outpath; 347 348 if ((outfp = fopen(outfile, "w")) == NULL) 349 err(2, "output file %s", outfile); 350 } 351 352 if (mode == 'm') 353 fmerge(&filelist, num_input_files, outfp, fldtab); 354 else 355 fsort(&filelist, num_input_files, outfp, fldtab); 356 357 if (outfile != outpath) { 358 if (access(outfile, F_OK)) 359 err(2, "%s", outfile); 360 361 /* 362 * Copy file permissions bits of the original file. 363 * st is initialized above, when we create the 364 * temporary spool file. 365 */ 366 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 367 err(2, "cannot chmod %s: output left in %s", 368 outpath, outfile); 369 } 370 371 (void)unlink(outpath); 372 if (link(outfile, outpath)) 373 err(2, "cannot link %s: output left in %s", 374 outpath, outfile); 375 (void)unlink(outfile); 376 toutpath[0] = 0; 377 } 378 exit(0); 379 } 380 381 static void 382 onsignal(int sig) 383 { 384 cleanup(); 385 } 386 387 static void 388 cleanup(void) 389 { 390 if (toutpath[0]) 391 (void)unlink(toutpath); 392 } 393 394 static void 395 usage(const char *msg) 396 { 397 const char *pn = getprogname(); 398 399 if (msg != NULL) 400 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 401 (void)fprintf(stderr, 402 "usage: %s [-bdfHilmnrSsu] [-k kstart[,kend]] [-o output]" 403 " [-R char] [-T dir]\n", pn); 404 (void)fprintf(stderr, 405 " [-t char] [file ...]\n"); 406 (void)fprintf(stderr, 407 " or: %s -C|-c [-bdfilnru] [-k kstart[,kend]] [-o output]" 408 " [-R char]\n", pn); 409 (void)fprintf(stderr, 410 " [-t char] [file]\n"); 411 exit(2); 412 } 413 414 RECHEADER * 415 allocrec(RECHEADER *rec, size_t size) 416 { 417 418 return (erealloc(rec, size + sizeof(long) - 1)); 419 } 420