1 /* $OpenBSD: sort.c,v 1.41 2013/11/13 15:07:27 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * Sort sorts a file using an optional user-defined key. 37 * Sort uses radix sort for internal sorting, and allows 38 * a choice of merge sort and radix sort for external sorting. 39 */ 40 41 #include "sort.h" 42 #include "fsort.h" 43 #include "pathnames.h" 44 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 #include <locale.h> 48 #include <paths.h> 49 #include <signal.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 #include <err.h> 54 55 int REC_D = '\n'; 56 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 57 58 /* 59 * weight tables. Gweights is one of ascii, Rascii.. 60 * modified to weight rec_d = 0 (or 255) 61 */ 62 extern u_char gweights[NBINS]; 63 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 64 65 /* 66 * masks of ignored characters. Alltable is 256 ones 67 */ 68 u_char dtable[NBINS], itable[NBINS], alltable[NBINS]; 69 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0; 70 struct coldesc *clist; 71 int ncols = 0; 72 int ND = 10; /* limit on number of -k options. */ 73 74 char *devstdin = _PATH_STDIN; 75 char *tmpdir = _PATH_VARTMP; 76 char toutpath[PATH_MAX]; 77 78 static void cleanup(void); 79 static void onsig(int); 80 static void usage(char *); 81 82 #define CHECK_NFIELDS \ 83 if (++nfields == ND) { \ 84 ND += 10; \ 85 if ((p = realloc(fldtab, \ 86 ND * sizeof(*fldtab))) == NULL) \ 87 errx(2, "cannot allocate memory"); \ 88 ftpos = p + (ftpos - fldtab); \ 89 fldtab = p; \ 90 } 91 92 int 93 main(int argc, char *argv[]) 94 { 95 int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); 96 int ch, i, stdinflag = 0, tmp = 0; 97 char nfields = 0, cflag = 0, c_warn = 0, mflag = 0; 98 char *outfile, *outpath = 0; 99 struct field *fldtab, *ftpos; 100 union f_handle filelist; 101 FILE *outfp = NULL; 102 struct field *p; 103 104 setlocale(LC_ALL, ""); 105 106 if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL || 107 (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL) 108 errx(2, "cannot allocate memory"); 109 memset(d_mask, 0, NBINS); 110 d_mask[REC_D = '\n'] = REC_D_F; 111 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 112 fixit(&argc, argv); 113 if (!issetugid() && (outfile = getenv("TMPDIR"))) 114 tmpdir = outfile; 115 while ((ch = getopt(argc, argv, "bCcdfik:mHno:rR:t:T:uy:zs")) != -1) { 116 switch (ch) { 117 case 'b': fldtab->flags |= BI | BT; 118 break; 119 case 'd': 120 case 'f': 121 case 'i': 122 case 'n': 123 case 'r': tmp |= optval(ch, 0); 124 if (tmp & R && tmp & F) 125 fldtab->weights = RFtable; 126 else if (tmp & F) 127 fldtab->weights = Ftable; 128 else if (tmp & R) 129 fldtab->weights = Rascii; 130 fldtab->flags |= tmp; 131 break; 132 case 'o': 133 outpath = optarg; 134 break; 135 case 'k': 136 CHECK_NFIELDS; 137 setfield(optarg, ++ftpos, fldtab->flags); 138 break; 139 case 't': 140 if (SEP_FLAG) 141 usage("multiple field delimiters"); 142 SEP_FLAG = 1; 143 d_mask[' '] &= ~FLD_D; 144 d_mask['\t'] &= ~FLD_D; 145 d_mask[(int)*optarg] |= FLD_D; 146 if (d_mask[(int)*optarg] & REC_D_F) 147 err(2, "record/field delimiter clash"); 148 break; 149 case 'R': 150 if (REC_D != '\n') 151 usage("multiple record delimiters"); 152 if ('\n' == (REC_D = *optarg)) 153 break; 154 d_mask['\n'] = d_mask[' ']; 155 d_mask[REC_D] = REC_D_F; 156 break; 157 case 'T': 158 tmpdir = optarg; 159 break; 160 case 'u': 161 UNIQUE = 1; 162 break; 163 case 'C': 164 cflag = 1; 165 c_warn = 0; 166 break; 167 case 'c': 168 cflag = 1; 169 c_warn = 1; 170 break; 171 case 'm': 172 mflag = 1; 173 break; 174 case 'H': 175 PANIC = 0; 176 break; 177 case 'y': 178 /* accept -y for backwards compat. */ 179 break; 180 case 'z': 181 if (REC_D != '\n') 182 usage("multiple record delimiters"); 183 REC_D = '\0'; 184 d_mask['\n'] = d_mask[' ']; 185 d_mask[REC_D] = REC_D_F; 186 break; 187 case 's': 188 STABLE = 1; 189 break; 190 case '?': 191 default: 192 usage(NULL); 193 } 194 } 195 196 if (cflag && argc > optind+1) 197 errx(2, "too many input files for the -%c option", 198 c_warn ? 'c' : 'C'); 199 200 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 201 outpath = argv[argc-1]; 202 argc -= 2; 203 } 204 205 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 206 errx(2, "too many input files for -m option"); 207 208 for (i = optind; i < argc; i++) { 209 /* allow one occurrence of /dev/stdin */ 210 if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) { 211 if (stdinflag) 212 warnx("ignoring extra \"%s\" in file list", 213 argv[i]); 214 else { 215 stdinflag = 1; 216 argv[i] = devstdin; 217 } 218 } else if ((ch = access(argv[i], R_OK))) 219 err(2, "%s", argv[i]); 220 } 221 222 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 223 SINGL_FLD = 1; 224 fldtab[0].icol.num = 1; 225 } else { 226 if (!fldtab[1].icol.num) { 227 CHECK_NFIELDS; 228 fldtab[0].flags &= ~(BI|BT); 229 setfield("1", ++ftpos, fldtab->flags); 230 } 231 fldreset(fldtab); 232 fldtab[0].flags &= ~F; 233 } 234 settables(fldtab[0].flags); 235 num_init(); 236 fldtab->weights = gweights; 237 238 if (optind == argc) { 239 static char *names[2]; 240 241 names[0] = devstdin; 242 names[1] = NULL; 243 filelist.names = names; 244 optind--; 245 } else 246 filelist.names = argv+optind; 247 248 if (SINGL_FLD) 249 get = makeline; 250 else 251 get = makekey; 252 253 if (!SINGL_FLD) { 254 if ((linebuf = malloc(linebuf_size)) == NULL) 255 err(2, NULL); 256 } 257 258 if (cflag) { 259 order(filelist, get, fldtab, c_warn); 260 /* NOT REACHED */ 261 } 262 263 if (!outpath) { 264 (void)snprintf(toutpath, 265 sizeof(toutpath), "%sstdout", _PATH_DEV); 266 outfile = outpath = toutpath; 267 } else if (!(ch = access(outpath, 0)) && 268 strncmp(_PATH_DEV, outpath, 5)) { 269 struct sigaction oact, act; 270 int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, 271 SIGVTALRM, SIGPROF, 0}; 272 int outfd; 273 mode_t um; 274 275 errno = 0; 276 277 if (access(outpath, W_OK)) 278 err(2, "%s", outpath); 279 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX", 280 outpath); 281 um = umask(S_IWGRP|S_IWOTH); 282 (void)umask(um); 283 if ((outfd = mkstemp(toutpath)) == -1 || 284 fchmod(outfd, DEFFILEMODE & ~um) == -1 || 285 (outfp = fdopen(outfd, "w")) == NULL) 286 err(2, "%s", toutpath); 287 outfile = toutpath; 288 289 (void)atexit(cleanup); 290 sigfillset(&act.sa_mask); 291 act.sa_flags = SA_RESTART; 292 act.sa_handler = onsig; 293 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 294 if (sigaction(sigtable[i], NULL, &oact) < 0 || 295 oact.sa_handler != SIG_IGN && 296 sigaction(sigtable[i], &act, NULL) < 0) 297 err(2, "sigaction"); 298 } else 299 outfile = outpath; 300 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 301 err(2, "%s", outfile); 302 if (mflag) 303 fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab); 304 else 305 fsort(-1, 0, filelist, argc-optind, outfp, fldtab); 306 if (outfile != outpath) { 307 if (access(outfile, 0)) 308 err(2, "%s", outfile); 309 (void)unlink(outpath); 310 if (link(outfile, outpath)) 311 err(2, "cannot link %s: output left in %s", 312 outpath, outfile); 313 (void)unlink(outfile); 314 } 315 exit(0); 316 } 317 318 /* ARGSUSED */ 319 static void 320 onsig(int signo) 321 { 322 323 cleanup(); 324 _exit(2); /* return 2 on error/interrupt */ 325 } 326 327 static void 328 cleanup(void) 329 { 330 331 if (toutpath[0]) 332 (void)unlink(toutpath); 333 } 334 335 static void 336 usage(char *msg) 337 { 338 extern char *__progname; 339 340 if (msg != NULL) 341 warnx("%s", msg); 342 (void)fprintf(stderr, "usage: %s [-bCcdfHimnrsuz] " 343 "[-k field1[,field2]] [-o output] [-R char]\n" 344 "\t[-T dir] [-t char] [file ...]\n", __progname); 345 exit(2); 346 } 347