1 /* $OpenBSD: sort.c,v 1.39 2009/12/22 19:47:02 schwarze Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * Sort sorts a file using an optional user-defined key. 37 * Sort uses radix sort for internal sorting, and allows 38 * a choice of merge sort and radix sort for external sorting. 39 */ 40 41 #include "sort.h" 42 #include "fsort.h" 43 #include "pathnames.h" 44 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 #include <locale.h> 48 #include <paths.h> 49 #include <signal.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 #include <err.h> 54 55 int REC_D = '\n'; 56 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 57 58 /* 59 * weight tables. Gweights is one of ascii, Rascii.. 60 * modified to weight rec_d = 0 (or 255) 61 */ 62 extern u_char gweights[NBINS]; 63 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 64 65 /* 66 * masks of ignored characters. Alltable is 256 ones 67 */ 68 u_char dtable[NBINS], itable[NBINS], alltable[NBINS]; 69 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0; 70 struct coldesc *clist; 71 int ncols = 0; 72 int ND = 10; /* limit on number of -k options. */ 73 74 char *devstdin = _PATH_STDIN; 75 char *tmpdir = _PATH_VARTMP; 76 char toutpath[PATH_MAX]; 77 78 static void cleanup(void); 79 static void onsig(int); 80 static void usage(char *); 81 82 #define CHECK_NFIELDS \ 83 if (++nfields == ND) { \ 84 ND += 10; \ 85 if ((p = realloc(fldtab, ND)) == NULL) \ 86 errx(2, "cannot allocate memory"); \ 87 ftpos = p + (ftpos - fldtab); \ 88 fldtab = p; \ 89 } 90 91 int 92 main(int argc, char *argv[]) 93 { 94 int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); 95 int ch, i, stdinflag = 0, tmp = 0; 96 char nfields = 0, cflag = 0, c_warn = 0, mflag = 0; 97 char *outfile, *outpath = 0; 98 struct field *fldtab, *ftpos; 99 union f_handle filelist; 100 FILE *outfp = NULL; 101 void *p; 102 103 setlocale(LC_ALL, ""); 104 105 if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL || 106 (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL) 107 errx(2, "cannot allocate memory"); 108 memset(d_mask, 0, NBINS); 109 d_mask[REC_D = '\n'] = REC_D_F; 110 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 111 fixit(&argc, argv); 112 if (!issetugid() && (outfile = getenv("TMPDIR"))) 113 tmpdir = outfile; 114 while ((ch = getopt(argc, argv, "bCcdfik:mHno:rR:t:T:uy:zs")) != -1) { 115 switch (ch) { 116 case 'b': fldtab->flags |= BI | BT; 117 break; 118 case 'd': 119 case 'f': 120 case 'i': 121 case 'n': 122 case 'r': tmp |= optval(ch, 0); 123 if (tmp & R && tmp & F) 124 fldtab->weights = RFtable; 125 else if (tmp & F) 126 fldtab->weights = Ftable; 127 else if (tmp & R) 128 fldtab->weights = Rascii; 129 fldtab->flags |= tmp; 130 break; 131 case 'o': 132 outpath = optarg; 133 break; 134 case 'k': 135 CHECK_NFIELDS; 136 setfield(optarg, ++ftpos, fldtab->flags); 137 break; 138 case 't': 139 if (SEP_FLAG) 140 usage("multiple field delimiters"); 141 SEP_FLAG = 1; 142 d_mask[' '] &= ~FLD_D; 143 d_mask['\t'] &= ~FLD_D; 144 d_mask[(int)*optarg] |= FLD_D; 145 if (d_mask[(int)*optarg] & REC_D_F) 146 err(2, "record/field delimiter clash"); 147 break; 148 case 'R': 149 if (REC_D != '\n') 150 usage("multiple record delimiters"); 151 if ('\n' == (REC_D = *optarg)) 152 break; 153 d_mask['\n'] = d_mask[' ']; 154 d_mask[REC_D] = REC_D_F; 155 break; 156 case 'T': 157 tmpdir = optarg; 158 break; 159 case 'u': 160 UNIQUE = 1; 161 break; 162 case 'C': 163 cflag = 1; 164 c_warn = 0; 165 break; 166 case 'c': 167 cflag = 1; 168 c_warn = 1; 169 break; 170 case 'm': 171 mflag = 1; 172 break; 173 case 'H': 174 PANIC = 0; 175 break; 176 case 'y': 177 /* accept -y for backwards compat. */ 178 break; 179 case 'z': 180 if (REC_D != '\n') 181 usage("multiple record delimiters"); 182 REC_D = '\0'; 183 d_mask['\n'] = d_mask[' ']; 184 d_mask[REC_D] = REC_D_F; 185 break; 186 case 's': 187 STABLE = 1; 188 break; 189 case '?': 190 default: 191 usage(NULL); 192 } 193 } 194 195 if (cflag && argc > optind+1) 196 errx(2, "too many input files for the -%c option", 197 c_warn ? 'c' : 'C'); 198 199 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 200 outpath = argv[argc-1]; 201 argc -= 2; 202 } 203 204 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 205 errx(2, "too many input files for -m option"); 206 207 for (i = optind; i < argc; i++) { 208 /* allow one occurrence of /dev/stdin */ 209 if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) { 210 if (stdinflag) 211 warnx("ignoring extra \"%s\" in file list", 212 argv[i]); 213 else { 214 stdinflag = 1; 215 argv[i] = devstdin; 216 } 217 } else if ((ch = access(argv[i], R_OK))) 218 err(2, "%s", argv[i]); 219 } 220 221 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 222 SINGL_FLD = 1; 223 fldtab[0].icol.num = 1; 224 } else { 225 if (!fldtab[1].icol.num) { 226 CHECK_NFIELDS; 227 fldtab[0].flags &= ~(BI|BT); 228 setfield("1", ++ftpos, fldtab->flags); 229 } 230 fldreset(fldtab); 231 fldtab[0].flags &= ~F; 232 } 233 settables(fldtab[0].flags); 234 num_init(); 235 fldtab->weights = gweights; 236 237 if (optind == argc) { 238 static char *names[2]; 239 240 names[0] = devstdin; 241 names[1] = NULL; 242 filelist.names = names; 243 optind--; 244 } else 245 filelist.names = argv+optind; 246 247 if (SINGL_FLD) 248 get = makeline; 249 else 250 get = makekey; 251 252 if (!SINGL_FLD) { 253 if ((linebuf = malloc(linebuf_size)) == NULL) 254 err(2, NULL); 255 } 256 257 if (cflag) { 258 order(filelist, get, fldtab, c_warn); 259 /* NOT REACHED */ 260 } 261 262 if (!outpath) { 263 (void)snprintf(toutpath, 264 sizeof(toutpath), "%sstdout", _PATH_DEV); 265 outfile = outpath = toutpath; 266 } else if (!(ch = access(outpath, 0)) && 267 strncmp(_PATH_DEV, outpath, 5)) { 268 struct sigaction oact, act; 269 int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, 270 SIGVTALRM, SIGPROF, 0}; 271 int outfd; 272 mode_t um; 273 274 errno = 0; 275 276 if (access(outpath, W_OK)) 277 err(2, "%s", outpath); 278 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX", 279 outpath); 280 um = umask(S_IWGRP|S_IWOTH); 281 (void)umask(um); 282 if ((outfd = mkstemp(toutpath)) == -1 || 283 fchmod(outfd, DEFFILEMODE & ~um) == -1 || 284 (outfp = fdopen(outfd, "w")) == 0) 285 err(2, "%s", toutpath); 286 outfile = toutpath; 287 288 (void)atexit(cleanup); 289 sigfillset(&act.sa_mask); 290 act.sa_flags = SA_RESTART; 291 act.sa_handler = onsig; 292 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 293 if (sigaction(sigtable[i], NULL, &oact) < 0 || 294 oact.sa_handler != SIG_IGN && 295 sigaction(sigtable[i], &act, NULL) < 0) 296 err(2, "sigaction"); 297 } else 298 outfile = outpath; 299 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 300 err(2, "%s", outfile); 301 if (mflag) 302 fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab); 303 else 304 fsort(-1, 0, filelist, argc-optind, outfp, fldtab); 305 if (outfile != outpath) { 306 if (access(outfile, 0)) 307 err(2, "%s", outfile); 308 (void)unlink(outpath); 309 if (link(outfile, outpath)) 310 err(2, "cannot link %s: output left in %s", 311 outpath, outfile); 312 (void)unlink(outfile); 313 } 314 exit(0); 315 } 316 317 /* ARGSUSED */ 318 static void 319 onsig(int signo) 320 { 321 322 cleanup(); 323 _exit(2); /* return 2 on error/interrupt */ 324 } 325 326 static void 327 cleanup(void) 328 { 329 330 if (toutpath[0]) 331 (void)unlink(toutpath); 332 } 333 334 static void 335 usage(char *msg) 336 { 337 extern char *__progname; 338 339 if (msg != NULL) 340 warnx("%s", msg); 341 (void)fprintf(stderr, "usage: %s [-bCcdfHimnrsuz] " 342 "[-k field1[,field2]] [-o output] [-R char]\n" 343 "\t[-T dir] [-t char] [file ...]\n", __progname); 344 exit(2); 345 } 346