1 /* $OpenBSD: sort.c,v 1.16 2001/02/04 21:27:01 ericj Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #ifndef lint 40 static char copyright[] = 41 "@(#) Copyright (c) 1993\n\ 42 The Regents of the University of California. All rights reserved.\n"; 43 #endif /* not lint */ 44 45 #ifndef lint 46 #if 0 47 static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93"; 48 #else 49 static char rcsid[] = "$OpenBSD: sort.c,v 1.16 2001/02/04 21:27:01 ericj Exp $"; 50 #endif 51 #endif /* not lint */ 52 53 /* 54 * Sort sorts a file using an optional user-defined key. 55 * Sort uses radix sort for internal sorting, and allows 56 * a choice of merge sort and radix sort for external sorting. 57 */ 58 59 #include "sort.h" 60 #include "fsort.h" 61 #include "pathnames.h" 62 63 #include <sys/types.h> 64 #include <sys/stat.h> 65 #include <locale.h> 66 #include <paths.h> 67 #include <signal.h> 68 #include <stdlib.h> 69 #include <string.h> 70 #include <unistd.h> 71 #include <err.h> 72 73 int REC_D = '\n'; 74 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 75 76 /* 77 * weight tables. Gweights is one of ascii, Rascii.. 78 * modified to weight rec_d = 0 (or 255) 79 */ 80 extern u_char gweights[NBINS]; 81 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 82 83 /* 84 * masks of ignored characters. Alltable is 256 ones 85 */ 86 u_char dtable[NBINS], itable[NBINS], alltable[NBINS]; 87 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 88 struct coldesc *clist; 89 int ncols = 0; 90 int ND = 10; /* limit on number of -k options. */ 91 92 char devstdin[] = _PATH_STDIN; 93 char toutpath[_POSIX_PATH_MAX]; 94 char *tmpdir = _PATH_VARTMP; 95 96 static void cleanup __P((void)); 97 static void onsig __P((int)); 98 static void usage __P((char *)); 99 100 #define CHECK_NFIELDS \ 101 if (++nfields == ND) { \ 102 ND += 10; \ 103 if ((p = realloc(fldtab, ND)) == NULL) \ 104 errx(2, "cannot allocate memory"); \ 105 ftpos = p + (ftpos - fldtab); \ 106 fldtab = p; \ 107 } 108 109 int 110 main(argc, argv) 111 int argc; 112 char *argv[]; 113 { 114 int (*get)(); 115 int ch, i, stdinflag = 0, tmp = 0; 116 char nfields = 0, cflag = 0, mflag = 0; 117 char *outfile, *outpath = 0; 118 struct field *fldtab, *ftpos; 119 union f_handle filelist; 120 FILE *outfp = NULL; 121 void *p; 122 123 setlocale(LC_ALL, ""); 124 125 if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL || 126 (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL) 127 errx(2, "cannot allocate memory"); 128 memset(d_mask, 0, NBINS); 129 d_mask[REC_D = '\n'] = REC_D_F; 130 SINGL_FLD = SEP_FLAG = 0; 131 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 132 fixit(&argc, argv); 133 if (!issetugid() && (outfile = getenv("TMPDIR"))) 134 tmpdir = outfile; 135 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:")) != -1) { 136 switch (ch) { 137 case 'b': fldtab->flags |= BI | BT; 138 break; 139 case 'd': 140 case 'f': 141 case 'i': 142 case 'n': 143 case 'r': tmp |= optval(ch, 0); 144 if (tmp & R && tmp & F) 145 fldtab->weights = RFtable; 146 else if (tmp & F) 147 fldtab->weights = Ftable; 148 else if (tmp & R) 149 fldtab->weights = Rascii; 150 fldtab->flags |= tmp; 151 break; 152 case 'o': 153 outpath = optarg; 154 break; 155 case 'k': 156 CHECK_NFIELDS; 157 setfield(optarg, ++ftpos, fldtab->flags); 158 break; 159 case 't': 160 if (SEP_FLAG) 161 usage("multiple field delimiters"); 162 SEP_FLAG = 1; 163 d_mask[' '] &= ~FLD_D; 164 d_mask['\t'] &= ~FLD_D; 165 d_mask[(int)*optarg] |= FLD_D; 166 if (d_mask[(int)*optarg] & REC_D_F) 167 err(2, "record/field delimiter clash"); 168 break; 169 case 'R': 170 if (REC_D != '\n') 171 usage("multiple record delimiters"); 172 if ('\n' == (REC_D = *optarg)) 173 break; 174 d_mask['\n'] = d_mask[' ']; 175 d_mask[REC_D] = REC_D_F; 176 break; 177 case 'T': 178 tmpdir = optarg; 179 break; 180 case 'u': 181 UNIQUE = 1; 182 break; 183 case 'c': 184 cflag = 1; 185 break; 186 case 'm': 187 mflag = 1; 188 break; 189 case 'H': 190 PANIC = 0; 191 break; 192 case 'y': 193 /* accept -y for backwards compat. */ 194 break; 195 case '?': 196 default: 197 usage(NULL); 198 } 199 } 200 201 if (cflag && argc > optind+1) 202 errx(2, "too many input files for -c option"); 203 204 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 205 outpath = argv[argc-1]; 206 argc -= 2; 207 } 208 209 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 210 errx(2, "too many input files for -m option"); 211 212 for (i = optind; i < argc; i++) { 213 /* allow one occurrence of /dev/stdin */ 214 if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) { 215 if (stdinflag) 216 warnx("ignoring extra \"%s\" in file list", 217 argv[i]); 218 else { 219 stdinflag = 1; 220 argv[i] = devstdin; 221 } 222 } else if ((ch = access(argv[i], R_OK))) 223 err(2, "%s", argv[i]); 224 } 225 226 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 227 SINGL_FLD = 1; 228 fldtab[0].icol.num = 1; 229 } else { 230 if (!fldtab[1].icol.num) { 231 CHECK_NFIELDS; 232 fldtab[0].flags &= ~(BI|BT); 233 setfield("1", ++ftpos, fldtab->flags); 234 } 235 fldreset(fldtab); 236 fldtab[0].flags &= ~F; 237 } 238 settables(fldtab[0].flags); 239 num_init(); 240 fldtab->weights = gweights; 241 242 if (optind == argc) { 243 static char *names[2]; 244 245 names[0] = devstdin; 246 names[1] = NULL; 247 filelist.names = names; 248 optind--; 249 } else 250 filelist.names = argv+optind; 251 252 if (SINGL_FLD) 253 get = makeline; 254 else 255 get = makekey; 256 257 if (cflag) { 258 order(filelist, get, fldtab); 259 /* NOT REACHED */ 260 } 261 262 if (!outpath) { 263 (void)snprintf(toutpath, 264 sizeof(toutpath), "%sstdout", _PATH_DEV); 265 outfile = outpath = toutpath; 266 } else if (!(ch = access(outpath, 0)) && 267 strncmp(_PATH_DEV, outpath, 5)) { 268 struct sigaction act = {0, SIG_BLOCK, 6}; 269 int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, 270 SIGVTALRM, SIGPROF, 0}; 271 int outfd; 272 mode_t um; 273 274 errno = 0; 275 276 if (access(outpath, W_OK)) 277 err(2, "%s", outpath); 278 act.sa_handler = onsig; 279 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX", 280 outpath); 281 /* use default umask to try and avoid one syscall */ 282 um = umask(S_IWGRP|S_IWOTH); 283 if (um != S_IWGRP|S_IWOTH) 284 (void)umask(um); 285 if ((outfd = mkstemp(toutpath)) == -1 || 286 fchmod(outfd, DEFFILEMODE & ~um) == -1 || 287 (outfp = fdopen(outfd, "w")) == 0) 288 err(2, "%s", toutpath); 289 outfile = toutpath; 290 291 (void)atexit(cleanup); 292 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 293 sigaction(sigtable[i], &act, 0); 294 } else 295 outfile = outpath; 296 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 297 err(2, "%s", outfile); 298 if (mflag) 299 fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab); 300 else 301 fsort(-1, 0, filelist, argc-optind, outfp, fldtab); 302 if (outfile != outpath) { 303 if (access(outfile, 0)) 304 err(2, "%s", outfile); 305 (void)unlink(outpath); 306 if (link(outfile, outpath)) 307 err(2, "cannot link %s: output left in %s", 308 outpath, outfile); 309 (void)unlink(outfile); 310 } 311 exit(0); 312 } 313 314 static void 315 onsig(s) 316 int s; 317 { 318 319 cleanup(); 320 _exit(2); /* return 2 on error/interrupt */ 321 } 322 323 static void 324 cleanup() 325 { 326 327 if (toutpath[0]) 328 (void)unlink(toutpath); 329 } 330 331 static void 332 usage(msg) 333 char *msg; 334 { 335 extern char *__progname; 336 337 if (msg != NULL) 338 warnx("%s", msg); 339 (void)fprintf(stderr, "usage: %s [-T dir] [-o output] [-cmubdfinrH] " 340 "[-t char] [-R char] [-k keydef] ... [files]\n", __progname); 341 exit(2); 342 } 343