1 /* $NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include <util.h> 70 #include "sort.h" 71 #include "fsort.h" 72 #include "pathnames.h" 73 74 #ifndef lint 75 __COPYRIGHT("@(#) Copyright (c) 1993\ 76 The Regents of the University of California. All rights reserved."); 77 #endif /* not lint */ 78 79 #ifndef lint 80 __RCSID("$NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $"); 81 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 82 #endif /* not lint */ 83 84 #include <sys/types.h> 85 #include <sys/time.h> 86 #include <sys/resource.h> 87 88 #include <paths.h> 89 #include <signal.h> 90 #include <stdlib.h> 91 #include <string.h> 92 #include <unistd.h> 93 #include <locale.h> 94 95 int REC_D = '\n'; 96 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 97 98 /* 99 * weight tables. Gweights is one of ascii, Rascii.. 100 * modified to weight rec_d = 0 (or 255) 101 */ 102 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 103 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 104 105 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 106 int REVERSE = 0; 107 int posix_sort; 108 109 unsigned int debug_flags = 0; 110 111 static char toutpath[MAXPATHLEN]; 112 113 const char *tmpdir; /* where temporary files should be put */ 114 115 static void cleanup(void); 116 static void onsignal(int); 117 static void usage(const char *); 118 119 int main(int argc, char **argv); 120 121 int 122 main(int argc, char *argv[]) 123 { 124 get_func_t get; 125 int ch, i, stdinflag = 0; 126 char cflag = 0, mflag = 0; 127 char *outfile, *outpath = 0; 128 struct field *fldtab; 129 size_t fldtab_sz, fld_cnt; 130 struct filelist filelist; 131 int num_input_files; 132 FILE *outfp = NULL; 133 struct rlimit rl; 134 struct stat st; 135 136 setlocale(LC_ALL, ""); 137 138 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 139 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 140 err(2, "getrlimit"); 141 rl.rlim_cur = rl.rlim_max; 142 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 143 err(2, "setrlimit"); 144 145 d_mask[REC_D = '\n'] = REC_D_F; 146 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 147 148 /* fldtab[0] is the global options. */ 149 fldtab_sz = 3; 150 fld_cnt = 0; 151 fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); 152 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 153 154 /* Convert "+field" args to -f format */ 155 fixit(&argc, argv); 156 157 if (!(tmpdir = getenv("TMPDIR"))) 158 tmpdir = _PATH_TMP; 159 160 while ((ch = getopt(argc, argv, "bcdD:fik:mHno:rR:sSt:T:ux")) != -1) { 161 switch (ch) { 162 case 'b': 163 fldtab[0].flags |= BI | BT; 164 break; 165 case 'c': 166 cflag = 1; 167 break; 168 case 'D': /* Debug flags */ 169 for (i = 0; optarg[i]; i++) 170 debug_flags |= 1 << (optarg[i] & 31); 171 break; 172 case 'd': case 'f': case 'i': case 'n': 173 fldtab[0].flags |= optval(ch, 0); 174 break; 175 case 'H': 176 /* -H was ; use merge sort for blocks of large files' */ 177 /* That is now the default. */ 178 break; 179 case 'k': 180 fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 181 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 182 fldtab_sz++; 183 184 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 185 break; 186 case 'm': 187 mflag = 1; 188 break; 189 case 'o': 190 outpath = optarg; 191 break; 192 case 'r': 193 REVERSE = 1; 194 break; 195 case 's': 196 /* 197 * Nominally 'stable sort', keep lines with equal keys 198 * in input file order. (Default for NetBSD) 199 * (-s for GNU sort compatibility.) 200 */ 201 posix_sort = 0; 202 break; 203 case 'S': 204 /* 205 * Reverse of -s! 206 * This needs to enforce a POSIX sort where records 207 * with equal keys are then sorted by the raw data. 208 * Currently not implemented! 209 * (using libc radixsort() v sradixsort() doesn't 210 * have the desired effect.) 211 */ 212 posix_sort = 1; 213 break; 214 case 't': 215 if (SEP_FLAG) 216 usage("multiple field delimiters"); 217 SEP_FLAG = 1; 218 d_mask[' '] &= ~FLD_D; 219 d_mask['\t'] &= ~FLD_D; 220 d_mask[(u_char)*optarg] |= FLD_D; 221 if (d_mask[(u_char)*optarg] & REC_D_F) 222 errx(2, "record/field delimiter clash"); 223 break; 224 case 'R': 225 if (REC_D != '\n') 226 usage("multiple record delimiters"); 227 REC_D = *optarg; 228 if (REC_D == '\n') 229 break; 230 if (optarg[1] != '\0') { 231 char *ep; 232 int t = 0; 233 if (optarg[0] == '\\') 234 optarg++, t = 8; 235 REC_D = (int)strtol(optarg, &ep, t); 236 if (*ep != '\0' || REC_D < 0 || 237 REC_D >= (int)__arraycount(d_mask)) 238 errx(2, "invalid record delimiter %s", 239 optarg); 240 } 241 d_mask['\n'] = d_mask[' ']; 242 d_mask[REC_D] = REC_D_F; 243 break; 244 case 'T': 245 /* -T tmpdir */ 246 tmpdir = optarg; 247 break; 248 case 'u': 249 UNIQUE = 1; 250 break; 251 case '?': 252 default: 253 usage(NULL); 254 } 255 } 256 257 if (UNIQUE) 258 /* Don't sort on raw record if keys match */ 259 posix_sort = 0; 260 261 if (cflag && argc > optind+1) 262 errx(2, "too many input files for -c option"); 263 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 264 outpath = argv[argc-1]; 265 argc -= 2; 266 } 267 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 268 errx(2, "too many input files for -m option"); 269 270 for (i = optind; i < argc; i++) { 271 /* allow one occurrence of /dev/stdin */ 272 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 273 if (stdinflag) 274 warnx("ignoring extra \"%s\" in file list", 275 argv[i]); 276 else 277 stdinflag = 1; 278 279 /* change to /dev/stdin if '-' */ 280 if (argv[i][0] == '-') { 281 static char path_stdin[] = _PATH_STDIN; 282 argv[i] = path_stdin; 283 } 284 285 } else if ((ch = access(argv[i], R_OK))) 286 err(2, "%s", argv[i]); 287 } 288 289 if (fldtab[1].icol.num == 0) { 290 /* No sort key specified */ 291 if (fldtab[0].flags & (I|D|F|N)) { 292 /* Modified - generate a key that covers the line */ 293 fldtab[0].flags &= ~(BI|BT); 294 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 295 fldreset(fldtab); 296 } else { 297 /* Unmodified, just compare the line */ 298 SINGL_FLD = 1; 299 fldtab[0].icol.num = 1; 300 } 301 } else { 302 fldreset(fldtab); 303 } 304 305 settables(); 306 307 if (optind == argc) { 308 static const char * const names[] = { _PATH_STDIN, NULL }; 309 filelist.names = names; 310 num_input_files = 1; 311 } else { 312 filelist.names = (const char * const *) &argv[optind]; 313 num_input_files = argc - optind; 314 } 315 316 if (SINGL_FLD) 317 get = makeline; 318 else 319 get = makekey; 320 321 if (cflag) { 322 order(&filelist, get, fldtab); 323 /* NOT REACHED */ 324 } 325 if (!outpath) { 326 toutpath[0] = '\0'; /* path not used in this case */ 327 outfile = outpath = toutpath; 328 outfp = stdout; 329 } else if (lstat(outpath, &st) == 0 330 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 331 /* output file exists and isn't character or block device */ 332 struct sigaction act; 333 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 334 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 335 int outfd; 336 errno = 0; 337 if (access(outpath, W_OK)) 338 err(2, "%s", outpath); 339 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 340 outpath); 341 if ((outfd = mkstemp(toutpath)) == -1) 342 err(2, "Cannot create temporary file `%s'", toutpath); 343 (void)atexit(cleanup); 344 act.sa_handler = onsignal; 345 (void) sigemptyset(&act.sa_mask); 346 act.sa_flags = SA_RESTART | SA_RESETHAND; 347 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 348 sigaction(sigtable[i], &act, 0); 349 outfile = toutpath; 350 if ((outfp = fdopen(outfd, "w")) == NULL) 351 err(2, "Cannot open temporary file `%s'", toutpath); 352 } else { 353 outfile = outpath; 354 355 if ((outfp = fopen(outfile, "w")) == NULL) 356 err(2, "output file %s", outfile); 357 } 358 359 if (mflag) { 360 fmerge(-1, &filelist, num_input_files, get, outfp, putline, 361 fldtab); 362 } else 363 fsort(&filelist, num_input_files, outfp, fldtab); 364 365 if (outfile != outpath) { 366 if (access(outfile, F_OK)) 367 err(2, "%s", outfile); 368 369 /* 370 * Copy file permissions bits of the original file. 371 * st is initialized above, when we create the 372 * temporary spool file. 373 */ 374 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 375 err(2, "cannot chmod %s: output left in %s", 376 outpath, outfile); 377 } 378 379 (void)unlink(outpath); 380 if (link(outfile, outpath)) 381 err(2, "cannot link %s: output left in %s", 382 outpath, outfile); 383 (void)unlink(outfile); 384 toutpath[0] = 0; 385 } 386 exit(0); 387 } 388 389 static void 390 onsignal(int sig) 391 { 392 cleanup(); 393 } 394 395 static void 396 cleanup(void) 397 { 398 if (toutpath[0]) 399 (void)unlink(toutpath); 400 } 401 402 static void 403 usage(const char *msg) 404 { 405 if (msg != NULL) 406 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 407 (void)fprintf(stderr, 408 "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]" 409 " [-R char] [-T dir]", getprogname()); 410 (void)fprintf(stderr, 411 " [-t char] [file ...]\n"); 412 exit(2); 413 } 414