1 /* $NetBSD: sort.c,v 1.47 2008/11/08 17:11:56 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include "sort.h" 70 #include "fsort.h" 71 #include "pathnames.h" 72 73 #ifndef lint 74 __COPYRIGHT("@(#) Copyright (c) 1993\ 75 The Regents of the University of California. All rights reserved."); 76 #endif /* not lint */ 77 78 #ifndef lint 79 __RCSID("$NetBSD: sort.c,v 1.47 2008/11/08 17:11:56 christos Exp $"); 80 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 81 #endif /* not lint */ 82 83 #include <sys/types.h> 84 #include <sys/time.h> 85 #include <sys/resource.h> 86 87 #include <paths.h> 88 #include <signal.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <unistd.h> 92 #include <locale.h> 93 94 int REC_D = '\n'; 95 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 96 97 /* 98 * weight tables. Gweights is one of ascii, Rascii.. 99 * modified to weight rec_d = 0 (or 255) 100 */ 101 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 104 /* 105 * Default to stable sort. 106 */ 107 int stable_sort = 1; 108 109 static char toutpath[MAXPATHLEN]; 110 111 const char *tmpdir; /* where temporary files should be put */ 112 113 static void cleanup __P((void)); 114 static void onsignal __P((int)); 115 static void usage __P((const char *)); 116 117 int main __P((int argc, char **argv)); 118 119 int 120 main(argc, argv) 121 int argc; 122 char *argv[]; 123 { 124 get_func_t get; 125 int ch, i, stdinflag = 0, tmp = 0; 126 char cflag = 0, mflag = 0; 127 char *outfile, *outpath = 0; 128 struct field *fldtab, *p; 129 size_t fldtab_sz = 3, fidx = 0; 130 struct filelist filelist; 131 FILE *outfp = NULL; 132 struct rlimit rl; 133 struct stat st; 134 135 setlocale(LC_ALL, ""); 136 137 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 138 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 139 err(2, "getrlimit"); 140 rl.rlim_cur = rl.rlim_max; 141 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 142 err(2, "setrlimit"); 143 144 d_mask[REC_D = '\n'] = REC_D_F; 145 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 146 147 fldtab = malloc(fldtab_sz * sizeof(*fldtab)); 148 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 149 150 fixit(&argc, argv); 151 152 if (!(tmpdir = getenv("TMPDIR"))) 153 tmpdir = _PATH_TMP; 154 155 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 156 switch (ch) { 157 case 'b': 158 fldtab->flags |= BI | BT; 159 break; 160 case 'c': 161 cflag = 1; 162 break; 163 case 'd': case 'f': case 'i': case 'n': case 'r': 164 tmp |= optval(ch, 0); 165 if ((tmp & R) && (tmp & F)) 166 fldtab->weights = RFtable; 167 else if (tmp & F) 168 fldtab->weights = Ftable; 169 else if (tmp & R) 170 fldtab->weights = Rascii; 171 fldtab->flags |= tmp; 172 break; 173 case 'H': 174 PANIC = 0; 175 break; 176 case 'k': 177 p = realloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 178 if (!p) 179 err(1, "realloc"); 180 fldtab = p; 181 memset(&fldtab[fldtab_sz], 0, 182 sizeof(fldtab[fldtab_sz])); 183 fldtab_sz++; 184 185 setfield(optarg, &fldtab[++fidx], fldtab->flags); 186 break; 187 case 'm': 188 mflag = 1; 189 break; 190 case 'o': 191 outpath = optarg; 192 break; 193 case 's': 194 /* for GNU sort compatibility (this is our default) */ 195 stable_sort = 1; 196 break; 197 case 'S': 198 stable_sort = 0; 199 break; 200 case 't': 201 if (SEP_FLAG) 202 usage("multiple field delimiters"); 203 SEP_FLAG = 1; 204 d_mask[' '] &= ~FLD_D; 205 d_mask['\t'] &= ~FLD_D; 206 d_mask[(u_char)*optarg] |= FLD_D; 207 if (d_mask[(u_char)*optarg] & REC_D_F) 208 errx(2, "record/field delimiter clash"); 209 break; 210 case 'R': 211 if (REC_D != '\n') 212 usage("multiple record delimiters"); 213 if ('\n' == (REC_D = *optarg)) 214 break; 215 if (optarg[1] != '\0') { 216 char *ep; 217 int t = 0; 218 if (optarg[0] == '\\') 219 optarg++, t = 8; 220 REC_D = (int)strtol(optarg, &ep, t); 221 if (*ep != '\0' || REC_D < 0 || 222 REC_D >= __arraycount(d_mask)) 223 errx(2, "invalid record delimiter %s", 224 optarg); 225 } 226 d_mask['\n'] = d_mask[' ']; 227 d_mask[REC_D] = REC_D_F; 228 break; 229 case 'T': 230 /* -T tmpdir */ 231 tmpdir = optarg; 232 break; 233 case 'u': 234 UNIQUE = 1; 235 break; 236 case '?': 237 default: 238 usage(NULL); 239 } 240 } 241 if (cflag && argc > optind+1) 242 errx(2, "too many input files for -c option"); 243 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 244 outpath = argv[argc-1]; 245 argc -= 2; 246 } 247 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 248 errx(2, "too many input files for -m option"); 249 for (i = optind; i < argc; i++) { 250 /* allow one occurrence of /dev/stdin */ 251 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 252 if (stdinflag) 253 warnx("ignoring extra \"%s\" in file list", 254 argv[i]); 255 else 256 stdinflag = 1; 257 258 /* change to /dev/stdin if '-' */ 259 if (argv[i][0] == '-') 260 argv[i] = _PATH_STDIN; 261 262 } else if ((ch = access(argv[i], R_OK))) 263 err(2, "%s", argv[i]); 264 } 265 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 266 SINGL_FLD = 1; 267 fldtab[0].icol.num = 1; 268 } else { 269 if (!fldtab[1].icol.num) { 270 fldtab[0].flags &= ~(BI|BT); 271 setfield("1", &fldtab[++fidx], fldtab->flags); 272 } 273 fldreset(fldtab); 274 fldtab[0].flags &= ~F; 275 } 276 settables(fldtab[0].flags); 277 num_init(); 278 fldtab->weights = gweights; 279 if (optind == argc) { 280 static const char * const names[] = { _PATH_STDIN, NULL }; 281 282 filelist.names = names; 283 optind--; 284 } else 285 filelist.names = (const char * const *) &argv[optind]; 286 287 if (SINGL_FLD) 288 get = makeline; 289 else 290 get = makekey; 291 292 if (cflag) { 293 order(&filelist, get, fldtab); 294 /* NOT REACHED */ 295 } 296 if (!outpath) { 297 toutpath[0] = '\0'; /* path not used in this case */ 298 outfile = outpath = toutpath; 299 outfp = stdout; 300 } else if (lstat(outpath, &st) == 0 301 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 302 /* output file exists and isn't character or block device */ 303 struct sigaction act; 304 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 305 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 306 int outfd; 307 errno = 0; 308 if (access(outpath, W_OK)) 309 err(2, "%s", outpath); 310 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 311 outpath); 312 if ((outfd = mkstemp(toutpath)) == -1) 313 err(2, "Cannot create temporary file `%s'", toutpath); 314 if ((outfp = fdopen(outfd, "w")) == NULL) 315 err(2, "Cannot open temporary file `%s'", toutpath); 316 outfile = toutpath; 317 (void)atexit(cleanup); 318 act.sa_handler = onsignal; 319 (void) sigemptyset(&act.sa_mask); 320 act.sa_flags = SA_RESTART | SA_RESETHAND; 321 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 322 sigaction(sigtable[i], &act, 0); 323 } else { 324 outfile = outpath; 325 326 if ((outfp = fopen(outfile, "w")) == NULL) 327 err(2, "output file %s", outfile); 328 } 329 330 if (mflag) { 331 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 332 fldtab); 333 } else 334 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 335 336 if (outfile != outpath) { 337 if (access(outfile, F_OK)) 338 err(2, "%s", outfile); 339 340 /* 341 * Copy file permissions bits of the original file. 342 * st is initialized above, when we create the 343 * temporary spool file. 344 */ 345 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 346 err(2, "cannot chmod %s: output left in %s", 347 outpath, outfile); 348 } 349 350 (void)unlink(outpath); 351 if (link(outfile, outpath)) 352 err(2, "cannot link %s: output left in %s", 353 outpath, outfile); 354 (void)unlink(outfile); 355 } 356 exit(0); 357 } 358 359 static void 360 onsignal(sig) 361 int sig; 362 { 363 cleanup(); 364 } 365 366 static void 367 cleanup() 368 { 369 if (toutpath[0]) 370 (void)unlink(toutpath); 371 } 372 373 static void 374 usage(msg) 375 const char *msg; 376 { 377 if (msg != NULL) 378 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 379 (void)fprintf(stderr, 380 "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]" 381 " [-R char] [-T dir]", getprogname()); 382 (void)fprintf(stderr, 383 " [-t char] [file ...]\n"); 384 exit(2); 385 } 386