1 /* $NetBSD: sort.c,v 1.46 2008/07/21 14:19:26 lukem Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include "sort.h" 70 #include "fsort.h" 71 #include "pathnames.h" 72 73 #ifndef lint 74 __COPYRIGHT("@(#) Copyright (c) 1993\ 75 The Regents of the University of California. All rights reserved."); 76 #endif /* not lint */ 77 78 #ifndef lint 79 __RCSID("$NetBSD: sort.c,v 1.46 2008/07/21 14:19:26 lukem Exp $"); 80 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 81 #endif /* not lint */ 82 83 #include <sys/types.h> 84 #include <sys/time.h> 85 #include <sys/resource.h> 86 87 #include <paths.h> 88 #include <signal.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <unistd.h> 92 #include <locale.h> 93 94 int REC_D = '\n'; 95 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 96 97 /* 98 * weight tables. Gweights is one of ascii, Rascii.. 99 * modified to weight rec_d = 0 (or 255) 100 */ 101 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 104 /* 105 * Default to stable sort. 106 */ 107 int stable_sort = 1; 108 109 static char toutpath[MAXPATHLEN]; 110 111 const char *tmpdir; /* where temporary files should be put */ 112 113 static void cleanup __P((void)); 114 static void onsignal __P((int)); 115 static void usage __P((const char *)); 116 117 int main __P((int argc, char **argv)); 118 119 int 120 main(argc, argv) 121 int argc; 122 char *argv[]; 123 { 124 get_func_t get; 125 int ch, i, stdinflag = 0, tmp = 0; 126 char cflag = 0, mflag = 0; 127 char *outfile, *outpath = 0; 128 struct field *fldtab, *p; 129 size_t fldtab_sz = 3, fidx = 0; 130 struct filelist filelist; 131 FILE *outfp = NULL; 132 struct rlimit rl; 133 struct stat st; 134 135 setlocale(LC_ALL, ""); 136 137 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 138 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 139 err(2, "getrlimit"); 140 rl.rlim_cur = rl.rlim_max; 141 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 142 err(2, "setrlimit"); 143 144 d_mask[REC_D = '\n'] = REC_D_F; 145 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 146 147 fldtab = malloc(fldtab_sz * sizeof(*fldtab)); 148 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 149 150 fixit(&argc, argv); 151 152 if (!(tmpdir = getenv("TMPDIR"))) 153 tmpdir = _PATH_TMP; 154 155 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 156 switch (ch) { 157 case 'b': 158 fldtab->flags |= BI | BT; 159 break; 160 case 'c': 161 cflag = 1; 162 break; 163 case 'd': case 'f': case 'i': case 'n': case 'r': 164 tmp |= optval(ch, 0); 165 if ((tmp & R) && (tmp & F)) 166 fldtab->weights = RFtable; 167 else if (tmp & F) 168 fldtab->weights = Ftable; 169 else if (tmp & R) 170 fldtab->weights = Rascii; 171 fldtab->flags |= tmp; 172 break; 173 case 'H': 174 PANIC = 0; 175 break; 176 case 'k': 177 p = realloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 178 if (!p) 179 err(1, "realloc"); 180 fldtab = p; 181 memset(&fldtab[fldtab_sz], 0, 182 sizeof(fldtab[fldtab_sz])); 183 fldtab_sz++; 184 185 setfield(optarg, &fldtab[++fidx], fldtab->flags); 186 break; 187 case 'm': 188 mflag = 1; 189 break; 190 case 'o': 191 outpath = optarg; 192 break; 193 case 's': 194 /* for GNU sort compatibility (this is our default) */ 195 stable_sort = 1; 196 break; 197 case 'S': 198 stable_sort = 0; 199 break; 200 case 't': 201 if (SEP_FLAG) 202 usage("multiple field delimiters"); 203 SEP_FLAG = 1; 204 d_mask[' '] &= ~FLD_D; 205 d_mask['\t'] &= ~FLD_D; 206 d_mask[(u_char)*optarg] |= FLD_D; 207 if (d_mask[(u_char)*optarg] & REC_D_F) 208 errx(2, "record/field delimiter clash"); 209 break; 210 case 'R': 211 if (REC_D != '\n') 212 usage("multiple record delimiters"); 213 if ('\n' == (REC_D = *optarg)) 214 break; 215 d_mask['\n'] = d_mask[' ']; 216 d_mask[REC_D] = REC_D_F; 217 break; 218 case 'T': 219 /* -T tmpdir */ 220 tmpdir = optarg; 221 break; 222 case 'u': 223 UNIQUE = 1; 224 break; 225 case '?': 226 default: 227 usage(NULL); 228 } 229 } 230 if (cflag && argc > optind+1) 231 errx(2, "too many input files for -c option"); 232 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 233 outpath = argv[argc-1]; 234 argc -= 2; 235 } 236 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 237 errx(2, "too many input files for -m option"); 238 for (i = optind; i < argc; i++) { 239 /* allow one occurrence of /dev/stdin */ 240 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 241 if (stdinflag) 242 warnx("ignoring extra \"%s\" in file list", 243 argv[i]); 244 else 245 stdinflag = 1; 246 247 /* change to /dev/stdin if '-' */ 248 if (argv[i][0] == '-') 249 argv[i] = _PATH_STDIN; 250 251 } else if ((ch = access(argv[i], R_OK))) 252 err(2, "%s", argv[i]); 253 } 254 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 255 SINGL_FLD = 1; 256 fldtab[0].icol.num = 1; 257 } else { 258 if (!fldtab[1].icol.num) { 259 fldtab[0].flags &= ~(BI|BT); 260 setfield("1", &fldtab[++fidx], fldtab->flags); 261 } 262 fldreset(fldtab); 263 fldtab[0].flags &= ~F; 264 } 265 settables(fldtab[0].flags); 266 num_init(); 267 fldtab->weights = gweights; 268 if (optind == argc) { 269 static const char * const names[] = { _PATH_STDIN, NULL }; 270 271 filelist.names = names; 272 optind--; 273 } else 274 filelist.names = (const char * const *) &argv[optind]; 275 276 if (SINGL_FLD) 277 get = makeline; 278 else 279 get = makekey; 280 281 if (cflag) { 282 order(&filelist, get, fldtab); 283 /* NOT REACHED */ 284 } 285 if (!outpath) { 286 toutpath[0] = '\0'; /* path not used in this case */ 287 outfile = outpath = toutpath; 288 outfp = stdout; 289 } else if (lstat(outpath, &st) == 0 290 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 291 /* output file exists and isn't character or block device */ 292 struct sigaction act; 293 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 294 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 295 int outfd; 296 errno = 0; 297 if (access(outpath, W_OK)) 298 err(2, "%s", outpath); 299 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 300 outpath); 301 if ((outfd = mkstemp(toutpath)) == -1) 302 err(2, "Cannot create temporary file `%s'", toutpath); 303 if ((outfp = fdopen(outfd, "w")) == NULL) 304 err(2, "Cannot open temporary file `%s'", toutpath); 305 outfile = toutpath; 306 (void)atexit(cleanup); 307 act.sa_handler = onsignal; 308 (void) sigemptyset(&act.sa_mask); 309 act.sa_flags = SA_RESTART | SA_RESETHAND; 310 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 311 sigaction(sigtable[i], &act, 0); 312 } else { 313 outfile = outpath; 314 315 if ((outfp = fopen(outfile, "w")) == NULL) 316 err(2, "output file %s", outfile); 317 } 318 319 if (mflag) { 320 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 321 fldtab); 322 } else 323 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 324 325 if (outfile != outpath) { 326 if (access(outfile, F_OK)) 327 err(2, "%s", outfile); 328 329 /* 330 * Copy file permissions bits of the original file. 331 * st is initialized above, when we create the 332 * temporary spool file. 333 */ 334 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 335 err(2, "cannot chmod %s: output left in %s", 336 outpath, outfile); 337 } 338 339 (void)unlink(outpath); 340 if (link(outfile, outpath)) 341 err(2, "cannot link %s: output left in %s", 342 outpath, outfile); 343 (void)unlink(outfile); 344 } 345 exit(0); 346 } 347 348 static void 349 onsignal(sig) 350 int sig; 351 { 352 cleanup(); 353 } 354 355 static void 356 cleanup() 357 { 358 if (toutpath[0]) 359 (void)unlink(toutpath); 360 } 361 362 static void 363 usage(msg) 364 const char *msg; 365 { 366 if (msg != NULL) 367 (void)fprintf(stderr, "sort: %s\n", msg); 368 (void)fprintf(stderr, 369 "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]" 370 " [-R char] [-T dir]", getprogname()); 371 (void)fprintf(stderr, 372 " [-t char] [file ...]\n"); 373 exit(2); 374 } 375