1 /* $NetBSD: sort.c,v 1.44 2006/10/23 19:53:25 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /*- 40 * Copyright (c) 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * Peter McIlroy. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 /* Sort sorts a file using an optional user-defined key. 72 * Sort uses radix sort for internal sorting, and allows 73 * a choice of merge sort and radix sort for external sorting. 74 */ 75 76 #include "sort.h" 77 #include "fsort.h" 78 #include "pathnames.h" 79 80 #ifndef lint 81 __COPYRIGHT("@(#) Copyright (c) 1993\n\ 82 The Regents of the University of California. All rights reserved.\n"); 83 #endif /* not lint */ 84 85 #ifndef lint 86 __RCSID("$NetBSD: sort.c,v 1.44 2006/10/23 19:53:25 jdolecek Exp $"); 87 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 88 #endif /* not lint */ 89 90 #include <sys/types.h> 91 #include <sys/time.h> 92 #include <sys/resource.h> 93 94 #include <paths.h> 95 #include <signal.h> 96 #include <stdlib.h> 97 #include <string.h> 98 #include <unistd.h> 99 #include <locale.h> 100 101 int REC_D = '\n'; 102 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 103 104 /* 105 * weight tables. Gweights is one of ascii, Rascii.. 106 * modified to weight rec_d = 0 (or 255) 107 */ 108 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 109 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 110 111 /* 112 * Default to stable sort. 113 */ 114 int stable_sort = 1; 115 116 static char toutpath[MAXPATHLEN]; 117 118 const char *tmpdir; /* where temporary files should be put */ 119 120 static void cleanup __P((void)); 121 static void onsignal __P((int)); 122 static void usage __P((const char *)); 123 124 int main __P((int argc, char **argv)); 125 126 int 127 main(argc, argv) 128 int argc; 129 char *argv[]; 130 { 131 get_func_t get; 132 int ch, i, stdinflag = 0, tmp = 0; 133 char cflag = 0, mflag = 0; 134 char *outfile, *outpath = 0; 135 struct field *fldtab, *p; 136 size_t fldtab_sz = 3, fidx = 0; 137 struct filelist filelist; 138 FILE *outfp = NULL; 139 struct rlimit rl; 140 struct stat st; 141 142 setlocale(LC_ALL, ""); 143 144 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 145 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 146 err(2, "getrlimit"); 147 rl.rlim_cur = rl.rlim_max; 148 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 149 err(2, "setrlimit"); 150 151 d_mask[REC_D = '\n'] = REC_D_F; 152 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 153 154 fldtab = malloc(fldtab_sz * sizeof(*fldtab)); 155 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 156 157 fixit(&argc, argv); 158 159 if (!(tmpdir = getenv("TMPDIR"))) 160 tmpdir = _PATH_TMP; 161 162 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 163 switch (ch) { 164 case 'b': 165 fldtab->flags |= BI | BT; 166 break; 167 case 'c': 168 cflag = 1; 169 break; 170 case 'd': case 'f': case 'i': case 'n': case 'r': 171 tmp |= optval(ch, 0); 172 if ((tmp & R) && (tmp & F)) 173 fldtab->weights = RFtable; 174 else if (tmp & F) 175 fldtab->weights = Ftable; 176 else if (tmp & R) 177 fldtab->weights = Rascii; 178 fldtab->flags |= tmp; 179 break; 180 case 'H': 181 PANIC = 0; 182 break; 183 case 'k': 184 p = realloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 185 if (!p) 186 err(1, "realloc"); 187 fldtab = p; 188 memset(&fldtab[fldtab_sz], 0, 189 sizeof(fldtab[fldtab_sz])); 190 fldtab_sz++; 191 192 setfield(optarg, &fldtab[++fidx], fldtab->flags); 193 break; 194 case 'm': 195 mflag = 1; 196 break; 197 case 'o': 198 outpath = optarg; 199 break; 200 case 's': 201 /* for GNU sort compatibility (this is our default) */ 202 stable_sort = 1; 203 break; 204 case 'S': 205 stable_sort = 0; 206 break; 207 case 't': 208 if (SEP_FLAG) 209 usage("multiple field delimiters"); 210 SEP_FLAG = 1; 211 d_mask[' '] &= ~FLD_D; 212 d_mask['\t'] &= ~FLD_D; 213 d_mask[(u_char)*optarg] |= FLD_D; 214 if (d_mask[(u_char)*optarg] & REC_D_F) 215 errx(2, "record/field delimiter clash"); 216 break; 217 case 'R': 218 if (REC_D != '\n') 219 usage("multiple record delimiters"); 220 if ('\n' == (REC_D = *optarg)) 221 break; 222 d_mask['\n'] = d_mask[' ']; 223 d_mask[REC_D] = REC_D_F; 224 break; 225 case 'T': 226 /* -T tmpdir */ 227 tmpdir = optarg; 228 break; 229 case 'u': 230 UNIQUE = 1; 231 break; 232 case '?': 233 default: 234 usage(NULL); 235 } 236 } 237 if (cflag && argc > optind+1) 238 errx(2, "too many input files for -c option"); 239 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 240 outpath = argv[argc-1]; 241 argc -= 2; 242 } 243 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 244 errx(2, "too many input files for -m option"); 245 for (i = optind; i < argc; i++) { 246 /* allow one occurrence of /dev/stdin */ 247 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 248 if (stdinflag) 249 warnx("ignoring extra \"%s\" in file list", 250 argv[i]); 251 else 252 stdinflag = 1; 253 254 /* change to /dev/stdin if '-' */ 255 if (argv[i][0] == '-') 256 argv[i] = _PATH_STDIN; 257 258 } else if ((ch = access(argv[i], R_OK))) 259 err(2, "%s", argv[i]); 260 } 261 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 262 SINGL_FLD = 1; 263 fldtab[0].icol.num = 1; 264 } else { 265 if (!fldtab[1].icol.num) { 266 fldtab[0].flags &= ~(BI|BT); 267 setfield("1", &fldtab[++fidx], fldtab->flags); 268 } 269 fldreset(fldtab); 270 fldtab[0].flags &= ~F; 271 } 272 settables(fldtab[0].flags); 273 num_init(); 274 fldtab->weights = gweights; 275 if (optind == argc) { 276 static const char * const names[] = { _PATH_STDIN, NULL }; 277 278 filelist.names = names; 279 optind--; 280 } else 281 filelist.names = (const char * const *) &argv[optind]; 282 283 if (SINGL_FLD) 284 get = makeline; 285 else 286 get = makekey; 287 288 if (cflag) { 289 order(&filelist, get, fldtab); 290 /* NOT REACHED */ 291 } 292 if (!outpath) { 293 toutpath[0] = '\0'; /* path not used in this case */ 294 outfile = outpath = toutpath; 295 outfp = stdout; 296 } else if (lstat(outpath, &st) == 0 297 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 298 /* output file exists and isn't character or block device */ 299 struct sigaction act; 300 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 301 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 302 int outfd; 303 errno = 0; 304 if (access(outpath, W_OK)) 305 err(2, "%s", outpath); 306 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 307 outpath); 308 if ((outfd = mkstemp(toutpath)) == -1) 309 err(2, "Cannot create temporary file `%s'", toutpath); 310 if ((outfp = fdopen(outfd, "w")) == NULL) 311 err(2, "Cannot open temporary file `%s'", toutpath); 312 outfile = toutpath; 313 (void)atexit(cleanup); 314 act.sa_handler = onsignal; 315 (void) sigemptyset(&act.sa_mask); 316 act.sa_flags = SA_RESTART | SA_RESETHAND; 317 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 318 sigaction(sigtable[i], &act, 0); 319 } else { 320 outfile = outpath; 321 322 if ((outfp = fopen(outfile, "w")) == NULL) 323 err(2, "output file %s", outfile); 324 } 325 326 if (mflag) { 327 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 328 fldtab); 329 } else 330 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 331 332 if (outfile != outpath) { 333 if (access(outfile, F_OK)) 334 err(2, "%s", outfile); 335 336 /* 337 * Copy file permissions bits of the original file. 338 * st is initialized above, when we create the 339 * temporary spool file. 340 */ 341 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 342 err(2, "cannot chmod %s: output left in %s", 343 outpath, outfile); 344 } 345 346 (void)unlink(outpath); 347 if (link(outfile, outpath)) 348 err(2, "cannot link %s: output left in %s", 349 outpath, outfile); 350 (void)unlink(outfile); 351 } 352 exit(0); 353 } 354 355 static void 356 onsignal(sig) 357 int sig; 358 { 359 cleanup(); 360 } 361 362 static void 363 cleanup() 364 { 365 if (toutpath[0]) 366 (void)unlink(toutpath); 367 } 368 369 static void 370 usage(msg) 371 const char *msg; 372 { 373 if (msg != NULL) 374 (void)fprintf(stderr, "sort: %s\n", msg); 375 (void)fprintf(stderr, 376 "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]" 377 " [-R char] [-T dir]", getprogname()); 378 (void)fprintf(stderr, 379 " [-t char] [file ...]\n"); 380 exit(2); 381 } 382