1 /* $NetBSD: sort.c,v 1.32 2002/12/24 14:58:57 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* Sort sorts a file using an optional user-defined key. 40 * Sort uses radix sort for internal sorting, and allows 41 * a choice of merge sort and radix sort for external sorting. 42 */ 43 44 #include "sort.h" 45 #include "fsort.h" 46 #include "pathnames.h" 47 48 #ifndef lint 49 __COPYRIGHT("@(#) Copyright (c) 1993\n\ 50 The Regents of the University of California. All rights reserved.\n"); 51 #endif /* not lint */ 52 53 #ifndef lint 54 __RCSID("$NetBSD: sort.c,v 1.32 2002/12/24 14:58:57 jdolecek Exp $"); 55 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 56 #endif /* not lint */ 57 58 #include <sys/types.h> 59 #include <sys/time.h> 60 #include <sys/resource.h> 61 62 #include <paths.h> 63 #include <signal.h> 64 #include <stdlib.h> 65 #include <string.h> 66 #include <unistd.h> 67 #include <locale.h> 68 69 int REC_D = '\n'; 70 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 71 72 /* 73 * weight tables. Gweights is one of ascii, Rascii.. 74 * modified to weight rec_d = 0 (or 255) 75 */ 76 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 77 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 78 struct coldesc clist[(ND+1)*2]; 79 int ncols = 0; 80 81 /* 82 * Default to stable sort. 83 */ 84 int stable_sort = 1; 85 86 static char toutpath[MAXPATHLEN]; 87 static struct field fldtab[ND+2]; 88 89 const char *tmpdir; /* where temporary files should be put */ 90 91 static void cleanup __P((void)); 92 static void onsignal __P((int)); 93 static void usage __P((const char *)); 94 95 int main __P((int argc, char **argv)); 96 97 int 98 main(argc, argv) 99 int argc; 100 char *argv[]; 101 { 102 get_func_t get; 103 int ch, i, stdinflag = 0, tmp = 0; 104 char cflag = 0, mflag = 0; 105 char *outfile, *outpath = 0; 106 struct field *ftpos; 107 struct filelist filelist; 108 FILE *outfp = NULL; 109 struct rlimit rl; 110 111 setlocale(LC_ALL, ""); 112 113 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 114 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 115 err(2, "getrlimit"); 116 rl.rlim_cur = rl.rlim_max; 117 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 118 err(2, "setrlimit"); 119 120 d_mask[REC_D = '\n'] = REC_D_F; 121 SINGL_FLD = SEP_FLAG = 0; 122 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 123 ftpos = fldtab; 124 125 fixit(&argc, argv); 126 if (!(tmpdir = getenv("TMPDIR"))) 127 tmpdir = _PATH_TMP; 128 129 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { 130 switch (ch) { 131 case 'b': 132 fldtab->flags |= BI | BT; 133 break; 134 case 'c': 135 cflag = 1; 136 break; 137 case 'd': case 'f': case 'i': case 'n': case 'r': 138 tmp |= optval(ch, 0); 139 if ((tmp & R) && (tmp & F)) 140 fldtab->weights = RFtable; 141 else if (tmp & F) 142 fldtab->weights = Ftable; 143 else if (tmp & R) 144 fldtab->weights = Rascii; 145 fldtab->flags |= tmp; 146 break; 147 case 'H': 148 PANIC = 0; 149 break; 150 case 'k': 151 setfield(optarg, ++ftpos, fldtab->flags); 152 break; 153 case 'm': 154 mflag = 1; 155 break; 156 case 'o': 157 outpath = optarg; 158 break; 159 case 's': 160 /* for GNU sort compatibility (this is our default) */ 161 stable_sort = 1; 162 break; 163 case 'S': 164 stable_sort = 0; 165 break; 166 case 't': 167 if (SEP_FLAG) 168 usage("multiple field delimiters"); 169 SEP_FLAG = 1; 170 d_mask[' '] &= ~FLD_D; 171 d_mask['\t'] &= ~FLD_D; 172 d_mask[(u_char)*optarg] |= FLD_D; 173 if (d_mask[(u_char)*optarg] & REC_D_F) 174 errx(2, "record/field delimiter clash"); 175 break; 176 case 'R': 177 if (REC_D != '\n') 178 usage("multiple record delimiters"); 179 if ('\n' == (REC_D = *optarg)) 180 break; 181 d_mask['\n'] = d_mask[' ']; 182 d_mask[REC_D] = REC_D_F; 183 break; 184 case 'T': 185 /* -T tmpdir */ 186 tmpdir = optarg; 187 break; 188 case 'u': 189 UNIQUE = 1; 190 break; 191 case '?': 192 default: 193 usage(NULL); 194 } 195 } 196 if (cflag && argc > optind+1) 197 errx(2, "too many input files for -c option"); 198 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 199 outpath = argv[argc-1]; 200 argc -= 2; 201 } 202 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 203 errx(2, "too many input files for -m option"); 204 for (i = optind; i < argc; i++) { 205 /* allow one occurrence of /dev/stdin */ 206 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 207 if (stdinflag) 208 warnx("ignoring extra \"%s\" in file list", 209 argv[i]); 210 else 211 stdinflag = 1; 212 213 /* change to /dev/stdin if '-' */ 214 if (argv[i][0] == '-') 215 argv[i] = _PATH_STDIN; 216 217 } else if ((ch = access(argv[i], R_OK))) 218 err(2, "%s", argv[i]); 219 } 220 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 221 SINGL_FLD = 1; 222 fldtab[0].icol.num = 1; 223 } else { 224 if (!fldtab[1].icol.num) { 225 fldtab[0].flags &= ~(BI|BT); 226 setfield("1", ++ftpos, fldtab->flags); 227 } 228 fldreset(fldtab); 229 fldtab[0].flags &= ~F; 230 } 231 settables(fldtab[0].flags); 232 num_init(); 233 fldtab->weights = gweights; 234 if (optind == argc) { 235 static const char * const names[] = { _PATH_STDIN, NULL }; 236 237 filelist.names = names; 238 optind--; 239 } else 240 filelist.names = (const char * const *) &argv[optind]; 241 242 if (SINGL_FLD) 243 get = makeline; 244 else 245 get = makekey; 246 247 if (cflag) { 248 order(&filelist, get, fldtab); 249 /* NOT REACHED */ 250 } 251 if (!outpath) { 252 (void)snprintf(toutpath, 253 sizeof(toutpath), "%sstdout", _PATH_DEV); 254 outfile = outpath = toutpath; 255 outfp = stdout; 256 } else if (!(ch = access(outpath, 0)) && 257 strncmp(_PATH_DEV, outpath, 5)) { 258 struct sigaction act; 259 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 260 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 261 int outfd; 262 errno = 0; 263 if (access(outpath, W_OK)) 264 err(2, "%s", outpath); 265 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 266 outpath); 267 if ((outfd = mkstemp(toutpath)) == -1) 268 err(2, "Cannot create temporary file `%s'", toutpath); 269 if ((outfp = fdopen(outfd, "w")) == NULL) 270 err(2, "Cannot open temporary file `%s'", toutpath); 271 outfile = toutpath; 272 (void)atexit(cleanup); 273 act.sa_handler = onsignal; 274 (void) sigemptyset(&act.sa_mask); 275 act.sa_flags = SA_RESTART | SA_RESETHAND; 276 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 277 sigaction(sigtable[i], &act, 0); 278 } else 279 outfile = outpath; 280 281 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 282 err(2, "output file %s", outfile); 283 284 if (mflag) { 285 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 286 fldtab); 287 } else 288 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 289 290 if (outfile != outpath) { 291 if (access(outfile, 0)) 292 err(2, "%s", outfile); 293 (void)unlink(outpath); 294 if (link(outfile, outpath)) 295 err(2, "cannot link %s: output left in %s", 296 outpath, outfile); 297 (void)unlink(outfile); 298 } 299 exit(0); 300 } 301 302 static void 303 onsignal(sig) 304 int sig; 305 { 306 cleanup(); 307 } 308 309 static void 310 cleanup() 311 { 312 if (toutpath[0]) 313 (void)unlink(toutpath); 314 } 315 316 static void 317 usage(msg) 318 const char *msg; 319 { 320 if (msg != NULL) 321 (void)fprintf(stderr, "sort: %s\n", msg); 322 (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] "); 323 (void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n"); 324 exit(2); 325 } 326