1 /* $NetBSD: sort.c,v 1.18 2001/01/13 20:21:56 soren Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* Sort sorts a file using an optional user-defined key. 40 * Sort uses radix sort for internal sorting, and allows 41 * a choice of merge sort and radix sort for external sorting. 42 */ 43 44 #include "sort.h" 45 #include "fsort.h" 46 #include "pathnames.h" 47 48 #ifndef lint 49 __COPYRIGHT("@(#) Copyright (c) 1993\n\ 50 The Regents of the University of California. All rights reserved.\n"); 51 #endif /* not lint */ 52 53 #ifndef lint 54 __RCSID("$NetBSD: sort.c,v 1.18 2001/01/13 20:21:56 soren Exp $"); 55 __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); 56 #endif /* not lint */ 57 58 #include <paths.h> 59 #include <signal.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <unistd.h> 63 #include <locale.h> 64 65 int REC_D = '\n'; 66 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 67 /* 68 * weight tables. Gweights is one of ascii, Rascii.. 69 * modified to weight rec_d = 0 (or 255) 70 */ 71 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 72 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 73 struct coldesc clist[(ND+1)*2]; 74 int ncols = 0; 75 extern struct coldesc clist[(ND+1)*2]; 76 extern int ncols; 77 78 /* 79 * Default to stable sort. 80 */ 81 int stable_sort = 1; 82 83 char toutpath[_POSIX_PATH_MAX]; 84 85 static void cleanup __P((void)); 86 static void onsignal __P((int)); 87 static void usage __P((const char *)); 88 89 int main __P((int argc, char **argv)); 90 91 int 92 main(argc, argv) 93 int argc; 94 char *argv[]; 95 { 96 get_func_t get; 97 int ch, i, stdinflag = 0, tmp = 0; 98 char cflag = 0, mflag = 0; 99 char *outfile, *outpath = 0; 100 struct field fldtab[ND+2], *ftpos; 101 struct filelist filelist; 102 FILE *outfp = NULL; 103 104 setlocale(LC_ALL, ""); 105 106 memset(fldtab, 0, (ND+2)*sizeof(struct field)); 107 memset(d_mask, 0, NBINS); 108 d_mask[REC_D = '\n'] = REC_D_F; 109 SINGL_FLD = SEP_FLAG = 0; 110 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 111 ftpos = fldtab; 112 113 fixit(&argc, argv); 114 115 while ((ch = getopt(argc, argv, "bcdfik:mHno:rsSt:T:ux")) != -1) { 116 switch (ch) { 117 case 'b': 118 fldtab->flags |= BI | BT; 119 break; 120 case 'c': 121 cflag = 1; 122 break; 123 case 'd': case 'f': case 'i': case 'n': case 'r': 124 tmp |= optval(ch, 0); 125 if ((tmp & R) && (tmp & F)) 126 fldtab->weights = RFtable; 127 else if (tmp & F) 128 fldtab->weights = Ftable; 129 else if (tmp & R) 130 fldtab->weights = Rascii; 131 fldtab->flags |= tmp; 132 break; 133 case 'H': 134 PANIC = 0; 135 break; 136 case 'k': 137 setfield(optarg, ++ftpos, fldtab->flags); 138 break; 139 case 'm': 140 mflag = 1; 141 break; 142 case 'o': 143 outpath = optarg; 144 break; 145 case 's': 146 /* for GNU sort compatibility (this is our default) */ 147 stable_sort = 1; 148 break; 149 case 'S': 150 stable_sort = 0; 151 break; 152 case 't': 153 if (SEP_FLAG) 154 usage("multiple field delimiters"); 155 SEP_FLAG = 1; 156 d_mask[' '] &= ~FLD_D; 157 d_mask['\t'] &= ~FLD_D; 158 d_mask[(u_char)*optarg] |= FLD_D; 159 if (d_mask[(u_char)*optarg] & REC_D_F) 160 err(2, "record/field delimiter clash"); 161 break; 162 case 'T': 163 if (REC_D != '\n') 164 usage("multiple record delimiters"); 165 if ('\n' == (REC_D = *optarg)) 166 break; 167 d_mask['\n'] = d_mask[' ']; 168 d_mask[REC_D] = REC_D_F; 169 break; 170 case 'u': 171 UNIQUE = 1; 172 break; 173 case '?': 174 default: 175 usage(NULL); 176 } 177 } 178 if (cflag && argc > optind+1) 179 errx(2, "too many input files for -c option"); 180 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 181 outpath = argv[argc-1]; 182 argc -= 2; 183 } 184 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 185 errx(2, "too many input files for -m option"); 186 for (i = optind; i < argc; i++) { 187 /* allow one occurrence of /dev/stdin */ 188 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 189 if (stdinflag) 190 warnx("ignoring extra \"%s\" in file list", 191 argv[i]); 192 else 193 stdinflag = 1; 194 195 /* change to /dev/stdin if '-' */ 196 if (argv[i][0] == '-') 197 argv[i] = _PATH_STDIN; 198 199 } else if ((ch = access(argv[i], R_OK))) 200 err(2, "%s", argv[i]); 201 } 202 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 203 SINGL_FLD = 1; 204 fldtab[0].icol.num = 1; 205 } else { 206 if (!fldtab[1].icol.num) { 207 fldtab[0].flags &= ~(BI|BT); 208 setfield("1", ++ftpos, fldtab->flags); 209 } 210 fldreset(fldtab); 211 fldtab[0].flags &= ~F; 212 } 213 settables(fldtab[0].flags); 214 num_init(); 215 fldtab->weights = gweights; 216 if (optind == argc) { 217 static const char * const names[] = { _PATH_STDIN, NULL }; 218 219 filelist.names = names; 220 optind--; 221 } else 222 filelist.names = (const char * const *) &argv[optind]; 223 224 if (SINGL_FLD) 225 get = makeline; 226 else 227 get = makekey; 228 229 if (cflag) { 230 order(&filelist, get, fldtab); 231 /* NOT REACHED */ 232 } 233 if (!outpath) { 234 (void)snprintf(toutpath, 235 sizeof(toutpath), "%sstdout", _PATH_DEV); 236 outfile = outpath = toutpath; 237 } else if (!(ch = access(outpath, 0)) && 238 strncmp(_PATH_DEV, outpath, 5)) { 239 static const struct sigaction act = 240 { onsignal, {{0}}, SA_RESTART | SA_RESETHAND }; 241 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, 242 SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 243 int outfd; 244 errno = 0; 245 if (access(outpath, W_OK)) 246 err(2, "%s", outpath); 247 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXX", outpath); 248 if ((outfd = mkstemp(toutpath)) < 0 || 249 (outfp = fdopen(outfd, "w")) == 0) 250 err(2, "temporary file %s", toutpath); 251 outfile = toutpath; 252 (void)atexit(cleanup); 253 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 254 sigaction(sigtable[i], &act, 0); 255 } else 256 outfile = outpath; 257 258 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 259 err(2, "output file %s", outfile); 260 261 if (mflag) { 262 fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, 263 fldtab); 264 } else 265 fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); 266 267 if (outfile != outpath) { 268 if (access(outfile, 0)) 269 err(2, "%s", outfile); 270 (void)unlink(outpath); 271 if (link(outfile, outpath)) 272 err(2, "cannot link %s: output left in %s", 273 outpath, outfile); 274 (void)unlink(outfile); 275 } 276 exit(0); 277 } 278 279 static void 280 onsignal(sig) 281 int sig; 282 { 283 cleanup(); 284 } 285 286 static void 287 cleanup() 288 { 289 if (toutpath[0]) 290 (void)unlink(toutpath); 291 } 292 293 static void 294 usage(msg) 295 const char *msg; 296 { 297 if (msg != NULL) 298 (void)fprintf(stderr, "sort: %s\n", msg); 299 (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] "); 300 (void)fprintf(stderr, "[-T char] [-k keydef] ... [files]\n"); 301 exit(2); 302 } 303