xref: /netbsd-src/usr.bin/sort/sort.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: sort.c,v 1.47 2008/11/08 17:11:56 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 /* Sort sorts a file using an optional user-defined key.
65  * Sort uses radix sort for internal sorting, and allows
66  * a choice of merge sort and radix sort for external sorting.
67  */
68 
69 #include "sort.h"
70 #include "fsort.h"
71 #include "pathnames.h"
72 
73 #ifndef lint
74 __COPYRIGHT("@(#) Copyright (c) 1993\
75  The Regents of the University of California.  All rights reserved.");
76 #endif /* not lint */
77 
78 #ifndef lint
79 __RCSID("$NetBSD: sort.c,v 1.47 2008/11/08 17:11:56 christos Exp $");
80 __SCCSID("@(#)sort.c	8.1 (Berkeley) 6/6/93");
81 #endif /* not lint */
82 
83 #include <sys/types.h>
84 #include <sys/time.h>
85 #include <sys/resource.h>
86 
87 #include <paths.h>
88 #include <signal.h>
89 #include <stdlib.h>
90 #include <string.h>
91 #include <unistd.h>
92 #include <locale.h>
93 
94 int REC_D = '\n';
95 u_char d_mask[NBINS];		/* flags for rec_d, field_d, <blank> */
96 
97 /*
98  * weight tables.  Gweights is one of ascii, Rascii..
99  * modified to weight rec_d = 0 (or 255)
100  */
101 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
103 
104 /*
105  * Default to stable sort.
106  */
107 int stable_sort = 1;
108 
109 static char toutpath[MAXPATHLEN];
110 
111 const char *tmpdir;	/* where temporary files should be put */
112 
113 static void cleanup __P((void));
114 static void onsignal __P((int));
115 static void usage __P((const char *));
116 
117 int main __P((int argc, char **argv));
118 
119 int
120 main(argc, argv)
121 	int argc;
122 	char *argv[];
123 {
124 	get_func_t get;
125 	int ch, i, stdinflag = 0, tmp = 0;
126 	char cflag = 0, mflag = 0;
127 	char *outfile, *outpath = 0;
128 	struct field *fldtab, *p;
129 	size_t fldtab_sz = 3, fidx = 0;
130 	struct filelist filelist;
131 	FILE *outfp = NULL;
132 	struct rlimit rl;
133 	struct stat st;
134 
135 	setlocale(LC_ALL, "");
136 
137 	/* bump RLIMIT_NOFILE to maximum our hard limit allows */
138 	if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
139 		err(2, "getrlimit");
140 	rl.rlim_cur = rl.rlim_max;
141 	if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
142 		err(2, "setrlimit");
143 
144 	d_mask[REC_D = '\n'] = REC_D_F;
145 	d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
146 
147 	fldtab = malloc(fldtab_sz * sizeof(*fldtab));
148 	memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
149 
150 	fixit(&argc, argv);
151 
152 	if (!(tmpdir = getenv("TMPDIR")))
153 		tmpdir = _PATH_TMP;
154 
155 	while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) {
156 		switch (ch) {
157 		case 'b':
158 			fldtab->flags |= BI | BT;
159 			break;
160 		case 'c':
161 			cflag = 1;
162 			break;
163 		case 'd': case 'f': case 'i': case 'n': case 'r':
164 			tmp |= optval(ch, 0);
165 			if ((tmp & R) && (tmp & F))
166 				fldtab->weights = RFtable;
167 			else if (tmp & F)
168 				fldtab->weights = Ftable;
169 			else if (tmp & R)
170 				fldtab->weights = Rascii;
171 			fldtab->flags |= tmp;
172 			break;
173 		case 'H':
174 			PANIC = 0;
175 			break;
176 		case 'k':
177 			p = realloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab));
178 			if (!p)
179 				err(1, "realloc");
180 			fldtab = p;
181 			memset(&fldtab[fldtab_sz], 0,
182 			    sizeof(fldtab[fldtab_sz]));
183 			fldtab_sz++;
184 
185 			setfield(optarg, &fldtab[++fidx], fldtab->flags);
186 			break;
187 		case 'm':
188 			mflag = 1;
189 			break;
190 		case 'o':
191 			outpath = optarg;
192 			break;
193 		case 's':
194 			/* for GNU sort compatibility (this is our default) */
195 			stable_sort = 1;
196 			break;
197 		case 'S':
198 			stable_sort = 0;
199 			break;
200 		case 't':
201 			if (SEP_FLAG)
202 				usage("multiple field delimiters");
203 			SEP_FLAG = 1;
204 			d_mask[' '] &= ~FLD_D;
205 			d_mask['\t'] &= ~FLD_D;
206 			d_mask[(u_char)*optarg] |= FLD_D;
207 			if (d_mask[(u_char)*optarg] & REC_D_F)
208 				errx(2, "record/field delimiter clash");
209 			break;
210 		case 'R':
211 			if (REC_D != '\n')
212 				usage("multiple record delimiters");
213 			if ('\n' == (REC_D = *optarg))
214 				break;
215 			if (optarg[1] != '\0') {
216 				char *ep;
217 				int t = 0;
218 				if (optarg[0] == '\\')
219 					optarg++, t = 8;
220 				REC_D = (int)strtol(optarg, &ep, t);
221 				if (*ep != '\0' || REC_D < 0 ||
222 				    REC_D >= __arraycount(d_mask))
223 					errx(2, "invalid record delimiter %s",
224 					    optarg);
225 			}
226 			d_mask['\n'] = d_mask[' '];
227 			d_mask[REC_D] = REC_D_F;
228 			break;
229 		case 'T':
230 			/* -T tmpdir */
231 			tmpdir = optarg;
232 			break;
233 		case 'u':
234 			UNIQUE = 1;
235 			break;
236 		case '?':
237 		default:
238 			usage(NULL);
239 		}
240 	}
241 	if (cflag && argc > optind+1)
242 		errx(2, "too many input files for -c option");
243 	if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
244 		outpath = argv[argc-1];
245 		argc -= 2;
246 	}
247 	if (mflag && argc - optind > (MAXFCT - (16+1))*16)
248 		errx(2, "too many input files for -m option");
249 	for (i = optind; i < argc; i++) {
250 		/* allow one occurrence of /dev/stdin */
251 		if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
252 			if (stdinflag)
253 				warnx("ignoring extra \"%s\" in file list",
254 				    argv[i]);
255 			else
256 				stdinflag = 1;
257 
258 			/* change to /dev/stdin if '-' */
259 			if (argv[i][0] == '-')
260 				argv[i] = _PATH_STDIN;
261 
262 		} else if ((ch = access(argv[i], R_OK)))
263 			err(2, "%s", argv[i]);
264 	}
265 	if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
266 		SINGL_FLD = 1;
267 		fldtab[0].icol.num = 1;
268 	} else {
269 		if (!fldtab[1].icol.num) {
270 			fldtab[0].flags &= ~(BI|BT);
271 			setfield("1", &fldtab[++fidx], fldtab->flags);
272 		}
273 		fldreset(fldtab);
274 		fldtab[0].flags &= ~F;
275 	}
276 	settables(fldtab[0].flags);
277 	num_init();
278 	fldtab->weights = gweights;
279 	if (optind == argc) {
280 		static const char * const names[] = { _PATH_STDIN, NULL };
281 
282 		filelist.names = names;
283 		optind--;
284 	} else
285 		filelist.names = (const char * const *) &argv[optind];
286 
287 	if (SINGL_FLD)
288 		get = makeline;
289 	else
290 		get = makekey;
291 
292 	if (cflag) {
293 		order(&filelist, get, fldtab);
294 		/* NOT REACHED */
295 	}
296 	if (!outpath) {
297 		toutpath[0] = '\0';	/* path not used in this case */
298 		outfile = outpath = toutpath;
299 		outfp = stdout;
300 	} else if (lstat(outpath, &st) == 0
301 	    && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
302 		/* output file exists and isn't character or block device */
303 		struct sigaction act;
304 		static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
305 		    SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
306 		int outfd;
307 		errno = 0;
308 		if (access(outpath, W_OK))
309 			err(2, "%s", outpath);
310 		(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
311 		    outpath);
312 		if ((outfd = mkstemp(toutpath)) == -1)
313 			err(2, "Cannot create temporary file `%s'", toutpath);
314 		if ((outfp = fdopen(outfd, "w")) == NULL)
315 			err(2, "Cannot open temporary file `%s'", toutpath);
316 		outfile = toutpath;
317 		(void)atexit(cleanup);
318 		act.sa_handler = onsignal;
319 		(void) sigemptyset(&act.sa_mask);
320 		act.sa_flags = SA_RESTART | SA_RESETHAND;
321 		for (i = 0; sigtable[i]; ++i)	/* always unlink toutpath */
322 			sigaction(sigtable[i], &act, 0);
323 	} else {
324 		outfile = outpath;
325 
326 		if ((outfp = fopen(outfile, "w")) == NULL)
327 			err(2, "output file %s", outfile);
328 	}
329 
330 	if (mflag) {
331 		fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline,
332 			fldtab);
333 	} else
334 		fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab);
335 
336 	if (outfile != outpath) {
337 		if (access(outfile, F_OK))
338 			err(2, "%s", outfile);
339 
340 		/*
341 		 * Copy file permissions bits of the original file.
342 		 * st is initialized above, when we create the
343 		 * temporary spool file.
344 		 */
345 		if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) {
346 			err(2, "cannot chmod %s: output left in %s",
347 			    outpath, outfile);
348 		}
349 
350 		(void)unlink(outpath);
351 		if (link(outfile, outpath))
352 			err(2, "cannot link %s: output left in %s",
353 			    outpath, outfile);
354 		(void)unlink(outfile);
355 	}
356 	exit(0);
357 }
358 
/*
 * Signal handler installed by main() while output is being spooled to
 * the temporary file: remove that file and terminate with status 2.
 *
 * The handler must not return.  If it did, the sort would carry on
 * after its spool file had already been unlinked and only fail later.
 * _exit() is used rather than exit() because it is async-signal-safe
 * and does not run the atexit handlers a second time.
 */
static void
onsignal(int sig)
{
	(void)sig;
	cleanup();
	_exit(2);
}
365 
366 static void
367 cleanup()
368 {
369 	if (toutpath[0])
370 		(void)unlink(toutpath);
371 }
372 
/*
 * Print an optional diagnostic and the usage synopsis on stderr, then
 * exit with status 2.  Does not return.
 *
 * msg: extra message printed before the synopsis, or NULL for none.
 */
static void
usage(const char *msg)
{
	if (msg != NULL)
		(void)fprintf(stderr, "%s: %s\n", getprogname(), msg);
	/* The synopsis spans two lines; the first needs its own newline. */
	(void)fprintf(stderr,
	    "usage: %s [-bcdfHimnrSsu] [-k field1[,field2]] [-o output]"
	    " [-R char] [-T dir]\n", getprogname());
	(void)fprintf(stderr,
	    "             [-t char] [file ...]\n");
	exit(2);
}
386