xref: /netbsd-src/usr.bin/xargs/xargs.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: xargs.c,v 1.17 2007/12/15 19:44:54 perry Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * John B. Roll Jr.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
35  */
36 
37 #include <sys/cdefs.h>
38 #ifndef lint
39 __COPYRIGHT("@(#) Copyright (c) 1990, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n");
41 #if 0
42 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
43 __FBSDID("$FreeBSD: src/usr.bin/xargs/xargs.c,v 1.62 2006/01/01 22:59:54 jmallett Exp $");
44 #endif
45 __RCSID("$NetBSD: xargs.c,v 1.17 2007/12/15 19:44:54 perry Exp $");
46 #endif /* not lint */
47 
48 #include <sys/param.h>
49 #include <sys/wait.h>
50 
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <langinfo.h>
55 #include <locale.h>
56 #include <paths.h>
57 #include <regex.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <signal.h>
62 #include <unistd.h>
63 
64 #include "pathnames.h"
65 
66 static void	parse_input(int, char *[]);
67 static void	prerun(int, char *[]);
68 static int	prompt(void);
69 static void	run(char **);
70 static void	usage(void) __dead;
71 void		strnsubst(char **, const char *, const char *, size_t);
72 static void	waitchildren(const char *, int);
73 
74 static char echo[] = _PATH_ECHO;
75 static char **av, **bxp, **ep, **endxp, **xp;
76 static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
77 static const char *eofstr;
78 static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
79 static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
80 static int curprocs, maxprocs;
81 
82 static volatile int childerr;
83 
84 extern char **environ;
85 
86 int
87 main(int argc, char *argv[])
88 {
89 	long arg_max;
90 	int ch, Jflag, nargs, nflag, nline;
91 	size_t linelen;
92 	char *endptr;
93 
94 	setprogname(argv[0]);
95 
96 	inpline = replstr = NULL;
97 	ep = environ;
98 	eofstr = "";
99 	Jflag = nflag = 0;
100 
101 	(void)setlocale(LC_ALL, "");
102 
103 	/*
104 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
105 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
106 	 * that the smallest argument is 2 bytes in length, this means that
107 	 * the number of arguments is limited to:
108 	 *
109 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
110 	 *
111 	 * We arbitrarily limit the number of arguments to 5000.  This is
112 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
113 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
114 	 * probably not worthwhile.
115 	 */
116 	nargs = 5000;
117 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
118 		errx(1, "sysconf(_SC_ARG_MAX) failed");
119 	nline = arg_max - 4 * 1024;
120 	while (*ep != NULL) {
121 		/* 1 byte for each '\0' */
122 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
123 	}
124 	maxprocs = 1;
125 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:S:s:rtx")) != -1)
126 		switch (ch) {
127 		case 'E':
128 			eofstr = optarg;
129 			break;
130 		case 'I':
131 			Jflag = 0;
132 			Iflag = 1;
133 			Lflag = 1;
134 			replstr = optarg;
135 			break;
136 		case 'J':
137 			Iflag = 0;
138 			Jflag = 1;
139 			replstr = optarg;
140 			break;
141 		case 'L':
142 			Lflag = atoi(optarg);
143 			break;
144 		case 'n':
145 			nflag = 1;
146 			if ((nargs = atoi(optarg)) <= 0)
147 				errx(1, "illegal argument count");
148 			break;
149 		case 'o':
150 			oflag = 1;
151 			break;
152 		case 'P':
153 			if ((maxprocs = atoi(optarg)) <= 0)
154 				errx(1, "max. processes must be >0");
155 			break;
156 		case 'p':
157 			pflag = 1;
158 			break;
159 		case 'R':
160 			Rflag = strtol(optarg, &endptr, 10);
161 			if (*endptr != '\0')
162 				errx(1, "replacements must be a number");
163 			break;
164 		case 'r':
165 			/* GNU compatibility */
166 			break;
167 		case 'S':
168 			Sflag = strtoul(optarg, &endptr, 10);
169 			if (*endptr != '\0')
170 				errx(1, "replsize must be a number");
171 			break;
172 		case 's':
173 			nline = atoi(optarg);
174 			break;
175 		case 't':
176 			tflag = 1;
177 			break;
178 		case 'x':
179 			xflag = 1;
180 			break;
181 		case '0':
182 			zflag = 1;
183 			break;
184 		case '?':
185 		default:
186 			usage();
187 	}
188 	argc -= optind;
189 	argv += optind;
190 
191 	if (!Iflag && Rflag)
192 		usage();
193 	if (!Iflag && Sflag)
194 		usage();
195 	if (Iflag && !Rflag)
196 		Rflag = 5;
197 	if (Iflag && !Sflag)
198 		Sflag = 255;
199 	if (xflag && !nflag)
200 		usage();
201 	if (Iflag || Lflag)
202 		xflag = 1;
203 	if (replstr != NULL && *replstr == '\0')
204 		errx(1, "replstr may not be empty");
205 
206 	/*
207 	 * Allocate pointers for the utility name, the utility arguments,
208 	 * the maximum arguments to be read from stdin and the trailing
209 	 * NULL.
210 	 */
211 	linelen = 1 + argc + nargs + 1;
212 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
213 		errx(1, "malloc failed");
214 
215 	/*
216 	 * Use the user's name for the utility as argv[0], just like the
217 	 * shell.  Echo is the default.  Set up pointers for the user's
218 	 * arguments.
219 	 */
220 	if (*argv == NULL)
221 		cnt = strlen(*bxp++ = echo);
222 	else {
223 		do {
224 			if (Jflag && strcmp(*argv, replstr) == 0) {
225 				char **avj;
226 				jfound = 1;
227 				argv++;
228 				for (avj = argv; *avj; avj++)
229 					cnt += strlen(*avj) + 1;
230 				break;
231 			}
232 			cnt += strlen(*bxp++ = *argv) + 1;
233 		} while (*++argv != NULL);
234 	}
235 
236 	/*
237 	 * Set up begin/end/traversing pointers into the array.  The -n
238 	 * count doesn't include the trailing NULL pointer, so the malloc
239 	 * added in an extra slot.
240 	 */
241 	endxp = (xp = bxp) + nargs;
242 
243 	/*
244 	 * Allocate buffer space for the arguments read from stdin and the
245 	 * trailing NULL.  Buffer space is defined as the default or specified
246 	 * space, minus the length of the utility name and arguments.  Set up
247 	 * begin/end/traversing pointers into the array.  The -s count does
248 	 * include the trailing NULL, so the malloc didn't add in an extra
249 	 * slot.
250 	 */
251 	nline -= cnt;
252 	if (nline <= 0)
253 		errx(1, "insufficient space for command");
254 
255 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
256 		errx(1, "malloc failed");
257 	ebp = (argp = p = bbp) + nline - 1;
258 	for (;;)
259 		parse_input(argc, argv);
260 }
261 
262 static void
263 parse_input(int argc, char *argv[])
264 {
265 	int ch, foundeof;
266 	char **avj;
267 
268 	foundeof = 0;
269 
270 	switch (ch = getchar()) {
271 	case EOF:
272 		/* No arguments since last exec. */
273 		if (p == bbp) {
274 			waitchildren(*argv, 1);
275 			exit(rval);
276 		}
277 		goto arg1;
278 	case ' ':
279 	case '\t':
280 		/* Quotes escape tabs and spaces. */
281 		if (insingle || indouble || zflag)
282 			goto addch;
283 		goto arg2;
284 	case '\0':
285 		if (zflag) {
286 			/*
287 			 * Increment 'count', so that nulls will be treated
288 			 * as end-of-line, as well as end-of-argument.  This
289 			 * is needed so -0 works properly with -I and -L.
290 			 */
291 			count++;
292 			goto arg2;
293 		}
294 		goto addch;
295 	case '\n':
296 		if (zflag)
297 			goto addch;
298 		count++;	    /* Indicate end-of-line (used by -L) */
299 
300 		/* Quotes do not escape newlines. */
301 arg1:		if (insingle || indouble)
302 			errx(1, "unterminated quote");
303 arg2:
304 		foundeof = *eofstr != '\0' &&
305 		    strncmp(argp, eofstr, (size_t)(p - argp)) == 0;
306 
307 		/* Do not make empty args unless they are quoted */
308 		if ((argp != p || wasquoted) && !foundeof) {
309 			*p++ = '\0';
310 			*xp++ = argp;
311 			if (Iflag) {
312 				size_t curlen;
313 
314 				if (inpline == NULL)
315 					curlen = 0;
316 				else {
317 					/*
318 					 * If this string is not zero
319 					 * length, append a space for
320 					 * separation before the next
321 					 * argument.
322 					 */
323 					if ((curlen = strlen(inpline)) != 0)
324 						(void)strcat(inpline, " ");
325 				}
326 				curlen++;
327 				/*
328 				 * Allocate enough to hold what we will
329 				 * be holding in a second, and to append
330 				 * a space next time through, if we have
331 				 * to.
332 				 */
333 				inpline = realloc(inpline, curlen + 2 +
334 				    strlen(argp));
335 				if (inpline == NULL)
336 					errx(1, "realloc failed");
337 				if (curlen == 1)
338 					(void)strcpy(inpline, argp);
339 				else
340 					(void)strcat(inpline, argp);
341 			}
342 		}
343 
344 		/*
345 		 * If max'd out on args or buffer, or reached EOF,
346 		 * run the command.  If xflag and max'd out on buffer
347 		 * but not on args, object.  Having reached the limit
348 		 * of input lines, as specified by -L is the same as
349 		 * maxing out on arguments.
350 		 */
351 		if (xp == endxp || p > ebp || ch == EOF ||
352 		    (Lflag <= count && xflag) || foundeof) {
353 			if (xflag && xp != endxp && p > ebp)
354 				errx(1, "insufficient space for arguments");
355 			if (jfound) {
356 				for (avj = argv; *avj; avj++)
357 					*xp++ = *avj;
358 			}
359 			prerun(argc, av);
360 			if (ch == EOF || foundeof) {
361 				waitchildren(*argv, 1);
362 				exit(rval);
363 			}
364 			p = bbp;
365 			xp = bxp;
366 			count = 0;
367 		}
368 		argp = p;
369 		wasquoted = 0;
370 		break;
371 	case '\'':
372 		if (indouble || zflag)
373 			goto addch;
374 		insingle = !insingle;
375 		wasquoted = 1;
376 		break;
377 	case '"':
378 		if (insingle || zflag)
379 			goto addch;
380 		indouble = !indouble;
381 		wasquoted = 1;
382 		break;
383 	case '\\':
384 		if (zflag)
385 			goto addch;
386 		/* Backslash escapes anything, is escaped by quotes. */
387 		if (!insingle && !indouble && (ch = getchar()) == EOF)
388 			errx(1, "backslash at EOF");
389 		/* FALLTHROUGH */
390 	default:
391 addch:		if (p < ebp) {
392 			*p++ = ch;
393 			break;
394 		}
395 
396 		/* If only one argument, not enough buffer space. */
397 		if (bxp == xp)
398 			errx(1, "insufficient space for argument");
399 		/* Didn't hit argument limit, so if xflag object. */
400 		if (xflag)
401 			errx(1, "insufficient space for arguments");
402 
403 		if (jfound) {
404 			for (avj = argv; *avj; avj++)
405 				*xp++ = *avj;
406 		}
407 		prerun(argc, av);
408 		xp = bxp;
409 		cnt = ebp - argp;
410 		(void)memcpy(bbp, argp, (size_t)cnt);
411 		p = (argp = bbp) + cnt;
412 		*p++ = ch;
413 		break;
414 	}
415 }
416 
417 /*
418  * Do things necessary before run()'ing, such as -I substitution,
419  * and then call run().
420  */
421 static void
422 prerun(int argc, char *argv[])
423 {
424 	char **tmp, **tmp2, **avj;
425 	int repls;
426 
427 	repls = Rflag;
428 
429 	if (argc == 0 || repls == 0) {
430 		*xp = NULL;
431 		run(argv);
432 		return;
433 	}
434 
435 	avj = argv;
436 
437 	/*
438 	 * Allocate memory to hold the argument list, and
439 	 * a NULL at the tail.
440 	 */
441 	tmp = malloc((argc + 1) * sizeof(char**));
442 	if (tmp == NULL)
443 		errx(1, "malloc failed");
444 	tmp2 = tmp;
445 
446 	/*
447 	 * Save the first argument and iterate over it, we
448 	 * cannot do strnsubst() to it.
449 	 */
450 	if ((*tmp++ = strdup(*avj++)) == NULL)
451 		errx(1, "strdup failed");
452 
453 	/*
454 	 * For each argument to utility, if we have not used up
455 	 * the number of replacements we are allowed to do, and
456 	 * if the argument contains at least one occurrence of
457 	 * replstr, call strnsubst(), else just save the string.
458 	 * Iterations over elements of avj and tmp are done
459 	 * where appropriate.
460 	 */
461 	while (--argc) {
462 		*tmp = *avj++;
463 		if (repls && strstr(*tmp, replstr) != NULL) {
464 			strnsubst(tmp++, replstr, inpline, (size_t)Sflag);
465 			if (repls > 0)
466 				repls--;
467 		} else {
468 			if ((*tmp = strdup(*tmp)) == NULL)
469 				errx(1, "strdup failed");
470 			tmp++;
471 		}
472 	}
473 
474 	/*
475 	 * Run it.
476 	 */
477 	*tmp = NULL;
478 	run(tmp2);
479 
480 	/*
481 	 * Walk from the tail to the head, free along the way.
482 	 */
483 	for (; tmp2 != tmp; tmp--)
484 		free(*tmp);
485 	/*
486 	 * Now free the list itself.
487 	 */
488 	free(tmp2);
489 
490 	/*
491 	 * Free the input line buffer, if we have one.
492 	 */
493 	if (inpline != NULL) {
494 		free(inpline);
495 		inpline = NULL;
496 	}
497 }
498 
499 static void
500 run(char **argv)
501 {
502 	int fd;
503 	char **avec;
504 
505 	/*
506 	 * If the user wants to be notified of each command before it is
507 	 * executed, notify them.  If they want the notification to be
508 	 * followed by a prompt, then prompt them.
509 	 */
510 	if (tflag || pflag) {
511 		(void)fprintf(stderr, "%s", *argv);
512 		for (avec = argv + 1; *avec != NULL; ++avec)
513 			(void)fprintf(stderr, " %s", *avec);
514 		/*
515 		 * If the user has asked to be prompted, do so.
516 		 */
517 		if (pflag)
518 			/*
519 			 * If they asked not to exec, return without execution
520 			 * but if they asked to, go to the execution.  If we
521 			 * could not open their tty, break the switch and drop
522 			 * back to -t behaviour.
523 			 */
524 			switch (prompt()) {
525 			case 0:
526 				return;
527 			case 1:
528 				goto exec;
529 			case 2:
530 				break;
531 			}
532 		(void)fprintf(stderr, "\n");
533 		(void)fflush(stderr);
534 	}
535 exec:
536 	childerr = 0;
537 	switch (vfork()) {
538 	case -1:
539 		err(1, "vfork");
540 		/*NOTREACHED*/
541 	case 0:
542 		if (oflag) {
543 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
544 				err(1, "can't open /dev/tty");
545 		} else {
546 			fd = open(_PATH_DEVNULL, O_RDONLY);
547 		}
548 		if (fd > STDIN_FILENO) {
549 			if (dup2(fd, STDIN_FILENO) != 0)
550 				err(1, "can't dup2 to stdin");
551 			(void)close(fd);
552 		}
553 		(void)execvp(argv[0], argv);
554 		childerr = errno;
555 		_exit(1);
556 	}
557 	curprocs++;
558 	waitchildren(*argv, 0);
559 }
560 
561 static void
562 waitchildren(const char *name, int waitall)
563 {
564 	pid_t pid;
565 	int status;
566 
567 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
568 	    WNOHANG : 0)) > 0) {
569 		curprocs--;
570 		/* If we couldn't invoke the utility, exit. */
571 		if (childerr != 0) {
572 			errno = childerr;
573 			err(errno == ENOENT ? 127 : 126, "%s", name);
574 		}
575 		/*
576 		 * According to POSIX, we have to exit if the utility exits
577 		 * with a 255 status, or is interrupted by a signal. xargs
578 		 * is allowed to return any exit status between 1 and 125
579 		 * in these cases, but we'll use 124 and 125, the same
580 		 * values used by GNU xargs.
581 		 */
582 		if (WIFEXITED(status)) {
583 			if (WEXITSTATUS (status) == 255) {
584 				warnx ("%s exited with status 255", name);
585 				exit(124);
586 			} else if (WEXITSTATUS (status) != 0) {
587 				rval = 123;
588 			}
589 		} else if (WIFSIGNALED (status)) {
590 			if (WTERMSIG(status) < NSIG) {
591 				warnx("%s terminated by SIG%s", name,
592 				    sys_signame[WTERMSIG(status)]);
593 			} else {
594 				warnx("%s terminated by signal %d", name,
595 				    WTERMSIG(status));
596 			}
597 			exit(125);
598 		}
599 	}
600 	if (pid == -1 && errno != ECHILD)
601 		err(1, "wait3");
602 }
603 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr and reads one line of reply from the
 * controlling terminal.  Returns 1 if the reply matches the locale's
 * affirmative expression, 0 if it does not (or if no reply could be read
 * or the expression could not be compiled), and 2 if the terminal could
 * not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	regmatch_t span;
	size_t rsize;
	int match;
	char *response;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	/* YESEXPR is an extended regular expression. */
	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		(void)fclose(ttyfp);
		return (0);
	}
	/*
	 * The line returned by fgetln() is not NUL-terminated and only ends
	 * in a newline if one was typed.  Strip just the newline and pass
	 * the exact extent to regexec() instead of overwriting the last
	 * byte, which would drop a character from an unterminated reply.
	 */
	if (rsize > 0 && response[rsize - 1] == '\n')
		rsize--;
	span.rm_so = 0;
	span.rm_eo = (regoff_t)rsize;
	match = regexec(&cre, response, 0, &span, REG_STARTEND);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
631 
/*
 * Print the synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	/* First line carries the program name; the rest is fixed text. */
	(void)fprintf(stderr,
"Usage: %s [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n",
	    prog);
	(void)fputs(
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n", stderr);
	exit(1);
}
641