xref: /openbsd-src/usr.bin/xargs/xargs.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: xargs.c,v 1.31 2015/12/09 19:29:49 mmcc Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #include <sys/wait.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <langinfo.h>
45 #include <locale.h>
46 #include <paths.h>
47 #include <regex.h>
48 #include <signal.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <limits.h>
54 
55 #include "pathnames.h"
56 
/* Helpers defined later in this file. */
static void	usage(void);
static int	prompt(void);
static void	run(char **);
static void	prerun(int, char *[]);
static void	parse_input(int, char *[]);
static void	waitchildren(const char *, int);

/* Replacement helper; its definition is not part of this file. */
void		strnsubst(char **, const char *, const char *, size_t);

/*
 * Exec vector bookkeeping.  av is the start of the vector (utility
 * name first), bxp the first slot available for arguments read from
 * stdin, endxp the slot at which the argument limit is reached and xp
 * the next slot to fill.  ep walks the environment in main().
 */
static char **av;
static char **bxp;
static char **ep;
static char **endxp;
static char **xp;

/*
 * Character buffer bookkeeping.  bbp and ebp delimit the buffer that
 * stores argument text, argp is the start of the argument currently
 * being assembled and p the next byte to write.
 */
static char *argp;
static char *bbp;
static char *ebp;
static char *p;

/* -I: arguments of the current line joined by spaces, and its size. */
static char *inpline;
static size_t inpsize;

/* Option arguments: -I/-J replacement string and -E end-of-file string. */
static char *replstr;
static const char *eofstr;

/* Quoting state of the input parser. */
static int insingle;
static int indouble;
static int wasquoted;

/* Flags and numeric values taken from the command line. */
static int Iflag;
static int Lflag;
static int Rflag;
static int oflag;
static int pflag;
static int tflag;
static int xflag;
static int zflag;
static int maxprocs;

/* Cleared by -r and by prerun(); when set, EOF with no input still runs. */
static int runeof = 1;

/* Running state. */
static int cnt;		/* bytes used by the utility and its arguments */
static int count;	/* input lines seen since the last run (for -L) */
static int jfound;	/* the -J replstr was found among the arguments */
static int rval;	/* exit status to report */
static int curprocs;	/* children started and not yet reaped */

extern char **environ;
74 
75 int
76 main(int argc, char *argv[])
77 {
78 	long arg_max;
79 	int ch, Jflag, nargs, nflag, nline;
80 	size_t linelen;
81 	char *endptr;
82 	const char *errstr;
83 
84 	inpline = replstr = NULL;
85 	ep = environ;
86 	eofstr = "";
87 	Jflag = nflag = 0;
88 
89 	(void)setlocale(LC_MESSAGES, "");
90 
91 	/*
92 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
93 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
94 	 * that the smallest argument is 2 bytes in length, this means that
95 	 * the number of arguments is limited to:
96 	 *
97 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
98 	 *
99 	 * We arbitrarily limit the number of arguments to 5000.  This is
100 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
101 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
102 	 * probably not worthwhile.
103 	 */
104 	nargs = 5000;
105 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
106 		errx(1, "sysconf(_SC_ARG_MAX) failed");
107 
108 	if (pledge("stdio rpath proc exec", NULL) == -1)
109 		err(1, "pledge");
110 
111 	nline = arg_max - 4 * 1024;
112 	while (*ep != NULL) {
113 		/* 1 byte for each '\0' */
114 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
115 	}
116 	maxprocs = 1;
117 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
118 		switch (ch) {
119 		case 'E':
120 			eofstr = optarg;
121 			break;
122 		case 'I':
123 			Jflag = 0;
124 			Iflag = 1;
125 			Lflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'J':
129 			Iflag = 0;
130 			Jflag = 1;
131 			replstr = optarg;
132 			break;
133 		case 'L':
134 			Lflag = strtonum(optarg, 0, INT_MAX, &errstr);
135 			if (errstr)
136 				errx(1, "-L %s: %s", optarg, errstr);
137 			break;
138 		case 'n':
139 			nflag = 1;
140 			nargs = strtonum(optarg, 1, INT_MAX, &errstr);
141 			if (errstr)
142 				errx(1, "-n %s: %s", optarg, errstr);
143 			break;
144 		case 'o':
145 			oflag = 1;
146 			break;
147 		case 'P':
148 			maxprocs = strtonum(optarg, 1, INT_MAX, &errstr);
149 			if (errstr)
150 				errx(1, "-P %s: %s", optarg, errstr);
151 			break;
152 		case 'p':
153 			pflag = 1;
154 			break;
155 		case 'r':
156 			runeof = 0;
157 			break;
158 		case 'R':
159 			Rflag = strtol(optarg, &endptr, 10);
160 			if (*endptr != '\0')
161 				errx(1, "replacements must be a number");
162 			break;
163 		case 's':
164 			nline = strtonum(optarg, 0, INT_MAX, &errstr);
165 			if (errstr)
166 				errx(1, "-s %s: %s", optarg, errstr);
167 			break;
168 		case 't':
169 			tflag = 1;
170 			break;
171 		case 'x':
172 			xflag = 1;
173 			break;
174 		case '0':
175 			zflag = 1;
176 			break;
177 		case '?':
178 		default:
179 			usage();
180 	}
181 	argc -= optind;
182 	argv += optind;
183 
184 	if (!Iflag && Rflag)
185 		usage();
186 	if (Iflag && !Rflag)
187 		Rflag = 5;
188 	if (xflag && !nflag)
189 		usage();
190 	if (Iflag || Lflag)
191 		xflag = 1;
192 	if (replstr != NULL && *replstr == '\0')
193 		errx(1, "replstr may not be empty");
194 
195 	/*
196 	 * Allocate pointers for the utility name, the utility arguments,
197 	 * the maximum arguments to be read from stdin and the trailing
198 	 * NULL.
199 	 */
200 	linelen = 1 + argc + nargs + 1;
201 	if ((av = bxp = calloc(linelen, sizeof(char **))) == NULL)
202 		err(1, NULL);
203 
204 	/*
205 	 * Use the user's name for the utility as argv[0], just like the
206 	 * shell.  Echo is the default.  Set up pointers for the user's
207 	 * arguments.
208 	 */
209 	if (*argv == NULL)
210 		cnt = strlen(*bxp++ = _PATH_ECHO);
211 	else {
212 		do {
213 			if (Jflag && strcmp(*argv, replstr) == 0) {
214 				char **avj;
215 				jfound = 1;
216 				argv++;
217 				for (avj = argv; *avj; avj++)
218 					cnt += strlen(*avj) + 1;
219 				break;
220 			}
221 			cnt += strlen(*bxp++ = *argv) + 1;
222 		} while (*++argv != NULL);
223 	}
224 
225 	/*
226 	 * Set up begin/end/traversing pointers into the array.  The -n
227 	 * count doesn't include the trailing NULL pointer, so the malloc
228 	 * added in an extra slot.
229 	 */
230 	endxp = (xp = bxp) + nargs;
231 
232 	/*
233 	 * Allocate buffer space for the arguments read from stdin and the
234 	 * trailing NULL.  Buffer space is defined as the default or specified
235 	 * space, minus the length of the utility name and arguments.  Set up
236 	 * begin/end/traversing pointers into the array.  The -s count does
237 	 * include the trailing NULL, so the malloc didn't add in an extra
238 	 * slot.
239 	 */
240 	nline -= cnt;
241 	if (nline <= 0)
242 		errx(1, "insufficient space for command");
243 
244 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
245 		err(1, NULL);
246 	ebp = (argp = p = bbp) + nline - 1;
247 	for (;;)
248 		parse_input(argc, argv);
249 }
250 
251 static void
252 parse_input(int argc, char *argv[])
253 {
254 	int hasblank = 0;
255 	static int hadblank = 0;
256 	int ch, foundeof = 0;
257 	char **avj;
258 
259 	ch = getchar();
260 	if (isblank(ch)) {
261 		/* Quotes escape tabs and spaces. */
262 		if (insingle || indouble)
263 			goto addch;
264 		hasblank = 1;
265 		if (zflag)
266 			goto addch;
267 		goto arg2;
268 	}
269 
270 	switch (ch) {
271 	case EOF:
272 		/* No arguments since last exec. */
273 		if (p == bbp) {
274 			if (runeof)
275 				prerun(0, av);
276 			waitchildren(*argv, 1);
277 			exit(rval);
278 		}
279 		goto arg1;
280 	case '\0':
281 		if (zflag)
282 			goto arg2;
283 		goto addch;
284 	case '\n':
285 		hasblank = 1;
286 		if (hadblank == 0)
287 			count++;
288 		if (zflag)
289 			goto addch;
290 
291 		/* Quotes do not escape newlines. */
292 arg1:		if (insingle || indouble)
293 			errx(1, "unterminated quote");
294 arg2:
295 		foundeof = *eofstr != '\0' &&
296 		    strcmp(argp, eofstr) == 0;
297 
298 		/* Do not make empty args unless they are quoted */
299 		if ((argp != p || wasquoted) && !foundeof) {
300 			*p++ = '\0';
301 			*xp++ = argp;
302 			if (Iflag) {
303 				size_t curlen;
304 
305 				if (inpline == NULL)
306 					curlen = 0;
307 				else {
308 					/*
309 					 * If this string is not zero
310 					 * length, append a space for
311 					 * separation before the next
312 					 * argument.
313 					 */
314 					if ((curlen = strlen(inpline)))
315 						strlcat(inpline, " ", inpsize);
316 				}
317 				curlen++;
318 				/*
319 				 * Allocate enough to hold what we will
320 				 * be holding in a second, and to append
321 				 * a space next time through, if we have
322 				 * to.
323 				 */
324 				inpsize = curlen + 2 + strlen(argp);
325 				inpline = realloc(inpline, inpsize);
326 				if (inpline == NULL)
327 					errx(1, "realloc failed");
328 				if (curlen == 1)
329 					strlcpy(inpline, argp, inpsize);
330 				else
331 					strlcat(inpline, argp, inpsize);
332 			}
333 		}
334 
335 		/*
336 		 * If max'd out on args or buffer, or reached EOF,
337 		 * run the command.  If xflag and max'd out on buffer
338 		 * but not on args, object.  Having reached the limit
339 		 * of input lines, as specified by -L is the same as
340 		 * maxing out on arguments.
341 		 */
342 		if (xp == endxp || p > ebp || ch == EOF ||
343 		    (Lflag <= count && xflag) || foundeof) {
344 			if (xflag && xp != endxp && p > ebp)
345 				errx(1, "insufficient space for arguments");
346 			if (jfound) {
347 				for (avj = argv; *avj; avj++)
348 					*xp++ = *avj;
349 			}
350 			prerun(argc, av);
351 			if (ch == EOF || foundeof) {
352 				waitchildren(*argv, 1);
353 				exit(rval);
354 			}
355 			p = bbp;
356 			xp = bxp;
357 			count = 0;
358 		}
359 		argp = p;
360 		wasquoted = 0;
361 		break;
362 	case '\'':
363 		if (indouble || zflag)
364 			goto addch;
365 		insingle = !insingle;
366 		wasquoted = 1;
367 		break;
368 	case '"':
369 		if (insingle || zflag)
370 			goto addch;
371 		indouble = !indouble;
372 		wasquoted = 1;
373 		break;
374 	case '\\':
375 		if (zflag)
376 			goto addch;
377 		/* Backslash escapes anything, is escaped by quotes. */
378 		if (!insingle && !indouble && (ch = getchar()) == EOF)
379 			errx(1, "backslash at EOF");
380 		/* FALLTHROUGH */
381 	default:
382 addch:		if (p < ebp) {
383 			*p++ = ch;
384 			break;
385 		}
386 
387 		/* If only one argument, not enough buffer space. */
388 		if (bxp == xp)
389 			errx(1, "insufficient space for argument");
390 		/* Didn't hit argument limit, so if xflag object. */
391 		if (xflag)
392 			errx(1, "insufficient space for arguments");
393 
394 		if (jfound) {
395 			for (avj = argv; *avj; avj++)
396 				*xp++ = *avj;
397 		}
398 		prerun(argc, av);
399 		xp = bxp;
400 		cnt = ebp - argp;
401 		memcpy(bbp, argp, (size_t)cnt);
402 		p = (argp = bbp) + cnt;
403 		*p++ = ch;
404 		break;
405 	}
406 	hadblank = hasblank;
407 }
408 
409 /*
410  * Do things necessary before run()'ing, such as -I substitution,
411  * and then call run().
412  */
413 static void
414 prerun(int argc, char *argv[])
415 {
416 	char **tmp, **tmp2, **avj;
417 	int repls;
418 
419 	repls = Rflag;
420 	runeof = 0;
421 
422 	if (argc == 0 || repls == 0) {
423 		*xp = NULL;
424 		run(argv);
425 		return;
426 	}
427 
428 	avj = argv;
429 
430 	/*
431 	 * Allocate memory to hold the argument list, and
432 	 * a NULL at the tail.
433 	 */
434 	tmp = calloc(argc + 1, sizeof(char**));
435 	if (tmp == NULL)
436 		err(1, NULL);
437 	tmp2 = tmp;
438 
439 	/*
440 	 * Save the first argument and iterate over it, we
441 	 * cannot do strnsubst() to it.
442 	 */
443 	if ((*tmp++ = strdup(*avj++)) == NULL)
444 		err(1, NULL);
445 
446 	/*
447 	 * For each argument to utility, if we have not used up
448 	 * the number of replacements we are allowed to do, and
449 	 * if the argument contains at least one occurrence of
450 	 * replstr, call strnsubst(), else just save the string.
451 	 * Iterations over elements of avj and tmp are done
452 	 * where appropriate.
453 	 */
454 	while (--argc) {
455 		*tmp = *avj++;
456 		if (repls && strstr(*tmp, replstr) != NULL) {
457 			strnsubst(tmp++, replstr, inpline, (size_t)255);
458 			if (repls > 0)
459 				repls--;
460 		} else {
461 			if ((*tmp = strdup(*tmp)) == NULL)
462 				err(1, NULL);
463 			tmp++;
464 		}
465 	}
466 
467 	/*
468 	 * Run it.
469 	 */
470 	*tmp = NULL;
471 	run(tmp2);
472 
473 	/*
474 	 * Walk from the tail to the head, free along the way.
475 	 */
476 	for (; tmp2 != tmp; tmp--)
477 		free(*tmp);
478 	/*
479 	 * Now free the list itself.
480 	 */
481 	free(tmp2);
482 
483 	/*
484 	 * Free the input line buffer, if we have one.
485 	 */
486 	free(inpline);
487 	inpline = NULL;
488 }
489 
490 static void
491 run(char **argv)
492 {
493 	pid_t pid;
494 	int fd;
495 	char **avec;
496 
497 	/*
498 	 * If the user wants to be notified of each command before it is
499 	 * executed, notify them.  If they want the notification to be
500 	 * followed by a prompt, then prompt them.
501 	 */
502 	if (tflag || pflag) {
503 		(void)fprintf(stderr, "%s", *argv);
504 		for (avec = argv + 1; *avec != NULL; ++avec)
505 			(void)fprintf(stderr, " %s", *avec);
506 		/*
507 		 * If the user has asked to be prompted, do so.
508 		 */
509 		if (pflag)
510 			/*
511 			 * If they asked not to exec, return without execution
512 			 * but if they asked to, go to the execution.  If we
513 			 * could not open their tty, break the switch and drop
514 			 * back to -t behaviour.
515 			 */
516 			switch (prompt()) {
517 			case 0:
518 				return;
519 			case 1:
520 				goto exec;
521 			case 2:
522 				break;
523 			}
524 		(void)fprintf(stderr, "\n");
525 		(void)fflush(stderr);
526 	}
527 exec:
528 	switch (pid = vfork()) {
529 	case -1:
530 		err(1, "vfork");
531 	case 0:
532 		if (oflag) {
533 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
534 				warn("can't open /dev/tty");
535 				_exit(1);
536 			}
537 		} else {
538 			fd = open(_PATH_DEVNULL, O_RDONLY);
539 		}
540 		if (fd > STDIN_FILENO) {
541 			if (dup2(fd, STDIN_FILENO) != 0) {
542 				warn("can't dup2 to stdin");
543 				_exit(1);
544 			}
545 			close(fd);
546 		}
547 		execvp(argv[0], argv);
548 		warn("%s", argv[0]);
549 		_exit(errno == ENOENT ? 127 : 126);
550 	}
551 	curprocs++;
552 	waitchildren(*argv, 0);
553 }
554 
555 static void
556 waitchildren(const char *name, int waitall)
557 {
558 	pid_t pid;
559 	int status;
560 
561 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
562 	    WNOHANG : 0)) > 0) {
563 		curprocs--;
564 		/*
565 		 * According to POSIX, we have to exit if the utility exits
566 		 * with a 255 status, or is interrupted by a signal.
567 		 * We are allowed to return any exit status between 1 and
568 		 * 125 in these cases, but we'll use 124 and 125, the same
569 		 * values used by GNU xargs.
570 		 */
571 		if (WIFEXITED(status)) {
572 			if (WEXITSTATUS(status) == 255) {
573 				warnx("%s exited with status 255", name);
574 				exit(124);
575 			} else if (WEXITSTATUS(status) == 127 ||
576 			    WEXITSTATUS(status) == 126) {
577 				exit(WEXITSTATUS(status));
578 			} else if (WEXITSTATUS(status) != 0) {
579 				rval = 123;
580 			}
581 		} else if (WIFSIGNALED(status)) {
582 			if (WTERMSIG(status) != SIGPIPE) {
583 				if (WTERMSIG(status) < NSIG)
584 					warnx("%s terminated by SIG%s", name,
585 					    sys_signame[WTERMSIG(status)]);
586 				else
587 					warnx("%s terminated by signal %d",
588 					    name, WTERMSIG(status));
589 			}
590 			exit(125);
591 		}
592 	}
593 	if (pid == -1 && errno != ECHILD)
594 		err(1, "waitpid");
595 }
596 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr, reads one line from the controlling
 * terminal and matches it against the locale's affirmative-response
 * expression.
 *
 * Returns 1 when the response matches, 0 when it does not (or when no
 * response could be read or the expression could not be compiled), and
 * 2 when the terminal could not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	ssize_t rlen;
	int match;
	char *response;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);

	/*
	 * getline() returns a NUL-terminated line, so a response that
	 * lacks a trailing newline keeps its last character.  YESEXPR is
	 * specified as an extended regular expression.
	 */
	response = NULL;
	rsize = 0;
	if ((rlen = getline(&response, &rsize, ttyfp)) == -1 ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		free(response);
		(void)fclose(ttyfp);
		return (0);
	}
	/* Drop the newline, if any, so it takes no part in the match. */
	if (rlen > 0 && response[rlen - 1] == '\n')
		response[rlen - 1] = '\0';
	match = regexec(&cre, response, 0, NULL, 0);
	free(response);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
624 
/*
 * Write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
	    "             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
	    "             [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
634