xref: /openbsd-src/usr.bin/xargs/xargs.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: xargs.c,v 1.34 2018/06/12 15:24:31 millert Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #include <sys/wait.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <langinfo.h>
45 #include <locale.h>
46 #include <paths.h>
47 #include <regex.h>
48 #include <signal.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <limits.h>
54 
55 #include "pathnames.h"
56 
/* File-local helpers, defined further down in this file. */
static void	usage(void);
static int	prompt(void);
static void	run(char **);
static void	prerun(int, char *[]);
static void	parse_input(int, char *[]);
static void	waitchildren(const char *, int);
/* Not static: no definition is visible in this file. */
void		strnsubst(char **, const char *, const char *, size_t);

/*
 * Argument vector bookkeeping.  av is the base of the vector handed to
 * the utility, bxp the first slot available for arguments read from
 * stdin, xp the next free slot and endxp the slot at which the -n limit
 * is reached.  ep is used by main() to walk the environment.
 */
static char **av;
static char **bxp;
static char **ep;
static char **endxp;
static char **xp;

/*
 * Input buffer bookkeeping.  bbp/ebp bound the character buffer, p is
 * the next write position and argp the start of the argument currently
 * being collected.  inpline (inpsize bytes) accumulates the input that
 * replaces replstr.
 */
static char *argp;
static char *bbp;
static char *ebp;
static char *inpline;
static char *p;
static char *replstr;
static size_t inpsize;

/* String given with -E; empty when no logical EOF string is set. */
static const char *eofstr;

/* Parser state: input line counter and quoting status. */
static int count;
static int insingle;
static int indouble;
static int wasquoted;

/* Option flags and values set from the command line. */
static int oflag;
static int pflag;
static int tflag;
static int Rflag;
static int zflag;
static int Iflag;
static int Lflag;
static int xflag;
static int jfound;

/* Scratch length counter shared by main() and parse_input(). */
static int cnt;

/* Exit status to report, and whether to run once on empty input. */
static int rval;
static int runeof = 1;

/* Number of children currently running and the -P limit. */
static int curprocs;
static int maxprocs;

extern char **environ;
74 
75 int
76 main(int argc, char *argv[])
77 {
78 	long arg_max;
79 	int ch, Jflag, nargs, nflag, nline;
80 	size_t linelen;
81 	char *endptr;
82 	const char *errstr;
83 
84 	inpline = replstr = NULL;
85 	ep = environ;
86 	eofstr = "";
87 	Jflag = nflag = 0;
88 
89 	(void)setlocale(LC_MESSAGES, "");
90 
91 	/*
92 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
93 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
94 	 * that the smallest argument is 2 bytes in length, this means that
95 	 * the number of arguments is limited to:
96 	 *
97 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
98 	 *
99 	 * We arbitrarily limit the number of arguments to 5000.  This is
100 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
101 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
102 	 * probably not worthwhile.
103 	 */
104 	nargs = 5000;
105 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
106 		errx(1, "sysconf(_SC_ARG_MAX) failed");
107 
108 	if (pledge("stdio rpath proc exec", NULL) == -1)
109 		err(1, "pledge");
110 
111 	nline = arg_max - 4 * 1024;
112 	while (*ep != NULL) {
113 		/* 1 byte for each '\0' */
114 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
115 	}
116 	maxprocs = 1;
117 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
118 		switch (ch) {
119 		case 'E':
120 			eofstr = optarg;
121 			break;
122 		case 'I':
123 			Jflag = 0;
124 			Iflag = 1;
125 			Lflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'J':
129 			Iflag = 0;
130 			Jflag = 1;
131 			replstr = optarg;
132 			break;
133 		case 'L':
134 			Lflag = strtonum(optarg, 0, INT_MAX, &errstr);
135 			if (errstr)
136 				errx(1, "-L %s: %s", optarg, errstr);
137 			break;
138 		case 'n':
139 			nflag = 1;
140 			nargs = strtonum(optarg, 1, INT_MAX, &errstr);
141 			if (errstr)
142 				errx(1, "-n %s: %s", optarg, errstr);
143 			break;
144 		case 'o':
145 			oflag = 1;
146 			break;
147 		case 'P':
148 			maxprocs = strtonum(optarg, 1, INT_MAX, &errstr);
149 			if (errstr)
150 				errx(1, "-P %s: %s", optarg, errstr);
151 			break;
152 		case 'p':
153 			pflag = 1;
154 			break;
155 		case 'r':
156 			runeof = 0;
157 			break;
158 		case 'R':
159 			Rflag = strtol(optarg, &endptr, 10);
160 			if (*endptr != '\0')
161 				errx(1, "replacements must be a number");
162 			break;
163 		case 's':
164 			nline = strtonum(optarg, 0, INT_MAX, &errstr);
165 			if (errstr)
166 				errx(1, "-s %s: %s", optarg, errstr);
167 			break;
168 		case 't':
169 			tflag = 1;
170 			break;
171 		case 'x':
172 			xflag = 1;
173 			break;
174 		case '0':
175 			zflag = 1;
176 			break;
177 		case '?':
178 		default:
179 			usage();
180 	}
181 	argc -= optind;
182 	argv += optind;
183 
184 	if (!Iflag && Rflag)
185 		usage();
186 	if (Iflag && !Rflag)
187 		Rflag = 5;
188 	if (xflag && !nflag)
189 		usage();
190 	if (Iflag || Lflag)
191 		xflag = 1;
192 	if (replstr != NULL && *replstr == '\0')
193 		errx(1, "replstr may not be empty");
194 
195 	/*
196 	 * Allocate pointers for the utility name, the utility arguments,
197 	 * the maximum arguments to be read from stdin and the trailing
198 	 * NULL.
199 	 */
200 	linelen = 1 + argc + nargs + 1;
201 	if ((av = bxp = calloc(linelen, sizeof(char *))) == NULL)
202 		err(1, NULL);
203 
204 	/*
205 	 * Use the user's name for the utility as argv[0], just like the
206 	 * shell.  Echo is the default.  Set up pointers for the user's
207 	 * arguments.
208 	 */
209 	if (*argv == NULL)
210 		cnt = strlen(*bxp++ = _PATH_ECHO);
211 	else {
212 		do {
213 			if (Jflag && strcmp(*argv, replstr) == 0) {
214 				char **avj;
215 				jfound = 1;
216 				argv++;
217 				for (avj = argv; *avj; avj++)
218 					cnt += strlen(*avj) + 1;
219 				break;
220 			}
221 			cnt += strlen(*bxp++ = *argv) + 1;
222 		} while (*++argv != NULL);
223 	}
224 
225 	/*
226 	 * Set up begin/end/traversing pointers into the array.  The -n
227 	 * count doesn't include the trailing NULL pointer, so the malloc
228 	 * added in an extra slot.
229 	 */
230 	endxp = (xp = bxp) + nargs;
231 
232 	/*
233 	 * Allocate buffer space for the arguments read from stdin and the
234 	 * trailing NULL.  Buffer space is defined as the default or specified
235 	 * space, minus the length of the utility name and arguments.  Set up
236 	 * begin/end/traversing pointers into the array.  The -s count does
237 	 * include the trailing NULL, so the malloc didn't add in an extra
238 	 * slot.
239 	 */
240 	nline -= cnt;
241 	if (nline <= 0)
242 		errx(1, "insufficient space for command");
243 
244 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
245 		err(1, NULL);
246 	ebp = (argp = p = bbp) + nline - 1;
247 	for (;;)
248 		parse_input(argc, argv);
249 }
250 
251 static void
252 parse_input(int argc, char *argv[])
253 {
254 	int hasblank = 0;
255 	static int hadblank = 0;
256 	int ch, foundeof = 0;
257 	char **avj;
258 
259 	ch = getchar();
260 	if (isblank(ch)) {
261 		/* Quotes escape tabs and spaces. */
262 		if (insingle || indouble)
263 			goto addch;
264 		hasblank = 1;
265 		if (zflag)
266 			goto addch;
267 		goto arg2;
268 	}
269 
270 	switch (ch) {
271 	case EOF:
272 		/* No arguments since last exec. */
273 		if (p == bbp) {
274 			if (runeof)
275 				prerun(0, av);
276 			waitchildren(*argv, 1);
277 			exit(rval);
278 		}
279 		goto arg1;
280 	case '\0':
281 		if (zflag) {
282 			/*
283 			 * Increment 'count', so that nulls will be treated
284 			 * as end-of-line, as well as end-of-argument.  This
285 			 * is needed so -0 works properly with -I and -L.
286 			 */
287 			count++;
288 			goto arg2;
289 		}
290 		goto addch;
291 	case '\n':
292 		if (zflag)
293 			goto addch;
294 		hasblank = 1;
295 		if (hadblank == 0)
296 			count++;
297 
298 		/* Quotes do not escape newlines. */
299 arg1:		if (insingle || indouble)
300 			errx(1, "unterminated quote");
301 arg2:
302 		foundeof = *eofstr != '\0' &&
303 		    strcmp(argp, eofstr) == 0;
304 
305 		/* Do not make empty args unless they are quoted */
306 		if ((argp != p || wasquoted) && !foundeof) {
307 			*p++ = '\0';
308 			*xp++ = argp;
309 			if (Iflag) {
310 				size_t curlen;
311 
312 				if (inpline == NULL)
313 					curlen = 0;
314 				else {
315 					/*
316 					 * If this string is not zero
317 					 * length, append a space for
318 					 * separation before the next
319 					 * argument.
320 					 */
321 					if ((curlen = strlen(inpline)))
322 						strlcat(inpline, " ", inpsize);
323 				}
324 				curlen++;
325 				/*
326 				 * Allocate enough to hold what we will
327 				 * be holding in a second, and to append
328 				 * a space next time through, if we have
329 				 * to.
330 				 */
331 				inpsize = curlen + 2 + strlen(argp);
332 				inpline = realloc(inpline, inpsize);
333 				if (inpline == NULL)
334 					errx(1, "realloc failed");
335 				if (curlen == 1)
336 					strlcpy(inpline, argp, inpsize);
337 				else
338 					strlcat(inpline, argp, inpsize);
339 			}
340 		}
341 
342 		/*
343 		 * If max'd out on args or buffer, or reached EOF,
344 		 * run the command.  If xflag and max'd out on buffer
345 		 * but not on args, object.  Having reached the limit
346 		 * of input lines, as specified by -L is the same as
347 		 * maxing out on arguments.
348 		 */
349 		if (xp == endxp || p > ebp || ch == EOF ||
350 		    (Lflag <= count && xflag) || foundeof) {
351 			if (xflag && xp != endxp && p > ebp)
352 				errx(1, "insufficient space for arguments");
353 			if (jfound) {
354 				for (avj = argv; *avj; avj++)
355 					*xp++ = *avj;
356 			}
357 			prerun(argc, av);
358 			if (ch == EOF || foundeof) {
359 				waitchildren(*argv, 1);
360 				exit(rval);
361 			}
362 			p = bbp;
363 			xp = bxp;
364 			count = 0;
365 		}
366 		argp = p;
367 		wasquoted = 0;
368 		break;
369 	case '\'':
370 		if (indouble || zflag)
371 			goto addch;
372 		insingle = !insingle;
373 		wasquoted = 1;
374 		break;
375 	case '"':
376 		if (insingle || zflag)
377 			goto addch;
378 		indouble = !indouble;
379 		wasquoted = 1;
380 		break;
381 	case '\\':
382 		if (zflag)
383 			goto addch;
384 		/* Backslash escapes anything, is escaped by quotes. */
385 		if (!insingle && !indouble && (ch = getchar()) == EOF)
386 			errx(1, "backslash at EOF");
387 		/* FALLTHROUGH */
388 	default:
389 addch:		if (p < ebp) {
390 			*p++ = ch;
391 			break;
392 		}
393 
394 		/* If only one argument, not enough buffer space. */
395 		if (bxp == xp)
396 			errx(1, "insufficient space for argument");
397 		/* Didn't hit argument limit, so if xflag object. */
398 		if (xflag)
399 			errx(1, "insufficient space for arguments");
400 
401 		if (jfound) {
402 			for (avj = argv; *avj; avj++)
403 				*xp++ = *avj;
404 		}
405 		prerun(argc, av);
406 		xp = bxp;
407 		cnt = ebp - argp;
408 		memmove(bbp, argp, (size_t)cnt);
409 		p = (argp = bbp) + cnt;
410 		*p++ = ch;
411 		break;
412 	}
413 	hadblank = hasblank;
414 }
415 
416 /*
417  * Do things necessary before run()'ing, such as -I substitution,
418  * and then call run().
419  */
420 static void
421 prerun(int argc, char *argv[])
422 {
423 	char **tmp, **tmp2, **avj;
424 	int repls;
425 
426 	repls = Rflag;
427 	runeof = 0;
428 
429 	if (argc == 0 || repls == 0) {
430 		*xp = NULL;
431 		run(argv);
432 		return;
433 	}
434 
435 	avj = argv;
436 
437 	/*
438 	 * Allocate memory to hold the argument list, and
439 	 * a NULL at the tail.
440 	 */
441 	tmp = calloc(argc + 1, sizeof(char *));
442 	if (tmp == NULL)
443 		err(1, NULL);
444 	tmp2 = tmp;
445 
446 	/*
447 	 * Save the first argument and iterate over it, we
448 	 * cannot do strnsubst() to it.
449 	 */
450 	if ((*tmp++ = strdup(*avj++)) == NULL)
451 		err(1, NULL);
452 
453 	/*
454 	 * For each argument to utility, if we have not used up
455 	 * the number of replacements we are allowed to do, and
456 	 * if the argument contains at least one occurrence of
457 	 * replstr, call strnsubst(), else just save the string.
458 	 * Iterations over elements of avj and tmp are done
459 	 * where appropriate.
460 	 */
461 	while (--argc) {
462 		*tmp = *avj++;
463 		if (repls && strstr(*tmp, replstr) != NULL) {
464 			strnsubst(tmp++, replstr, inpline, (size_t)255);
465 			if (repls > 0)
466 				repls--;
467 		} else {
468 			if ((*tmp = strdup(*tmp)) == NULL)
469 				err(1, NULL);
470 			tmp++;
471 		}
472 	}
473 
474 	/*
475 	 * Run it.
476 	 */
477 	*tmp = NULL;
478 	run(tmp2);
479 
480 	/*
481 	 * Walk from the tail to the head, free along the way.
482 	 */
483 	for (; tmp2 != tmp; tmp--)
484 		free(*tmp);
485 	/*
486 	 * Now free the list itself.
487 	 */
488 	free(tmp2);
489 
490 	/*
491 	 * Free the input line buffer, if we have one.
492 	 */
493 	free(inpline);
494 	inpline = NULL;
495 }
496 
497 static void
498 run(char **argv)
499 {
500 	pid_t pid;
501 	int fd;
502 	char **avec;
503 
504 	/*
505 	 * If the user wants to be notified of each command before it is
506 	 * executed, notify them.  If they want the notification to be
507 	 * followed by a prompt, then prompt them.
508 	 */
509 	if (tflag || pflag) {
510 		(void)fprintf(stderr, "%s", *argv);
511 		for (avec = argv + 1; *avec != NULL; ++avec)
512 			(void)fprintf(stderr, " %s", *avec);
513 		/*
514 		 * If the user has asked to be prompted, do so.
515 		 */
516 		if (pflag)
517 			/*
518 			 * If they asked not to exec, return without execution
519 			 * but if they asked to, go to the execution.  If we
520 			 * could not open their tty, break the switch and drop
521 			 * back to -t behaviour.
522 			 */
523 			switch (prompt()) {
524 			case 0:
525 				return;
526 			case 1:
527 				goto exec;
528 			case 2:
529 				break;
530 			}
531 		(void)fprintf(stderr, "\n");
532 		(void)fflush(stderr);
533 	}
534 exec:
535 	switch (pid = vfork()) {
536 	case -1:
537 		err(1, "vfork");
538 	case 0:
539 		if (oflag) {
540 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
541 				warn("can't open /dev/tty");
542 				_exit(1);
543 			}
544 		} else {
545 			fd = open(_PATH_DEVNULL, O_RDONLY);
546 		}
547 		if (fd > STDIN_FILENO) {
548 			if (dup2(fd, STDIN_FILENO) != 0) {
549 				warn("can't dup2 to stdin");
550 				_exit(1);
551 			}
552 			close(fd);
553 		}
554 		execvp(argv[0], argv);
555 		warn("%s", argv[0]);
556 		_exit(errno == ENOENT ? 127 : 126);
557 	}
558 	curprocs++;
559 	waitchildren(*argv, 0);
560 }
561 
562 static void
563 waitchildren(const char *name, int waitall)
564 {
565 	pid_t pid;
566 	int status;
567 
568 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
569 	    WNOHANG : 0)) > 0) {
570 		curprocs--;
571 		/*
572 		 * According to POSIX, we have to exit if the utility exits
573 		 * with a 255 status, or is interrupted by a signal.
574 		 * We are allowed to return any exit status between 1 and
575 		 * 125 in these cases, but we'll use 124 and 125, the same
576 		 * values used by GNU xargs.
577 		 */
578 		if (WIFEXITED(status)) {
579 			if (WEXITSTATUS(status) == 255) {
580 				warnx("%s exited with status 255", name);
581 				exit(124);
582 			} else if (WEXITSTATUS(status) == 127 ||
583 			    WEXITSTATUS(status) == 126) {
584 				exit(WEXITSTATUS(status));
585 			} else if (WEXITSTATUS(status) != 0) {
586 				rval = 123;
587 			}
588 		} else if (WIFSIGNALED(status)) {
589 			if (WTERMSIG(status) != SIGPIPE) {
590 				if (WTERMSIG(status) < NSIG)
591 					warnx("%s terminated by SIG%s", name,
592 					    sys_signame[WTERMSIG(status)]);
593 				else
594 					warnx("%s terminated by signal %d",
595 					    name, WTERMSIG(status));
596 			}
597 			exit(125);
598 		}
599 	}
600 	if (pid == -1 && errno != ECHILD)
601 		err(1, "waitpid");
602 }
603 
604 /*
605  * Prompt the user about running a command.
606  */
607 static int
608 prompt(void)
609 {
610 	regex_t cre;
611 	size_t rsize;
612 	int match;
613 	char *response;
614 	FILE *ttyfp;
615 
616 	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
617 		return (2);	/* Indicate that the TTY failed to open. */
618 	(void)fprintf(stderr, "?...");
619 	(void)fflush(stderr);
620 	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
621 	    regcomp(&cre, nl_langinfo(YESEXPR), REG_BASIC) != 0) {
622 		(void)fclose(ttyfp);
623 		return (0);
624 	}
625 	response[rsize - 1] = '\0';
626 	match = regexec(&cre, response, 0, NULL, 0);
627 	(void)fclose(ttyfp);
628 	regfree(&cre);
629 	return (match == 0);
630 }
631 
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
641