xref: /openbsd-src/usr.bin/xargs/xargs.c (revision d1526e1c2b5324dab20faa36b83f3875d8017ce2)
1 /*	$OpenBSD: xargs.c,v 1.38 2023/12/23 15:58:58 millert Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #include <sys/wait.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <paths.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <limits.h>
51 
52 #include "pathnames.h"
53 
/* Local helpers. */
static void	usage(void);
static int	prompt(void);
static void	run(char **);
static void	prerun(int, char *[]);
static void	parse_input(int, char *[]);
static void	waitchildren(const char *, int);

/* Defined outside this file. */
void		strnsubst(char **, const char *, const char *, size_t);

/*
 * Argument vector handed to the utility.  av is the start of the
 * vector, bxp the first slot available for arguments read from stdin,
 * xp the next free slot and endxp the slot at which the -n limit is hit.
 */
static char **av;
static char **bxp;
static char **xp;
static char **endxp;

/* Cursor used by main() to walk the environment. */
static char **ep;

/*
 * Character buffer holding the arguments read from stdin.  bbp is the
 * start of the buffer, ebp the limit checked before storing a character,
 * p the next write position and argp the start of the argument that is
 * currently being collected.
 */
static char *bbp;
static char *ebp;
static char *p;
static char *argp;

/* Input accumulated for -I substitution, and its allocated size. */
static char *inpline;
static size_t inpsize;

/* Replacement string given with -I or -J. */
static char *replstr;

/* Logical end-of-file string given with -E ("" when unset). */
static const char *eofstr;

/* Command line options. */
static int Iflag;
static int Lflag;
static int oflag;
static int pflag;
static int Rflag;
static int tflag;
static int xflag;
static int zflag;

/* Cleared by -r and by the first prerun(); see parse_input() at EOF. */
static int runeof = 1;

/* Set when the -J replacement string was found among the arguments. */
static int jfound;

/* Quoting state of the input parser. */
static int insingle;
static int indouble;
static int wasquoted;

/* Input lines seen since the last exec, compared against -L. */
static int count;

/* Bytes taken up by the utility name and its fixed arguments. */
static int cnt;

/* Exit status reported by xargs itself. */
static int rval;

/* Children currently running, and the -P limit. */
static int curprocs;
static int maxprocs;

extern char **environ;
71 
72 int
main(int argc,char * argv[])73 main(int argc, char *argv[])
74 {
75 	long arg_max;
76 	int ch, Jflag, nargs, nflag, nline;
77 	size_t linelen;
78 	char *endptr;
79 	const char *errstr;
80 
81 	inpline = replstr = NULL;
82 	ep = environ;
83 	eofstr = "";
84 	Jflag = nflag = 0;
85 
86 	/*
87 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
88 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
89 	 * that the smallest argument is 2 bytes in length, this means that
90 	 * the number of arguments is limited to:
91 	 *
92 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
93 	 *
94 	 * We arbitrarily limit the number of arguments to 5000.  This is
95 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
96 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
97 	 * probably not worthwhile.
98 	 */
99 	nargs = 5000;
100 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
101 		errx(1, "sysconf(_SC_ARG_MAX) failed");
102 
103 	if (pledge("stdio rpath proc exec", NULL) == -1)
104 		err(1, "pledge");
105 
106 	nline = arg_max - 4 * 1024;
107 	while (*ep != NULL) {
108 		/* 1 byte for each '\0' */
109 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
110 	}
111 	maxprocs = 1;
112 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
113 		switch (ch) {
114 		case 'E':
115 			eofstr = optarg;
116 			break;
117 		case 'I':
118 			Jflag = 0;
119 			Iflag = 1;
120 			Lflag = 1;
121 			replstr = optarg;
122 			break;
123 		case 'J':
124 			Iflag = 0;
125 			Jflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'L':
129 			Lflag = strtonum(optarg, 0, INT_MAX, &errstr);
130 			if (errstr)
131 				errx(1, "-L %s: %s", optarg, errstr);
132 			break;
133 		case 'n':
134 			nflag = 1;
135 			nargs = strtonum(optarg, 1, INT_MAX, &errstr);
136 			if (errstr)
137 				errx(1, "-n %s: %s", optarg, errstr);
138 			break;
139 		case 'o':
140 			oflag = 1;
141 			break;
142 		case 'P':
143 			maxprocs = strtonum(optarg, 1, INT_MAX, &errstr);
144 			if (errstr)
145 				errx(1, "-P %s: %s", optarg, errstr);
146 			break;
147 		case 'p':
148 			pflag = 1;
149 			break;
150 		case 'r':
151 			runeof = 0;
152 			break;
153 		case 'R':
154 			Rflag = strtol(optarg, &endptr, 10);
155 			if (*endptr != '\0')
156 				errx(1, "replacements must be a number");
157 			break;
158 		case 's':
159 			nline = strtonum(optarg, 0, INT_MAX, &errstr);
160 			if (errstr)
161 				errx(1, "-s %s: %s", optarg, errstr);
162 			break;
163 		case 't':
164 			tflag = 1;
165 			break;
166 		case 'x':
167 			xflag = 1;
168 			break;
169 		case '0':
170 			zflag = 1;
171 			break;
172 		default:
173 			usage();
174 		}
175 	argc -= optind;
176 	argv += optind;
177 
178 	if (!Iflag && Rflag)
179 		usage();
180 	if (Iflag && !Rflag)
181 		Rflag = 5;
182 	if (xflag && !nflag)
183 		usage();
184 	if (Iflag || Lflag)
185 		xflag = 1;
186 	if (replstr != NULL && *replstr == '\0')
187 		errx(1, "replstr may not be empty");
188 
189 	/*
190 	 * Allocate pointers for the utility name, the utility arguments,
191 	 * the maximum arguments to be read from stdin and the trailing
192 	 * NULL.
193 	 */
194 	linelen = 1 + argc + nargs + 1;
195 	if ((av = bxp = calloc(linelen, sizeof(char *))) == NULL)
196 		err(1, NULL);
197 
198 	/*
199 	 * Use the user's name for the utility as argv[0], just like the
200 	 * shell.  Echo is the default.  Set up pointers for the user's
201 	 * arguments.
202 	 */
203 	if (*argv == NULL)
204 		cnt = strlen(*bxp++ = _PATH_ECHO);
205 	else {
206 		do {
207 			if (Jflag && strcmp(*argv, replstr) == 0) {
208 				char **avj;
209 				jfound = 1;
210 				argv++;
211 				for (avj = argv; *avj; avj++)
212 					cnt += strlen(*avj) + 1;
213 				break;
214 			}
215 			cnt += strlen(*bxp++ = *argv) + 1;
216 		} while (*++argv != NULL);
217 	}
218 
219 	/*
220 	 * Set up begin/end/traversing pointers into the array.  The -n
221 	 * count doesn't include the trailing NULL pointer, so the malloc
222 	 * added in an extra slot.
223 	 */
224 	endxp = (xp = bxp) + nargs;
225 
226 	/*
227 	 * Allocate buffer space for the arguments read from stdin and the
228 	 * trailing NULL.  Buffer space is defined as the default or specified
229 	 * space, minus the length of the utility name and arguments.  Set up
230 	 * begin/end/traversing pointers into the array.  The -s count does
231 	 * include the trailing NULL, so the malloc didn't add in an extra
232 	 * slot.
233 	 */
234 	nline -= cnt;
235 	if (nline <= 0)
236 		errx(1, "insufficient space for command");
237 
238 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
239 		err(1, NULL);
240 	ebp = (argp = p = bbp) + nline - 1;
241 	for (;;)
242 		parse_input(argc, argv);
243 }
244 
245 static void
parse_input(int argc,char * argv[])246 parse_input(int argc, char *argv[])
247 {
248 	int hasblank = 0;
249 	static int hadblank = 0;
250 	int ch, foundeof = 0;
251 	char **avj;
252 
253 	ch = getchar();
254 	if (isblank(ch)) {
255 		/* Quotes escape tabs and spaces. */
256 		if (insingle || indouble)
257 			goto addch;
258 		hasblank = 1;
259 		if (zflag)
260 			goto addch;
261 		goto arg2;
262 	}
263 
264 	switch (ch) {
265 	case EOF:
266 		/* No arguments since last exec. */
267 		if (p == bbp) {
268 			if (runeof)
269 				prerun(0, av);
270 			waitchildren(*argv, 1);
271 			exit(rval);
272 		}
273 		goto arg1;
274 	case '\0':
275 		if (zflag) {
276 			/*
277 			 * Increment 'count', so that nulls will be treated
278 			 * as end-of-line, as well as end-of-argument.  This
279 			 * is needed so -0 works properly with -I and -L.
280 			 */
281 			count++;
282 			goto arg2;
283 		}
284 		goto addch;
285 	case '\n':
286 		if (zflag)
287 			goto addch;
288 		hasblank = 1;
289 		if (hadblank == 0)
290 			count++;
291 
292 		/* Quotes do not escape newlines. */
293 arg1:		if (insingle || indouble)
294 			errx(1, "unterminated quote");
295 arg2:
296 		foundeof = *eofstr != '\0' &&
297 		    strcmp(argp, eofstr) == 0;
298 
299 		/*
300 		 * Do not make empty args unless they are quoted or
301 		 * we are run as "find -0" and not at EOF.
302 		 */
303 		if (((zflag && ch != EOF) || argp != p || wasquoted) &&
304 		    !foundeof) {
305 			*p++ = '\0';
306 			*xp++ = argp;
307 			if (Iflag) {
308 				size_t curlen;
309 
310 				if (inpline == NULL)
311 					curlen = 0;
312 				else {
313 					/*
314 					 * If this string is not zero
315 					 * length, append a space for
316 					 * separation before the next
317 					 * argument.
318 					 */
319 					if ((curlen = strlen(inpline)))
320 						strlcat(inpline, " ", inpsize);
321 				}
322 				curlen++;
323 				/*
324 				 * Allocate enough to hold what we will
325 				 * be holding in a second, and to append
326 				 * a space next time through, if we have
327 				 * to.
328 				 */
329 				inpsize = curlen + 2 + strlen(argp);
330 				inpline = realloc(inpline, inpsize);
331 				if (inpline == NULL)
332 					errx(1, "realloc failed");
333 				if (curlen == 1)
334 					strlcpy(inpline, argp, inpsize);
335 				else
336 					strlcat(inpline, argp, inpsize);
337 			}
338 		}
339 
340 		/*
341 		 * If max'd out on args or buffer, or reached EOF,
342 		 * run the command.  If xflag and max'd out on buffer
343 		 * but not on args, object.  Having reached the limit
344 		 * of input lines, as specified by -L is the same as
345 		 * maxing out on arguments.
346 		 */
347 		if (xp == endxp || p > ebp || ch == EOF ||
348 		    (Lflag <= count && xflag) || foundeof) {
349 			if (xflag && xp != endxp && p > ebp)
350 				errx(1, "insufficient space for arguments");
351 			if (jfound) {
352 				for (avj = argv; *avj; avj++)
353 					*xp++ = *avj;
354 			}
355 			prerun(argc, av);
356 			if (ch == EOF || foundeof) {
357 				waitchildren(*argv, 1);
358 				exit(rval);
359 			}
360 			p = bbp;
361 			xp = bxp;
362 			count = 0;
363 		}
364 		argp = p;
365 		wasquoted = 0;
366 		break;
367 	case '\'':
368 		if (indouble || zflag)
369 			goto addch;
370 		insingle = !insingle;
371 		wasquoted = 1;
372 		break;
373 	case '"':
374 		if (insingle || zflag)
375 			goto addch;
376 		indouble = !indouble;
377 		wasquoted = 1;
378 		break;
379 	case '\\':
380 		if (zflag)
381 			goto addch;
382 		/* Backslash escapes anything, is escaped by quotes. */
383 		if (!insingle && !indouble && (ch = getchar()) == EOF)
384 			errx(1, "backslash at EOF");
385 		/* FALLTHROUGH */
386 	default:
387 addch:		if (p < ebp) {
388 			*p++ = ch;
389 			break;
390 		}
391 
392 		/* If only one argument, not enough buffer space. */
393 		if (bxp == xp)
394 			errx(1, "insufficient space for argument");
395 		/* Didn't hit argument limit, so if xflag object. */
396 		if (xflag)
397 			errx(1, "insufficient space for arguments");
398 
399 		if (jfound) {
400 			for (avj = argv; *avj; avj++)
401 				*xp++ = *avj;
402 		}
403 		prerun(argc, av);
404 		xp = bxp;
405 		cnt = ebp - argp;
406 		memmove(bbp, argp, (size_t)cnt);
407 		p = (argp = bbp) + cnt;
408 		*p++ = ch;
409 		break;
410 	}
411 	hadblank = hasblank;
412 }
413 
414 /*
415  * Do things necessary before run()'ing, such as -I substitution,
416  * and then call run().
417  */
418 static void
prerun(int argc,char * argv[])419 prerun(int argc, char *argv[])
420 {
421 	char **tmp, **tmp2, **avj;
422 	int repls;
423 
424 	repls = Rflag;
425 	runeof = 0;
426 
427 	if (argc == 0 || repls == 0) {
428 		*xp = NULL;
429 		run(argv);
430 		return;
431 	}
432 
433 	avj = argv;
434 
435 	/*
436 	 * Allocate memory to hold the argument list, and
437 	 * a NULL at the tail.
438 	 */
439 	tmp = calloc(argc + 1, sizeof(char *));
440 	if (tmp == NULL)
441 		err(1, NULL);
442 	tmp2 = tmp;
443 
444 	/*
445 	 * Save the first argument and iterate over it, we
446 	 * cannot do strnsubst() to it.
447 	 */
448 	if ((*tmp++ = strdup(*avj++)) == NULL)
449 		err(1, NULL);
450 
451 	/*
452 	 * For each argument to utility, if we have not used up
453 	 * the number of replacements we are allowed to do, and
454 	 * if the argument contains at least one occurrence of
455 	 * replstr, call strnsubst(), else just save the string.
456 	 * Iterations over elements of avj and tmp are done
457 	 * where appropriate.
458 	 */
459 	while (--argc) {
460 		*tmp = *avj++;
461 		if (repls && strstr(*tmp, replstr) != NULL) {
462 			strnsubst(tmp++, replstr, inpline, (size_t)255);
463 			if (repls > 0)
464 				repls--;
465 		} else {
466 			if ((*tmp = strdup(*tmp)) == NULL)
467 				err(1, NULL);
468 			tmp++;
469 		}
470 	}
471 
472 	/*
473 	 * Run it.
474 	 */
475 	*tmp = NULL;
476 	run(tmp2);
477 
478 	/*
479 	 * Walk from the tail to the head, free along the way.
480 	 */
481 	for (; tmp2 != tmp; tmp--)
482 		free(*tmp);
483 	/*
484 	 * Now free the list itself.
485 	 */
486 	free(tmp2);
487 
488 	/*
489 	 * Free the input line buffer, if we have one.
490 	 */
491 	free(inpline);
492 	inpline = NULL;
493 }
494 
495 static void
run(char ** argv)496 run(char **argv)
497 {
498 	pid_t pid;
499 	int fd;
500 	char **avec;
501 
502 	/*
503 	 * If the user wants to be notified of each command before it is
504 	 * executed, notify them.  If they want the notification to be
505 	 * followed by a prompt, then prompt them.
506 	 */
507 	if (tflag || pflag) {
508 		fprintf(stderr, "%s", *argv);
509 		for (avec = argv + 1; *avec != NULL; ++avec)
510 			fprintf(stderr, " %s", *avec);
511 		/*
512 		 * If the user has asked to be prompted, do so.
513 		 */
514 		if (pflag)
515 			/*
516 			 * If they asked not to exec, return without execution
517 			 * but if they asked to, go to the execution.  If we
518 			 * could not open their tty, break the switch and drop
519 			 * back to -t behaviour.
520 			 */
521 			switch (prompt()) {
522 			case 0:
523 				return;
524 			case 1:
525 				goto exec;
526 			case 2:
527 				break;
528 			}
529 		fprintf(stderr, "\n");
530 		fflush(stderr);
531 	}
532 exec:
533 	switch (pid = vfork()) {
534 	case -1:
535 		err(1, "vfork");
536 	case 0:
537 		if (oflag) {
538 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
539 				warn("can't open /dev/tty");
540 				_exit(1);
541 			}
542 		} else {
543 			fd = open(_PATH_DEVNULL, O_RDONLY);
544 		}
545 		if (fd > STDIN_FILENO) {
546 			if (dup2(fd, STDIN_FILENO) != 0) {
547 				warn("can't dup2 to stdin");
548 				_exit(1);
549 			}
550 			close(fd);
551 		}
552 		execvp(argv[0], argv);
553 		warn("%s", argv[0]);
554 		_exit(errno == ENOENT ? 127 : 126);
555 	}
556 	curprocs++;
557 	waitchildren(*argv, 0);
558 }
559 
560 static void
waitchildren(const char * name,int waitall)561 waitchildren(const char *name, int waitall)
562 {
563 	pid_t pid;
564 	int status;
565 
566 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
567 	    WNOHANG : 0)) > 0) {
568 		curprocs--;
569 		/*
570 		 * According to POSIX, we have to exit if the utility exits
571 		 * with a 255 status, or is interrupted by a signal.
572 		 * We are allowed to return any exit status between 1 and
573 		 * 125 in these cases, but we'll use 124 and 125, the same
574 		 * values used by GNU xargs.
575 		 */
576 		if (WIFEXITED(status)) {
577 			if (WEXITSTATUS(status) == 255) {
578 				warnx("%s exited with status 255", name);
579 				exit(124);
580 			} else if (WEXITSTATUS(status) == 127 ||
581 			    WEXITSTATUS(status) == 126) {
582 				exit(WEXITSTATUS(status));
583 			} else if (WEXITSTATUS(status) != 0) {
584 				rval = 123;
585 			}
586 		} else if (WIFSIGNALED(status)) {
587 			if (WTERMSIG(status) != SIGPIPE) {
588 				if (WTERMSIG(status) < NSIG)
589 					warnx("%s terminated by SIG%s", name,
590 					    sys_signame[WTERMSIG(status)]);
591 				else
592 					warnx("%s terminated by signal %d",
593 					    name, WTERMSIG(status));
594 			}
595 			exit(125);
596 		}
597 	}
598 	if (pid == -1 && errno != ECHILD)
599 		err(1, "waitpid");
600 }
601 
602 /*
603  * Prompt the user about running a command.
604  */
/*
 * Ask the user on the terminal whether the command should be run.
 *
 * Returns 1 if the reply starts with 'y' or 'Y', 0 for any other reply
 * (or none), and 2 if the terminal could not be opened.
 */
static int
prompt(void)
{
	FILE *tty;
	char *answer;
	size_t len;
	int yes;

	tty = fopen(_PATH_TTY, "r");
	if (tty == NULL)
		return (2);	/* Indicate that the TTY failed to open. */

	fputs("?...", stderr);
	fflush(stderr);

	/* Only the first character of the reply is looked at. */
	yes = 0;
	answer = fgetln(tty, &len);
	if (answer != NULL) {
		switch (*answer) {
		case 'y':
		case 'Y':
			yes = 1;
			break;
		default:
			break;
		}
	}

	fclose(tty);
	return (yes);
}
622 
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
632