xref: /openbsd-src/usr.bin/xargs/xargs.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: xargs.c,v 1.25 2007/09/02 15:19:36 deraadt Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #ifndef lint
39 static const char copyright[] =
40 "@(#) Copyright (c) 1990, 1993\n\
41 	The Regents of the University of California.  All rights reserved.\n";
42 #endif /* not lint */
43 
44 #ifndef lint
45 #if 0
46 static const char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
47 #else
48 static const char rcsid[] = "$OpenBSD: xargs.c,v 1.25 2007/09/02 15:19:36 deraadt Exp $";
49 #endif
50 #endif /* not lint */
51 
#include <sys/param.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
69 
/*
 * Forward declarations.  strnsubst() is the only non-static routine; it
 * is called from prerun() but no definition appears in this file.
 */
static void	parse_input(int, char *[]);
static void	prerun(int, char *[]);
static int	prompt(void);
static void	run(char **);
static void	usage(void);
void		strnsubst(char **, const char *, const char *, size_t);
static void	waitchildren(const char *, int);

/*
 * Argument vector handed to the utility:
 *   av     base of the calloc'd vector (utility name first)
 *   bxp    first slot available for arguments read from stdin
 *   xp     next free slot
 *   endxp  bxp + nargs, i.e. the slot at which the vector is full
 *   ep     cursor used by main() to walk environ
 */
static char **av, **bxp, **ep, **endxp, **xp;
/*
 * Character buffer backing the stdin arguments:
 *   bbp      start of the buffer
 *   ebp      bbp + nline - 1, the fill limit
 *   p        next write position
 *   argp     start of the argument currently being collected
 *   inpline  space-joined copy of the collected arguments, built for -I
 *   replstr  replacement string given to -I or -J
 */
static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
/* Logical end-of-file string from -E; "" disables the check. */
static const char *eofstr;
/*
 * count               newlines seen since the last run (compared to Lflag)
 * insingle, indouble  currently inside '...' or "..."
 * oflag, pflag, tflag, zflag  set by -o, -p, -t and -0
 * Rflag               replacement limit from -R (defaulted when -I is given)
 * rval                exit status xargs will eventually return
 */
static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
/*
 * cnt        bytes used by the utility name and its fixed arguments; also
 *            reused as scratch by parse_input() when the buffer overflows
 * Iflag      -I given
 * jfound     the -J replstr was seen among the utility's arguments
 * Lflag      line limit from -L (forced to 1 by -I)
 * wasquoted  the current argument contained a quote character
 * xflag      -x given, or implied by -I / -L
 * runeof     still run the utility at EOF when nothing has been run yet;
 *            cleared by -r and by the first prerun()
 */
static int cnt, Iflag, jfound, Lflag, wasquoted, xflag, runeof = 1;
/* Children currently outstanding, and the -P limit on them. */
static int curprocs, maxprocs;
/* Allocated size of inpline. */
static size_t inpsize;

extern char **environ;
87 
88 int
89 main(int argc, char *argv[])
90 {
91 	long arg_max;
92 	int ch, Jflag, nargs, nflag, nline;
93 	size_t linelen;
94 	char *endptr;
95 
96 	inpline = replstr = NULL;
97 	ep = environ;
98 	eofstr = "";
99 	Jflag = nflag = 0;
100 
101 	(void)setlocale(LC_MESSAGES, "");
102 
103 	/*
104 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
105 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
106 	 * that the smallest argument is 2 bytes in length, this means that
107 	 * the number of arguments is limited to:
108 	 *
109 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
110 	 *
111 	 * We arbitrarily limit the number of arguments to 5000.  This is
112 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
113 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
114 	 * probably not worthwhile.
115 	 */
116 	nargs = 5000;
117 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
118 		errx(1, "sysconf(_SC_ARG_MAX) failed");
119 	nline = arg_max - 4 * 1024;
120 	while (*ep != NULL) {
121 		/* 1 byte for each '\0' */
122 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
123 	}
124 	maxprocs = 1;
125 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
126 		switch (ch) {
127 		case 'E':
128 			eofstr = optarg;
129 			break;
130 		case 'I':
131 			Jflag = 0;
132 			Iflag = 1;
133 			Lflag = 1;
134 			replstr = optarg;
135 			break;
136 		case 'J':
137 			Iflag = 0;
138 			Jflag = 1;
139 			replstr = optarg;
140 			break;
141 		case 'L':
142 			Lflag = atoi(optarg);
143 			break;
144 		case 'n':
145 			nflag = 1;
146 			if ((nargs = atoi(optarg)) <= 0)
147 				errx(1, "illegal argument count");
148 			break;
149 		case 'o':
150 			oflag = 1;
151 			break;
152 		case 'P':
153 			if ((maxprocs = atoi(optarg)) <= 0)
154 				errx(1, "max. processes must be >0");
155 			break;
156 		case 'p':
157 			pflag = 1;
158 			break;
159 		case 'r':
160 			runeof = 0;
161 			break;
162 		case 'R':
163 			Rflag = strtol(optarg, &endptr, 10);
164 			if (*endptr != '\0')
165 				errx(1, "replacements must be a number");
166 			break;
167 		case 's':
168 			nline = atoi(optarg);
169 			break;
170 		case 't':
171 			tflag = 1;
172 			break;
173 		case 'x':
174 			xflag = 1;
175 			break;
176 		case '0':
177 			zflag = 1;
178 			break;
179 		case '?':
180 		default:
181 			usage();
182 	}
183 	argc -= optind;
184 	argv += optind;
185 
186 	if (!Iflag && Rflag)
187 		usage();
188 	if (Iflag && !Rflag)
189 		Rflag = 5;
190 	if (xflag && !nflag)
191 		usage();
192 	if (Iflag || Lflag)
193 		xflag = 1;
194 	if (replstr != NULL && *replstr == '\0')
195 		errx(1, "replstr may not be empty");
196 
197 	/*
198 	 * Allocate pointers for the utility name, the utility arguments,
199 	 * the maximum arguments to be read from stdin and the trailing
200 	 * NULL.
201 	 */
202 	linelen = 1 + argc + nargs + 1;
203 	if ((av = bxp = calloc(linelen, sizeof(char **))) == NULL)
204 		err(1, NULL);
205 
206 	/*
207 	 * Use the user's name for the utility as argv[0], just like the
208 	 * shell.  Echo is the default.  Set up pointers for the user's
209 	 * arguments.
210 	 */
211 	if (*argv == NULL)
212 		cnt = strlen(*bxp++ = _PATH_ECHO);
213 	else {
214 		do {
215 			if (Jflag && strcmp(*argv, replstr) == 0) {
216 				char **avj;
217 				jfound = 1;
218 				argv++;
219 				for (avj = argv; *avj; avj++)
220 					cnt += strlen(*avj) + 1;
221 				break;
222 			}
223 			cnt += strlen(*bxp++ = *argv) + 1;
224 		} while (*++argv != NULL);
225 	}
226 
227 	/*
228 	 * Set up begin/end/traversing pointers into the array.  The -n
229 	 * count doesn't include the trailing NULL pointer, so the malloc
230 	 * added in an extra slot.
231 	 */
232 	endxp = (xp = bxp) + nargs;
233 
234 	/*
235 	 * Allocate buffer space for the arguments read from stdin and the
236 	 * trailing NULL.  Buffer space is defined as the default or specified
237 	 * space, minus the length of the utility name and arguments.  Set up
238 	 * begin/end/traversing pointers into the array.  The -s count does
239 	 * include the trailing NULL, so the malloc didn't add in an extra
240 	 * slot.
241 	 */
242 	nline -= cnt;
243 	if (nline <= 0)
244 		errx(1, "insufficient space for command");
245 
246 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
247 		err(1, NULL);
248 	ebp = (argp = p = bbp) + nline - 1;
249 	for (;;)
250 		parse_input(argc, argv);
251 }
252 
253 static void
254 parse_input(int argc, char *argv[])
255 {
256 	int ch, foundeof = 0;
257 	char **avj;
258 
259 	switch (ch = getchar()) {
260 	case EOF:
261 		/* No arguments since last exec. */
262 		if (p == bbp) {
263 			if (runeof)
264 				prerun(0, av);
265 			waitchildren(*argv, 1);
266 			exit(rval);
267 		}
268 		goto arg1;
269 	case ' ':
270 	case '\t':
271 		/* Quotes escape tabs and spaces. */
272 		if (insingle || indouble || zflag)
273 			goto addch;
274 		goto arg2;
275 	case '\0':
276 		if (zflag)
277 			goto arg2;
278 		goto addch;
279 	case '\n':
280 		count++;
281 		if (zflag)
282 			goto addch;
283 
284 		/* Quotes do not escape newlines. */
285 arg1:		if (insingle || indouble)
286 			errx(1, "unterminated quote");
287 arg2:
288 		foundeof = *eofstr != '\0' &&
289 		    strcmp(argp, eofstr) == 0;
290 
291 		/* Do not make empty args unless they are quoted */
292 		if ((argp != p || wasquoted) && !foundeof) {
293 			*p++ = '\0';
294 			*xp++ = argp;
295 			if (Iflag) {
296 				size_t curlen;
297 
298 				if (inpline == NULL)
299 					curlen = 0;
300 				else {
301 					/*
302 					 * If this string is not zero
303 					 * length, append a space for
304 					 * separation before the next
305 					 * argument.
306 					 */
307 					if ((curlen = strlen(inpline)))
308 						strlcat(inpline, " ", inpsize);
309 				}
310 				curlen++;
311 				/*
312 				 * Allocate enough to hold what we will
313 				 * be holding in a second, and to append
314 				 * a space next time through, if we have
315 				 * to.
316 				 */
317 				inpsize = curlen + 2 + strlen(argp);
318 				inpline = realloc(inpline, inpsize);
319 				if (inpline == NULL)
320 					errx(1, "realloc failed");
321 				if (curlen == 1)
322 					strlcpy(inpline, argp, inpsize);
323 				else
324 					strlcat(inpline, argp, inpsize);
325 			}
326 		}
327 
328 		/*
329 		 * If max'd out on args or buffer, or reached EOF,
330 		 * run the command.  If xflag and max'd out on buffer
331 		 * but not on args, object.  Having reached the limit
332 		 * of input lines, as specified by -L is the same as
333 		 * maxing out on arguments.
334 		 */
335 		if (xp == endxp || p > ebp || ch == EOF ||
336 		    (Lflag <= count && xflag) || foundeof) {
337 			if (xflag && xp != endxp && p > ebp)
338 				errx(1, "insufficient space for arguments");
339 			if (jfound) {
340 				for (avj = argv; *avj; avj++)
341 					*xp++ = *avj;
342 			}
343 			prerun(argc, av);
344 			if (ch == EOF || foundeof) {
345 				waitchildren(*argv, 1);
346 				exit(rval);
347 			}
348 			p = bbp;
349 			xp = bxp;
350 			count = 0;
351 		}
352 		argp = p;
353 		wasquoted = 0;
354 		break;
355 	case '\'':
356 		if (indouble || zflag)
357 			goto addch;
358 		insingle = !insingle;
359 		wasquoted = 1;
360 		break;
361 	case '"':
362 		if (insingle || zflag)
363 			goto addch;
364 		indouble = !indouble;
365 		wasquoted = 1;
366 		break;
367 	case '\\':
368 		if (zflag)
369 			goto addch;
370 		/* Backslash escapes anything, is escaped by quotes. */
371 		if (!insingle && !indouble && (ch = getchar()) == EOF)
372 			errx(1, "backslash at EOF");
373 		/* FALLTHROUGH */
374 	default:
375 addch:		if (p < ebp) {
376 			*p++ = ch;
377 			break;
378 		}
379 
380 		/* If only one argument, not enough buffer space. */
381 		if (bxp == xp)
382 			errx(1, "insufficient space for argument");
383 		/* Didn't hit argument limit, so if xflag object. */
384 		if (xflag)
385 			errx(1, "insufficient space for arguments");
386 
387 		if (jfound) {
388 			for (avj = argv; *avj; avj++)
389 				*xp++ = *avj;
390 		}
391 		prerun(argc, av);
392 		xp = bxp;
393 		cnt = ebp - argp;
394 		memcpy(bbp, argp, (size_t)cnt);
395 		p = (argp = bbp) + cnt;
396 		*p++ = ch;
397 		break;
398 	}
399 }
400 
401 /*
402  * Do things necessary before run()'ing, such as -I substitution,
403  * and then call run().
404  */
405 static void
406 prerun(int argc, char *argv[])
407 {
408 	char **tmp, **tmp2, **avj;
409 	int repls;
410 
411 	repls = Rflag;
412 	runeof = 0;
413 
414 	if (argc == 0 || repls == 0) {
415 		*xp = NULL;
416 		run(argv);
417 		return;
418 	}
419 
420 	avj = argv;
421 
422 	/*
423 	 * Allocate memory to hold the argument list, and
424 	 * a NULL at the tail.
425 	 */
426 	tmp = calloc(argc + 1, sizeof(char**));
427 	if (tmp == NULL)
428 		err(1, NULL);
429 	tmp2 = tmp;
430 
431 	/*
432 	 * Save the first argument and iterate over it, we
433 	 * cannot do strnsubst() to it.
434 	 */
435 	if ((*tmp++ = strdup(*avj++)) == NULL)
436 		err(1, NULL);
437 
438 	/*
439 	 * For each argument to utility, if we have not used up
440 	 * the number of replacements we are allowed to do, and
441 	 * if the argument contains at least one occurrence of
442 	 * replstr, call strnsubst(), else just save the string.
443 	 * Iterations over elements of avj and tmp are done
444 	 * where appropriate.
445 	 */
446 	while (--argc) {
447 		*tmp = *avj++;
448 		if (repls && strstr(*tmp, replstr) != NULL) {
449 			strnsubst(tmp++, replstr, inpline, (size_t)255);
450 			if (repls > 0)
451 				repls--;
452 		} else {
453 			if ((*tmp = strdup(*tmp)) == NULL)
454 				err(1, NULL);
455 			tmp++;
456 		}
457 	}
458 
459 	/*
460 	 * Run it.
461 	 */
462 	*tmp = NULL;
463 	run(tmp2);
464 
465 	/*
466 	 * Walk from the tail to the head, free along the way.
467 	 */
468 	for (; tmp2 != tmp; tmp--)
469 		free(*tmp);
470 	/*
471 	 * Now free the list itself.
472 	 */
473 	free(tmp2);
474 
475 	/*
476 	 * Free the input line buffer, if we have one.
477 	 */
478 	if (inpline != NULL) {
479 		free(inpline);
480 		inpline = NULL;
481 	}
482 }
483 
484 static void
485 run(char **argv)
486 {
487 	pid_t pid;
488 	int fd;
489 	char **avec;
490 
491 	/*
492 	 * If the user wants to be notified of each command before it is
493 	 * executed, notify them.  If they want the notification to be
494 	 * followed by a prompt, then prompt them.
495 	 */
496 	if (tflag || pflag) {
497 		(void)fprintf(stderr, "%s", *argv);
498 		for (avec = argv + 1; *avec != NULL; ++avec)
499 			(void)fprintf(stderr, " %s", *avec);
500 		/*
501 		 * If the user has asked to be prompted, do so.
502 		 */
503 		if (pflag)
504 			/*
505 			 * If they asked not to exec, return without execution
506 			 * but if they asked to, go to the execution.  If we
507 			 * could not open their tty, break the switch and drop
508 			 * back to -t behaviour.
509 			 */
510 			switch (prompt()) {
511 			case 0:
512 				return;
513 			case 1:
514 				goto exec;
515 			case 2:
516 				break;
517 			}
518 		(void)fprintf(stderr, "\n");
519 		(void)fflush(stderr);
520 	}
521 exec:
522 	switch (pid = vfork()) {
523 	case -1:
524 		err(1, "vfork");
525 	case 0:
526 		if (oflag) {
527 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
528 				warn("can't open /dev/tty");
529 				_exit(1);
530 			}
531 		} else {
532 			fd = open(_PATH_DEVNULL, O_RDONLY);
533 		}
534 		if (fd > STDIN_FILENO) {
535 			if (dup2(fd, STDIN_FILENO) != 0) {
536 				warn("can't dup2 to stdin");
537 				_exit(1);
538 			}
539 			close(fd);
540 		}
541 		execvp(argv[0], argv);
542 		warn("%s", argv[0]);
543 		_exit(errno == ENOENT ? 127 : 126);
544 	}
545 	curprocs++;
546 	waitchildren(*argv, 0);
547 }
548 
549 static void
550 waitchildren(const char *name, int waitall)
551 {
552 	pid_t pid;
553 	int status;
554 
555 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
556 	    WNOHANG : 0)) > 0) {
557 		curprocs--;
558 		/*
559 		 * According to POSIX, we have to exit if the utility exits
560 		 * with a 255 status, or is interrupted by a signal.
561 		 * We are allowed to return any exit status between 1 and
562 		 * 125 in these cases, but we'll use 124 and 125, the same
563 		 * values used by GNU xargs.
564 		 */
565 		if (WIFEXITED(status)) {
566 			if (WEXITSTATUS(status) == 255) {
567 				warnx("%s exited with status 255", name);
568 				exit(124);
569 			} else if (WEXITSTATUS(status) == 127 ||
570 			    WEXITSTATUS(status) == 126) {
571 				exit(WEXITSTATUS(status));
572 			} else if (WEXITSTATUS(status) != 0) {
573 				rval = 123;
574 			}
575 		} else if (WIFSIGNALED(status)) {
576 			if (WTERMSIG(status) != SIGPIPE) {
577 				if (WTERMSIG(status) < NSIG)
578 					warnx("%s terminated by SIG%s", name,
579 					    sys_signame[WTERMSIG(status)]);
580 				else
581 					warnx("%s terminated by signal %d",
582 					    name, WTERMSIG(status));
583 			}
584 			exit(125);
585 		}
586 	}
587 	if (pid == -1 && errno != ECHILD)
588 		err(1, "waitpid");
589 }
590 
591 /*
592  * Prompt the user about running a command.
593  */
594 static int
595 prompt(void)
596 {
597 	regex_t cre;
598 	size_t rsize;
599 	int match;
600 	char *response;
601 	FILE *ttyfp;
602 
603 	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
604 		return (2);	/* Indicate that the TTY failed to open. */
605 	(void)fprintf(stderr, "?...");
606 	(void)fflush(stderr);
607 	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
608 	    regcomp(&cre, nl_langinfo(YESEXPR), REG_BASIC) != 0) {
609 		(void)fclose(ttyfp);
610 		return (0);
611 	}
612 	response[rsize - 1] = '\0';
613 	match = regexec(&cre, response, 0, NULL, 0);
614 	(void)fclose(ttyfp);
615 	regfree(&cre);
616 	return (match == 0);
617 }
618 
/*
 * Write the command synopsis to stderr and terminate with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
627 }
628