xref: /openbsd-src/usr.bin/xargs/xargs.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: xargs.c,v 1.26 2009/10/27 23:59:50 deraadt Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
#include <sys/param.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
55 
/* Forward declarations for the functions used in this file. */
static void	usage(void);
static int	prompt(void);
static void	run(char **);
static void	prerun(int, char *[]);
static void	parse_input(int, char *[]);
static void	waitchildren(const char *, int);
/* External linkage; no definition appears in this part of the file. */
void		strnsubst(char **, const char *, const char *, size_t);

/*
 * Argument vector handed to the utility:
 *	av	base of the vector (utility name first)
 *	bxp	first slot used for arguments read from stdin
 *	xp	next free slot
 *	endxp	bxp plus the maximum number of stdin arguments
 *	ep	cursor used by main() to walk environ
 */
static char **av;
static char **bxp;
static char **ep;
static char **endxp;
static char **xp;

/*
 * Character buffer that backs the stdin arguments:
 *	bbp	start of the buffer
 *	ebp	end marker of the buffer
 *	argp	start of the argument currently being collected
 *	p	next byte to be written
 *	inpline	input words joined by spaces, used for -I substitution
 *	replstr	replacement string given with -I or -J
 */
static char *argp;
static char *bbp;
static char *ebp;
static char *inpline;
static char *p;
static char *replstr;

/* End-of-file string given with -E; "" means none. */
static const char *eofstr;

/* Option flags and parser/run state shared by the functions in this file. */
static int count;
static int insingle;
static int indouble;
static int oflag;
static int pflag;
static int tflag;
static int Rflag;
static int rval;
static int zflag;
static int cnt;
static int Iflag;
static int jfound;
static int Lflag;
static int wasquoted;
static int xflag;
static int runeof = 1;	/* cleared by -r and by the first prerun() */

/* Number of children currently running and the -P limit. */
static int curprocs;
static int maxprocs;

/* Allocated size of inpline. */
static size_t inpsize;

extern char **environ;
73 
74 int
75 main(int argc, char *argv[])
76 {
77 	long arg_max;
78 	int ch, Jflag, nargs, nflag, nline;
79 	size_t linelen;
80 	char *endptr;
81 
82 	inpline = replstr = NULL;
83 	ep = environ;
84 	eofstr = "";
85 	Jflag = nflag = 0;
86 
87 	(void)setlocale(LC_MESSAGES, "");
88 
89 	/*
90 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
91 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
92 	 * that the smallest argument is 2 bytes in length, this means that
93 	 * the number of arguments is limited to:
94 	 *
95 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
96 	 *
97 	 * We arbitrarily limit the number of arguments to 5000.  This is
98 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
99 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
100 	 * probably not worthwhile.
101 	 */
102 	nargs = 5000;
103 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
104 		errx(1, "sysconf(_SC_ARG_MAX) failed");
105 	nline = arg_max - 4 * 1024;
106 	while (*ep != NULL) {
107 		/* 1 byte for each '\0' */
108 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
109 	}
110 	maxprocs = 1;
111 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
112 		switch (ch) {
113 		case 'E':
114 			eofstr = optarg;
115 			break;
116 		case 'I':
117 			Jflag = 0;
118 			Iflag = 1;
119 			Lflag = 1;
120 			replstr = optarg;
121 			break;
122 		case 'J':
123 			Iflag = 0;
124 			Jflag = 1;
125 			replstr = optarg;
126 			break;
127 		case 'L':
128 			Lflag = atoi(optarg);
129 			break;
130 		case 'n':
131 			nflag = 1;
132 			if ((nargs = atoi(optarg)) <= 0)
133 				errx(1, "illegal argument count");
134 			break;
135 		case 'o':
136 			oflag = 1;
137 			break;
138 		case 'P':
139 			if ((maxprocs = atoi(optarg)) <= 0)
140 				errx(1, "max. processes must be >0");
141 			break;
142 		case 'p':
143 			pflag = 1;
144 			break;
145 		case 'r':
146 			runeof = 0;
147 			break;
148 		case 'R':
149 			Rflag = strtol(optarg, &endptr, 10);
150 			if (*endptr != '\0')
151 				errx(1, "replacements must be a number");
152 			break;
153 		case 's':
154 			nline = atoi(optarg);
155 			break;
156 		case 't':
157 			tflag = 1;
158 			break;
159 		case 'x':
160 			xflag = 1;
161 			break;
162 		case '0':
163 			zflag = 1;
164 			break;
165 		case '?':
166 		default:
167 			usage();
168 	}
169 	argc -= optind;
170 	argv += optind;
171 
172 	if (!Iflag && Rflag)
173 		usage();
174 	if (Iflag && !Rflag)
175 		Rflag = 5;
176 	if (xflag && !nflag)
177 		usage();
178 	if (Iflag || Lflag)
179 		xflag = 1;
180 	if (replstr != NULL && *replstr == '\0')
181 		errx(1, "replstr may not be empty");
182 
183 	/*
184 	 * Allocate pointers for the utility name, the utility arguments,
185 	 * the maximum arguments to be read from stdin and the trailing
186 	 * NULL.
187 	 */
188 	linelen = 1 + argc + nargs + 1;
189 	if ((av = bxp = calloc(linelen, sizeof(char **))) == NULL)
190 		err(1, NULL);
191 
192 	/*
193 	 * Use the user's name for the utility as argv[0], just like the
194 	 * shell.  Echo is the default.  Set up pointers for the user's
195 	 * arguments.
196 	 */
197 	if (*argv == NULL)
198 		cnt = strlen(*bxp++ = _PATH_ECHO);
199 	else {
200 		do {
201 			if (Jflag && strcmp(*argv, replstr) == 0) {
202 				char **avj;
203 				jfound = 1;
204 				argv++;
205 				for (avj = argv; *avj; avj++)
206 					cnt += strlen(*avj) + 1;
207 				break;
208 			}
209 			cnt += strlen(*bxp++ = *argv) + 1;
210 		} while (*++argv != NULL);
211 	}
212 
213 	/*
214 	 * Set up begin/end/traversing pointers into the array.  The -n
215 	 * count doesn't include the trailing NULL pointer, so the malloc
216 	 * added in an extra slot.
217 	 */
218 	endxp = (xp = bxp) + nargs;
219 
220 	/*
221 	 * Allocate buffer space for the arguments read from stdin and the
222 	 * trailing NULL.  Buffer space is defined as the default or specified
223 	 * space, minus the length of the utility name and arguments.  Set up
224 	 * begin/end/traversing pointers into the array.  The -s count does
225 	 * include the trailing NULL, so the malloc didn't add in an extra
226 	 * slot.
227 	 */
228 	nline -= cnt;
229 	if (nline <= 0)
230 		errx(1, "insufficient space for command");
231 
232 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
233 		err(1, NULL);
234 	ebp = (argp = p = bbp) + nline - 1;
235 	for (;;)
236 		parse_input(argc, argv);
237 }
238 
239 static void
240 parse_input(int argc, char *argv[])
241 {
242 	int ch, foundeof = 0;
243 	char **avj;
244 
245 	switch (ch = getchar()) {
246 	case EOF:
247 		/* No arguments since last exec. */
248 		if (p == bbp) {
249 			if (runeof)
250 				prerun(0, av);
251 			waitchildren(*argv, 1);
252 			exit(rval);
253 		}
254 		goto arg1;
255 	case ' ':
256 	case '\t':
257 		/* Quotes escape tabs and spaces. */
258 		if (insingle || indouble || zflag)
259 			goto addch;
260 		goto arg2;
261 	case '\0':
262 		if (zflag)
263 			goto arg2;
264 		goto addch;
265 	case '\n':
266 		count++;
267 		if (zflag)
268 			goto addch;
269 
270 		/* Quotes do not escape newlines. */
271 arg1:		if (insingle || indouble)
272 			errx(1, "unterminated quote");
273 arg2:
274 		foundeof = *eofstr != '\0' &&
275 		    strcmp(argp, eofstr) == 0;
276 
277 		/* Do not make empty args unless they are quoted */
278 		if ((argp != p || wasquoted) && !foundeof) {
279 			*p++ = '\0';
280 			*xp++ = argp;
281 			if (Iflag) {
282 				size_t curlen;
283 
284 				if (inpline == NULL)
285 					curlen = 0;
286 				else {
287 					/*
288 					 * If this string is not zero
289 					 * length, append a space for
290 					 * separation before the next
291 					 * argument.
292 					 */
293 					if ((curlen = strlen(inpline)))
294 						strlcat(inpline, " ", inpsize);
295 				}
296 				curlen++;
297 				/*
298 				 * Allocate enough to hold what we will
299 				 * be holding in a second, and to append
300 				 * a space next time through, if we have
301 				 * to.
302 				 */
303 				inpsize = curlen + 2 + strlen(argp);
304 				inpline = realloc(inpline, inpsize);
305 				if (inpline == NULL)
306 					errx(1, "realloc failed");
307 				if (curlen == 1)
308 					strlcpy(inpline, argp, inpsize);
309 				else
310 					strlcat(inpline, argp, inpsize);
311 			}
312 		}
313 
314 		/*
315 		 * If max'd out on args or buffer, or reached EOF,
316 		 * run the command.  If xflag and max'd out on buffer
317 		 * but not on args, object.  Having reached the limit
318 		 * of input lines, as specified by -L is the same as
319 		 * maxing out on arguments.
320 		 */
321 		if (xp == endxp || p > ebp || ch == EOF ||
322 		    (Lflag <= count && xflag) || foundeof) {
323 			if (xflag && xp != endxp && p > ebp)
324 				errx(1, "insufficient space for arguments");
325 			if (jfound) {
326 				for (avj = argv; *avj; avj++)
327 					*xp++ = *avj;
328 			}
329 			prerun(argc, av);
330 			if (ch == EOF || foundeof) {
331 				waitchildren(*argv, 1);
332 				exit(rval);
333 			}
334 			p = bbp;
335 			xp = bxp;
336 			count = 0;
337 		}
338 		argp = p;
339 		wasquoted = 0;
340 		break;
341 	case '\'':
342 		if (indouble || zflag)
343 			goto addch;
344 		insingle = !insingle;
345 		wasquoted = 1;
346 		break;
347 	case '"':
348 		if (insingle || zflag)
349 			goto addch;
350 		indouble = !indouble;
351 		wasquoted = 1;
352 		break;
353 	case '\\':
354 		if (zflag)
355 			goto addch;
356 		/* Backslash escapes anything, is escaped by quotes. */
357 		if (!insingle && !indouble && (ch = getchar()) == EOF)
358 			errx(1, "backslash at EOF");
359 		/* FALLTHROUGH */
360 	default:
361 addch:		if (p < ebp) {
362 			*p++ = ch;
363 			break;
364 		}
365 
366 		/* If only one argument, not enough buffer space. */
367 		if (bxp == xp)
368 			errx(1, "insufficient space for argument");
369 		/* Didn't hit argument limit, so if xflag object. */
370 		if (xflag)
371 			errx(1, "insufficient space for arguments");
372 
373 		if (jfound) {
374 			for (avj = argv; *avj; avj++)
375 				*xp++ = *avj;
376 		}
377 		prerun(argc, av);
378 		xp = bxp;
379 		cnt = ebp - argp;
380 		memcpy(bbp, argp, (size_t)cnt);
381 		p = (argp = bbp) + cnt;
382 		*p++ = ch;
383 		break;
384 	}
385 }
386 
387 /*
388  * Do things necessary before run()'ing, such as -I substitution,
389  * and then call run().
390  */
391 static void
392 prerun(int argc, char *argv[])
393 {
394 	char **tmp, **tmp2, **avj;
395 	int repls;
396 
397 	repls = Rflag;
398 	runeof = 0;
399 
400 	if (argc == 0 || repls == 0) {
401 		*xp = NULL;
402 		run(argv);
403 		return;
404 	}
405 
406 	avj = argv;
407 
408 	/*
409 	 * Allocate memory to hold the argument list, and
410 	 * a NULL at the tail.
411 	 */
412 	tmp = calloc(argc + 1, sizeof(char**));
413 	if (tmp == NULL)
414 		err(1, NULL);
415 	tmp2 = tmp;
416 
417 	/*
418 	 * Save the first argument and iterate over it, we
419 	 * cannot do strnsubst() to it.
420 	 */
421 	if ((*tmp++ = strdup(*avj++)) == NULL)
422 		err(1, NULL);
423 
424 	/*
425 	 * For each argument to utility, if we have not used up
426 	 * the number of replacements we are allowed to do, and
427 	 * if the argument contains at least one occurrence of
428 	 * replstr, call strnsubst(), else just save the string.
429 	 * Iterations over elements of avj and tmp are done
430 	 * where appropriate.
431 	 */
432 	while (--argc) {
433 		*tmp = *avj++;
434 		if (repls && strstr(*tmp, replstr) != NULL) {
435 			strnsubst(tmp++, replstr, inpline, (size_t)255);
436 			if (repls > 0)
437 				repls--;
438 		} else {
439 			if ((*tmp = strdup(*tmp)) == NULL)
440 				err(1, NULL);
441 			tmp++;
442 		}
443 	}
444 
445 	/*
446 	 * Run it.
447 	 */
448 	*tmp = NULL;
449 	run(tmp2);
450 
451 	/*
452 	 * Walk from the tail to the head, free along the way.
453 	 */
454 	for (; tmp2 != tmp; tmp--)
455 		free(*tmp);
456 	/*
457 	 * Now free the list itself.
458 	 */
459 	free(tmp2);
460 
461 	/*
462 	 * Free the input line buffer, if we have one.
463 	 */
464 	if (inpline != NULL) {
465 		free(inpline);
466 		inpline = NULL;
467 	}
468 }
469 
470 static void
471 run(char **argv)
472 {
473 	pid_t pid;
474 	int fd;
475 	char **avec;
476 
477 	/*
478 	 * If the user wants to be notified of each command before it is
479 	 * executed, notify them.  If they want the notification to be
480 	 * followed by a prompt, then prompt them.
481 	 */
482 	if (tflag || pflag) {
483 		(void)fprintf(stderr, "%s", *argv);
484 		for (avec = argv + 1; *avec != NULL; ++avec)
485 			(void)fprintf(stderr, " %s", *avec);
486 		/*
487 		 * If the user has asked to be prompted, do so.
488 		 */
489 		if (pflag)
490 			/*
491 			 * If they asked not to exec, return without execution
492 			 * but if they asked to, go to the execution.  If we
493 			 * could not open their tty, break the switch and drop
494 			 * back to -t behaviour.
495 			 */
496 			switch (prompt()) {
497 			case 0:
498 				return;
499 			case 1:
500 				goto exec;
501 			case 2:
502 				break;
503 			}
504 		(void)fprintf(stderr, "\n");
505 		(void)fflush(stderr);
506 	}
507 exec:
508 	switch (pid = vfork()) {
509 	case -1:
510 		err(1, "vfork");
511 	case 0:
512 		if (oflag) {
513 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
514 				warn("can't open /dev/tty");
515 				_exit(1);
516 			}
517 		} else {
518 			fd = open(_PATH_DEVNULL, O_RDONLY);
519 		}
520 		if (fd > STDIN_FILENO) {
521 			if (dup2(fd, STDIN_FILENO) != 0) {
522 				warn("can't dup2 to stdin");
523 				_exit(1);
524 			}
525 			close(fd);
526 		}
527 		execvp(argv[0], argv);
528 		warn("%s", argv[0]);
529 		_exit(errno == ENOENT ? 127 : 126);
530 	}
531 	curprocs++;
532 	waitchildren(*argv, 0);
533 }
534 
535 static void
536 waitchildren(const char *name, int waitall)
537 {
538 	pid_t pid;
539 	int status;
540 
541 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
542 	    WNOHANG : 0)) > 0) {
543 		curprocs--;
544 		/*
545 		 * According to POSIX, we have to exit if the utility exits
546 		 * with a 255 status, or is interrupted by a signal.
547 		 * We are allowed to return any exit status between 1 and
548 		 * 125 in these cases, but we'll use 124 and 125, the same
549 		 * values used by GNU xargs.
550 		 */
551 		if (WIFEXITED(status)) {
552 			if (WEXITSTATUS(status) == 255) {
553 				warnx("%s exited with status 255", name);
554 				exit(124);
555 			} else if (WEXITSTATUS(status) == 127 ||
556 			    WEXITSTATUS(status) == 126) {
557 				exit(WEXITSTATUS(status));
558 			} else if (WEXITSTATUS(status) != 0) {
559 				rval = 123;
560 			}
561 		} else if (WIFSIGNALED(status)) {
562 			if (WTERMSIG(status) != SIGPIPE) {
563 				if (WTERMSIG(status) < NSIG)
564 					warnx("%s terminated by SIG%s", name,
565 					    sys_signame[WTERMSIG(status)]);
566 				else
567 					warnx("%s terminated by signal %d",
568 					    name, WTERMSIG(status));
569 			}
570 			exit(125);
571 		}
572 	}
573 	if (pid == -1 && errno != ECHILD)
574 		err(1, "waitpid");
575 }
576 
577 /*
578  * Prompt the user about running a command.
579  */
580 static int
581 prompt(void)
582 {
583 	regex_t cre;
584 	size_t rsize;
585 	int match;
586 	char *response;
587 	FILE *ttyfp;
588 
589 	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
590 		return (2);	/* Indicate that the TTY failed to open. */
591 	(void)fprintf(stderr, "?...");
592 	(void)fflush(stderr);
593 	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
594 	    regcomp(&cre, nl_langinfo(YESEXPR), REG_BASIC) != 0) {
595 		(void)fclose(ttyfp);
596 		return (0);
597 	}
598 	response[rsize - 1] = '\0';
599 	match = regexec(&cre, response, 0, NULL, 0);
600 	(void)fclose(ttyfp);
601 	regfree(&cre);
602 	return (match == 0);
603 }
604 
/*
 * Write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
613 }
614