xref: /openbsd-src/usr.bin/xargs/xargs.c (revision 898184e3e61f9129feb5978fad5a8c6865f00b92)
1 /*	$OpenBSD: xargs.c,v 1.27 2010/03/25 01:03:57 schwarze Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
#include <sys/param.h>
#include <sys/wait.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
56 
/* Helpers local to this file. */
static void	parse_input(int, char *[]);
static void	prerun(int, char *[]);
static int	prompt(void);
static void	run(char **);
static void	usage(void);
static void	waitchildren(const char *, int);

/* Not static; presumably defined in another source file. */
void		strnsubst(char **, const char *, const char *, size_t);

extern char **environ;

/*
 * Argument vector handed to the utility, and cursors into it.
 */
static char **av;		/* start of the vector (allocated in main) */
static char **bxp;		/* first slot after the fixed utility args */
static char **xp;		/* next free slot for an input argument */
static char **endxp;		/* bxp + maximum number of input arguments */
static char **ep;		/* cursor used by main to walk environ */

/*
 * Character buffer holding the arguments read from stdin.
 */
static char *bbp;		/* start of the buffer */
static char *ebp;		/* bbp + nline - 1, the fill limit */
static char *p;			/* next byte to be written */
static char *argp;		/* start of the argument being assembled */

/*
 * -I / -J / -E / -R / -L state.
 */
static char *replstr;		/* replacement string given to -I or -J */
static char *inpline;		/* -I: input arguments joined by spaces */
static size_t inpsize;		/* -I: size last passed to realloc for inpline */
static const char *eofstr;	/* -E string; "" when unset */
static int Iflag;		/* -I given */
static int Lflag;		/* -L value (forced to 1 by -I) */
static int Rflag;		/* -R value: max arguments to substitute in */
static int jfound;		/* -J replstr was seen among the utility args */

/*
 * Tokenizer state.
 */
static int insingle;		/* inside a '...' quote */
static int indouble;		/* inside a "..." quote */
static int wasquoted;		/* current argument contained a quote */
static int count;		/* input lines counted since the last run */

/*
 * Remaining option flags.
 */
static int oflag;		/* -o: child reads from the tty */
static int pflag;		/* -p: prompt before each command */
static int tflag;		/* -t: echo each command to stderr */
static int xflag;		/* -x, also forced on by -I and -L */
static int zflag;		/* -0: NUL-separated input, no quoting */
static int runeof = 1;		/* run at EOF without input; cleared by -r */

/*
 * Bookkeeping.
 */
static int cnt;			/* bytes used by the utility name and args;
				 * reused as scratch by parse_input */
static int rval;		/* exit status of xargs itself */
static int curprocs;		/* children started and not yet reaped */
static int maxprocs;		/* -P value */
74 
75 int
76 main(int argc, char *argv[])
77 {
78 	long arg_max;
79 	int ch, Jflag, nargs, nflag, nline;
80 	size_t linelen;
81 	char *endptr;
82 
83 	inpline = replstr = NULL;
84 	ep = environ;
85 	eofstr = "";
86 	Jflag = nflag = 0;
87 
88 	(void)setlocale(LC_MESSAGES, "");
89 
90 	/*
91 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
92 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
93 	 * that the smallest argument is 2 bytes in length, this means that
94 	 * the number of arguments is limited to:
95 	 *
96 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
97 	 *
98 	 * We arbitrarily limit the number of arguments to 5000.  This is
99 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
100 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
101 	 * probably not worthwhile.
102 	 */
103 	nargs = 5000;
104 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
105 		errx(1, "sysconf(_SC_ARG_MAX) failed");
106 	nline = arg_max - 4 * 1024;
107 	while (*ep != NULL) {
108 		/* 1 byte for each '\0' */
109 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
110 	}
111 	maxprocs = 1;
112 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
113 		switch (ch) {
114 		case 'E':
115 			eofstr = optarg;
116 			break;
117 		case 'I':
118 			Jflag = 0;
119 			Iflag = 1;
120 			Lflag = 1;
121 			replstr = optarg;
122 			break;
123 		case 'J':
124 			Iflag = 0;
125 			Jflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'L':
129 			Lflag = atoi(optarg);
130 			break;
131 		case 'n':
132 			nflag = 1;
133 			if ((nargs = atoi(optarg)) <= 0)
134 				errx(1, "illegal argument count");
135 			break;
136 		case 'o':
137 			oflag = 1;
138 			break;
139 		case 'P':
140 			if ((maxprocs = atoi(optarg)) <= 0)
141 				errx(1, "max. processes must be >0");
142 			break;
143 		case 'p':
144 			pflag = 1;
145 			break;
146 		case 'r':
147 			runeof = 0;
148 			break;
149 		case 'R':
150 			Rflag = strtol(optarg, &endptr, 10);
151 			if (*endptr != '\0')
152 				errx(1, "replacements must be a number");
153 			break;
154 		case 's':
155 			nline = atoi(optarg);
156 			break;
157 		case 't':
158 			tflag = 1;
159 			break;
160 		case 'x':
161 			xflag = 1;
162 			break;
163 		case '0':
164 			zflag = 1;
165 			break;
166 		case '?':
167 		default:
168 			usage();
169 	}
170 	argc -= optind;
171 	argv += optind;
172 
173 	if (!Iflag && Rflag)
174 		usage();
175 	if (Iflag && !Rflag)
176 		Rflag = 5;
177 	if (xflag && !nflag)
178 		usage();
179 	if (Iflag || Lflag)
180 		xflag = 1;
181 	if (replstr != NULL && *replstr == '\0')
182 		errx(1, "replstr may not be empty");
183 
184 	/*
185 	 * Allocate pointers for the utility name, the utility arguments,
186 	 * the maximum arguments to be read from stdin and the trailing
187 	 * NULL.
188 	 */
189 	linelen = 1 + argc + nargs + 1;
190 	if ((av = bxp = calloc(linelen, sizeof(char **))) == NULL)
191 		err(1, NULL);
192 
193 	/*
194 	 * Use the user's name for the utility as argv[0], just like the
195 	 * shell.  Echo is the default.  Set up pointers for the user's
196 	 * arguments.
197 	 */
198 	if (*argv == NULL)
199 		cnt = strlen(*bxp++ = _PATH_ECHO);
200 	else {
201 		do {
202 			if (Jflag && strcmp(*argv, replstr) == 0) {
203 				char **avj;
204 				jfound = 1;
205 				argv++;
206 				for (avj = argv; *avj; avj++)
207 					cnt += strlen(*avj) + 1;
208 				break;
209 			}
210 			cnt += strlen(*bxp++ = *argv) + 1;
211 		} while (*++argv != NULL);
212 	}
213 
214 	/*
215 	 * Set up begin/end/traversing pointers into the array.  The -n
216 	 * count doesn't include the trailing NULL pointer, so the malloc
217 	 * added in an extra slot.
218 	 */
219 	endxp = (xp = bxp) + nargs;
220 
221 	/*
222 	 * Allocate buffer space for the arguments read from stdin and the
223 	 * trailing NULL.  Buffer space is defined as the default or specified
224 	 * space, minus the length of the utility name and arguments.  Set up
225 	 * begin/end/traversing pointers into the array.  The -s count does
226 	 * include the trailing NULL, so the malloc didn't add in an extra
227 	 * slot.
228 	 */
229 	nline -= cnt;
230 	if (nline <= 0)
231 		errx(1, "insufficient space for command");
232 
233 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
234 		err(1, NULL);
235 	ebp = (argp = p = bbp) + nline - 1;
236 	for (;;)
237 		parse_input(argc, argv);
238 }
239 
240 static void
241 parse_input(int argc, char *argv[])
242 {
243 	int hasblank = 0;
244 	static int hadblank = 0;
245 	int ch, foundeof = 0;
246 	char **avj;
247 
248 	ch = getchar();
249 	if (isblank(ch)) {
250 		/* Quotes escape tabs and spaces. */
251 		if (insingle || indouble)
252 			goto addch;
253 		hasblank = 1;
254 		if (zflag)
255 			goto addch;
256 		goto arg2;
257 	}
258 
259 	switch (ch) {
260 	case EOF:
261 		/* No arguments since last exec. */
262 		if (p == bbp) {
263 			if (runeof)
264 				prerun(0, av);
265 			waitchildren(*argv, 1);
266 			exit(rval);
267 		}
268 		goto arg1;
269 	case '\0':
270 		if (zflag)
271 			goto arg2;
272 		goto addch;
273 	case '\n':
274 		hasblank = 1;
275 		if (hadblank == 0)
276 			count++;
277 		if (zflag)
278 			goto addch;
279 
280 		/* Quotes do not escape newlines. */
281 arg1:		if (insingle || indouble)
282 			errx(1, "unterminated quote");
283 arg2:
284 		foundeof = *eofstr != '\0' &&
285 		    strcmp(argp, eofstr) == 0;
286 
287 		/* Do not make empty args unless they are quoted */
288 		if ((argp != p || wasquoted) && !foundeof) {
289 			*p++ = '\0';
290 			*xp++ = argp;
291 			if (Iflag) {
292 				size_t curlen;
293 
294 				if (inpline == NULL)
295 					curlen = 0;
296 				else {
297 					/*
298 					 * If this string is not zero
299 					 * length, append a space for
300 					 * separation before the next
301 					 * argument.
302 					 */
303 					if ((curlen = strlen(inpline)))
304 						strlcat(inpline, " ", inpsize);
305 				}
306 				curlen++;
307 				/*
308 				 * Allocate enough to hold what we will
309 				 * be holding in a second, and to append
310 				 * a space next time through, if we have
311 				 * to.
312 				 */
313 				inpsize = curlen + 2 + strlen(argp);
314 				inpline = realloc(inpline, inpsize);
315 				if (inpline == NULL)
316 					errx(1, "realloc failed");
317 				if (curlen == 1)
318 					strlcpy(inpline, argp, inpsize);
319 				else
320 					strlcat(inpline, argp, inpsize);
321 			}
322 		}
323 
324 		/*
325 		 * If max'd out on args or buffer, or reached EOF,
326 		 * run the command.  If xflag and max'd out on buffer
327 		 * but not on args, object.  Having reached the limit
328 		 * of input lines, as specified by -L is the same as
329 		 * maxing out on arguments.
330 		 */
331 		if (xp == endxp || p > ebp || ch == EOF ||
332 		    (Lflag <= count && xflag) || foundeof) {
333 			if (xflag && xp != endxp && p > ebp)
334 				errx(1, "insufficient space for arguments");
335 			if (jfound) {
336 				for (avj = argv; *avj; avj++)
337 					*xp++ = *avj;
338 			}
339 			prerun(argc, av);
340 			if (ch == EOF || foundeof) {
341 				waitchildren(*argv, 1);
342 				exit(rval);
343 			}
344 			p = bbp;
345 			xp = bxp;
346 			count = 0;
347 		}
348 		argp = p;
349 		wasquoted = 0;
350 		break;
351 	case '\'':
352 		if (indouble || zflag)
353 			goto addch;
354 		insingle = !insingle;
355 		wasquoted = 1;
356 		break;
357 	case '"':
358 		if (insingle || zflag)
359 			goto addch;
360 		indouble = !indouble;
361 		wasquoted = 1;
362 		break;
363 	case '\\':
364 		if (zflag)
365 			goto addch;
366 		/* Backslash escapes anything, is escaped by quotes. */
367 		if (!insingle && !indouble && (ch = getchar()) == EOF)
368 			errx(1, "backslash at EOF");
369 		/* FALLTHROUGH */
370 	default:
371 addch:		if (p < ebp) {
372 			*p++ = ch;
373 			break;
374 		}
375 
376 		/* If only one argument, not enough buffer space. */
377 		if (bxp == xp)
378 			errx(1, "insufficient space for argument");
379 		/* Didn't hit argument limit, so if xflag object. */
380 		if (xflag)
381 			errx(1, "insufficient space for arguments");
382 
383 		if (jfound) {
384 			for (avj = argv; *avj; avj++)
385 				*xp++ = *avj;
386 		}
387 		prerun(argc, av);
388 		xp = bxp;
389 		cnt = ebp - argp;
390 		memcpy(bbp, argp, (size_t)cnt);
391 		p = (argp = bbp) + cnt;
392 		*p++ = ch;
393 		break;
394 	}
395 	hadblank = hasblank;
396 }
397 
398 /*
399  * Do things necessary before run()'ing, such as -I substitution,
400  * and then call run().
401  */
402 static void
403 prerun(int argc, char *argv[])
404 {
405 	char **tmp, **tmp2, **avj;
406 	int repls;
407 
408 	repls = Rflag;
409 	runeof = 0;
410 
411 	if (argc == 0 || repls == 0) {
412 		*xp = NULL;
413 		run(argv);
414 		return;
415 	}
416 
417 	avj = argv;
418 
419 	/*
420 	 * Allocate memory to hold the argument list, and
421 	 * a NULL at the tail.
422 	 */
423 	tmp = calloc(argc + 1, sizeof(char**));
424 	if (tmp == NULL)
425 		err(1, NULL);
426 	tmp2 = tmp;
427 
428 	/*
429 	 * Save the first argument and iterate over it, we
430 	 * cannot do strnsubst() to it.
431 	 */
432 	if ((*tmp++ = strdup(*avj++)) == NULL)
433 		err(1, NULL);
434 
435 	/*
436 	 * For each argument to utility, if we have not used up
437 	 * the number of replacements we are allowed to do, and
438 	 * if the argument contains at least one occurrence of
439 	 * replstr, call strnsubst(), else just save the string.
440 	 * Iterations over elements of avj and tmp are done
441 	 * where appropriate.
442 	 */
443 	while (--argc) {
444 		*tmp = *avj++;
445 		if (repls && strstr(*tmp, replstr) != NULL) {
446 			strnsubst(tmp++, replstr, inpline, (size_t)255);
447 			if (repls > 0)
448 				repls--;
449 		} else {
450 			if ((*tmp = strdup(*tmp)) == NULL)
451 				err(1, NULL);
452 			tmp++;
453 		}
454 	}
455 
456 	/*
457 	 * Run it.
458 	 */
459 	*tmp = NULL;
460 	run(tmp2);
461 
462 	/*
463 	 * Walk from the tail to the head, free along the way.
464 	 */
465 	for (; tmp2 != tmp; tmp--)
466 		free(*tmp);
467 	/*
468 	 * Now free the list itself.
469 	 */
470 	free(tmp2);
471 
472 	/*
473 	 * Free the input line buffer, if we have one.
474 	 */
475 	if (inpline != NULL) {
476 		free(inpline);
477 		inpline = NULL;
478 	}
479 }
480 
481 static void
482 run(char **argv)
483 {
484 	pid_t pid;
485 	int fd;
486 	char **avec;
487 
488 	/*
489 	 * If the user wants to be notified of each command before it is
490 	 * executed, notify them.  If they want the notification to be
491 	 * followed by a prompt, then prompt them.
492 	 */
493 	if (tflag || pflag) {
494 		(void)fprintf(stderr, "%s", *argv);
495 		for (avec = argv + 1; *avec != NULL; ++avec)
496 			(void)fprintf(stderr, " %s", *avec);
497 		/*
498 		 * If the user has asked to be prompted, do so.
499 		 */
500 		if (pflag)
501 			/*
502 			 * If they asked not to exec, return without execution
503 			 * but if they asked to, go to the execution.  If we
504 			 * could not open their tty, break the switch and drop
505 			 * back to -t behaviour.
506 			 */
507 			switch (prompt()) {
508 			case 0:
509 				return;
510 			case 1:
511 				goto exec;
512 			case 2:
513 				break;
514 			}
515 		(void)fprintf(stderr, "\n");
516 		(void)fflush(stderr);
517 	}
518 exec:
519 	switch (pid = vfork()) {
520 	case -1:
521 		err(1, "vfork");
522 	case 0:
523 		if (oflag) {
524 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
525 				warn("can't open /dev/tty");
526 				_exit(1);
527 			}
528 		} else {
529 			fd = open(_PATH_DEVNULL, O_RDONLY);
530 		}
531 		if (fd > STDIN_FILENO) {
532 			if (dup2(fd, STDIN_FILENO) != 0) {
533 				warn("can't dup2 to stdin");
534 				_exit(1);
535 			}
536 			close(fd);
537 		}
538 		execvp(argv[0], argv);
539 		warn("%s", argv[0]);
540 		_exit(errno == ENOENT ? 127 : 126);
541 	}
542 	curprocs++;
543 	waitchildren(*argv, 0);
544 }
545 
546 static void
547 waitchildren(const char *name, int waitall)
548 {
549 	pid_t pid;
550 	int status;
551 
552 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
553 	    WNOHANG : 0)) > 0) {
554 		curprocs--;
555 		/*
556 		 * According to POSIX, we have to exit if the utility exits
557 		 * with a 255 status, or is interrupted by a signal.
558 		 * We are allowed to return any exit status between 1 and
559 		 * 125 in these cases, but we'll use 124 and 125, the same
560 		 * values used by GNU xargs.
561 		 */
562 		if (WIFEXITED(status)) {
563 			if (WEXITSTATUS(status) == 255) {
564 				warnx("%s exited with status 255", name);
565 				exit(124);
566 			} else if (WEXITSTATUS(status) == 127 ||
567 			    WEXITSTATUS(status) == 126) {
568 				exit(WEXITSTATUS(status));
569 			} else if (WEXITSTATUS(status) != 0) {
570 				rval = 123;
571 			}
572 		} else if (WIFSIGNALED(status)) {
573 			if (WTERMSIG(status) != SIGPIPE) {
574 				if (WTERMSIG(status) < NSIG)
575 					warnx("%s terminated by SIG%s", name,
576 					    sys_signame[WTERMSIG(status)]);
577 				else
578 					warnx("%s terminated by signal %d",
579 					    name, WTERMSIG(status));
580 			}
581 			exit(125);
582 		}
583 	}
584 	if (pid == -1 && errno != ECHILD)
585 		err(1, "waitpid");
586 }
587 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr, reads one line from the controlling terminal
 * and matches it against the locale's affirmative-response expression.
 *
 * Returns 1 if the answer is affirmative, 0 if it is not (or if the
 * answer could not be read or checked), and 2 if the terminal could not
 * be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	int match;
	char *response, *line;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	if ((response = fgetln(ttyfp, &rsize)) == NULL) {
		(void)fclose(ttyfp);
		return (0);
	}

	/*
	 * fgetln() hands back a buffer that is not NUL-terminated and
	 * that ends in a newline only if the input had one.  Work on a
	 * terminated copy so that a final line without a newline does
	 * not lose its last character.
	 */
	if ((line = malloc(rsize + 1)) == NULL) {
		(void)fclose(ttyfp);
		return (0);
	}
	memcpy(line, response, rsize);
	line[rsize] = '\0';
	line[strcspn(line, "\n")] = '\0';
	(void)fclose(ttyfp);

	/* YESEXPR is an extended regular expression. */
	if (regcomp(&cre, nl_langinfo(YESEXPR),
	    REG_EXTENDED | REG_NOSUB) != 0) {
		free(line);
		return (0);
	}
	match = regexec(&cre, line, 0, NULL, 0);
	regfree(&cre);
	free(line);
	return (match == 0);
}
615 
/*
 * Write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
625