xref: /openbsd-src/usr.bin/make/job.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$OpenBSD: job.c,v 1.135 2012/12/14 11:10:03 espie Exp $	*/
2 /*	$NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Marc Espie.
6  *
7  * Extensive code modifications for the OpenBSD project.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
22  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
32  * Copyright (c) 1988, 1989 by Adam de Boor
33  * Copyright (c) 1989 by Berkeley Softworks
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Adam de Boor.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 /*-
65  * job.c --
66  *	handle the creation etc. of our child processes.
67  *
68  * Interface:
69  *	Job_Make		Start the creation of the given target.
70  *
71  *	Job_Init		Called to initialize this module.
72  *
73  *	Job_Begin		execute commands attached to the .BEGIN target
74  *				if any.
75  *
76  *	can_start_job		Return true if we can start job
77  *
78  *	Job_Empty		Return true if the job table is completely
79  *				empty.
80  *
81  *	Job_Finish		Perform any final processing which needs doing.
82  *				This includes the execution of any commands
83  *				which have been/were attached to the .END
84  *				target.
85  *
86  *	Job_AbortAll		Abort all current jobs. It doesn't
87  *				handle output or do anything for the jobs,
88  *				just kills them.
89  *
90  *	Job_Wait		Wait for all running jobs to finish.
91  */
92 
93 #include <sys/types.h>
94 #include <sys/wait.h>
95 #include <ctype.h>
96 #include <errno.h>
97 #include <fcntl.h>
98 #include <signal.h>
99 #include <stdarg.h>
100 #include <stdio.h>
101 #include <stdlib.h>
102 #include <string.h>
103 #include <unistd.h>
104 #include "config.h"
105 #include "defines.h"
106 #include "job.h"
107 #include "engine.h"
108 #include "pathnames.h"
109 #include "var.h"
110 #include "targ.h"
111 #include "error.h"
112 #include "extern.h"
113 #include "lst.h"
114 #include "gnode.h"
115 #include "memory.h"
116 #include "make.h"
117 #include "buf.h"
118 
119 static int	aborting = 0;	    /* why is the make aborting? */
120 #define ABORT_ERROR	1	    /* Because of an error */
121 #define ABORT_INTERRUPT 2	    /* Because it was interrupted */
122 #define ABORT_WAIT	3	    /* Waiting for jobs to finish */
123 
124 static int	maxJobs;	/* The most children we can run at once */
125 static int	nJobs;		/* Number of jobs already allocated */
126 static bool	no_new_jobs;	/* Mark recursive shit so we shouldn't start
127 				 * something else at the same time
128 				 */
129 Job *runningJobs;		/* Jobs currently running a process */
130 Job *errorJobs;			/* Jobs in error at end */
131 static Job *heldJobs;		/* Jobs not running yet because of expensive */
132 static pid_t mypid;		/* Used for printing debugging messages */
133 
134 static volatile sig_atomic_t got_fatal;
135 
136 static volatile sig_atomic_t got_SIGINT, got_SIGHUP, got_SIGQUIT, got_SIGTERM,
137     got_SIGINFO;
138 
139 static sigset_t sigset, emptyset;
140 
141 static void handle_fatal_signal(int);
142 static void handle_siginfo(void);
143 static void postprocess_job(Job *, bool);
144 static Job *prepare_job(GNode *);
145 static void determine_job_next_step(Job *);
146 static void remove_job(Job *, bool);
147 static void may_continue_job(Job *);
148 static void continue_job(Job *);
149 static Job *reap_finished_job(pid_t);
150 static bool reap_jobs(void);
151 
152 static void loop_handle_running_jobs(void);
153 static bool expensive_job(Job *);
154 static bool expensive_command(const char *);
155 static void setup_signal(int);
156 static void notice_signal(int);
157 static void setup_all_signals(void);
158 static const char *really_kill(Job *, int);
159 static void kill_with_sudo_maybe(pid_t, int, const char *);
160 static void debug_kill_printf(const char *, ...);
161 static void debug_vprintf(const char *, va_list);
162 static void may_remove_target(Job *);
163 static const char *really_kill(Job *, int);
164 static void print_error(Job *);
165 static void internal_print_errors(void);
166 
167 static int dying_signal = 0;
168 
169 const char *	basedirectory = NULL;
170 
/*
 * kill_with_sudo_maybe(pid, signo, p):
 *	if the command text p contains "sudo" anywhere, run
 *	"sudo -n /bin/kill" through system(3) to deliver signo to pid.
 *	Does nothing otherwise.  The outcome of the command is not checked.
 */
static void
kill_with_sudo_maybe(pid_t pid, int signo, const char *p)
{
	char cmdline[32]; /* largely enough */

	if (strstr(p, "sudo") == NULL)
		return;

	snprintf(cmdline, sizeof cmdline, "sudo -n /bin/kill -%d %ld",
	    signo, (long)pid);
	debug_kill_printf("trying to kill with %s", cmdline);
	system(cmdline);
}
195 
196 static const char *
197 really_kill(Job *job, int signo)
198 {
199 	pid_t pid = job->pid;
200 	if (getpgid(pid) != getpgrp()) {
201 		if (killpg(pid, signo) == 0)
202 			return "group got signal";
203 		pid = -pid;
204 	} else {
205 		if (kill(pid, signo) == 0)
206 			return "process got signal";
207 	}
208 	if (errno == ESRCH) {
209 		job->flags |= JOB_LOST;
210 		return "not found";
211 	} else if (errno == EPERM) {
212 		kill_with_sudo_maybe(pid, signo, job->cmd);
213 		return "";
214 	} else
215 		return "should not happen";
216 }
217 
218 static void
219 may_remove_target(Job *j)
220 {
221 	int dying = check_dying_signal();
222 
223 	if (dying && !noExecute && !Targ_Precious(j->node)) {
224 		const char *file = Var(TARGET_INDEX, j->node);
225 		int r = eunlink(file);
226 
227 		if (DEBUG(JOB) && r == -1)
228 			fprintf(stderr, " *** would unlink %s\n", file);
229 		if (r != -1)
230 			fprintf(stderr, " *** %s removed\n", file);
231 	}
232 }
233 
234 static void
235 buf_addcurdir(BUFFER *buf)
236 {
237 	const char *v = Var_Value(".CURDIR");
238 	if (basedirectory != NULL) {
239 		size_t len = strlen(basedirectory);
240 		if (strncmp(basedirectory, v, len) == 0 &&
241 		    v[len] == '/') {
242 			v += len+1;
243 		} else if (strcmp(basedirectory, v) == 0) {
244 			Buf_AddString(buf, ".");
245 			return;
246 		}
247 	}
248 	Buf_AddString(buf, v);
249 }
250 
251 static const char *
252 shortened_curdir(void)
253 {
254 	static BUFFER buf;
255 	bool first = true;
256 	if (first) {
257 		Buf_Init(&buf, 0);
258 		buf_addcurdir(&buf);
259 		first = false;
260 	}
261 	return Buf_Retrieve(&buf);
262 }
263 
264 static void
265 quick_error(Job *j, int signo, bool first)
266 {
267 	if (first) {
268 		fprintf(stderr, "*** Signal SIG%s", sys_signame[signo]);
269 		fprintf(stderr, " in %s (", shortened_curdir());
270 	} else
271 		fprintf(stderr, " ");
272 
273 	fprintf(stderr, "%s", j->node->name);
274 	free(j->cmd);
275 }
276 
277 static void
278 print_error(Job *j)
279 {
280 	static bool first = true;
281 	BUFFER buf;
282 
283 	Buf_Init(&buf, 0);
284 
285 	if (j->exit_type == JOB_EXIT_BAD)
286 		Buf_printf(&buf, "*** Error %d", j->code);
287 	else if (j->exit_type == JOB_SIGNALED) {
288 		if (j->code < NSIG)
289 			Buf_printf(&buf, "*** Signal SIG%s",
290 			    sys_signame[j->code]);
291 		else
292 			Buf_printf(&buf, "*** unknown signal %d", j->code);
293 	} else
294 		Buf_printf(&buf, "*** Should not happen %d/%d",
295 		    j->exit_type, j->code);
296 	if (DEBUG(KILL) && (j->flags & JOB_LOST))
297 		Buf_AddChar(&buf, '!');
298 	if (first) {
299 		Buf_AddString(&buf, " in ");
300 		buf_addcurdir(&buf);
301 		first = false;
302 	}
303 	Buf_printf(&buf, " (%s:%lu", j->location->fname, j->location->lineno);
304 	Buf_printf(&buf, " '%s'", j->node->name);
305 	if ((j->flags & (JOB_SILENT | JOB_IS_EXPENSIVE)) == JOB_SILENT
306 	    && Buf_Size(&buf) < 140-2) {
307 		size_t len = strlen(j->cmd);
308 		Buf_AddString(&buf, ": ");
309 		if (len + Buf_Size(&buf) < 140)
310 			Buf_AddString(&buf, j->cmd);
311 		else {
312 			Buf_AddChars(&buf, 140 - Buf_Size(&buf), j->cmd);
313 			Buf_AddString(&buf, "...");
314 		}
315 	}
316 	fprintf(stderr, "%s)\n", Buf_Retrieve(&buf));
317 	Buf_Destroy(&buf);
318 	free(j->cmd);
319 }
320 static void
321 quick_summary(int signo)
322 {
323 	Job *j, *k, *jnext;
324 	bool first = true;
325 
326 	k = errorJobs;
327 	errorJobs = NULL;
328 	for (j = k; j != NULL; j = jnext) {
329 		jnext = j->next;
330 		if ((j->exit_type == JOB_EXIT_BAD && j->code == signo+128) ||
331 		    (j->exit_type == JOB_SIGNALED && j->code == signo)) {
332 			quick_error(j, signo, first);
333 			first = false;
334 		} else {
335 			j->next = errorJobs;
336 			errorJobs = j;
337 		}
338 	}
339 	if (!first)
340 		fprintf(stderr, ")\n");
341 }
342 
343 static void
344 internal_print_errors()
345 {
346 	Job *j, *k, *jnext;
347 	int dying;
348 
349 	if (!errorJobs)
350 		fprintf(stderr, "Stop in %s\n", shortened_curdir());
351 
352 	for (j = errorJobs; j != NULL; j = j->next)
353 		may_remove_target(j);
354 	dying = check_dying_signal();
355 	if (dying)
356 		quick_summary(dying);
357 	while (errorJobs != NULL) {
358 		k = errorJobs;
359 		errorJobs = NULL;
360 		for (j = k; j != NULL; j = jnext) {
361 			jnext = j->next;
362 			if (j->location->fname == k->location->fname)
363 				print_error(j);
364 			else {
365 				j->next = errorJobs;
366 				errorJobs = j;
367 			}
368 		}
369 	}
370 }
371 
/*
 * print_errors():
 *	public entry point for error reporting: process the signals
 *	noticed so far, then print the recorded errors.
 */
void
print_errors(void)
{
	handle_all_signals();

	internal_print_errors();
}
378 
379 static void
380 setup_signal(int sig)
381 {
382 	if (signal(sig, SIG_IGN) != SIG_IGN) {
383 		(void)signal(sig, notice_signal);
384 		sigaddset(&sigset, sig);
385 	}
386 }
387 
388 static void
389 notice_signal(int sig)
390 {
391 
392 	switch(sig) {
393 	case SIGINT:
394 		got_SIGINT++;
395 		got_fatal = 1;
396 		break;
397 	case SIGHUP:
398 		got_SIGHUP++;
399 		got_fatal = 1;
400 		break;
401 	case SIGQUIT:
402 		got_SIGQUIT++;
403 		got_fatal = 1;
404 		break;
405 	case SIGTERM:
406 		got_SIGTERM++;
407 		got_fatal = 1;
408 		break;
409 	case SIGINFO:
410 		got_SIGINFO++;
411 		break;
412 	case SIGCHLD:
413 		break;
414 	}
415 }
416 
417 static void
418 setup_all_signals(void)
419 {
420 	sigemptyset(&sigset);
421 	sigemptyset(&emptyset);
422 	/*
423 	 * Catch the four signals that POSIX specifies if they aren't ignored.
424 	 * handle_signal will take care of calling JobInterrupt if appropriate.
425 	 */
426 	setup_signal(SIGINT);
427 	setup_signal(SIGHUP);
428 	setup_signal(SIGQUIT);
429 	setup_signal(SIGTERM);
430 	/* Display running jobs on SIGINFO */
431 	setup_signal(SIGINFO);
432 	/* Have to see SIGCHLD */
433 	setup_signal(SIGCHLD);
434 	got_fatal = 0;
435 }
436 
437 static void
438 handle_siginfo(void)
439 {
440 	static BUFFER buf;
441 	static size_t length = 0;
442 
443 	Job *job;
444 	bool first = true;
445 
446 	got_SIGINFO = 0;
447 	/* we have to store the info in a buffer, because status from all
448 	 * makes running would get intermixed otherwise
449 	 */
450 
451 	if (length == 0) {
452 		Buf_Init(&buf, 0);
453 		Buf_printf(&buf, "%s in ", Var_Value("MAKE"));
454 		buf_addcurdir(&buf);
455 		Buf_AddString(&buf, ": ");
456 		length = Buf_Size(&buf);
457 	} else
458 		Buf_Truncate(&buf, length);
459 
460 	for (job = runningJobs; job != NULL ; job = job->next) {
461 		if (!first)
462 			Buf_puts(&buf, ", ");
463 		first = false;
464 		Buf_puts(&buf, job->node->name);
465 	}
466 	Buf_puts(&buf, first ? "nothing running\n" : "\n");
467 
468 	fputs(Buf_Retrieve(&buf), stderr);
469 }
470 
471 int
472 check_dying_signal(void)
473 {
474 	sigset_t set;
475 	if (dying_signal)
476 		return dying_signal;
477 	sigpending(&set);
478 	if (got_SIGINT || sigismember(&set, SIGINT))
479 		return dying_signal = SIGINT;
480 	if (got_SIGHUP || sigismember(&set, SIGHUP))
481 		return dying_signal = SIGHUP;
482 	if (got_SIGQUIT || sigismember(&set, SIGQUIT))
483 		return dying_signal = SIGQUIT;
484 	if (got_SIGTERM || sigismember(&set, SIGTERM))
485 		return dying_signal = SIGTERM;
486 	return 0;
487 }
488 
489 void
490 handle_all_signals(void)
491 {
492 	if (got_SIGINFO)
493 		handle_siginfo();
494 	while (got_fatal) {
495 		got_fatal = 0;
496 		aborting = ABORT_INTERRUPT;
497 
498 		if (got_SIGINT) {
499 			got_SIGINT=0;
500 			handle_fatal_signal(SIGINT);
501 		}
502 		if (got_SIGHUP) {
503 			got_SIGHUP=0;
504 			handle_fatal_signal(SIGHUP);
505 		}
506 		if (got_SIGQUIT) {
507 			got_SIGQUIT=0;
508 			handle_fatal_signal(SIGQUIT);
509 		}
510 		if (got_SIGTERM) {
511 			got_SIGTERM=0;
512 			handle_fatal_signal(SIGTERM);
513 		}
514 	}
515 }
516 
517 static void
518 debug_vprintf(const char *fmt, va_list va)
519 {
520 	(void)printf("[%ld] ", (long)mypid);
521 	(void)vprintf(fmt, va);
522 	fflush(stdout);
523 }
524 
525 void
526 debug_job_printf(const char *fmt, ...)
527 {
528 	if (DEBUG(JOB)) {
529 		va_list va;
530 		va_start(va, fmt);
531 		debug_vprintf(fmt, va);
532 		va_end(va);
533 	}
534 }
535 
536 static void
537 debug_kill_printf(const char *fmt, ...)
538 {
539 	if (DEBUG(KILL)) {
540 		va_list va;
541 		va_start(va, fmt);
542 		debug_vprintf(fmt, va);
543 		va_end(va);
544 	}
545 }
546 
547 /*-
548  *-----------------------------------------------------------------------
549  * postprocess_job  --
550  *	Do final processing for the given job including updating
551  *	parents and starting new jobs as available/necessary.
552  *
553  * Side Effects:
554  *	If we got an error and are aborting (aborting == ABORT_ERROR) and
555  *	the job list is now empty, we are done for the day.
556  *	If we recognized an error we set the aborting flag
557  *	to ABORT_ERROR so no more jobs will be started.
558  *-----------------------------------------------------------------------
559  */
560 /*ARGSUSED*/
561 
562 static void
563 postprocess_job(Job *job, bool okay)
564 {
565 	if (okay &&
566 	    aborting != ABORT_ERROR &&
567 	    aborting != ABORT_INTERRUPT) {
568 		/* As long as we aren't aborting and the job didn't return a
569 		 * non-zero status that we shouldn't ignore, we call
570 		 * Make_Update to update the parents. */
571 		job->node->built_status = MADE;
572 		Make_Update(job->node);
573 		free(job);
574 	}
575 
576 	if (errorJobs != NULL && !keepgoing &&
577 	    aborting != ABORT_INTERRUPT)
578 		aborting = ABORT_ERROR;
579 
580 	if (aborting == ABORT_ERROR && DEBUG(QUICKDEATH))
581 		handle_fatal_signal(SIGINT);
582 	if (aborting == ABORT_ERROR && Job_Empty())
583 		Finish();
584 }
585 
586 /* expensive jobs handling: in order to avoid forking an exponential number
587  * of jobs, make tries to figure out "recursive make" configurations.
588  * It may err on the side of caution.
589  * Basically, a command is "expensive" if it's likely to fork an extra
590  * level of make: either by looking at the command proper, or if it has
591  * some specific qualities ('+cmd' are likely to be recursive, as are
592  * .MAKE: commands).  It's possible to explicitly say some targets are
593  * expensive or cheap with .EXPENSIVE or .CHEAP.
594  *
595  * While an expensive command is running, no_new_jobs
596  * is set, so jobs that would fork new processes are accumulated in the
597  * heldJobs list instead.
598  *
599  * This heuristics is also used on error exit: we display silent commands
600  * that failed, unless those ARE expensive commands: expensive commands
601  * are likely to not be failing by themselves, but to be the result of
602  * a cascade of failures in descendant makes.
603  */
604 void
605 determine_expensive_job(Job *job)
606 {
607 	if (expensive_job(job)) {
608 		job->flags |= JOB_IS_EXPENSIVE;
609 		no_new_jobs = true;
610 	} else
611 		job->flags &= ~JOB_IS_EXPENSIVE;
612 	if (DEBUG(EXPENSIVE))
613 		fprintf(stderr, "[%ld] Target %s running %.50s: %s\n",
614 		    (long)mypid, job->node->name, job->cmd,
615 		    job->flags & JOB_IS_EXPENSIVE ? "expensive" : "cheap");
616 }
617 
618 static bool
619 expensive_job(Job *job)
620 {
621 	if (job->node->type & OP_CHEAP)
622 		return false;
623 	if (job->node->type & (OP_EXPENSIVE | OP_MAKE))
624 		return true;
625 	return expensive_command(job->cmd);
626 }
627 
/*
 * expensive_command(s):
 *	guess from its text whether command s runs another make.
 *
 *	Comments ('#') are always cheap, '+' commands always expensive.
 *	Otherwise the command is expensive if it contains "make" in a word
 *	that has no '.' or '/' after the "make" (so "gmake" counts, but
 *	"make.sh" or "make/file" do not).  Words starting with -I, when
 *	preceded by blank space, are skipped altogether.
 */
static bool
expensive_command(const char *s)
{
	const char *cur;
	bool skipping = false;	/* inside a -I word */

	/* okay, comments are cheap, always */
	if (*s == '#')
		return false;
	/* and commands we always execute are expensive */
	if (*s == '+')
		return true;

	for (cur = s; *cur != '\0'; cur++) {
		/* every blank decides anew whether the next word is -I... */
		if (*cur == ' ' || *cur == '\t')
			skipping = cur[1] == '-' && cur[2] == 'I';
		if (skipping || strncmp(cur, "make", 4) != 0)
			continue;

		/* step onto the final 'e', then look at the rest of the word */
		cur += 3;
		for (;;) {
			char next = cur[1];

			if (next == '\0' || next == ' ' || next == '\t')
				return true;
			if (next == '.' || next == '/')
				break;
			cur++;
		}
	}
	return false;
}
677 
678 static Job *
679 prepare_job(GNode *gn)
680 {
681 	/* a new job is prepared unless its commands are bogus (we don't
682 	 * have anything for it), or if we're in touch mode.
683 	 *
684 	 * Note that even in noexec mode, some commands may still run
685 	 * thanks to the +cmd construct.
686 	 */
687 	if (node_find_valid_commands(gn)) {
688 		if (touchFlag) {
689 			Job_Touch(gn);
690 			return NULL;
691 		} else {
692 			Job *job;
693 
694 			job = emalloc(sizeof(Job));
695 			if (job == NULL)
696 				Punt("can't create job: out of memory");
697 
698 			job_attach_node(job, gn);
699 			return job;
700 		}
701 	} else {
702 		node_failure(gn);
703 		return NULL;
704 	}
705 }
706 
707 static void
708 may_continue_job(Job *job)
709 {
710 	if (no_new_jobs) {
711 		if (DEBUG(EXPENSIVE))
712 			fprintf(stderr, "[%ld] expensive -> hold %s\n",
713 			    (long)mypid, job->node->name);
714 		job->next = heldJobs;
715 		heldJobs = job;
716 	} else
717 		continue_job(job);
718 }
719 
720 static void
721 continue_job(Job *job)
722 {
723 	bool finished = job_run_next(job);
724 	if (finished)
725 		remove_job(job, true);
726 	else
727 		determine_expensive_job(job);
728 }
729 
730 /*-
731  *-----------------------------------------------------------------------
732  * Job_Make  --
733  *	Start a target-creation process going for the target described
734  *	by the graph node gn.
735  *
736  * Side Effects:
737  *	A new Job node is created and  its commands continued, which
738  *	may fork the first command of that job.
739  *-----------------------------------------------------------------------
740  */
741 void
742 Job_Make(GNode *gn)
743 {
744 	Job *job;
745 
746 	job = prepare_job(gn);
747 	if (!job)
748 		return;
749 	nJobs++;
750 	may_continue_job(job);
751 }
752 
753 static void
754 determine_job_next_step(Job *job)
755 {
756 	bool okay;
757 	if (job->flags & JOB_IS_EXPENSIVE) {
758 		no_new_jobs = false;
759 		if (DEBUG(EXPENSIVE))
760 			fprintf(stderr, "[%ld] "
761 			    "Returning from expensive target %s, "
762 			    "allowing new jobs\n", (long)mypid,
763 			    job->node->name);
764 	}
765 
766 	okay = job->exit_type == JOB_EXIT_OKAY;
767 	if (!okay || job->next_cmd == NULL)
768 		remove_job(job, okay);
769 	else
770 		may_continue_job(job);
771 }
772 
773 static void
774 remove_job(Job *job, bool okay)
775 {
776 	nJobs--;
777 	postprocess_job(job, okay);
778 	while (!no_new_jobs) {
779 		if (heldJobs != NULL) {
780 			job = heldJobs;
781 			heldJobs = heldJobs->next;
782 			if (DEBUG(EXPENSIVE))
783 				fprintf(stderr, "[%ld] cheap -> release %s\n",
784 				    (long)mypid, job->node->name);
785 			continue_job(job);
786 		} else
787 			break;
788 	}
789 }
790 
791 /*
792  * job = reap_finished_job(pid):
793  * 	retrieve and remove a job from runningJobs, based on its pid
794  *
795  *	Note that we remove it right away, so that handle_signals()
796  *	is accurate.
797  */
798 static Job *
799 reap_finished_job(pid_t pid)
800 {
801 	Job **j, *job;
802 
803 	for (j = &runningJobs; *j != NULL; j = &((*j)->next))
804 		if ((*j)->pid == pid) {
805 			job = *j;
806 			*j = job->next;
807 			return job;
808 		}
809 
810 	return NULL;
811 }
812 
813 /*
814  * classic waitpid handler: retrieve as many dead children as possible.
815  * returns true if succesful
816  */
817 static bool
818 reap_jobs(void)
819 {
820  	pid_t pid;	/* pid of dead child */
821  	int status;	/* Exit/termination status */
822 	bool reaped = false;
823 	Job *job;
824 
825 	while ((pid = waitpid(WAIT_ANY, &status, WNOHANG)) > 0) {
826 		reaped = true;
827 		job = reap_finished_job(pid);
828 
829 		if (job == NULL) {
830 			Punt("Child (%ld) not in table?", (long)pid);
831 		} else {
832 			job_handle_status(job, status);
833 			determine_job_next_step(job);
834 		}
835 	}
836 	/* sanity check, should not happen */
837 	if (pid == -1 && errno == ECHILD && runningJobs != NULL)
838 		Punt("Process has no children, but runningJobs is not empty ?");
839 	return reaped;
840 }
841 
842 void
843 handle_running_jobs(void)
844 {
845 	sigset_t old;
846 	/* reaping children in the presence of caught signals */
847 
848 	/* first, we make sure to hold on new signals, to synchronize
849 	 * reception of new stuff on sigsuspend
850 	 */
851 	sigprocmask(SIG_BLOCK, &sigset, &old);
852 	/* note this will NOT loop until runningJobs == NULL.
853 	 * It's merely an optimisation, namely that we don't need to go
854 	 * through the logic if no job is present. As soon as a job
855 	 * gets reaped, we WILL exit the loop through the break.
856 	 */
857 	while (runningJobs != NULL) {
858 		/* did we already have pending stuff that advances things ?
859 		 * then handle_all_signals() will not return
860 		 * or reap_jobs() will reap_jobs()
861 		 */
862 		handle_all_signals();
863 		if (reap_jobs())
864 			break;
865 		/* okay, so it's safe to suspend, we have nothing to do but
866 		 * wait...
867 		 */
868 		sigsuspend(&emptyset);
869 	}
870 	sigprocmask(SIG_SETMASK, &old, NULL);
871 }
872 
873 void
874 handle_one_job(Job *job)
875 {
876 	int stat;
877 	int status;
878 	sigset_t old;
879 
880 	sigprocmask(SIG_BLOCK, &sigset, &old);
881 	while (1) {
882 		handle_all_signals();
883 		stat = waitpid(job->pid, &status, WNOHANG);
884 		if (stat == job->pid)
885 			break;
886 		sigsuspend(&emptyset);
887 	}
888 	runningJobs = NULL;
889 	job_handle_status(job, status);
890 	sigprocmask(SIG_SETMASK, &old, NULL);
891 }
892 
893 static void
894 loop_handle_running_jobs()
895 {
896 	while (runningJobs != NULL)
897 		handle_running_jobs();
898 }
899 
900 void
901 Job_Init(int maxproc)
902 {
903 	runningJobs = NULL;
904 	heldJobs = NULL;
905 	errorJobs = NULL;
906 	maxJobs = maxproc;
907 	mypid = getpid();
908 
909 	nJobs = 0;
910 
911 	aborting = 0;
912 	setup_all_signals();
913 }
914 
915 bool
916 can_start_job(void)
917 {
918 	if (aborting || nJobs >= maxJobs)
919 		return false;
920 	else
921 		return true;
922 }
923 
924 bool
925 Job_Empty(void)
926 {
927 	return runningJobs == NULL;
928 }
929 
930 /*-
931  *-----------------------------------------------------------------------
932  * handle_fatal_signal --
933  *	Handle the receipt of a fatal interrupt
934  *
935  * Side Effects:
936  *	All children are killed. Another job may be started if there
937  *	is an interrupt target and the signal was SIGINT.
938  *-----------------------------------------------------------------------
939  */
940 static void
941 handle_fatal_signal(int signo)
942 {
943 	Job *job;
944 
945 	debug_kill_printf("handle_fatal_signal(%d) called.\n", signo);
946 
947 	dying_signal = signo;
948 	for (job = runningJobs; job != NULL; job = job->next) {
949 		debug_kill_printf("passing to "
950 		    "child %ld running %s: %s\n", (long)job->pid,
951 		    job->node->name, really_kill(job, signo));
952 		may_remove_target(job);
953 	}
954 
955 	if (signo == SIGINT && !touchFlag) {
956 		if ((interrupt_node->type & OP_DUMMY) == 0) {
957 			ignoreErrors = false;
958 
959 			Job_Make(interrupt_node);
960 		}
961 	}
962 	loop_handle_running_jobs();
963 	internal_print_errors();
964 
965 	/* die by that signal */
966 	sigprocmask(SIG_BLOCK, &sigset, NULL);
967 	signal(signo, SIG_DFL);
968 	kill(getpid(), signo);
969 	sigprocmask(SIG_SETMASK, &emptyset, NULL);
970 	/*NOTREACHED*/
971 	fprintf(stderr, "This should never happen\n");
972 	exit(1);
973 }
974 
975 /*
976  *-----------------------------------------------------------------------
977  * Job_Finish --
978  *	Do final processing such as the running of the commands
979  *	attached to the .END target.
980  *
981  *	return true if fatal errors have happened.
982  *-----------------------------------------------------------------------
983  */
984 bool
985 Job_Finish(void)
986 {
987 	bool problem = errorJobs != NULL;
988 
989 	if ((end_node->type & OP_DUMMY) == 0) {
990 		if (problem) {
991 			Error("Errors reported so .END ignored");
992 		} else {
993 			Job_Make(end_node);
994 			loop_handle_running_jobs();
995 		}
996 	}
997 	return problem;
998 }
999 
1000 void
1001 Job_Begin(void)
1002 {
1003 	if ((begin_node->type & OP_DUMMY) == 0) {
1004 		Job_Make(begin_node);
1005 		loop_handle_running_jobs();
1006 	}
1007 }
1008 
1009 /*-
1010  *-----------------------------------------------------------------------
1011  * Job_Wait --
1012  *	Waits for all running jobs to finish and returns. Sets 'aborting'
1013  *	to ABORT_WAIT to prevent other jobs from starting.
1014  *
1015  * Side Effects:
1016  *	Currently running jobs finish.
1017  *
1018  *-----------------------------------------------------------------------
1019  */
1020 void
1021 Job_Wait(void)
1022 {
1023 	aborting = ABORT_WAIT;
1024 	loop_handle_running_jobs();
1025 	aborting = 0;
1026 }
1027 
1028 /*-
1029  *-----------------------------------------------------------------------
1030  * Job_AbortAll --
1031  *	Abort all currently running jobs without handling output or anything.
1032  *	This function is to be called only in the event of a major
1033  *	error.
1034  *
1035  * Side Effects:
1036  *	All children are killed
1037  *-----------------------------------------------------------------------
1038  */
1039 void
1040 Job_AbortAll(void)
1041 {
1042 	Job *job;	/* the job descriptor in that element */
1043 	int foo;
1044 
1045 	aborting = ABORT_ERROR;
1046 
1047 	for (job = runningJobs; job != NULL; job = job->next) {
1048 		killpg(job->pid, SIGINT);
1049 		killpg(job->pid, SIGKILL);
1050 	}
1051 
1052 	/*
1053 	 * Catch as many children as want to report in at first, then give up
1054 	 */
1055 	while (waitpid(WAIT_ANY, &foo, WNOHANG) > 0)
1056 		continue;
1057 }
1058