xref: /openbsd-src/usr.bin/make/job.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: job.c,v 1.164 2023/03/08 04:43:11 guenther Exp $	*/
2 /*	$NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Marc Espie.
6  *
7  * Extensive code modifications for the OpenBSD project.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
22  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
32  * Copyright (c) 1988, 1989 by Adam de Boor
33  * Copyright (c) 1989 by Berkeley Softworks
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Adam de Boor.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 /*-
65  * job.c --
66  *	handle the creation etc. of our child processes.
67  *
68  * Interface:
69  *	Job_Make		Start the creation of the given target.
70  *
71  *	Job_Init		Called to initialize this module.
72  *
73  *	can_start_job		Return true if we can start job
74  *
75  *	Job_Empty		Return true if the job table is completely
76  *				empty.
77  *
78  *	Job_AbortAll		Abort all current jobs. It doesn't
79  *				handle output or do anything for the jobs,
80  *				just kills them.
81  *
82  *	Job_Wait		Wait for all running jobs to finish.
83  */
84 
85 #include <sys/types.h>
86 #include <sys/wait.h>
87 #include <ctype.h>
88 #include <errno.h>
89 #include <fcntl.h>
90 #include <signal.h>
91 #include <stdarg.h>
92 #include <stdio.h>
93 #include <stdlib.h>
94 #include <string.h>
95 #include <unistd.h>
96 #include "config.h"
97 #include "defines.h"
98 #include "job.h"
99 #include "engine.h"
100 #include "pathnames.h"
101 #include "var.h"
102 #include "targ.h"
103 #include "error.h"
104 #include "extern.h"
105 #include "lst.h"
106 #include "gnode.h"
107 #include "memory.h"
108 #include "buf.h"
109 #include "enginechoice.h"
110 
/*
 * Abort state of this make.  0 (as set by Job_Init and Job_Wait) means
 * we are not aborting; any other value is one of the ABORT_* reasons.
 */
static int	aborting = 0;	    /* why is the make aborting? */
#define ABORT_ERROR	1	    /* Because of an error */
#define ABORT_INTERRUPT 2	    /* Because it was interrupted */
#define ABORT_WAIT	3	    /* Waiting for jobs to finish */

static bool	no_new_jobs;	/* Set while an expensive (most likely
				 * recursive) job is running, so that we
				 * do not start something else at the
				 * same time
				 */
bool sequential;		/* Set by Job_Init when maxJobs is 1 */
Job *runningJobs;		/* Jobs currently running a process */
Job *errorJobs;			/* Jobs in error at end */
Job *availableJobs;		/* Pool of available jobs */
static Job *heldJobs;		/* Jobs not running yet because of expensive */
static pid_t mypid;		/* Used for printing debugging messages */
static Job *extra_job;		/* Needed for .INTERRUPT */

/*
 * Set by notice_signal() when SIGINT, SIGHUP, SIGQUIT or SIGTERM is
 * caught; consumed by handle_all_signals().
 */
static volatile sig_atomic_t got_fatal;

/* Per-signal counters bumped by notice_signal(). */
static volatile sig_atomic_t got_SIGINT, got_SIGHUP, got_SIGQUIT, got_SIGTERM,
    got_SIGINFO;

/*
 * sigset:   signals for which setup_signal() installed notice_signal().
 * emptyset: empty set, used as the mask for sigsuspend().
 * origset:  signal mask saved by Sigset_Init(), restored by
 *	     reset_signal_mask().
 */
static sigset_t sigset, emptyset, origset;

/* Forward declarations of the file-local functions. */
static void handle_fatal_signal(int);
static void handle_siginfo(void);
static void postprocess_job(Job *);
static void determine_job_next_step(Job *);
static void may_continue_job(Job *);
static Job *reap_finished_job(pid_t);
static bool reap_jobs(void);
static void may_continue_heldback_jobs(void);

static bool expensive_job(Job *);
static bool expensive_command(const char *);
static void setup_signal(int);
static void notice_signal(int);
static void setup_all_signals(void);
static const char *really_kill(Job *, int);
static void debug_kill_printf(const char *, ...);
static void debug_vprintf(const char *, va_list);
static void may_remove_target(Job *);
static void print_error(Job *);
static void internal_print_errors(void);

/*
 * Signal we are dying from, or 0.  Set by handle_fatal_signal() and
 * cached by check_dying_signal().
 */
static int dying_signal = 0;

/*
 * When non-NULL, buf_addcurdir() strips this leading directory from
 * .CURDIR before printing it.
 */
const char *	basedirectory = NULL;
158 
159 static const char *
160 really_kill(Job *job, int signo)
161 {
162 	pid_t pid = job->pid;
163 	if (getpgid(pid) != getpgrp()) {
164 		if (killpg(pid, signo) == 0)
165 			return "group got signal";
166 	} else {
167 		if (kill(pid, signo) == 0)
168 			return "process got signal";
169 	}
170 	if (errno == ESRCH)
171 		job->flags |= JOB_LOST;
172 	return strerror(errno);
173 }
174 
175 static void
176 may_remove_target(Job *j)
177 {
178 	int dying = check_dying_signal();
179 
180 	if (dying && !noExecute && !Targ_Precious(j->node)) {
181 		const char *file = Var(TARGET_INDEX, j->node);
182 		int r = eunlink(file);
183 
184 		if (DEBUG(JOB) && r == -1)
185 			fprintf(stderr, " *** would unlink %s\n", file);
186 		if (r != -1)
187 			fprintf(stderr, " *** %s removed\n", file);
188 	}
189 }
190 
191 static void
192 buf_addcurdir(BUFFER *buf)
193 {
194 	const char *v = Var_Value(".CURDIR");
195 	if (basedirectory != NULL) {
196 		size_t len = strlen(basedirectory);
197 		if (strncmp(basedirectory, v, len) == 0 &&
198 		    v[len] == '/') {
199 			v += len+1;
200 		} else if (strcmp(basedirectory, v) == 0) {
201 			Buf_AddString(buf, ".");
202 			return;
203 		}
204 	}
205 	Buf_AddString(buf, v);
206 }
207 
208 static const char *
209 shortened_curdir(void)
210 {
211 	static BUFFER buf;
212 	static bool first = true;
213 	if (first) {
214 		Buf_Init(&buf, 0);
215 		buf_addcurdir(&buf);
216 		first = false;
217 	}
218 	return Buf_Retrieve(&buf);
219 }
220 
221 static void
222 quick_error(Job *j, int signo, bool first)
223 {
224 	if (first) {
225 		fprintf(stderr, "*** Signal SIG%s", sys_signame[signo]);
226 		fprintf(stderr, " in %s (", shortened_curdir());
227 	} else
228 		fprintf(stderr, " ");
229 
230 	fprintf(stderr, "%s", j->node->name);
231 	free(j->cmd);
232 }
233 
234 static void
235 print_error(Job *j)
236 {
237 	static bool first = true;
238 	BUFFER buf;
239 
240 	Buf_Init(&buf, 0);
241 
242 	if (j->exit_type == JOB_EXIT_BAD)
243 		Buf_printf(&buf, "*** Error %d", j->code);
244 	else if (j->exit_type == JOB_SIGNALED) {
245 		if (j->code < NSIG)
246 			Buf_printf(&buf, "*** Signal SIG%s",
247 			    sys_signame[j->code]);
248 		else
249 			Buf_printf(&buf, "*** unknown signal %d", j->code);
250 	} else
251 		Buf_printf(&buf, "*** Should not happen %d/%d",
252 		    j->exit_type, j->code);
253 	if (DEBUG(KILL) && (j->flags & JOB_LOST))
254 		Buf_AddChar(&buf, '!');
255 	if (first) {
256 		Buf_AddString(&buf, " in ");
257 		buf_addcurdir(&buf);
258 		first = false;
259 	}
260 	Buf_printf(&buf, " (%s:%lu", j->location->fname, j->location->lineno);
261 	Buf_printf(&buf, " '%s'", j->node->name);
262 	if ((j->flags & (JOB_SILENT | JOB_IS_EXPENSIVE)) == JOB_SILENT
263 	    && Buf_Size(&buf) < 140-2) {
264 		size_t len = strlen(j->cmd);
265 		Buf_AddString(&buf, ": ");
266 		if (len + Buf_Size(&buf) < 140)
267 			Buf_AddString(&buf, j->cmd);
268 		else {
269 			Buf_AddChars(&buf, 140 - Buf_Size(&buf), j->cmd);
270 			Buf_AddString(&buf, "...");
271 		}
272 	}
273 	fprintf(stderr, "%s)\n", Buf_Retrieve(&buf));
274 	Buf_Destroy(&buf);
275 	free(j->cmd);
276 }
277 static void
278 quick_summary(int signo)
279 {
280 	Job *j, *k, *jnext;
281 	bool first = true;
282 
283 	k = errorJobs;
284 	errorJobs = NULL;
285 	for (j = k; j != NULL; j = jnext) {
286 		jnext = j->next;
287 		if ((j->exit_type == JOB_EXIT_BAD && j->code == signo+128) ||
288 		    (j->exit_type == JOB_SIGNALED && j->code == signo)) {
289 			quick_error(j, signo, first);
290 			first = false;
291 		} else {
292 			j->next = errorJobs;
293 			errorJobs = j;
294 		}
295 	}
296 	if (!first)
297 		fprintf(stderr, ")\n");
298 }
299 
300 static void
301 internal_print_errors()
302 {
303 	Job *j, *k, *jnext;
304 	int dying;
305 
306 	if (!errorJobs)
307 		fprintf(stderr, "Stop in %s\n", shortened_curdir());
308 
309 	for (j = errorJobs; j != NULL; j = j->next)
310 		may_remove_target(j);
311 	dying = check_dying_signal();
312 	if (dying)
313 		quick_summary(dying);
314 	/* Print errors grouped by file name. */
315 	while (errorJobs != NULL) {
316 		/* Select the first job. */
317 		k = errorJobs;
318 		errorJobs = NULL;
319 		for (j = k; j != NULL; j = jnext) {
320 			jnext = j->next;
321 			if (j->location->fname == k->location->fname)
322 				/* Print errors with the same filename. */
323 				print_error(j);
324 			else {
325 				/* Keep others for the next iteration. */
326 				j->next = errorJobs;
327 				errorJobs = j;
328 			}
329 		}
330 	}
331 }
332 
/*
 * print_errors():
 *	public entry point for the error report.  Caught signals are
 *	processed first; if one of them was fatal, handle_all_signals()
 *	does not return and the report is printed from
 *	handle_fatal_signal() instead.
 */
void
print_errors(void)
{
	handle_all_signals();
	internal_print_errors();
}
339 
340 static void
341 setup_signal(int sig)
342 {
343 	if (signal(sig, SIG_IGN) != SIG_IGN) {
344 		(void)signal(sig, notice_signal);
345 		sigaddset(&sigset, sig);
346 	}
347 }
348 
349 static void
350 notice_signal(int sig)
351 {
352 
353 	switch(sig) {
354 	case SIGINT:
355 		got_SIGINT++;
356 		got_fatal = 1;
357 		break;
358 	case SIGHUP:
359 		got_SIGHUP++;
360 		got_fatal = 1;
361 		break;
362 	case SIGQUIT:
363 		got_SIGQUIT++;
364 		got_fatal = 1;
365 		break;
366 	case SIGTERM:
367 		got_SIGTERM++;
368 		got_fatal = 1;
369 		break;
370 	case SIGINFO:
371 		got_SIGINFO++;
372 		break;
373 	case SIGCHLD:
374 		break;
375 	}
376 }
377 
378 void
379 Sigset_Init()
380 {
381 	sigemptyset(&emptyset);
382 	sigprocmask(SIG_BLOCK, &emptyset, &origset);
383 }
384 
385 static void
386 setup_all_signals(void)
387 {
388 	sigemptyset(&sigset);
389 	/*
390 	 * Catch the four signals that POSIX specifies if they aren't ignored.
391 	 * handle_signal will take care of calling JobInterrupt if appropriate.
392 	 */
393 	setup_signal(SIGINT);
394 	setup_signal(SIGHUP);
395 	setup_signal(SIGQUIT);
396 	setup_signal(SIGTERM);
397 	/* Display running jobs on SIGINFO */
398 	setup_signal(SIGINFO);
399 	/* Have to see SIGCHLD */
400 	setup_signal(SIGCHLD);
401 	got_fatal = 0;
402 }
403 
404 static void
405 handle_siginfo(void)
406 {
407 	static BUFFER buf;
408 	static size_t length = 0;
409 
410 	Job *job;
411 	bool first = true;
412 
413 	got_SIGINFO = 0;
414 	/* we have to store the info in a buffer, because status from all
415 	 * makes running would get intermixed otherwise
416 	 */
417 
418 	if (length == 0) {
419 		Buf_Init(&buf, 0);
420 		Buf_printf(&buf, "%s in ", Var_Value("MAKE"));
421 		buf_addcurdir(&buf);
422 		Buf_AddString(&buf, ": ");
423 		length = Buf_Size(&buf);
424 	} else
425 		Buf_Truncate(&buf, length);
426 
427 	for (job = runningJobs; job != NULL ; job = job->next) {
428 		if (!first)
429 			Buf_puts(&buf, ", ");
430 		first = false;
431 		Buf_puts(&buf, job->node->name);
432 	}
433 	Buf_puts(&buf, first ? "nothing running\n" : "\n");
434 
435 	fputs(Buf_Retrieve(&buf), stderr);
436 }
437 
438 int
439 check_dying_signal(void)
440 {
441 	sigset_t set;
442 	if (dying_signal)
443 		return dying_signal;
444 	sigpending(&set);
445 	if (got_SIGINT || sigismember(&set, SIGINT))
446 		return dying_signal = SIGINT;
447 	if (got_SIGHUP || sigismember(&set, SIGHUP))
448 		return dying_signal = SIGHUP;
449 	if (got_SIGQUIT || sigismember(&set, SIGQUIT))
450 		return dying_signal = SIGQUIT;
451 	if (got_SIGTERM || sigismember(&set, SIGTERM))
452 		return dying_signal = SIGTERM;
453 	return 0;
454 }
455 
456 void
457 handle_all_signals(void)
458 {
459 	if (got_SIGINFO)
460 		handle_siginfo();
461 	while (got_fatal) {
462 		got_fatal = 0;
463 		aborting = ABORT_INTERRUPT;
464 
465 		if (got_SIGINT) {
466 			got_SIGINT=0;
467 			handle_fatal_signal(SIGINT);
468 		}
469 		if (got_SIGHUP) {
470 			got_SIGHUP=0;
471 			handle_fatal_signal(SIGHUP);
472 		}
473 		if (got_SIGQUIT) {
474 			got_SIGQUIT=0;
475 			handle_fatal_signal(SIGQUIT);
476 		}
477 		if (got_SIGTERM) {
478 			got_SIGTERM=0;
479 			handle_fatal_signal(SIGTERM);
480 		}
481 	}
482 }
483 
484 static void
485 debug_vprintf(const char *fmt, va_list va)
486 {
487 	(void)printf("[%ld] ", (long)mypid);
488 	(void)vprintf(fmt, va);
489 	fflush(stdout);
490 }
491 
492 void
493 debug_job_printf(const char *fmt, ...)
494 {
495 	if (DEBUG(JOB)) {
496 		va_list va;
497 		va_start(va, fmt);
498 		debug_vprintf(fmt, va);
499 		va_end(va);
500 	}
501 }
502 
503 static void
504 debug_kill_printf(const char *fmt, ...)
505 {
506 	if (DEBUG(KILL)) {
507 		va_list va;
508 		va_start(va, fmt);
509 		debug_vprintf(fmt, va);
510 		va_end(va);
511 	}
512 }
513 
514 /*-
515  *-----------------------------------------------------------------------
516  * postprocess_job  --
517  *	Do final processing for the given job including updating
518  *	parents and starting new jobs as available/necessary.
519  *
520  * Side Effects:
521  *	If we got an error and are aborting (aborting == ABORT_ERROR) and
522  *	the job list is now empty, we are done for the day.
523  *	If we recognized an error we set the aborting flag
524  *	to ABORT_ERROR so no more jobs will be started.
525  *-----------------------------------------------------------------------
526  */
527 
528 static void
529 postprocess_job(Job *job)
530 {
531 	if (job->exit_type == JOB_EXIT_OKAY &&
532 	    aborting != ABORT_ERROR &&
533 	    aborting != ABORT_INTERRUPT) {
534 		/* As long as we aren't aborting and the job didn't return a
535 		 * non-zero status that we shouldn't ignore, we call
536 		 * Make_Update to update the parents. */
537 		job->node->built_status = REBUILT;
538 		engine_node_updated(job->node);
539 	}
540 	if (job->flags & JOB_KEEPERROR) {
541 		job->next = errorJobs;
542 		errorJobs = job;
543 	} else {
544 		job->next = availableJobs;
545 		availableJobs = job;
546 	}
547 
548 	if (errorJobs != NULL && aborting != ABORT_INTERRUPT)
549 		aborting = ABORT_ERROR;
550 
551 	if (aborting == ABORT_ERROR && DEBUG(QUICKDEATH))
552 		handle_fatal_signal(SIGINT);
553 	if (aborting == ABORT_ERROR && Job_Empty())
554 		Finish();
555 }
556 
557 /* expensive jobs handling: in order to avoid forking an exponential number
558  * of jobs, make tries to figure out "recursive make" configurations.
559  * It may err on the side of caution.
560  * Basically, a command is "expensive" if it's likely to fork an extra
561  * level of make: either by looking at the command proper, or if it has
562  * some specific qualities ('+cmd' are likely to be recursive, as are
563  * .MAKE: commands).  It's possible to explicitly say some targets are
564  * expensive or cheap with .EXPENSIVE or .CHEAP.
565  *
566  * While an expensive command is running, no_new_jobs
567  * is set, so jobs that would fork new processes are accumulated in the
568  * heldJobs list instead.
569  *
570  * XXX This heuristics is also used on error exit: we display silent commands
571  * that failed, unless those ARE expensive commands: expensive commands are
572  * likely to not be failing by themselves, but to be the result of a cascade of
573  * failures in descendant makes.
574  */
575 void
576 determine_expensive_job(Job *job)
577 {
578 	if (expensive_job(job)) {
579 		job->flags |= JOB_IS_EXPENSIVE;
580 		no_new_jobs = true;
581 	} else
582 		job->flags &= ~JOB_IS_EXPENSIVE;
583 	if (DEBUG(EXPENSIVE))
584 		fprintf(stderr, "[%ld] Target %s running %.50s: %s\n",
585 		    (long)mypid, job->node->name, job->cmd,
586 		    job->flags & JOB_IS_EXPENSIVE ? "expensive" : "cheap");
587 }
588 
589 static bool
590 expensive_job(Job *job)
591 {
592 	if (job->node->type & OP_CHEAP)
593 		return false;
594 	if (job->node->type & (OP_EXPENSIVE | OP_MAKE))
595 		return true;
596 	return expensive_command(job->cmd);
597 }
598 
/*
 * expensive_command(s):
 *	guess from its text whether command s starts another make.
 *
 *	A command starting with '#' is never expensive, a command starting
 *	with '+' always is.  Otherwise the command is expensive if it
 *	contains "make" and the rest of that word (up to the next blank,
 *	tab or the end of the string) contains neither '.' nor '/'.
 *	Words that follow a blank or tab and start with "-I" are skipped
 *	entirely.
 */
static bool
expensive_command(const char *s)
{
	bool in_include = false;
	size_t i = 0;

	/* okay, comments are cheap, always */
	if (s[0] == '#')
		return false;
	/* and commands we always execute are expensive */
	if (s[0] == '+')
		return true;

	while (s[i] != '\0') {
		size_t end;

		/* every blank decides anew whether a -I word follows */
		if (s[i] == ' ' || s[i] == '\t')
			in_include = s[i + 1] == '-' && s[i + 2] == 'I';

		if (in_include || s[i] != 'm' || s[i + 1] != 'a' ||
		    s[i + 2] != 'k' || s[i + 3] != 'e') {
			i++;
			continue;
		}

		/* found "make": look at what remains of the word */
		end = i + 4;
		while (s[end] != '\0' && s[end] != ' ' && s[end] != '\t' &&
		    s[end] != '.' && s[end] != '/')
			end++;
		if (s[end] != '.' && s[end] != '/')
			return true;

		/* a file name or a path: go on right at the '.' or '/' */
		i = end;
	}
	return false;
}
648 
649 static void
650 may_continue_job(Job *job)
651 {
652 	if (no_new_jobs) {
653 		if (DEBUG(EXPENSIVE))
654 			fprintf(stderr, "[%ld] expensive -> hold %s\n",
655 			    (long)mypid, job->node->name);
656 		job->next = heldJobs;
657 		heldJobs = job;
658 	} else {
659 		bool finished = job_run_next(job);
660 		if (finished)
661 			postprocess_job(job);
662 		else if (!sequential)
663 			determine_expensive_job(job);
664 	}
665 }
666 
667 static void
668 may_continue_heldback_jobs()
669 {
670 	while (!no_new_jobs) {
671 		if (heldJobs != NULL) {
672 			Job *job = heldJobs;
673 			heldJobs = heldJobs->next;
674 			if (DEBUG(EXPENSIVE))
675 				fprintf(stderr, "[%ld] cheap -> release %s\n",
676 				    (long)mypid, job->node->name);
677 			may_continue_job(job);
678 		} else
679 			break;
680 	}
681 }
682 
683 /*-
684  *-----------------------------------------------------------------------
685  * Job_Make  --
686  *	Start a target-creation process going for the target described
687  *	by the graph node gn.
688  *
689  * Side Effects:
690  *	A new Job node is created and  its commands continued, which
691  *	may fork the first command of that job.
692  *-----------------------------------------------------------------------
693  */
694 void
695 Job_Make(GNode *gn)
696 {
697 	Job *job = availableJobs;
698 
699 	assert(job != NULL);
700 	availableJobs = availableJobs->next;
701 	job_attach_node(job, gn);
702 	may_continue_job(job);
703 }
704 
705 static void
706 determine_job_next_step(Job *job)
707 {
708 	if (job->flags & JOB_IS_EXPENSIVE) {
709 		no_new_jobs = false;
710 		if (DEBUG(EXPENSIVE))
711 			fprintf(stderr, "[%ld] "
712 			    "Returning from expensive target %s, "
713 			    "allowing new jobs\n", (long)mypid,
714 			    job->node->name);
715 	}
716 
717 	if (job->exit_type != JOB_EXIT_OKAY || job->next_cmd == NULL)
718 		postprocess_job(job);
719 	else
720 		may_continue_job(job);
721 }
722 
723 /*
724  * job = reap_finished_job(pid):
725  * 	retrieve and remove a job from runningJobs, based on its pid
726  *
727  *	Note that we remove it right away, so that handle_signals()
728  *	is accurate.
729  */
730 static Job *
731 reap_finished_job(pid_t pid)
732 {
733 	Job **j, *job;
734 
735 	for (j = &runningJobs; *j != NULL; j = &((*j)->next))
736 		if ((*j)->pid == pid) {
737 			job = *j;
738 			*j = job->next;
739 			return job;
740 		}
741 
742 	return NULL;
743 }
744 
745 /*
746  * classic waitpid handler: retrieve as many dead children as possible.
747  * returns true if successful
748  */
749 static bool
750 reap_jobs(void)
751 {
752  	pid_t pid;	/* pid of dead child */
753  	int status;	/* Exit/termination status */
754 	bool reaped = false;
755 	Job *job;
756 
757 	while ((pid = waitpid(WAIT_ANY, &status, WNOHANG)) > 0) {
758 		if (WIFSTOPPED(status))
759 			continue;
760 		reaped = true;
761 		job = reap_finished_job(pid);
762 
763 		if (job == NULL) {
764 			Punt("Child (%ld) with status %d not in table?",
765 			    (long)pid, status);
766 		} else {
767 			handle_job_status(job, status);
768 			determine_job_next_step(job);
769 		}
770 		may_continue_heldback_jobs();
771 	}
772 	/* sanity check, should not happen */
773 	if (pid == -1 && errno == ECHILD && runningJobs != NULL)
774 		Punt("Process has no children, but runningJobs is not empty ?");
775 	return reaped;
776 }
777 
778 void
779 reset_signal_mask()
780 {
781 	sigprocmask(SIG_SETMASK, &origset, NULL);
782 }
783 
784 void
785 handle_running_jobs(void)
786 {
787 	/* reaping children in the presence of caught signals */
788 
789 	/* first, we make sure to hold on new signals, to synchronize
790 	 * reception of new stuff on sigsuspend
791 	 */
792 	sigprocmask(SIG_BLOCK, &sigset, NULL);
793 	/* note this will NOT loop until runningJobs == NULL.
794 	 * It's merely an optimisation, namely that we don't need to go
795 	 * through the logic if no job is present. As soon as a job
796 	 * gets reaped, we WILL exit the loop through the break.
797 	 */
798 	while (runningJobs != NULL) {
799 		/* did we already have pending stuff that advances things ?
800 		 * then handle_all_signals() will not return
801 		 * or reap_jobs() will reap_jobs()
802 		 */
803 		handle_all_signals();
804 		if (reap_jobs())
805 			break;
806 		/* okay, so it's safe to suspend, we have nothing to do but
807 		 * wait...
808 		 */
809 		sigsuspend(&emptyset);
810 	}
811 	reset_signal_mask();
812 }
813 
814 void
815 loop_handle_running_jobs()
816 {
817 	while (runningJobs != NULL)
818 		handle_running_jobs();
819 }
820 
821 void
822 Job_Init(int maxJobs)
823 {
824 	Job *j;
825 	int i;
826 
827 	runningJobs = NULL;
828 	heldJobs = NULL;
829 	errorJobs = NULL;
830 	availableJobs = NULL;
831 	sequential = maxJobs == 1;
832 
833 	/* we allocate n+1 jobs, since we may need an extra job for
834 	 * running .INTERRUPT.  */
835 	j = ereallocarray(NULL, sizeof(Job), maxJobs+1);
836 	for (i = 0; i != maxJobs; i++) {
837 		j[i].next = availableJobs;
838 		availableJobs = &j[i];
839 	}
840 	extra_job = &j[maxJobs];
841 	mypid = getpid();
842 
843 	aborting = 0;
844 	setup_all_signals();
845 }
846 
847 bool
848 can_start_job(void)
849 {
850 	if (aborting || availableJobs == NULL)
851 		return false;
852 	else
853 		return true;
854 }
855 
/*
 * Job_Empty():
 *	return true if no job is currently running a process.  Jobs held
 *	back on heldJobs are not taken into account.
 */
bool
Job_Empty(void)
{
	return runningJobs == NULL;
}
861 
862 /*-
863  *-----------------------------------------------------------------------
864  * handle_fatal_signal --
865  *	Handle the receipt of a fatal interrupt
866  *
867  * Side Effects:
868  *	All children are killed. Another job may be started if there
869  *	is an interrupt target and the signal was SIGINT.
870  *-----------------------------------------------------------------------
871  */
872 static void
873 handle_fatal_signal(int signo)
874 {
875 	Job *job;
876 
877 	debug_kill_printf("handle_fatal_signal(%d) called.\n", signo);
878 
879 	dying_signal = signo;
880 	for (job = runningJobs; job != NULL; job = job->next) {
881 		debug_kill_printf("passing to "
882 		    "child %ld running %s: %s\n", (long)job->pid,
883 		    job->node->name, really_kill(job, signo));
884 		may_remove_target(job);
885 	}
886 
887 	if (signo == SIGINT && !touchFlag) {
888 		if ((interrupt_node->type & OP_DUMMY) == 0) {
889 			ignoreErrors = false;
890 			extra_job->next = availableJobs;
891 			availableJobs = extra_job;
892 			Job_Make(interrupt_node);
893 		}
894 	}
895 	loop_handle_running_jobs();
896 	internal_print_errors();
897 
898 	/* die by that signal */
899 	sigprocmask(SIG_BLOCK, &sigset, NULL);
900 	signal(signo, SIG_DFL);
901 	kill(getpid(), signo);
902 	sigprocmask(SIG_SETMASK, &emptyset, NULL);
903 	/*NOTREACHED*/
904 	fprintf(stderr, "This should never happen\n");
905 	exit(1);
906 }
907 
/*-
 *-----------------------------------------------------------------------
 * Job_Wait --
 *	Waits for all running jobs to finish and returns. 'aborting' is
 *	set to ABORT_WAIT for the duration of the wait, which prevents
 *	other jobs from starting (see can_start_job), and is reset to 0
 *	before returning.
 *
 * Side Effects:
 *	Currently running jobs finish.
 *
 *-----------------------------------------------------------------------
 */
void
Job_Wait(void)
{
	aborting = ABORT_WAIT;
	loop_handle_running_jobs();
	aborting = 0;
}
926 
927 /*-
928  *-----------------------------------------------------------------------
929  * Job_AbortAll --
930  *	Abort all currently running jobs without handling output or anything.
931  *	This function is to be called only in the event of a major
932  *	error.
933  *
934  * Side Effects:
935  *	All children are killed
936  *-----------------------------------------------------------------------
937  */
938 void
939 Job_AbortAll(void)
940 {
941 	Job *job;	/* the job descriptor in that element */
942 	int foo;
943 
944 	aborting = ABORT_ERROR;
945 
946 	for (job = runningJobs; job != NULL; job = job->next) {
947 		debug_kill_printf("abort: send SIGINT to "
948 		    "child %ld running %s: %s\n",
949 		    (long)job->pid, job->node->name, really_kill(job, SIGINT));
950 		debug_kill_printf("abort: send SIGKILL to "
951 		    "child %ld running %s: %s\n",
952 		    (long)job->pid, job->node->name, really_kill(job, SIGKILL));
953 	}
954 
955 	/*
956 	 * Catch as many children as want to report in at first, then give up
957 	 */
958 	while (waitpid(WAIT_ANY, &foo, WNOHANG) > 0)
959 		continue;
960 }
961