1 /* $OpenPackages$ */ 2 /* $OpenBSD: job.c,v 1.115 2008/11/11 09:32:20 espie Exp $ */ 3 /* $NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $ */ 4 5 /* 6 * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. 7 * Copyright (c) 1988, 1989 by Adam de Boor 8 * Copyright (c) 1989 by Berkeley Softworks 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * Adam de Boor. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 */ 38 39 /*- 40 * job.c -- 41 * handle the creation etc. of our child processes. 42 * 43 * Interface: 44 * Job_Make Start the creation of the given target. 45 * 46 * Job_Init Called to initialize this module. in addition, 47 * any commands attached to the .BEGIN target 48 * are executed before this function returns. 49 * Hence, the makefile must have been parsed 50 * before this function is called. 51 * 52 * Job_End Cleanup any memory used. 53 * 54 * Job_Full Return true if the job table is filled. 55 * 56 * Job_Empty Return true if the job table is completely 57 * empty. 58 * 59 * Job_Finish Perform any final processing which needs doing. 60 * This includes the execution of any commands 61 * which have been/were attached to the .END 62 * target. It should only be called when the 63 * job table is empty. 64 * 65 * Job_AbortAll Abort all currently running jobs. It doesn't 66 * handle output or do anything for the jobs, 67 * just kills them. It should only be called in 68 * an emergency, as it were. 69 * 70 * Job_Wait Wait for all currently-running jobs to finish. 71 */ 72 73 #include <sys/types.h> 74 #include <sys/wait.h> 75 #include <ctype.h> 76 #include <errno.h> 77 #include <fcntl.h> 78 #include <signal.h> 79 #include <stdarg.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 #include "config.h" 85 #include "defines.h" 86 #include "job.h" 87 #include "engine.h" 88 #include "pathnames.h" 89 #include "var.h" 90 #include "targ.h" 91 #include "error.h" 92 #include "lst.h" 93 #include "extern.h" 94 #include "gnode.h" 95 #include "memory.h" 96 #include "make.h" 97 98 /* 99 * The SEL_ constants determine the maximum amount of time spent in select 100 * before coming out to see if a child has finished. SEL_SEC is the number of 101 * seconds and SEL_USEC is the number of micro-seconds 102 */ 103 #define SEL_SEC 0 104 #define SEL_USEC 500000 105 106 107 /*- 108 * Job Table definitions. 
 *
 * Each job (see the Job structure below) carries:
 *	1) The process id of the child.
 *	2) The graph node describing the target being made by this job.
 *	3) A word of flags which determine how the module treats the job
 *	   (special target, needs restart/resume, produced output, ...).
 *	4) Two job_pipe structures, one per captured stream (index 0 is
 *	   the child's stdout, index 1 its stderr).  Each holds the read
 *	   end of the pipe, a buffer in which output is accumulated until
 *	   complete lines can be printed, and the current fill position
 *	   in that buffer.
 *
 * Jobs that are currently running are kept in the 'runningJobs' list,
 * with their number maintained in 'nJobs'.  'maxJobs' is the concurrency
 * limit set by Job_Init; jobs flagged JOB_SPECIAL may still be started
 * when that limit has been reached.  Jobs that are stopped, or that are
 * waiting for a free slot, are kept in 'stoppedJobs'.
 *
 * When a job finishes successfully, Make_Update is called on its node;
 * this takes care of the upward traversal of the dependency graph.
 */
#define JOB_BUFSIZE	1024
/* Line-buffering state for one captured output stream of a job. */
struct job_pipe {
	int fd;				/* read end of the pipe */
	char buffer[JOB_BUFSIZE];	/* output not yet printed */
	size_t pos;			/* number of valid bytes in buffer */
};

typedef struct Job_ {
	pid_t	pid;	    /* The child's process ID */
	GNode	*node;	    /* The target the child is making */
	short	flags;	    /* Flags to control treatment of job */
#define JOB_SPECIAL	0x004	/* Target is a special one. */
#define JOB_RESTART	0x080	/* Job needs to be completely restarted */
#define JOB_RESUME	0x100	/* Job needs to be resumed b/c it stopped,
				 * for some reason */
#define JOB_CONTINUING	0x200	/* We are in the process of resuming this job.
				 * Used to avoid infinite recursion between
				 * JobFinish and JobRestart */
#define JOB_DIDOUTPUT	0x001	/* Output was printed for this job during
				 * the current polling pass */
	struct job_pipe in[2];	/* [0]: child's stdout, [1]: child's stderr */
} Job;


static int	aborting = 0;	    /* why is the make aborting? */
#define ABORT_ERROR	1	    /* Because of an error */
#define ABORT_INTERRUPT 2	    /* Because it was interrupted */
#define ABORT_WAIT	3	    /* Waiting for jobs to finish */

static int	maxJobs;	/* The most children we can run at once */
static int	nJobs;		/* The number of children currently running */
static LIST	runningJobs;	/* The structures that describe them */
static bool	jobFull;	/* Flag to tell when the job table is full. It
				 * is set true when nJobs equals maxJobs */
static GNode	*lastNode;	/* The node for which output was most recently
				 * produced. */

/* data structure linked to job handling through select */
static fd_set *output_mask = NULL;	/* File descriptors to look for */

static fd_set *actual_mask = NULL;	/* actual select argument (scratch
					 * copy of output_mask) */
static int largest_fd = -1;		/* highest descriptor registered in
					 * output_mask, -1 when none */
static size_t mask_size = 0;		/* size in bytes of both masks */

static LIST	stoppedJobs;	/* Jobs that are stopped or waiting for a
				 * slot in the job table */

/* wait possibilities */
#define JOB_EXITED 0
#define JOB_SIGNALED 1
#define JOB_CONTINUED 2
#define JOB_STOPPED 3
#define JOB_UNKNOWN 4

static LIST	errorsList;	/* error_info records, in order of occurrence */
static int	errors;		/* number of records in errorsList */
/* One failed job: how it ended, the associated code, and its target. */
struct error_info {
	int reason;		/* one of the JOB_* wait possibilities */
	int code;		/* exit status or signal number */
	GNode *n;		/* target that was being made */
};


/* KILL(pid, sig): deliver sig to a job, to its whole process group when
 * jobs are run in their own group (USE_PGRP). */
#if defined(USE_PGRP) && defined(SYSV)
# define KILL(pid, sig)		killpg(-(pid), (sig))
#else
# if defined(USE_PGRP)
#  define KILL(pid, sig)	killpg((pid), (sig))
# else
#  define KILL(pid, sig)	kill((pid), (sig))
# endif
#endif

static void signal_running_jobs(int);
static void handle_all_signals(void);
static void handle_signal(int);
static int JobCmpPid(void *, void *);
static void JobFinish(Job *, int);
static void finish_job(Job *, int, int);
static void JobExec(Job *);
static void JobRestart(Job *);
static void JobStart(GNode *, int);
static void JobInterrupt(int, int);
static void JobRestartJobs(void);
static void debug_printf(const char *, ...);
static Job *prepare_job(GNode *, int);
215 static void start_queued_job(Job *); 216 static void banner(Job *, FILE *); 217 218 /*** 219 *** Input/output from jobs 220 ***/ 221 222 /* prepare_pipe(jp, &fd): 223 * set up pipe data structure (buffer and pos) corresponding to 224 * pointed fd, and prepare to watch for it. 225 */ 226 static void prepare_pipe(struct job_pipe *, int *); 227 228 /* close_job_pipes(j): 229 * handle final output from job, and close pipes properly 230 */ 231 static void close_job_pipes(Job *); 232 233 234 static void handle_all_jobs_output(void); 235 236 /* handle_job_output(job, n, finish): 237 * n = 0 or 1 (stdout/stderr), set finish to retrieve everything. 238 */ 239 static void handle_job_output(Job *, int, bool); 240 241 static void print_partial_buffer(struct job_pipe *, Job *, FILE *, size_t); 242 static void print_partial_buffer_and_shift(struct job_pipe *, Job *, FILE *, 243 size_t); 244 static bool print_complete_lines(struct job_pipe *, Job *, FILE *, size_t); 245 246 247 static void register_error(int, int, Job *); 248 static void loop_handle_running_jobs(void); 249 static void Job_CatchChildren(void); 250 251 static void 252 register_error(int reason, int code, Job *job) 253 { 254 struct error_info *p; 255 256 errors++; 257 p = emalloc(sizeof(struct error_info)); 258 p->reason = reason; 259 p->code = code; 260 p->n = job->node; 261 Lst_AtEnd(&errorsList, p); 262 } 263 264 void 265 print_errors() 266 { 267 LstNode ln; 268 struct error_info *p; 269 const char *type; 270 271 for (ln = Lst_First(&errorsList); ln != NULL; ln = Lst_Adv(ln)) { 272 p = (struct error_info *)Lst_Datum(ln); 273 switch(p->reason) { 274 case JOB_EXITED: 275 type = "Exit status"; 276 break; 277 case JOB_SIGNALED: 278 type = "Received signal"; 279 break; 280 case JOB_STOPPED: 281 type = "Stopped"; 282 break; 283 case JOB_CONTINUED: 284 type = "Continued"; 285 break; 286 default: 287 type = "Should not happen"; 288 break; 289 } 290 if (p->n->lineno) 291 Error(" %s %d (%s, line %lu of %s)", 292 type, 
p->code, p->n->name, p->n->lineno, p->n->fname); 293 else 294 Error(" %s %d (%s)", type, p->code, p->n->name); 295 } 296 } 297 298 static void 299 banner(Job *job, FILE *out) 300 { 301 if (job->node != lastNode) { 302 if (DEBUG(JOBBANNER)) 303 (void)fprintf(out, "--- %s ---\n", job->node->name); 304 lastNode = job->node; 305 } 306 } 307 308 static void 309 handle_all_signals() 310 { 311 if (got_signal) 312 got_signal = 0; 313 else 314 return; 315 316 if (got_SIGINT) { 317 got_SIGINT=0; 318 handle_signal(SIGINT); 319 } 320 if (got_SIGHUP) { 321 got_SIGHUP=0; 322 handle_signal(SIGHUP); 323 } 324 if (got_SIGQUIT) { 325 got_SIGQUIT=0; 326 handle_signal(SIGQUIT); 327 } 328 if (got_SIGTERM) { 329 got_SIGTERM=0; 330 handle_signal(SIGTERM); 331 } 332 if (got_SIGTSTP) { 333 got_SIGTSTP=0; 334 handle_signal(SIGTSTP); 335 } 336 if (got_SIGTTOU) { 337 got_SIGTTOU=0; 338 handle_signal(SIGTTOU); 339 } 340 if (got_SIGTTIN) { 341 got_SIGTTIN=0; 342 handle_signal(SIGTTIN); 343 } 344 if (got_SIGWINCH) { 345 got_SIGWINCH=0; 346 handle_signal(SIGWINCH); 347 } 348 } 349 350 static void 351 signal_running_jobs(int signo) 352 { 353 LstNode ln; 354 for (ln = Lst_First(&runningJobs); ln != NULL; ln = Lst_Adv(ln)) { 355 Job *job = Lst_Datum(ln); 356 if (DEBUG(JOB)) { 357 (void)fprintf(stdout, 358 "signal %d to child %ld.\n", 359 signo, (long)job->pid); 360 (void)fflush(stdout); 361 } 362 KILL(job->pid, signo); 363 } 364 } 365 366 /*- 367 *----------------------------------------------------------------------- 368 * handle_signal -- 369 * Pass a signal to all local jobs if USE_PGRP is defined, 370 * then die ourselves. 371 * 372 * Side Effects: 373 * We die by the same signal. 
374 *----------------------------------------------------------------------- 375 */ 376 static void 377 handle_signal(int signo) /* The signal number we've received */ 378 { 379 sigset_t nmask, omask; 380 struct sigaction act; 381 382 if (DEBUG(JOB)) { 383 (void)fprintf(stdout, "handle_signal(%d) called.\n", signo); 384 (void)fflush(stdout); 385 } 386 signal_running_jobs(signo); 387 388 /* 389 * Deal with proper cleanup based on the signal received. We only run 390 * the .INTERRUPT target if the signal was in fact an interrupt. The 391 * other three termination signals are more of a "get out *now*" 392 * command. 393 */ 394 if (signo == SIGINT) { 395 JobInterrupt(true, signo); 396 } else if (signo == SIGHUP || signo == SIGTERM || signo == SIGQUIT) { 397 JobInterrupt(false, signo); 398 } 399 400 /* 401 * Leave gracefully if SIGQUIT, rather than core dumping. 402 */ 403 if (signo == SIGQUIT) { 404 Finish(0); 405 } 406 407 /* 408 * Send ourselves the signal now we've given the message to everyone 409 * else. Note we block everything else possible while we're getting 410 * the signal. This ensures that all our jobs get continued when we 411 * wake up before we take any other signal. 
412 */ 413 sigemptyset(&nmask); 414 sigaddset(&nmask, signo); 415 sigprocmask(SIG_SETMASK, &nmask, &omask); 416 memset(&act, 0, sizeof act); 417 act.sa_handler = SIG_DFL; 418 sigemptyset(&act.sa_mask); 419 act.sa_flags = 0; 420 sigaction(signo, &act, NULL); 421 422 if (DEBUG(JOB)) { 423 (void)fprintf(stdout, 424 "handle_signal passing signal to self, mask = %x.\n", 425 ~0 & ~(1 << (signo-1))); 426 (void)fflush(stdout); 427 } 428 (void)signal(signo, SIG_DFL); 429 430 (void)KILL(getpid(), signo); 431 432 signal_running_jobs(SIGCONT); 433 434 (void)sigprocmask(SIG_SETMASK, &omask, NULL); 435 sigprocmask(SIG_SETMASK, &omask, NULL); 436 act.sa_handler = SigHandler; 437 sigaction(signo, &act, NULL); 438 } 439 440 /*- 441 *----------------------------------------------------------------------- 442 * JobCmpPid -- 443 * Compare the pid of the job with the given pid and return 0 if they 444 * are equal. This function is called from Job_CatchChildren via 445 * Lst_Find to find the job descriptor of the finished job. 446 * 447 * Results: 448 * 0 if the pid's match 449 *----------------------------------------------------------------------- 450 */ 451 static int 452 JobCmpPid(void *job, /* job to examine */ 453 void *pid) /* process id desired */ 454 { 455 return *(pid_t *)pid - ((Job *)job)->pid; 456 } 457 458 static void 459 debug_printf(const char *fmt, ...) 460 { 461 if (DEBUG(JOB)) { 462 va_list va; 463 464 va_start(va, fmt); 465 (void)vfprintf(stdout, fmt, va); 466 fflush(stdout); 467 va_end(va); 468 } 469 } 470 471 static void 472 close_job_pipes(Job *job) 473 { 474 int i; 475 476 for (i = 1; i >= 0; i--) { 477 FD_CLR(job->in[i].fd, output_mask); 478 handle_job_output(job, i, true); 479 (void)close(job->in[i].fd); 480 } 481 } 482 483 /*- 484 *----------------------------------------------------------------------- 485 * JobFinish -- 486 * Do final processing for the given job including updating 487 * parents and starting new jobs as available/necessary. 
488 * 489 * Side Effects: 490 * Some nodes may be put on the toBeMade queue. 491 * Final commands for the job are placed on end_node. 492 * 493 * If we got an error and are aborting (aborting == ABORT_ERROR) and 494 * the job list is now empty, we are done for the day. 495 * If we recognized an error we set the aborting flag 496 * to ABORT_ERROR so no more jobs will be started. 497 *----------------------------------------------------------------------- 498 */ 499 /*ARGSUSED*/ 500 501 static void 502 JobFinish(Job *job, int status) 503 { 504 int reason, code; 505 /* parse status */ 506 if (WIFEXITED(status)) { 507 reason = JOB_EXITED; 508 code = WEXITSTATUS(status); 509 } else if (WIFSIGNALED(status)) { 510 reason = JOB_SIGNALED; 511 code = WTERMSIG(status); 512 } else if (WIFCONTINUED(status)) { 513 reason = JOB_CONTINUED; 514 code = 0; 515 } else if (WIFSTOPPED(status)) { 516 reason = JOB_STOPPED; 517 code = WSTOPSIG(status); 518 } else { 519 /* can't happen, set things to be bad. */ 520 reason = UNKNOWN; 521 code = status; 522 } 523 finish_job(job, reason, code); 524 } 525 526 527 static void 528 finish_job(Job *job, int reason, int code) 529 { 530 bool done; 531 532 if ((reason == JOB_EXITED && 533 code != 0 && !(job->node->type & OP_IGNORE)) || 534 (reason == JOB_SIGNALED && code != SIGCONT)) { 535 /* 536 * If it exited non-zero and either we're doing things our 537 * way or we're not ignoring errors, the job is finished. 538 * Similarly, if the shell died because of a signal 539 * the job is also finished. In these 540 * cases, finish out the job's output before printing the exit 541 * status... 542 */ 543 close_job_pipes(job); 544 done = true; 545 } else if (reason == JOB_EXITED) { 546 /* 547 * Deal with ignored errors in -B mode. We need to print a 548 * message telling of the ignored error as well as setting 549 * status.w_status to 0 so the next command gets run. To do 550 * this, we set done to be true if in -B mode and the job 551 * exited non-zero. 
552 */ 553 done = code != 0; 554 close_job_pipes(job); 555 } else { 556 /* 557 * No need to close things down or anything. 558 */ 559 done = false; 560 } 561 562 if (reason == JOB_STOPPED) { 563 debug_printf("Process %ld stopped.\n", (long)job->pid); 564 banner(job, stdout); 565 (void)fprintf(stdout, "*** Stopped -- signal %d\n", 566 code); 567 job->flags |= JOB_RESUME; 568 Lst_AtEnd(&stoppedJobs, job); 569 (void)fflush(stdout); 570 return; 571 } 572 if (reason == JOB_SIGNALED && code == SIGCONT) { 573 /* 574 * If the beastie has continued, shift the Job from the 575 * stopped list to the running one (or re-stop it if 576 * concurrency is exceeded) and go and get another 577 * child. 578 */ 579 if (job->flags & (JOB_RESUME|JOB_RESTART)) { 580 banner(job, stdout); 581 (void)fprintf(stdout, "*** Continued\n"); 582 } 583 if (!(job->flags & JOB_CONTINUING)) { 584 debug_printf( 585 "Warning: " 586 "process %ld was not continuing.\n", 587 (long)job->pid); 588 } 589 job->flags &= ~JOB_CONTINUING; 590 Lst_AtEnd(&runningJobs, job); 591 nJobs++; 592 debug_printf("Process %ld is continuing locally.\n", 593 (long)job->pid); 594 if (nJobs == maxJobs) { 595 jobFull = true; 596 debug_printf("Job queue is full.\n"); 597 } 598 (void)fflush(stdout); 599 return; 600 } 601 602 if (done || DEBUG(JOB)) { 603 if (reason == JOB_EXITED) { 604 debug_printf("Process %ld exited.\n", (long)job->pid); 605 if (code != 0) { 606 banner(job, stdout); 607 (void)fprintf(stdout, "*** Error code %d %s\n", 608 code, 609 (job->node->type & OP_IGNORE) ? 
610 "(ignored)" : ""); 611 612 if (job->node->type & OP_IGNORE) { 613 reason = JOB_EXITED; 614 code = 0; 615 } 616 } else if (DEBUG(JOB)) { 617 banner(job, stdout); 618 (void)fprintf(stdout, 619 "*** Completed successfully\n"); 620 } 621 } else { 622 banner(job, stdout); 623 (void)fprintf(stdout, "*** Signal %d\n", code); 624 } 625 626 (void)fflush(stdout); 627 } 628 629 done = true; 630 631 if (done && 632 aborting != ABORT_ERROR && 633 aborting != ABORT_INTERRUPT && 634 reason == JOB_EXITED && code == 0) { 635 /* As long as we aren't aborting and the job didn't return a 636 * non-zero status that we shouldn't ignore, we call 637 * Make_Update to update the parents. */ 638 job->node->built_status = MADE; 639 Make_Update(job->node); 640 free(job); 641 } else if (!(reason == JOB_EXITED && code == 0)) { 642 register_error(reason, code, job); 643 free(job); 644 } 645 646 /* 647 * Set aborting if any error. 648 */ 649 if (errors && !keepgoing && 650 aborting != ABORT_INTERRUPT) { 651 /* 652 * If we found any errors in this batch of children and the -k 653 * flag wasn't given, we set the aborting flag so no more jobs 654 * get started. 655 */ 656 aborting = ABORT_ERROR; 657 } 658 659 if (aborting != ABORT_ERROR) 660 JobRestartJobs(); 661 662 if (aborting == ABORT_ERROR && Job_Empty()) { 663 /* 664 * If we are aborting and the job table is now empty, we finish. 
665 */ 666 Finish(errors); 667 } 668 } 669 670 static void 671 prepare_pipe(struct job_pipe *p, int *fd) 672 { 673 p->pos = 0; 674 (void)fcntl(fd[0], F_SETFD, FD_CLOEXEC); 675 p->fd = fd[0]; 676 close(fd[1]); 677 678 if (output_mask == NULL || p->fd > largest_fd) { 679 int fdn, ofdn; 680 681 fdn = howmany(p->fd+1, NFDBITS); 682 ofdn = howmany(largest_fd+1, NFDBITS); 683 684 if (fdn != ofdn) { 685 output_mask = emult_realloc(output_mask, fdn, 686 sizeof(fd_mask)); 687 memset(((char *)output_mask) + ofdn * sizeof(fd_mask), 688 0, (fdn-ofdn) * sizeof(fd_mask)); 689 actual_mask = emult_realloc(actual_mask, fdn, 690 sizeof(fd_mask)); 691 mask_size = fdn * sizeof(fd_mask); 692 } 693 largest_fd = p->fd; 694 } 695 fcntl(p->fd, F_SETFL, O_NONBLOCK); 696 FD_SET(p->fd, output_mask); 697 } 698 699 /*- 700 *----------------------------------------------------------------------- 701 * JobExec -- 702 * Execute the shell for the given job. Called from JobStart and 703 * JobRestart. 704 * 705 * Side Effects: 706 * A shell is executed, outputs is altered and the Job structure added 707 * to the job table. 708 *----------------------------------------------------------------------- 709 */ 710 static void 711 JobExec(Job *job) 712 { 713 pid_t cpid; /* ID of new child */ 714 int fds[4]; 715 int *fdout = fds; 716 int *fderr = fds+2; 717 int result; 718 int i; 719 720 if (DEBUG(JOB)) { 721 LstNode ln; 722 723 (void)fprintf(stdout, "Running %s\n", job->node->name); 724 for (ln = Lst_First(&job->node->commands); ln != NULL ; 725 ln = Lst_Adv(ln)) 726 fprintf(stdout, "\t%s\n", (char *)Lst_Datum(ln)); 727 (void)fflush(stdout); 728 } 729 730 /* 731 * Some jobs produce no output and it's disconcerting to have 732 * no feedback of their running (since they produce no output, the 733 * banner with their name in it never appears). This is an attempt to 734 * provide that feedback, even if nothing follows it. 
735 */ 736 banner(job, stdout); 737 738 setup_engine(); 739 740 /* Create the pipe by which we'll get the shell's output. 741 */ 742 if (pipe(fdout) == -1) 743 Punt("Cannot create pipe: %s", strerror(errno)); 744 745 if (pipe(fderr) == -1) 746 Punt("Cannot create pipe: %s", strerror(errno)); 747 748 if ((cpid = fork()) == -1) { 749 Punt("Cannot fork"); 750 } else if (cpid == 0) { 751 supervise_jobs = false; 752 /* standard pipe code to route stdout and stderr */ 753 close(fdout[0]); 754 if (dup2(fdout[1], 1) == -1) 755 Punt("Cannot dup2(outPipe): %s", strerror(errno)); 756 if (fdout[1] != 1) 757 close(fdout[1]); 758 close(fderr[0]); 759 if (dup2(fderr[1], 2) == -1) 760 Punt("Cannot dup2(errPipe): %s", strerror(errno)); 761 if (fderr[1] != 2) 762 close(fderr[1]); 763 764 #ifdef USE_PGRP 765 /* 766 * We want to switch the child into a different process family 767 * so we can kill it and all its descendants in one fell swoop, 768 * by killing its process family, but not commit suicide. 769 */ 770 # if defined(SYSV) 771 (void)setsid(); 772 # else 773 (void)setpgid(0, getpid()); 774 # endif 775 #endif /* USE_PGRP */ 776 777 if (random_delay) 778 if (!(nJobs == 1 && no_jobs_left())) 779 usleep(random() % random_delay); 780 781 /* most cases won't return, but will exit directly */ 782 result = run_prepared_gnode(job->node, 1); 783 switch(result) { 784 case MADE: 785 exit(0); 786 case ERROR: 787 exit(1); 788 default: 789 fprintf(stderr, 790 "Could not run gnode, returned %d\n", result); 791 exit(1); 792 } 793 } else { 794 supervise_jobs = true; 795 job->pid = cpid; 796 797 /* we set the current position in the buffers to the beginning 798 * and mark another stream to watch in the outputs mask 799 */ 800 for (i = 0; i < 2; i++) 801 prepare_pipe(&job->in[i], fds+2*i); 802 } 803 804 /* 805 * Now the job is actually running, add it to the table. 
806 */ 807 nJobs++; 808 Lst_AtEnd(&runningJobs, job); 809 if (nJobs == maxJobs) { 810 jobFull = true; 811 } 812 } 813 814 static void 815 start_queued_job(Job *job) 816 { 817 if (DEBUG(JOB)) { 818 (void)fprintf(stdout, "Restarting %s...", 819 job->node->name); 820 (void)fflush(stdout); 821 } 822 if (nJobs >= maxJobs && !(job->flags & JOB_SPECIAL)) { 823 /* 824 * Can't be exported and not allowed to run locally -- 825 * put it back on the hold queue and mark the table 826 * full 827 */ 828 debug_printf("holding\n"); 829 Lst_AtFront(&stoppedJobs, job); 830 jobFull = true; 831 debug_printf("Job queue is full.\n"); 832 return; 833 } else { 834 /* 835 * Job may be run locally. 836 */ 837 debug_printf("running locally\n"); 838 } 839 JobExec(job); 840 } 841 842 /*- 843 *----------------------------------------------------------------------- 844 * JobRestart -- 845 * Restart a job that stopped for some reason. 846 * 847 * Side Effects: 848 * jobFull will be set if the job couldn't be run. 849 *----------------------------------------------------------------------- 850 */ 851 static void 852 JobRestart(Job *job) 853 { 854 if (job->flags & JOB_RESTART) { 855 start_queued_job(job); 856 } else { 857 /* 858 * The job has stopped and needs to be restarted. Why it 859 * stopped, we don't know... 860 */ 861 debug_printf("Resuming %s...", job->node->name); 862 if ((nJobs < maxJobs || ((job->flags & JOB_SPECIAL) && 863 maxJobs == 0)) && nJobs != maxJobs) { 864 /* 865 * If we haven't reached the concurrency limit already 866 * (or maxJobs is 0), it's ok to resume the job. 867 */ 868 bool error; 869 870 error = KILL(job->pid, SIGCONT) != 0; 871 872 if (!error) { 873 /* 874 * Make sure the user knows we've continued the 875 * beast and actually put the thing in the job 876 * table. 
877 */ 878 job->flags |= JOB_CONTINUING; 879 finish_job(job, JOB_SIGNALED, SIGCONT); 880 881 job->flags &= ~(JOB_RESUME|JOB_CONTINUING); 882 debug_printf("done\n"); 883 } else { 884 Error("couldn't resume %s: %s", 885 job->node->name, strerror(errno)); 886 finish_job(job, JOB_EXITED, 1); 887 } 888 } else { 889 /* 890 * Job cannot be restarted. Mark the table as full and 891 * place the job back on the list of stopped jobs. 892 */ 893 debug_printf("table full\n"); 894 Lst_AtFront(&stoppedJobs, job); 895 jobFull = true; 896 debug_printf("Job queue is full.\n"); 897 } 898 } 899 } 900 901 static Job * 902 prepare_job(GNode *gn, int flags) 903 { 904 bool cmdsOK; /* true if the nodes commands were all right */ 905 bool noExec; /* Set true if we decide not to run the job */ 906 907 /* 908 * Check the commands now so any attributes from .DEFAULT have a chance 909 * to migrate to the node 910 */ 911 cmdsOK = Job_CheckCommands(gn); 912 expand_commands(gn); 913 914 if ((gn->type & OP_MAKE) || (!noExecute && !touchFlag)) { 915 /* 916 * We're serious here, but if the commands were bogus, we're 917 * also dead... 918 */ 919 if (!cmdsOK) 920 job_failure(gn, Punt); 921 922 if (Lst_IsEmpty(&gn->commands)) 923 noExec = true; 924 else 925 noExec = false; 926 927 } else if (noExecute) { 928 if (!cmdsOK || Lst_IsEmpty(&gn->commands)) 929 noExec = true; 930 else 931 noExec = false; 932 } else { 933 /* 934 * Just touch the target and note that no shell should be 935 * executed. Check 936 * the commands, too, but don't die if they're no good -- it 937 * does no harm to keep working up the graph. 938 */ 939 Job_Touch(gn); 940 noExec = true; 941 } 942 943 /* 944 * If we're not supposed to execute a shell, don't. 945 */ 946 if (noExec) { 947 /* 948 * We only want to work our way up the graph if we aren't here 949 * because the commands for the job were no good. 
950 */ 951 if (cmdsOK && !aborting) { 952 gn->built_status = MADE; 953 Make_Update(gn); 954 } 955 return NULL; 956 } else { 957 Job *job; /* new job descriptor */ 958 job = emalloc(sizeof(Job)); 959 if (job == NULL) 960 Punt("JobStart out of memory"); 961 962 job->node = gn; 963 964 /* 965 * Set the initial value of the flags for this job based on the 966 * global ones and the node's attributes... Any flags supplied 967 * by the caller are also added to the field. 968 */ 969 job->flags = flags; 970 971 return job; 972 } 973 } 974 975 /*- 976 *----------------------------------------------------------------------- 977 * JobStart -- 978 * Start a target-creation process going for the target described 979 * by the graph node gn. 980 * 981 * Side Effects: 982 * A new Job node is created and added to the list of running 983 * jobs. Make is forked and a child shell created. 984 *----------------------------------------------------------------------- 985 */ 986 static void 987 JobStart(GNode *gn, /* target to create */ 988 int flags) /* flags for the job to override normal ones. 989 * e.g. JOB_SPECIAL */ 990 { 991 Job *job; 992 job = prepare_job(gn, flags); 993 if (!job) 994 return; 995 if (nJobs >= maxJobs && !(job->flags & JOB_SPECIAL) && 996 maxJobs != 0) { 997 /* 998 * The job can only be run locally, but we've hit the limit of 999 * local concurrency, so put the job on hold until some other 1000 * job finishes. Note that the special jobs (.BEGIN, .INTERRUPT 1001 * and .END) may be run locally even when the local limit has 1002 * been reached (e.g. when maxJobs == 0), though they will be 1003 * exported if at all possible. In addition, any target marked 1004 * with .NOEXPORT will be run locally if maxJobs is 0. 
1005 */ 1006 jobFull = true; 1007 1008 debug_printf("Can only run job locally.\n"); 1009 job->flags |= JOB_RESTART; 1010 Lst_AtEnd(&stoppedJobs, job); 1011 } else { 1012 if (nJobs >= maxJobs) { 1013 /* 1014 * If we're running this job locally as a special case 1015 * (see above), at least say the table is full. 1016 */ 1017 jobFull = true; 1018 debug_printf("Local job queue is full.\n"); 1019 } 1020 JobExec(job); 1021 } 1022 } 1023 1024 /* Helper functions for JobDoOutput */ 1025 1026 1027 /* output debugging banner and print characters from 0 to endpos */ 1028 static void 1029 print_partial_buffer(struct job_pipe *p, Job *job, FILE *out, size_t endPos) 1030 { 1031 size_t i; 1032 1033 banner(job, out); 1034 job->flags |= JOB_DIDOUTPUT; 1035 for (i = 0; i < endPos; i++) 1036 putc(p->buffer[i], out); 1037 } 1038 1039 /* print partial buffer and shift remaining contents */ 1040 static void 1041 print_partial_buffer_and_shift(struct job_pipe *p, Job *job, FILE *out, 1042 size_t endPos) 1043 { 1044 size_t i; 1045 1046 print_partial_buffer(p, job, out, endPos); 1047 1048 for (i = endPos; i < p->pos; i++) 1049 p->buffer[i-endPos] = p->buffer[i]; 1050 p->pos -= endPos; 1051 } 1052 1053 /* print complete lines, looking back to the limit position 1054 * (stuff before limit was already scanned). 1055 * returns true if something was printed. 1056 */ 1057 static bool 1058 print_complete_lines(struct job_pipe *p, Job *job, FILE *out, size_t limit) 1059 { 1060 size_t i; 1061 1062 for (i = p->pos; i > limit; i--) { 1063 if (p->buffer[i-1] == '\n') { 1064 print_partial_buffer_and_shift(p, job, out, i); 1065 return true; 1066 } 1067 } 1068 return false; 1069 } 1070 1071 /*- 1072 *----------------------------------------------------------------------- 1073 * handle_pipe -- 1074 * This functions is called whenever there is something to read on the 1075 * pipe. We collect more output from the given job and store it in the 1076 * job's outBuf. 
If this makes up lines, we print it tagged by the job's 1077 * identifier, as necessary. 1078 * 1079 * Side Effects: 1080 * curPos may be shifted as may the contents of outBuf. 1081 *----------------------------------------------------------------------- 1082 */ 1083 static void 1084 handle_pipe(struct job_pipe *p, 1085 Job *job, FILE *out, bool finish) 1086 { 1087 int nr; /* number of bytes read */ 1088 int oldpos; /* optimization */ 1089 1090 /* want to get everything ? -> we block */ 1091 if (finish) 1092 fcntl(p->fd, F_SETFL, 0); 1093 1094 do { 1095 nr = read(p->fd, &p->buffer[p->pos], 1096 JOB_BUFSIZE - p->pos); 1097 if (nr == -1) { 1098 if (errno == EAGAIN) 1099 break; 1100 if (DEBUG(JOB)) { 1101 perror("JobDoOutput(piperead)"); 1102 } 1103 } 1104 oldpos = p->pos; 1105 p->pos += nr; 1106 if (!print_complete_lines(p, job, out, oldpos)) 1107 if (p->pos == JOB_BUFSIZE) { 1108 print_partial_buffer(p, job, out, p->pos); 1109 p->pos = 0; 1110 } 1111 } while (nr != 0); 1112 1113 /* at end of file, we print whatever is left */ 1114 if (nr == 0) { 1115 print_partial_buffer(p, job, out, p->pos); 1116 if (p->pos > 0 && p->buffer[p->pos - 1] != '\n') 1117 putchar('\n'); 1118 p->pos = 0; 1119 } 1120 } 1121 1122 static void 1123 handle_job_output(Job *job, int i, bool finish) 1124 { 1125 handle_pipe(&job->in[i], job, i == 0 ? stdout : stderr, finish); 1126 } 1127 1128 /*- 1129 *----------------------------------------------------------------------- 1130 * Job_CatchChildren -- 1131 * Handle the exit of a child. Called by handle_running_jobs 1132 * 1133 * Side Effects: 1134 * The job descriptor is removed from the list of children. 1135 * 1136 * Notes: 1137 * We do waits, blocking or not, according to the wisdom of our 1138 * caller, until there are no more children to report. For each 1139 * job, call JobFinish to finish things off. This will take care of 1140 * putting jobs on the stoppedJobs queue. 
1141 *----------------------------------------------------------------------- 1142 */ 1143 void 1144 Job_CatchChildren() 1145 { 1146 pid_t pid; /* pid of dead child */ 1147 Job *job; /* job descriptor for dead child */ 1148 LstNode jnode; /* list element for finding job */ 1149 int status; /* Exit/termination status */ 1150 1151 /* 1152 * Don't even bother if we know there's no one around. 1153 */ 1154 if (nJobs == 0) 1155 return; 1156 1157 while ((pid = waitpid((pid_t) -1, &status, WNOHANG|WUNTRACED)) > 0) { 1158 handle_all_signals(); 1159 debug_printf("Process %ld exited or stopped.\n", (long)pid); 1160 1161 jnode = Lst_Find(&runningJobs, JobCmpPid, &pid); 1162 1163 if (jnode == NULL) { 1164 if (WIFSIGNALED(status) && 1165 (WTERMSIG(status) == SIGCONT)) { 1166 jnode = Lst_Find(&stoppedJobs, JobCmpPid, &pid); 1167 if (jnode == NULL) { 1168 Error("Resumed child (%ld) not in table", (long)pid); 1169 continue; 1170 } 1171 job = (Job *)Lst_Datum(jnode); 1172 Lst_Remove(&stoppedJobs, jnode); 1173 } else { 1174 Error("Child (%ld) not in table?", (long)pid); 1175 continue; 1176 } 1177 } else { 1178 job = (Job *)Lst_Datum(jnode); 1179 Lst_Remove(&runningJobs, jnode); 1180 nJobs--; 1181 if (jobFull) 1182 debug_printf("Job queue is no longer full.\n"); 1183 jobFull = false; 1184 } 1185 1186 JobFinish(job, status); 1187 } 1188 } 1189 1190 void 1191 handle_all_jobs_output(void) 1192 { 1193 int nfds; 1194 struct timeval timeout; 1195 LstNode ln, ln2; 1196 Job *job; 1197 int i; 1198 int status; 1199 1200 /* no jobs */ 1201 if (Lst_IsEmpty(&runningJobs)) 1202 return; 1203 1204 (void)fflush(stdout); 1205 1206 memcpy(actual_mask, output_mask, mask_size); 1207 timeout.tv_sec = SEL_SEC; 1208 timeout.tv_usec = SEL_USEC; 1209 1210 nfds = select(largest_fd+1, actual_mask, NULL, NULL, &timeout); 1211 handle_all_signals(); 1212 for (ln = Lst_First(&runningJobs); nfds && ln != NULL; 1213 ln = ln2) { 1214 ln2 = Lst_Adv(ln); 1215 job = (Job *)Lst_Datum(ln); 1216 job->flags &= 
~JOB_DIDOUTPUT; 1217 for (i = 1; i >= 0; i--) { 1218 if (FD_ISSET(job->in[i].fd, actual_mask)) { 1219 nfds--; 1220 handle_job_output(job, i, false); 1221 } 1222 } 1223 if (job->flags & JOB_DIDOUTPUT) { 1224 if (wait4(job->pid, &status, WNOHANG|WUNTRACED, NULL) == 1225 job->pid) { 1226 Lst_Remove(&runningJobs, ln); 1227 nJobs--; 1228 jobFull = false; 1229 JobFinish(job, status); 1230 } else { 1231 Lst_Requeue(&runningJobs, ln); 1232 } 1233 } 1234 } 1235 } 1236 1237 void 1238 handle_running_jobs() 1239 { 1240 handle_all_jobs_output(); 1241 Job_CatchChildren(); 1242 } 1243 1244 static void 1245 loop_handle_running_jobs() 1246 { 1247 while (nJobs) 1248 handle_running_jobs(); 1249 } 1250 /*- 1251 *----------------------------------------------------------------------- 1252 * Job_Make -- 1253 * Start the creation of a target. Basically a front-end for 1254 * JobStart used by the Make module. 1255 * 1256 * Side Effects: 1257 * Another job is started. 1258 *----------------------------------------------------------------------- 1259 */ 1260 void 1261 Job_Make(GNode *gn) 1262 { 1263 (void)JobStart(gn, 0); 1264 } 1265 1266 /*- 1267 *----------------------------------------------------------------------- 1268 * Job_Init -- 1269 * Initialize the process module 1270 * 1271 * Side Effects: 1272 * lists and counters are initialized 1273 *----------------------------------------------------------------------- 1274 */ 1275 void 1276 Job_Init(int maxproc) 1277 { 1278 Static_Lst_Init(&runningJobs); 1279 Static_Lst_Init(&stoppedJobs); 1280 Static_Lst_Init(&errorsList); 1281 maxJobs = maxproc; 1282 nJobs = 0; 1283 jobFull = false; 1284 errors = 0; 1285 1286 aborting = 0; 1287 1288 lastNode = NULL; 1289 1290 if ((begin_node->type & OP_DUMMY) == 0) { 1291 JobStart(begin_node, JOB_SPECIAL); 1292 loop_handle_running_jobs(); 1293 } 1294 } 1295 1296 /*- 1297 *----------------------------------------------------------------------- 1298 * Job_Full -- 1299 * See if the job table is full. 
It is considered full if it is OR 1300 * if we are in the process of aborting OR if we have 1301 * reached/exceeded our local quota. This prevents any more jobs 1302 * from starting up. 1303 * 1304 * Results: 1305 * true if the job table is full, false otherwise 1306 *----------------------------------------------------------------------- 1307 */ 1308 bool 1309 Job_Full(void) 1310 { 1311 return aborting || jobFull; 1312 } 1313 1314 /*- 1315 *----------------------------------------------------------------------- 1316 * Job_Empty -- 1317 * See if the job table is empty. Because the local concurrency may 1318 * be set to 0, it is possible for the job table to become empty, 1319 * while the list of stoppedJobs remains non-empty. In such a case, 1320 * we want to restart as many jobs as we can. 1321 * 1322 * Results: 1323 * true if it is. false if it ain't. 1324 * ----------------------------------------------------------------------- 1325 */ 1326 bool 1327 Job_Empty(void) 1328 { 1329 if (nJobs == 0) { 1330 if (!Lst_IsEmpty(&stoppedJobs) && !aborting) { 1331 /* 1332 * The job table is obviously not full if it has no 1333 * jobs in it...Try and restart the stopped jobs. 1334 */ 1335 jobFull = false; 1336 JobRestartJobs(); 1337 return false; 1338 } else { 1339 return true; 1340 } 1341 } else { 1342 return false; 1343 } 1344 } 1345 1346 /*- 1347 *----------------------------------------------------------------------- 1348 * JobInterrupt -- 1349 * Handle the receipt of an interrupt. 1350 * 1351 * Side Effects: 1352 * All children are killed. Another job will be started if the 1353 * .INTERRUPT target was given. 
1354 *----------------------------------------------------------------------- 1355 */ 1356 static void 1357 JobInterrupt(int runINTERRUPT, /* Non-zero if commands for the .INTERRUPT 1358 * target should be executed */ 1359 int signo) /* signal received */ 1360 { 1361 LstNode ln; /* element in job table */ 1362 Job *job; /* job descriptor in that element */ 1363 1364 aborting = ABORT_INTERRUPT; 1365 1366 for (ln = Lst_First(&runningJobs); ln != NULL; ln = Lst_Adv(ln)) { 1367 job = (Job *)Lst_Datum(ln); 1368 1369 if (!Targ_Precious(job->node)) { 1370 const char *file = job->node->path == NULL ? 1371 job->node->name : job->node->path; 1372 if (!noExecute && eunlink(file) != -1) { 1373 Error("*** %s removed", file); 1374 } 1375 } 1376 if (job->pid) { 1377 debug_printf("JobInterrupt passing signal to " 1378 "child %ld.\n", (long)job->pid); 1379 KILL(job->pid, signo); 1380 } 1381 } 1382 1383 if (runINTERRUPT && !touchFlag) { 1384 if ((interrupt_node->type & OP_DUMMY) == 0) { 1385 ignoreErrors = false; 1386 1387 JobStart(interrupt_node, 0); 1388 loop_handle_running_jobs(); 1389 } 1390 } 1391 exit(signo); 1392 } 1393 1394 /* 1395 *----------------------------------------------------------------------- 1396 * Job_Finish -- 1397 * Do final processing such as the running of the commands 1398 * attached to the .END target. 1399 * 1400 * Results: 1401 * Number of errors reported. 
1402 * 1403 *----------------------------------------------------------------------- 1404 */ 1405 int 1406 Job_Finish(void) 1407 { 1408 if (end_node != NULL && !Lst_IsEmpty(&end_node->commands)) { 1409 if (errors) { 1410 Error("Errors reported so .END ignored"); 1411 } else { 1412 JobStart(end_node, JOB_SPECIAL); 1413 loop_handle_running_jobs(); 1414 } 1415 } 1416 return errors; 1417 } 1418 1419 #ifdef CLEANUP 1420 void 1421 Job_End(void) 1422 { 1423 } 1424 #endif 1425 1426 /*- 1427 *----------------------------------------------------------------------- 1428 * Job_Wait -- 1429 * Waits for all running jobs to finish and returns. Sets 'aborting' 1430 * to ABORT_WAIT to prevent other jobs from starting. 1431 * 1432 * Side Effects: 1433 * Currently running jobs finish. 1434 * 1435 *----------------------------------------------------------------------- 1436 */ 1437 void 1438 Job_Wait(void) 1439 { 1440 aborting = ABORT_WAIT; 1441 loop_handle_running_jobs(); 1442 aborting = 0; 1443 } 1444 1445 /*- 1446 *----------------------------------------------------------------------- 1447 * Job_AbortAll -- 1448 * Abort all currently running jobs without handling output or anything. 1449 * This function is to be called only in the event of a major 1450 * error. Most definitely NOT to be called from JobInterrupt. 1451 * 1452 * Side Effects: 1453 * All children are killed, not just the firstborn 1454 *----------------------------------------------------------------------- 1455 */ 1456 void 1457 Job_AbortAll(void) 1458 { 1459 LstNode ln; /* element in job table */ 1460 Job *job; /* the job descriptor in that element */ 1461 int foo; 1462 1463 aborting = ABORT_ERROR; 1464 1465 if (nJobs) { 1466 for (ln = Lst_First(&runningJobs); ln != NULL; 1467 ln = Lst_Adv(ln)) { 1468 job = (Job *)Lst_Datum(ln); 1469 1470 /* 1471 * kill the child process with increasingly drastic 1472 * signals to make darn sure it's dead. 
1473 */ 1474 KILL(job->pid, SIGINT); 1475 KILL(job->pid, SIGKILL); 1476 } 1477 } 1478 1479 /* 1480 * Catch as many children as want to report in at first, then give up 1481 */ 1482 while (waitpid(-1, &foo, WNOHANG) > 0) 1483 continue; 1484 } 1485 1486 /*- 1487 *----------------------------------------------------------------------- 1488 * JobRestartJobs -- 1489 * Tries to restart stopped jobs if there are slots available. 1490 * Note that this tries to restart them regardless of pending errors. 1491 * It's not good to leave stopped jobs lying around! 1492 * 1493 * Side Effects: 1494 * Resumes(and possibly migrates) jobs. 1495 *----------------------------------------------------------------------- 1496 */ 1497 static void 1498 JobRestartJobs(void) 1499 { 1500 Job *job; 1501 1502 while (!Job_Full() && 1503 (job = (Job *)Lst_DeQueue(&stoppedJobs)) != NULL) { 1504 debug_printf("Job queue is not full. " 1505 "Restarting a stopped job.\n"); 1506 JobRestart(job); 1507 } 1508 } 1509