1 /* $OpenPackages$ */ 2 /* $OpenBSD: job.c,v 1.116 2009/04/26 09:25:49 espie Exp $ */ 3 /* $NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $ */ 4 5 /* 6 * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. 7 * Copyright (c) 1988, 1989 by Adam de Boor 8 * Copyright (c) 1989 by Berkeley Softworks 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * Adam de Boor. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /*- 40 * job.c -- 41 * handle the creation etc. of our child processes. 42 * 43 * Interface: 44 * Job_Make Start the creation of the given target. 45 * 46 * Job_Init Called to initialize this module. in addition, 47 * any commands attached to the .BEGIN target 48 * are executed before this function returns. 49 * Hence, the makefile must have been parsed 50 * before this function is called. 51 * 52 * Job_End Cleanup any memory used. 53 * 54 * Job_Full Return true if the job table is filled. 55 * 56 * Job_Empty Return true if the job table is completely 57 * empty. 58 * 59 * Job_Finish Perform any final processing which needs doing. 60 * This includes the execution of any commands 61 * which have been/were attached to the .END 62 * target. It should only be called when the 63 * job table is empty. 64 * 65 * Job_AbortAll Abort all currently running jobs. It doesn't 66 * handle output or do anything for the jobs, 67 * just kills them. It should only be called in 68 * an emergency, as it were. 69 * 70 * Job_Wait Wait for all currently-running jobs to finish. 71 */ 72 73 #include <sys/types.h> 74 #include <sys/wait.h> 75 #include <ctype.h> 76 #include <errno.h> 77 #include <fcntl.h> 78 #include <signal.h> 79 #include <stdarg.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 #include "config.h" 85 #include "defines.h" 86 #include "job.h" 87 #include "engine.h" 88 #include "pathnames.h" 89 #include "var.h" 90 #include "targ.h" 91 #include "error.h" 92 #include "lst.h" 93 #include "extern.h" 94 #include "gnode.h" 95 #include "memory.h" 96 #include "make.h" 97 98 /* 99 * The SEL_ constants determine the maximum amount of time spent in select 100 * before coming out to see if a child has finished. SEL_SEC is the number of 101 * seconds and SEL_USEC is the number of micro-seconds 102 */ 103 #define SEL_SEC 0 104 #define SEL_USEC 500000 105 106 107 /*- 108 * Job Table definitions. 109 * 110 * Each job has several things associated with it: 111 * 1) The process id of the child shell 112 * 2) The graph node describing the target being made by this job 113 * 3) An FILE* for writing out the commands. This is only 114 * used before the job is actually started. 115 * 4) Things used for handling the shell's output. 116 * the output is being caught via a pipe and 117 * the descriptors of our pipe, an array in which output is line 118 * buffered and the current position in that buffer are all 119 * maintained for each job. 120 * 5) A word of flags which determine how the module handles errors, 121 * echoing, etc. for the job 122 * 123 * The job "table" is kept as a linked Lst in 'jobs', with the number of 124 * active jobs maintained in the 'nJobs' variable. At no time will this 125 * exceed the value of 'maxJobs', initialized by the Job_Init function. 126 * 127 * When a job is finished, the Make_Update function is called on each of the 128 * parents of the node which was just remade. This takes care of the upward 129 * traversal of the dependency graph. 130 */ 131 #define JOB_BUFSIZE 1024 132 struct job_pipe { 133 int fd; 134 char buffer[JOB_BUFSIZE]; 135 size_t pos; 136 }; 137 138 typedef struct Job_ { 139 pid_t pid; /* The child's process ID */ 140 GNode *node; /* The target the child is making */ 141 short flags; /* Flags to control treatment of job */ 142 #define JOB_SPECIAL 0x004 /* Target is a special one. */ 143 #define JOB_RESTART 0x080 /* Job needs to be completely restarted */ 144 #define JOB_RESUME 0x100 /* Job needs to be resumed b/c it stopped, 145 * for some reason */ 146 #define JOB_CONTINUING 0x200 /* We are in the process of resuming this job. 147 * Used to avoid infinite recursion between 148 * JobFinish and JobRestart */ 149 #define JOB_DIDOUTPUT 0x001 150 struct job_pipe in[2]; 151 } Job; 152 153 154 static int aborting = 0; /* why is the make aborting? */ 155 #define ABORT_ERROR 1 /* Because of an error */ 156 #define ABORT_INTERRUPT 2 /* Because it was interrupted */ 157 #define ABORT_WAIT 3 /* Waiting for jobs to finish */ 158 159 static int maxJobs; /* The most children we can run at once */ 160 static int nJobs; /* The number of children currently running */ 161 static LIST runningJobs; /* The structures that describe them */ 162 static bool jobFull; /* Flag to tell when the job table is full. It 163 * is set true when nJobs equals maxJobs */ 164 static GNode *lastNode; /* The node for which output was most recently 165 * produced. */ 166 167 /* data structure linked to job handling through select */ 168 static fd_set *output_mask = NULL; /* File descriptors to look for */ 169 170 static fd_set *actual_mask = NULL; /* actual select argument */ 171 static int largest_fd = -1; 172 static size_t mask_size = 0; 173 174 static LIST stoppedJobs; 175 176 /* wait possibilities */ 177 #define JOB_EXITED 0 178 #define JOB_SIGNALED 1 179 #define JOB_CONTINUED 2 180 #define JOB_STOPPED 3 181 #define JOB_UNKNOWN 4 182 183 static LIST errorsList; 184 static int errors; 185 struct error_info { 186 int reason; 187 int code; 188 GNode *n; 189 }; 190 191 192 #if defined(USE_PGRP) && defined(SYSV) 193 # define KILL(pid, sig) killpg(-(pid), (sig)) 194 #else 195 # if defined(USE_PGRP) 196 # define KILL(pid, sig) killpg((pid), (sig)) 197 # else 198 # define KILL(pid, sig) kill((pid), (sig)) 199 # endif 200 #endif 201 202 static void signal_running_jobs(int); 203 static void handle_all_signals(void); 204 static void handle_signal(int); 205 static int JobCmpPid(void *, void *); 206 static void JobFinish(Job *, int); 207 static void finish_job(Job *, int, int); 208 static void JobExec(Job *); 209 static void JobRestart(Job *); 210 static void JobStart(GNode *, int); 211 static void JobInterrupt(int, int); 212 static void JobRestartJobs(void); 213 static void debug_printf(const char *, ...); 214 static Job *prepare_job(GNode *, int); 215 static void start_queued_job(Job *); 216 static void banner(Job *, FILE *); 217 218 /*** 219 *** Input/output from jobs 220 ***/ 221 222 /* prepare_pipe(jp, &fd): 223 * set up pipe data structure (buffer and pos) corresponding to 224 * pointed fd, and prepare to watch for it. 225 */ 226 static void prepare_pipe(struct job_pipe *, int *); 227 228 /* close_job_pipes(j): 229 * handle final output from job, and close pipes properly 230 */ 231 static void close_job_pipes(Job *); 232 233 234 static void handle_all_jobs_output(void); 235 236 /* handle_job_output(job, n, finish): 237 * n = 0 or 1 (stdout/stderr), set finish to retrieve everything. 238 */ 239 static void handle_job_output(Job *, int, bool); 240 241 static void print_partial_buffer(struct job_pipe *, Job *, FILE *, size_t); 242 static void print_partial_buffer_and_shift(struct job_pipe *, Job *, FILE *, 243 size_t); 244 static bool print_complete_lines(struct job_pipe *, Job *, FILE *, size_t); 245 246 247 static void register_error(int, int, Job *); 248 static void loop_handle_running_jobs(void); 249 static void Job_CatchChildren(void); 250 251 static void 252 register_error(int reason, int code, Job *job) 253 { 254 struct error_info *p; 255 256 errors++; 257 p = emalloc(sizeof(struct error_info)); 258 p->reason = reason; 259 p->code = code; 260 p->n = job->node; 261 Lst_AtEnd(&errorsList, p); 262 } 263 264 void 265 print_errors() 266 { 267 LstNode ln; 268 struct error_info *p; 269 const char *type; 270 271 for (ln = Lst_First(&errorsList); ln != NULL; ln = Lst_Adv(ln)) { 272 p = (struct error_info *)Lst_Datum(ln); 273 switch(p->reason) { 274 case JOB_EXITED: 275 type = "Exit status"; 276 break; 277 case JOB_SIGNALED: 278 type = "Received signal"; 279 break; 280 case JOB_STOPPED: 281 type = "Stopped"; 282 break; 283 case JOB_CONTINUED: 284 type = "Continued"; 285 break; 286 default: 287 type = "Should not happen"; 288 break; 289 } 290 if (p->n->lineno) 291 Error(" %s %d (%s, line %lu of %s)", 292 type, p->code, p->n->name, p->n->lineno, p->n->fname); 293 else 294 Error(" %s %d (%s)", type, p->code, p->n->name); 295 } 296 } 297 298 static void 299 banner(Job *job, FILE *out) 300 { 301 if (job->node != lastNode) { 302 if (DEBUG(JOBBANNER)) 303 (void)fprintf(out, "--- %s ---\n", job->node->name); 304 lastNode = job->node; 305 } 306 } 307 308 static void 309 handle_all_signals() 310 { 311 if (got_signal) 312 got_signal = 0; 313 else 314 return; 315 316 if (got_SIGINT) { 317 got_SIGINT=0; 318 handle_signal(SIGINT); 319 } 320 if (got_SIGHUP) { 321 got_SIGHUP=0; 322 handle_signal(SIGHUP); 323 } 324 if (got_SIGQUIT) { 325 got_SIGQUIT=0; 326 handle_signal(SIGQUIT); 327 } 328 if (got_SIGTERM) { 329 got_SIGTERM=0; 330 handle_signal(SIGTERM); 331 } 332 if (got_SIGTSTP) { 333 got_SIGTSTP=0; 334 handle_signal(SIGTSTP); 335 } 336 if (got_SIGTTOU) { 337 got_SIGTTOU=0; 338 handle_signal(SIGTTOU); 339 } 340 if (got_SIGTTIN) { 341 got_SIGTTIN=0; 342 handle_signal(SIGTTIN); 343 } 344 if (got_SIGWINCH) { 345 got_SIGWINCH=0; 346 handle_signal(SIGWINCH); 347 } 348 } 349 350 static void 351 signal_running_jobs(int signo) 352 { 353 LstNode ln; 354 for (ln = Lst_First(&runningJobs); ln != NULL; ln = Lst_Adv(ln)) { 355 Job *job = Lst_Datum(ln); 356 if (DEBUG(JOB)) { 357 (void)fprintf(stdout, 358 "signal %d to child %ld.\n", 359 signo, (long)job->pid); 360 (void)fflush(stdout); 361 } 362 KILL(job->pid, signo); 363 } 364 } 365 366 /*- 367 *----------------------------------------------------------------------- 368 * handle_signal -- 369 * Pass a signal to all local jobs if USE_PGRP is defined, 370 * then die ourselves. 371 * 372 * Side Effects: 373 * We die by the same signal. 374 *----------------------------------------------------------------------- 375 */ 376 static void 377 handle_signal(int signo) /* The signal number we've received */ 378 { 379 sigset_t nmask, omask; 380 struct sigaction act; 381 382 if (DEBUG(JOB)) { 383 (void)fprintf(stdout, "handle_signal(%d) called.\n", signo); 384 (void)fflush(stdout); 385 } 386 signal_running_jobs(signo); 387 388 /* 389 * Deal with proper cleanup based on the signal received. We only run 390 * the .INTERRUPT target if the signal was in fact an interrupt. The 391 * other three termination signals are more of a "get out *now*" 392 * command. 393 */ 394 if (signo == SIGINT) { 395 JobInterrupt(true, signo); 396 } else if (signo == SIGHUP || signo == SIGTERM || signo == SIGQUIT) { 397 JobInterrupt(false, signo); 398 } 399 400 /* 401 * Leave gracefully if SIGQUIT, rather than core dumping. 402 */ 403 if (signo == SIGQUIT) { 404 Finish(0); 405 } 406 407 /* 408 * Send ourselves the signal now we've given the message to everyone 409 * else. Note we block everything else possible while we're getting 410 * the signal. This ensures that all our jobs get continued when we 411 * wake up before we take any other signal. 412 */ 413 sigemptyset(&nmask); 414 sigaddset(&nmask, signo); 415 sigprocmask(SIG_SETMASK, &nmask, &omask); 416 memset(&act, 0, sizeof act); 417 act.sa_handler = SIG_DFL; 418 sigemptyset(&act.sa_mask); 419 act.sa_flags = 0; 420 sigaction(signo, &act, NULL); 421 422 if (DEBUG(JOB)) { 423 (void)fprintf(stdout, 424 "handle_signal passing signal to self, mask = %x.\n", 425 ~0 & ~(1 << (signo-1))); 426 (void)fflush(stdout); 427 } 428 (void)signal(signo, SIG_DFL); 429 430 (void)KILL(getpid(), signo); 431 432 signal_running_jobs(SIGCONT); 433 434 (void)sigprocmask(SIG_SETMASK, &omask, NULL); 435 sigprocmask(SIG_SETMASK, &omask, NULL); 436 act.sa_handler = SigHandler; 437 sigaction(signo, &act, NULL); 438 } 439 440 /*- 441 *----------------------------------------------------------------------- 442 * JobCmpPid -- 443 * Compare the pid of the job with the given pid and return 0 if they 444 * are equal. This function is called from Job_CatchChildren via 445 * Lst_Find to find the job descriptor of the finished job. 446 * 447 * Results: 448 * 0 if the pid's match 449 *----------------------------------------------------------------------- 450 */ 451 static int 452 JobCmpPid(void *job, /* job to examine */ 453 void *pid) /* process id desired */ 454 { 455 return *(pid_t *)pid - ((Job *)job)->pid; 456 } 457 458 static void 459 debug_printf(const char *fmt, ...) 460 { 461 if (DEBUG(JOB)) { 462 va_list va; 463 464 va_start(va, fmt); 465 (void)vfprintf(stdout, fmt, va); 466 fflush(stdout); 467 va_end(va); 468 } 469 } 470 471 static void 472 close_job_pipes(Job *job) 473 { 474 int i; 475 476 for (i = 1; i >= 0; i--) { 477 FD_CLR(job->in[i].fd, output_mask); 478 handle_job_output(job, i, true); 479 (void)close(job->in[i].fd); 480 } 481 } 482 483 /*- 484 *----------------------------------------------------------------------- 485 * JobFinish -- 486 * Do final processing for the given job including updating 487 * parents and starting new jobs as available/necessary. 488 * 489 * Side Effects: 490 * Some nodes may be put on the toBeMade queue. 491 * Final commands for the job are placed on end_node. 492 * 493 * If we got an error and are aborting (aborting == ABORT_ERROR) and 494 * the job list is now empty, we are done for the day. 495 * If we recognized an error we set the aborting flag 496 * to ABORT_ERROR so no more jobs will be started. 497 *----------------------------------------------------------------------- 498 */ 499 /*ARGSUSED*/ 500 501 static void 502 JobFinish(Job *job, int status) 503 { 504 int reason, code; 505 /* parse status */ 506 if (WIFEXITED(status)) { 507 reason = JOB_EXITED; 508 code = WEXITSTATUS(status); 509 } else if (WIFSIGNALED(status)) { 510 reason = JOB_SIGNALED; 511 code = WTERMSIG(status); 512 } else if (WIFCONTINUED(status)) { 513 reason = JOB_CONTINUED; 514 code = 0; 515 } else if (WIFSTOPPED(status)) { 516 reason = JOB_STOPPED; 517 code = WSTOPSIG(status); 518 } else { 519 /* can't happen, set things to be bad. */ 520 reason = UNKNOWN; 521 code = status; 522 } 523 finish_job(job, reason, code); 524 } 525 526 527 static void 528 finish_job(Job *job, int reason, int code) 529 { 530 bool done; 531 532 if ((reason == JOB_EXITED && 533 code != 0 && !(job->node->type & OP_IGNORE)) || 534 (reason == JOB_SIGNALED && code != SIGCONT)) { 535 /* 536 * If it exited non-zero and either we're doing things our 537 * way or we're not ignoring errors, the job is finished. 538 * Similarly, if the shell died because of a signal 539 * the job is also finished. In these 540 * cases, finish out the job's output before printing the exit 541 * status... 542 */ 543 close_job_pipes(job); 544 done = true; 545 } else if (reason == JOB_EXITED) { 546 /* 547 * Deal with ignored errors in -B mode. We need to print a 548 * message telling of the ignored error as well as setting 549 * status.w_status to 0 so the next command gets run. To do 550 * this, we set done to be true if in -B mode and the job 551 * exited non-zero. 552 */ 553 done = code != 0; 554 close_job_pipes(job); 555 } else { 556 /* 557 * No need to close things down or anything. 558 */ 559 done = false; 560 } 561 562 if (reason == JOB_STOPPED) { 563 debug_printf("Process %ld stopped.\n", (long)job->pid); 564 banner(job, stdout); 565 (void)fprintf(stdout, "*** Stopped -- signal %d\n", 566 code); 567 job->flags |= JOB_RESUME; 568 Lst_AtEnd(&stoppedJobs, job); 569 (void)fflush(stdout); 570 return; 571 } 572 if (reason == JOB_SIGNALED && code == SIGCONT) { 573 /* 574 * If the beastie has continued, shift the Job from the 575 * stopped list to the running one (or re-stop it if 576 * concurrency is exceeded) and go and get another 577 * child. 578 */ 579 if (job->flags & (JOB_RESUME|JOB_RESTART)) { 580 banner(job, stdout); 581 (void)fprintf(stdout, "*** Continued\n"); 582 } 583 if (!(job->flags & JOB_CONTINUING)) { 584 debug_printf( 585 "Warning: " 586 "process %ld was not continuing.\n", 587 (long)job->pid); 588 } 589 job->flags &= ~JOB_CONTINUING; 590 Lst_AtEnd(&runningJobs, job); 591 nJobs++; 592 debug_printf("Process %ld is continuing locally.\n", 593 (long)job->pid); 594 if (nJobs == maxJobs) { 595 jobFull = true; 596 debug_printf("Job queue is full.\n"); 597 } 598 (void)fflush(stdout); 599 return; 600 } 601 602 if (done || DEBUG(JOB)) { 603 if (reason == JOB_EXITED) { 604 debug_printf("Process %ld exited.\n", (long)job->pid); 605 if (code != 0) { 606 banner(job, stdout); 607 (void)fprintf(stdout, "*** Error code %d %s\n", 608 code, 609 (job->node->type & OP_IGNORE) ? 610 "(ignored)" : ""); 611 612 if (job->node->type & OP_IGNORE) { 613 reason = JOB_EXITED; 614 code = 0; 615 } 616 } else if (DEBUG(JOB)) { 617 banner(job, stdout); 618 (void)fprintf(stdout, 619 "*** Completed successfully\n"); 620 } 621 } else { 622 banner(job, stdout); 623 (void)fprintf(stdout, "*** Signal %d\n", code); 624 } 625 626 (void)fflush(stdout); 627 } 628 629 done = true; 630 631 if (done && 632 aborting != ABORT_ERROR && 633 aborting != ABORT_INTERRUPT && 634 reason == JOB_EXITED && code == 0) { 635 /* As long as we aren't aborting and the job didn't return a 636 * non-zero status that we shouldn't ignore, we call 637 * Make_Update to update the parents. */ 638 job->node->built_status = MADE; 639 Make_Update(job->node); 640 free(job); 641 } else if (!(reason == JOB_EXITED && code == 0)) { 642 register_error(reason, code, job); 643 free(job); 644 } 645 646 /* 647 * Set aborting if any error. 648 */ 649 if (errors && !keepgoing && 650 aborting != ABORT_INTERRUPT) { 651 /* 652 * If we found any errors in this batch of children and the -k 653 * flag wasn't given, we set the aborting flag so no more jobs 654 * get started. 655 */ 656 aborting = ABORT_ERROR; 657 } 658 659 if (aborting != ABORT_ERROR) 660 JobRestartJobs(); 661 662 if (aborting == ABORT_ERROR && Job_Empty()) { 663 /* 664 * If we are aborting and the job table is now empty, we finish. 665 */ 666 Finish(errors); 667 } 668 } 669 670 static void 671 prepare_pipe(struct job_pipe *p, int *fd) 672 { 673 p->pos = 0; 674 (void)fcntl(fd[0], F_SETFD, FD_CLOEXEC); 675 p->fd = fd[0]; 676 close(fd[1]); 677 678 if (output_mask == NULL || p->fd > largest_fd) { 679 int fdn, ofdn; 680 681 fdn = howmany(p->fd+1, NFDBITS); 682 ofdn = howmany(largest_fd+1, NFDBITS); 683 684 if (fdn != ofdn) { 685 output_mask = emult_realloc(output_mask, fdn, 686 sizeof(fd_mask)); 687 memset(((char *)output_mask) + ofdn * sizeof(fd_mask), 688 0, (fdn-ofdn) * sizeof(fd_mask)); 689 actual_mask = emult_realloc(actual_mask, fdn, 690 sizeof(fd_mask)); 691 mask_size = fdn * sizeof(fd_mask); 692 } 693 largest_fd = p->fd; 694 } 695 fcntl(p->fd, F_SETFL, O_NONBLOCK); 696 FD_SET(p->fd, output_mask); 697 } 698 699 /*- 700 *----------------------------------------------------------------------- 701 * JobExec -- 702 * Execute the shell for the given job. Called from JobStart and 703 * JobRestart. 704 * 705 * Side Effects: 706 * A shell is executed, outputs is altered and the Job structure added 707 * to the job table. 708 *----------------------------------------------------------------------- 709 */ 710 static void 711 JobExec(Job *job) 712 { 713 pid_t cpid; /* ID of new child */ 714 int fds[4]; 715 int *fdout = fds; 716 int *fderr = fds+2; 717 int i; 718 719 if (DEBUG(JOB)) { 720 LstNode ln; 721 722 (void)fprintf(stdout, "Running %s\n", job->node->name); 723 for (ln = Lst_First(&job->node->commands); ln != NULL ; 724 ln = Lst_Adv(ln)) 725 fprintf(stdout, "\t%s\n", (char *)Lst_Datum(ln)); 726 (void)fflush(stdout); 727 } 728 729 /* 730 * Some jobs produce no output and it's disconcerting to have 731 * no feedback of their running (since they produce no output, the 732 * banner with their name in it never appears). This is an attempt to 733 * provide that feedback, even if nothing follows it. 734 */ 735 banner(job, stdout); 736 737 setup_engine(); 738 739 /* Create the pipe by which we'll get the shell's output. 740 */ 741 if (pipe(fdout) == -1) 742 Punt("Cannot create pipe: %s", strerror(errno)); 743 744 if (pipe(fderr) == -1) 745 Punt("Cannot create pipe: %s", strerror(errno)); 746 747 if ((cpid = fork()) == -1) { 748 Punt("Cannot fork"); 749 } else if (cpid == 0) { 750 supervise_jobs = false; 751 /* standard pipe code to route stdout and stderr */ 752 close(fdout[0]); 753 if (dup2(fdout[1], 1) == -1) 754 Punt("Cannot dup2(outPipe): %s", strerror(errno)); 755 if (fdout[1] != 1) 756 close(fdout[1]); 757 close(fderr[0]); 758 if (dup2(fderr[1], 2) == -1) 759 Punt("Cannot dup2(errPipe): %s", strerror(errno)); 760 if (fderr[1] != 2) 761 close(fderr[1]); 762 763 #ifdef USE_PGRP 764 /* 765 * We want to switch the child into a different process family 766 * so we can kill it and all its descendants in one fell swoop, 767 * by killing its process family, but not commit suicide. 768 */ 769 # if defined(SYSV) 770 (void)setsid(); 771 # else 772 (void)setpgid(0, getpid()); 773 # endif 774 #endif /* USE_PGRP */ 775 776 if (random_delay) 777 if (!(nJobs == 1 && no_jobs_left())) 778 usleep(random() % random_delay); 779 780 /* this exits directly */ 781 run_gnode_parallel(job->node); 782 /*NOTREACHED*/ 783 } else { 784 supervise_jobs = true; 785 job->pid = cpid; 786 787 /* we set the current position in the buffers to the beginning 788 * and mark another stream to watch in the outputs mask 789 */ 790 for (i = 0; i < 2; i++) 791 prepare_pipe(&job->in[i], fds+2*i); 792 } 793 794 /* 795 * Now the job is actually running, add it to the table. 796 */ 797 nJobs++; 798 Lst_AtEnd(&runningJobs, job); 799 if (nJobs == maxJobs) { 800 jobFull = true; 801 } 802 } 803 804 static void 805 start_queued_job(Job *job) 806 { 807 if (DEBUG(JOB)) { 808 (void)fprintf(stdout, "Restarting %s...", 809 job->node->name); 810 (void)fflush(stdout); 811 } 812 if (nJobs >= maxJobs && !(job->flags & JOB_SPECIAL)) { 813 /* 814 * Can't be exported and not allowed to run locally -- 815 * put it back on the hold queue and mark the table 816 * full 817 */ 818 debug_printf("holding\n"); 819 Lst_AtFront(&stoppedJobs, job); 820 jobFull = true; 821 debug_printf("Job queue is full.\n"); 822 return; 823 } else { 824 /* 825 * Job may be run locally. 826 */ 827 debug_printf("running locally\n"); 828 } 829 JobExec(job); 830 } 831 832 /*- 833 *----------------------------------------------------------------------- 834 * JobRestart -- 835 * Restart a job that stopped for some reason. 836 * 837 * Side Effects: 838 * jobFull will be set if the job couldn't be run. 839 *----------------------------------------------------------------------- 840 */ 841 static void 842 JobRestart(Job *job) 843 { 844 if (job->flags & JOB_RESTART) { 845 start_queued_job(job); 846 } else { 847 /* 848 * The job has stopped and needs to be restarted. Why it 849 * stopped, we don't know... 850 */ 851 debug_printf("Resuming %s...", job->node->name); 852 if ((nJobs < maxJobs || ((job->flags & JOB_SPECIAL) && 853 maxJobs == 0)) && nJobs != maxJobs) { 854 /* 855 * If we haven't reached the concurrency limit already 856 * (or maxJobs is 0), it's ok to resume the job. 857 */ 858 bool error; 859 860 error = KILL(job->pid, SIGCONT) != 0; 861 862 if (!error) { 863 /* 864 * Make sure the user knows we've continued the 865 * beast and actually put the thing in the job 866 * table. 867 */ 868 job->flags |= JOB_CONTINUING; 869 finish_job(job, JOB_SIGNALED, SIGCONT); 870 871 job->flags &= ~(JOB_RESUME|JOB_CONTINUING); 872 debug_printf("done\n"); 873 } else { 874 Error("couldn't resume %s: %s", 875 job->node->name, strerror(errno)); 876 finish_job(job, JOB_EXITED, 1); 877 } 878 } else { 879 /* 880 * Job cannot be restarted. Mark the table as full and 881 * place the job back on the list of stopped jobs. 882 */ 883 debug_printf("table full\n"); 884 Lst_AtFront(&stoppedJobs, job); 885 jobFull = true; 886 debug_printf("Job queue is full.\n"); 887 } 888 } 889 } 890 891 static Job * 892 prepare_job(GNode *gn, int flags) 893 { 894 bool cmdsOK; /* true if the nodes commands were all right */ 895 bool noExec; /* Set true if we decide not to run the job */ 896 897 /* 898 * Check the commands now so any attributes from .DEFAULT have a chance 899 * to migrate to the node 900 */ 901 cmdsOK = Job_CheckCommands(gn); 902 expand_commands(gn); 903 904 if ((gn->type & OP_MAKE) || (!noExecute && !touchFlag)) { 905 /* 906 * We're serious here, but if the commands were bogus, we're 907 * also dead... 908 */ 909 if (!cmdsOK) 910 job_failure(gn, Punt); 911 912 if (Lst_IsEmpty(&gn->commands)) 913 noExec = true; 914 else 915 noExec = false; 916 917 } else if (noExecute) { 918 if (!cmdsOK || Lst_IsEmpty(&gn->commands)) 919 noExec = true; 920 else 921 noExec = false; 922 } else { 923 /* 924 * Just touch the target and note that no shell should be 925 * executed. Check 926 * the commands, too, but don't die if they're no good -- it 927 * does no harm to keep working up the graph. 928 */ 929 Job_Touch(gn); 930 noExec = true; 931 } 932 933 /* 934 * If we're not supposed to execute a shell, don't. 935 */ 936 if (noExec) { 937 /* 938 * We only want to work our way up the graph if we aren't here 939 * because the commands for the job were no good. 940 */ 941 if (cmdsOK && !aborting) { 942 gn->built_status = MADE; 943 Make_Update(gn); 944 } 945 return NULL; 946 } else { 947 Job *job; /* new job descriptor */ 948 job = emalloc(sizeof(Job)); 949 if (job == NULL) 950 Punt("JobStart out of memory"); 951 952 job->node = gn; 953 954 /* 955 * Set the initial value of the flags for this job based on the 956 * global ones and the node's attributes... Any flags supplied 957 * by the caller are also added to the field. 958 */ 959 job->flags = flags; 960 961 return job; 962 } 963 } 964 965 /*- 966 *----------------------------------------------------------------------- 967 * JobStart -- 968 * Start a target-creation process going for the target described 969 * by the graph node gn. 970 * 971 * Side Effects: 972 * A new Job node is created and added to the list of running 973 * jobs. Make is forked and a child shell created. 974 *----------------------------------------------------------------------- 975 */ 976 static void 977 JobStart(GNode *gn, /* target to create */ 978 int flags) /* flags for the job to override normal ones. 979 * e.g. JOB_SPECIAL */ 980 { 981 Job *job; 982 job = prepare_job(gn, flags); 983 if (!job) 984 return; 985 if (nJobs >= maxJobs && !(job->flags & JOB_SPECIAL) && 986 maxJobs != 0) { 987 /* 988 * The job can only be run locally, but we've hit the limit of 989 * local concurrency, so put the job on hold until some other 990 * job finishes. Note that the special jobs (.BEGIN, .INTERRUPT 991 * and .END) may be run locally even when the local limit has 992 * been reached (e.g. when maxJobs == 0), though they will be 993 * exported if at all possible. In addition, any target marked 994 * with .NOEXPORT will be run locally if maxJobs is 0. 995 */ 996 jobFull = true; 997 998 debug_printf("Can only run job locally.\n"); 999 job->flags |= JOB_RESTART; 1000 Lst_AtEnd(&stoppedJobs, job); 1001 } else { 1002 if (nJobs >= maxJobs) { 1003 /* 1004 * If we're running this job locally as a special case 1005 * (see above), at least say the table is full. 1006 */ 1007 jobFull = true; 1008 debug_printf("Local job queue is full.\n"); 1009 } 1010 JobExec(job); 1011 } 1012 } 1013 1014 /* Helper functions for JobDoOutput */ 1015 1016 1017 /* output debugging banner and print characters from 0 to endpos */ 1018 static void 1019 print_partial_buffer(struct job_pipe *p, Job *job, FILE *out, size_t endPos) 1020 { 1021 size_t i; 1022 1023 banner(job, out); 1024 job->flags |= JOB_DIDOUTPUT; 1025 for (i = 0; i < endPos; i++) 1026 putc(p->buffer[i], out); 1027 } 1028 1029 /* print partial buffer and shift remaining contents */ 1030 static void 1031 print_partial_buffer_and_shift(struct job_pipe *p, Job *job, FILE *out, 1032 size_t endPos) 1033 { 1034 size_t i; 1035 1036 print_partial_buffer(p, job, out, endPos); 1037 1038 for (i = endPos; i < p->pos; i++) 1039 p->buffer[i-endPos] = p->buffer[i]; 1040 p->pos -= endPos; 1041 } 1042 1043 /* print complete lines, looking back to the limit position 1044 * (stuff before limit was already scanned). 1045 * returns true if something was printed. 1046 */ 1047 static bool 1048 print_complete_lines(struct job_pipe *p, Job *job, FILE *out, size_t limit) 1049 { 1050 size_t i; 1051 1052 for (i = p->pos; i > limit; i--) { 1053 if (p->buffer[i-1] == '\n') { 1054 print_partial_buffer_and_shift(p, job, out, i); 1055 return true; 1056 } 1057 } 1058 return false; 1059 } 1060 1061 /*- 1062 *----------------------------------------------------------------------- 1063 * handle_pipe -- 1064 * This functions is called whenever there is something to read on the 1065 * pipe. We collect more output from the given job and store it in the 1066 * job's outBuf. If this makes up lines, we print it tagged by the job's 1067 * identifier, as necessary. 1068 * 1069 * Side Effects: 1070 * curPos may be shifted as may the contents of outBuf. 1071 *----------------------------------------------------------------------- 1072 */ 1073 static void 1074 handle_pipe(struct job_pipe *p, 1075 Job *job, FILE *out, bool finish) 1076 { 1077 int nr; /* number of bytes read */ 1078 int oldpos; /* optimization */ 1079 1080 /* want to get everything ? -> we block */ 1081 if (finish) 1082 fcntl(p->fd, F_SETFL, 0); 1083 1084 do { 1085 nr = read(p->fd, &p->buffer[p->pos], 1086 JOB_BUFSIZE - p->pos); 1087 if (nr == -1) { 1088 if (errno == EAGAIN) 1089 break; 1090 if (DEBUG(JOB)) { 1091 perror("JobDoOutput(piperead)"); 1092 } 1093 } 1094 oldpos = p->pos; 1095 p->pos += nr; 1096 if (!print_complete_lines(p, job, out, oldpos)) 1097 if (p->pos == JOB_BUFSIZE) { 1098 print_partial_buffer(p, job, out, p->pos); 1099 p->pos = 0; 1100 } 1101 } while (nr != 0); 1102 1103 /* at end of file, we print whatever is left */ 1104 if (nr == 0) { 1105 print_partial_buffer(p, job, out, p->pos); 1106 if (p->pos > 0 && p->buffer[p->pos - 1] != '\n') 1107 putchar('\n'); 1108 p->pos = 0; 1109 } 1110 } 1111 1112 static void 1113 handle_job_output(Job *job, int i, bool finish) 1114 { 1115 handle_pipe(&job->in[i], job, i == 0 ? stdout : stderr, finish); 1116 } 1117 1118 /*- 1119 *----------------------------------------------------------------------- 1120 * Job_CatchChildren -- 1121 * Handle the exit of a child. Called by handle_running_jobs 1122 * 1123 * Side Effects: 1124 * The job descriptor is removed from the list of children. 1125 * 1126 * Notes: 1127 * We do waits, blocking or not, according to the wisdom of our 1128 * caller, until there are no more children to report. For each 1129 * job, call JobFinish to finish things off. This will take care of 1130 * putting jobs on the stoppedJobs queue. 1131 *----------------------------------------------------------------------- 1132 */ 1133 void 1134 Job_CatchChildren() 1135 { 1136 pid_t pid; /* pid of dead child */ 1137 Job *job; /* job descriptor for dead child */ 1138 LstNode jnode; /* list element for finding job */ 1139 int status; /* Exit/termination status */ 1140 1141 /* 1142 * Don't even bother if we know there's no one around. 1143 */ 1144 if (nJobs == 0) 1145 return; 1146 1147 while ((pid = waitpid((pid_t) -1, &status, WNOHANG|WUNTRACED)) > 0) { 1148 handle_all_signals(); 1149 debug_printf("Process %ld exited or stopped.\n", (long)pid); 1150 1151 jnode = Lst_Find(&runningJobs, JobCmpPid, &pid); 1152 1153 if (jnode == NULL) { 1154 if (WIFSIGNALED(status) && 1155 (WTERMSIG(status) == SIGCONT)) { 1156 jnode = Lst_Find(&stoppedJobs, JobCmpPid, &pid); 1157 if (jnode == NULL) { 1158 Error("Resumed child (%ld) not in table", (long)pid); 1159 continue; 1160 } 1161 job = (Job *)Lst_Datum(jnode); 1162 Lst_Remove(&stoppedJobs, jnode); 1163 } else { 1164 Error("Child (%ld) not in table?", (long)pid); 1165 continue; 1166 } 1167 } else { 1168 job = (Job *)Lst_Datum(jnode); 1169 Lst_Remove(&runningJobs, jnode); 1170 nJobs--; 1171 if (jobFull) 1172 debug_printf("Job queue is no longer full.\n"); 1173 jobFull = false; 1174 } 1175 1176 JobFinish(job, status); 1177 } 1178 } 1179 1180 void 1181 handle_all_jobs_output(void) 1182 { 1183 int nfds; 1184 struct timeval timeout; 1185 LstNode ln, ln2; 1186 Job *job; 1187 int i; 1188 int status; 1189 1190 /* no jobs */ 1191 if (Lst_IsEmpty(&runningJobs)) 1192 return; 1193 1194 (void)fflush(stdout); 1195 1196 memcpy(actual_mask, output_mask, mask_size); 1197 timeout.tv_sec = SEL_SEC; 1198 timeout.tv_usec = SEL_USEC; 1199 1200 nfds = select(largest_fd+1, actual_mask, NULL, NULL, &timeout); 1201 handle_all_signals(); 1202 for (ln = Lst_First(&runningJobs); nfds && ln != NULL; 1203 ln = ln2) { 1204 ln2 = Lst_Adv(ln); 1205 job = (Job *)Lst_Datum(ln); 1206 job->flags &= ~JOB_DIDOUTPUT; 1207 for (i = 1; i >= 0; i--) { 1208 if (FD_ISSET(job->in[i].fd, actual_mask)) { 1209 nfds--; 1210 handle_job_output(job, i, false); 1211 } 1212 } 1213 if (job->flags & JOB_DIDOUTPUT) { 1214 if (wait4(job->pid, &status, WNOHANG|WUNTRACED, NULL) == 1215 job->pid) { 1216 Lst_Remove(&runningJobs, ln); 1217 nJobs--; 1218 jobFull = false; 1219 JobFinish(job, status); 1220 } else { 1221 Lst_Requeue(&runningJobs, ln); 1222 } 1223 } 1224 } 1225 } 1226 1227 void 1228 handle_running_jobs() 1229 { 1230 handle_all_jobs_output(); 1231 Job_CatchChildren(); 1232 } 1233 1234 static void 1235 loop_handle_running_jobs() 1236 { 1237 while (nJobs) 1238 handle_running_jobs(); 1239 } 1240 /*- 1241 *----------------------------------------------------------------------- 1242 * Job_Make -- 1243 * Start the creation of a target. Basically a front-end for 1244 * JobStart used by the Make module. 1245 * 1246 * Side Effects: 1247 * Another job is started. 1248 *----------------------------------------------------------------------- 1249 */ 1250 void 1251 Job_Make(GNode *gn) 1252 { 1253 (void)JobStart(gn, 0); 1254 } 1255 1256 /*- 1257 *----------------------------------------------------------------------- 1258 * Job_Init -- 1259 * Initialize the process module 1260 * 1261 * Side Effects: 1262 * lists and counters are initialized 1263 *----------------------------------------------------------------------- 1264 */ 1265 void 1266 Job_Init(int maxproc) 1267 { 1268 Static_Lst_Init(&runningJobs); 1269 Static_Lst_Init(&stoppedJobs); 1270 Static_Lst_Init(&errorsList); 1271 maxJobs = maxproc; 1272 nJobs = 0; 1273 jobFull = false; 1274 errors = 0; 1275 1276 aborting = 0; 1277 1278 lastNode = NULL; 1279 1280 if ((begin_node->type & OP_DUMMY) == 0) { 1281 JobStart(begin_node, JOB_SPECIAL); 1282 loop_handle_running_jobs(); 1283 } 1284 } 1285 1286 /*- 1287 *----------------------------------------------------------------------- 1288 * Job_Full -- 1289 * See if the job table is full. It is considered full if it is OR 1290 * if we are in the process of aborting OR if we have 1291 * reached/exceeded our local quota. This prevents any more jobs 1292 * from starting up. 1293 * 1294 * Results: 1295 * true if the job table is full, false otherwise 1296 *----------------------------------------------------------------------- 1297 */ 1298 bool 1299 Job_Full(void) 1300 { 1301 return aborting || jobFull; 1302 } 1303 1304 /*- 1305 *----------------------------------------------------------------------- 1306 * Job_Empty -- 1307 * See if the job table is empty. Because the local concurrency may 1308 * be set to 0, it is possible for the job table to become empty, 1309 * while the list of stoppedJobs remains non-empty. In such a case, 1310 * we want to restart as many jobs as we can. 1311 * 1312 * Results: 1313 * true if it is. false if it ain't. 1314 * ----------------------------------------------------------------------- 1315 */ 1316 bool 1317 Job_Empty(void) 1318 { 1319 if (nJobs == 0) { 1320 if (!Lst_IsEmpty(&stoppedJobs) && !aborting) { 1321 /* 1322 * The job table is obviously not full if it has no 1323 * jobs in it...Try and restart the stopped jobs. 1324 */ 1325 jobFull = false; 1326 JobRestartJobs(); 1327 return false; 1328 } else { 1329 return true; 1330 } 1331 } else { 1332 return false; 1333 } 1334 } 1335 1336 /*- 1337 *----------------------------------------------------------------------- 1338 * JobInterrupt -- 1339 * Handle the receipt of an interrupt. 1340 * 1341 * Side Effects: 1342 * All children are killed. Another job will be started if the 1343 * .INTERRUPT target was given. 1344 *----------------------------------------------------------------------- 1345 */ 1346 static void 1347 JobInterrupt(int runINTERRUPT, /* Non-zero if commands for the .INTERRUPT 1348 * target should be executed */ 1349 int signo) /* signal received */ 1350 { 1351 LstNode ln; /* element in job table */ 1352 Job *job; /* job descriptor in that element */ 1353 1354 aborting = ABORT_INTERRUPT; 1355 1356 for (ln = Lst_First(&runningJobs); ln != NULL; ln = Lst_Adv(ln)) { 1357 job = (Job *)Lst_Datum(ln); 1358 1359 if (!Targ_Precious(job->node)) { 1360 const char *file = job->node->path == NULL ? 1361 job->node->name : job->node->path; 1362 if (!noExecute && eunlink(file) != -1) { 1363 Error("*** %s removed", file); 1364 } 1365 } 1366 if (job->pid) { 1367 debug_printf("JobInterrupt passing signal to " 1368 "child %ld.\n", (long)job->pid); 1369 KILL(job->pid, signo); 1370 } 1371 } 1372 1373 if (runINTERRUPT && !touchFlag) { 1374 if ((interrupt_node->type & OP_DUMMY) == 0) { 1375 ignoreErrors = false; 1376 1377 JobStart(interrupt_node, 0); 1378 loop_handle_running_jobs(); 1379 } 1380 } 1381 exit(signo); 1382 } 1383 1384 /* 1385 *----------------------------------------------------------------------- 1386 * Job_Finish -- 1387 * Do final processing such as the running of the commands 1388 * attached to the .END target. 1389 * 1390 * Results: 1391 * Number of errors reported. 1392 * 1393 *----------------------------------------------------------------------- 1394 */ 1395 int 1396 Job_Finish(void) 1397 { 1398 if ((end_node->type & OP_DUMMY) == 0) { 1399 if (errors) { 1400 Error("Errors reported so .END ignored"); 1401 } else { 1402 JobStart(end_node, JOB_SPECIAL); 1403 loop_handle_running_jobs(); 1404 } 1405 } 1406 return errors; 1407 } 1408 1409 #ifdef CLEANUP 1410 void 1411 Job_End(void) 1412 { 1413 } 1414 #endif 1415 1416 /*- 1417 *----------------------------------------------------------------------- 1418 * Job_Wait -- 1419 * Waits for all running jobs to finish and returns. Sets 'aborting' 1420 * to ABORT_WAIT to prevent other jobs from starting. 1421 * 1422 * Side Effects: 1423 * Currently running jobs finish. 1424 * 1425 *----------------------------------------------------------------------- 1426 */ 1427 void 1428 Job_Wait(void) 1429 { 1430 aborting = ABORT_WAIT; 1431 loop_handle_running_jobs(); 1432 aborting = 0; 1433 } 1434 1435 /*- 1436 *----------------------------------------------------------------------- 1437 * Job_AbortAll -- 1438 * Abort all currently running jobs without handling output or anything. 1439 * This function is to be called only in the event of a major 1440 * error. Most definitely NOT to be called from JobInterrupt. 1441 * 1442 * Side Effects: 1443 * All children are killed, not just the firstborn 1444 *----------------------------------------------------------------------- 1445 */ 1446 void 1447 Job_AbortAll(void) 1448 { 1449 LstNode ln; /* element in job table */ 1450 Job *job; /* the job descriptor in that element */ 1451 int foo; 1452 1453 aborting = ABORT_ERROR; 1454 1455 if (nJobs) { 1456 for (ln = Lst_First(&runningJobs); ln != NULL; 1457 ln = Lst_Adv(ln)) { 1458 job = (Job *)Lst_Datum(ln); 1459 1460 /* 1461 * kill the child process with increasingly drastic 1462 * signals to make darn sure it's dead. 1463 */ 1464 KILL(job->pid, SIGINT); 1465 KILL(job->pid, SIGKILL); 1466 } 1467 } 1468 1469 /* 1470 * Catch as many children as want to report in at first, then give up 1471 */ 1472 while (waitpid(-1, &foo, WNOHANG) > 0) 1473 continue; 1474 } 1475 1476 /*- 1477 *----------------------------------------------------------------------- 1478 * JobRestartJobs -- 1479 * Tries to restart stopped jobs if there are slots available. 1480 * Note that this tries to restart them regardless of pending errors. 1481 * It's not good to leave stopped jobs lying around! 1482 * 1483 * Side Effects: 1484 * Resumes(and possibly migrates) jobs. 1485 *----------------------------------------------------------------------- 1486 */ 1487 static void 1488 JobRestartJobs(void) 1489 { 1490 Job *job; 1491 1492 while (!Job_Full() && 1493 (job = (Job *)Lst_DeQueue(&stoppedJobs)) != NULL) { 1494 debug_printf("Job queue is not full. " 1495 "Restarting a stopped job.\n"); 1496 JobRestart(job); 1497 } 1498 } 1499