1 /* $NetBSD: tape.c,v 1.11 1997/06/05 11:13:26 lukem Exp $ */ 2 3 /*- 4 * Copyright (c) 1980, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
*/

#ifndef lint
#if 0
static char sccsid[] = "@(#)tape.c 8.2 (Berkeley) 3/17/94";
#else
static char rcsid[] = "$NetBSD: tape.c,v 1.11 1997/06/05 11:13:26 lukem Exp $";
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/wait.h>
#ifdef sunos
#include <sys/vnode.h>

#include <ufs/fs.h>
#include <ufs/inode.h>
#else
#include <ufs/ffs/fs.h>
#include <ufs/ufs/dinode.h>
#endif

#include <protocols/dumprestore.h>

#include <errno.h>
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#ifdef __STDC__
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#else
int	write(), read();
#endif

#include "dump.h"
#include "pathnames.h"

/*
 * Tape-writing state shared by the master process and its slaves.
 */
int	writesize;		/* size of malloc()ed buffer for tape */
long	lastspclrec = -1;	/* tape block number of last written header */
int	trecno = 0;		/* next record to write in current block */
extern	long blocksperfile;	/* number of blocks per output file */
long	blocksthisvol;		/* number of blocks on current output file */
extern	int ntrec;		/* blocking factor on tape */
extern	int cartridge;		/* nonzero selects the cartridge gap estimate */
				/* in alloctape() */
extern	char *host;		/* non-NULL: use the rmt*() routines (RDUMP) */
char	*nexttape;		/* output names left after the first comma, */
				/* kept for subsequent volumes */

/* File-local helpers, defined further down in this file. */
static	ssize_t atomic __P((ssize_t (*)(), int, char *, int));
static	void doslave __P((int, int));
static	void enslave __P((void));
static	void flushtape __P((void));
static	void killall __P((void));
static	void rollforward __P((void));

/*
 * Concurrent dump mods (Caltech) - disk block reading and tape writing
 * are exported to several slave processes.  While one slave writes the
 * tape, the others read disk blocks; they pass control of the tape in
 * a ring via signals. The parent process traverses the filesystem and
 * sends writeheader()'s and lists of daddr's to the slaves via pipes.
100 * The following structure defines the instruction packets sent to slaves. 101 */ 102 struct req { 103 daddr_t dblk; 104 int count; 105 }; 106 int reqsiz; 107 108 #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 109 struct slave { 110 int tapea; /* header number at start of this chunk */ 111 int count; /* count to next header (used for TS_TAPE */ 112 /* after EOT) */ 113 int inode; /* inode that we are currently dealing with */ 114 int fd; /* FD for this slave */ 115 int pid; /* PID for this slave */ 116 int sent; /* 1 == we've sent this slave requests */ 117 int firstrec; /* record number of this block */ 118 char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ 119 struct req *req; /* buffer for requests */ 120 } slaves[SLAVES+1]; 121 struct slave *slp; 122 123 char (*nextblock)[TP_BSIZE]; 124 125 static time_t tstart_volume; /* time of volume start */ 126 static int tapea_volume; /* value of spcl.c_tapea at volume start */ 127 128 int master; /* pid of master, for sending error signals */ 129 int tenths; /* length of tape used per block written */ 130 static int caught; /* have we caught the signal to proceed? */ 131 static int ready; /* have we reached the lock point without having */ 132 /* received the SIGUSR2 signal from the prev slave? */ 133 static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ 134 /* SIGUSR2 arrives from the previous slave */ 135 136 int 137 alloctape() 138 { 139 int pgoff = getpagesize() - 1; 140 char *buf; 141 int i; 142 143 writesize = ntrec * TP_BSIZE; 144 reqsiz = (ntrec + 1) * sizeof(struct req); 145 /* 146 * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 147 * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 148 * repositioning after stopping, i.e, streaming mode, where the gap is 149 * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 150 */ 151 if (blocksperfile == 0) 152 tenths = writesize / density + 153 (cartridge ? 16 : density == 625 ? 
5 : 8); 154 /* 155 * Allocate tape buffer contiguous with the array of instruction 156 * packets, so flushtape() can write them together with one write(). 157 * Align tape buffer on page boundary to speed up tape write(). 158 */ 159 for (i = 0; i <= SLAVES; i++) { 160 buf = (char *) 161 malloc((unsigned)(reqsiz + writesize + pgoff + TP_BSIZE)); 162 if (buf == NULL) 163 return(0); 164 slaves[i].tblock = (char (*)[TP_BSIZE]) 165 (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); 166 slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; 167 } 168 slp = &slaves[0]; 169 slp->count = 1; 170 slp->tapea = 0; 171 slp->firstrec = 0; 172 nextblock = slp->tblock; 173 return(1); 174 } 175 176 void 177 writerec(dp, isspcl) 178 char *dp; 179 int isspcl; 180 { 181 182 slp->req[trecno].dblk = (daddr_t)0; 183 slp->req[trecno].count = 1; 184 *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; 185 if (isspcl) 186 lastspclrec = spcl.c_tapea; 187 trecno++; 188 spcl.c_tapea++; 189 if (trecno >= ntrec) 190 flushtape(); 191 } 192 193 void 194 dumpblock(blkno, size) 195 daddr_t blkno; 196 int size; 197 { 198 int avail, tpblks, dblkno; 199 200 dblkno = fsbtodb(sblock, blkno); 201 tpblks = size >> tp_bshift; 202 while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 203 slp->req[trecno].dblk = dblkno; 204 slp->req[trecno].count = avail; 205 trecno += avail; 206 spcl.c_tapea += avail; 207 if (trecno >= ntrec) 208 flushtape(); 209 dblkno += avail << (tp_bshift - dev_bshift); 210 tpblks -= avail; 211 } 212 } 213 214 int nogripe = 0; 215 216 void 217 tperror(signo) 218 int signo; 219 { 220 221 if (pipeout) { 222 msg("write error on %s\n", tape); 223 quit("Cannot recover\n"); 224 /* NOTREACHED */ 225 } 226 msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); 227 broadcast("DUMP WRITE ERROR!\n"); 228 if (!query("Do you want to restart?")) 229 dumpabort(0); 230 msg("Closing this volume. 
Prepare to restart with new media;\n"); 231 msg("this dump volume will be rewritten.\n"); 232 killall(); 233 nogripe = 1; 234 close_rewind(); 235 Exit(X_REWRITE); 236 } 237 238 void 239 sigpipe(signo) 240 int signo; 241 { 242 243 quit("Broken pipe\n"); 244 } 245 246 /* 247 * do_stats -- 248 * Update xferrate stats 249 */ 250 time_t 251 do_stats() 252 { 253 time_t tnow, ttaken; 254 int blocks; 255 256 (void)time(&tnow); 257 ttaken = tnow - tstart_volume; 258 blocks = spcl.c_tapea - tapea_volume; 259 msg("Volume %d completed at: %s", tapeno, ctime(&tnow)); 260 if (ttaken > 0) { 261 msg("Volume %d took %d:%02d:%02d\n", tapeno, 262 ttaken / 3600, (ttaken % 3600) / 60, ttaken % 60); 263 msg("Volume %d transfer rate: %ld KB/s\n", tapeno, 264 blocks / ttaken); 265 xferrate += blocks / ttaken; 266 } 267 return(tnow); 268 } 269 270 /* 271 * statussig -- 272 * information message upon receipt of SIGINFO 273 * (derived from optr.c::timeest()) 274 */ 275 void 276 statussig(notused) 277 int notused; 278 { 279 time_t tnow, deltat; 280 char msgbuf[128]; 281 282 if (blockswritten < 500) 283 return; 284 (void) time((time_t *) &tnow); 285 deltat = tstart_writing - tnow + (1.0 * (tnow - tstart_writing)) 286 / blockswritten * tapesize; 287 (void)snprintf(msgbuf, sizeof(msgbuf), 288 "%3.2f%% done at %ld KB/s, finished in %d:%02d\n", 289 (blockswritten * 100.0) / tapesize, 290 (spcl.c_tapea - tapea_volume) / (tnow - tstart_volume), 291 (int)(deltat / 3600), (int)((deltat % 3600) / 60)); 292 write(STDERR_FILENO, msgbuf, strlen(msgbuf)); 293 } 294 295 static void 296 flushtape() 297 { 298 int i, blks, got; 299 long lastfirstrec; 300 301 int siz = (char *)nextblock - (char *)slp->req; 302 303 slp->req[trecno].count = 0; /* Sentinel */ 304 305 if (atomic(write, slp->fd, (char *)slp->req, siz) != siz) 306 quit("error writing command pipe: %s\n", strerror(errno)); 307 slp->sent = 1; /* we sent a request, read the response later */ 308 309 lastfirstrec = slp->firstrec; 310 311 if (++slp >= 
&slaves[SLAVES]) 312 slp = &slaves[0]; 313 314 /* Read results back from next slave */ 315 if (slp->sent) { 316 if (atomic(read, slp->fd, (char *)&got, sizeof got) 317 != sizeof got) { 318 perror(" DUMP: error reading command pipe in master"); 319 dumpabort(0); 320 } 321 slp->sent = 0; 322 323 /* Check for end of tape */ 324 if (got < writesize) { 325 msg("End of tape detected\n"); 326 327 /* 328 * Drain the results, don't care what the values were. 329 * If we read them here then trewind won't... 330 */ 331 for (i = 0; i < SLAVES; i++) { 332 if (slaves[i].sent) { 333 if (atomic(read, slaves[i].fd, 334 (char *)&got, sizeof got) 335 != sizeof got) { 336 perror(" DUMP: error reading command pipe in master"); 337 dumpabort(0); 338 } 339 slaves[i].sent = 0; 340 } 341 } 342 343 close_rewind(); 344 rollforward(); 345 return; 346 } 347 } 348 349 blks = 0; 350 if (spcl.c_type != TS_END) { 351 for (i = 0; i < spcl.c_count; i++) 352 if (spcl.c_addr[i] != 0) 353 blks++; 354 } 355 slp->count = lastspclrec + blks + 1 - spcl.c_tapea; 356 slp->tapea = spcl.c_tapea; 357 slp->firstrec = lastfirstrec + ntrec; 358 slp->inode = curino; 359 nextblock = slp->tblock; 360 trecno = 0; 361 asize += tenths; 362 blockswritten += ntrec; 363 blocksthisvol += ntrec; 364 if (!pipeout && (blocksperfile ? 365 (blocksthisvol >= blocksperfile) : (asize > tsize))) { 366 close_rewind(); 367 startnewtape(0); 368 } 369 timeest(); 370 } 371 372 void 373 trewind() 374 { 375 int f; 376 int got; 377 378 for (f = 0; f < SLAVES; f++) { 379 /* 380 * Drain the results, but unlike EOT we DO (or should) care 381 * what the return values were, since if we detect EOT after 382 * we think we've written the last blocks to the tape anyway, 383 * we have to replay those blocks with rollforward. 384 * 385 * fixme: punt for now. 
386 */ 387 if (slaves[f].sent) { 388 if (atomic(read, slaves[f].fd, (char *)&got, sizeof got) 389 != sizeof got) { 390 perror(" DUMP: error reading command pipe in master"); 391 dumpabort(0); 392 } 393 slaves[f].sent = 0; 394 if (got != writesize) { 395 msg("EOT detected in last 2 tape records!\n"); 396 msg("Use a longer tape, decrease the size estimate\n"); 397 quit("or use no size estimate at all.\n"); 398 } 399 } 400 (void) close(slaves[f].fd); 401 } 402 while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 403 /* void */; 404 405 if (pipeout) 406 return; 407 408 msg("Closing %s\n", tape); 409 410 #ifdef RDUMP 411 if (host) { 412 rmtclose(); 413 while (rmtopen(tape, 0) < 0) 414 sleep(10); 415 rmtclose(); 416 return; 417 } 418 #endif 419 (void) close(tapefd); 420 while ((f = open(tape, 0)) < 0) 421 sleep (10); 422 (void) close(f); 423 } 424 425 void 426 close_rewind() 427 { 428 trewind(); 429 (void)do_stats(); 430 if (nexttape) 431 return; 432 if (!nogripe) { 433 msg("Change Volumes: Mount volume #%d\n", tapeno+1); 434 broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 435 } 436 while (!query("Is the new volume mounted and ready to go?")) 437 if (query("Do you want to abort?")) { 438 dumpabort(0); 439 /*NOTREACHED*/ 440 } 441 } 442 443 void 444 rollforward() 445 { 446 struct req *p, *q, *prev; 447 struct slave *tslp; 448 int i, size, savedtapea, got; 449 union u_spcl *ntb, *otb; 450 tslp = &slaves[SLAVES]; 451 ntb = (union u_spcl *)tslp->tblock[1]; 452 453 /* 454 * Each of the N slaves should have requests that need to 455 * be replayed on the next tape. Use the extra slave buffers 456 * (slaves[SLAVES]) to construct request lists to be sent to 457 * each slave in turn. 458 */ 459 for (i = 0; i < SLAVES; i++) { 460 q = &tslp->req[1]; 461 otb = (union u_spcl *)slp->tblock; 462 463 /* 464 * For each request in the current slave, copy it to tslp. 
465 */ 466 467 prev = NULL; 468 for (p = slp->req; p->count > 0; p += p->count) { 469 *q = *p; 470 if (p->dblk == 0) 471 *ntb++ = *otb++; /* copy the datablock also */ 472 prev = q; 473 q += q->count; 474 } 475 if (prev == NULL) 476 quit("rollforward: protocol botch"); 477 if (prev->dblk != 0) 478 prev->count -= 1; 479 else 480 ntb--; 481 q -= 1; 482 q->count = 0; 483 q = &tslp->req[0]; 484 if (i == 0) { 485 q->dblk = 0; 486 q->count = 1; 487 trecno = 0; 488 nextblock = tslp->tblock; 489 savedtapea = spcl.c_tapea; 490 spcl.c_tapea = slp->tapea; 491 startnewtape(0); 492 spcl.c_tapea = savedtapea; 493 lastspclrec = savedtapea - 1; 494 } 495 size = (char *)ntb - (char *)q; 496 if (atomic(write, slp->fd, (char *)q, size) != size) { 497 perror(" DUMP: error writing command pipe"); 498 dumpabort(0); 499 } 500 slp->sent = 1; 501 if (++slp >= &slaves[SLAVES]) 502 slp = &slaves[0]; 503 504 q->count = 1; 505 506 if (prev->dblk != 0) { 507 /* 508 * If the last one was a disk block, make the 509 * first of this one be the last bit of that disk 510 * block... 511 */ 512 q->dblk = prev->dblk + 513 prev->count * (TP_BSIZE / DEV_BSIZE); 514 ntb = (union u_spcl *)tslp->tblock; 515 } else { 516 /* 517 * It wasn't a disk block. Copy the data to its 518 * new location in the buffer. 519 */ 520 q->dblk = 0; 521 *((union u_spcl *)tslp->tblock) = *ntb; 522 ntb = (union u_spcl *)tslp->tblock[1]; 523 } 524 } 525 slp->req[0] = *q; 526 nextblock = slp->tblock; 527 if (q->dblk == 0) 528 nextblock++; 529 trecno = 1; 530 531 /* 532 * Clear the first slaves' response. One hopes that it 533 * worked ok, otherwise the tape is much too short! 
534 */ 535 if (slp->sent) { 536 if (atomic(read, slp->fd, (char *)&got, sizeof got) 537 != sizeof got) { 538 perror(" DUMP: error reading command pipe in master"); 539 dumpabort(0); 540 } 541 slp->sent = 0; 542 543 if (got != writesize) { 544 quit("EOT detected at start of the tape!\n"); 545 } 546 } 547 } 548 549 /* 550 * We implement taking and restoring checkpoints on the tape level. 551 * When each tape is opened, a new process is created by forking; this 552 * saves all of the necessary context in the parent. The child 553 * continues the dump; the parent waits around, saving the context. 554 * If the child returns X_REWRITE, then it had problems writing that tape; 555 * this causes the parent to fork again, duplicating the context, and 556 * everything continues as if nothing had happened. 557 */ 558 void 559 startnewtape(top) 560 int top; 561 { 562 int parentpid; 563 int childpid; 564 int status; 565 int waitpid; 566 char *p; 567 #ifdef sunos 568 void (*interrupt_save)(); 569 #else 570 sig_t interrupt_save; 571 #endif 572 573 interrupt_save = signal(SIGINT, SIG_IGN); 574 parentpid = getpid(); 575 tapea_volume = spcl.c_tapea; 576 (void)time(&tstart_volume); 577 578 restore_check_point: 579 (void)signal(SIGINT, interrupt_save); 580 /* 581 * All signals are inherited... 582 */ 583 childpid = fork(); 584 if (childpid < 0) { 585 msg("Context save fork fails in parent %d\n", parentpid); 586 Exit(X_ABORT); 587 } 588 if (childpid != 0) { 589 /* 590 * PARENT: 591 * save the context by waiting 592 * until the child doing all of the work returns. 
593 * don't catch the interrupt 594 */ 595 signal(SIGINT, SIG_IGN); 596 #ifdef TDEBUG 597 msg("Tape: %d; parent process: %d child process %d\n", 598 tapeno+1, parentpid, childpid); 599 #endif /* TDEBUG */ 600 while ((waitpid = wait(&status)) != childpid) 601 msg("Parent %d waiting for child %d has another child %d return\n", 602 parentpid, childpid, waitpid); 603 if (status & 0xFF) { 604 msg("Child %d returns LOB status %o\n", 605 childpid, status&0xFF); 606 } 607 status = (status >> 8) & 0xFF; 608 #ifdef TDEBUG 609 switch(status) { 610 case X_FINOK: 611 msg("Child %d finishes X_FINOK\n", childpid); 612 break; 613 case X_ABORT: 614 msg("Child %d finishes X_ABORT\n", childpid); 615 break; 616 case X_REWRITE: 617 msg("Child %d finishes X_REWRITE\n", childpid); 618 break; 619 default: 620 msg("Child %d finishes unknown %d\n", 621 childpid, status); 622 break; 623 } 624 #endif /* TDEBUG */ 625 switch(status) { 626 case X_FINOK: 627 Exit(X_FINOK); 628 case X_ABORT: 629 Exit(X_ABORT); 630 case X_REWRITE: 631 goto restore_check_point; 632 default: 633 msg("Bad return code from dump: %d\n", status); 634 Exit(X_ABORT); 635 } 636 /*NOTREACHED*/ 637 } else { /* we are the child; just continue */ 638 #ifdef TDEBUG 639 sleep(4); /* allow time for parent's message to get out */ 640 msg("Child on Tape %d has parent %d, my pid = %d\n", 641 tapeno+1, parentpid, getpid()); 642 #endif /* TDEBUG */ 643 /* 644 * If we have a name like "/dev/rst0,/dev/rst1", 645 * use the name before the comma first, and save 646 * the remaining names for subsequent volumes. 647 */ 648 tapeno++; /* current tape sequence */ 649 if (nexttape || strchr(tape, ',')) { 650 if (nexttape && *nexttape) 651 tape = nexttape; 652 if ((p = strchr(tape, ',')) != NULL) { 653 *p = '\0'; 654 nexttape = p + 1; 655 } else 656 nexttape = NULL; 657 msg("Dumping volume %d on %s\n", tapeno, tape); 658 } 659 #ifdef RDUMP 660 while ((tapefd = (host ? rmtopen(tape, 2) : 661 pipeout ? 
1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 662 #else 663 while ((tapefd = (pipeout ? 1 : 664 open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 665 #endif 666 { 667 msg("Cannot open output \"%s\".\n", tape); 668 if (!query("Do you want to retry the open?")) 669 dumpabort(0); 670 } 671 672 enslave(); /* Share open tape file descriptor with slaves */ 673 674 asize = 0; 675 blocksthisvol = 0; 676 if (top) 677 newtape++; /* new tape signal */ 678 spcl.c_count = slp->count; 679 /* 680 * measure firstrec in TP_BSIZE units since restore doesn't 681 * know the correct ntrec value... 682 */ 683 spcl.c_firstrec = slp->firstrec; 684 spcl.c_volume++; 685 spcl.c_type = TS_TAPE; 686 spcl.c_flags |= DR_NEWHEADER; 687 writeheader((ino_t)slp->inode); 688 spcl.c_flags &=~ DR_NEWHEADER; 689 msg("Volume %d started at: %s", tapeno, ctime(&tstart_volume)); 690 if (tapeno > 1) 691 msg("Volume %d begins with blocks from inode %d\n", 692 tapeno, slp->inode); 693 } 694 } 695 696 void 697 dumpabort(signo) 698 int signo; 699 { 700 701 if (master != 0 && master != getpid()) 702 /* Signals master to call dumpabort */ 703 (void) kill(master, SIGTERM); 704 else { 705 killall(); 706 msg("The ENTIRE dump is aborted.\n"); 707 } 708 #ifdef RDUMP 709 rmtclose(); 710 #endif 711 Exit(X_ABORT); 712 } 713 714 __dead void 715 Exit(status) 716 int status; 717 { 718 719 #ifdef TDEBUG 720 msg("pid = %d exits with status %d\n", getpid(), status); 721 #endif /* TDEBUG */ 722 exit(status); 723 } 724 725 /* 726 * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. 
727 */ 728 void 729 proceed(signo) 730 int signo; 731 { 732 733 if (ready) 734 longjmp(jmpbuf, 1); 735 caught++; 736 } 737 738 void 739 enslave() 740 { 741 int cmd[2]; 742 int i, j; 743 744 master = getpid(); 745 746 signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 747 signal(SIGPIPE, sigpipe); 748 signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 749 signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ 750 751 for (i = 0; i < SLAVES; i++) { 752 if (i == slp - &slaves[0]) { 753 caught = 1; 754 } else { 755 caught = 0; 756 } 757 758 if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || 759 (slaves[i].pid = fork()) < 0) 760 quit("too many slaves, %d (recompile smaller): %s\n", 761 i, strerror(errno)); 762 763 slaves[i].fd = cmd[1]; 764 slaves[i].sent = 0; 765 if (slaves[i].pid == 0) { /* Slave starts up here */ 766 for (j = 0; j <= i; j++) 767 (void) close(slaves[j].fd); 768 signal(SIGINT, SIG_IGN); /* Master handles this */ 769 signal(SIGINFO, SIG_IGN); 770 doslave(cmd[0], i); 771 Exit(X_FINOK); 772 } 773 } 774 775 for (i = 0; i < SLAVES; i++) 776 (void) atomic(write, slaves[i].fd, 777 (char *) &slaves[(i + 1) % SLAVES].pid, 778 sizeof slaves[0].pid); 779 780 master = 0; 781 } 782 783 void 784 killall() 785 { 786 int i; 787 788 for (i = 0; i < SLAVES; i++) 789 if (slaves[i].pid > 0) 790 (void) kill(slaves[i].pid, SIGKILL); 791 } 792 793 /* 794 * Synchronization - each process has a lockfile, and shares file 795 * descriptors to the following process's lockfile. When our write 796 * completes, we release our lock on the following process's lock- 797 * file, allowing the following process to lock it and proceed. We 798 * get the lock back for the next cycle by swapping descriptors. 799 */ 800 static void 801 doslave(cmd, slave_number) 802 int cmd; 803 int slave_number; 804 { 805 int nread; 806 int nextslave, size, wrote, eot_count; 807 sigset_t sigset; 808 809 /* 810 * Need our own seek pointer. 
811 */ 812 (void) close(diskfd); 813 if ((diskfd = open(disk, O_RDONLY)) < 0) 814 quit("slave couldn't reopen disk: %s\n", strerror(errno)); 815 816 /* 817 * Need the pid of the next slave in the loop... 818 */ 819 if ((nread = atomic(read, cmd, (char *)&nextslave, sizeof nextslave)) 820 != sizeof nextslave) { 821 quit("master/slave protocol botched - didn't get pid of next slave.\n"); 822 } 823 824 /* 825 * Get list of blocks to dump, read the blocks into tape buffer 826 */ 827 while ((nread = atomic(read, cmd, (char *)slp->req, reqsiz)) == reqsiz) { 828 struct req *p = slp->req; 829 830 for (trecno = 0; trecno < ntrec; 831 trecno += p->count, p += p->count) { 832 if (p->dblk) { 833 bread(p->dblk, slp->tblock[trecno], 834 p->count * TP_BSIZE); 835 } else { 836 if (p->count != 1 || atomic(read, cmd, 837 (char *)slp->tblock[trecno], 838 TP_BSIZE) != TP_BSIZE) 839 quit("master/slave protocol botched.\n"); 840 } 841 } 842 if (setjmp(jmpbuf) == 0) { 843 ready = 1; 844 if (!caught) 845 (void) pause(); 846 } 847 ready = 0; 848 caught = 0; 849 850 /* Try to write the data... */ 851 eot_count = 0; 852 size = 0; 853 854 while (eot_count < 10 && size < writesize) { 855 #ifdef RDUMP 856 if (host) 857 wrote = rmtwrite(slp->tblock[0]+size, 858 writesize-size); 859 else 860 #endif 861 wrote = write(tapefd, slp->tblock[0]+size, 862 writesize-size); 863 #ifdef WRITEDEBUG 864 printf("slave %d wrote %d\n", slave_number, wrote); 865 #endif 866 if (wrote < 0) 867 break; 868 if (wrote == 0) 869 eot_count++; 870 size += wrote; 871 } 872 873 #ifdef WRITEDEBUG 874 if (size != writesize) 875 printf("slave %d only wrote %d out of %d bytes and gave up.\n", 876 slave_number, size, writesize); 877 #endif 878 879 if (eot_count > 0) 880 size = 0; 881 882 /* 883 * fixme: Pyramids running OSx return ENOSPC 884 * at EOT on 1/2 inch drives. 
885 */ 886 if (size < 0) { 887 (void) kill(master, SIGUSR1); 888 sigemptyset(&sigset); 889 for (;;) 890 sigsuspend(&sigset); 891 } else { 892 /* 893 * pass size of write back to master 894 * (for EOT handling) 895 */ 896 (void) atomic(write, cmd, (char *)&size, sizeof size); 897 } 898 899 /* 900 * If partial write, don't want next slave to go. 901 * Also jolts him awake. 902 */ 903 (void) kill(nextslave, SIGUSR2); 904 } 905 if (nread != 0) 906 quit("error reading command pipe: %s\n", strerror(errno)); 907 } 908 909 /* 910 * Since a read from a pipe may not return all we asked for, 911 * or a write may not write all we ask if we get a signal, 912 * loop until the count is satisfied (or error). 913 */ 914 static ssize_t 915 atomic(func, fd, buf, count) 916 ssize_t (*func)(); 917 int fd; 918 char *buf; 919 int count; 920 { 921 ssize_t got, need = count; 922 923 while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 924 buf += got; 925 return (got < 0 ? got : count - need); 926 } 927