147082Smckusick /*- 247082Smckusick * Copyright (c) 1980, 1991 The Regents of the University of California. 347082Smckusick * All rights reserved. 447082Smckusick * 547082Smckusick * %sccs.include.redist.c% 622040Sdist */ 722040Sdist 817527Ssam #ifndef lint 9*54047Smckusick static char sccsid[] = "@(#)tape.c 5.23 (Berkeley) 06/18/92"; 1046585Storek #endif /* not lint */ 1117527Ssam 1250505Smckusick #ifdef sunos 1350574Smckusick #include <sys/param.h> 1450505Smckusick #include <stdio.h> 1550505Smckusick #include <ctype.h> 1650505Smckusick #include <sys/stat.h> 1751605Sbostic #include <ufs/fs.h> 1850505Smckusick #else 1946795Sbostic #include <sys/param.h> 2046585Storek #include <sys/wait.h> 2151605Sbostic #include <ufs/ffs/fs.h> 2250505Smckusick #endif 23*54047Smckusick #include <sys/time.h> 24*54047Smckusick #include <ufs/ufs/dinode.h> 2546795Sbostic #include <signal.h> 2646795Sbostic #include <fcntl.h> 2746795Sbostic #include <protocols/dumprestore.h> 2846585Storek #include <errno.h> 2950504Smckusick #include <setjmp.h> 3046795Sbostic #ifdef __STDC__ 3146795Sbostic #include <unistd.h> 3246795Sbostic #include <stdlib.h> 3346795Sbostic #include <string.h> 3446795Sbostic #endif 3550504Smckusick #include <sys/socket.h> 3646795Sbostic #include "dump.h" 3739128Smckusick #include "pathnames.h" 381425Sroot 3929899Smckusick int writesize; /* size of malloc()ed buffer for tape */ 4029899Smckusick long lastspclrec = -1; /* tape block number of last written header */ 4129899Smckusick int trecno = 0; /* next record to write in current block */ 4246614Smckusick extern long blocksperfile; /* number of blocks per output file */ 4348621Skarels long blocksthisvol; /* number of blocks on current output file */ 4448621Skarels extern int ntrec; /* blocking factor on tape */ 4548621Skarels extern int cartridge; 4650504Smckusick extern char *host; 4747056Skarels char *nexttape; 4825219Smckusick #ifdef RDUMP 4946585Storek int rmtopen(), rmtwrite(); 5046585Storek void rmtclose(); 5125219Smckusick #endif RDUMP 5250504Smckusick void rollforward(); 5346585Storek int atomic(); 5446789Smckusick void doslave(), enslave(), flushtape(), killall(); 5546585Storek 5610911Ssam /* 5724181Smckusick * Concurrent dump mods (Caltech) - disk block reading and tape writing 5818012Smckusick * are exported to several slave processes. While one slave writes the 5918012Smckusick * tape, the others read disk blocks; they pass control of the tape in 6050504Smckusick * a ring via signals. The parent process traverses the filesystem and 6146789Smckusick * sends writeheader()'s and lists of daddr's to the slaves via pipes. 6250504Smckusick * The following structure defines the instruction packets sent to slaves. 6310911Ssam */ 6450504Smckusick struct req { 6518012Smckusick daddr_t dblk; 6618012Smckusick int count; 6750504Smckusick }; 6818012Smckusick int reqsiz; 6918012Smckusick 7024181Smckusick #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 7150504Smckusick struct slave { 7250504Smckusick int tapea; /* header number at start of this chunk */ 7350504Smckusick int count; /* count to next header (used for TS_TAPE */ 7450504Smckusick /* after EOT) */ 7550504Smckusick int inode; /* inode that we are currently dealing with */ 7650504Smckusick int fd; /* FD for this slave */ 7750504Smckusick int pid; /* PID for this slave */ 7850504Smckusick int sent; /* 1 == we've sent this slave requests */ 7950504Smckusick int firstrec; /* record number of this block */ 8050504Smckusick char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ 8150504Smckusick struct req *req; /* buffer for requests */ 8250504Smckusick } slaves[SLAVES+1]; 8350504Smckusick struct slave *slp; 8418012Smckusick 8550504Smckusick char (*nextblock)[TP_BSIZE]; 8650504Smckusick 8750504Smckusick int master; /* pid of master, for sending error signals */ 8850504Smckusick int tenths; /* length of tape used per block written */ 8950504Smckusick static int caught; /* have we caught the signal to proceed? */ 9050504Smckusick static int ready; /* have we reached the lock point without having */ 9150504Smckusick /* received the SIGUSR2 signal from the prev slave? */ 9250504Smckusick static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ 9350504Smckusick /* SIGUSR2 arrives from the previous slave */ 9450504Smckusick 9546585Storek int 9610911Ssam alloctape() 9710911Ssam { 9825219Smckusick int pgoff = getpagesize() - 1; 9950504Smckusick char *buf; 10050504Smckusick int i; 10110911Ssam 10210911Ssam writesize = ntrec * TP_BSIZE; 10350504Smckusick reqsiz = (ntrec + 1) * sizeof(struct req); 10424181Smckusick /* 10525219Smckusick * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 10625219Smckusick * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 10725219Smckusick * repositioning after stopping, i.e, streaming mode, where the gap is 10825219Smckusick * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 10924181Smckusick */ 11047056Skarels if (blocksperfile == 0) 11147056Skarels tenths = writesize / density + 11247056Skarels (cartridge ? 16 : density == 625 ? 5 : 8); 11325219Smckusick /* 11425219Smckusick * Allocate tape buffer contiguous with the array of instruction 11546789Smckusick * packets, so flushtape() can write them together with one write(). 11625219Smckusick * Align tape buffer on page boundary to speed up tape write(). 11725219Smckusick */ 11850504Smckusick for (i = 0; i <= SLAVES; i++) { 119*54047Smckusick buf = (char *) 120*54047Smckusick malloc((unsigned)(reqsiz + writesize + pgoff + TP_BSIZE)); 12150504Smckusick if (buf == NULL) 122*54047Smckusick return(0); 12350504Smckusick slaves[i].tblock = (char (*)[TP_BSIZE]) 12450504Smckusick (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); 12550504Smckusick slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; 12650504Smckusick } 12750504Smckusick slp = &slaves[0]; 12850504Smckusick slp->count = 1; 12950504Smckusick slp->tapea = 0; 13050504Smckusick slp->firstrec = 0; 13150504Smckusick nextblock = slp->tblock; 13224181Smckusick return(1); 13310911Ssam } 13410911Ssam 13546585Storek void 13646789Smckusick writerec(dp) 1375329Smckusic char *dp; 1381425Sroot { 13950504Smckusick 14050504Smckusick slp->req[trecno].dblk = (daddr_t)0; 14150504Smckusick slp->req[trecno].count = 1; 14250504Smckusick *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; 14329899Smckusick lastspclrec = spcl.c_tapea; 14424181Smckusick trecno++; 1451425Sroot spcl.c_tapea++; 14646585Storek if (trecno >= ntrec) 14746789Smckusick flushtape(); 1481425Sroot } 1491425Sroot 15046585Storek void 15146789Smckusick dumpblock(blkno, size) 1524774Smckusic daddr_t blkno; 1534774Smckusic int size; 1541425Sroot { 15525219Smckusick int avail, tpblks, dblkno; 1561425Sroot 1575329Smckusic dblkno = fsbtodb(sblock, blkno); 15846585Storek tpblks = size >> tp_bshift; 15918012Smckusick while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 16050504Smckusick slp->req[trecno].dblk = dblkno; 16150504Smckusick slp->req[trecno].count = avail; 16225219Smckusick trecno += avail; 1634774Smckusic spcl.c_tapea += avail; 16425219Smckusick if (trecno >= ntrec) 16546789Smckusick flushtape(); 16646585Storek dblkno += avail << (tp_bshift - dev_bshift); 1675329Smckusic tpblks -= avail; 1684774Smckusic } 1691425Sroot } 1701425Sroot 1711425Sroot int nogripe = 0; 1721425Sroot 17346585Storek void 17446585Storek tperror() 17546585Storek { 17650504Smckusick 17718012Smckusick if (pipeout) { 17846614Smckusick msg("write error on %s\n", tape); 17946585Storek quit("Cannot recover\n"); 18018012Smckusick /* NOTREACHED */ 18118012Smckusick } 18248621Skarels msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); 18346614Smckusick broadcast("DUMP WRITE ERROR!\n"); 18418012Smckusick if (!query("Do you want to restart?")) 18518012Smckusick dumpabort(); 18646614Smckusick msg("Closing this volume. Prepare to restart with new media;\n"); 18718012Smckusick msg("this dump volume will be rewritten.\n"); 18824181Smckusick killall(); 18918012Smckusick nogripe = 1; 19018012Smckusick close_rewind(); 19118012Smckusick Exit(X_REWRITE); 19218012Smckusick } 19318012Smckusick 19446585Storek void 19525219Smckusick sigpipe() 19625219Smckusick { 19725219Smckusick 19846585Storek quit("Broken pipe\n"); 19925219Smckusick } 20025219Smckusick 20146585Storek void 20246789Smckusick flushtape() 20318012Smckusick { 20450504Smckusick int i, blks, got; 20550504Smckusick long lastfirstrec; 20646795Sbostic #ifndef __STDC__ 20750504Smckusick int write(), read(); 20846795Sbostic #endif 20946795Sbostic 21050504Smckusick int siz = (char *)nextblock - (char *)slp->req; 2111425Sroot 21250504Smckusick slp->req[trecno].count = 0; /* Sentinel */ 21350504Smckusick 214*54047Smckusick if (atomic(write, slp->fd, (char *)slp->req, siz) != siz) 21546585Storek quit("error writing command pipe: %s\n", strerror(errno)); 21650504Smckusick slp->sent = 1; /* we sent a request, read the response later */ 21750504Smckusick 21850504Smckusick lastfirstrec = slp->firstrec; 21950504Smckusick 22050504Smckusick if (++slp >= &slaves[SLAVES]) 22150504Smckusick slp = &slaves[0]; 22250504Smckusick 22350504Smckusick /* Read results back from next slave */ 22450504Smckusick if (slp->sent) { 225*54047Smckusick if (atomic(read, slp->fd, (char *)&got, sizeof got) 226*54047Smckusick != sizeof got) { 22750504Smckusick perror(" DUMP: error reading command pipe in master"); 22850504Smckusick dumpabort(); 22950504Smckusick } 23050504Smckusick slp->sent = 0; 23150504Smckusick 23250504Smckusick /* Check for end of tape */ 23350504Smckusick if (got < writesize) { 23450504Smckusick msg("End of tape detected\n"); 23550504Smckusick 23650504Smckusick /* 23750504Smckusick * Drain the results, don't care what the values were. 23850504Smckusick * If we read them here then trewind won't... 23950504Smckusick */ 24050504Smckusick for (i = 0; i < SLAVES; i++) { 24150504Smckusick if (slaves[i].sent) { 242*54047Smckusick if (atomic(read, slaves[i].fd, 243*54047Smckusick (char *)&got, sizeof got) 244*54047Smckusick != sizeof got) { 24550504Smckusick perror(" DUMP: error reading command pipe in master"); 24650504Smckusick dumpabort(); 24750504Smckusick } 24850504Smckusick slaves[i].sent = 0; 24950504Smckusick } 25050504Smckusick } 25150504Smckusick 25250504Smckusick close_rewind(); 25350504Smckusick rollforward(); 25450504Smckusick return; 25550504Smckusick } 25650504Smckusick } 25750504Smckusick 25850504Smckusick blks = 0; 25950504Smckusick if (spcl.c_type != TS_END) { 26050504Smckusick for (i = 0; i < spcl.c_count; i++) 26150504Smckusick if (spcl.c_addr[i] != 0) 26250504Smckusick blks++; 26350504Smckusick } 26450504Smckusick slp->count = lastspclrec + blks + 1 - spcl.c_tapea; 26550504Smckusick slp->tapea = spcl.c_tapea; 26650504Smckusick slp->firstrec = lastfirstrec + ntrec; 26750504Smckusick slp->inode = curino; 26850504Smckusick nextblock = slp->tblock; 2691425Sroot trecno = 0; 27024181Smckusick asize += tenths; 27110911Ssam blockswritten += ntrec; 27248621Skarels blocksthisvol += ntrec; 27346614Smckusick if (!pipeout && (blocksperfile ? 27448621Skarels (blocksthisvol >= blocksperfile) : (asize > tsize))) { 2751425Sroot close_rewind(); 27650504Smckusick startnewtape(0); 2771425Sroot } 2781425Sroot timeest(); 2791425Sroot } 2801425Sroot 28146585Storek void 28246239Storek trewind() 2831425Sroot { 28424181Smckusick int f; 28550504Smckusick int got; 28612331Smckusick 28750504Smckusick for (f = 0; f < SLAVES; f++) { 28850504Smckusick /* 28950504Smckusick * Drain the results, but unlike EOT we DO (or should) care 29050504Smckusick * what the return values were, since if we detect EOT after 29150504Smckusick * we think we've written the last blocks to the tape anyway, 29250504Smckusick * we have to replay those blocks with rollforward. 29350504Smckusick * 29450504Smckusick * fixme: punt for now. 29550504Smckusick */ 29650504Smckusick if (slaves[f].sent) { 297*54047Smckusick if (atomic(read, slaves[f].fd, (char *)&got, sizeof got) 29850504Smckusick != sizeof got) { 29950504Smckusick perror(" DUMP: error reading command pipe in master"); 30050504Smckusick dumpabort(); 30150504Smckusick } 30250504Smckusick slaves[f].sent = 0; 30350504Smckusick if (got != writesize) { 30450504Smckusick msg("EOT detected in last 2 tape records!\n"); 30550504Smckusick msg("Use a longer tape, decrease the size estimate\n"); 30650504Smckusick quit("or use no size estimate at all.\n"); 30750504Smckusick } 30850504Smckusick } 309*54047Smckusick (void) close(slaves[f].fd); 31050504Smckusick } 31146585Storek while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 31246585Storek /* void */; 313*54047Smckusick 314*54047Smckusick if (pipeout) 315*54047Smckusick return; 316*54047Smckusick 31748621Skarels msg("Closing %s\n", tape); 31850504Smckusick 31918012Smckusick #ifdef RDUMP 32025219Smckusick if (host) { 32125219Smckusick rmtclose(); 32225219Smckusick while (rmtopen(tape, 0) < 0) 32325219Smckusick sleep(10); 32425219Smckusick rmtclose(); 32525219Smckusick return; 32625219Smckusick } 32750504Smckusick #endif 328*54047Smckusick (void) close(tapefd); 3293214Swnj while ((f = open(tape, 0)) < 0) 3303214Swnj sleep (10); 331*54047Smckusick (void) close(f); 3321425Sroot } 3331425Sroot 33446585Storek void 3351425Sroot close_rewind() 3361425Sroot { 33746239Storek trewind(); 33848621Skarels if (nexttape) 33948621Skarels return; 34018012Smckusick if (!nogripe) { 34146614Smckusick msg("Change Volumes: Mount volume #%d\n", tapeno+1); 34246614Smckusick broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 3431425Sroot } 34448621Skarels while (!query("Is the new volume mounted and ready to go?")) 34525219Smckusick if (query("Do you want to abort?")) { 3461425Sroot dumpabort(); 34725219Smckusick /*NOTREACHED*/ 34825219Smckusick } 3491425Sroot } 3501425Sroot 35150504Smckusick void 35250504Smckusick rollforward() 35350504Smckusick { 35450504Smckusick register struct req *p, *q, *prev; 35550504Smckusick register struct slave *tslp; 356*54047Smckusick int i, size, savedtapea, got; 35750504Smckusick union u_spcl *ntb, *otb; 35850504Smckusick tslp = &slaves[SLAVES]; 35950504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 36050504Smckusick 36150504Smckusick /* 36250504Smckusick * Each of the N slaves should have requests that need to 36350504Smckusick * be replayed on the next tape. Use the extra slave buffers 36450504Smckusick * (slaves[SLAVES]) to construct request lists to be sent to 36550504Smckusick * each slave in turn. 36650504Smckusick */ 36750504Smckusick for (i = 0; i < SLAVES; i++) { 36850504Smckusick q = &tslp->req[1]; 36950504Smckusick otb = (union u_spcl *)slp->tblock; 37050504Smckusick 37150504Smckusick /* 37250504Smckusick * For each request in the current slave, copy it to tslp. 37350504Smckusick */ 37450504Smckusick 37550504Smckusick for (p = slp->req; p->count > 0; p += p->count) { 37650504Smckusick *q = *p; 37750504Smckusick if (p->dblk == 0) 37850504Smckusick *ntb++ = *otb++; /* copy the datablock also */ 37950504Smckusick prev = q; 38050504Smckusick q += q->count; 38150504Smckusick } 38250504Smckusick if (prev->dblk != 0) 38350504Smckusick prev->count -= 1; 38450504Smckusick else 38550504Smckusick ntb--; 38650504Smckusick q -= 1; 38750504Smckusick q->count = 0; 38850504Smckusick q = &tslp->req[0]; 38950504Smckusick if (i == 0) { 39050504Smckusick q->dblk = 0; 39150504Smckusick q->count = 1; 39250504Smckusick trecno = 0; 39350504Smckusick nextblock = tslp->tblock; 39450504Smckusick savedtapea = spcl.c_tapea; 39550504Smckusick spcl.c_tapea = slp->tapea; 39650504Smckusick startnewtape(0); 39750504Smckusick spcl.c_tapea = savedtapea; 39850504Smckusick lastspclrec = savedtapea - 1; 39950504Smckusick } 40050504Smckusick size = (char *)ntb - (char *)q; 401*54047Smckusick if (atomic(write, slp->fd, (char *)q, size) != size) { 40250504Smckusick perror(" DUMP: error writing command pipe"); 40350504Smckusick dumpabort(); 40450504Smckusick } 40550504Smckusick slp->sent = 1; 40650504Smckusick if (++slp >= &slaves[SLAVES]) 40750504Smckusick slp = &slaves[0]; 40850504Smckusick 40950504Smckusick q->count = 1; 41050504Smckusick 41150504Smckusick if (prev->dblk != 0) { 41250504Smckusick /* 41350504Smckusick * If the last one was a disk block, make the 41450504Smckusick * first of this one be the last bit of that disk 41550504Smckusick * block... 41650504Smckusick */ 41750504Smckusick q->dblk = prev->dblk + 41850504Smckusick prev->count * (TP_BSIZE / DEV_BSIZE); 41950504Smckusick ntb = (union u_spcl *)tslp->tblock; 42050504Smckusick } else { 42150504Smckusick /* 42250504Smckusick * It wasn't a disk block. Copy the data to its 42350504Smckusick * new location in the buffer. 42450504Smckusick */ 42550504Smckusick q->dblk = 0; 42650504Smckusick *((union u_spcl *)tslp->tblock) = *ntb; 42750504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 42850504Smckusick } 42950504Smckusick } 43050504Smckusick slp->req[0] = *q; 43150504Smckusick nextblock = slp->tblock; 43250504Smckusick if (q->dblk == 0) 43350504Smckusick nextblock++; 43450504Smckusick trecno = 1; 43550504Smckusick 43650504Smckusick /* 43750504Smckusick * Clear the first slaves' response. One hopes that it 43850504Smckusick * worked ok, otherwise the tape is much too short! 43950504Smckusick */ 44050504Smckusick if (slp->sent) { 441*54047Smckusick if (atomic(read, slp->fd, (char *)&got, sizeof got) 442*54047Smckusick != sizeof got) { 44350504Smckusick perror(" DUMP: error reading command pipe in master"); 44450504Smckusick dumpabort(); 44550504Smckusick } 44650504Smckusick slp->sent = 0; 44750504Smckusick 44850504Smckusick if (got != writesize) { 44950504Smckusick quit("EOT detected at start of the tape!\n"); 45050504Smckusick } 45150504Smckusick } 45250504Smckusick } 45350504Smckusick 4541425Sroot /* 45550504Smckusick * We implement taking and restoring checkpoints on the tape level. 45650504Smckusick * When each tape is opened, a new process is created by forking; this 45750504Smckusick * saves all of the necessary context in the parent. The child 45850504Smckusick * continues the dump; the parent waits around, saving the context. 45950504Smckusick * If the child returns X_REWRITE, then it had problems writing that tape; 46050504Smckusick * this causes the parent to fork again, duplicating the context, and 46150504Smckusick * everything continues as if nothing had happened. 4621425Sroot */ 46346585Storek void 46450504Smckusick startnewtape(top) 46550504Smckusick int top; 4661425Sroot { 4671425Sroot int parentpid; 4681425Sroot int childpid; 4691425Sroot int status; 4701425Sroot int waitpid; 47147056Skarels char *p; 47250574Smckusick #ifdef sunos 473*54047Smckusick void (*interrupt_save)(); 47450574Smckusick char *index(); 47550574Smckusick #else 476*54047Smckusick sig_t interrupt_save; 47750574Smckusick #endif 4781425Sroot 479*54047Smckusick interrupt_save = signal(SIGINT, SIG_IGN); 4801425Sroot parentpid = getpid(); 4811425Sroot 4821425Sroot restore_check_point: 483*54047Smckusick (void)signal(SIGINT, interrupt_save); 48425219Smckusick /* 48525219Smckusick * All signals are inherited... 48625219Smckusick */ 4871425Sroot childpid = fork(); 48818012Smckusick if (childpid < 0) { 4891425Sroot msg("Context save fork fails in parent %d\n", parentpid); 4901425Sroot Exit(X_ABORT); 4911425Sroot } 49218012Smckusick if (childpid != 0) { 4931425Sroot /* 4941425Sroot * PARENT: 4951425Sroot * save the context by waiting 4961425Sroot * until the child doing all of the work returns. 49718012Smckusick * don't catch the interrupt 4981425Sroot */ 49925219Smckusick signal(SIGINT, SIG_IGN); 5001425Sroot #ifdef TDEBUG 5011425Sroot msg("Tape: %d; parent process: %d child process %d\n", 5021425Sroot tapeno+1, parentpid, childpid); 5031425Sroot #endif TDEBUG 50418012Smckusick while ((waitpid = wait(&status)) != childpid) 50518012Smckusick msg("Parent %d waiting for child %d has another child %d return\n", 50618012Smckusick parentpid, childpid, waitpid); 50718012Smckusick if (status & 0xFF) { 5081425Sroot msg("Child %d returns LOB status %o\n", 5091425Sroot childpid, status&0xFF); 5101425Sroot } 5111425Sroot status = (status >> 8) & 0xFF; 5121425Sroot #ifdef TDEBUG 51318012Smckusick switch(status) { 5141425Sroot case X_FINOK: 5151425Sroot msg("Child %d finishes X_FINOK\n", childpid); 5161425Sroot break; 51750504Smckusick case X_ABORT: 5181425Sroot msg("Child %d finishes X_ABORT\n", childpid); 5191425Sroot break; 5201425Sroot case X_REWRITE: 5211425Sroot msg("Child %d finishes X_REWRITE\n", childpid); 5221425Sroot break; 5231425Sroot default: 52418012Smckusick msg("Child %d finishes unknown %d\n", 52525219Smckusick childpid, status); 5261425Sroot break; 5271425Sroot } 5281425Sroot #endif TDEBUG 52918012Smckusick switch(status) { 5301425Sroot case X_FINOK: 5311425Sroot Exit(X_FINOK); 5321425Sroot case X_ABORT: 5331425Sroot Exit(X_ABORT); 5341425Sroot case X_REWRITE: 5351425Sroot goto restore_check_point; 5361425Sroot default: 5371425Sroot msg("Bad return code from dump: %d\n", status); 5381425Sroot Exit(X_ABORT); 5391425Sroot } 5401425Sroot /*NOTREACHED*/ 5411425Sroot } else { /* we are the child; just continue */ 5421425Sroot #ifdef TDEBUG 5431425Sroot sleep(4); /* allow time for parent's message to get out */ 5441425Sroot msg("Child on Tape %d has parent %d, my pid = %d\n", 5451425Sroot tapeno+1, parentpid, getpid()); 54625219Smckusick #endif TDEBUG 54747056Skarels /* 54847056Skarels * If we have a name like "/dev/rmt0,/dev/rmt1", 54947056Skarels * use the name before the comma first, and save 55048621Skarels * the remaining names for subsequent volumes. 55147056Skarels */ 55250504Smckusick tapeno++; /* current tape sequence */ 55348621Skarels if (nexttape || index(tape, ',')) { 55448621Skarels if (nexttape && *nexttape) 55548621Skarels tape = nexttape; 55648621Skarels if (p = index(tape, ',')) { 55748621Skarels *p = '\0'; 55848621Skarels nexttape = p + 1; 55948621Skarels } else 56048621Skarels nexttape = NULL; 56148621Skarels msg("Dumping volume %d on %s\n", tapeno, tape); 56248621Skarels } 56318012Smckusick #ifdef RDUMP 56446789Smckusick while ((tapefd = (host ? rmtopen(tape, 2) : 56546789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 56650504Smckusick #else 56750504Smckusick while ((tapefd = (pipeout ? 1 : 56850504Smckusick open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 56950504Smckusick #endif 57039128Smckusick { 57146614Smckusick msg("Cannot open output \"%s\".\n", tape); 57239128Smckusick if (!query("Do you want to retry the open?")) 57318012Smckusick dumpabort(); 57439128Smckusick } 5751425Sroot 57618012Smckusick enslave(); /* Share open tape file descriptor with slaves */ 57718012Smckusick 5781425Sroot asize = 0; 57948621Skarels blocksthisvol = 0; 58050504Smckusick if (top) 58150504Smckusick newtape++; /* new tape signal */ 58250504Smckusick spcl.c_count = slp->count; 58350504Smckusick /* 58450504Smckusick * measure firstrec in TP_BSIZE units since restore doesn't 58550504Smckusick * know the correct ntrec value... 58650504Smckusick */ 58750504Smckusick spcl.c_firstrec = slp->firstrec; 5881425Sroot spcl.c_volume++; 5891425Sroot spcl.c_type = TS_TAPE; 59030432Smckusick spcl.c_flags |= DR_NEWHEADER; 591*54047Smckusick writeheader((ino_t)slp->inode); 59230432Smckusick spcl.c_flags &=~ DR_NEWHEADER; 5931425Sroot if (tapeno > 1) 59448621Skarels msg("Volume %d begins with blocks from inode %d\n", 59550504Smckusick tapeno, slp->inode); 5961425Sroot } 5971425Sroot } 5981425Sroot 59946585Storek void 6001425Sroot dumpabort() 6011425Sroot { 60250504Smckusick 60318012Smckusick if (master != 0 && master != getpid()) 604*54047Smckusick /* Signals master to call dumpabort */ 605*54047Smckusick (void) kill(master, SIGTERM); 60624181Smckusick else { 60724181Smckusick killall(); 60824181Smckusick msg("The ENTIRE dump is aborted.\n"); 60924181Smckusick } 6101425Sroot Exit(X_ABORT); 6111425Sroot } 6121425Sroot 61346585Storek void 6141425Sroot Exit(status) 61546239Storek int status; 6161425Sroot { 61750504Smckusick 6181425Sroot #ifdef TDEBUG 6191425Sroot msg("pid = %d exits with status %d\n", getpid(), status); 6201425Sroot #endif TDEBUG 621*54047Smckusick (void) exit(status); 6221425Sroot } 62318012Smckusick 62424181Smckusick /* 62550504Smckusick * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. 62624181Smckusick */ 62746585Storek void 62850504Smckusick proceed() 62924181Smckusick { 63018012Smckusick 63150504Smckusick if (ready) 63250504Smckusick longjmp(jmpbuf, 1); 63350504Smckusick caught++; 63424181Smckusick } 63524181Smckusick 63646585Storek void 63718012Smckusick enslave() 63818012Smckusick { 63950504Smckusick int cmd[2]; 64024181Smckusick register int i, j; 64118012Smckusick 64218012Smckusick master = getpid(); 64350504Smckusick 64450504Smckusick signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 64525219Smckusick signal(SIGPIPE, sigpipe); 64625219Smckusick signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 64750504Smckusick signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ 64850504Smckusick 64924181Smckusick for (i = 0; i < SLAVES; i++) { 65050504Smckusick if (i == slp - &slaves[0]) { 65150504Smckusick caught = 1; 65224181Smckusick } else { 65350504Smckusick caught = 0; 65424181Smckusick } 65550504Smckusick 65650504Smckusick if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || 65750504Smckusick (slaves[i].pid = fork()) < 0) 65846585Storek quit("too many slaves, %d (recompile smaller): %s\n", 65946585Storek i, strerror(errno)); 66050504Smckusick 66150504Smckusick slaves[i].fd = cmd[1]; 66250504Smckusick slaves[i].sent = 0; 66350504Smckusick if (slaves[i].pid == 0) { /* Slave starts up here */ 66418012Smckusick for (j = 0; j <= i; j++) 665*54047Smckusick (void) close(slaves[j].fd); 66625219Smckusick signal(SIGINT, SIG_IGN); /* Master handles this */ 66750504Smckusick doslave(cmd[0], i); 66818012Smckusick Exit(X_FINOK); 66918012Smckusick } 67018012Smckusick } 67150504Smckusick 67250504Smckusick for (i = 0; i < SLAVES; i++) 673*54047Smckusick (void) atomic(write, slaves[i].fd, 674*54047Smckusick (char *) &slaves[(i + 1) % SLAVES].pid, 675*54047Smckusick sizeof slaves[0].pid); 67650504Smckusick 67750504Smckusick master = 0; 67818012Smckusick } 67918012Smckusick 68046585Storek void 68124181Smckusick killall() 68218012Smckusick { 68324181Smckusick register int i; 68419982Smckusick 68524181Smckusick for (i = 0; i < SLAVES; i++) 68650504Smckusick if (slaves[i].pid > 0) 687*54047Smckusick (void) kill(slaves[i].pid, SIGKILL); 68818012Smckusick } 68918012Smckusick 69024181Smckusick /* 69124181Smckusick * Synchronization - each process has a lockfile, and shares file 69224181Smckusick * descriptors to the following process's lockfile. When our write 69324181Smckusick * completes, we release our lock on the following process's lock- 69424181Smckusick * file, allowing the following process to lock it and proceed. We 69524181Smckusick * get the lock back for the next cycle by swapping descriptors. 69624181Smckusick */ 69746585Storek void 69850504Smckusick doslave(cmd, slave_number) 69950504Smckusick register int cmd; 70050504Smckusick int slave_number; 70119982Smckusick { 70250504Smckusick register int nread; 70350504Smckusick int nextslave, size, wrote, eot_count; 70446795Sbostic #ifndef __STDC__ 70546795Sbostic int read(); 70646795Sbostic #endif 70719982Smckusick 70846789Smckusick /* 70946789Smckusick * Need our own seek pointer. 71046789Smckusick */ 711*54047Smckusick (void) close(diskfd); 71246789Smckusick if ((diskfd = open(disk, O_RDONLY)) < 0) 71346585Storek quit("slave couldn't reopen disk: %s\n", strerror(errno)); 71450504Smckusick 71524181Smckusick /* 71650504Smckusick * Need the pid of the next slave in the loop... 71750504Smckusick */ 718*54047Smckusick if ((nread = atomic(read, cmd, (char *)&nextslave, sizeof nextslave)) 71950504Smckusick != sizeof nextslave) { 72050504Smckusick quit("master/slave protocol botched - didn't get pid of next slave.\n"); 72150504Smckusick } 72250504Smckusick 72350504Smckusick /* 72425219Smckusick * Get list of blocks to dump, read the blocks into tape buffer 72524181Smckusick */ 726*54047Smckusick while ((nread = atomic(read, cmd, (char *)slp->req, reqsiz)) == reqsiz) { 72750504Smckusick register struct req *p = slp->req; 72850504Smckusick 72950504Smckusick for (trecno = 0; trecno < ntrec; 73050504Smckusick trecno += p->count, p += p->count) { 73118012Smckusick if (p->dblk) { 73250504Smckusick bread(p->dblk, slp->tblock[trecno], 73325219Smckusick p->count * TP_BSIZE); 73418012Smckusick } else { 73525219Smckusick if (p->count != 1 || atomic(read, cmd, 736*54047Smckusick (char *)slp->tblock[trecno], 737*54047Smckusick TP_BSIZE) != TP_BSIZE) 738*54047Smckusick quit("master/slave protocol botched.\n"); 73918012Smckusick } 74018012Smckusick } 74150504Smckusick if (setjmp(jmpbuf) == 0) { 74250504Smckusick ready = 1; 74350504Smckusick if (!caught) 744*54047Smckusick (void) pause(); 74550504Smckusick } 74650504Smckusick ready = 0; 74750504Smckusick caught = 0; 74825219Smckusick 74950504Smckusick /* Try to write the data... */ 75050504Smckusick eot_count = 0; 75150504Smckusick size = 0; 75250504Smckusick 75350504Smckusick while (eot_count < 10 && size < writesize) { 75418012Smckusick #ifdef RDUMP 75550504Smckusick if (host) 75650504Smckusick wrote = rmtwrite(slp->tblock[0]+size, 75750504Smckusick writesize-size); 75848621Skarels else 75950504Smckusick #endif 76050504Smckusick wrote = write(tapefd, slp->tblock[0]+size, 76150504Smckusick writesize-size); 76250504Smckusick #ifdef WRITEDEBUG 76350504Smckusick printf("slave %d wrote %d\n", slave_number, wrote); 76450504Smckusick #endif 76550504Smckusick if (wrote < 0) 76650504Smckusick break; 76750504Smckusick if (wrote == 0) 76850504Smckusick eot_count++; 76950504Smckusick size += wrote; 77050504Smckusick } 77150504Smckusick 77250504Smckusick #ifdef WRITEDEBUG 77350504Smckusick if (size != writesize) 77450504Smckusick printf("slave %d only wrote %d out of %d bytes and gave up.\n", 77550504Smckusick slave_number, size, writesize); 77650504Smckusick #endif 77750504Smckusick 77850504Smckusick if (eot_count > 0) 77950504Smckusick size = 0; 78050504Smckusick 78150504Smckusick /* 78250504Smckusick * fixme: Pyramids running OSx return ENOSPC 78350504Smckusick * at EOT on 1/2 inch drives. 78450504Smckusick */ 78550504Smckusick if (size < 0) { 786*54047Smckusick (void) kill(master, SIGUSR1); 78725219Smckusick for (;;) 788*54047Smckusick (void) sigpause(0); 78950504Smckusick } else { 79050504Smckusick /* 79150504Smckusick * pass size of write back to master 79250504Smckusick * (for EOT handling) 79350504Smckusick */ 794*54047Smckusick (void) atomic(write, cmd, (char *)&size, sizeof size); 79550504Smckusick } 79650504Smckusick 79750504Smckusick /* 79850504Smckusick * If partial write, don't want next slave to go. 79950504Smckusick * Also jolts him awake. 80050504Smckusick */ 801*54047Smckusick (void) kill(nextslave, SIGUSR2); 80250504Smckusick } 80346585Storek if (nread != 0) 80446585Storek quit("error reading command pipe: %s\n", strerror(errno)); 80518012Smckusick } 80619947Smckusick 80719947Smckusick /* 80825219Smckusick * Since a read from a pipe may not return all we asked for, 80925219Smckusick * or a write may not write all we ask if we get a signal, 81025219Smckusick * loop until the count is satisfied (or error). 81119947Smckusick */ 81246585Storek int 81325219Smckusick atomic(func, fd, buf, count) 81425219Smckusick int (*func)(), fd, count; 81519947Smckusick char *buf; 81619947Smckusick { 81725219Smckusick int got, need = count; 81819947Smckusick 81925219Smckusick while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 82019947Smckusick buf += got; 82125219Smckusick return (got < 0 ? got : count - need); 82219947Smckusick } 823