147082Smckusick /*- 247082Smckusick * Copyright (c) 1980, 1991 The Regents of the University of California. 347082Smckusick * All rights reserved. 447082Smckusick * 547082Smckusick * %sccs.include.redist.c% 622040Sdist */ 722040Sdist 817527Ssam #ifndef lint 9*57725Smckusick static char sccsid[] = "@(#)tape.c 5.26 (Berkeley) 01/25/93"; 1046585Storek #endif /* not lint */ 1117527Ssam 12*57725Smckusick #include <sys/param.h> 13*57725Smckusick #include <sys/socket.h> 14*57725Smckusick #include <sys/time.h> 15*57725Smckusick #include <sys/wait.h> 1650505Smckusick #ifdef sunos 17*57725Smckusick #include <sys/vnode.h> 18*57725Smckusick 1951605Sbostic #include <ufs/fs.h> 20*57725Smckusick #include <ufs/inode.h> 2150505Smckusick #else 2251605Sbostic #include <ufs/ffs/fs.h> 23*57725Smckusick #include <ufs/ufs/dinode.h> 2450505Smckusick #endif 25*57725Smckusick 2646795Sbostic #include <protocols/dumprestore.h> 27*57725Smckusick 2846585Storek #include <errno.h> 29*57725Smckusick #include <fcntl.h> 3050504Smckusick #include <setjmp.h> 31*57725Smckusick #include <signal.h> 32*57725Smckusick #include <stdio.h> 3346795Sbostic #ifdef __STDC__ 3446795Sbostic #include <stdlib.h> 3546795Sbostic #include <string.h> 36*57725Smckusick #include <unistd.h> 37*57725Smckusick #else 38*57725Smckusick int write(), read(); 3946795Sbostic #endif 40*57725Smckusick 4146795Sbostic #include "dump.h" 4239128Smckusick #include "pathnames.h" 431425Sroot 4429899Smckusick int writesize; /* size of malloc()ed buffer for tape */ 4529899Smckusick long lastspclrec = -1; /* tape block number of last written header */ 4629899Smckusick int trecno = 0; /* next record to write in current block */ 4746614Smckusick extern long blocksperfile; /* number of blocks per output file */ 4848621Skarels long blocksthisvol; /* number of blocks on current output file */ 4948621Skarels extern int ntrec; /* blocking factor on tape */ 5048621Skarels extern int cartridge; 5150504Smckusick extern char *host; 5247056Skarels char *nexttape; 5346585Storek 54*57725Smckusick static int atomic __P((int (*)(), int, char *, int)); 55*57725Smckusick static void doslave __P((int, int)); 56*57725Smckusick static void enslave __P((void)); 57*57725Smckusick static void flushtape __P((void)); 58*57725Smckusick static void killall __P((void)); 59*57725Smckusick static void rollforward __P((void)); 60*57725Smckusick 6110911Ssam /* 6224181Smckusick * Concurrent dump mods (Caltech) - disk block reading and tape writing 6318012Smckusick * are exported to several slave processes. While one slave writes the 6418012Smckusick * tape, the others read disk blocks; they pass control of the tape in 6550504Smckusick * a ring via signals. The parent process traverses the filesystem and 6646789Smckusick * sends writeheader()'s and lists of daddr's to the slaves via pipes. 6750504Smckusick * The following structure defines the instruction packets sent to slaves. 6810911Ssam */ 6950504Smckusick struct req { 7018012Smckusick daddr_t dblk; 7118012Smckusick int count; 7250504Smckusick }; 7318012Smckusick int reqsiz; 7418012Smckusick 7524181Smckusick #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 7650504Smckusick struct slave { 7750504Smckusick int tapea; /* header number at start of this chunk */ 7850504Smckusick int count; /* count to next header (used for TS_TAPE */ 7950504Smckusick /* after EOT) */ 8050504Smckusick int inode; /* inode that we are currently dealing with */ 8150504Smckusick int fd; /* FD for this slave */ 8250504Smckusick int pid; /* PID for this slave */ 8350504Smckusick int sent; /* 1 == we've sent this slave requests */ 8450504Smckusick int firstrec; /* record number of this block */ 8550504Smckusick char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ 8650504Smckusick struct req *req; /* buffer for requests */ 8750504Smckusick } slaves[SLAVES+1]; 8850504Smckusick struct slave *slp; 8918012Smckusick 9050504Smckusick char (*nextblock)[TP_BSIZE]; 9150504Smckusick 9250504Smckusick int master; /* pid of master, for sending error signals */ 9350504Smckusick int tenths; /* length of tape used per block written */ 9450504Smckusick static int caught; /* have we caught the signal to proceed? */ 9550504Smckusick static int ready; /* have we reached the lock point without having */ 9650504Smckusick /* received the SIGUSR2 signal from the prev slave? */ 9750504Smckusick static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ 9850504Smckusick /* SIGUSR2 arrives from the previous slave */ 9950504Smckusick 10046585Storek int 10110911Ssam alloctape() 10210911Ssam { 10325219Smckusick int pgoff = getpagesize() - 1; 10450504Smckusick char *buf; 10550504Smckusick int i; 10610911Ssam 10710911Ssam writesize = ntrec * TP_BSIZE; 10850504Smckusick reqsiz = (ntrec + 1) * sizeof(struct req); 10924181Smckusick /* 11025219Smckusick * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 11125219Smckusick * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 11225219Smckusick * repositioning after stopping, i.e, streaming mode, where the gap is 11325219Smckusick * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 11424181Smckusick */ 11547056Skarels if (blocksperfile == 0) 11647056Skarels tenths = writesize / density + 11747056Skarels (cartridge ? 16 : density == 625 ? 5 : 8); 11825219Smckusick /* 11925219Smckusick * Allocate tape buffer contiguous with the array of instruction 12046789Smckusick * packets, so flushtape() can write them together with one write(). 12125219Smckusick * Align tape buffer on page boundary to speed up tape write(). 12225219Smckusick */ 12350504Smckusick for (i = 0; i <= SLAVES; i++) { 12454047Smckusick buf = (char *) 12554047Smckusick malloc((unsigned)(reqsiz + writesize + pgoff + TP_BSIZE)); 12650504Smckusick if (buf == NULL) 12754047Smckusick return(0); 12850504Smckusick slaves[i].tblock = (char (*)[TP_BSIZE]) 12950504Smckusick (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); 13050504Smckusick slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; 13150504Smckusick } 13250504Smckusick slp = &slaves[0]; 13350504Smckusick slp->count = 1; 13450504Smckusick slp->tapea = 0; 13550504Smckusick slp->firstrec = 0; 13650504Smckusick nextblock = slp->tblock; 13724181Smckusick return(1); 13810911Ssam } 13910911Ssam 14046585Storek void 14154595Smckusick writerec(dp, isspcl) 1425329Smckusic char *dp; 14354595Smckusick int isspcl; 1441425Sroot { 14550504Smckusick 14650504Smckusick slp->req[trecno].dblk = (daddr_t)0; 14750504Smckusick slp->req[trecno].count = 1; 14850504Smckusick *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; 14954595Smckusick if (isspcl) 15054595Smckusick lastspclrec = spcl.c_tapea; 15124181Smckusick trecno++; 1521425Sroot spcl.c_tapea++; 15346585Storek if (trecno >= ntrec) 15446789Smckusick flushtape(); 1551425Sroot } 1561425Sroot 15746585Storek void 15846789Smckusick dumpblock(blkno, size) 1594774Smckusic daddr_t blkno; 1604774Smckusic int size; 1611425Sroot { 16225219Smckusick int avail, tpblks, dblkno; 1631425Sroot 1645329Smckusic dblkno = fsbtodb(sblock, blkno); 16546585Storek tpblks = size >> tp_bshift; 16618012Smckusick while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 16750504Smckusick slp->req[trecno].dblk = dblkno; 16850504Smckusick slp->req[trecno].count = avail; 16925219Smckusick trecno += avail; 1704774Smckusic spcl.c_tapea += avail; 17125219Smckusick if (trecno >= ntrec) 17246789Smckusick flushtape(); 17346585Storek dblkno += avail << (tp_bshift - dev_bshift); 1745329Smckusic tpblks -= avail; 1754774Smckusic } 1761425Sroot } 1771425Sroot 1781425Sroot int nogripe = 0; 1791425Sroot 18046585Storek void 18155288Sbostic tperror(signo) 18255288Sbostic int signo; 18346585Storek { 18450504Smckusick 18518012Smckusick if (pipeout) { 18646614Smckusick msg("write error on %s\n", tape); 18746585Storek quit("Cannot recover\n"); 18818012Smckusick /* NOTREACHED */ 18918012Smckusick } 19048621Skarels msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); 19146614Smckusick broadcast("DUMP WRITE ERROR!\n"); 19218012Smckusick if (!query("Do you want to restart?")) 19355288Sbostic dumpabort(0); 19446614Smckusick msg("Closing this volume. Prepare to restart with new media;\n"); 19518012Smckusick msg("this dump volume will be rewritten.\n"); 19624181Smckusick killall(); 19718012Smckusick nogripe = 1; 19818012Smckusick close_rewind(); 19918012Smckusick Exit(X_REWRITE); 20018012Smckusick } 20118012Smckusick 20246585Storek void 20355288Sbostic sigpipe(signo) 20455288Sbostic int signo; 20525219Smckusick { 20625219Smckusick 20746585Storek quit("Broken pipe\n"); 20825219Smckusick } 20925219Smckusick 210*57725Smckusick static void 21146789Smckusick flushtape() 21218012Smckusick { 21350504Smckusick int i, blks, got; 21450504Smckusick long lastfirstrec; 21546795Sbostic 21650504Smckusick int siz = (char *)nextblock - (char *)slp->req; 2171425Sroot 21850504Smckusick slp->req[trecno].count = 0; /* Sentinel */ 21950504Smckusick 22054047Smckusick if (atomic(write, slp->fd, (char *)slp->req, siz) != siz) 22146585Storek quit("error writing command pipe: %s\n", strerror(errno)); 22250504Smckusick slp->sent = 1; /* we sent a request, read the response later */ 22350504Smckusick 22450504Smckusick lastfirstrec = slp->firstrec; 22550504Smckusick 22650504Smckusick if (++slp >= &slaves[SLAVES]) 22750504Smckusick slp = &slaves[0]; 22850504Smckusick 22950504Smckusick /* Read results back from next slave */ 23050504Smckusick if (slp->sent) { 23154047Smckusick if (atomic(read, slp->fd, (char *)&got, sizeof got) 23254047Smckusick != sizeof got) { 23350504Smckusick perror(" DUMP: error reading command pipe in master"); 23455288Sbostic dumpabort(0); 23550504Smckusick } 23650504Smckusick slp->sent = 0; 23750504Smckusick 23850504Smckusick /* Check for end of tape */ 23950504Smckusick if (got < writesize) { 24050504Smckusick msg("End of tape detected\n"); 24150504Smckusick 24250504Smckusick /* 24350504Smckusick * Drain the results, don't care what the values were. 24450504Smckusick * If we read them here then trewind won't... 24550504Smckusick */ 24650504Smckusick for (i = 0; i < SLAVES; i++) { 24750504Smckusick if (slaves[i].sent) { 24854047Smckusick if (atomic(read, slaves[i].fd, 24954047Smckusick (char *)&got, sizeof got) 25054047Smckusick != sizeof got) { 25150504Smckusick perror(" DUMP: error reading command pipe in master"); 25255288Sbostic dumpabort(0); 25350504Smckusick } 25450504Smckusick slaves[i].sent = 0; 25550504Smckusick } 25650504Smckusick } 25750504Smckusick 25850504Smckusick close_rewind(); 25950504Smckusick rollforward(); 26050504Smckusick return; 26150504Smckusick } 26250504Smckusick } 26350504Smckusick 26450504Smckusick blks = 0; 26550504Smckusick if (spcl.c_type != TS_END) { 26650504Smckusick for (i = 0; i < spcl.c_count; i++) 26750504Smckusick if (spcl.c_addr[i] != 0) 26850504Smckusick blks++; 26950504Smckusick } 27050504Smckusick slp->count = lastspclrec + blks + 1 - spcl.c_tapea; 27150504Smckusick slp->tapea = spcl.c_tapea; 27250504Smckusick slp->firstrec = lastfirstrec + ntrec; 27350504Smckusick slp->inode = curino; 27450504Smckusick nextblock = slp->tblock; 2751425Sroot trecno = 0; 27624181Smckusick asize += tenths; 27710911Ssam blockswritten += ntrec; 27848621Skarels blocksthisvol += ntrec; 27946614Smckusick if (!pipeout && (blocksperfile ? 28048621Skarels (blocksthisvol >= blocksperfile) : (asize > tsize))) { 2811425Sroot close_rewind(); 28250504Smckusick startnewtape(0); 2831425Sroot } 2841425Sroot timeest(); 2851425Sroot } 2861425Sroot 28746585Storek void 28846239Storek trewind() 2891425Sroot { 29024181Smckusick int f; 29150504Smckusick int got; 29212331Smckusick 29350504Smckusick for (f = 0; f < SLAVES; f++) { 29450504Smckusick /* 29550504Smckusick * Drain the results, but unlike EOT we DO (or should) care 29650504Smckusick * what the return values were, since if we detect EOT after 29750504Smckusick * we think we've written the last blocks to the tape anyway, 29850504Smckusick * we have to replay those blocks with rollforward. 29950504Smckusick * 30050504Smckusick * fixme: punt for now. 30150504Smckusick */ 30250504Smckusick if (slaves[f].sent) { 30354047Smckusick if (atomic(read, slaves[f].fd, (char *)&got, sizeof got) 30450504Smckusick != sizeof got) { 30550504Smckusick perror(" DUMP: error reading command pipe in master"); 30655288Sbostic dumpabort(0); 30750504Smckusick } 30850504Smckusick slaves[f].sent = 0; 30950504Smckusick if (got != writesize) { 31050504Smckusick msg("EOT detected in last 2 tape records!\n"); 31150504Smckusick msg("Use a longer tape, decrease the size estimate\n"); 31250504Smckusick quit("or use no size estimate at all.\n"); 31350504Smckusick } 31450504Smckusick } 31554047Smckusick (void) close(slaves[f].fd); 31650504Smckusick } 31746585Storek while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 31846585Storek /* void */; 31954047Smckusick 32054047Smckusick if (pipeout) 32154047Smckusick return; 32254047Smckusick 32348621Skarels msg("Closing %s\n", tape); 32450504Smckusick 32518012Smckusick #ifdef RDUMP 32625219Smckusick if (host) { 32725219Smckusick rmtclose(); 32825219Smckusick while (rmtopen(tape, 0) < 0) 32925219Smckusick sleep(10); 33025219Smckusick rmtclose(); 33125219Smckusick return; 33225219Smckusick } 33350504Smckusick #endif 33454047Smckusick (void) close(tapefd); 3353214Swnj while ((f = open(tape, 0)) < 0) 3363214Swnj sleep (10); 33754047Smckusick (void) close(f); 3381425Sroot } 3391425Sroot 34046585Storek void 3411425Sroot close_rewind() 3421425Sroot { 34346239Storek trewind(); 34448621Skarels if (nexttape) 34548621Skarels return; 34618012Smckusick if (!nogripe) { 34746614Smckusick msg("Change Volumes: Mount volume #%d\n", tapeno+1); 34846614Smckusick broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 3491425Sroot } 35048621Skarels while (!query("Is the new volume mounted and ready to go?")) 35125219Smckusick if (query("Do you want to abort?")) { 35255288Sbostic dumpabort(0); 35325219Smckusick /*NOTREACHED*/ 35425219Smckusick } 3551425Sroot } 3561425Sroot 35750504Smckusick void 35850504Smckusick rollforward() 35950504Smckusick { 36050504Smckusick register struct req *p, *q, *prev; 36150504Smckusick register struct slave *tslp; 36254047Smckusick int i, size, savedtapea, got; 36350504Smckusick union u_spcl *ntb, *otb; 36450504Smckusick tslp = &slaves[SLAVES]; 36550504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 36650504Smckusick 36750504Smckusick /* 36850504Smckusick * Each of the N slaves should have requests that need to 36950504Smckusick * be replayed on the next tape. Use the extra slave buffers 37050504Smckusick * (slaves[SLAVES]) to construct request lists to be sent to 37150504Smckusick * each slave in turn. 37250504Smckusick */ 37350504Smckusick for (i = 0; i < SLAVES; i++) { 37450504Smckusick q = &tslp->req[1]; 37550504Smckusick otb = (union u_spcl *)slp->tblock; 37650504Smckusick 37750504Smckusick /* 37850504Smckusick * For each request in the current slave, copy it to tslp. 37950504Smckusick */ 38050504Smckusick 38150504Smckusick for (p = slp->req; p->count > 0; p += p->count) { 38250504Smckusick *q = *p; 38350504Smckusick if (p->dblk == 0) 38450504Smckusick *ntb++ = *otb++; /* copy the datablock also */ 38550504Smckusick prev = q; 38650504Smckusick q += q->count; 38750504Smckusick } 38850504Smckusick if (prev->dblk != 0) 38950504Smckusick prev->count -= 1; 39050504Smckusick else 39150504Smckusick ntb--; 39250504Smckusick q -= 1; 39350504Smckusick q->count = 0; 39450504Smckusick q = &tslp->req[0]; 39550504Smckusick if (i == 0) { 39650504Smckusick q->dblk = 0; 39750504Smckusick q->count = 1; 39850504Smckusick trecno = 0; 39950504Smckusick nextblock = tslp->tblock; 40050504Smckusick savedtapea = spcl.c_tapea; 40150504Smckusick spcl.c_tapea = slp->tapea; 40250504Smckusick startnewtape(0); 40350504Smckusick spcl.c_tapea = savedtapea; 40450504Smckusick lastspclrec = savedtapea - 1; 40550504Smckusick } 40650504Smckusick size = (char *)ntb - (char *)q; 40754047Smckusick if (atomic(write, slp->fd, (char *)q, size) != size) { 40850504Smckusick perror(" DUMP: error writing command pipe"); 40955288Sbostic dumpabort(0); 41050504Smckusick } 41150504Smckusick slp->sent = 1; 41250504Smckusick if (++slp >= &slaves[SLAVES]) 41350504Smckusick slp = &slaves[0]; 41450504Smckusick 41550504Smckusick q->count = 1; 41650504Smckusick 41750504Smckusick if (prev->dblk != 0) { 41850504Smckusick /* 41950504Smckusick * If the last one was a disk block, make the 42050504Smckusick * first of this one be the last bit of that disk 42150504Smckusick * block... 42250504Smckusick */ 42350504Smckusick q->dblk = prev->dblk + 42450504Smckusick prev->count * (TP_BSIZE / DEV_BSIZE); 42550504Smckusick ntb = (union u_spcl *)tslp->tblock; 42650504Smckusick } else { 42750504Smckusick /* 42850504Smckusick * It wasn't a disk block. Copy the data to its 42950504Smckusick * new location in the buffer. 43050504Smckusick */ 43150504Smckusick q->dblk = 0; 43250504Smckusick *((union u_spcl *)tslp->tblock) = *ntb; 43350504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 43450504Smckusick } 43550504Smckusick } 43650504Smckusick slp->req[0] = *q; 43750504Smckusick nextblock = slp->tblock; 43850504Smckusick if (q->dblk == 0) 43950504Smckusick nextblock++; 44050504Smckusick trecno = 1; 44150504Smckusick 44250504Smckusick /* 44350504Smckusick * Clear the first slaves' response. One hopes that it 44450504Smckusick * worked ok, otherwise the tape is much too short! 44550504Smckusick */ 44650504Smckusick if (slp->sent) { 44754047Smckusick if (atomic(read, slp->fd, (char *)&got, sizeof got) 44854047Smckusick != sizeof got) { 44950504Smckusick perror(" DUMP: error reading command pipe in master"); 45055288Sbostic dumpabort(0); 45150504Smckusick } 45250504Smckusick slp->sent = 0; 45350504Smckusick 45450504Smckusick if (got != writesize) { 45550504Smckusick quit("EOT detected at start of the tape!\n"); 45650504Smckusick } 45750504Smckusick } 45850504Smckusick } 45950504Smckusick 4601425Sroot /* 46150504Smckusick * We implement taking and restoring checkpoints on the tape level. 46250504Smckusick * When each tape is opened, a new process is created by forking; this 46350504Smckusick * saves all of the necessary context in the parent. The child 46450504Smckusick * continues the dump; the parent waits around, saving the context. 46550504Smckusick * If the child returns X_REWRITE, then it had problems writing that tape; 46650504Smckusick * this causes the parent to fork again, duplicating the context, and 46750504Smckusick * everything continues as if nothing had happened. 4681425Sroot */ 46946585Storek void 47050504Smckusick startnewtape(top) 47150504Smckusick int top; 4721425Sroot { 4731425Sroot int parentpid; 4741425Sroot int childpid; 4751425Sroot int status; 4761425Sroot int waitpid; 47747056Skarels char *p; 47850574Smckusick #ifdef sunos 47954047Smckusick void (*interrupt_save)(); 48050574Smckusick #else 48154047Smckusick sig_t interrupt_save; 48250574Smckusick #endif 4831425Sroot 48454047Smckusick interrupt_save = signal(SIGINT, SIG_IGN); 4851425Sroot parentpid = getpid(); 4861425Sroot 487*57725Smckusick restore_check_point: 48854047Smckusick (void)signal(SIGINT, interrupt_save); 48925219Smckusick /* 49025219Smckusick * All signals are inherited... 49125219Smckusick */ 4921425Sroot childpid = fork(); 49318012Smckusick if (childpid < 0) { 4941425Sroot msg("Context save fork fails in parent %d\n", parentpid); 4951425Sroot Exit(X_ABORT); 4961425Sroot } 49718012Smckusick if (childpid != 0) { 4981425Sroot /* 4991425Sroot * PARENT: 5001425Sroot * save the context by waiting 5011425Sroot * until the child doing all of the work returns. 50218012Smckusick * don't catch the interrupt 5031425Sroot */ 50425219Smckusick signal(SIGINT, SIG_IGN); 5051425Sroot #ifdef TDEBUG 5061425Sroot msg("Tape: %d; parent process: %d child process %d\n", 5071425Sroot tapeno+1, parentpid, childpid); 508*57725Smckusick #endif /* TDEBUG */ 50918012Smckusick while ((waitpid = wait(&status)) != childpid) 51018012Smckusick msg("Parent %d waiting for child %d has another child %d return\n", 51118012Smckusick parentpid, childpid, waitpid); 51218012Smckusick if (status & 0xFF) { 5131425Sroot msg("Child %d returns LOB status %o\n", 5141425Sroot childpid, status&0xFF); 5151425Sroot } 5161425Sroot status = (status >> 8) & 0xFF; 5171425Sroot #ifdef TDEBUG 51818012Smckusick switch(status) { 5191425Sroot case X_FINOK: 5201425Sroot msg("Child %d finishes X_FINOK\n", childpid); 5211425Sroot break; 52250504Smckusick case X_ABORT: 5231425Sroot msg("Child %d finishes X_ABORT\n", childpid); 5241425Sroot break; 5251425Sroot case X_REWRITE: 5261425Sroot msg("Child %d finishes X_REWRITE\n", childpid); 5271425Sroot break; 5281425Sroot default: 52918012Smckusick msg("Child %d finishes unknown %d\n", 53025219Smckusick childpid, status); 5311425Sroot break; 5321425Sroot } 533*57725Smckusick #endif /* TDEBUG */ 53418012Smckusick switch(status) { 5351425Sroot case X_FINOK: 5361425Sroot Exit(X_FINOK); 5371425Sroot case X_ABORT: 5381425Sroot Exit(X_ABORT); 5391425Sroot case X_REWRITE: 5401425Sroot goto restore_check_point; 5411425Sroot default: 5421425Sroot msg("Bad return code from dump: %d\n", status); 5431425Sroot Exit(X_ABORT); 5441425Sroot } 5451425Sroot /*NOTREACHED*/ 5461425Sroot } else { /* we are the child; just continue */ 5471425Sroot #ifdef TDEBUG 5481425Sroot sleep(4); /* allow time for parent's message to get out */ 5491425Sroot msg("Child on Tape %d has parent %d, my pid = %d\n", 5501425Sroot tapeno+1, parentpid, getpid()); 551*57725Smckusick #endif /* TDEBUG */ 55247056Skarels /* 55347056Skarels * If we have a name like "/dev/rmt0,/dev/rmt1", 55447056Skarels * use the name before the comma first, and save 55548621Skarels * the remaining names for subsequent volumes. 55647056Skarels */ 55750504Smckusick tapeno++; /* current tape sequence */ 55848621Skarels if (nexttape || index(tape, ',')) { 55948621Skarels if (nexttape && *nexttape) 56048621Skarels tape = nexttape; 56148621Skarels if (p = index(tape, ',')) { 56248621Skarels *p = '\0'; 56348621Skarels nexttape = p + 1; 56448621Skarels } else 56548621Skarels nexttape = NULL; 56648621Skarels msg("Dumping volume %d on %s\n", tapeno, tape); 56748621Skarels } 56818012Smckusick #ifdef RDUMP 56946789Smckusick while ((tapefd = (host ? rmtopen(tape, 2) : 57046789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 57150504Smckusick #else 57250504Smckusick while ((tapefd = (pipeout ? 1 : 57350504Smckusick open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 57450504Smckusick #endif 57539128Smckusick { 57646614Smckusick msg("Cannot open output \"%s\".\n", tape); 57739128Smckusick if (!query("Do you want to retry the open?")) 57855288Sbostic dumpabort(0); 57939128Smckusick } 5801425Sroot 58118012Smckusick enslave(); /* Share open tape file descriptor with slaves */ 58218012Smckusick 5831425Sroot asize = 0; 58448621Skarels blocksthisvol = 0; 58550504Smckusick if (top) 58650504Smckusick newtape++; /* new tape signal */ 58750504Smckusick spcl.c_count = slp->count; 58850504Smckusick /* 58950504Smckusick * measure firstrec in TP_BSIZE units since restore doesn't 59050504Smckusick * know the correct ntrec value... 59150504Smckusick */ 59250504Smckusick spcl.c_firstrec = slp->firstrec; 5931425Sroot spcl.c_volume++; 5941425Sroot spcl.c_type = TS_TAPE; 59530432Smckusick spcl.c_flags |= DR_NEWHEADER; 59654047Smckusick writeheader((ino_t)slp->inode); 59730432Smckusick spcl.c_flags &=~ DR_NEWHEADER; 5981425Sroot if (tapeno > 1) 59948621Skarels msg("Volume %d begins with blocks from inode %d\n", 60050504Smckusick tapeno, slp->inode); 6011425Sroot } 6021425Sroot } 6031425Sroot 60446585Storek void 60555288Sbostic dumpabort(signo) 60655288Sbostic int signo; 6071425Sroot { 60850504Smckusick 60918012Smckusick if (master != 0 && master != getpid()) 61054047Smckusick /* Signals master to call dumpabort */ 61154047Smckusick (void) kill(master, SIGTERM); 61224181Smckusick else { 61324181Smckusick killall(); 61424181Smckusick msg("The ENTIRE dump is aborted.\n"); 61524181Smckusick } 6161425Sroot Exit(X_ABORT); 6171425Sroot } 6181425Sroot 619*57725Smckusick __dead void 6201425Sroot Exit(status) 62146239Storek int status; 6221425Sroot { 62350504Smckusick 6241425Sroot #ifdef TDEBUG 6251425Sroot msg("pid = %d exits with status %d\n", getpid(), status); 626*57725Smckusick #endif /* TDEBUG */ 627*57725Smckusick exit(status); 6281425Sroot } 62918012Smckusick 63024181Smckusick /* 63150504Smckusick * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. 63224181Smckusick */ 63346585Storek void 63455288Sbostic proceed(signo) 63555288Sbostic int signo; 63624181Smckusick { 63718012Smckusick 63850504Smckusick if (ready) 63950504Smckusick longjmp(jmpbuf, 1); 64050504Smckusick caught++; 64124181Smckusick } 64224181Smckusick 64346585Storek void 64418012Smckusick enslave() 64518012Smckusick { 64650504Smckusick int cmd[2]; 64724181Smckusick register int i, j; 64818012Smckusick 64918012Smckusick master = getpid(); 65050504Smckusick 65150504Smckusick signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 65225219Smckusick signal(SIGPIPE, sigpipe); 65325219Smckusick signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 65450504Smckusick signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ 65550504Smckusick 65624181Smckusick for (i = 0; i < SLAVES; i++) { 65750504Smckusick if (i == slp - &slaves[0]) { 65850504Smckusick caught = 1; 65924181Smckusick } else { 66050504Smckusick caught = 0; 66124181Smckusick } 66250504Smckusick 66350504Smckusick if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || 66450504Smckusick (slaves[i].pid = fork()) < 0) 66546585Storek quit("too many slaves, %d (recompile smaller): %s\n", 66646585Storek i, strerror(errno)); 66750504Smckusick 66850504Smckusick slaves[i].fd = cmd[1]; 66950504Smckusick slaves[i].sent = 0; 67050504Smckusick if (slaves[i].pid == 0) { /* Slave starts up here */ 67118012Smckusick for (j = 0; j <= i; j++) 67254047Smckusick (void) close(slaves[j].fd); 67325219Smckusick signal(SIGINT, SIG_IGN); /* Master handles this */ 67450504Smckusick doslave(cmd[0], i); 67518012Smckusick Exit(X_FINOK); 67618012Smckusick } 67718012Smckusick } 67850504Smckusick 67950504Smckusick for (i = 0; i < SLAVES; i++) 68054047Smckusick (void) atomic(write, slaves[i].fd, 68154047Smckusick (char *) &slaves[(i + 1) % SLAVES].pid, 68254047Smckusick sizeof slaves[0].pid); 68350504Smckusick 68450504Smckusick master = 0; 68518012Smckusick } 68618012Smckusick 68746585Storek void 68824181Smckusick killall() 68918012Smckusick { 69024181Smckusick register int i; 69119982Smckusick 69224181Smckusick for (i = 0; i < SLAVES; i++) 69350504Smckusick if (slaves[i].pid > 0) 69454047Smckusick (void) kill(slaves[i].pid, SIGKILL); 69518012Smckusick } 69618012Smckusick 69724181Smckusick /* 69824181Smckusick * Synchronization - each process has a lockfile, and shares file 69924181Smckusick * descriptors to the following process's lockfile. When our write 70024181Smckusick * completes, we release our lock on the following process's lock- 70124181Smckusick * file, allowing the following process to lock it and proceed. We 70224181Smckusick * get the lock back for the next cycle by swapping descriptors. 70324181Smckusick */ 704*57725Smckusick static void 70550504Smckusick doslave(cmd, slave_number) 70650504Smckusick register int cmd; 70750504Smckusick int slave_number; 70819982Smckusick { 70950504Smckusick register int nread; 71050504Smckusick int nextslave, size, wrote, eot_count; 71119982Smckusick 71246789Smckusick /* 71346789Smckusick * Need our own seek pointer. 71446789Smckusick */ 71554047Smckusick (void) close(diskfd); 71646789Smckusick if ((diskfd = open(disk, O_RDONLY)) < 0) 71746585Storek quit("slave couldn't reopen disk: %s\n", strerror(errno)); 71850504Smckusick 71924181Smckusick /* 72050504Smckusick * Need the pid of the next slave in the loop... 72150504Smckusick */ 72254047Smckusick if ((nread = atomic(read, cmd, (char *)&nextslave, sizeof nextslave)) 72350504Smckusick != sizeof nextslave) { 72450504Smckusick quit("master/slave protocol botched - didn't get pid of next slave.\n"); 72550504Smckusick } 72650504Smckusick 72750504Smckusick /* 72825219Smckusick * Get list of blocks to dump, read the blocks into tape buffer 72924181Smckusick */ 73054047Smckusick while ((nread = atomic(read, cmd, (char *)slp->req, reqsiz)) == reqsiz) { 73150504Smckusick register struct req *p = slp->req; 73250504Smckusick 73350504Smckusick for (trecno = 0; trecno < ntrec; 73450504Smckusick trecno += p->count, p += p->count) { 73518012Smckusick if (p->dblk) { 73650504Smckusick bread(p->dblk, slp->tblock[trecno], 73725219Smckusick p->count * TP_BSIZE); 73818012Smckusick } else { 73925219Smckusick if (p->count != 1 || atomic(read, cmd, 74054047Smckusick (char *)slp->tblock[trecno], 74154047Smckusick TP_BSIZE) != TP_BSIZE) 74254047Smckusick quit("master/slave protocol botched.\n"); 74318012Smckusick } 74418012Smckusick } 74550504Smckusick if (setjmp(jmpbuf) == 0) { 74650504Smckusick ready = 1; 74750504Smckusick if (!caught) 74854047Smckusick (void) pause(); 74950504Smckusick } 75050504Smckusick ready = 0; 75150504Smckusick caught = 0; 75225219Smckusick 75350504Smckusick /* Try to write the data... */ 75450504Smckusick eot_count = 0; 75550504Smckusick size = 0; 75650504Smckusick 75750504Smckusick while (eot_count < 10 && size < writesize) { 75818012Smckusick #ifdef RDUMP 75950504Smckusick if (host) 76050504Smckusick wrote = rmtwrite(slp->tblock[0]+size, 76150504Smckusick writesize-size); 76248621Skarels else 76350504Smckusick #endif 76450504Smckusick wrote = write(tapefd, slp->tblock[0]+size, 76550504Smckusick writesize-size); 76650504Smckusick #ifdef WRITEDEBUG 76750504Smckusick printf("slave %d wrote %d\n", slave_number, wrote); 76850504Smckusick #endif 76950504Smckusick if (wrote < 0) 77050504Smckusick break; 77150504Smckusick if (wrote == 0) 77250504Smckusick eot_count++; 77350504Smckusick size += wrote; 77450504Smckusick } 77550504Smckusick 77650504Smckusick #ifdef WRITEDEBUG 77750504Smckusick if (size != writesize) 77850504Smckusick printf("slave %d only wrote %d out of %d bytes and gave up.\n", 77950504Smckusick slave_number, size, writesize); 78050504Smckusick #endif 78150504Smckusick 78250504Smckusick if (eot_count > 0) 78350504Smckusick size = 0; 78450504Smckusick 78550504Smckusick /* 78650504Smckusick * fixme: Pyramids running OSx return ENOSPC 78750504Smckusick * at EOT on 1/2 inch drives. 78850504Smckusick */ 78950504Smckusick if (size < 0) { 79054047Smckusick (void) kill(master, SIGUSR1); 79125219Smckusick for (;;) 79254047Smckusick (void) sigpause(0); 79350504Smckusick } else { 79450504Smckusick /* 79550504Smckusick * pass size of write back to master 79650504Smckusick * (for EOT handling) 79750504Smckusick */ 79854047Smckusick (void) atomic(write, cmd, (char *)&size, sizeof size); 79950504Smckusick } 80050504Smckusick 80150504Smckusick /* 80250504Smckusick * If partial write, don't want next slave to go. 80350504Smckusick * Also jolts him awake. 80450504Smckusick */ 80554047Smckusick (void) kill(nextslave, SIGUSR2); 80650504Smckusick } 80746585Storek if (nread != 0) 80846585Storek quit("error reading command pipe: %s\n", strerror(errno)); 80918012Smckusick } 81019947Smckusick 81119947Smckusick /* 81225219Smckusick * Since a read from a pipe may not return all we asked for, 81325219Smckusick * or a write may not write all we ask if we get a signal, 81425219Smckusick * loop until the count is satisfied (or error). 81519947Smckusick */ 816*57725Smckusick static int 81725219Smckusick atomic(func, fd, buf, count) 818*57725Smckusick int (*func)(), fd; 81919947Smckusick char *buf; 820*57725Smckusick int count; 82119947Smckusick { 82225219Smckusick int got, need = count; 82319947Smckusick 82425219Smckusick while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 82519947Smckusick buf += got; 82625219Smckusick return (got < 0 ? got : count - need); 82719947Smckusick } 828