147082Smckusick /*- 247082Smckusick * Copyright (c) 1980, 1991 The Regents of the University of California. 347082Smckusick * All rights reserved. 447082Smckusick * 547082Smckusick * %sccs.include.redist.c% 622040Sdist */ 722040Sdist 817527Ssam #ifndef lint 9*51605Sbostic static char sccsid[] = "@(#)tape.c 5.22 (Berkeley) 11/10/91"; 1046585Storek #endif /* not lint */ 1117527Ssam 1250505Smckusick #ifdef sunos 1350574Smckusick #include <sys/param.h> 1450505Smckusick #include <stdio.h> 1550505Smckusick #include <ctype.h> 1650505Smckusick #include <sys/stat.h> 1750505Smckusick #include <sys/time.h> 1850505Smckusick #include <sys/dir.h> 1950505Smckusick #include <sys/vnode.h> 2050505Smckusick #include <ufs/inode.h> 21*51605Sbostic #include <ufs/fs.h> 2250505Smckusick #else 2346795Sbostic #include <sys/param.h> 2446585Storek #include <sys/wait.h> 25*51605Sbostic #include <ufs/ufs/dinode.h> 26*51605Sbostic #include <ufs/ffs/fs.h> 2750505Smckusick #endif 2846795Sbostic #include <signal.h> 2946795Sbostic #include <fcntl.h> 3046795Sbostic #include <protocols/dumprestore.h> 3146585Storek #include <errno.h> 3250504Smckusick #include <setjmp.h> 3346795Sbostic #ifdef __STDC__ 3446795Sbostic #include <unistd.h> 3546795Sbostic #include <stdlib.h> 3646795Sbostic #include <string.h> 3746795Sbostic #endif 3850504Smckusick #include <sys/socket.h> 3946795Sbostic #include "dump.h" 4039128Smckusick #include "pathnames.h" 411425Sroot 4229899Smckusick int writesize; /* size of malloc()ed buffer for tape */ 4329899Smckusick long lastspclrec = -1; /* tape block number of last written header */ 4429899Smckusick int trecno = 0; /* next record to write in current block */ 4546614Smckusick extern long blocksperfile; /* number of blocks per output file */ 4648621Skarels long blocksthisvol; /* number of blocks on current output file */ 4748621Skarels extern int ntrec; /* blocking factor on tape */ 4848621Skarels extern int cartridge; 4950504Smckusick extern char *host; 5047056Skarels char *nexttape; 5125219Smckusick #ifdef RDUMP 5246585Storek int rmtopen(), rmtwrite(); 5346585Storek void rmtclose(); 5425219Smckusick #endif RDUMP 5550504Smckusick void rollforward(); 5646585Storek int atomic(); 5746789Smckusick void doslave(), enslave(), flushtape(), killall(); 5846585Storek 5910911Ssam /* 6024181Smckusick * Concurrent dump mods (Caltech) - disk block reading and tape writing 6118012Smckusick * are exported to several slave processes. While one slave writes the 6218012Smckusick * tape, the others read disk blocks; they pass control of the tape in 6350504Smckusick * a ring via signals. The parent process traverses the filesystem and 6446789Smckusick * sends writeheader()'s and lists of daddr's to the slaves via pipes. 6550504Smckusick * The following structure defines the instruction packets sent to slaves. 6610911Ssam */ 6750504Smckusick struct req { 6818012Smckusick daddr_t dblk; 6918012Smckusick int count; 7050504Smckusick }; 7118012Smckusick int reqsiz; 7218012Smckusick 7324181Smckusick #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 7450504Smckusick struct slave { 7550504Smckusick int tapea; /* header number at start of this chunk */ 7650504Smckusick int count; /* count to next header (used for TS_TAPE */ 7750504Smckusick /* after EOT) */ 7850504Smckusick int inode; /* inode that we are currently dealing with */ 7950504Smckusick int fd; /* FD for this slave */ 8050504Smckusick int pid; /* PID for this slave */ 8150504Smckusick int sent; /* 1 == we've sent this slave requests */ 8250504Smckusick int firstrec; /* record number of this block */ 8350504Smckusick char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ 8450504Smckusick struct req *req; /* buffer for requests */ 8550504Smckusick } slaves[SLAVES+1]; 8650504Smckusick struct slave *slp; 8718012Smckusick 8850504Smckusick char (*nextblock)[TP_BSIZE]; 8950504Smckusick 9050504Smckusick int master; /* pid of master, for sending error signals */ 9150504Smckusick int tenths; /* length of tape used per block written */ 9250504Smckusick static int caught; /* have we caught the signal to proceed? */ 9350504Smckusick static int ready; /* have we reached the lock point without having */ 9450504Smckusick /* received the SIGUSR2 signal from the prev slave? */ 9550504Smckusick static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ 9650504Smckusick /* SIGUSR2 arrives from the previous slave */ 9750504Smckusick 9846585Storek int 9910911Ssam alloctape() 10010911Ssam { 10125219Smckusick int pgoff = getpagesize() - 1; 10250504Smckusick char *buf; 10350504Smckusick int i; 10410911Ssam 10510911Ssam writesize = ntrec * TP_BSIZE; 10650504Smckusick reqsiz = (ntrec + 1) * sizeof(struct req); 10724181Smckusick /* 10825219Smckusick * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 10925219Smckusick * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 11025219Smckusick * repositioning after stopping, i.e, streaming mode, where the gap is 11125219Smckusick * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 11224181Smckusick */ 11347056Skarels if (blocksperfile == 0) 11447056Skarels tenths = writesize / density + 11547056Skarels (cartridge ? 16 : density == 625 ? 5 : 8); 11625219Smckusick /* 11725219Smckusick * Allocate tape buffer contiguous with the array of instruction 11846789Smckusick * packets, so flushtape() can write them together with one write(). 11925219Smckusick * Align tape buffer on page boundary to speed up tape write(). 12025219Smckusick */ 12150504Smckusick for (i = 0; i <= SLAVES; i++) { 12250504Smckusick buf = (char *) malloc(reqsiz + writesize + pgoff + TP_BSIZE); 12350504Smckusick if (buf == NULL) 12450504Smckusick return(0); 12550504Smckusick slaves[i].tblock = (char (*)[TP_BSIZE]) 12650504Smckusick (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); 12750504Smckusick slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; 12850504Smckusick } 12950504Smckusick slp = &slaves[0]; 13050504Smckusick slp->count = 1; 13150504Smckusick slp->tapea = 0; 13250504Smckusick slp->firstrec = 0; 13350504Smckusick nextblock = slp->tblock; 13424181Smckusick return(1); 13510911Ssam } 13610911Ssam 13746585Storek void 13846789Smckusick writerec(dp) 1395329Smckusic char *dp; 1401425Sroot { 14150504Smckusick 14250504Smckusick slp->req[trecno].dblk = (daddr_t)0; 14350504Smckusick slp->req[trecno].count = 1; 14450504Smckusick *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; 14529899Smckusick lastspclrec = spcl.c_tapea; 14624181Smckusick trecno++; 1471425Sroot spcl.c_tapea++; 14846585Storek if (trecno >= ntrec) 14946789Smckusick flushtape(); 1501425Sroot } 1511425Sroot 15246585Storek void 15346789Smckusick dumpblock(blkno, size) 1544774Smckusic daddr_t blkno; 1554774Smckusic int size; 1561425Sroot { 15725219Smckusick int avail, tpblks, dblkno; 1581425Sroot 1595329Smckusic dblkno = fsbtodb(sblock, blkno); 16046585Storek tpblks = size >> tp_bshift; 16118012Smckusick while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 16250504Smckusick slp->req[trecno].dblk = dblkno; 16350504Smckusick slp->req[trecno].count = avail; 16425219Smckusick trecno += avail; 1654774Smckusic spcl.c_tapea += avail; 16625219Smckusick if (trecno >= ntrec) 16746789Smckusick flushtape(); 16846585Storek dblkno += avail << (tp_bshift - dev_bshift); 1695329Smckusic tpblks -= avail; 1704774Smckusic } 1711425Sroot } 1721425Sroot 1731425Sroot int nogripe = 0; 1741425Sroot 17546585Storek void 17646585Storek tperror() 17746585Storek { 17850504Smckusick 17918012Smckusick if (pipeout) { 18046614Smckusick msg("write error on %s\n", tape); 18146585Storek quit("Cannot recover\n"); 18218012Smckusick /* NOTREACHED */ 18318012Smckusick } 18448621Skarels msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); 18546614Smckusick broadcast("DUMP WRITE ERROR!\n"); 18618012Smckusick if (!query("Do you want to restart?")) 18718012Smckusick dumpabort(); 18846614Smckusick msg("Closing this volume. Prepare to restart with new media;\n"); 18918012Smckusick msg("this dump volume will be rewritten.\n"); 19024181Smckusick killall(); 19118012Smckusick nogripe = 1; 19218012Smckusick close_rewind(); 19318012Smckusick Exit(X_REWRITE); 19418012Smckusick } 19518012Smckusick 19646585Storek void 19725219Smckusick sigpipe() 19825219Smckusick { 19925219Smckusick 20046585Storek quit("Broken pipe\n"); 20125219Smckusick } 20225219Smckusick 20346585Storek void 20446789Smckusick flushtape() 20518012Smckusick { 20650504Smckusick int i, blks, got; 20750504Smckusick long lastfirstrec; 20846795Sbostic #ifndef __STDC__ 20950504Smckusick int write(), read(); 21046795Sbostic #endif 21146795Sbostic 21250504Smckusick int siz = (char *)nextblock - (char *)slp->req; 2131425Sroot 21450504Smckusick slp->req[trecno].count = 0; /* Sentinel */ 21550504Smckusick 21650504Smckusick if (atomic(write, slp->fd, slp->req, siz) != siz) 21746585Storek quit("error writing command pipe: %s\n", strerror(errno)); 21850504Smckusick slp->sent = 1; /* we sent a request, read the response later */ 21950504Smckusick 22050504Smckusick lastfirstrec = slp->firstrec; 22150504Smckusick 22250504Smckusick if (++slp >= &slaves[SLAVES]) 22350504Smckusick slp = &slaves[0]; 22450504Smckusick 22550504Smckusick /* Read results back from next slave */ 22650504Smckusick if (slp->sent) { 22750504Smckusick if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { 22850504Smckusick perror(" DUMP: error reading command pipe in master"); 22950504Smckusick dumpabort(); 23050504Smckusick } 23150504Smckusick slp->sent = 0; 23250504Smckusick 23350504Smckusick /* Check for end of tape */ 23450504Smckusick if (got < writesize) { 23550504Smckusick msg("End of tape detected\n"); 23650504Smckusick 23750504Smckusick /* 23850504Smckusick * Drain the results, don't care what the values were. 23950504Smckusick * If we read them here then trewind won't... 24050504Smckusick */ 24150504Smckusick for (i = 0; i < SLAVES; i++) { 24250504Smckusick if (slaves[i].sent) { 24350504Smckusick if (atomic(read, slaves[i].fd, &got, 24450504Smckusick sizeof got) != sizeof got) { 24550504Smckusick perror(" DUMP: error reading command pipe in master"); 24650504Smckusick dumpabort(); 24750504Smckusick } 24850504Smckusick slaves[i].sent = 0; 24950504Smckusick } 25050504Smckusick } 25150504Smckusick 25250504Smckusick close_rewind(); 25350504Smckusick rollforward(); 25450504Smckusick return; 25550504Smckusick } 25650504Smckusick } 25750504Smckusick 25850504Smckusick blks = 0; 25950504Smckusick if (spcl.c_type != TS_END) { 26050504Smckusick for (i = 0; i < spcl.c_count; i++) 26150504Smckusick if (spcl.c_addr[i] != 0) 26250504Smckusick blks++; 26350504Smckusick } 26450504Smckusick slp->count = lastspclrec + blks + 1 - spcl.c_tapea; 26550504Smckusick slp->tapea = spcl.c_tapea; 26650504Smckusick slp->firstrec = lastfirstrec + ntrec; 26750504Smckusick slp->inode = curino; 26850504Smckusick nextblock = slp->tblock; 2691425Sroot trecno = 0; 27024181Smckusick asize += tenths; 27110911Ssam blockswritten += ntrec; 27248621Skarels blocksthisvol += ntrec; 27346614Smckusick if (!pipeout && (blocksperfile ? 27448621Skarels (blocksthisvol >= blocksperfile) : (asize > tsize))) { 2751425Sroot close_rewind(); 27650504Smckusick startnewtape(0); 2771425Sroot } 2781425Sroot timeest(); 2791425Sroot } 2801425Sroot 28146585Storek void 28246239Storek trewind() 2831425Sroot { 28424181Smckusick int f; 28550504Smckusick int got; 28612331Smckusick 28712331Smckusick if (pipeout) 28812331Smckusick return; 28950504Smckusick for (f = 0; f < SLAVES; f++) { 29050504Smckusick /* 29150504Smckusick * Drain the results, but unlike EOT we DO (or should) care 29250504Smckusick * what the return values were, since if we detect EOT after 29350504Smckusick * we think we've written the last blocks to the tape anyway, 29450504Smckusick * we have to replay those blocks with rollforward. 29550504Smckusick * 29650504Smckusick * fixme: punt for now. 29750504Smckusick */ 29850504Smckusick if (slaves[f].sent) { 29950504Smckusick if (atomic(read, slaves[f].fd, &got, sizeof got) 30050504Smckusick != sizeof got) { 30150504Smckusick perror(" DUMP: error reading command pipe in master"); 30250504Smckusick dumpabort(); 30350504Smckusick } 30450504Smckusick slaves[f].sent = 0; 30550504Smckusick if (got != writesize) { 30650504Smckusick msg("EOT detected in last 2 tape records!\n"); 30750504Smckusick msg("Use a longer tape, decrease the size estimate\n"); 30850504Smckusick quit("or use no size estimate at all.\n"); 30950504Smckusick } 31050504Smckusick } 31150504Smckusick close(slaves[f].fd); 31250504Smckusick } 31346585Storek while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 31446585Storek /* void */; 31548621Skarels msg("Closing %s\n", tape); 31650504Smckusick 31718012Smckusick #ifdef RDUMP 31825219Smckusick if (host) { 31925219Smckusick rmtclose(); 32025219Smckusick while (rmtopen(tape, 0) < 0) 32125219Smckusick sleep(10); 32225219Smckusick rmtclose(); 32325219Smckusick return; 32425219Smckusick } 32550504Smckusick #endif 32646789Smckusick close(tapefd); 3273214Swnj while ((f = open(tape, 0)) < 0) 3283214Swnj sleep (10); 3293214Swnj close(f); 3301425Sroot } 3311425Sroot 33246585Storek void 3331425Sroot close_rewind() 3341425Sroot { 33546239Storek trewind(); 33648621Skarels if (nexttape) 33748621Skarels return; 33818012Smckusick if (!nogripe) { 33946614Smckusick msg("Change Volumes: Mount volume #%d\n", tapeno+1); 34046614Smckusick broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 3411425Sroot } 34248621Skarels while (!query("Is the new volume mounted and ready to go?")) 34325219Smckusick if (query("Do you want to abort?")) { 3441425Sroot dumpabort(); 34525219Smckusick /*NOTREACHED*/ 34625219Smckusick } 3471425Sroot } 3481425Sroot 34950504Smckusick #ifdef ROLLDEBUG 35050504Smckusick int do_sum(block) 35150504Smckusick union u_spcl *block; 35250504Smckusick 35350504Smckusick { 35450504Smckusick char sum = 0; 35550504Smckusick int i; 35650504Smckusick 35750504Smckusick for (i = 0; i < TP_BSIZE; i++) { 35850504Smckusick sum = sum ^ block->dummy[i]; 35950504Smckusick } 36050504Smckusick return(sum); 36150504Smckusick } 36250504Smckusick #endif 36350504Smckusick 36450504Smckusick void 36550504Smckusick rollforward() 36650504Smckusick { 36750504Smckusick register struct req *p, *q, *prev; 36850504Smckusick register struct slave *tslp; 36950504Smckusick int i, next, size, savedtapea, got; 37050504Smckusick union u_spcl *ntb, *otb; 37150504Smckusick #ifdef ROLLDEBUG 37250504Smckusick int j; 37350504Smckusick #endif 37450504Smckusick tslp = &slaves[SLAVES]; 37550504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 37650504Smckusick 37750504Smckusick /* 37850504Smckusick * Each of the N slaves should have requests that need to 37950504Smckusick * be replayed on the next tape. Use the extra slave buffers 38050504Smckusick * (slaves[SLAVES]) to construct request lists to be sent to 38150504Smckusick * each slave in turn. 38250504Smckusick */ 38350504Smckusick for (i = 0; i < SLAVES; i++) { 38450504Smckusick q = &tslp->req[1]; 38550504Smckusick otb = (union u_spcl *)slp->tblock; 38650504Smckusick 38750504Smckusick /* 38850504Smckusick * For each request in the current slave, copy it to tslp. 38950504Smckusick */ 39050504Smckusick #ifdef ROLLDEBUG 39150504Smckusick printf("replaying reqs to slave %d (%d)\n", slp - &slaves[0], 39250504Smckusick slp->pid); 39350504Smckusick j = 0; 39450504Smckusick #endif 39550504Smckusick 39650504Smckusick for (p = slp->req; p->count > 0; p += p->count) { 39750504Smckusick #ifdef ROLLDEBUG 39850504Smckusick printf(" req %d count %d dblk %d\n", 39950504Smckusick j++, p->count, p->dblk); 40050504Smckusick if (p->dblk == 0) 40150504Smckusick printf("\tsum %x\n", do_sum(otb)); 40250504Smckusick #endif 40350504Smckusick *q = *p; 40450504Smckusick if (p->dblk == 0) 40550504Smckusick *ntb++ = *otb++; /* copy the datablock also */ 40650504Smckusick prev = q; 40750504Smckusick q += q->count; 40850504Smckusick } 40950504Smckusick if (prev->dblk != 0) 41050504Smckusick prev->count -= 1; 41150504Smckusick else 41250504Smckusick ntb--; 41350504Smckusick q -= 1; 41450504Smckusick q->count = 0; 41550504Smckusick q = &tslp->req[0]; 41650504Smckusick if (i == 0) { 41750504Smckusick q->dblk = 0; 41850504Smckusick q->count = 1; 41950504Smckusick trecno = 0; 42050504Smckusick nextblock = tslp->tblock; 42150504Smckusick savedtapea = spcl.c_tapea; 42250504Smckusick spcl.c_tapea = slp->tapea; 42350504Smckusick startnewtape(0); 42450504Smckusick spcl.c_tapea = savedtapea; 42550504Smckusick lastspclrec = savedtapea - 1; 42650504Smckusick } 42750504Smckusick size = (char *)ntb - (char *)q; 42850504Smckusick if (atomic(write, slp->fd, q, size) != size) { 42950504Smckusick perror(" DUMP: error writing command pipe"); 43050504Smckusick dumpabort(); 43150504Smckusick } 43250504Smckusick slp->sent = 1; 43350504Smckusick #ifdef ROLLDEBUG 43450504Smckusick printf("after the shift:\n"); 43550504Smckusick j = 0; 43650504Smckusick for (p = tslp->req; p->count > 0; p += p->count) { 43750504Smckusick printf(" req %d count %d dblk %d\n", 43850504Smckusick j++, p->count, p->dblk); 43950504Smckusick if (p->dblk == 0) { 44050504Smckusick /* dump block also */ 44150504Smckusick } 44250504Smckusick } 44350504Smckusick #endif 44450504Smckusick if (++slp >= &slaves[SLAVES]) 44550504Smckusick slp = &slaves[0]; 44650504Smckusick 44750504Smckusick q->count = 1; 44850504Smckusick 44950504Smckusick if (prev->dblk != 0) { 45050504Smckusick /* 45150504Smckusick * If the last one was a disk block, make the 45250504Smckusick * first of this one be the last bit of that disk 45350504Smckusick * block... 45450504Smckusick */ 45550504Smckusick q->dblk = prev->dblk + 45650504Smckusick prev->count * (TP_BSIZE / DEV_BSIZE); 45750504Smckusick ntb = (union u_spcl *)tslp->tblock; 45850504Smckusick } else { 45950504Smckusick /* 46050504Smckusick * It wasn't a disk block. Copy the data to its 46150504Smckusick * new location in the buffer. 46250504Smckusick */ 46350504Smckusick q->dblk = 0; 46450504Smckusick *((union u_spcl *)tslp->tblock) = *ntb; 46550504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 46650504Smckusick } 46750504Smckusick } 46850504Smckusick slp->req[0] = *q; 46950504Smckusick nextblock = slp->tblock; 47050504Smckusick if (q->dblk == 0) 47150504Smckusick nextblock++; 47250504Smckusick trecno = 1; 47350504Smckusick 47450504Smckusick /* 47550504Smckusick * Clear the first slaves' response. One hopes that it 47650504Smckusick * worked ok, otherwise the tape is much too short! 47750504Smckusick */ 47850504Smckusick if (slp->sent) { 47950504Smckusick if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { 48050504Smckusick perror(" DUMP: error reading command pipe in master"); 48150504Smckusick dumpabort(); 48250504Smckusick } 48350504Smckusick slp->sent = 0; 48450504Smckusick 48550504Smckusick if (got != writesize) { 48650504Smckusick quit("EOT detected at start of the tape!\n"); 48750504Smckusick } 48850504Smckusick } 48950504Smckusick } 49050504Smckusick 4911425Sroot /* 49250504Smckusick * We implement taking and restoring checkpoints on the tape level. 49350504Smckusick * When each tape is opened, a new process is created by forking; this 49450504Smckusick * saves all of the necessary context in the parent. The child 49550504Smckusick * continues the dump; the parent waits around, saving the context. 49650504Smckusick * If the child returns X_REWRITE, then it had problems writing that tape; 49750504Smckusick * this causes the parent to fork again, duplicating the context, and 49850504Smckusick * everything continues as if nothing had happened. 4991425Sroot */ 50046585Storek void 50150504Smckusick startnewtape(top) 50250504Smckusick int top; 5031425Sroot { 5041425Sroot int parentpid; 5051425Sroot int childpid; 5061425Sroot int status; 5071425Sroot int waitpid; 50850504Smckusick int i; 50947056Skarels char *p; 51050574Smckusick #ifdef sunos 51150574Smckusick void (*interrupt)(); 51250574Smckusick char *index(); 51350574Smckusick #else 51450574Smckusick sig_t interrupt; 51550574Smckusick #endif 5161425Sroot 51739164Sbostic interrupt = signal(SIGINT, SIG_IGN); 5181425Sroot parentpid = getpid(); 5191425Sroot 5201425Sroot restore_check_point: 52139164Sbostic (void)signal(SIGINT, interrupt); 52225219Smckusick /* 52325219Smckusick * All signals are inherited... 52425219Smckusick */ 5251425Sroot childpid = fork(); 52618012Smckusick if (childpid < 0) { 5271425Sroot msg("Context save fork fails in parent %d\n", parentpid); 5281425Sroot Exit(X_ABORT); 5291425Sroot } 53018012Smckusick if (childpid != 0) { 5311425Sroot /* 5321425Sroot * PARENT: 5331425Sroot * save the context by waiting 5341425Sroot * until the child doing all of the work returns. 53518012Smckusick * don't catch the interrupt 5361425Sroot */ 53725219Smckusick signal(SIGINT, SIG_IGN); 5381425Sroot #ifdef TDEBUG 5391425Sroot msg("Tape: %d; parent process: %d child process %d\n", 5401425Sroot tapeno+1, parentpid, childpid); 5411425Sroot #endif TDEBUG 54218012Smckusick while ((waitpid = wait(&status)) != childpid) 54318012Smckusick msg("Parent %d waiting for child %d has another child %d return\n", 54418012Smckusick parentpid, childpid, waitpid); 54518012Smckusick if (status & 0xFF) { 5461425Sroot msg("Child %d returns LOB status %o\n", 5471425Sroot childpid, status&0xFF); 5481425Sroot } 5491425Sroot status = (status >> 8) & 0xFF; 5501425Sroot #ifdef TDEBUG 55118012Smckusick switch(status) { 5521425Sroot case X_FINOK: 5531425Sroot msg("Child %d finishes X_FINOK\n", childpid); 5541425Sroot break; 55550504Smckusick case X_ABORT: 5561425Sroot msg("Child %d finishes X_ABORT\n", childpid); 5571425Sroot break; 5581425Sroot case X_REWRITE: 5591425Sroot msg("Child %d finishes X_REWRITE\n", childpid); 5601425Sroot break; 5611425Sroot default: 56218012Smckusick msg("Child %d finishes unknown %d\n", 56325219Smckusick childpid, status); 5641425Sroot break; 5651425Sroot } 5661425Sroot #endif TDEBUG 56718012Smckusick switch(status) { 5681425Sroot case X_FINOK: 5691425Sroot Exit(X_FINOK); 5701425Sroot case X_ABORT: 5711425Sroot Exit(X_ABORT); 5721425Sroot case X_REWRITE: 5731425Sroot goto restore_check_point; 5741425Sroot default: 5751425Sroot msg("Bad return code from dump: %d\n", status); 5761425Sroot Exit(X_ABORT); 5771425Sroot } 5781425Sroot /*NOTREACHED*/ 5791425Sroot } else { /* we are the child; just continue */ 5801425Sroot #ifdef TDEBUG 5811425Sroot sleep(4); /* allow time for parent's message to get out */ 5821425Sroot msg("Child on Tape %d has parent %d, my pid = %d\n", 5831425Sroot tapeno+1, parentpid, getpid()); 58425219Smckusick #endif TDEBUG 58547056Skarels /* 58647056Skarels * If we have a name like "/dev/rmt0,/dev/rmt1", 58747056Skarels * use the name before the comma first, and save 58848621Skarels * the remaining names for subsequent volumes. 58947056Skarels */ 59050504Smckusick tapeno++; /* current tape sequence */ 59148621Skarels if (nexttape || index(tape, ',')) { 59248621Skarels if (nexttape && *nexttape) 59348621Skarels tape = nexttape; 59448621Skarels if (p = index(tape, ',')) { 59548621Skarels *p = '\0'; 59648621Skarels nexttape = p + 1; 59748621Skarels } else 59848621Skarels nexttape = NULL; 59948621Skarels msg("Dumping volume %d on %s\n", tapeno, tape); 60048621Skarels } 60118012Smckusick #ifdef RDUMP 60246789Smckusick while ((tapefd = (host ? rmtopen(tape, 2) : 60346789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 60450504Smckusick #else 60550504Smckusick while ((tapefd = (pipeout ? 1 : 60650504Smckusick open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 60750504Smckusick #endif 60839128Smckusick { 60946614Smckusick msg("Cannot open output \"%s\".\n", tape); 61039128Smckusick if (!query("Do you want to retry the open?")) 61118012Smckusick dumpabort(); 61239128Smckusick } 6131425Sroot 61418012Smckusick enslave(); /* Share open tape file descriptor with slaves */ 61518012Smckusick 6161425Sroot asize = 0; 61748621Skarels blocksthisvol = 0; 61850504Smckusick if (top) 61950504Smckusick newtape++; /* new tape signal */ 62050504Smckusick spcl.c_count = slp->count; 62150504Smckusick /* 62250504Smckusick * measure firstrec in TP_BSIZE units since restore doesn't 62350504Smckusick * know the correct ntrec value... 62450504Smckusick */ 62550504Smckusick spcl.c_firstrec = slp->firstrec; 6261425Sroot spcl.c_volume++; 6271425Sroot spcl.c_type = TS_TAPE; 62830432Smckusick spcl.c_flags |= DR_NEWHEADER; 62950504Smckusick writeheader(slp->inode); 63030432Smckusick spcl.c_flags &=~ DR_NEWHEADER; 6311425Sroot if (tapeno > 1) 63248621Skarels msg("Volume %d begins with blocks from inode %d\n", 63350504Smckusick tapeno, slp->inode); 6341425Sroot } 6351425Sroot } 6361425Sroot 63746585Storek void 6381425Sroot dumpabort() 6391425Sroot { 64050504Smckusick 64118012Smckusick if (master != 0 && master != getpid()) 64225219Smckusick kill(master, SIGTERM); /* Signals master to call dumpabort */ 64324181Smckusick else { 64424181Smckusick killall(); 64524181Smckusick msg("The ENTIRE dump is aborted.\n"); 64624181Smckusick } 6471425Sroot Exit(X_ABORT); 6481425Sroot } 6491425Sroot 65046585Storek void 6511425Sroot Exit(status) 65246239Storek int status; 6531425Sroot { 65450504Smckusick 6551425Sroot #ifdef TDEBUG 6561425Sroot msg("pid = %d exits with status %d\n", getpid(), status); 6571425Sroot #endif TDEBUG 6581925Swnj exit(status); 6591425Sroot } 66018012Smckusick 66124181Smckusick /* 66250504Smckusick * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. 66324181Smckusick */ 66446585Storek void 66550504Smckusick proceed() 66624181Smckusick { 66718012Smckusick 66850504Smckusick if (ready) 66950504Smckusick longjmp(jmpbuf, 1); 67050504Smckusick caught++; 67124181Smckusick } 67224181Smckusick 67346585Storek void 67418012Smckusick enslave() 67518012Smckusick { 67650504Smckusick int cmd[2]; 67724181Smckusick register int i, j; 67818012Smckusick 67918012Smckusick master = getpid(); 68050504Smckusick 68150504Smckusick signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 68225219Smckusick signal(SIGPIPE, sigpipe); 68325219Smckusick signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 68450504Smckusick signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ 68550504Smckusick 68624181Smckusick for (i = 0; i < SLAVES; i++) { 68750504Smckusick if (i == slp - &slaves[0]) { 68850504Smckusick caught = 1; 68924181Smckusick } else { 69050504Smckusick caught = 0; 69124181Smckusick } 69250504Smckusick 69350504Smckusick if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || 69450504Smckusick (slaves[i].pid = fork()) < 0) 69546585Storek quit("too many slaves, %d (recompile smaller): %s\n", 69646585Storek i, strerror(errno)); 69750504Smckusick 69850504Smckusick slaves[i].fd = cmd[1]; 69950504Smckusick slaves[i].sent = 0; 70050504Smckusick if (slaves[i].pid == 0) { /* Slave starts up here */ 70118012Smckusick for (j = 0; j <= i; j++) 70250504Smckusick close(slaves[j].fd); 70325219Smckusick signal(SIGINT, SIG_IGN); /* Master handles this */ 70450504Smckusick doslave(cmd[0], i); 70518012Smckusick Exit(X_FINOK); 70618012Smckusick } 70718012Smckusick } 70850504Smckusick 70950504Smckusick for (i = 0; i < SLAVES; i++) 71050504Smckusick atomic(write, slaves[i].fd, &slaves[(i + 1) % SLAVES].pid, 71150504Smckusick sizeof slaves[0].pid); 71250504Smckusick 71350504Smckusick master = 0; 71418012Smckusick } 71518012Smckusick 71646585Storek void 71724181Smckusick killall() 71818012Smckusick { 71924181Smckusick register int i; 72019982Smckusick 72124181Smckusick for (i = 0; i < SLAVES; i++) 72250504Smckusick if (slaves[i].pid > 0) 72350504Smckusick kill(slaves[i].pid, SIGKILL); 72418012Smckusick } 72518012Smckusick 72624181Smckusick /* 72724181Smckusick * Synchronization - each process has a lockfile, and shares file 72824181Smckusick * descriptors to the following process's lockfile. When our write 72924181Smckusick * completes, we release our lock on the following process's lock- 73024181Smckusick * file, allowing the following process to lock it and proceed. We 73124181Smckusick * get the lock back for the next cycle by swapping descriptors. 73224181Smckusick */ 73346585Storek void 73450504Smckusick doslave(cmd, slave_number) 73550504Smckusick register int cmd; 73650504Smckusick int slave_number; 73719982Smckusick { 73850504Smckusick register int nread; 73950504Smckusick int nextslave, size, wrote, eot_count; 74046795Sbostic #ifndef __STDC__ 74146795Sbostic int read(); 74246795Sbostic #endif 74350504Smckusick #ifdef ROLLDEBUG 74450504Smckusick int dodump = 2; 74550504Smckusick FILE *out; 74650504Smckusick char name[64]; 74750504Smckusick #endif 74819982Smckusick 74946789Smckusick /* 75046789Smckusick * Need our own seek pointer. 75146789Smckusick */ 75246789Smckusick close(diskfd); 75346789Smckusick if ((diskfd = open(disk, O_RDONLY)) < 0) 75446585Storek quit("slave couldn't reopen disk: %s\n", strerror(errno)); 75550504Smckusick 75624181Smckusick /* 75750504Smckusick * Need the pid of the next slave in the loop... 75850504Smckusick */ 75950504Smckusick if ((nread = atomic(read, cmd, &nextslave, sizeof nextslave)) 76050504Smckusick != sizeof nextslave) { 76150504Smckusick quit("master/slave protocol botched - didn't get pid of next slave.\n"); 76250504Smckusick } 76350504Smckusick 76450504Smckusick #ifdef ROLLDEBUG 76550504Smckusick sprintf(name, "slave.%d", slave_number); 76650504Smckusick out = fopen(name, "w"); 76750504Smckusick #endif 76850504Smckusick /* 76925219Smckusick * Get list of blocks to dump, read the blocks into tape buffer 77024181Smckusick */ 77150504Smckusick while ((nread = atomic(read, cmd, slp->req, reqsiz)) == reqsiz) { 77250504Smckusick register struct req *p = slp->req; 77350504Smckusick int j; 77450504Smckusick struct req *rover; 77550504Smckusick char (*orover)[TP_BSIZE]; 77650504Smckusick 77750504Smckusick j = 0; 77850504Smckusick for (trecno = 0; trecno < ntrec; 77950504Smckusick trecno += p->count, p += p->count) { 78018012Smckusick if (p->dblk) { 78150504Smckusick bread(p->dblk, slp->tblock[trecno], 78225219Smckusick p->count * TP_BSIZE); 78318012Smckusick } else { 78425219Smckusick if (p->count != 1 || atomic(read, cmd, 78550504Smckusick slp->tblock[trecno], TP_BSIZE) != TP_BSIZE) 78646585Storek quit("master/slave protocol botched.\n"); 78718012Smckusick } 78850504Smckusick #ifdef ROLLDEBUG 78950504Smckusick if (dodump) { 79050504Smckusick fprintf(out, " req %d count %d dblk %d\n", 79150504Smckusick j++, p->count, p->dblk); 79250504Smckusick if (p->dblk == 0) { 79350504Smckusick fprintf(out, "\tsum %x\n", 79450504Smckusick do_sum(slp->tblock[trecno])); 79550504Smckusick } 79650504Smckusick } 79750504Smckusick #endif 79818012Smckusick } 79950504Smckusick #ifdef ROLLDEBUG 80050504Smckusick if (dodump) { 80150504Smckusick fprintf(out, "\n"); 80250504Smckusick } 80350504Smckusick if (--dodump == 0) { 80450504Smckusick fclose(out); 80550504Smckusick } 80650504Smckusick #endif 80750504Smckusick if (setjmp(jmpbuf) == 0) { 80850504Smckusick ready = 1; 80950504Smckusick if (!caught) 81050504Smckusick pause(); 81150504Smckusick } 81250504Smckusick ready = 0; 81350504Smckusick caught = 0; 81425219Smckusick 81550504Smckusick /* Try to write the data... */ 81650504Smckusick eot_count = 0; 81750504Smckusick size = 0; 81850504Smckusick 81950504Smckusick while (eot_count < 10 && size < writesize) { 82018012Smckusick #ifdef RDUMP 82150504Smckusick if (host) 82250504Smckusick wrote = rmtwrite(slp->tblock[0]+size, 82350504Smckusick writesize-size); 82448621Skarels else 82550504Smckusick #endif 82650504Smckusick wrote = write(tapefd, slp->tblock[0]+size, 82750504Smckusick writesize-size); 82850504Smckusick #ifdef WRITEDEBUG 82950504Smckusick printf("slave %d wrote %d\n", slave_number, wrote); 83050504Smckusick #endif 83150504Smckusick if (wrote < 0) 83250504Smckusick break; 83350504Smckusick if (wrote == 0) 83450504Smckusick eot_count++; 83550504Smckusick size += wrote; 83650504Smckusick } 83750504Smckusick 83850504Smckusick #ifdef WRITEDEBUG 83950504Smckusick if (size != writesize) 84050504Smckusick printf("slave %d only wrote %d out of %d bytes and gave up.\n", 84150504Smckusick slave_number, size, writesize); 84250504Smckusick #endif 84350504Smckusick 84450504Smckusick if (eot_count > 0) 84550504Smckusick size = 0; 84650504Smckusick 84750504Smckusick /* 84850504Smckusick * fixme: Pyramids running OSx return ENOSPC 84950504Smckusick * at EOT on 1/2 inch drives. 85050504Smckusick */ 85150504Smckusick if (size < 0) { 85225219Smckusick kill(master, SIGUSR1); 85325219Smckusick for (;;) 85425219Smckusick sigpause(0); 85550504Smckusick } else { 85650504Smckusick /* 85750504Smckusick * pass size of write back to master 85850504Smckusick * (for EOT handling) 85950504Smckusick */ 86050504Smckusick atomic(write, cmd, &size, sizeof size); 86150504Smckusick } 86250504Smckusick 86350504Smckusick /* 86450504Smckusick * If partial write, don't want next slave to go. 86550504Smckusick * Also jolts him awake. 86650504Smckusick */ 86750504Smckusick kill(nextslave, SIGUSR2); 86850504Smckusick } 86946585Storek if (nread != 0) 87046585Storek quit("error reading command pipe: %s\n", strerror(errno)); 87118012Smckusick } 87219947Smckusick 87319947Smckusick /* 87425219Smckusick * Since a read from a pipe may not return all we asked for, 87525219Smckusick * or a write may not write all we ask if we get a signal, 87625219Smckusick * loop until the count is satisfied (or error). 87719947Smckusick */ 87846585Storek int 87925219Smckusick atomic(func, fd, buf, count) 88025219Smckusick int (*func)(), fd, count; 88119947Smckusick char *buf; 88219947Smckusick { 88325219Smckusick int got, need = count; 88419947Smckusick 88525219Smckusick while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 88619947Smckusick buf += got; 88725219Smckusick return (got < 0 ? got : count - need); 88819947Smckusick } 889