147082Smckusick /*- 247082Smckusick * Copyright (c) 1980, 1991 The Regents of the University of California. 347082Smckusick * All rights reserved. 447082Smckusick * 547082Smckusick * %sccs.include.redist.c% 622040Sdist */ 722040Sdist 817527Ssam #ifndef lint 9*50574Smckusick static char sccsid[] = "@(#)tape.c 5.21 (Berkeley) 07/26/91"; 1046585Storek #endif /* not lint */ 1117527Ssam 1250505Smckusick #ifdef sunos 13*50574Smckusick #include <sys/param.h> 1450505Smckusick #include <stdio.h> 1550505Smckusick #include <ctype.h> 1650505Smckusick #include <sys/stat.h> 1750505Smckusick #include <sys/time.h> 1850505Smckusick #include <sys/dir.h> 1950505Smckusick #include <sys/vnode.h> 2050505Smckusick #include <ufs/inode.h> 2150505Smckusick #else 2246795Sbostic #include <sys/param.h> 2346585Storek #include <sys/wait.h> 2446795Sbostic #include <ufs/dinode.h> 2550505Smckusick #endif 2646795Sbostic #include <ufs/fs.h> 2746795Sbostic #include <signal.h> 2846795Sbostic #include <fcntl.h> 2946795Sbostic #include <protocols/dumprestore.h> 3046585Storek #include <errno.h> 3150504Smckusick #include <setjmp.h> 3246795Sbostic #ifdef __STDC__ 3346795Sbostic #include <unistd.h> 3446795Sbostic #include <stdlib.h> 3546795Sbostic #include <string.h> 3646795Sbostic #endif 3750504Smckusick #include <sys/socket.h> 3846795Sbostic #include "dump.h" 3939128Smckusick #include "pathnames.h" 401425Sroot 4129899Smckusick int writesize; /* size of malloc()ed buffer for tape */ 4229899Smckusick long lastspclrec = -1; /* tape block number of last written header */ 4329899Smckusick int trecno = 0; /* next record to write in current block */ 4446614Smckusick extern long blocksperfile; /* number of blocks per output file */ 4548621Skarels long blocksthisvol; /* number of blocks on current output file */ 4648621Skarels extern int ntrec; /* blocking factor on tape */ 4748621Skarels extern int cartridge; 4850504Smckusick extern char *host; 4947056Skarels char *nexttape; 5025219Smckusick #ifdef RDUMP 5146585Storek int rmtopen(), rmtwrite(); 5246585Storek void rmtclose(); 5325219Smckusick #endif RDUMP 5450504Smckusick void rollforward(); 5546585Storek int atomic(); 5646789Smckusick void doslave(), enslave(), flushtape(), killall(); 5746585Storek 5810911Ssam /* 5924181Smckusick * Concurrent dump mods (Caltech) - disk block reading and tape writing 6018012Smckusick * are exported to several slave processes. While one slave writes the 6118012Smckusick * tape, the others read disk blocks; they pass control of the tape in 6250504Smckusick * a ring via signals. The parent process traverses the filesystem and 6346789Smckusick * sends writeheader()'s and lists of daddr's to the slaves via pipes. 6450504Smckusick * The following structure defines the instruction packets sent to slaves. 6510911Ssam */ 6650504Smckusick struct req { 6718012Smckusick daddr_t dblk; 6818012Smckusick int count; 6950504Smckusick }; 7018012Smckusick int reqsiz; 7118012Smckusick 7224181Smckusick #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 7350504Smckusick struct slave { 7450504Smckusick int tapea; /* header number at start of this chunk */ 7550504Smckusick int count; /* count to next header (used for TS_TAPE */ 7650504Smckusick /* after EOT) */ 7750504Smckusick int inode; /* inode that we are currently dealing with */ 7850504Smckusick int fd; /* FD for this slave */ 7950504Smckusick int pid; /* PID for this slave */ 8050504Smckusick int sent; /* 1 == we've sent this slave requests */ 8150504Smckusick int firstrec; /* record number of this block */ 8250504Smckusick char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ 8350504Smckusick struct req *req; /* buffer for requests */ 8450504Smckusick } slaves[SLAVES+1]; 8550504Smckusick struct slave *slp; 8618012Smckusick 8750504Smckusick char (*nextblock)[TP_BSIZE]; 8850504Smckusick 8950504Smckusick int master; /* pid of master, for sending error signals */ 9050504Smckusick int tenths; /* length of tape used per block written */ 9150504Smckusick static int caught; /* have we caught the signal to proceed? */ 9250504Smckusick static int ready; /* have we reached the lock point without having */ 9350504Smckusick /* received the SIGUSR2 signal from the prev slave? */ 9450504Smckusick static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ 9550504Smckusick /* SIGUSR2 arrives from the previous slave */ 9650504Smckusick 9746585Storek int 9810911Ssam alloctape() 9910911Ssam { 10025219Smckusick int pgoff = getpagesize() - 1; 10150504Smckusick char *buf; 10250504Smckusick int i; 10310911Ssam 10410911Ssam writesize = ntrec * TP_BSIZE; 10550504Smckusick reqsiz = (ntrec + 1) * sizeof(struct req); 10624181Smckusick /* 10725219Smckusick * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 10825219Smckusick * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 10925219Smckusick * repositioning after stopping, i.e, streaming mode, where the gap is 11025219Smckusick * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 11124181Smckusick */ 11247056Skarels if (blocksperfile == 0) 11347056Skarels tenths = writesize / density + 11447056Skarels (cartridge ? 16 : density == 625 ? 5 : 8); 11525219Smckusick /* 11625219Smckusick * Allocate tape buffer contiguous with the array of instruction 11746789Smckusick * packets, so flushtape() can write them together with one write(). 11825219Smckusick * Align tape buffer on page boundary to speed up tape write(). 11925219Smckusick */ 12050504Smckusick for (i = 0; i <= SLAVES; i++) { 12150504Smckusick buf = (char *) malloc(reqsiz + writesize + pgoff + TP_BSIZE); 12250504Smckusick if (buf == NULL) 12350504Smckusick return(0); 12450504Smckusick slaves[i].tblock = (char (*)[TP_BSIZE]) 12550504Smckusick (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); 12650504Smckusick slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; 12750504Smckusick } 12850504Smckusick slp = &slaves[0]; 12950504Smckusick slp->count = 1; 13050504Smckusick slp->tapea = 0; 13150504Smckusick slp->firstrec = 0; 13250504Smckusick nextblock = slp->tblock; 13324181Smckusick return(1); 13410911Ssam } 13510911Ssam 13646585Storek void 13746789Smckusick writerec(dp) 1385329Smckusic char *dp; 1391425Sroot { 14050504Smckusick 14150504Smckusick slp->req[trecno].dblk = (daddr_t)0; 14250504Smckusick slp->req[trecno].count = 1; 14350504Smckusick *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; 14429899Smckusick lastspclrec = spcl.c_tapea; 14524181Smckusick trecno++; 1461425Sroot spcl.c_tapea++; 14746585Storek if (trecno >= ntrec) 14846789Smckusick flushtape(); 1491425Sroot } 1501425Sroot 15146585Storek void 15246789Smckusick dumpblock(blkno, size) 1534774Smckusic daddr_t blkno; 1544774Smckusic int size; 1551425Sroot { 15625219Smckusick int avail, tpblks, dblkno; 1571425Sroot 1585329Smckusic dblkno = fsbtodb(sblock, blkno); 15946585Storek tpblks = size >> tp_bshift; 16018012Smckusick while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 16150504Smckusick slp->req[trecno].dblk = dblkno; 16250504Smckusick slp->req[trecno].count = avail; 16325219Smckusick trecno += avail; 1644774Smckusic spcl.c_tapea += avail; 16525219Smckusick if (trecno >= ntrec) 16646789Smckusick flushtape(); 16746585Storek dblkno += avail << (tp_bshift - dev_bshift); 1685329Smckusic tpblks -= avail; 1694774Smckusic } 1701425Sroot } 1711425Sroot 1721425Sroot int nogripe = 0; 1731425Sroot 17446585Storek void 17546585Storek tperror() 17646585Storek { 17750504Smckusick 17818012Smckusick if (pipeout) { 17946614Smckusick msg("write error on %s\n", tape); 18046585Storek quit("Cannot recover\n"); 18118012Smckusick /* NOTREACHED */ 18218012Smckusick } 18348621Skarels msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); 18446614Smckusick broadcast("DUMP WRITE ERROR!\n"); 18518012Smckusick if (!query("Do you want to restart?")) 18618012Smckusick dumpabort(); 18746614Smckusick msg("Closing this volume. Prepare to restart with new media;\n"); 18818012Smckusick msg("this dump volume will be rewritten.\n"); 18924181Smckusick killall(); 19018012Smckusick nogripe = 1; 19118012Smckusick close_rewind(); 19218012Smckusick Exit(X_REWRITE); 19318012Smckusick } 19418012Smckusick 19546585Storek void 19625219Smckusick sigpipe() 19725219Smckusick { 19825219Smckusick 19946585Storek quit("Broken pipe\n"); 20025219Smckusick } 20125219Smckusick 20246585Storek void 20346789Smckusick flushtape() 20418012Smckusick { 20550504Smckusick int i, blks, got; 20650504Smckusick long lastfirstrec; 20746795Sbostic #ifndef __STDC__ 20850504Smckusick int write(), read(); 20946795Sbostic #endif 21046795Sbostic 21150504Smckusick int siz = (char *)nextblock - (char *)slp->req; 2121425Sroot 21350504Smckusick slp->req[trecno].count = 0; /* Sentinel */ 21450504Smckusick 21550504Smckusick if (atomic(write, slp->fd, slp->req, siz) != siz) 21646585Storek quit("error writing command pipe: %s\n", strerror(errno)); 21750504Smckusick slp->sent = 1; /* we sent a request, read the response later */ 21850504Smckusick 21950504Smckusick lastfirstrec = slp->firstrec; 22050504Smckusick 22150504Smckusick if (++slp >= &slaves[SLAVES]) 22250504Smckusick slp = &slaves[0]; 22350504Smckusick 22450504Smckusick /* Read results back from next slave */ 22550504Smckusick if (slp->sent) { 22650504Smckusick if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { 22750504Smckusick perror(" DUMP: error reading command pipe in master"); 22850504Smckusick dumpabort(); 22950504Smckusick } 23050504Smckusick slp->sent = 0; 23150504Smckusick 23250504Smckusick /* Check for end of tape */ 23350504Smckusick if (got < writesize) { 23450504Smckusick msg("End of tape detected\n"); 23550504Smckusick 23650504Smckusick /* 23750504Smckusick * Drain the results, don't care what the values were. 23850504Smckusick * If we read them here then trewind won't... 23950504Smckusick */ 24050504Smckusick for (i = 0; i < SLAVES; i++) { 24150504Smckusick if (slaves[i].sent) { 24250504Smckusick if (atomic(read, slaves[i].fd, &got, 24350504Smckusick sizeof got) != sizeof got) { 24450504Smckusick perror(" DUMP: error reading command pipe in master"); 24550504Smckusick dumpabort(); 24650504Smckusick } 24750504Smckusick slaves[i].sent = 0; 24850504Smckusick } 24950504Smckusick } 25050504Smckusick 25150504Smckusick close_rewind(); 25250504Smckusick rollforward(); 25350504Smckusick return; 25450504Smckusick } 25550504Smckusick } 25650504Smckusick 25750504Smckusick blks = 0; 25850504Smckusick if (spcl.c_type != TS_END) { 25950504Smckusick for (i = 0; i < spcl.c_count; i++) 26050504Smckusick if (spcl.c_addr[i] != 0) 26150504Smckusick blks++; 26250504Smckusick } 26350504Smckusick slp->count = lastspclrec + blks + 1 - spcl.c_tapea; 26450504Smckusick slp->tapea = spcl.c_tapea; 26550504Smckusick slp->firstrec = lastfirstrec + ntrec; 26650504Smckusick slp->inode = curino; 26750504Smckusick nextblock = slp->tblock; 2681425Sroot trecno = 0; 26924181Smckusick asize += tenths; 27010911Ssam blockswritten += ntrec; 27148621Skarels blocksthisvol += ntrec; 27246614Smckusick if (!pipeout && (blocksperfile ? 27348621Skarels (blocksthisvol >= blocksperfile) : (asize > tsize))) { 2741425Sroot close_rewind(); 27550504Smckusick startnewtape(0); 2761425Sroot } 2771425Sroot timeest(); 2781425Sroot } 2791425Sroot 28046585Storek void 28146239Storek trewind() 2821425Sroot { 28324181Smckusick int f; 28450504Smckusick int got; 28512331Smckusick 28612331Smckusick if (pipeout) 28712331Smckusick return; 28850504Smckusick for (f = 0; f < SLAVES; f++) { 28950504Smckusick /* 29050504Smckusick * Drain the results, but unlike EOT we DO (or should) care 29150504Smckusick * what the return values were, since if we detect EOT after 29250504Smckusick * we think we've written the last blocks to the tape anyway, 29350504Smckusick * we have to replay those blocks with rollforward. 29450504Smckusick * 29550504Smckusick * fixme: punt for now. 29650504Smckusick */ 29750504Smckusick if (slaves[f].sent) { 29850504Smckusick if (atomic(read, slaves[f].fd, &got, sizeof got) 29950504Smckusick != sizeof got) { 30050504Smckusick perror(" DUMP: error reading command pipe in master"); 30150504Smckusick dumpabort(); 30250504Smckusick } 30350504Smckusick slaves[f].sent = 0; 30450504Smckusick if (got != writesize) { 30550504Smckusick msg("EOT detected in last 2 tape records!\n"); 30650504Smckusick msg("Use a longer tape, decrease the size estimate\n"); 30750504Smckusick quit("or use no size estimate at all.\n"); 30850504Smckusick } 30950504Smckusick } 31050504Smckusick close(slaves[f].fd); 31150504Smckusick } 31246585Storek while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 31346585Storek /* void */; 31448621Skarels msg("Closing %s\n", tape); 31550504Smckusick 31618012Smckusick #ifdef RDUMP 31725219Smckusick if (host) { 31825219Smckusick rmtclose(); 31925219Smckusick while (rmtopen(tape, 0) < 0) 32025219Smckusick sleep(10); 32125219Smckusick rmtclose(); 32225219Smckusick return; 32325219Smckusick } 32450504Smckusick #endif 32546789Smckusick close(tapefd); 3263214Swnj while ((f = open(tape, 0)) < 0) 3273214Swnj sleep (10); 3283214Swnj close(f); 3291425Sroot } 3301425Sroot 33146585Storek void 3321425Sroot close_rewind() 3331425Sroot { 33446239Storek trewind(); 33548621Skarels if (nexttape) 33648621Skarels return; 33718012Smckusick if (!nogripe) { 33846614Smckusick msg("Change Volumes: Mount volume #%d\n", tapeno+1); 33946614Smckusick broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 3401425Sroot } 34148621Skarels while (!query("Is the new volume mounted and ready to go?")) 34225219Smckusick if (query("Do you want to abort?")) { 3431425Sroot dumpabort(); 34425219Smckusick /*NOTREACHED*/ 34525219Smckusick } 3461425Sroot } 3471425Sroot 34850504Smckusick #ifdef ROLLDEBUG 34950504Smckusick int do_sum(block) 35050504Smckusick union u_spcl *block; 35150504Smckusick 35250504Smckusick { 35350504Smckusick char sum = 0; 35450504Smckusick int i; 35550504Smckusick 35650504Smckusick for (i = 0; i < TP_BSIZE; i++) { 35750504Smckusick sum = sum ^ block->dummy[i]; 35850504Smckusick } 35950504Smckusick return(sum); 36050504Smckusick } 36150504Smckusick #endif 36250504Smckusick 36350504Smckusick void 36450504Smckusick rollforward() 36550504Smckusick { 36650504Smckusick register struct req *p, *q, *prev; 36750504Smckusick register struct slave *tslp; 36850504Smckusick int i, next, size, savedtapea, got; 36950504Smckusick union u_spcl *ntb, *otb; 37050504Smckusick #ifdef ROLLDEBUG 37150504Smckusick int j; 37250504Smckusick #endif 37350504Smckusick tslp = &slaves[SLAVES]; 37450504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 37550504Smckusick 37650504Smckusick /* 37750504Smckusick * Each of the N slaves should have requests that need to 37850504Smckusick * be replayed on the next tape. Use the extra slave buffers 37950504Smckusick * (slaves[SLAVES]) to construct request lists to be sent to 38050504Smckusick * each slave in turn. 38150504Smckusick */ 38250504Smckusick for (i = 0; i < SLAVES; i++) { 38350504Smckusick q = &tslp->req[1]; 38450504Smckusick otb = (union u_spcl *)slp->tblock; 38550504Smckusick 38650504Smckusick /* 38750504Smckusick * For each request in the current slave, copy it to tslp. 38850504Smckusick */ 38950504Smckusick #ifdef ROLLDEBUG 39050504Smckusick printf("replaying reqs to slave %d (%d)\n", slp - &slaves[0], 39150504Smckusick slp->pid); 39250504Smckusick j = 0; 39350504Smckusick #endif 39450504Smckusick 39550504Smckusick for (p = slp->req; p->count > 0; p += p->count) { 39650504Smckusick #ifdef ROLLDEBUG 39750504Smckusick printf(" req %d count %d dblk %d\n", 39850504Smckusick j++, p->count, p->dblk); 39950504Smckusick if (p->dblk == 0) 40050504Smckusick printf("\tsum %x\n", do_sum(otb)); 40150504Smckusick #endif 40250504Smckusick *q = *p; 40350504Smckusick if (p->dblk == 0) 40450504Smckusick *ntb++ = *otb++; /* copy the datablock also */ 40550504Smckusick prev = q; 40650504Smckusick q += q->count; 40750504Smckusick } 40850504Smckusick if (prev->dblk != 0) 40950504Smckusick prev->count -= 1; 41050504Smckusick else 41150504Smckusick ntb--; 41250504Smckusick q -= 1; 41350504Smckusick q->count = 0; 41450504Smckusick q = &tslp->req[0]; 41550504Smckusick if (i == 0) { 41650504Smckusick q->dblk = 0; 41750504Smckusick q->count = 1; 41850504Smckusick trecno = 0; 41950504Smckusick nextblock = tslp->tblock; 42050504Smckusick savedtapea = spcl.c_tapea; 42150504Smckusick spcl.c_tapea = slp->tapea; 42250504Smckusick startnewtape(0); 42350504Smckusick spcl.c_tapea = savedtapea; 42450504Smckusick lastspclrec = savedtapea - 1; 42550504Smckusick } 42650504Smckusick size = (char *)ntb - (char *)q; 42750504Smckusick if (atomic(write, slp->fd, q, size) != size) { 42850504Smckusick perror(" DUMP: error writing command pipe"); 42950504Smckusick dumpabort(); 43050504Smckusick } 43150504Smckusick slp->sent = 1; 43250504Smckusick #ifdef ROLLDEBUG 43350504Smckusick printf("after the shift:\n"); 43450504Smckusick j = 0; 43550504Smckusick for (p = tslp->req; p->count > 0; p += p->count) { 43650504Smckusick printf(" req %d count %d dblk %d\n", 43750504Smckusick j++, p->count, p->dblk); 43850504Smckusick if (p->dblk == 0) { 43950504Smckusick /* dump block also */ 44050504Smckusick } 44150504Smckusick } 44250504Smckusick #endif 44350504Smckusick if (++slp >= &slaves[SLAVES]) 44450504Smckusick slp = &slaves[0]; 44550504Smckusick 44650504Smckusick q->count = 1; 44750504Smckusick 44850504Smckusick if (prev->dblk != 0) { 44950504Smckusick /* 45050504Smckusick * If the last one was a disk block, make the 45150504Smckusick * first of this one be the last bit of that disk 45250504Smckusick * block... 45350504Smckusick */ 45450504Smckusick q->dblk = prev->dblk + 45550504Smckusick prev->count * (TP_BSIZE / DEV_BSIZE); 45650504Smckusick ntb = (union u_spcl *)tslp->tblock; 45750504Smckusick } else { 45850504Smckusick /* 45950504Smckusick * It wasn't a disk block. Copy the data to its 46050504Smckusick * new location in the buffer. 46150504Smckusick */ 46250504Smckusick q->dblk = 0; 46350504Smckusick *((union u_spcl *)tslp->tblock) = *ntb; 46450504Smckusick ntb = (union u_spcl *)tslp->tblock[1]; 46550504Smckusick } 46650504Smckusick } 46750504Smckusick slp->req[0] = *q; 46850504Smckusick nextblock = slp->tblock; 46950504Smckusick if (q->dblk == 0) 47050504Smckusick nextblock++; 47150504Smckusick trecno = 1; 47250504Smckusick 47350504Smckusick /* 47450504Smckusick * Clear the first slaves' response. One hopes that it 47550504Smckusick * worked ok, otherwise the tape is much too short! 47650504Smckusick */ 47750504Smckusick if (slp->sent) { 47850504Smckusick if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { 47950504Smckusick perror(" DUMP: error reading command pipe in master"); 48050504Smckusick dumpabort(); 48150504Smckusick } 48250504Smckusick slp->sent = 0; 48350504Smckusick 48450504Smckusick if (got != writesize) { 48550504Smckusick quit("EOT detected at start of the tape!\n"); 48650504Smckusick } 48750504Smckusick } 48850504Smckusick } 48950504Smckusick 4901425Sroot /* 49150504Smckusick * We implement taking and restoring checkpoints on the tape level. 49250504Smckusick * When each tape is opened, a new process is created by forking; this 49350504Smckusick * saves all of the necessary context in the parent. The child 49450504Smckusick * continues the dump; the parent waits around, saving the context. 49550504Smckusick * If the child returns X_REWRITE, then it had problems writing that tape; 49650504Smckusick * this causes the parent to fork again, duplicating the context, and 49750504Smckusick * everything continues as if nothing had happened. 4981425Sroot */ 49946585Storek void 50050504Smckusick startnewtape(top) 50150504Smckusick int top; 5021425Sroot { 5031425Sroot int parentpid; 5041425Sroot int childpid; 5051425Sroot int status; 5061425Sroot int waitpid; 50750504Smckusick int i; 50847056Skarels char *p; 509*50574Smckusick #ifdef sunos 510*50574Smckusick void (*interrupt)(); 511*50574Smckusick char *index(); 512*50574Smckusick #else 513*50574Smckusick sig_t interrupt; 514*50574Smckusick #endif 5151425Sroot 51639164Sbostic interrupt = signal(SIGINT, SIG_IGN); 5171425Sroot parentpid = getpid(); 5181425Sroot 5191425Sroot restore_check_point: 52039164Sbostic (void)signal(SIGINT, interrupt); 52125219Smckusick /* 52225219Smckusick * All signals are inherited... 52325219Smckusick */ 5241425Sroot childpid = fork(); 52518012Smckusick if (childpid < 0) { 5261425Sroot msg("Context save fork fails in parent %d\n", parentpid); 5271425Sroot Exit(X_ABORT); 5281425Sroot } 52918012Smckusick if (childpid != 0) { 5301425Sroot /* 5311425Sroot * PARENT: 5321425Sroot * save the context by waiting 5331425Sroot * until the child doing all of the work returns. 53418012Smckusick * don't catch the interrupt 5351425Sroot */ 53625219Smckusick signal(SIGINT, SIG_IGN); 5371425Sroot #ifdef TDEBUG 5381425Sroot msg("Tape: %d; parent process: %d child process %d\n", 5391425Sroot tapeno+1, parentpid, childpid); 5401425Sroot #endif TDEBUG 54118012Smckusick while ((waitpid = wait(&status)) != childpid) 54218012Smckusick msg("Parent %d waiting for child %d has another child %d return\n", 54318012Smckusick parentpid, childpid, waitpid); 54418012Smckusick if (status & 0xFF) { 5451425Sroot msg("Child %d returns LOB status %o\n", 5461425Sroot childpid, status&0xFF); 5471425Sroot } 5481425Sroot status = (status >> 8) & 0xFF; 5491425Sroot #ifdef TDEBUG 55018012Smckusick switch(status) { 5511425Sroot case X_FINOK: 5521425Sroot msg("Child %d finishes X_FINOK\n", childpid); 5531425Sroot break; 55450504Smckusick case X_ABORT: 5551425Sroot msg("Child %d finishes X_ABORT\n", childpid); 5561425Sroot break; 5571425Sroot case X_REWRITE: 5581425Sroot msg("Child %d finishes X_REWRITE\n", childpid); 5591425Sroot break; 5601425Sroot default: 56118012Smckusick msg("Child %d finishes unknown %d\n", 56225219Smckusick childpid, status); 5631425Sroot break; 5641425Sroot } 5651425Sroot #endif TDEBUG 56618012Smckusick switch(status) { 5671425Sroot case X_FINOK: 5681425Sroot Exit(X_FINOK); 5691425Sroot case X_ABORT: 5701425Sroot Exit(X_ABORT); 5711425Sroot case X_REWRITE: 5721425Sroot goto restore_check_point; 5731425Sroot default: 5741425Sroot msg("Bad return code from dump: %d\n", status); 5751425Sroot Exit(X_ABORT); 5761425Sroot } 5771425Sroot /*NOTREACHED*/ 5781425Sroot } else { /* we are the child; just continue */ 5791425Sroot #ifdef TDEBUG 5801425Sroot sleep(4); /* allow time for parent's message to get out */ 5811425Sroot msg("Child on Tape %d has parent %d, my pid = %d\n", 5821425Sroot tapeno+1, parentpid, getpid()); 58325219Smckusick #endif TDEBUG 58447056Skarels /* 58547056Skarels * If we have a name like "/dev/rmt0,/dev/rmt1", 58647056Skarels * use the name before the comma first, and save 58748621Skarels * the remaining names for subsequent volumes. 58847056Skarels */ 58950504Smckusick tapeno++; /* current tape sequence */ 59048621Skarels if (nexttape || index(tape, ',')) { 59148621Skarels if (nexttape && *nexttape) 59248621Skarels tape = nexttape; 59348621Skarels if (p = index(tape, ',')) { 59448621Skarels *p = '\0'; 59548621Skarels nexttape = p + 1; 59648621Skarels } else 59748621Skarels nexttape = NULL; 59848621Skarels msg("Dumping volume %d on %s\n", tapeno, tape); 59948621Skarels } 60018012Smckusick #ifdef RDUMP 60146789Smckusick while ((tapefd = (host ? rmtopen(tape, 2) : 60246789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 60350504Smckusick #else 60450504Smckusick while ((tapefd = (pipeout ? 1 : 60550504Smckusick open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 60650504Smckusick #endif 60739128Smckusick { 60846614Smckusick msg("Cannot open output \"%s\".\n", tape); 60939128Smckusick if (!query("Do you want to retry the open?")) 61018012Smckusick dumpabort(); 61139128Smckusick } 6121425Sroot 61318012Smckusick enslave(); /* Share open tape file descriptor with slaves */ 61418012Smckusick 6151425Sroot asize = 0; 61648621Skarels blocksthisvol = 0; 61750504Smckusick if (top) 61850504Smckusick newtape++; /* new tape signal */ 61950504Smckusick spcl.c_count = slp->count; 62050504Smckusick /* 62150504Smckusick * measure firstrec in TP_BSIZE units since restore doesn't 62250504Smckusick * know the correct ntrec value... 62350504Smckusick */ 62450504Smckusick spcl.c_firstrec = slp->firstrec; 6251425Sroot spcl.c_volume++; 6261425Sroot spcl.c_type = TS_TAPE; 62730432Smckusick spcl.c_flags |= DR_NEWHEADER; 62850504Smckusick writeheader(slp->inode); 62930432Smckusick spcl.c_flags &=~ DR_NEWHEADER; 6301425Sroot if (tapeno > 1) 63148621Skarels msg("Volume %d begins with blocks from inode %d\n", 63250504Smckusick tapeno, slp->inode); 6331425Sroot } 6341425Sroot } 6351425Sroot 63646585Storek void 6371425Sroot dumpabort() 6381425Sroot { 63950504Smckusick 64018012Smckusick if (master != 0 && master != getpid()) 64125219Smckusick kill(master, SIGTERM); /* Signals master to call dumpabort */ 64224181Smckusick else { 64324181Smckusick killall(); 64424181Smckusick msg("The ENTIRE dump is aborted.\n"); 64524181Smckusick } 6461425Sroot Exit(X_ABORT); 6471425Sroot } 6481425Sroot 64946585Storek void 6501425Sroot Exit(status) 65146239Storek int status; 6521425Sroot { 65350504Smckusick 6541425Sroot #ifdef TDEBUG 6551425Sroot msg("pid = %d exits with status %d\n", getpid(), status); 6561425Sroot #endif TDEBUG 6571925Swnj exit(status); 6581425Sroot } 65918012Smckusick 66024181Smckusick /* 66150504Smckusick * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. 66224181Smckusick */ 66346585Storek void 66450504Smckusick proceed() 66524181Smckusick { 66618012Smckusick 66750504Smckusick if (ready) 66850504Smckusick longjmp(jmpbuf, 1); 66950504Smckusick caught++; 67024181Smckusick } 67124181Smckusick 67246585Storek void 67318012Smckusick enslave() 67418012Smckusick { 67550504Smckusick int cmd[2]; 67624181Smckusick register int i, j; 67718012Smckusick 67818012Smckusick master = getpid(); 67950504Smckusick 68050504Smckusick signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 68125219Smckusick signal(SIGPIPE, sigpipe); 68225219Smckusick signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 68350504Smckusick signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ 68450504Smckusick 68524181Smckusick for (i = 0; i < SLAVES; i++) { 68650504Smckusick if (i == slp - &slaves[0]) { 68750504Smckusick caught = 1; 68824181Smckusick } else { 68950504Smckusick caught = 0; 69024181Smckusick } 69150504Smckusick 69250504Smckusick if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || 69350504Smckusick (slaves[i].pid = fork()) < 0) 69446585Storek quit("too many slaves, %d (recompile smaller): %s\n", 69546585Storek i, strerror(errno)); 69650504Smckusick 69750504Smckusick slaves[i].fd = cmd[1]; 69850504Smckusick slaves[i].sent = 0; 69950504Smckusick if (slaves[i].pid == 0) { /* Slave starts up here */ 70018012Smckusick for (j = 0; j <= i; j++) 70150504Smckusick close(slaves[j].fd); 70225219Smckusick signal(SIGINT, SIG_IGN); /* Master handles this */ 70350504Smckusick doslave(cmd[0], i); 70418012Smckusick Exit(X_FINOK); 70518012Smckusick } 70618012Smckusick } 70750504Smckusick 70850504Smckusick for (i = 0; i < SLAVES; i++) 70950504Smckusick atomic(write, slaves[i].fd, &slaves[(i + 1) % SLAVES].pid, 71050504Smckusick sizeof slaves[0].pid); 71150504Smckusick 71250504Smckusick master = 0; 71318012Smckusick } 71418012Smckusick 71546585Storek void 71624181Smckusick killall() 71718012Smckusick { 71824181Smckusick register int i; 71919982Smckusick 72024181Smckusick for (i = 0; i < SLAVES; i++) 72150504Smckusick if (slaves[i].pid > 0) 72250504Smckusick kill(slaves[i].pid, SIGKILL); 72318012Smckusick } 72418012Smckusick 72524181Smckusick /* 72624181Smckusick * Synchronization - each process has a lockfile, and shares file 72724181Smckusick * descriptors to the following process's lockfile. When our write 72824181Smckusick * completes, we release our lock on the following process's lock- 72924181Smckusick * file, allowing the following process to lock it and proceed. We 73024181Smckusick * get the lock back for the next cycle by swapping descriptors. 73124181Smckusick */ 73246585Storek void 73350504Smckusick doslave(cmd, slave_number) 73450504Smckusick register int cmd; 73550504Smckusick int slave_number; 73619982Smckusick { 73750504Smckusick register int nread; 73850504Smckusick int nextslave, size, wrote, eot_count; 73946795Sbostic #ifndef __STDC__ 74046795Sbostic int read(); 74146795Sbostic #endif 74250504Smckusick #ifdef ROLLDEBUG 74350504Smckusick int dodump = 2; 74450504Smckusick FILE *out; 74550504Smckusick char name[64]; 74650504Smckusick #endif 74719982Smckusick 74846789Smckusick /* 74946789Smckusick * Need our own seek pointer. 75046789Smckusick */ 75146789Smckusick close(diskfd); 75246789Smckusick if ((diskfd = open(disk, O_RDONLY)) < 0) 75346585Storek quit("slave couldn't reopen disk: %s\n", strerror(errno)); 75450504Smckusick 75524181Smckusick /* 75650504Smckusick * Need the pid of the next slave in the loop... 75750504Smckusick */ 75850504Smckusick if ((nread = atomic(read, cmd, &nextslave, sizeof nextslave)) 75950504Smckusick != sizeof nextslave) { 76050504Smckusick quit("master/slave protocol botched - didn't get pid of next slave.\n"); 76150504Smckusick } 76250504Smckusick 76350504Smckusick #ifdef ROLLDEBUG 76450504Smckusick sprintf(name, "slave.%d", slave_number); 76550504Smckusick out = fopen(name, "w"); 76650504Smckusick #endif 76750504Smckusick /* 76825219Smckusick * Get list of blocks to dump, read the blocks into tape buffer 76924181Smckusick */ 77050504Smckusick while ((nread = atomic(read, cmd, slp->req, reqsiz)) == reqsiz) { 77150504Smckusick register struct req *p = slp->req; 77250504Smckusick int j; 77350504Smckusick struct req *rover; 77450504Smckusick char (*orover)[TP_BSIZE]; 77550504Smckusick 77650504Smckusick j = 0; 77750504Smckusick for (trecno = 0; trecno < ntrec; 77850504Smckusick trecno += p->count, p += p->count) { 77918012Smckusick if (p->dblk) { 78050504Smckusick bread(p->dblk, slp->tblock[trecno], 78125219Smckusick p->count * TP_BSIZE); 78218012Smckusick } else { 78325219Smckusick if (p->count != 1 || atomic(read, cmd, 78450504Smckusick slp->tblock[trecno], TP_BSIZE) != TP_BSIZE) 78546585Storek quit("master/slave protocol botched.\n"); 78618012Smckusick } 78750504Smckusick #ifdef ROLLDEBUG 78850504Smckusick if (dodump) { 78950504Smckusick fprintf(out, " req %d count %d dblk %d\n", 79050504Smckusick j++, p->count, p->dblk); 79150504Smckusick if (p->dblk == 0) { 79250504Smckusick fprintf(out, "\tsum %x\n", 79350504Smckusick do_sum(slp->tblock[trecno])); 79450504Smckusick } 79550504Smckusick } 79650504Smckusick #endif 79718012Smckusick } 79850504Smckusick #ifdef ROLLDEBUG 79950504Smckusick if (dodump) { 80050504Smckusick fprintf(out, "\n"); 80150504Smckusick } 80250504Smckusick if (--dodump == 0) { 80350504Smckusick fclose(out); 80450504Smckusick } 80550504Smckusick #endif 80650504Smckusick if (setjmp(jmpbuf) == 0) { 80750504Smckusick ready = 1; 80850504Smckusick if (!caught) 80950504Smckusick pause(); 81050504Smckusick } 81150504Smckusick ready = 0; 81250504Smckusick caught = 0; 81325219Smckusick 81450504Smckusick /* Try to write the data... */ 81550504Smckusick eot_count = 0; 81650504Smckusick size = 0; 81750504Smckusick 81850504Smckusick while (eot_count < 10 && size < writesize) { 81918012Smckusick #ifdef RDUMP 82050504Smckusick if (host) 82150504Smckusick wrote = rmtwrite(slp->tblock[0]+size, 82250504Smckusick writesize-size); 82348621Skarels else 82450504Smckusick #endif 82550504Smckusick wrote = write(tapefd, slp->tblock[0]+size, 82650504Smckusick writesize-size); 82750504Smckusick #ifdef WRITEDEBUG 82850504Smckusick printf("slave %d wrote %d\n", slave_number, wrote); 82950504Smckusick #endif 83050504Smckusick if (wrote < 0) 83150504Smckusick break; 83250504Smckusick if (wrote == 0) 83350504Smckusick eot_count++; 83450504Smckusick size += wrote; 83550504Smckusick } 83650504Smckusick 83750504Smckusick #ifdef WRITEDEBUG 83850504Smckusick if (size != writesize) 83950504Smckusick printf("slave %d only wrote %d out of %d bytes and gave up.\n", 84050504Smckusick slave_number, size, writesize); 84150504Smckusick #endif 84250504Smckusick 84350504Smckusick if (eot_count > 0) 84450504Smckusick size = 0; 84550504Smckusick 84650504Smckusick /* 84750504Smckusick * fixme: Pyramids running OSx return ENOSPC 84850504Smckusick * at EOT on 1/2 inch drives. 84950504Smckusick */ 85050504Smckusick if (size < 0) { 85125219Smckusick kill(master, SIGUSR1); 85225219Smckusick for (;;) 85325219Smckusick sigpause(0); 85450504Smckusick } else { 85550504Smckusick /* 85650504Smckusick * pass size of write back to master 85750504Smckusick * (for EOT handling) 85850504Smckusick */ 85950504Smckusick atomic(write, cmd, &size, sizeof size); 86050504Smckusick } 86150504Smckusick 86250504Smckusick /* 86350504Smckusick * If partial write, don't want next slave to go. 86450504Smckusick * Also jolts him awake. 86550504Smckusick */ 86650504Smckusick kill(nextslave, SIGUSR2); 86750504Smckusick } 86846585Storek if (nread != 0) 86946585Storek quit("error reading command pipe: %s\n", strerror(errno)); 87018012Smckusick } 87119947Smckusick 87219947Smckusick /* 87325219Smckusick * Since a read from a pipe may not return all we asked for, 87425219Smckusick * or a write may not write all we ask if we get a signal, 87525219Smckusick * loop until the count is satisfied (or error). 87619947Smckusick */ 87746585Storek int 87825219Smckusick atomic(func, fd, buf, count) 87925219Smckusick int (*func)(), fd, count; 88019947Smckusick char *buf; 88119947Smckusick { 88225219Smckusick int got, need = count; 88319947Smckusick 88425219Smckusick while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 88519947Smckusick buf += got; 88625219Smckusick return (got < 0 ? got : count - need); 88719947Smckusick } 888