1*47082Smckusick /*- 2*47082Smckusick * Copyright (c) 1980, 1991 The Regents of the University of California. 3*47082Smckusick * All rights reserved. 4*47082Smckusick * 5*47082Smckusick * %sccs.include.redist.c% 622040Sdist */ 722040Sdist 817527Ssam #ifndef lint 9*47082Smckusick static char sccsid[] = "@(#)tape.c 5.17 (Berkeley) 03/07/91"; 1046585Storek #endif /* not lint */ 1117527Ssam 1246795Sbostic #include <sys/param.h> 1346585Storek #include <sys/wait.h> 1446795Sbostic #include <ufs/dir.h> 1546795Sbostic #include <ufs/dinode.h> 1646795Sbostic #include <ufs/fs.h> 1746795Sbostic #include <signal.h> 1846795Sbostic #include <fcntl.h> 1946795Sbostic #include <protocols/dumprestore.h> 2046585Storek #include <errno.h> 2146795Sbostic #ifdef __STDC__ 2246795Sbostic #include <unistd.h> 2346795Sbostic #include <stdlib.h> 2446795Sbostic #include <string.h> 2546795Sbostic #endif 2646795Sbostic #include "dump.h" 2739128Smckusick #include "pathnames.h" 281425Sroot 2929899Smckusick char (*tblock)[TP_BSIZE]; /* pointer to malloc()ed buffer for tape */ 3029899Smckusick int writesize; /* size of malloc()ed buffer for tape */ 3129899Smckusick long lastspclrec = -1; /* tape block number of last written header */ 3229899Smckusick int trecno = 0; /* next record to write in current block */ 3346614Smckusick extern long blocksperfile; /* number of blocks per output file */ 3425219Smckusick extern int ntrec; /* blocking factor on tape */ 3525219Smckusick extern int cartridge; 3647056Skarels char *nexttape; 3725219Smckusick #ifdef RDUMP 3825219Smckusick extern char *host; 3946585Storek int rmtopen(), rmtwrite(); 4046585Storek void rmtclose(); 4125219Smckusick #endif RDUMP 421425Sroot 4346585Storek int atomic(); 4446789Smckusick void doslave(), enslave(), flushtape(), killall(); 4546585Storek 4610911Ssam /* 4724181Smckusick * Concurrent dump mods (Caltech) - disk block reading and tape writing 4818012Smckusick * are exported to several slave processes. While one slave writes the 4918012Smckusick * tape, the others read disk blocks; they pass control of the tape in 5024181Smckusick * a ring via flock(). The parent process traverses the filesystem and 5146789Smckusick * sends writeheader()'s and lists of daddr's to the slaves via pipes. 5210911Ssam */ 5318012Smckusick struct req { /* instruction packets sent to slaves */ 5418012Smckusick daddr_t dblk; 5518012Smckusick int count; 5618012Smckusick } *req; 5718012Smckusick int reqsiz; 5818012Smckusick 5924181Smckusick #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 6025219Smckusick int slavefd[SLAVES]; /* pipes from master to each slave */ 6125219Smckusick int slavepid[SLAVES]; /* used by killall() */ 6225219Smckusick int rotor; /* next slave to be instructed */ 6325219Smckusick int master; /* pid of master, for sending error signals */ 6425219Smckusick int tenths; /* length of tape used per block written */ 6518012Smckusick 6646585Storek int 6710911Ssam alloctape() 6810911Ssam { 6925219Smckusick int pgoff = getpagesize() - 1; 7010911Ssam 7110911Ssam writesize = ntrec * TP_BSIZE; 7225219Smckusick reqsiz = ntrec * sizeof(struct req); 7324181Smckusick /* 7425219Smckusick * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 7525219Smckusick * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 7625219Smckusick * repositioning after stopping, i.e, streaming mode, where the gap is 7725219Smckusick * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 7824181Smckusick */ 7947056Skarels if (blocksperfile == 0) 8047056Skarels tenths = writesize / density + 8147056Skarels (cartridge ? 16 : density == 625 ? 5 : 8); 8225219Smckusick /* 8325219Smckusick * Allocate tape buffer contiguous with the array of instruction 8446789Smckusick * packets, so flushtape() can write them together with one write(). 8525219Smckusick * Align tape buffer on page boundary to speed up tape write(). 8625219Smckusick */ 8724181Smckusick req = (struct req *)malloc(reqsiz + writesize + pgoff); 8824181Smckusick if (req == NULL) 8924181Smckusick return(0); 9024181Smckusick tblock = (char (*)[TP_BSIZE]) (((long)&req[ntrec] + pgoff) &~ pgoff); 9125219Smckusick req = (struct req *)tblock - ntrec; 9224181Smckusick return(1); 9310911Ssam } 9410911Ssam 9525219Smckusick 9646585Storek void 9746789Smckusick writerec(dp) 985329Smckusic char *dp; 991425Sroot { 10018012Smckusick req[trecno].dblk = (daddr_t)0; 10118012Smckusick req[trecno].count = 1; 10224181Smckusick *(union u_spcl *)(*tblock++) = *(union u_spcl *)dp; /* movc3 */ 10329899Smckusick lastspclrec = spcl.c_tapea; 10424181Smckusick trecno++; 1051425Sroot spcl.c_tapea++; 10646585Storek if (trecno >= ntrec) 10746789Smckusick flushtape(); 1081425Sroot } 1091425Sroot 11046585Storek void 11146789Smckusick dumpblock(blkno, size) 1124774Smckusic daddr_t blkno; 1134774Smckusic int size; 1141425Sroot { 11525219Smckusick int avail, tpblks, dblkno; 1161425Sroot 1175329Smckusic dblkno = fsbtodb(sblock, blkno); 11846585Storek tpblks = size >> tp_bshift; 11918012Smckusick while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { 12018012Smckusick req[trecno].dblk = dblkno; 12118012Smckusick req[trecno].count = avail; 12225219Smckusick trecno += avail; 1234774Smckusic spcl.c_tapea += avail; 12425219Smckusick if (trecno >= ntrec) 12546789Smckusick flushtape(); 12646585Storek dblkno += avail << (tp_bshift - dev_bshift); 1275329Smckusic tpblks -= avail; 1284774Smckusic } 1291425Sroot } 1301425Sroot 1311425Sroot int nogripe = 0; 1321425Sroot 13346585Storek void 13446585Storek tperror() 13546585Storek { 13618012Smckusick if (pipeout) { 13746614Smckusick msg("write error on %s\n", tape); 13846585Storek quit("Cannot recover\n"); 13918012Smckusick /* NOTREACHED */ 14018012Smckusick } 14146614Smckusick msg("write error %d blocks into volume %d\n", blockswritten, tapeno); 14246614Smckusick broadcast("DUMP WRITE ERROR!\n"); 14318012Smckusick if (!query("Do you want to restart?")) 14418012Smckusick dumpabort(); 14546614Smckusick msg("Closing this volume. Prepare to restart with new media;\n"); 14618012Smckusick msg("this dump volume will be rewritten.\n"); 14724181Smckusick killall(); 14818012Smckusick nogripe = 1; 14918012Smckusick close_rewind(); 15018012Smckusick Exit(X_REWRITE); 15118012Smckusick } 15218012Smckusick 15346585Storek void 15425219Smckusick sigpipe() 15525219Smckusick { 15625219Smckusick 15746585Storek quit("Broken pipe\n"); 15825219Smckusick } 15925219Smckusick 16046585Storek void 16146789Smckusick flushtape() 16218012Smckusick { 16346795Sbostic #ifndef __STDC__ 16446795Sbostic int write(); 16546795Sbostic #endif 16646795Sbostic 16725219Smckusick int siz = (char *)tblock - (char *)req; 1681425Sroot 16946585Storek if (atomic(write, slavefd[rotor], req, siz) != siz) 17046585Storek quit("error writing command pipe: %s\n", strerror(errno)); 17146585Storek if (++rotor >= SLAVES) 17246585Storek rotor = 0; 17318012Smckusick tblock = (char (*)[TP_BSIZE]) &req[ntrec]; 1741425Sroot trecno = 0; 17524181Smckusick asize += tenths; 17610911Ssam blockswritten += ntrec; 17746614Smckusick if (!pipeout && (blocksperfile ? 17846614Smckusick (blockswritten >= blocksperfile) : (asize > tsize))) { 1791425Sroot close_rewind(); 18046789Smckusick startnewtape(); 1811425Sroot } 1821425Sroot timeest(); 1831425Sroot } 1841425Sroot 18546585Storek void 18646239Storek trewind() 1871425Sroot { 18824181Smckusick int f; 18912331Smckusick 19012331Smckusick if (pipeout) 19112331Smckusick return; 19218012Smckusick for (f = 0; f < SLAVES; f++) 19318012Smckusick close(slavefd[f]); 19446585Storek while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ 19546585Storek /* void */; 19618012Smckusick msg("Tape rewinding\n"); 19718012Smckusick #ifdef RDUMP 19825219Smckusick if (host) { 19925219Smckusick rmtclose(); 20025219Smckusick while (rmtopen(tape, 0) < 0) 20125219Smckusick sleep(10); 20225219Smckusick rmtclose(); 20325219Smckusick return; 20425219Smckusick } 20525219Smckusick #endif RDUMP 20646789Smckusick close(tapefd); 2073214Swnj while ((f = open(tape, 0)) < 0) 2083214Swnj sleep (10); 2093214Swnj close(f); 2101425Sroot } 2111425Sroot 21246585Storek void 2131425Sroot close_rewind() 2141425Sroot { 21546239Storek trewind(); 21618012Smckusick if (!nogripe) { 21746614Smckusick msg("Change Volumes: Mount volume #%d\n", tapeno+1); 21846614Smckusick broadcast("CHANGE DUMP VOLUMES!\7\7\n"); 2191425Sroot } 22047056Skarels while (nexttape == 0 && 22147056Skarels !query("Is the new volume mounted and ready to go?")) 22225219Smckusick if (query("Do you want to abort?")) { 2231425Sroot dumpabort(); 22425219Smckusick /*NOTREACHED*/ 22525219Smckusick } 2261425Sroot } 2271425Sroot 2281425Sroot /* 22918012Smckusick * We implement taking and restoring checkpoints on the tape level. 2301425Sroot * When each tape is opened, a new process is created by forking; this 2311425Sroot * saves all of the necessary context in the parent. The child 2321425Sroot * continues the dump; the parent waits around, saving the context. 2331425Sroot * If the child returns X_REWRITE, then it had problems writing that tape; 2341425Sroot * this causes the parent to fork again, duplicating the context, and 2351425Sroot * everything continues as if nothing had happened. 2361425Sroot */ 2371425Sroot 23846585Storek void 23946789Smckusick startnewtape() 2401425Sroot { 2411425Sroot int parentpid; 2421425Sroot int childpid; 2431425Sroot int status; 2441425Sroot int waitpid; 24539164Sbostic sig_t interrupt; 24629899Smckusick int blks, i; 24747056Skarels char *p; 2481425Sroot 24939164Sbostic interrupt = signal(SIGINT, SIG_IGN); 2501425Sroot parentpid = getpid(); 2511425Sroot 2521425Sroot restore_check_point: 25339164Sbostic (void)signal(SIGINT, interrupt); 25425219Smckusick /* 25525219Smckusick * All signals are inherited... 25625219Smckusick */ 2571425Sroot childpid = fork(); 25818012Smckusick if (childpid < 0) { 2591425Sroot msg("Context save fork fails in parent %d\n", parentpid); 2601425Sroot Exit(X_ABORT); 2611425Sroot } 26218012Smckusick if (childpid != 0) { 2631425Sroot /* 2641425Sroot * PARENT: 2651425Sroot * save the context by waiting 2661425Sroot * until the child doing all of the work returns. 26718012Smckusick * don't catch the interrupt 2681425Sroot */ 26925219Smckusick signal(SIGINT, SIG_IGN); 2701425Sroot #ifdef TDEBUG 2711425Sroot msg("Tape: %d; parent process: %d child process %d\n", 2721425Sroot tapeno+1, parentpid, childpid); 2731425Sroot #endif TDEBUG 27418012Smckusick while ((waitpid = wait(&status)) != childpid) 27518012Smckusick msg("Parent %d waiting for child %d has another child %d return\n", 27618012Smckusick parentpid, childpid, waitpid); 27718012Smckusick if (status & 0xFF) { 2781425Sroot msg("Child %d returns LOB status %o\n", 2791425Sroot childpid, status&0xFF); 2801425Sroot } 2811425Sroot status = (status >> 8) & 0xFF; 2821425Sroot #ifdef TDEBUG 28318012Smckusick switch(status) { 2841425Sroot case X_FINOK: 2851425Sroot msg("Child %d finishes X_FINOK\n", childpid); 2861425Sroot break; 2871425Sroot case X_ABORT: 2881425Sroot msg("Child %d finishes X_ABORT\n", childpid); 2891425Sroot break; 2901425Sroot case X_REWRITE: 2911425Sroot msg("Child %d finishes X_REWRITE\n", childpid); 2921425Sroot break; 2931425Sroot default: 29418012Smckusick msg("Child %d finishes unknown %d\n", 29525219Smckusick childpid, status); 2961425Sroot break; 2971425Sroot } 2981425Sroot #endif TDEBUG 29918012Smckusick switch(status) { 3001425Sroot case X_FINOK: 3011425Sroot Exit(X_FINOK); 3021425Sroot case X_ABORT: 3031425Sroot Exit(X_ABORT); 3041425Sroot case X_REWRITE: 3051425Sroot goto restore_check_point; 3061425Sroot default: 3071425Sroot msg("Bad return code from dump: %d\n", status); 3081425Sroot Exit(X_ABORT); 3091425Sroot } 3101425Sroot /*NOTREACHED*/ 3111425Sroot } else { /* we are the child; just continue */ 3121425Sroot #ifdef TDEBUG 3131425Sroot sleep(4); /* allow time for parent's message to get out */ 3141425Sroot msg("Child on Tape %d has parent %d, my pid = %d\n", 3151425Sroot tapeno+1, parentpid, getpid()); 31625219Smckusick #endif TDEBUG 31747056Skarels /* 31847056Skarels * If we have a name like "/dev/rmt0,/dev/rmt1", 31947056Skarels * use the name before the comma first, and save 32047056Skarels * the second name for next time. 32147056Skarels */ 32247056Skarels if (nexttape && *nexttape) 32347056Skarels tape = nexttape; 32447056Skarels if (p = index(tape, ',')) { 32547056Skarels *p = '\0'; 32647056Skarels nexttape = p + 1; 32747056Skarels } else 32847056Skarels nexttape = NULL; 32918012Smckusick #ifdef RDUMP 33046789Smckusick while ((tapefd = (host ? rmtopen(tape, 2) : 33146789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 33225219Smckusick #else RDUMP 33346789Smckusick while ((tapefd = 33446789Smckusick pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666)) < 0) 33524181Smckusick #endif RDUMP 33639128Smckusick { 33746614Smckusick msg("Cannot open output \"%s\".\n", tape); 33839128Smckusick if (!query("Do you want to retry the open?")) 33918012Smckusick dumpabort(); 34039128Smckusick } 3411425Sroot 34218012Smckusick enslave(); /* Share open tape file descriptor with slaves */ 34318012Smckusick 3441425Sroot asize = 0; 3451425Sroot tapeno++; /* current tape sequence */ 3461425Sroot newtape++; /* new tape signal */ 34729899Smckusick blks = 0; 34829899Smckusick if (spcl.c_type != TS_END) 34929899Smckusick for (i = 0; i < spcl.c_count; i++) 35029899Smckusick if (spcl.c_addr[i] != 0) 35129899Smckusick blks++; 35229899Smckusick spcl.c_count = blks + 1 - spcl.c_tapea + lastspclrec; 3531425Sroot spcl.c_volume++; 3541425Sroot spcl.c_type = TS_TAPE; 35530432Smckusick spcl.c_flags |= DR_NEWHEADER; 35646789Smckusick writeheader(curino); 35730432Smckusick spcl.c_flags &=~ DR_NEWHEADER; 3581425Sroot if (tapeno > 1) 35946789Smckusick msg("Tape %d begins with blocks from inode %d\n", 36046789Smckusick tapeno, curino); 3611425Sroot } 3621425Sroot } 3631425Sroot 36446585Storek void 3651425Sroot dumpabort() 3661425Sroot { 36718012Smckusick if (master != 0 && master != getpid()) 36825219Smckusick kill(master, SIGTERM); /* Signals master to call dumpabort */ 36924181Smckusick else { 37024181Smckusick killall(); 37124181Smckusick msg("The ENTIRE dump is aborted.\n"); 37224181Smckusick } 3731425Sroot Exit(X_ABORT); 3741425Sroot } 3751425Sroot 37646585Storek void 3771425Sroot Exit(status) 37846239Storek int status; 3791425Sroot { 3801425Sroot #ifdef TDEBUG 3811425Sroot msg("pid = %d exits with status %d\n", getpid(), status); 3821425Sroot #endif TDEBUG 3831925Swnj exit(status); 3841425Sroot } 38518012Smckusick 38624181Smckusick /* 38725219Smckusick * could use pipe() for this if flock() worked on pipes 38824181Smckusick */ 38946585Storek void 39024181Smckusick lockfile(fd) 39124181Smckusick int fd[2]; 39224181Smckusick { 39324181Smckusick char tmpname[20]; 39418012Smckusick 39539128Smckusick strcpy(tmpname, _PATH_LOCK); 39624181Smckusick mktemp(tmpname); 39746585Storek if ((fd[1] = creat(tmpname, 0400)) < 0) 39846585Storek quit("cannot create lockfile %s: %s\n", 39946585Storek tmpname, strerror(errno)); 40046585Storek if ((fd[0] = open(tmpname, 0)) < 0) 40146585Storek quit("cannot reopen lockfile %s: %s\n", 40246585Storek tmpname, strerror(errno)); 40346585Storek (void) unlink(tmpname); 40424181Smckusick } 40524181Smckusick 40646585Storek void 40718012Smckusick enslave() 40818012Smckusick { 40924181Smckusick int first[2], prev[2], next[2], cmd[2]; /* file descriptors */ 41024181Smckusick register int i, j; 41118012Smckusick 41218012Smckusick master = getpid(); 41325219Smckusick signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 41425219Smckusick signal(SIGPIPE, sigpipe); 41525219Smckusick signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 41624181Smckusick lockfile(first); 41724181Smckusick for (i = 0; i < SLAVES; i++) { 41824181Smckusick if (i == 0) { 41924181Smckusick prev[0] = first[1]; 42024181Smckusick prev[1] = first[0]; 42124181Smckusick } else { 42224181Smckusick prev[0] = next[0]; 42324181Smckusick prev[1] = next[1]; 42424181Smckusick flock(prev[1], LOCK_EX); 42524181Smckusick } 42626485Smckusick if (i < SLAVES - 1) { 42726485Smckusick lockfile(next); 42826485Smckusick } else { 42926485Smckusick next[0] = first[0]; 43026485Smckusick next[1] = first[1]; /* Last slave loops back */ 43126485Smckusick } 43246585Storek if (pipe(cmd) < 0 || (slavepid[i] = fork()) < 0) 43346585Storek quit("too many slaves, %d (recompile smaller): %s\n", 43446585Storek i, strerror(errno)); 43518012Smckusick slavefd[i] = cmd[1]; 43625219Smckusick if (slavepid[i] == 0) { /* Slave starts up here */ 43718012Smckusick for (j = 0; j <= i; j++) 43818012Smckusick close(slavefd[j]); 43925219Smckusick signal(SIGINT, SIG_IGN); /* Master handles this */ 44025219Smckusick doslave(cmd[0], prev, next); 44118012Smckusick Exit(X_FINOK); 44218012Smckusick } 44318012Smckusick close(cmd[0]); 44424181Smckusick if (i > 0) { 44524181Smckusick close(prev[0]); 44624181Smckusick close(prev[1]); 44724181Smckusick } 44818012Smckusick } 44924181Smckusick close(first[0]); 45024181Smckusick close(first[1]); 45124181Smckusick master = 0; rotor = 0; 45218012Smckusick } 45318012Smckusick 45446585Storek void 45524181Smckusick killall() 45618012Smckusick { 45724181Smckusick register int i; 45819982Smckusick 45924181Smckusick for (i = 0; i < SLAVES; i++) 46024181Smckusick if (slavepid[i] > 0) 46124181Smckusick kill(slavepid[i], SIGKILL); 46218012Smckusick } 46318012Smckusick 46424181Smckusick /* 46524181Smckusick * Synchronization - each process has a lockfile, and shares file 46624181Smckusick * descriptors to the following process's lockfile. When our write 46724181Smckusick * completes, we release our lock on the following process's lock- 46824181Smckusick * file, allowing the following process to lock it and proceed. We 46924181Smckusick * get the lock back for the next cycle by swapping descriptors. 47024181Smckusick */ 47146585Storek void 47225219Smckusick doslave(cmd, prev, next) 47325219Smckusick register int cmd, prev[2], next[2]; 47419982Smckusick { 47525219Smckusick register int nread, toggle = 0; 47646795Sbostic #ifndef __STDC__ 47746795Sbostic int read(); 47846795Sbostic #endif 47919982Smckusick 48046789Smckusick /* 48146789Smckusick * Need our own seek pointer. 48246789Smckusick */ 48346789Smckusick close(diskfd); 48446789Smckusick if ((diskfd = open(disk, O_RDONLY)) < 0) 48546585Storek quit("slave couldn't reopen disk: %s\n", strerror(errno)); 48624181Smckusick /* 48725219Smckusick * Get list of blocks to dump, read the blocks into tape buffer 48824181Smckusick */ 48925219Smckusick while ((nread = atomic(read, cmd, req, reqsiz)) == reqsiz) { 49018012Smckusick register struct req *p = req; 49118012Smckusick for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) { 49218012Smckusick if (p->dblk) { 49318012Smckusick bread(p->dblk, tblock[trecno], 49425219Smckusick p->count * TP_BSIZE); 49518012Smckusick } else { 49625219Smckusick if (p->count != 1 || atomic(read, cmd, 49746585Storek tblock[trecno], TP_BSIZE) != TP_BSIZE) 49846585Storek quit("master/slave protocol botched.\n"); 49918012Smckusick } 50018012Smckusick } 50124181Smckusick flock(prev[toggle], LOCK_EX); /* Wait our turn */ 50225219Smckusick 50318012Smckusick #ifdef RDUMP 50425219Smckusick if ((host ? rmtwrite(tblock[0], writesize) 50546789Smckusick : write(tapefd, tblock[0], writesize)) != writesize) { 50625219Smckusick #else RDUMP 50746789Smckusick if (write(tapefd, tblock[0], writesize) != writesize) { 50824181Smckusick #endif RDUMP 50925219Smckusick kill(master, SIGUSR1); 51025219Smckusick for (;;) 51125219Smckusick sigpause(0); 51218012Smckusick } 51324181Smckusick toggle ^= 1; 51424181Smckusick flock(next[toggle], LOCK_UN); /* Next slave's turn */ 51524181Smckusick } /* Also jolts him awake */ 51646585Storek if (nread != 0) 51746585Storek quit("error reading command pipe: %s\n", strerror(errno)); 51818012Smckusick } 51919947Smckusick 52019947Smckusick /* 52125219Smckusick * Since a read from a pipe may not return all we asked for, 52225219Smckusick * or a write may not write all we ask if we get a signal, 52325219Smckusick * loop until the count is satisfied (or error). 52419947Smckusick */ 52546585Storek int 52625219Smckusick atomic(func, fd, buf, count) 52725219Smckusick int (*func)(), fd, count; 52819947Smckusick char *buf; 52919947Smckusick { 53025219Smckusick int got, need = count; 53119947Smckusick 53225219Smckusick while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 53319947Smckusick buf += got; 53425219Smckusick return (got < 0 ? got : count - need); 53519947Smckusick } 536