xref: /csrg-svn/sbin/dump/tape.c (revision 19982)
117527Ssam #ifndef lint
2*19982Smckusick static	char *sccsid = "@(#)tape.c	1.11 (Berkeley) 05/07/85";
317527Ssam #endif
417527Ssam 
51425Sroot #include "dump.h"
618012Smckusick #include <signal.h>
71425Sroot 
810911Ssam char	(*tblock)[TP_BSIZE];	/* Pointer to malloc()ed buffer for tape */
910911Ssam int	writesize;		/* Size of malloc()ed buffer for tape */
104774Smckusic int	trecno = 0;
1118012Smckusick extern	int ntrec;		/* blocking factor on tape */
121425Sroot 
1310911Ssam /*
1418012Smckusick  * Streaming dump mods (Caltech) - disk block reading and tape writing
1518012Smckusick  * are exported to several slave processes.  While one slave writes the
1618012Smckusick  * tape, the others read disk blocks; they pass control of the tape in
1718012Smckusick  * a ring via pipes.  The parent process traverses the filesystem and
1818012Smckusick  * sends daddr's, inode records, etc, through pipes to each slave.
1918012Smckusick  * Speed from Eagle to TU77 on VAX/780 is about 140 Kbytes/second.
2018012Smckusick  * #ifdef RDUMP version is CPU-limited to about 40 Kbytes/second.
2110911Ssam  */
2218012Smckusick struct req {			/* instruction packets sent to slaves */
2318012Smckusick 	daddr_t dblk;
2418012Smckusick 	int count;
2518012Smckusick } *req;
2618012Smckusick int reqsiz;
2718012Smckusick 
#define SLAVES 3		/* 2 slaves read disk while 3rd writes tape */
#define LAG 2			/* Write behind by LAG tape blocks (rdump) */

int slavefd[SLAVES];		/* Write ends of the master-to-slave command pipes */
int rotor;			/* Index of the slave that gets the next tape block */
int master;			/* Pid of master, for sending error signals */
int trace = 0;			/* Protocol trace; easily patchable with adb */

/*
 * Trace message, printed only when `trace' is nonzero.
 * NOTE: expands to a bare `if', so never use it as the unbraced body
 * of an if that has an else.
 */
#define  tmsg	if (trace) msg

#ifdef RDUMP
extern int rmtape;
#endif
3918012Smckusick 
4018012Smckusick /*
4118012Smckusick  * Allocate tape buffer contiguous with the array of instruction packets,
4218012Smckusick  * so they can be written with a single write call in flusht().
4318012Smckusick  */
4410911Ssam alloctape()
4510911Ssam {
4610911Ssam 
4710911Ssam 	writesize = ntrec * TP_BSIZE;
4818012Smckusick 	reqsiz = ntrec * sizeof(struct req);
4918012Smckusick 	req = (struct req *)malloc(reqsiz+writesize);	/* array of packets */
5018012Smckusick 	tblock = (char (*)[TP_BSIZE]) &req[ntrec];	/* Tape buffer */
5118012Smckusick 	return (req != NULL);
5210911Ssam }
5310911Ssam 
5418012Smckusick /*
5518012Smckusick  * Send special record to be put on tape
5618012Smckusick  */
571425Sroot taprec(dp)
585329Smckusic 	char *dp;
591425Sroot {
601425Sroot 
6118012Smckusick 	tmsg("taprec %d\n", trecno);
6218012Smckusick 	req[trecno].dblk = (daddr_t)0;
6318012Smckusick 	req[trecno].count = 1;
6418012Smckusick 	*(union u_spcl *)(*tblock++) = *(union u_spcl *)dp;
651425Sroot 	spcl.c_tapea++;
6618012Smckusick 	if (++trecno >= ntrec)
671425Sroot 		flusht();
681425Sroot }
691425Sroot 
704774Smckusic dmpblk(blkno, size)
714774Smckusic 	daddr_t blkno;
724774Smckusic 	int size;
731425Sroot {
7418012Smckusick 	int tpblks, dblkno;
7518012Smckusick 	register int avail;
761425Sroot 
775329Smckusic 	if (size % TP_BSIZE != 0)
784774Smckusic 		msg("bad size to dmpblk: %d\n", size);
795329Smckusic 	dblkno = fsbtodb(sblock, blkno);
8018012Smckusick 	tpblks = size / TP_BSIZE;
8118012Smckusick 	while ((avail = MIN(tpblks, ntrec - trecno)) > 0) {
8218012Smckusick 		tmsg("dmpblk %d\n", avail);
8318012Smckusick 		req[trecno].dblk = dblkno;
8418012Smckusick 		req[trecno].count = avail;
854774Smckusic 		trecno += avail;
864774Smckusic 		spcl.c_tapea += avail;
8718012Smckusick 		if (trecno >= ntrec)
8818012Smckusick 			flusht();
895329Smckusic 		dblkno += avail * (TP_BSIZE / DEV_BSIZE);
905329Smckusic 		tpblks -= avail;
914774Smckusic 	}
921425Sroot }
931425Sroot 
941425Sroot int	nogripe = 0;
951425Sroot 
9618012Smckusick tperror() {
9718012Smckusick 	if (pipeout) {
9818012Smckusick 		msg("Tape write error on %s\n", tape);
9918012Smckusick 		msg("Cannot recover\n");
10018012Smckusick 		dumpabort();
10118012Smckusick 		/* NOTREACHED */
10218012Smckusick 	}
10318012Smckusick 	msg("Tape write error on tape %d\n", tapeno);
10418012Smckusick 	broadcast("TAPE ERROR!\n");
10518012Smckusick 	if (!query("Do you want to restart?"))
10618012Smckusick 		dumpabort();
10718012Smckusick 	msg("This tape will rewind.  After it is rewound,\n");
10818012Smckusick 	msg("replace the faulty tape with a new one;\n");
10918012Smckusick 	msg("this dump volume will be rewritten.\n");
11018012Smckusick 	nogripe = 1;
11118012Smckusick 	close_rewind();
11218012Smckusick 	Exit(X_REWRITE);
11318012Smckusick }
11418012Smckusick 
/*
 * Report a failed transfer on a master/slave command pipe (errno is
 * printed by perror) and abort the dump.
 */
senderr()
{
	perror("  DUMP: pipe error in command to slave");
	dumpabort();
}
12118012Smckusick 
#ifdef RDUMP
/*
 * Emit ntrec special records through spclrec().
 * The cnt argument is accepted but not used.
 */
tflush(cnt)
	int cnt;
{
	int left;

	left = ntrec;
	while (left > 0) {
		spclrec();
		left--;
	}
}
#endif /* RDUMP */
13218012Smckusick 
1331425Sroot flusht()
1341425Sroot {
13518012Smckusick 	int sig, siz = (char *)tblock - (char *)req;
1361425Sroot 
13718012Smckusick 	tmsg("flusht %d\n", siz);
13818012Smckusick 	sig = sigblock(1<<SIGINT-1 | 1<<SIGIOT-1);  /* Don't interrupt write */
13918012Smckusick 	if (write(slavefd[rotor], req, siz) != siz)
14018012Smckusick 		senderr();
14118012Smckusick 	sigsetmask(sig);
14218012Smckusick 	if (++rotor >= SLAVES) rotor = 0;
14318012Smckusick 	tblock = (char (*)[TP_BSIZE]) &req[ntrec];
1441425Sroot 	trecno = 0;
14510911Ssam 	asize += writesize/density;
14618012Smckusick 	asize += 7;			/* inter-record gap (why fixed?) */
14710911Ssam 	blockswritten += ntrec;
14812331Smckusick 	if (!pipeout && asize > tsize) {
1491425Sroot 		close_rewind();
1501425Sroot 		otape();
1511425Sroot 	}
1521425Sroot 	timeest();
1531425Sroot }
1541425Sroot 
1551425Sroot rewind()
1561425Sroot {
15718012Smckusick 	register int f;
15812331Smckusick 
15912331Smckusick 	if (pipeout)
16012331Smckusick 		return;
16118012Smckusick 	for (f = 0; f < SLAVES; f++)
16218012Smckusick 		close(slavefd[f]);
16318012Smckusick 	while (wait(NULL) >= 0)    ;	/* wait for any signals from slaves */
16418012Smckusick 	msg("Tape rewinding\n");
16518012Smckusick #ifdef RDUMP
16618012Smckusick 	rmtclose();
16718012Smckusick 	while (rmtopen(tape, 0) < 0)
16818012Smckusick 		sleep(10);
16918012Smckusick 	rmtclose();
1701425Sroot #else
1713214Swnj 	close(to);
1723214Swnj 	while ((f = open(tape, 0)) < 0)
1733214Swnj 		sleep (10);
1743214Swnj 	close(f);
1751425Sroot #endif
1761425Sroot }
1771425Sroot 
1781425Sroot close_rewind()
1791425Sroot {
18018012Smckusick 	rewind();
18118012Smckusick 	if (!nogripe) {
1821425Sroot 		msg("Change Tapes: Mount tape #%d\n", tapeno+1);
1831425Sroot 		broadcast("CHANGE TAPES!\7\7\n");
1841425Sroot 	}
18518012Smckusick 	while (!query("Is the new tape mounted and ready to go?"))
18618012Smckusick 		if (query("Do you want to abort?"))
1871425Sroot 			dumpabort();
1881425Sroot }
1891425Sroot 
1901425Sroot /*
19118012Smckusick  *	We implement taking and restoring checkpoints on the tape level.
1921425Sroot  *	When each tape is opened, a new process is created by forking; this
1931425Sroot  *	saves all of the necessary context in the parent.  The child
1941425Sroot  *	continues the dump; the parent waits around, saving the context.
1951425Sroot  *	If the child returns X_REWRITE, then it had problems writing that tape;
1961425Sroot  *	this causes the parent to fork again, duplicating the context, and
1971425Sroot  *	everything continues as if nothing had happened.
1981425Sroot  */
1991425Sroot 
2001425Sroot otape()
2011425Sroot {
2021425Sroot 	int	parentpid;
2031425Sroot 	int	childpid;
2041425Sroot 	int	status;
2051425Sroot 	int	waitpid;
2061425Sroot 	int	interrupt();
2071425Sroot 
2081425Sroot 	parentpid = getpid();
2091425Sroot 
2101425Sroot     restore_check_point:
2111425Sroot 	signal(SIGINT, interrupt);
2121425Sroot 	/*
2131425Sroot 	 *	All signals are inherited...
2141425Sroot 	 */
2151425Sroot 	childpid = fork();
21618012Smckusick 	if (childpid < 0) {
2171425Sroot 		msg("Context save fork fails in parent %d\n", parentpid);
2181425Sroot 		Exit(X_ABORT);
2191425Sroot 	}
22018012Smckusick 	if (childpid != 0) {
2211425Sroot 		/*
2221425Sroot 		 *	PARENT:
2231425Sroot 		 *	save the context by waiting
2241425Sroot 		 *	until the child doing all of the work returns.
22518012Smckusick 		 *	don't catch the interrupt
2261425Sroot 		 */
2271425Sroot 		signal(SIGINT, SIG_IGN);
2281425Sroot #ifdef TDEBUG
2291425Sroot 		msg("Tape: %d; parent process: %d child process %d\n",
2301425Sroot 			tapeno+1, parentpid, childpid);
2311425Sroot #endif TDEBUG
23218012Smckusick 		while ((waitpid = wait(&status)) != childpid)
23318012Smckusick 			msg("Parent %d waiting for child %d has another child %d return\n",
23418012Smckusick 				parentpid, childpid, waitpid);
23518012Smckusick 		if (status & 0xFF) {
2361425Sroot 			msg("Child %d returns LOB status %o\n",
2371425Sroot 				childpid, status&0xFF);
2381425Sroot 		}
2391425Sroot 		status = (status >> 8) & 0xFF;
2401425Sroot #ifdef TDEBUG
24118012Smckusick 		switch(status) {
2421425Sroot 			case X_FINOK:
2431425Sroot 				msg("Child %d finishes X_FINOK\n", childpid);
2441425Sroot 				break;
2451425Sroot 			case X_ABORT:
2461425Sroot 				msg("Child %d finishes X_ABORT\n", childpid);
2471425Sroot 				break;
2481425Sroot 			case X_REWRITE:
2491425Sroot 				msg("Child %d finishes X_REWRITE\n", childpid);
2501425Sroot 				break;
2511425Sroot 			default:
25218012Smckusick 				msg("Child %d finishes unknown %d\n",
25318012Smckusick 				    childpid, status);
2541425Sroot 				break;
2551425Sroot 		}
2561425Sroot #endif TDEBUG
25718012Smckusick 		switch(status) {
2581425Sroot 			case X_FINOK:
2591425Sroot 				Exit(X_FINOK);
2601425Sroot 			case X_ABORT:
2611425Sroot 				Exit(X_ABORT);
2621425Sroot 			case X_REWRITE:
2631425Sroot 				goto restore_check_point;
2641425Sroot 			default:
2651425Sroot 				msg("Bad return code from dump: %d\n", status);
2661425Sroot 				Exit(X_ABORT);
2671425Sroot 		}
2681425Sroot 		/*NOTREACHED*/
2691425Sroot 	} else {	/* we are the child; just continue */
2701425Sroot #ifdef TDEBUG
2711425Sroot 		sleep(4);	/* allow time for parent's message to get out */
2721425Sroot 		msg("Child on Tape %d has parent %d, my pid = %d\n",
2731425Sroot 			tapeno+1, parentpid, getpid());
2741425Sroot #endif
27518012Smckusick #ifdef RDUMP
27618012Smckusick 		while ((to = rmtopen(tape, 2)) < 0)
27718012Smckusick #else
27818012Smckusick 		while ((to = pipeout ? 1 : creat(tape, 0666)) < 0)
27918012Smckusick #endif
28018012Smckusick 			if (!query("Cannot open tape.  Do you want to retry the open?"))
28118012Smckusick 				dumpabort();
2821425Sroot 
28318012Smckusick 		enslave();  /* Share open tape file descriptor with slaves */
28418012Smckusick 
2851425Sroot 		asize = 0;
2861425Sroot 		tapeno++;		/* current tape sequence */
2871425Sroot 		newtape++;		/* new tape signal */
2881425Sroot 		spcl.c_volume++;
2891425Sroot 		spcl.c_type = TS_TAPE;
2901425Sroot 		spclrec();
2911425Sroot 		if (tapeno > 1)
2921425Sroot 			msg("Tape %d begins with blocks from ino %d\n",
2931425Sroot 				tapeno, ino);
2941425Sroot 	}
2951425Sroot }
2961425Sroot 
2971425Sroot dumpabort()
2981425Sroot {
29918012Smckusick 	if (master != 0 && master != getpid())
30018012Smckusick 		kill(master, SIGIOT);
3011925Swnj 	msg("The ENTIRE dump is aborted.\n");
3021425Sroot 	Exit(X_ABORT);
3031425Sroot }
3041425Sroot 
/*
 * Terminate this process with the given exit status.
 * With TDEBUG defined the pid and status are logged first.
 */
Exit(status)
	int status;
{
#ifdef TDEBUG
	msg("pid = %d exits with status %d\n", getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
31218012Smckusick 
31318012Smckusick #define OK 020
31418012Smckusick char tok = OK;
31518012Smckusick 
31618012Smckusick enslave()
31718012Smckusick {
31818012Smckusick 	int prev[2], next[2], cmd[2];	/* file descriptors for pipes */
319*19982Smckusick 	int i, j, ret, slavepid;
32018012Smckusick 
32118012Smckusick 	master = getpid();
32218012Smckusick 	signal(SIGPIPE, dumpabort);
32318012Smckusick 	signal(SIGIOT, tperror); /* SIGIOT asks for restart from checkpoint */
32418012Smckusick 	pipe(prev);
32518012Smckusick 	for (i = rotor = 0; i < SLAVES; ++i) {
32618012Smckusick 		if ((i < SLAVES - 1 && pipe(next) < 0) || pipe(cmd) < 0
32718012Smckusick 				|| (slavepid = fork()) < 0) {
32818012Smckusick 			perror("  DUMP: too many slaves");
32918012Smckusick 			dumpabort();
33018012Smckusick 		}
33118012Smckusick 		if (i >= SLAVES - 1)
33218012Smckusick 			next[1] = prev[1];	    /* Last slave loops back */
33318012Smckusick 		slavefd[i] = cmd[1];
33418012Smckusick 		if (slavepid == 0) {		    /* Slave starts up here */
33518012Smckusick 			for (j = 0; j <= i; j++)
33618012Smckusick 				close(slavefd[j]);
33718012Smckusick 			if (i < SLAVES - 1) {
33818012Smckusick 				close(prev[1]);
33918012Smckusick 				close(next[0]);
34018012Smckusick 			} else {		    /* Insert initial token */
341*19982Smckusick 				if ((ret = write(next[1], &tok, 1)) != 1)
342*19982Smckusick 					ringerr(ret, "cannot start token");
34318012Smckusick 			}
34418012Smckusick 			doslave(i, cmd[0], prev[0], next[1]);
34518012Smckusick 			close(next[1]);
34618012Smckusick 			j = read(prev[0], &tok, 1);   /* Eat the final token */
34718012Smckusick #ifdef RDUMP				    /* Read remaining acknowledges */
34818012Smckusick 			for (; j > 0 && (tok &~ OK) > 0; tok--) {
34918012Smckusick 				if (rmtwrite2() != writesize && (tok & OK)) {
35018012Smckusick 					kill(master, SIGIOT);
35118012Smckusick 					tok &= ~OK;
35218012Smckusick 				}
35318012Smckusick 			}
35418012Smckusick #endif
35518012Smckusick 			Exit(X_FINOK);
35618012Smckusick 		}
35718012Smckusick 		close(cmd[0]);
35818012Smckusick 		close(next[1]);
35918012Smckusick 		close(prev[0]);
36018012Smckusick 		prev[0] = next[0];
36118012Smckusick 	}
36218012Smckusick 	master = 0;
36318012Smckusick }
36418012Smckusick 
36518012Smckusick /*
36618012Smckusick  * Somebody must have died, should never happen
36718012Smckusick  */
368*19982Smckusick ringerr(code, msg, a1, a2)
369*19982Smckusick 	int code;
370*19982Smckusick 	char *msg;
371*19982Smckusick 	int a1, a2;
37218012Smckusick {
373*19982Smckusick 	char buf[BUFSIZ];
374*19982Smckusick 
375*19982Smckusick 	fprintf(stderr, "  DUMP: ");
376*19982Smckusick 	sprintf(buf, msg, a1, a2);
377*19982Smckusick 	if (code < 0)
378*19982Smckusick 		perror(msg);
379*19982Smckusick 	else if (code == 0)
380*19982Smckusick 		fprintf(stderr, "%s: unexpected EOF\n", buf);
381*19982Smckusick 	else
382*19982Smckusick 		fprintf(stderr, "%s: code %d\n", buf, code);
38318012Smckusick 	kill(master, SIGPIPE);
38418012Smckusick 	Exit(X_ABORT);
38518012Smckusick }
38618012Smckusick 
int childnum;			/* this slave's number, recorded by doslave() */

/*
 * SIGPIPE handler installed by doslave(): report the signal through
 * ringerr(), passing the slave number as the code.
 */
sigpipe()
{
	ringerr(childnum, "SIGPIPE raised");
}
393*19982Smckusick 
39418012Smckusick doslave(num, cmd, prev, next)
39518012Smckusick 	int num, cmd, prev, next;
39618012Smckusick {
397*19982Smckusick 	int ret;
398*19982Smckusick 
39918012Smckusick 	tmsg("slave %d\n", num);
40018012Smckusick 	signal(SIGINT, SIG_IGN); 		/* Master handles it */
40118012Smckusick 	signal(SIGTERM, SIG_IGN);
402*19982Smckusick 	signal(SIGPIPE, sigpipe);
403*19982Smckusick 	childnum = num;
40418012Smckusick 	close(fi);
40518012Smckusick 	if ((fi = open(disk, 0)) < 0) {		/* Need our own seek pointer */
40618012Smckusick 		perror("  DUMP: can't reopen disk");
40718012Smckusick 		kill(master, SIGPIPE);
40818012Smckusick 		Exit(X_ABORT);
40918012Smckusick 	}
410*19982Smckusick 	while ((ret = readpipe(cmd, req, reqsiz)) == reqsiz) {
41118012Smckusick 		register struct req *p = req;
41218012Smckusick 		for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) {
41318012Smckusick 			if (p->dblk) {
41418012Smckusick 				tmsg("%d READS %d\n", num, p->count);
41518012Smckusick 				bread(p->dblk, tblock[trecno],
41618012Smckusick 				    p->count * TP_BSIZE);
41718012Smckusick 			} else {
41818012Smckusick 				tmsg("%d PIPEIN %d\n", num, p->count);
41918012Smckusick 				if (p->count != 1)
420*19982Smckusick 					ringerr(11, "%d PIPEIN %d", num,
421*19982Smckusick 						p->count);
42219947Smckusick 				if (readpipe(cmd, tblock[trecno], TP_BSIZE) != TP_BSIZE)
42318012Smckusick 					senderr();
42418012Smckusick 			}
42518012Smckusick 		}
426*19982Smckusick 		if ((ret = read(prev, &tok, 1)) != 1)
427*19982Smckusick 			ringerr(ret, "read token");	/* Wait your turn */
42818012Smckusick 		tmsg("%d WRITE\n", num);
42918012Smckusick #ifdef RDUMP
43018012Smckusick 		if (tok & OK) {
43118012Smckusick 			rmtwrite0(writesize);
43218012Smckusick 			rmtwrite1(tblock[0], writesize);
43318012Smckusick 			tok++;		/* Number of writes in progress */
43418012Smckusick 		}
43518012Smckusick 		if (tok > (LAG|OK) && (--tok, rmtwrite2() != writesize)) {
43618012Smckusick #else
43718012Smckusick 		if ((tok & OK) &&
43818012Smckusick 		    write(to, tblock[0], writesize) != writesize) {
43918012Smckusick 			perror(tape);
44018012Smckusick #endif
44118012Smckusick 			kill(master, SIGIOT);	/* restart from checkpoint */
44218012Smckusick 			tok &= ~OK;
44318012Smckusick 		}
444*19982Smckusick 		if ((ret = write(next, &tok, 1)) != 1)
445*19982Smckusick 			ringerr(ret, "write token"); /* Next slave's turn */
44618012Smckusick 	}
447*19982Smckusick 	if (ret != 0)
448*19982Smckusick 		ringerr(ret, "partial record?");
44918012Smckusick 	tmsg("%d CLOSE\n", num);
45018012Smckusick }
45119947Smckusick 
/*
 * Read exactly cnt bytes from fd into buf.
 *
 * A single read from a pipe may deliver less than was asked for, so
 * keep reading until the request is satisfied.  Returns cnt on
 * success, the number of bytes obtained so far if end-of-file is hit
 * first, or the negative result of read() if a read fails.
 */
int
readpipe(fd, buf, cnt)
	int fd;
	char *buf;
	int cnt;
{
	int left, n;

	left = cnt;
	while (left > 0) {
		n = read(fd, buf, left);
		if (n < 0)
			return (n);
		if (n == 0)
			return (cnt - left);
		buf += n;
		left -= n;
	}
	return (cnt);
}
473