xref: /csrg-svn/sbin/dump/tape.c (revision 30560)
122040Sdist /*
222040Sdist  * Copyright (c) 1980 Regents of the University of California.
322040Sdist  * All rights reserved.  The Berkeley software License Agreement
422040Sdist  * specifies the terms and conditions for redistribution.
522040Sdist  */
622040Sdist 
#ifndef lint
/* SCCS identification string; compiled out when running lint. */
static char sccsid[] = "@(#)tape.c	5.8 (Berkeley) 02/23/87";
#endif /* not lint */
1017527Ssam 
1125219Smckusick #include <sys/file.h>
121425Sroot #include "dump.h"
131425Sroot 
1429899Smckusick char	(*tblock)[TP_BSIZE];	/* pointer to malloc()ed buffer for tape */
1529899Smckusick int	writesize;		/* size of malloc()ed buffer for tape */
1629899Smckusick long	lastspclrec = -1;	/* tape block number of last written header */
1729899Smckusick int	trecno = 0;		/* next record to write in current block */
1825219Smckusick extern int ntrec;		/* blocking factor on tape */
1925219Smckusick extern int cartridge;
2025219Smckusick extern int read(), write();
2125219Smckusick #ifdef RDUMP
2225219Smckusick extern char *host;
2325219Smckusick #endif RDUMP
241425Sroot 
2510911Ssam /*
2624181Smckusick  * Concurrent dump mods (Caltech) - disk block reading and tape writing
2718012Smckusick  * are exported to several slave processes.  While one slave writes the
2818012Smckusick  * tape, the others read disk blocks; they pass control of the tape in
2924181Smckusick  * a ring via flock().	The parent process traverses the filesystem and
3025219Smckusick  * sends spclrec()'s and lists of daddr's to the slaves via pipes.
3110911Ssam  */
3218012Smckusick struct req {			/* instruction packets sent to slaves */
3318012Smckusick 	daddr_t dblk;
3418012Smckusick 	int count;
3518012Smckusick } *req;
3618012Smckusick int reqsiz;
3718012Smckusick 
3824181Smckusick #define SLAVES 3		/* 1 slave writing, 1 reading, 1 for slack */
3925219Smckusick int slavefd[SLAVES];		/* pipes from master to each slave */
4025219Smckusick int slavepid[SLAVES];		/* used by killall() */
4125219Smckusick int rotor;			/* next slave to be instructed */
4225219Smckusick int master;			/* pid of master, for sending error signals */
4325219Smckusick int tenths;			/* length of tape used per block written */
4418012Smckusick 
4510911Ssam alloctape()
4610911Ssam {
4725219Smckusick 	int pgoff = getpagesize() - 1;
4810911Ssam 
4910911Ssam 	writesize = ntrec * TP_BSIZE;
5025219Smckusick 	reqsiz = ntrec * sizeof(struct req);
5124181Smckusick 	/*
5225219Smckusick 	 * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode
5325219Smckusick 	 * (see DEC TU80 User's Guide).  The shorter gaps of 6250-bpi require
5425219Smckusick 	 * repositioning after stopping, i.e, streaming mode, where the gap is
5525219Smckusick 	 * variable, 0.30" to 0.45".  The gap is maximal when the tape stops.
5624181Smckusick 	 */
5725219Smckusick 	tenths = writesize/density + (cartridge ? 16 : density == 625 ? 5 : 8);
5825219Smckusick 	/*
5925219Smckusick 	 * Allocate tape buffer contiguous with the array of instruction
6025219Smckusick 	 * packets, so flusht() can write them together with one write().
6125219Smckusick 	 * Align tape buffer on page boundary to speed up tape write().
6225219Smckusick 	 */
6324181Smckusick 	req = (struct req *)malloc(reqsiz + writesize + pgoff);
6424181Smckusick 	if (req == NULL)
6524181Smckusick 		return(0);
6624181Smckusick 	tblock = (char (*)[TP_BSIZE]) (((long)&req[ntrec] + pgoff) &~ pgoff);
6725219Smckusick 	req = (struct req *)tblock - ntrec;
6824181Smckusick 	return(1);
6910911Ssam }
7010911Ssam 
7125219Smckusick 
721425Sroot taprec(dp)
735329Smckusic 	char *dp;
741425Sroot {
7518012Smckusick 	req[trecno].dblk = (daddr_t)0;
7618012Smckusick 	req[trecno].count = 1;
7724181Smckusick 	*(union u_spcl *)(*tblock++) = *(union u_spcl *)dp;	/* movc3 */
7829899Smckusick 	lastspclrec = spcl.c_tapea;
7924181Smckusick 	trecno++;
801425Sroot 	spcl.c_tapea++;
8124181Smckusick 	if(trecno >= ntrec)
821425Sroot 		flusht();
831425Sroot }
841425Sroot 
854774Smckusic dmpblk(blkno, size)
864774Smckusic 	daddr_t blkno;
874774Smckusic 	int size;
881425Sroot {
8925219Smckusick 	int avail, tpblks, dblkno;
901425Sroot 
915329Smckusic 	dblkno = fsbtodb(sblock, blkno);
9218012Smckusick 	tpblks = size / TP_BSIZE;
9318012Smckusick 	while ((avail = MIN(tpblks, ntrec - trecno)) > 0) {
9418012Smckusick 		req[trecno].dblk = dblkno;
9518012Smckusick 		req[trecno].count = avail;
9625219Smckusick 		trecno += avail;
974774Smckusic 		spcl.c_tapea += avail;
9825219Smckusick 		if (trecno >= ntrec)
9918012Smckusick 			flusht();
100*30560Smckusick 		dblkno += avail * (TP_BSIZE / dev_bsize);
1015329Smckusic 		tpblks -= avail;
1024774Smckusic 	}
1031425Sroot }
1041425Sroot 
1051425Sroot int	nogripe = 0;
1061425Sroot 
10718012Smckusick tperror() {
10818012Smckusick 	if (pipeout) {
10918012Smckusick 		msg("Tape write error on %s\n", tape);
11018012Smckusick 		msg("Cannot recover\n");
11118012Smckusick 		dumpabort();
11218012Smckusick 		/* NOTREACHED */
11318012Smckusick 	}
11425219Smckusick 	msg("Tape write error %d feet into tape %d\n", asize/120L, tapeno);
11518012Smckusick 	broadcast("TAPE ERROR!\n");
11618012Smckusick 	if (!query("Do you want to restart?"))
11718012Smckusick 		dumpabort();
11818012Smckusick 	msg("This tape will rewind.  After it is rewound,\n");
11918012Smckusick 	msg("replace the faulty tape with a new one;\n");
12018012Smckusick 	msg("this dump volume will be rewritten.\n");
12124181Smckusick 	killall();
12218012Smckusick 	nogripe = 1;
12318012Smckusick 	close_rewind();
12418012Smckusick 	Exit(X_REWRITE);
12518012Smckusick }
12618012Smckusick 
/*
 * SIGPIPE handler (installed by enslave()): report the broken pipe
 * and abort the dump.
 */
int
sigpipe()
{
	msg("Broken pipe\n");
	dumpabort();
}
13325219Smckusick 
#ifdef RDUMP
/*
 * Compatibility routine: calls spclrec() ntrec times.  The value passed
 * in i is ignored; the parameter only serves as the loop counter.
 */
int
tflush(i)
	int i;
{
	i = 0;
	while (i < ntrec) {
		spclrec();
		i++;
	}
}
#endif /* RDUMP */
14618012Smckusick 
1471425Sroot flusht()
1481425Sroot {
14925219Smckusick 	int siz = (char *)tblock - (char *)req;
1501425Sroot 
15125219Smckusick 	if (atomic(write, slavefd[rotor], req, siz) != siz) {
15225219Smckusick 		perror("  DUMP: error writing command pipe");
15324181Smckusick 		dumpabort();
15424181Smckusick 	}
15518012Smckusick 	if (++rotor >= SLAVES) rotor = 0;
15618012Smckusick 	tblock = (char (*)[TP_BSIZE]) &req[ntrec];
1571425Sroot 	trecno = 0;
15824181Smckusick 	asize += tenths;
15910911Ssam 	blockswritten += ntrec;
16012331Smckusick 	if (!pipeout && asize > tsize) {
1611425Sroot 		close_rewind();
1621425Sroot 		otape();
1631425Sroot 	}
1641425Sroot 	timeest();
1651425Sroot }
1661425Sroot 
1671425Sroot rewind()
1681425Sroot {
16924181Smckusick 	int f;
17012331Smckusick 
17112331Smckusick 	if (pipeout)
17212331Smckusick 		return;
17318012Smckusick 	for (f = 0; f < SLAVES; f++)
17418012Smckusick 		close(slavefd[f]);
17518012Smckusick 	while (wait(NULL) >= 0)    ;	/* wait for any signals from slaves */
17618012Smckusick 	msg("Tape rewinding\n");
17718012Smckusick #ifdef RDUMP
17825219Smckusick 	if (host) {
17925219Smckusick 		rmtclose();
18025219Smckusick 		while (rmtopen(tape, 0) < 0)
18125219Smckusick 			sleep(10);
18225219Smckusick 		rmtclose();
18325219Smckusick 		return;
18425219Smckusick 	}
18525219Smckusick #endif RDUMP
1863214Swnj 	close(to);
1873214Swnj 	while ((f = open(tape, 0)) < 0)
1883214Swnj 		sleep (10);
1893214Swnj 	close(f);
1901425Sroot }
1911425Sroot 
1921425Sroot close_rewind()
1931425Sroot {
19418012Smckusick 	rewind();
19518012Smckusick 	if (!nogripe) {
1961425Sroot 		msg("Change Tapes: Mount tape #%d\n", tapeno+1);
1971425Sroot 		broadcast("CHANGE TAPES!\7\7\n");
1981425Sroot 	}
19918012Smckusick 	while (!query("Is the new tape mounted and ready to go?"))
20025219Smckusick 		if (query("Do you want to abort?")) {
2011425Sroot 			dumpabort();
20225219Smckusick 			/*NOTREACHED*/
20325219Smckusick 		}
2041425Sroot }
2051425Sroot 
2061425Sroot /*
20718012Smckusick  *	We implement taking and restoring checkpoints on the tape level.
2081425Sroot  *	When each tape is opened, a new process is created by forking; this
2091425Sroot  *	saves all of the necessary context in the parent.  The child
2101425Sroot  *	continues the dump; the parent waits around, saving the context.
2111425Sroot  *	If the child returns X_REWRITE, then it had problems writing that tape;
2121425Sroot  *	this causes the parent to fork again, duplicating the context, and
2131425Sroot  *	everything continues as if nothing had happened.
2141425Sroot  */
2151425Sroot 
2161425Sroot otape()
2171425Sroot {
2181425Sroot 	int	parentpid;
2191425Sroot 	int	childpid;
2201425Sroot 	int	status;
2211425Sroot 	int	waitpid;
22225219Smckusick 	int	(*interrupt)() = signal(SIGINT, SIG_IGN);
22329899Smckusick 	int	blks, i;
2241425Sroot 
2251425Sroot 	parentpid = getpid();
2261425Sroot 
2271425Sroot     restore_check_point:
22825219Smckusick 	signal(SIGINT, interrupt);
22925219Smckusick 	/*
23025219Smckusick 	 *	All signals are inherited...
23125219Smckusick 	 */
2321425Sroot 	childpid = fork();
23318012Smckusick 	if (childpid < 0) {
2341425Sroot 		msg("Context save fork fails in parent %d\n", parentpid);
2351425Sroot 		Exit(X_ABORT);
2361425Sroot 	}
23718012Smckusick 	if (childpid != 0) {
2381425Sroot 		/*
2391425Sroot 		 *	PARENT:
2401425Sroot 		 *	save the context by waiting
2411425Sroot 		 *	until the child doing all of the work returns.
24218012Smckusick 		 *	don't catch the interrupt
2431425Sroot 		 */
24425219Smckusick 		signal(SIGINT, SIG_IGN);
2451425Sroot #ifdef TDEBUG
2461425Sroot 		msg("Tape: %d; parent process: %d child process %d\n",
2471425Sroot 			tapeno+1, parentpid, childpid);
2481425Sroot #endif TDEBUG
24918012Smckusick 		while ((waitpid = wait(&status)) != childpid)
25018012Smckusick 			msg("Parent %d waiting for child %d has another child %d return\n",
25118012Smckusick 				parentpid, childpid, waitpid);
25218012Smckusick 		if (status & 0xFF) {
2531425Sroot 			msg("Child %d returns LOB status %o\n",
2541425Sroot 				childpid, status&0xFF);
2551425Sroot 		}
2561425Sroot 		status = (status >> 8) & 0xFF;
2571425Sroot #ifdef TDEBUG
25818012Smckusick 		switch(status) {
2591425Sroot 			case X_FINOK:
2601425Sroot 				msg("Child %d finishes X_FINOK\n", childpid);
2611425Sroot 				break;
2621425Sroot 			case X_ABORT:
2631425Sroot 				msg("Child %d finishes X_ABORT\n", childpid);
2641425Sroot 				break;
2651425Sroot 			case X_REWRITE:
2661425Sroot 				msg("Child %d finishes X_REWRITE\n", childpid);
2671425Sroot 				break;
2681425Sroot 			default:
26918012Smckusick 				msg("Child %d finishes unknown %d\n",
27025219Smckusick 					childpid, status);
2711425Sroot 				break;
2721425Sroot 		}
2731425Sroot #endif TDEBUG
27418012Smckusick 		switch(status) {
2751425Sroot 			case X_FINOK:
2761425Sroot 				Exit(X_FINOK);
2771425Sroot 			case X_ABORT:
2781425Sroot 				Exit(X_ABORT);
2791425Sroot 			case X_REWRITE:
2801425Sroot 				goto restore_check_point;
2811425Sroot 			default:
2821425Sroot 				msg("Bad return code from dump: %d\n", status);
2831425Sroot 				Exit(X_ABORT);
2841425Sroot 		}
2851425Sroot 		/*NOTREACHED*/
2861425Sroot 	} else {	/* we are the child; just continue */
2871425Sroot #ifdef TDEBUG
2881425Sroot 		sleep(4);	/* allow time for parent's message to get out */
2891425Sroot 		msg("Child on Tape %d has parent %d, my pid = %d\n",
2901425Sroot 			tapeno+1, parentpid, getpid());
29125219Smckusick #endif TDEBUG
29218012Smckusick #ifdef RDUMP
29325219Smckusick 		while ((to = (host ? rmtopen(tape, 2) :
29425219Smckusick 			pipeout ? 1 : creat(tape, 0666))) < 0)
29525219Smckusick #else RDUMP
29618012Smckusick 		while ((to = pipeout ? 1 : creat(tape, 0666)) < 0)
29724181Smckusick #endif RDUMP
29818012Smckusick 			if (!query("Cannot open tape.  Do you want to retry the open?"))
29918012Smckusick 				dumpabort();
3001425Sroot 
30118012Smckusick 		enslave();  /* Share open tape file descriptor with slaves */
30218012Smckusick 
3031425Sroot 		asize = 0;
3041425Sroot 		tapeno++;		/* current tape sequence */
3051425Sroot 		newtape++;		/* new tape signal */
30629899Smckusick 		blks = 0;
30729899Smckusick 		if (spcl.c_type != TS_END)
30829899Smckusick 			for (i = 0; i < spcl.c_count; i++)
30929899Smckusick 				if (spcl.c_addr[i] != 0)
31029899Smckusick 					blks++;
31129899Smckusick 		spcl.c_count = blks + 1 - spcl.c_tapea + lastspclrec;
3121425Sroot 		spcl.c_volume++;
3131425Sroot 		spcl.c_type = TS_TAPE;
31430432Smckusick 		spcl.c_flags |= DR_NEWHEADER;
3151425Sroot 		spclrec();
31630432Smckusick 		spcl.c_flags &=~ DR_NEWHEADER;
3171425Sroot 		if (tapeno > 1)
3181425Sroot 			msg("Tape %d begins with blocks from ino %d\n",
3191425Sroot 				tapeno, ino);
3201425Sroot 	}
3211425Sroot }
3221425Sroot 
3231425Sroot dumpabort()
3241425Sroot {
32518012Smckusick 	if (master != 0 && master != getpid())
32625219Smckusick 		kill(master, SIGTERM);	/* Signals master to call dumpabort */
32724181Smckusick 	else {
32824181Smckusick 		killall();
32924181Smckusick 		msg("The ENTIRE dump is aborted.\n");
33024181Smckusick 	}
3311425Sroot 	Exit(X_ABORT);
3321425Sroot }
3331425Sroot 
/*
 * Leave the program with the given exit status.  With TDEBUG defined
 * the pid and status are logged first.
 */
int
Exit(status)
	int status;
{
#ifdef TDEBUG
	msg("pid = %d exits with status %d\n", getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
34118012Smckusick 
/*
 * Create an unnamed lock file and return two separately opened
 * descriptors for it: fd[1] from the creating open and fd[0] from a
 * read-only reopen.  The name is removed before returning, so the file
 * lives only as long as descriptors to it are open.  Aborts the dump if
 * either open fails.
 *
 * (could use pipe() for this if flock() worked on pipes)
 */
int
lockfile(fd)
	int fd[2];
{
	char tmpname[20];	/* exactly fits the template plus its NUL */

	strcpy(tmpname, "/tmp/dumplockXXXXXX");
	mktemp(tmpname);
	/*
	 * mktemp() only picks a name that was unused at the time of the
	 * call.  O_EXCL makes the create fail instead of following or
	 * truncating anything that may have been planted at that name in
	 * the meantime.
	 */
	if ((fd[1] = open(tmpname, O_WRONLY|O_CREAT|O_EXCL, 0400)) < 0) {
		msg("Could not create lockfile ");
		perror(tmpname);
		dumpabort();
	}
	if ((fd[0] = open(tmpname, 0)) < 0) {
		msg("Could not reopen lockfile ");
		perror(tmpname);
		unlink(tmpname);	/* don't leave the file behind in /tmp */
		dumpabort();
	}
	unlink(tmpname);
}
36424181Smckusick 
36518012Smckusick enslave()
36618012Smckusick {
36724181Smckusick 	int first[2], prev[2], next[2], cmd[2];     /* file descriptors */
36824181Smckusick 	register int i, j;
36918012Smckusick 
37018012Smckusick 	master = getpid();
37125219Smckusick 	signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */
37225219Smckusick 	signal(SIGPIPE, sigpipe);
37325219Smckusick 	signal(SIGUSR1, tperror);    /* Slave sends SIGUSR1 on tape errors */
37424181Smckusick 	lockfile(first);
37524181Smckusick 	for (i = 0; i < SLAVES; i++) {
37624181Smckusick 		if (i == 0) {
37724181Smckusick 			prev[0] = first[1];
37824181Smckusick 			prev[1] = first[0];
37924181Smckusick 		} else {
38024181Smckusick 			prev[0] = next[0];
38124181Smckusick 			prev[1] = next[1];
38224181Smckusick 			flock(prev[1], LOCK_EX);
38324181Smckusick 		}
38426485Smckusick 		if (i < SLAVES - 1) {
38526485Smckusick 			lockfile(next);
38626485Smckusick 		} else {
38726485Smckusick 			next[0] = first[0];
38826485Smckusick 			next[1] = first[1];	    /* Last slave loops back */
38926485Smckusick 		}
39026485Smckusick 		if (pipe(cmd) < 0 || (slavepid[i] = fork()) < 0) {
39126485Smckusick 			msg("too many slaves, %d (recompile smaller) ", i);
39226485Smckusick 			perror("");
39318012Smckusick 			dumpabort();
39418012Smckusick 		}
39518012Smckusick 		slavefd[i] = cmd[1];
39625219Smckusick 		if (slavepid[i] == 0) { 	    /* Slave starts up here */
39718012Smckusick 			for (j = 0; j <= i; j++)
39818012Smckusick 				close(slavefd[j]);
39925219Smckusick 			signal(SIGINT, SIG_IGN);    /* Master handles this */
40025219Smckusick 			doslave(cmd[0], prev, next);
40118012Smckusick 			Exit(X_FINOK);
40218012Smckusick 		}
40318012Smckusick 		close(cmd[0]);
40424181Smckusick 		if (i > 0) {
40524181Smckusick 			close(prev[0]);
40624181Smckusick 			close(prev[1]);
40724181Smckusick 		}
40818012Smckusick 	}
40924181Smckusick 	close(first[0]);
41024181Smckusick 	close(first[1]);
41124181Smckusick 	master = 0; rotor = 0;
41218012Smckusick }
41318012Smckusick 
41424181Smckusick killall()
41518012Smckusick {
41624181Smckusick 	register int i;
41719982Smckusick 
41824181Smckusick 	for (i = 0; i < SLAVES; i++)
41924181Smckusick 		if (slavepid[i] > 0)
42024181Smckusick 			kill(slavepid[i], SIGKILL);
42118012Smckusick }
42218012Smckusick 
42324181Smckusick /*
42424181Smckusick  * Synchronization - each process has a lockfile, and shares file
42524181Smckusick  * descriptors to the following process's lockfile.  When our write
42624181Smckusick  * completes, we release our lock on the following process's lock-
42724181Smckusick  * file, allowing the following process to lock it and proceed. We
42824181Smckusick  * get the lock back for the next cycle by swapping descriptors.
42924181Smckusick  */
43025219Smckusick doslave(cmd, prev, next)
43125219Smckusick 	register int cmd, prev[2], next[2];
43219982Smckusick {
43325219Smckusick 	register int nread, toggle = 0;
43419982Smckusick 
43518012Smckusick 	close(fi);
43625219Smckusick 	if ((fi = open(disk, 0)) < 0) { 	/* Need our own seek pointer */
43724181Smckusick 		perror("  DUMP: slave couldn't reopen disk");
43825219Smckusick 		dumpabort();
43918012Smckusick 	}
44024181Smckusick 	/*
44125219Smckusick 	 * Get list of blocks to dump, read the blocks into tape buffer
44224181Smckusick 	 */
44325219Smckusick 	while ((nread = atomic(read, cmd, req, reqsiz)) == reqsiz) {
44418012Smckusick 		register struct req *p = req;
44518012Smckusick 		for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) {
44618012Smckusick 			if (p->dblk) {
44718012Smckusick 				bread(p->dblk, tblock[trecno],
44825219Smckusick 					p->count * TP_BSIZE);
44918012Smckusick 			} else {
45025219Smckusick 				if (p->count != 1 || atomic(read, cmd,
45125219Smckusick 				    tblock[trecno], TP_BSIZE) != TP_BSIZE) {
45228644Smckusick 					msg("Master/slave protocol botched.\n");
45324181Smckusick 					dumpabort();
45424181Smckusick 				}
45518012Smckusick 			}
45618012Smckusick 		}
45724181Smckusick 		flock(prev[toggle], LOCK_EX);	/* Wait our turn */
45825219Smckusick 
45918012Smckusick #ifdef RDUMP
46025219Smckusick 		if ((host ? rmtwrite(tblock[0], writesize)
46125219Smckusick 			: write(to, tblock[0], writesize)) != writesize) {
46225219Smckusick #else RDUMP
46325219Smckusick 		if (write(to, tblock[0], writesize) != writesize) {
46424181Smckusick #endif RDUMP
46525219Smckusick 			kill(master, SIGUSR1);
46625219Smckusick 			for (;;)
46725219Smckusick 				sigpause(0);
46818012Smckusick 		}
46924181Smckusick 		toggle ^= 1;
47024181Smckusick 		flock(next[toggle], LOCK_UN);	/* Next slave's turn */
47124181Smckusick 	}					/* Also jolts him awake */
47225219Smckusick 	if (nread != 0) {
47325219Smckusick 		perror("  DUMP: error reading command pipe");
47425219Smckusick 		dumpabort();
47518012Smckusick 	}
47618012Smckusick }
47719947Smckusick 
/*
 * A read from a pipe may return less than was asked for, and a write
 * may stop short if a signal arrives, so keep calling func until count
 * bytes have been transferred, or until it reports end of file or an
 * error.
 *
 * func is called as (*func)(fd, buf, nbytes), like read() or write().
 * Returns func's negative result if the last call failed, otherwise
 * the number of bytes actually transferred.
 */
int
atomic(func, fd, buf, count)
	int (*func)(), fd, count;
	char *buf;
{
	int n, left;

	left = count;
	for (;;) {
		n = (*func)(fd, buf, left);
		if (n <= 0)
			break;		/* end of file or error */
		left -= n;
		if (left <= 0)
			break;		/* request satisfied */
		buf += n;
	}
	return (n < 0 ? n : count - left);
}
493