/* xref: /csrg-svn/sbin/dump/tape.c (revision 25219) */
/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#ifndef lint
/* SCCS identification string; compiled out when lint is defined. */
static char sccsid[] = "@(#)tape.c	5.3 (Berkeley) 10/16/85";
#endif /* not lint */

#include <sys/file.h>
#include <errno.h>
#include "dump.h"

1410911Ssam char	(*tblock)[TP_BSIZE];	/* Pointer to malloc()ed buffer for tape */
1510911Ssam int	writesize;		/* Size of malloc()ed buffer for tape */
164774Smckusic int	trecno = 0;
17*25219Smckusick extern int ntrec;		/* blocking factor on tape */
18*25219Smckusick extern int cartridge;
19*25219Smckusick extern int read(), write();
20*25219Smckusick #ifdef RDUMP
21*25219Smckusick extern char *host;
22*25219Smckusick #endif RDUMP
231425Sroot 
2410911Ssam /*
2524181Smckusick  * Concurrent dump mods (Caltech) - disk block reading and tape writing
2618012Smckusick  * are exported to several slave processes.  While one slave writes the
2718012Smckusick  * tape, the others read disk blocks; they pass control of the tape in
2824181Smckusick  * a ring via flock().	The parent process traverses the filesystem and
29*25219Smckusick  * sends spclrec()'s and lists of daddr's to the slaves via pipes.
3010911Ssam  */
3118012Smckusick struct req {			/* instruction packets sent to slaves */
3218012Smckusick 	daddr_t dblk;
3318012Smckusick 	int count;
3418012Smckusick } *req;
3518012Smckusick int reqsiz;
3618012Smckusick 
#define SLAVES 3		/* 1 slave writing, 1 reading, 1 for slack */
int slavefd[SLAVES];		/* pipes from master to each slave */
int slavepid[SLAVES];		/* used by killall() */
int rotor;			/* next slave to be instructed */
int master;			/* pid of master, for sending error signals */
int tenths;			/* length of tape used per block written */

4410911Ssam alloctape()
4510911Ssam {
46*25219Smckusick 	int pgoff = getpagesize() - 1;
4710911Ssam 
4810911Ssam 	writesize = ntrec * TP_BSIZE;
49*25219Smckusick 	reqsiz = ntrec * sizeof(struct req);
5024181Smckusick 	/*
51*25219Smckusick 	 * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode
52*25219Smckusick 	 * (see DEC TU80 User's Guide).  The shorter gaps of 6250-bpi require
53*25219Smckusick 	 * repositioning after stopping, i.e, streaming mode, where the gap is
54*25219Smckusick 	 * variable, 0.30" to 0.45".  The gap is maximal when the tape stops.
5524181Smckusick 	 */
56*25219Smckusick 	tenths = writesize/density + (cartridge ? 16 : density == 625 ? 5 : 8);
57*25219Smckusick 	/*
58*25219Smckusick 	 * Allocate tape buffer contiguous with the array of instruction
59*25219Smckusick 	 * packets, so flusht() can write them together with one write().
60*25219Smckusick 	 * Align tape buffer on page boundary to speed up tape write().
61*25219Smckusick 	 */
6224181Smckusick 	req = (struct req *)malloc(reqsiz + writesize + pgoff);
6324181Smckusick 	if (req == NULL)
6424181Smckusick 		return(0);
6524181Smckusick 	tblock = (char (*)[TP_BSIZE]) (((long)&req[ntrec] + pgoff) &~ pgoff);
66*25219Smckusick 	req = (struct req *)tblock - ntrec;
6724181Smckusick 	return(1);
6810911Ssam }
6910911Ssam 
70*25219Smckusick 
711425Sroot taprec(dp)
725329Smckusic 	char *dp;
731425Sroot {
7418012Smckusick 	req[trecno].dblk = (daddr_t)0;
7518012Smckusick 	req[trecno].count = 1;
7624181Smckusick 	*(union u_spcl *)(*tblock++) = *(union u_spcl *)dp;	/* movc3 */
7724181Smckusick 	trecno++;
781425Sroot 	spcl.c_tapea++;
7924181Smckusick 	if(trecno >= ntrec)
801425Sroot 		flusht();
811425Sroot }
821425Sroot 
834774Smckusic dmpblk(blkno, size)
844774Smckusic 	daddr_t blkno;
854774Smckusic 	int size;
861425Sroot {
87*25219Smckusick 	int avail, tpblks, dblkno;
881425Sroot 
895329Smckusic 	dblkno = fsbtodb(sblock, blkno);
9018012Smckusick 	tpblks = size / TP_BSIZE;
9118012Smckusick 	while ((avail = MIN(tpblks, ntrec - trecno)) > 0) {
9218012Smckusick 		req[trecno].dblk = dblkno;
9318012Smckusick 		req[trecno].count = avail;
94*25219Smckusick 		trecno += avail;
954774Smckusic 		spcl.c_tapea += avail;
96*25219Smckusick 		if (trecno >= ntrec)
9718012Smckusick 			flusht();
985329Smckusic 		dblkno += avail * (TP_BSIZE / DEV_BSIZE);
995329Smckusic 		tpblks -= avail;
1004774Smckusic 	}
1011425Sroot }
1021425Sroot 
int	nogripe = 0;	/* set by tperror(); suppresses the "Change Tapes" notice in close_rewind() */

10518012Smckusick tperror() {
10618012Smckusick 	if (pipeout) {
10718012Smckusick 		msg("Tape write error on %s\n", tape);
10818012Smckusick 		msg("Cannot recover\n");
10918012Smckusick 		dumpabort();
11018012Smckusick 		/* NOTREACHED */
11118012Smckusick 	}
112*25219Smckusick 	msg("Tape write error %d feet into tape %d\n", asize/120L, tapeno);
11318012Smckusick 	broadcast("TAPE ERROR!\n");
11418012Smckusick 	if (!query("Do you want to restart?"))
11518012Smckusick 		dumpabort();
11618012Smckusick 	msg("This tape will rewind.  After it is rewound,\n");
11718012Smckusick 	msg("replace the faulty tape with a new one;\n");
11818012Smckusick 	msg("this dump volume will be rewritten.\n");
11924181Smckusick 	killall();
12018012Smckusick 	nogripe = 1;
12118012Smckusick 	close_rewind();
12218012Smckusick 	Exit(X_REWRITE);
12318012Smckusick }
12418012Smckusick 
/*
 * SIGPIPE handler (installed by enslave()): report the broken pipe
 * and abort the dump.
 */
int
sigpipe()
{

	msg("Broken pipe\n");
	dumpabort();
}

#ifdef RDUMP
/*
 * compatibility routine
 *
 * Calls spclrec() ntrec times.  The value passed in i is ignored; the
 * parameter is only reused as the loop counter.
 */
int
tflush(i)
	int i;
{

	for (i = 0; i < ntrec; i++)
		spclrec();
}
#endif /* RDUMP */

1451425Sroot flusht()
1461425Sroot {
147*25219Smckusick 	int siz = (char *)tblock - (char *)req;
1481425Sroot 
149*25219Smckusick 	if (atomic(write, slavefd[rotor], req, siz) != siz) {
150*25219Smckusick 		perror("  DUMP: error writing command pipe");
15124181Smckusick 		dumpabort();
15224181Smckusick 	}
15318012Smckusick 	if (++rotor >= SLAVES) rotor = 0;
15418012Smckusick 	tblock = (char (*)[TP_BSIZE]) &req[ntrec];
1551425Sroot 	trecno = 0;
15624181Smckusick 	asize += tenths;
15710911Ssam 	blockswritten += ntrec;
15812331Smckusick 	if (!pipeout && asize > tsize) {
1591425Sroot 		close_rewind();
1601425Sroot 		otape();
1611425Sroot 	}
1621425Sroot 	timeest();
1631425Sroot }
1641425Sroot 
1651425Sroot rewind()
1661425Sroot {
16724181Smckusick 	int f;
16812331Smckusick 
16912331Smckusick 	if (pipeout)
17012331Smckusick 		return;
17118012Smckusick 	for (f = 0; f < SLAVES; f++)
17218012Smckusick 		close(slavefd[f]);
17318012Smckusick 	while (wait(NULL) >= 0)    ;	/* wait for any signals from slaves */
17418012Smckusick 	msg("Tape rewinding\n");
17518012Smckusick #ifdef RDUMP
176*25219Smckusick 	if (host) {
177*25219Smckusick 		rmtclose();
178*25219Smckusick 		while (rmtopen(tape, 0) < 0)
179*25219Smckusick 			sleep(10);
180*25219Smckusick 		rmtclose();
181*25219Smckusick 		return;
182*25219Smckusick 	}
183*25219Smckusick #endif RDUMP
1843214Swnj 	close(to);
1853214Swnj 	while ((f = open(tape, 0)) < 0)
1863214Swnj 		sleep (10);
1873214Swnj 	close(f);
1881425Sroot }
1891425Sroot 
1901425Sroot close_rewind()
1911425Sroot {
19218012Smckusick 	rewind();
19318012Smckusick 	if (!nogripe) {
1941425Sroot 		msg("Change Tapes: Mount tape #%d\n", tapeno+1);
1951425Sroot 		broadcast("CHANGE TAPES!\7\7\n");
1961425Sroot 	}
19718012Smckusick 	while (!query("Is the new tape mounted and ready to go?"))
198*25219Smckusick 		if (query("Do you want to abort?")) {
1991425Sroot 			dumpabort();
200*25219Smckusick 			/*NOTREACHED*/
201*25219Smckusick 		}
2021425Sroot }
2031425Sroot 
2041425Sroot /*
20518012Smckusick  *	We implement taking and restoring checkpoints on the tape level.
2061425Sroot  *	When each tape is opened, a new process is created by forking; this
2071425Sroot  *	saves all of the necessary context in the parent.  The child
2081425Sroot  *	continues the dump; the parent waits around, saving the context.
2091425Sroot  *	If the child returns X_REWRITE, then it had problems writing that tape;
2101425Sroot  *	this causes the parent to fork again, duplicating the context, and
2111425Sroot  *	everything continues as if nothing had happened.
2121425Sroot  */
2131425Sroot 
2141425Sroot otape()
2151425Sroot {
2161425Sroot 	int	parentpid;
2171425Sroot 	int	childpid;
2181425Sroot 	int	status;
2191425Sroot 	int	waitpid;
220*25219Smckusick 	int	(*interrupt)() = signal(SIGINT, SIG_IGN);
2211425Sroot 
2221425Sroot 	parentpid = getpid();
2231425Sroot 
2241425Sroot     restore_check_point:
225*25219Smckusick 	signal(SIGINT, interrupt);
226*25219Smckusick 	/*
227*25219Smckusick 	 *	All signals are inherited...
228*25219Smckusick 	 */
2291425Sroot 	childpid = fork();
23018012Smckusick 	if (childpid < 0) {
2311425Sroot 		msg("Context save fork fails in parent %d\n", parentpid);
2321425Sroot 		Exit(X_ABORT);
2331425Sroot 	}
23418012Smckusick 	if (childpid != 0) {
2351425Sroot 		/*
2361425Sroot 		 *	PARENT:
2371425Sroot 		 *	save the context by waiting
2381425Sroot 		 *	until the child doing all of the work returns.
23918012Smckusick 		 *	don't catch the interrupt
2401425Sroot 		 */
241*25219Smckusick 		signal(SIGINT, SIG_IGN);
2421425Sroot #ifdef TDEBUG
2431425Sroot 		msg("Tape: %d; parent process: %d child process %d\n",
2441425Sroot 			tapeno+1, parentpid, childpid);
2451425Sroot #endif TDEBUG
24618012Smckusick 		while ((waitpid = wait(&status)) != childpid)
24718012Smckusick 			msg("Parent %d waiting for child %d has another child %d return\n",
24818012Smckusick 				parentpid, childpid, waitpid);
24918012Smckusick 		if (status & 0xFF) {
2501425Sroot 			msg("Child %d returns LOB status %o\n",
2511425Sroot 				childpid, status&0xFF);
2521425Sroot 		}
2531425Sroot 		status = (status >> 8) & 0xFF;
2541425Sroot #ifdef TDEBUG
25518012Smckusick 		switch(status) {
2561425Sroot 			case X_FINOK:
2571425Sroot 				msg("Child %d finishes X_FINOK\n", childpid);
2581425Sroot 				break;
2591425Sroot 			case X_ABORT:
2601425Sroot 				msg("Child %d finishes X_ABORT\n", childpid);
2611425Sroot 				break;
2621425Sroot 			case X_REWRITE:
2631425Sroot 				msg("Child %d finishes X_REWRITE\n", childpid);
2641425Sroot 				break;
2651425Sroot 			default:
26618012Smckusick 				msg("Child %d finishes unknown %d\n",
267*25219Smckusick 					childpid, status);
2681425Sroot 				break;
2691425Sroot 		}
2701425Sroot #endif TDEBUG
27118012Smckusick 		switch(status) {
2721425Sroot 			case X_FINOK:
2731425Sroot 				Exit(X_FINOK);
2741425Sroot 			case X_ABORT:
2751425Sroot 				Exit(X_ABORT);
2761425Sroot 			case X_REWRITE:
2771425Sroot 				goto restore_check_point;
2781425Sroot 			default:
2791425Sroot 				msg("Bad return code from dump: %d\n", status);
2801425Sroot 				Exit(X_ABORT);
2811425Sroot 		}
2821425Sroot 		/*NOTREACHED*/
2831425Sroot 	} else {	/* we are the child; just continue */
2841425Sroot #ifdef TDEBUG
2851425Sroot 		sleep(4);	/* allow time for parent's message to get out */
2861425Sroot 		msg("Child on Tape %d has parent %d, my pid = %d\n",
2871425Sroot 			tapeno+1, parentpid, getpid());
288*25219Smckusick #endif TDEBUG
28918012Smckusick #ifdef RDUMP
290*25219Smckusick 		while ((to = (host ? rmtopen(tape, 2) :
291*25219Smckusick 			pipeout ? 1 : creat(tape, 0666))) < 0)
292*25219Smckusick #else RDUMP
29318012Smckusick 		while ((to = pipeout ? 1 : creat(tape, 0666)) < 0)
29424181Smckusick #endif RDUMP
29518012Smckusick 			if (!query("Cannot open tape.  Do you want to retry the open?"))
29618012Smckusick 				dumpabort();
2971425Sroot 
29818012Smckusick 		enslave();  /* Share open tape file descriptor with slaves */
29918012Smckusick 
3001425Sroot 		asize = 0;
3011425Sroot 		tapeno++;		/* current tape sequence */
3021425Sroot 		newtape++;		/* new tape signal */
3031425Sroot 		spcl.c_volume++;
3041425Sroot 		spcl.c_type = TS_TAPE;
3051425Sroot 		spclrec();
3061425Sroot 		if (tapeno > 1)
3071425Sroot 			msg("Tape %d begins with blocks from ino %d\n",
3081425Sroot 				tapeno, ino);
3091425Sroot 	}
3101425Sroot }
3111425Sroot 
3121425Sroot dumpabort()
3131425Sroot {
31418012Smckusick 	if (master != 0 && master != getpid())
315*25219Smckusick 		kill(master, SIGTERM);	/* Signals master to call dumpabort */
31624181Smckusick 	else {
31724181Smckusick 		killall();
31824181Smckusick 		msg("The ENTIRE dump is aborted.\n");
31924181Smckusick 	}
3201425Sroot 	Exit(X_ABORT);
3211425Sroot }
3221425Sroot 
/*
 * Terminate the process with the given exit status.  With TDEBUG
 * defined the pid and status are logged first.  Does not return.
 */
int
Exit(status)
	int status;
{
#ifdef TDEBUG
	msg("pid = %d exits with status %d\n", getpid(), status);
#endif /* TDEBUG */
	exit(status);
}

/*
 * could use pipe() for this if flock() worked on pipes
 *
 * Create a temporary lock file and open it twice: fd[1] comes from
 * creat(), fd[0] from a separate open().  The name is unlinked right
 * away, so the file lives only as long as the descriptors do.
 *
 * Returns 0 on success, or the negative result of the failing
 * creat()/open().  No descriptor is left open on failure.
 */
int
lockfile(fd)
	int fd[2];
{
	char tmpname[20];	/* exactly fits the template and its NUL */

	strcpy(tmpname, "/tmp/dumplockXXXXXX");
	mktemp(tmpname);
	if ((fd[1] = creat(tmpname, 0400)) < 0)
		return(fd[1]);
	fd[0] = open(tmpname, 0);
	unlink(tmpname);
	if (fd[0] < 0) {
		close(fd[1]);	/* do not leak the first descriptor */
		return(fd[0]);
	}
	return(0);
}

34818012Smckusick enslave()
34918012Smckusick {
35024181Smckusick 	int first[2], prev[2], next[2], cmd[2];     /* file descriptors */
35124181Smckusick 	register int i, j;
35218012Smckusick 
35318012Smckusick 	master = getpid();
354*25219Smckusick 	signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */
355*25219Smckusick 	signal(SIGPIPE, sigpipe);
356*25219Smckusick 	signal(SIGUSR1, tperror);    /* Slave sends SIGUSR1 on tape errors */
35724181Smckusick 	lockfile(first);
35824181Smckusick 	for (i = 0; i < SLAVES; i++) {
35924181Smckusick 		if (i == 0) {
36024181Smckusick 			prev[0] = first[1];
36124181Smckusick 			prev[1] = first[0];
36224181Smckusick 		} else {
36324181Smckusick 			prev[0] = next[0];
36424181Smckusick 			prev[1] = next[1];
36524181Smckusick 			flock(prev[1], LOCK_EX);
36624181Smckusick 		}
36724181Smckusick 		next[0] = first[0];
36824181Smckusick 		next[1] = first[1];	    /* Last slave loops back */
36924181Smckusick 		if ((i < SLAVES-1 && lockfile(next) < 0) || pipe(cmd) < 0
37024181Smckusick 				|| (slavepid[i] = fork()) < 0) {
37124181Smckusick 			perror("  DUMP: too many slaves (recompile smaller)");
37218012Smckusick 			dumpabort();
37318012Smckusick 		}
37418012Smckusick 		slavefd[i] = cmd[1];
375*25219Smckusick 		if (slavepid[i] == 0) { 	    /* Slave starts up here */
37618012Smckusick 			for (j = 0; j <= i; j++)
37718012Smckusick 				close(slavefd[j]);
378*25219Smckusick 			signal(SIGINT, SIG_IGN);    /* Master handles this */
379*25219Smckusick 			doslave(cmd[0], prev, next);
38018012Smckusick 			Exit(X_FINOK);
38118012Smckusick 		}
38218012Smckusick 		close(cmd[0]);
38324181Smckusick 		if (i > 0) {
38424181Smckusick 			close(prev[0]);
38524181Smckusick 			close(prev[1]);
38624181Smckusick 		}
38718012Smckusick 	}
38824181Smckusick 	close(first[0]);
38924181Smckusick 	close(first[1]);
39024181Smckusick 	master = 0; rotor = 0;
39118012Smckusick }
39218012Smckusick 
39324181Smckusick killall()
39418012Smckusick {
39524181Smckusick 	register int i;
39619982Smckusick 
39724181Smckusick 	for (i = 0; i < SLAVES; i++)
39824181Smckusick 		if (slavepid[i] > 0)
39924181Smckusick 			kill(slavepid[i], SIGKILL);
40018012Smckusick }
40118012Smckusick 
40224181Smckusick /*
40324181Smckusick  * Synchronization - each process has a lockfile, and shares file
40424181Smckusick  * descriptors to the following process's lockfile.  When our write
40524181Smckusick  * completes, we release our lock on the following process's lock-
40624181Smckusick  * file, allowing the following process to lock it and proceed. We
40724181Smckusick  * get the lock back for the next cycle by swapping descriptors.
40824181Smckusick  */
409*25219Smckusick doslave(cmd, prev, next)
410*25219Smckusick 	register int cmd, prev[2], next[2];
41119982Smckusick {
412*25219Smckusick 	register int nread, toggle = 0;
41319982Smckusick 
41418012Smckusick 	close(fi);
415*25219Smckusick 	if ((fi = open(disk, 0)) < 0) { 	/* Need our own seek pointer */
41624181Smckusick 		perror("  DUMP: slave couldn't reopen disk");
417*25219Smckusick 		dumpabort();
41818012Smckusick 	}
41924181Smckusick 	/*
420*25219Smckusick 	 * Get list of blocks to dump, read the blocks into tape buffer
42124181Smckusick 	 */
422*25219Smckusick 	while ((nread = atomic(read, cmd, req, reqsiz)) == reqsiz) {
42318012Smckusick 		register struct req *p = req;
42418012Smckusick 		for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) {
42518012Smckusick 			if (p->dblk) {
42618012Smckusick 				bread(p->dblk, tblock[trecno],
427*25219Smckusick 					p->count * TP_BSIZE);
42818012Smckusick 			} else {
429*25219Smckusick 				if (p->count != 1 || atomic(read, cmd,
430*25219Smckusick 				    tblock[trecno], TP_BSIZE) != TP_BSIZE) {
43124181Smckusick 					msg("Master/slave protocol botched");
43224181Smckusick 					dumpabort();
43324181Smckusick 				}
43418012Smckusick 			}
43518012Smckusick 		}
43624181Smckusick 		flock(prev[toggle], LOCK_EX);	/* Wait our turn */
437*25219Smckusick 
43818012Smckusick #ifdef RDUMP
439*25219Smckusick 		if ((host ? rmtwrite(tblock[0], writesize)
440*25219Smckusick 			: write(to, tblock[0], writesize)) != writesize) {
441*25219Smckusick #else RDUMP
442*25219Smckusick 		if (write(to, tblock[0], writesize) != writesize) {
44324181Smckusick #endif RDUMP
444*25219Smckusick 			kill(master, SIGUSR1);
445*25219Smckusick 			for (;;)
446*25219Smckusick 				sigpause(0);
44718012Smckusick 		}
44824181Smckusick 		toggle ^= 1;
44924181Smckusick 		flock(next[toggle], LOCK_UN);	/* Next slave's turn */
45024181Smckusick 	}					/* Also jolts him awake */
451*25219Smckusick 	if (nread != 0) {
452*25219Smckusick 		perror("  DUMP: error reading command pipe");
453*25219Smckusick 		dumpabort();
45418012Smckusick 	}
45518012Smckusick }
45619947Smckusick 
/*
 * Since a read from a pipe may not return all we asked for,
 * or a write may not write all we ask if we get a signal,
 * loop until the count is satisfied (or error).
 *
 * func is read or write, called as (*func)(fd, buf, nbytes).
 * Returns count when everything was transferred, a smaller value if
 * func returned 0 first (end of file), or func's negative result on
 * error.  A call that fails with EINTR is retried rather than being
 * reported as an error.
 */
int
atomic(func, fd, buf, count)
	int (*func)(), fd, count;
	char *buf;
{
	int got, need = count;

	for (;;) {
		got = (*func)(fd, buf, need);
		if (got < 0 && errno == EINTR)
			continue;	/* interrupted before any transfer */
		if (got <= 0)
			break;		/* end of file or hard error */
		if ((need -= got) <= 0)
			break;		/* count satisfied */
		buf += got;
	}
	return (got < 0 ? got : count - need);
}