xref: /csrg-svn/sys/ufs/ffs/ufs_disksubr.c (revision 63376)
123397Smckusick /*
2*63376Sbostic  * Copyright (c) 1982, 1986, 1988, 1993
3*63376Sbostic  *	The Regents of the University of California.  All rights reserved.
423397Smckusick  *
544537Sbostic  * %sccs.include.redist.c%
637736Smckusick  *
7*63376Sbostic  *	@(#)ufs_disksubr.c	8.1 (Berkeley) 06/11/93
823397Smckusick  */
916Sbill 
1051508Sbostic #include <sys/param.h>
1151508Sbostic #include <sys/systm.h>
1251508Sbostic #include <sys/buf.h>
1351508Sbostic #include <sys/disklabel.h>
1451508Sbostic #include <sys/syslog.h>
1530740Skarels 
1616Sbill /*
1751508Sbostic  * Seek sort for disks.  We depend on the driver which calls us using b_resid
1851508Sbostic  * as the current cylinder number.
192626Swnj  *
2051508Sbostic  * The argument dp structure holds a b_actf activity chain pointer on which we
2151508Sbostic  * keep two queues, sorted in ascending cylinder order.  The first queue holds
2251508Sbostic  * those requests which are positioned after the current cylinder (in the first
2351508Sbostic  * request); the second holds requests which came in after their cylinder number
2451508Sbostic  * was passed.  Thus we implement a one way scan, retracting after reaching the
2551508Sbostic  * end of the drive to the first request on the second queue, at which time it
2651508Sbostic  * becomes the first queue.
272626Swnj  *
2851508Sbostic  * A one-way scan is natural because of the way UNIX read-ahead blocks are
2951508Sbostic  * allocated.
3016Sbill  */
3116Sbill 
3216Sbill #define	b_cylin	b_resid
3316Sbill 
3451508Sbostic void
3516Sbill disksort(dp, bp)
362626Swnj 	register struct buf *dp, *bp;
3716Sbill {
3816Sbill 	register struct buf *ap;
3916Sbill 
402626Swnj 	/*
412626Swnj 	 * If nothing on the activity queue, then
422626Swnj 	 * we become the only thing.
432626Swnj 	 */
4416Sbill 	ap = dp->b_actf;
4516Sbill 	if(ap == NULL) {
4616Sbill 		dp->b_actf = bp;
4756391Smckusick 		bp->b_actf = NULL;
4816Sbill 		return;
4916Sbill 	}
502626Swnj 	/*
512626Swnj 	 * If we lie after the first (currently active)
522626Swnj 	 * request, then we must locate the second request list
532626Swnj 	 * and add ourselves to it.
542626Swnj 	 */
552626Swnj 	if (bp->b_cylin < ap->b_cylin) {
5656391Smckusick 		while (ap->b_actf) {
572626Swnj 			/*
582626Swnj 			 * Check for an ``inversion'' in the
592626Swnj 			 * normally ascending cylinder numbers,
602626Swnj 			 * indicating the start of the second request list.
612626Swnj 			 */
6256391Smckusick 			if (ap->b_actf->b_cylin < ap->b_cylin) {
632626Swnj 				/*
642626Swnj 				 * Search the second request list
652626Swnj 				 * for the first request at a larger
662626Swnj 				 * cylinder number.  We go before that;
672626Swnj 				 * if there is no such request, we go at end.
682626Swnj 				 */
692626Swnj 				do {
7056391Smckusick 					if (bp->b_cylin < ap->b_actf->b_cylin)
712626Swnj 						goto insert;
7256391Smckusick 					if (bp->b_cylin == ap->b_actf->b_cylin &&
7356391Smckusick 					    bp->b_blkno < ap->b_actf->b_blkno)
7432573Skarels 						goto insert;
7556391Smckusick 					ap = ap->b_actf;
7656391Smckusick 				} while (ap->b_actf);
772626Swnj 				goto insert;		/* after last */
782626Swnj 			}
7956391Smckusick 			ap = ap->b_actf;
8016Sbill 		}
812626Swnj 		/*
822626Swnj 		 * No inversions... we will go after the last, and
832626Swnj 		 * be the first request in the second request list.
842626Swnj 		 */
852626Swnj 		goto insert;
8616Sbill 	}
872626Swnj 	/*
882626Swnj 	 * Request is at/after the current request...
892626Swnj 	 * sort in the first request list.
902626Swnj 	 */
9156391Smckusick 	while (ap->b_actf) {
922626Swnj 		/*
932626Swnj 		 * We want to go after the current request
942626Swnj 		 * if there is an inversion after it (i.e. it is
952626Swnj 		 * the end of the first request list), or if
962626Swnj 		 * the next request is a larger cylinder than our request.
972626Swnj 		 */
9856391Smckusick 		if (ap->b_actf->b_cylin < ap->b_cylin ||
9956391Smckusick 		    bp->b_cylin < ap->b_actf->b_cylin ||
10056391Smckusick 		    (bp->b_cylin == ap->b_actf->b_cylin &&
10156391Smckusick 		    bp->b_blkno < ap->b_actf->b_blkno))
1022626Swnj 			goto insert;
10356391Smckusick 		ap = ap->b_actf;
1042626Swnj 	}
1052626Swnj 	/*
1062626Swnj 	 * Neither a second list nor a larger
1072626Swnj 	 * request... we go at the end of the first list,
1082626Swnj 	 * which is the same as the end of the whole schebang.
1092626Swnj 	 */
1102626Swnj insert:
11156391Smckusick 	bp->b_actf = ap->b_actf;
11256391Smckusick 	ap->b_actf = bp;
11316Sbill }
11430533Skarels 
11530533Skarels /*
11651508Sbostic  * Attempt to read a disk label from a device using the indicated stategy
11751508Sbostic  * routine.  The label must be partly set up before this: secpercyl and
11851508Sbostic  * anything required in the strategy routine (e.g., sector size) must be
11951508Sbostic  * filled in before calling us.  Returns NULL on success and an error
12051508Sbostic  * string on failure.
12130740Skarels  */
12230740Skarels char *
12330740Skarels readdisklabel(dev, strat, lp)
12430740Skarels 	dev_t dev;
12530740Skarels 	int (*strat)();
12630740Skarels 	register struct disklabel *lp;
12730740Skarels {
12830740Skarels 	register struct buf *bp;
12930740Skarels 	struct disklabel *dlp;
13030740Skarels 	char *msg = NULL;
13130740Skarels 
13230740Skarels 	if (lp->d_secperunit == 0)
13330740Skarels 		lp->d_secperunit = 0x1fffffff;
13430740Skarels 	lp->d_npartitions = 1;
13530740Skarels 	if (lp->d_partitions[0].p_size == 0)
13630740Skarels 		lp->d_partitions[0].p_size = 0x1fffffff;
13730740Skarels 	lp->d_partitions[0].p_offset = 0;
13830740Skarels 
13934102Skarels 	bp = geteblk((int)lp->d_secsize);
14030740Skarels 	bp->b_dev = dev;
14130740Skarels 	bp->b_blkno = LABELSECTOR;
14232068Skarels 	bp->b_bcount = lp->d_secsize;
14330740Skarels 	bp->b_flags = B_BUSY | B_READ;
14430740Skarels 	bp->b_cylin = LABELSECTOR / lp->d_secpercyl;
14530740Skarels 	(*strat)(bp);
14637736Smckusick 	if (biowait(bp)) {
14730740Skarels 		msg = "I/O error";
14832068Skarels 	} else for (dlp = (struct disklabel *)bp->b_un.b_addr;
14932068Skarels 	    dlp <= (struct disklabel *)(bp->b_un.b_addr+DEV_BSIZE-sizeof(*dlp));
15032068Skarels 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
15132068Skarels 		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
15232068Skarels 			if (msg == NULL)
15332068Skarels 				msg = "no disk label";
15439836Smckusick 		} else if (dlp->d_npartitions > MAXPARTITIONS ||
15539836Smckusick 			   dkcksum(dlp) != 0)
15630740Skarels 			msg = "disk label corrupted";
15732068Skarels 		else {
15830740Skarels 			*lp = *dlp;
15932068Skarels 			msg = NULL;
16032068Skarels 			break;
16132068Skarels 		}
16230740Skarels 	}
16330740Skarels 	bp->b_flags = B_INVAL | B_AGE;
16430740Skarels 	brelse(bp);
16530740Skarels 	return (msg);
16630740Skarels }
16730740Skarels 
16830740Skarels /*
16951508Sbostic  * Check new disk label for sensibility before setting it.
17032573Skarels  */
17151508Sbostic int
17232573Skarels setdisklabel(olp, nlp, openmask)
17332573Skarels 	register struct disklabel *olp, *nlp;
17432573Skarels 	u_long openmask;
17532573Skarels {
17632573Skarels 	register i;
17732573Skarels 	register struct partition *opp, *npp;
17832573Skarels 
17932573Skarels 	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
18032573Skarels 	    dkcksum(nlp) != 0)
18132573Skarels 		return (EINVAL);
18234473Smckusick 	while ((i = ffs((long)openmask)) != 0) {
18332573Skarels 		i--;
18432573Skarels 		openmask &= ~(1 << i);
18532573Skarels 		if (nlp->d_npartitions <= i)
18632573Skarels 			return (EBUSY);
18732573Skarels 		opp = &olp->d_partitions[i];
18832573Skarels 		npp = &nlp->d_partitions[i];
18932573Skarels 		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
19032573Skarels 			return (EBUSY);
19132573Skarels 		/*
19232573Skarels 		 * Copy internally-set partition information
19332573Skarels 		 * if new label doesn't include it.		XXX
19432573Skarels 		 */
19532573Skarels 		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
19632573Skarels 			npp->p_fstype = opp->p_fstype;
19732573Skarels 			npp->p_fsize = opp->p_fsize;
19832573Skarels 			npp->p_frag = opp->p_frag;
19932573Skarels 			npp->p_cpg = opp->p_cpg;
20032573Skarels 		}
20132573Skarels 	}
20234102Skarels  	nlp->d_checksum = 0;
20334102Skarels  	nlp->d_checksum = dkcksum(nlp);
20432573Skarels 	*olp = *nlp;
20532573Skarels 	return (0);
20632573Skarels }
20732573Skarels 
/*
 * Layout of a disk minor number: the low three bits select the partition,
 * the bits above them the unit.  (This really belongs in a shared header.)
 */
#define	dkpart(dev)		(minor(dev) & 0x7)
#define	dkunit(dev)		(minor(dev) >> 3)
#define	dkminor(unit, part)	(((unit) << 3) | (part))
21232573Skarels 
21332573Skarels /*
21432573Skarels  * Write disk label back to device after modification.
21532573Skarels  */
21651508Sbostic int
21732573Skarels writedisklabel(dev, strat, lp)
21832573Skarels 	dev_t dev;
21932573Skarels 	int (*strat)();
22032573Skarels 	register struct disklabel *lp;
22132573Skarels {
22232573Skarels 	struct buf *bp;
22332573Skarels 	struct disklabel *dlp;
22432573Skarels 	int labelpart;
22532573Skarels 	int error = 0;
22632573Skarels 
22732573Skarels 	labelpart = dkpart(dev);
22832573Skarels 	if (lp->d_partitions[labelpart].p_offset != 0) {
22932573Skarels 		if (lp->d_partitions[0].p_offset != 0)
23032573Skarels 			return (EXDEV);			/* not quite right */
23132573Skarels 		labelpart = 0;
23232573Skarels 	}
23334102Skarels 	bp = geteblk((int)lp->d_secsize);
23432573Skarels 	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
23532573Skarels 	bp->b_blkno = LABELSECTOR;
23632573Skarels 	bp->b_bcount = lp->d_secsize;
23732573Skarels 	bp->b_flags = B_READ;
23832573Skarels 	(*strat)(bp);
23937736Smckusick 	if (error = biowait(bp))
24034102Skarels 		goto done;
24134102Skarels 	for (dlp = (struct disklabel *)bp->b_un.b_addr;
24234102Skarels 	    dlp <= (struct disklabel *)
24334102Skarels 	      (bp->b_un.b_addr + lp->d_secsize - sizeof(*dlp));
24434102Skarels 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
24534102Skarels 		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
24634102Skarels 		    dkcksum(dlp) == 0) {
24734102Skarels 			*dlp = *lp;
24834102Skarels 			bp->b_flags = B_WRITE;
24934102Skarels 			(*strat)(bp);
25037736Smckusick 			error = biowait(bp);
25134102Skarels 			goto done;
25234102Skarels 		}
25332573Skarels 	}
25434102Skarels 	error = ESRCH;
25534102Skarels done:
25632573Skarels 	brelse(bp);
25732573Skarels 	return (error);
25832573Skarels }
25932573Skarels 
26032573Skarels /*
26130533Skarels  * Compute checksum for disk label.
26230533Skarels  */
26330533Skarels dkcksum(lp)
26430533Skarels 	register struct disklabel *lp;
26530533Skarels {
26630533Skarels 	register u_short *start, *end;
26730533Skarels 	register u_short sum = 0;
26830533Skarels 
26930533Skarels 	start = (u_short *)lp;
27030533Skarels 	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
27130533Skarels 	while (start < end)
27230533Skarels 		sum ^= *start++;
27330533Skarels 	return (sum);
27430533Skarels }
27534535Skarels 
27634535Skarels /*
27734535Skarels  * Disk error is the preface to plaintive error messages
27834535Skarels  * about failing disk transfers.  It prints messages of the form
27934637Skarels 
28034637Skarels hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
28134637Skarels 
28234535Skarels  * if the offset of the error in the transfer and a disk label
28334535Skarels  * are both available.  blkdone should be -1 if the position of the error
28434535Skarels  * is unknown; the disklabel pointer may be null from drivers that have not
28534535Skarels  * been converted to use them.  The message is printed with printf
28634535Skarels  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
28734535Skarels  * The message should be completed (with at least a newline) with printf
28834535Skarels  * or addlog, respectively.  There is no trailing space.
28934535Skarels  */
29051508Sbostic void
29134535Skarels diskerr(bp, dname, what, pri, blkdone, lp)
29234535Skarels 	register struct buf *bp;
29334535Skarels 	char *dname, *what;
29434535Skarels 	int pri, blkdone;
29534535Skarels 	register struct disklabel *lp;
29634535Skarels {
29734535Skarels 	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
29849110Skarels 	register void (*pr) __P((const char *, ...));
29934535Skarels 	char partname = 'a' + part;
30049110Skarels 	int sn;
30134535Skarels 
30234535Skarels 	if (pri != LOG_PRINTF) {
30334535Skarels 		log(pri, "");
30434535Skarels 		pr = addlog;
30534535Skarels 	} else
30634535Skarels 		pr = printf;
30734535Skarels 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
30834535Skarels 	    bp->b_flags & B_READ ? "read" : "writ");
30934535Skarels 	sn = bp->b_blkno;
31034535Skarels 	if (bp->b_bcount <= DEV_BSIZE)
31134535Skarels 		(*pr)("%d", sn);
31234535Skarels 	else {
31334535Skarels 		if (blkdone >= 0) {
31434535Skarels 			sn += blkdone;
31534535Skarels 			(*pr)("%d of ", sn);
31634535Skarels 		}
31734535Skarels 		(*pr)("%d-%d", bp->b_blkno,
31834535Skarels 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
31934535Skarels 	}
32034711Skarels 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
32134711Skarels #ifdef tahoe
32234711Skarels 		sn *= DEV_BSIZE / lp->d_secsize;		/* XXX */
32334711Skarels #endif
32434535Skarels 		sn += lp->d_partitions[part].p_offset;
32534637Skarels 		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
32634637Skarels 		    sn / lp->d_secpercyl);
32734637Skarels 		sn %= lp->d_secpercyl;
32835703Stef 		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
32934535Skarels 	}
33034535Skarels }
331