xref: /csrg-svn/sys/ufs/ffs/ufs_disksubr.c (revision 37736)
123397Smckusick /*
234535Skarels  * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
3*37736Smckusick  * All rights reserved.
423397Smckusick  *
5*37736Smckusick  * Redistribution and use in source and binary forms are permitted
6*37736Smckusick  * provided that the above copyright notice and this paragraph are
7*37736Smckusick  * duplicated in all such forms and that any documentation,
8*37736Smckusick  * advertising materials, and other materials related to such
9*37736Smckusick  * distribution and use acknowledge that the software was developed
10*37736Smckusick  * by the University of California, Berkeley.  The name of the
11*37736Smckusick  * University may not be used to endorse or promote products derived
12*37736Smckusick  * from this software without specific prior written permission.
13*37736Smckusick  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14*37736Smckusick  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15*37736Smckusick  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16*37736Smckusick  *
17*37736Smckusick  *	@(#)ufs_disksubr.c	7.12 (Berkeley) 05/09/89
1823397Smckusick  */
1916Sbill 
2030533Skarels #include "param.h"
2130533Skarels #include "systm.h"
2230533Skarels #include "buf.h"
2330533Skarels #include "disklabel.h"
2434535Skarels #include "syslog.h"
2530740Skarels #include "user.h"
2630740Skarels 
2716Sbill /*
282626Swnj  * Seek sort for disks.  We depend on the driver
292626Swnj  * which calls us using b_resid as the current cylinder number.
302626Swnj  *
312626Swnj  * The argument dp structure holds a b_actf activity chain pointer
322626Swnj  * on which we keep two queues, sorted in ascending cylinder order.
332626Swnj  * The first queue holds those requests which are positioned after
342626Swnj  * the current cylinder (in the first request); the second holds
352626Swnj  * requests which came in after their cylinder number was passed.
362626Swnj  * Thus we implement a one way scan, retracting after reaching the
372626Swnj  * end of the drive to the first request on the second queue,
382626Swnj  * at which time it becomes the first queue.
392626Swnj  *
402626Swnj  * A one-way scan is natural because of the way UNIX read-ahead
412626Swnj  * blocks are allocated.
4216Sbill  */
4316Sbill 
4416Sbill #define	b_cylin	b_resid
4516Sbill 
4616Sbill disksort(dp, bp)
472626Swnj 	register struct buf *dp, *bp;
4816Sbill {
4916Sbill 	register struct buf *ap;
5016Sbill 
512626Swnj 	/*
522626Swnj 	 * If nothing on the activity queue, then
532626Swnj 	 * we become the only thing.
542626Swnj 	 */
5516Sbill 	ap = dp->b_actf;
5616Sbill 	if(ap == NULL) {
5716Sbill 		dp->b_actf = bp;
5816Sbill 		dp->b_actl = bp;
5916Sbill 		bp->av_forw = NULL;
6016Sbill 		return;
6116Sbill 	}
622626Swnj 	/*
632626Swnj 	 * If we lie after the first (currently active)
642626Swnj 	 * request, then we must locate the second request list
652626Swnj 	 * and add ourselves to it.
662626Swnj 	 */
672626Swnj 	if (bp->b_cylin < ap->b_cylin) {
682626Swnj 		while (ap->av_forw) {
692626Swnj 			/*
702626Swnj 			 * Check for an ``inversion'' in the
712626Swnj 			 * normally ascending cylinder numbers,
722626Swnj 			 * indicating the start of the second request list.
732626Swnj 			 */
742626Swnj 			if (ap->av_forw->b_cylin < ap->b_cylin) {
752626Swnj 				/*
762626Swnj 				 * Search the second request list
772626Swnj 				 * for the first request at a larger
782626Swnj 				 * cylinder number.  We go before that;
792626Swnj 				 * if there is no such request, we go at end.
802626Swnj 				 */
812626Swnj 				do {
822626Swnj 					if (bp->b_cylin < ap->av_forw->b_cylin)
832626Swnj 						goto insert;
8432573Skarels 					if (bp->b_cylin == ap->av_forw->b_cylin &&
8532573Skarels 					    bp->b_blkno < ap->av_forw->b_blkno)
8632573Skarels 						goto insert;
872626Swnj 					ap = ap->av_forw;
882626Swnj 				} while (ap->av_forw);
892626Swnj 				goto insert;		/* after last */
902626Swnj 			}
912626Swnj 			ap = ap->av_forw;
9216Sbill 		}
932626Swnj 		/*
942626Swnj 		 * No inversions... we will go after the last, and
952626Swnj 		 * be the first request in the second request list.
962626Swnj 		 */
972626Swnj 		goto insert;
9816Sbill 	}
992626Swnj 	/*
1002626Swnj 	 * Request is at/after the current request...
1012626Swnj 	 * sort in the first request list.
1022626Swnj 	 */
1032626Swnj 	while (ap->av_forw) {
1042626Swnj 		/*
1052626Swnj 		 * We want to go after the current request
1062626Swnj 		 * if there is an inversion after it (i.e. it is
1072626Swnj 		 * the end of the first request list), or if
1082626Swnj 		 * the next request is a larger cylinder than our request.
1092626Swnj 		 */
1102626Swnj 		if (ap->av_forw->b_cylin < ap->b_cylin ||
11132573Skarels 		    bp->b_cylin < ap->av_forw->b_cylin ||
11232573Skarels 		    (bp->b_cylin == ap->av_forw->b_cylin &&
11332573Skarels 		    bp->b_blkno < ap->av_forw->b_blkno))
1142626Swnj 			goto insert;
1152626Swnj 		ap = ap->av_forw;
1162626Swnj 	}
1172626Swnj 	/*
1182626Swnj 	 * Neither a second list nor a larger
1192626Swnj 	 * request... we go at the end of the first list,
1202626Swnj 	 * which is the same as the end of the whole schebang.
1212626Swnj 	 */
1222626Swnj insert:
1232626Swnj 	bp->av_forw = ap->av_forw;
1242626Swnj 	ap->av_forw = bp;
1252626Swnj 	if (ap == dp->b_actl)
12616Sbill 		dp->b_actl = bp;
12716Sbill }
12830533Skarels 
12930533Skarels /*
13030740Skarels  * Attempt to read a disk label from a device
13130740Skarels  * using the indicated stategy routine.
13230740Skarels  * The label must be partly set up before this:
13330740Skarels  * secpercyl and anything required in the strategy routine
13430740Skarels  * (e.g., sector size) must be filled in before calling us.
13530740Skarels  * Returns null on success and an error string on failure.
13630740Skarels  */
13730740Skarels char *
13830740Skarels readdisklabel(dev, strat, lp)
13930740Skarels 	dev_t dev;
14030740Skarels 	int (*strat)();
14130740Skarels 	register struct disklabel *lp;
14230740Skarels {
14330740Skarels 	register struct buf *bp;
14430740Skarels 	struct disklabel *dlp;
14530740Skarels 	char *msg = NULL;
14630740Skarels 
14730740Skarels 	if (lp->d_secperunit == 0)
14830740Skarels 		lp->d_secperunit = 0x1fffffff;
14930740Skarels 	lp->d_npartitions = 1;
15030740Skarels 	if (lp->d_partitions[0].p_size == 0)
15130740Skarels 		lp->d_partitions[0].p_size = 0x1fffffff;
15230740Skarels 	lp->d_partitions[0].p_offset = 0;
15330740Skarels 
15434102Skarels 	bp = geteblk((int)lp->d_secsize);
15530740Skarels 	bp->b_dev = dev;
15630740Skarels 	bp->b_blkno = LABELSECTOR;
15732068Skarels 	bp->b_bcount = lp->d_secsize;
15830740Skarels 	bp->b_flags = B_BUSY | B_READ;
15930740Skarels 	bp->b_cylin = LABELSECTOR / lp->d_secpercyl;
16030740Skarels 	(*strat)(bp);
161*37736Smckusick 	if (biowait(bp)) {
16230740Skarels 		msg = "I/O error";
16332068Skarels 	} else for (dlp = (struct disklabel *)bp->b_un.b_addr;
16432068Skarels 	    dlp <= (struct disklabel *)(bp->b_un.b_addr+DEV_BSIZE-sizeof(*dlp));
16532068Skarels 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
16632068Skarels 		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
16732068Skarels 			if (msg == NULL)
16832068Skarels 				msg = "no disk label";
16932068Skarels 		} else if (dkcksum(dlp) != 0)
17030740Skarels 			msg = "disk label corrupted";
17132068Skarels 		else {
17230740Skarels 			*lp = *dlp;
17332068Skarels 			msg = NULL;
17432068Skarels 			break;
17532068Skarels 		}
17630740Skarels 	}
17732068Skarels 	if (lp->d_npartitions > MAXPARTITIONS)
17832068Skarels 		lp->d_npartitions = MAXPARTITIONS;
17930740Skarels 	bp->b_flags = B_INVAL | B_AGE;
18030740Skarels 	brelse(bp);
18130740Skarels 	return (msg);
18230740Skarels }
18330740Skarels 
18430740Skarels /*
18532573Skarels  * Check new disk label for sensibility
18632573Skarels  * before setting it.
18732573Skarels  */
18832573Skarels setdisklabel(olp, nlp, openmask)
18932573Skarels 	register struct disklabel *olp, *nlp;
19032573Skarels 	u_long openmask;
19132573Skarels {
19232573Skarels 	register i;
19332573Skarels 	register struct partition *opp, *npp;
19432573Skarels 
19532573Skarels 	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
19632573Skarels 	    dkcksum(nlp) != 0)
19732573Skarels 		return (EINVAL);
19834473Smckusick 	while ((i = ffs((long)openmask)) != 0) {
19932573Skarels 		i--;
20032573Skarels 		openmask &= ~(1 << i);
20132573Skarels 		if (nlp->d_npartitions <= i)
20232573Skarels 			return (EBUSY);
20332573Skarels 		opp = &olp->d_partitions[i];
20432573Skarels 		npp = &nlp->d_partitions[i];
20532573Skarels 		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
20632573Skarels 			return (EBUSY);
20732573Skarels 		/*
20832573Skarels 		 * Copy internally-set partition information
20932573Skarels 		 * if new label doesn't include it.		XXX
21032573Skarels 		 */
21132573Skarels 		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
21232573Skarels 			npp->p_fstype = opp->p_fstype;
21332573Skarels 			npp->p_fsize = opp->p_fsize;
21432573Skarels 			npp->p_frag = opp->p_frag;
21532573Skarels 			npp->p_cpg = opp->p_cpg;
21632573Skarels 		}
21732573Skarels 	}
21834102Skarels  	nlp->d_checksum = 0;
21934102Skarels  	nlp->d_checksum = dkcksum(nlp);
22032573Skarels 	*olp = *nlp;
22132573Skarels 	return (0);
22232573Skarels }
22332573Skarels 
/*
 * Encoding of disk minor numbers (should be elsewhere...):
 * the low three bits of the minor number select the partition,
 * the bits above them select the unit.
 */
#define	dkunit(dev)		(minor(dev) >> 3)
#define	dkpart(dev)		(minor(dev) & 0x7)
#define	dkminor(unit, part)	(((unit) << 3) | (part))
22832573Skarels 
22932573Skarels /*
23032573Skarels  * Write disk label back to device after modification.
23132573Skarels  */
23232573Skarels writedisklabel(dev, strat, lp)
23332573Skarels 	dev_t dev;
23432573Skarels 	int (*strat)();
23532573Skarels 	register struct disklabel *lp;
23632573Skarels {
23732573Skarels 	struct buf *bp;
23832573Skarels 	struct disklabel *dlp;
23932573Skarels 	int labelpart;
24032573Skarels 	int error = 0;
24132573Skarels 
24232573Skarels 	labelpart = dkpart(dev);
24332573Skarels 	if (lp->d_partitions[labelpart].p_offset != 0) {
24432573Skarels 		if (lp->d_partitions[0].p_offset != 0)
24532573Skarels 			return (EXDEV);			/* not quite right */
24632573Skarels 		labelpart = 0;
24732573Skarels 	}
24834102Skarels 	bp = geteblk((int)lp->d_secsize);
24932573Skarels 	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
25032573Skarels 	bp->b_blkno = LABELSECTOR;
25132573Skarels 	bp->b_bcount = lp->d_secsize;
25232573Skarels 	bp->b_flags = B_READ;
25332573Skarels 	(*strat)(bp);
254*37736Smckusick 	if (error = biowait(bp))
25534102Skarels 		goto done;
25634102Skarels 	for (dlp = (struct disklabel *)bp->b_un.b_addr;
25734102Skarels 	    dlp <= (struct disklabel *)
25834102Skarels 	      (bp->b_un.b_addr + lp->d_secsize - sizeof(*dlp));
25934102Skarels 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
26034102Skarels 		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
26134102Skarels 		    dkcksum(dlp) == 0) {
26234102Skarels 			*dlp = *lp;
26334102Skarels 			bp->b_flags = B_WRITE;
26434102Skarels 			(*strat)(bp);
265*37736Smckusick 			error = biowait(bp);
26634102Skarels 			goto done;
26734102Skarels 		}
26832573Skarels 	}
26934102Skarels 	error = ESRCH;
27034102Skarels done:
27132573Skarels 	brelse(bp);
27232573Skarels 	return (error);
27332573Skarels }
27432573Skarels 
27532573Skarels /*
27630533Skarels  * Compute checksum for disk label.
27730533Skarels  */
27830533Skarels dkcksum(lp)
27930533Skarels 	register struct disklabel *lp;
28030533Skarels {
28130533Skarels 	register u_short *start, *end;
28230533Skarels 	register u_short sum = 0;
28330533Skarels 
28430533Skarels 	start = (u_short *)lp;
28530533Skarels 	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
28630533Skarels 	while (start < end)
28730533Skarels 		sum ^= *start++;
28830533Skarels 	return (sum);
28930533Skarels }
29034535Skarels 
29134535Skarels /*
29234535Skarels  * Disk error is the preface to plaintive error messages
29334535Skarels  * about failing disk transfers.  It prints messages of the form
29434637Skarels 
29534637Skarels hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
29634637Skarels 
29734535Skarels  * if the offset of the error in the transfer and a disk label
29834535Skarels  * are both available.  blkdone should be -1 if the position of the error
29934535Skarels  * is unknown; the disklabel pointer may be null from drivers that have not
30034535Skarels  * been converted to use them.  The message is printed with printf
30134535Skarels  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
30234535Skarels  * The message should be completed (with at least a newline) with printf
30334535Skarels  * or addlog, respectively.  There is no trailing space.
30434535Skarels  */
30534535Skarels diskerr(bp, dname, what, pri, blkdone, lp)
30634535Skarels 	register struct buf *bp;
30734535Skarels 	char *dname, *what;
30834535Skarels 	int pri, blkdone;
30934535Skarels 	register struct disklabel *lp;
31034535Skarels {
31134535Skarels 	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
31234535Skarels 	register int (*pr)(), sn;
31334535Skarels 	char partname = 'a' + part;
31434535Skarels 	extern printf(), addlog();
31534535Skarels 
31634535Skarels 	if (pri != LOG_PRINTF) {
31734535Skarels 		log(pri, "");
31834535Skarels 		pr = addlog;
31934535Skarels 	} else
32034535Skarels 		pr = printf;
32134535Skarels 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
32234535Skarels 	    bp->b_flags & B_READ ? "read" : "writ");
32334535Skarels 	sn = bp->b_blkno;
32434535Skarels 	if (bp->b_bcount <= DEV_BSIZE)
32534535Skarels 		(*pr)("%d", sn);
32634535Skarels 	else {
32734535Skarels 		if (blkdone >= 0) {
32834535Skarels 			sn += blkdone;
32934535Skarels 			(*pr)("%d of ", sn);
33034535Skarels 		}
33134535Skarels 		(*pr)("%d-%d", bp->b_blkno,
33234535Skarels 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
33334535Skarels 	}
33434711Skarels 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
33534711Skarels #ifdef tahoe
33634711Skarels 		sn *= DEV_BSIZE / lp->d_secsize;		/* XXX */
33734711Skarels #endif
33834535Skarels 		sn += lp->d_partitions[part].p_offset;
33934637Skarels 		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
34034637Skarels 		    sn / lp->d_secpercyl);
34134637Skarels 		sn %= lp->d_secpercyl;
34235703Stef 		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
34334535Skarels 	}
34434535Skarels }
345