123397Smckusick /* 263376Sbostic * Copyright (c) 1982, 1986, 1988, 1993 363376Sbostic * The Regents of the University of California. All rights reserved. 423397Smckusick * 544537Sbostic * %sccs.include.redist.c% 637736Smckusick * 7*64426Sbostic * @(#)ufs_disksubr.c 8.2 (Berkeley) 09/05/93 823397Smckusick */ 916Sbill 1051508Sbostic #include <sys/param.h> 1151508Sbostic #include <sys/systm.h> 1251508Sbostic #include <sys/buf.h> 1351508Sbostic #include <sys/disklabel.h> 1451508Sbostic #include <sys/syslog.h> 1530740Skarels 1616Sbill /* 1751508Sbostic * Seek sort for disks. We depend on the driver which calls us using b_resid 1851508Sbostic * as the current cylinder number. 192626Swnj * 2051508Sbostic * The argument dp structure holds a b_actf activity chain pointer on which we 2151508Sbostic * keep two queues, sorted in ascending cylinder order. The first queue holds 2251508Sbostic * those requests which are positioned after the current cylinder (in the first 2351508Sbostic * request); the second holds requests which came in after their cylinder number 2451508Sbostic * was passed. Thus we implement a one way scan, retracting after reaching the 2551508Sbostic * end of the drive to the first request on the second queue, at which time it 2651508Sbostic * becomes the first queue. 272626Swnj * 2851508Sbostic * A one-way scan is natural because of the way UNIX read-ahead blocks are 2951508Sbostic * allocated. 3016Sbill */ 3116Sbill 32*64426Sbostic /* 33*64426Sbostic * For portability with historic industry practice, the 34*64426Sbostic * cylinder number has to be maintained in the `b_resid' 35*64426Sbostic * field. 36*64426Sbostic */ 37*64426Sbostic #define b_cylinder b_resid 3816Sbill 3951508Sbostic void 4016Sbill disksort(dp, bp) 412626Swnj register struct buf *dp, *bp; 4216Sbill { 43*64426Sbostic register struct buf *bq; 4416Sbill 45*64426Sbostic /* If the queue is empty, then it's easy. */ 46*64426Sbostic if (dp->b_actf == NULL) { 47*64426Sbostic bp->b_actf = NULL; 4816Sbill dp->b_actf = bp; 4916Sbill return; 5016Sbill } 51*64426Sbostic 522626Swnj /* 53*64426Sbostic * If we lie after the first (currently active) request, then we 54*64426Sbostic * must locate the second request list and add ourselves to it. 552626Swnj */ 56*64426Sbostic bq = dp->b_actf; 57*64426Sbostic if (bp->b_cylinder < bq->b_cylinder) { 58*64426Sbostic while (bq->b_actf) { 592626Swnj /* 60*64426Sbostic * Check for an ``inversion'' in the normally ascending 61*64426Sbostic * cylinder numbers, indicating the start of the second 62*64426Sbostic * request list. 632626Swnj */ 64*64426Sbostic if (bq->b_actf->b_cylinder < bq->b_cylinder) { 652626Swnj /* 66*64426Sbostic * Search the second request list for the first 67*64426Sbostic * request at a larger cylinder number. We go 68*64426Sbostic * before that; if there is no such request, we 69*64426Sbostic * go at end. 702626Swnj */ 712626Swnj do { 72*64426Sbostic if (bp->b_cylinder < 73*64426Sbostic bq->b_actf->b_cylinder) 742626Swnj goto insert; 75*64426Sbostic if (bp->b_cylinder == 76*64426Sbostic bq->b_actf->b_cylinder && 77*64426Sbostic bp->b_blkno < bq->b_actf->b_blkno) 7832573Skarels goto insert; 79*64426Sbostic bq = bq->b_actf; 80*64426Sbostic } while (bq->b_actf); 812626Swnj goto insert; /* after last */ 822626Swnj } 83*64426Sbostic bq = bq->b_actf; 8416Sbill } 852626Swnj /* 862626Swnj * No inversions... we will go after the last, and 872626Swnj * be the first request in the second request list. 882626Swnj */ 892626Swnj goto insert; 9016Sbill } 912626Swnj /* 922626Swnj * Request is at/after the current request... 932626Swnj * sort in the first request list. 942626Swnj */ 95*64426Sbostic while (bq->b_actf) { 962626Swnj /* 97*64426Sbostic * We want to go after the current request if there is an 98*64426Sbostic * inversion after it (i.e. it is the end of the first 99*64426Sbostic * request list), or if the next request is a larger cylinder 100*64426Sbostic * than our request. 1012626Swnj */ 102*64426Sbostic if (bq->b_actf->b_cylinder < bq->b_cylinder || 103*64426Sbostic bp->b_cylinder < bq->b_actf->b_cylinder || 104*64426Sbostic (bp->b_cylinder == bq->b_actf->b_cylinder && 105*64426Sbostic bp->b_blkno < bq->b_actf->b_blkno)) 1062626Swnj goto insert; 107*64426Sbostic bq = bq->b_actf; 1082626Swnj } 1092626Swnj /* 110*64426Sbostic * Neither a second list nor a larger request... we go at the end of 111*64426Sbostic * the first list, which is the same as the end of the whole schebang. 1122626Swnj */ 113*64426Sbostic insert: bp->b_actf = bq->b_actf; 114*64426Sbostic bq->b_actf = bp; 11516Sbill } 11630533Skarels 11730533Skarels /* 11851508Sbostic * Attempt to read a disk label from a device using the indicated stategy 11951508Sbostic * routine. The label must be partly set up before this: secpercyl and 12051508Sbostic * anything required in the strategy routine (e.g., sector size) must be 12151508Sbostic * filled in before calling us. Returns NULL on success and an error 12251508Sbostic * string on failure. 12330740Skarels */ 12430740Skarels char * 12530740Skarels readdisklabel(dev, strat, lp) 12630740Skarels dev_t dev; 12730740Skarels int (*strat)(); 12830740Skarels register struct disklabel *lp; 12930740Skarels { 13030740Skarels register struct buf *bp; 13130740Skarels struct disklabel *dlp; 13230740Skarels char *msg = NULL; 13330740Skarels 13430740Skarels if (lp->d_secperunit == 0) 13530740Skarels lp->d_secperunit = 0x1fffffff; 13630740Skarels lp->d_npartitions = 1; 13730740Skarels if (lp->d_partitions[0].p_size == 0) 13830740Skarels lp->d_partitions[0].p_size = 0x1fffffff; 13930740Skarels lp->d_partitions[0].p_offset = 0; 14030740Skarels 14134102Skarels bp = geteblk((int)lp->d_secsize); 14230740Skarels bp->b_dev = dev; 14330740Skarels bp->b_blkno = LABELSECTOR; 14432068Skarels bp->b_bcount = lp->d_secsize; 14530740Skarels bp->b_flags = B_BUSY | B_READ; 146*64426Sbostic bp->b_cylinder = LABELSECTOR / lp->d_secpercyl; 14730740Skarels (*strat)(bp); 148*64426Sbostic if (biowait(bp)) 14930740Skarels msg = "I/O error"; 150*64426Sbostic else for (dlp = (struct disklabel *)bp->b_un.b_addr; 15132068Skarels dlp <= (struct disklabel *)(bp->b_un.b_addr+DEV_BSIZE-sizeof(*dlp)); 15232068Skarels dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 15332068Skarels if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) { 15432068Skarels if (msg == NULL) 15532068Skarels msg = "no disk label"; 15639836Smckusick } else if (dlp->d_npartitions > MAXPARTITIONS || 15739836Smckusick dkcksum(dlp) != 0) 15830740Skarels msg = "disk label corrupted"; 15932068Skarels else { 16030740Skarels *lp = *dlp; 16132068Skarels msg = NULL; 16232068Skarels break; 16332068Skarels } 16430740Skarels } 16530740Skarels bp->b_flags = B_INVAL | B_AGE; 16630740Skarels brelse(bp); 16730740Skarels return (msg); 16830740Skarels } 16930740Skarels 17030740Skarels /* 17151508Sbostic * Check new disk label for sensibility before setting it. 17232573Skarels */ 17351508Sbostic int 17432573Skarels setdisklabel(olp, nlp, openmask) 17532573Skarels register struct disklabel *olp, *nlp; 17632573Skarels u_long openmask; 17732573Skarels { 17832573Skarels register i; 17932573Skarels register struct partition *opp, *npp; 18032573Skarels 18132573Skarels if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 18232573Skarels dkcksum(nlp) != 0) 18332573Skarels return (EINVAL); 18434473Smckusick while ((i = ffs((long)openmask)) != 0) { 18532573Skarels i--; 18632573Skarels openmask &= ~(1 << i); 18732573Skarels if (nlp->d_npartitions <= i) 18832573Skarels return (EBUSY); 18932573Skarels opp = &olp->d_partitions[i]; 19032573Skarels npp = &nlp->d_partitions[i]; 19132573Skarels if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size) 19232573Skarels return (EBUSY); 19332573Skarels /* 19432573Skarels * Copy internally-set partition information 19532573Skarels * if new label doesn't include it. XXX 19632573Skarels */ 19732573Skarels if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 19832573Skarels npp->p_fstype = opp->p_fstype; 19932573Skarels npp->p_fsize = opp->p_fsize; 20032573Skarels npp->p_frag = opp->p_frag; 20132573Skarels npp->p_cpg = opp->p_cpg; 20232573Skarels } 20332573Skarels } 20434102Skarels nlp->d_checksum = 0; 20534102Skarels nlp->d_checksum = dkcksum(nlp); 20632573Skarels *olp = *nlp; 20732573Skarels return (0); 20832573Skarels } 20932573Skarels 21032573Skarels /* encoding of disk minor numbers, should be elsewhere... */ 21132573Skarels #define dkunit(dev) (minor(dev) >> 3) 21232573Skarels #define dkpart(dev) (minor(dev) & 07) 21332573Skarels #define dkminor(unit, part) (((unit) << 3) | (part)) 21432573Skarels 21532573Skarels /* 21632573Skarels * Write disk label back to device after modification. 21732573Skarels */ 21851508Sbostic int 21932573Skarels writedisklabel(dev, strat, lp) 22032573Skarels dev_t dev; 22132573Skarels int (*strat)(); 22232573Skarels register struct disklabel *lp; 22332573Skarels { 22432573Skarels struct buf *bp; 22532573Skarels struct disklabel *dlp; 22632573Skarels int labelpart; 22732573Skarels int error = 0; 22832573Skarels 22932573Skarels labelpart = dkpart(dev); 23032573Skarels if (lp->d_partitions[labelpart].p_offset != 0) { 23132573Skarels if (lp->d_partitions[0].p_offset != 0) 23232573Skarels return (EXDEV); /* not quite right */ 23332573Skarels labelpart = 0; 23432573Skarels } 23534102Skarels bp = geteblk((int)lp->d_secsize); 23632573Skarels bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart)); 23732573Skarels bp->b_blkno = LABELSECTOR; 23832573Skarels bp->b_bcount = lp->d_secsize; 23932573Skarels bp->b_flags = B_READ; 24032573Skarels (*strat)(bp); 24137736Smckusick if (error = biowait(bp)) 24234102Skarels goto done; 24334102Skarels for (dlp = (struct disklabel *)bp->b_un.b_addr; 24434102Skarels dlp <= (struct disklabel *) 24534102Skarels (bp->b_un.b_addr + lp->d_secsize - sizeof(*dlp)); 24634102Skarels dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 24734102Skarels if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && 24834102Skarels dkcksum(dlp) == 0) { 24934102Skarels *dlp = *lp; 25034102Skarels bp->b_flags = B_WRITE; 25134102Skarels (*strat)(bp); 25237736Smckusick error = biowait(bp); 25334102Skarels goto done; 25434102Skarels } 25532573Skarels } 25634102Skarels error = ESRCH; 25734102Skarels done: 25832573Skarels brelse(bp); 25932573Skarels return (error); 26032573Skarels } 26132573Skarels 26232573Skarels /* 26330533Skarels * Compute checksum for disk label. 26430533Skarels */ 26530533Skarels dkcksum(lp) 26630533Skarels register struct disklabel *lp; 26730533Skarels { 26830533Skarels register u_short *start, *end; 26930533Skarels register u_short sum = 0; 27030533Skarels 27130533Skarels start = (u_short *)lp; 27230533Skarels end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 27330533Skarels while (start < end) 27430533Skarels sum ^= *start++; 27530533Skarels return (sum); 27630533Skarels } 27734535Skarels 27834535Skarels /* 27934535Skarels * Disk error is the preface to plaintive error messages 28034535Skarels * about failing disk transfers. It prints messages of the form 28134637Skarels 28234637Skarels hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 28334637Skarels 28434535Skarels * if the offset of the error in the transfer and a disk label 28534535Skarels * are both available. blkdone should be -1 if the position of the error 28634535Skarels * is unknown; the disklabel pointer may be null from drivers that have not 28734535Skarels * been converted to use them. The message is printed with printf 28834535Skarels * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 28934535Skarels * The message should be completed (with at least a newline) with printf 29034535Skarels * or addlog, respectively. There is no trailing space. 29134535Skarels */ 29251508Sbostic void 29334535Skarels diskerr(bp, dname, what, pri, blkdone, lp) 29434535Skarels register struct buf *bp; 29534535Skarels char *dname, *what; 29634535Skarels int pri, blkdone; 29734535Skarels register struct disklabel *lp; 29834535Skarels { 29934535Skarels int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); 30049110Skarels register void (*pr) __P((const char *, ...)); 30134535Skarels char partname = 'a' + part; 30249110Skarels int sn; 30334535Skarels 30434535Skarels if (pri != LOG_PRINTF) { 30534535Skarels log(pri, ""); 30634535Skarels pr = addlog; 30734535Skarels } else 30834535Skarels pr = printf; 30934535Skarels (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 31034535Skarels bp->b_flags & B_READ ? "read" : "writ"); 31134535Skarels sn = bp->b_blkno; 31234535Skarels if (bp->b_bcount <= DEV_BSIZE) 31334535Skarels (*pr)("%d", sn); 31434535Skarels else { 31534535Skarels if (blkdone >= 0) { 31634535Skarels sn += blkdone; 31734535Skarels (*pr)("%d of ", sn); 31834535Skarels } 31934535Skarels (*pr)("%d-%d", bp->b_blkno, 32034535Skarels bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 32134535Skarels } 32234711Skarels if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 32334711Skarels #ifdef tahoe 32434711Skarels sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ 32534711Skarels #endif 32634535Skarels sn += lp->d_partitions[part].p_offset; 32734637Skarels (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 32834637Skarels sn / lp->d_secpercyl); 32934637Skarels sn %= lp->d_secpercyl; 33035703Stef (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 33134535Skarels } 33234535Skarels } 333