123397Smckusick /* 234535Skarels * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 3*37736Smckusick * All rights reserved. 423397Smckusick * 5*37736Smckusick * Redistribution and use in source and binary forms are permitted 6*37736Smckusick * provided that the above copyright notice and this paragraph are 7*37736Smckusick * duplicated in all such forms and that any documentation, 8*37736Smckusick * advertising materials, and other materials related to such 9*37736Smckusick * distribution and use acknowledge that the software was developed 10*37736Smckusick * by the University of California, Berkeley. The name of the 11*37736Smckusick * University may not be used to endorse or promote products derived 12*37736Smckusick * from this software without specific prior written permission. 13*37736Smckusick * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14*37736Smckusick * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15*37736Smckusick * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16*37736Smckusick * 17*37736Smckusick * @(#)ufs_disksubr.c 7.12 (Berkeley) 05/09/89 1823397Smckusick */ 1916Sbill 2030533Skarels #include "param.h" 2130533Skarels #include "systm.h" 2230533Skarels #include "buf.h" 2330533Skarels #include "disklabel.h" 2434535Skarels #include "syslog.h" 2530740Skarels #include "user.h" 2630740Skarels 2716Sbill /* 282626Swnj * Seek sort for disks. We depend on the driver 292626Swnj * which calls us using b_resid as the current cylinder number. 302626Swnj * 312626Swnj * The argument dp structure holds a b_actf activity chain pointer 322626Swnj * on which we keep two queues, sorted in ascending cylinder order. 332626Swnj * The first queue holds those requests which are positioned after 342626Swnj * the current cylinder (in the first request); the second holds 352626Swnj * requests which came in after their cylinder number was passed. 362626Swnj * Thus we implement a one way scan, retracting after reaching the 372626Swnj * end of the drive to the first request on the second queue, 382626Swnj * at which time it becomes the first queue. 392626Swnj * 402626Swnj * A one-way scan is natural because of the way UNIX read-ahead 412626Swnj * blocks are allocated. 4216Sbill */ 4316Sbill 4416Sbill #define b_cylin b_resid 4516Sbill 4616Sbill disksort(dp, bp) 472626Swnj register struct buf *dp, *bp; 4816Sbill { 4916Sbill register struct buf *ap; 5016Sbill 512626Swnj /* 522626Swnj * If nothing on the activity queue, then 532626Swnj * we become the only thing. 542626Swnj */ 5516Sbill ap = dp->b_actf; 5616Sbill if(ap == NULL) { 5716Sbill dp->b_actf = bp; 5816Sbill dp->b_actl = bp; 5916Sbill bp->av_forw = NULL; 6016Sbill return; 6116Sbill } 622626Swnj /* 632626Swnj * If we lie after the first (currently active) 642626Swnj * request, then we must locate the second request list 652626Swnj * and add ourselves to it. 662626Swnj */ 672626Swnj if (bp->b_cylin < ap->b_cylin) { 682626Swnj while (ap->av_forw) { 692626Swnj /* 702626Swnj * Check for an ``inversion'' in the 712626Swnj * normally ascending cylinder numbers, 722626Swnj * indicating the start of the second request list. 732626Swnj */ 742626Swnj if (ap->av_forw->b_cylin < ap->b_cylin) { 752626Swnj /* 762626Swnj * Search the second request list 772626Swnj * for the first request at a larger 782626Swnj * cylinder number. We go before that; 792626Swnj * if there is no such request, we go at end. 802626Swnj */ 812626Swnj do { 822626Swnj if (bp->b_cylin < ap->av_forw->b_cylin) 832626Swnj goto insert; 8432573Skarels if (bp->b_cylin == ap->av_forw->b_cylin && 8532573Skarels bp->b_blkno < ap->av_forw->b_blkno) 8632573Skarels goto insert; 872626Swnj ap = ap->av_forw; 882626Swnj } while (ap->av_forw); 892626Swnj goto insert; /* after last */ 902626Swnj } 912626Swnj ap = ap->av_forw; 9216Sbill } 932626Swnj /* 942626Swnj * No inversions... we will go after the last, and 952626Swnj * be the first request in the second request list. 962626Swnj */ 972626Swnj goto insert; 9816Sbill } 992626Swnj /* 1002626Swnj * Request is at/after the current request... 1012626Swnj * sort in the first request list. 1022626Swnj */ 1032626Swnj while (ap->av_forw) { 1042626Swnj /* 1052626Swnj * We want to go after the current request 1062626Swnj * if there is an inversion after it (i.e. it is 1072626Swnj * the end of the first request list), or if 1082626Swnj * the next request is a larger cylinder than our request. 1092626Swnj */ 1102626Swnj if (ap->av_forw->b_cylin < ap->b_cylin || 11132573Skarels bp->b_cylin < ap->av_forw->b_cylin || 11232573Skarels (bp->b_cylin == ap->av_forw->b_cylin && 11332573Skarels bp->b_blkno < ap->av_forw->b_blkno)) 1142626Swnj goto insert; 1152626Swnj ap = ap->av_forw; 1162626Swnj } 1172626Swnj /* 1182626Swnj * Neither a second list nor a larger 1192626Swnj * request... we go at the end of the first list, 1202626Swnj * which is the same as the end of the whole schebang. 1212626Swnj */ 1222626Swnj insert: 1232626Swnj bp->av_forw = ap->av_forw; 1242626Swnj ap->av_forw = bp; 1252626Swnj if (ap == dp->b_actl) 12616Sbill dp->b_actl = bp; 12716Sbill } 12830533Skarels 12930533Skarels /* 13030740Skarels * Attempt to read a disk label from a device 13130740Skarels * using the indicated stategy routine. 13230740Skarels * The label must be partly set up before this: 13330740Skarels * secpercyl and anything required in the strategy routine 13430740Skarels * (e.g., sector size) must be filled in before calling us. 13530740Skarels * Returns null on success and an error string on failure. 13630740Skarels */ 13730740Skarels char * 13830740Skarels readdisklabel(dev, strat, lp) 13930740Skarels dev_t dev; 14030740Skarels int (*strat)(); 14130740Skarels register struct disklabel *lp; 14230740Skarels { 14330740Skarels register struct buf *bp; 14430740Skarels struct disklabel *dlp; 14530740Skarels char *msg = NULL; 14630740Skarels 14730740Skarels if (lp->d_secperunit == 0) 14830740Skarels lp->d_secperunit = 0x1fffffff; 14930740Skarels lp->d_npartitions = 1; 15030740Skarels if (lp->d_partitions[0].p_size == 0) 15130740Skarels lp->d_partitions[0].p_size = 0x1fffffff; 15230740Skarels lp->d_partitions[0].p_offset = 0; 15330740Skarels 15434102Skarels bp = geteblk((int)lp->d_secsize); 15530740Skarels bp->b_dev = dev; 15630740Skarels bp->b_blkno = LABELSECTOR; 15732068Skarels bp->b_bcount = lp->d_secsize; 15830740Skarels bp->b_flags = B_BUSY | B_READ; 15930740Skarels bp->b_cylin = LABELSECTOR / lp->d_secpercyl; 16030740Skarels (*strat)(bp); 161*37736Smckusick if (biowait(bp)) { 16230740Skarels msg = "I/O error"; 16332068Skarels } else for (dlp = (struct disklabel *)bp->b_un.b_addr; 16432068Skarels dlp <= (struct disklabel *)(bp->b_un.b_addr+DEV_BSIZE-sizeof(*dlp)); 16532068Skarels dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 16632068Skarels if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) { 16732068Skarels if (msg == NULL) 16832068Skarels msg = "no disk label"; 16932068Skarels } else if (dkcksum(dlp) != 0) 17030740Skarels msg = "disk label corrupted"; 17132068Skarels else { 17230740Skarels *lp = *dlp; 17332068Skarels msg = NULL; 17432068Skarels break; 17532068Skarels } 17630740Skarels } 17732068Skarels if (lp->d_npartitions > MAXPARTITIONS) 17832068Skarels lp->d_npartitions = MAXPARTITIONS; 17930740Skarels bp->b_flags = B_INVAL | B_AGE; 18030740Skarels brelse(bp); 18130740Skarels return (msg); 18230740Skarels } 18330740Skarels 18430740Skarels /* 18532573Skarels * Check new disk label for sensibility 18632573Skarels * before setting it. 18732573Skarels */ 18832573Skarels setdisklabel(olp, nlp, openmask) 18932573Skarels register struct disklabel *olp, *nlp; 19032573Skarels u_long openmask; 19132573Skarels { 19232573Skarels register i; 19332573Skarels register struct partition *opp, *npp; 19432573Skarels 19532573Skarels if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 19632573Skarels dkcksum(nlp) != 0) 19732573Skarels return (EINVAL); 19834473Smckusick while ((i = ffs((long)openmask)) != 0) { 19932573Skarels i--; 20032573Skarels openmask &= ~(1 << i); 20132573Skarels if (nlp->d_npartitions <= i) 20232573Skarels return (EBUSY); 20332573Skarels opp = &olp->d_partitions[i]; 20432573Skarels npp = &nlp->d_partitions[i]; 20532573Skarels if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size) 20632573Skarels return (EBUSY); 20732573Skarels /* 20832573Skarels * Copy internally-set partition information 20932573Skarels * if new label doesn't include it. XXX 21032573Skarels */ 21132573Skarels if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 21232573Skarels npp->p_fstype = opp->p_fstype; 21332573Skarels npp->p_fsize = opp->p_fsize; 21432573Skarels npp->p_frag = opp->p_frag; 21532573Skarels npp->p_cpg = opp->p_cpg; 21632573Skarels } 21732573Skarels } 21834102Skarels nlp->d_checksum = 0; 21934102Skarels nlp->d_checksum = dkcksum(nlp); 22032573Skarels *olp = *nlp; 22132573Skarels return (0); 22232573Skarels } 22332573Skarels 22432573Skarels /* encoding of disk minor numbers, should be elsewhere... */ 22532573Skarels #define dkunit(dev) (minor(dev) >> 3) 22632573Skarels #define dkpart(dev) (minor(dev) & 07) 22732573Skarels #define dkminor(unit, part) (((unit) << 3) | (part)) 22832573Skarels 22932573Skarels /* 23032573Skarels * Write disk label back to device after modification. 23132573Skarels */ 23232573Skarels writedisklabel(dev, strat, lp) 23332573Skarels dev_t dev; 23432573Skarels int (*strat)(); 23532573Skarels register struct disklabel *lp; 23632573Skarels { 23732573Skarels struct buf *bp; 23832573Skarels struct disklabel *dlp; 23932573Skarels int labelpart; 24032573Skarels int error = 0; 24132573Skarels 24232573Skarels labelpart = dkpart(dev); 24332573Skarels if (lp->d_partitions[labelpart].p_offset != 0) { 24432573Skarels if (lp->d_partitions[0].p_offset != 0) 24532573Skarels return (EXDEV); /* not quite right */ 24632573Skarels labelpart = 0; 24732573Skarels } 24834102Skarels bp = geteblk((int)lp->d_secsize); 24932573Skarels bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart)); 25032573Skarels bp->b_blkno = LABELSECTOR; 25132573Skarels bp->b_bcount = lp->d_secsize; 25232573Skarels bp->b_flags = B_READ; 25332573Skarels (*strat)(bp); 254*37736Smckusick if (error = biowait(bp)) 25534102Skarels goto done; 25634102Skarels for (dlp = (struct disklabel *)bp->b_un.b_addr; 25734102Skarels dlp <= (struct disklabel *) 25834102Skarels (bp->b_un.b_addr + lp->d_secsize - sizeof(*dlp)); 25934102Skarels dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 26034102Skarels if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && 26134102Skarels dkcksum(dlp) == 0) { 26234102Skarels *dlp = *lp; 26334102Skarels bp->b_flags = B_WRITE; 26434102Skarels (*strat)(bp); 265*37736Smckusick error = biowait(bp); 26634102Skarels goto done; 26734102Skarels } 26832573Skarels } 26934102Skarels error = ESRCH; 27034102Skarels done: 27132573Skarels brelse(bp); 27232573Skarels return (error); 27332573Skarels } 27432573Skarels 27532573Skarels /* 27630533Skarels * Compute checksum for disk label. 27730533Skarels */ 27830533Skarels dkcksum(lp) 27930533Skarels register struct disklabel *lp; 28030533Skarels { 28130533Skarels register u_short *start, *end; 28230533Skarels register u_short sum = 0; 28330533Skarels 28430533Skarels start = (u_short *)lp; 28530533Skarels end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 28630533Skarels while (start < end) 28730533Skarels sum ^= *start++; 28830533Skarels return (sum); 28930533Skarels } 29034535Skarels 29134535Skarels /* 29234535Skarels * Disk error is the preface to plaintive error messages 29334535Skarels * about failing disk transfers. It prints messages of the form 29434637Skarels 29534637Skarels hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 29634637Skarels 29734535Skarels * if the offset of the error in the transfer and a disk label 29834535Skarels * are both available. blkdone should be -1 if the position of the error 29934535Skarels * is unknown; the disklabel pointer may be null from drivers that have not 30034535Skarels * been converted to use them. The message is printed with printf 30134535Skarels * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 30234535Skarels * The message should be completed (with at least a newline) with printf 30334535Skarels * or addlog, respectively. There is no trailing space. 30434535Skarels */ 30534535Skarels diskerr(bp, dname, what, pri, blkdone, lp) 30634535Skarels register struct buf *bp; 30734535Skarels char *dname, *what; 30834535Skarels int pri, blkdone; 30934535Skarels register struct disklabel *lp; 31034535Skarels { 31134535Skarels int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); 31234535Skarels register int (*pr)(), sn; 31334535Skarels char partname = 'a' + part; 31434535Skarels extern printf(), addlog(); 31534535Skarels 31634535Skarels if (pri != LOG_PRINTF) { 31734535Skarels log(pri, ""); 31834535Skarels pr = addlog; 31934535Skarels } else 32034535Skarels pr = printf; 32134535Skarels (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 32234535Skarels bp->b_flags & B_READ ? "read" : "writ"); 32334535Skarels sn = bp->b_blkno; 32434535Skarels if (bp->b_bcount <= DEV_BSIZE) 32534535Skarels (*pr)("%d", sn); 32634535Skarels else { 32734535Skarels if (blkdone >= 0) { 32834535Skarels sn += blkdone; 32934535Skarels (*pr)("%d of ", sn); 33034535Skarels } 33134535Skarels (*pr)("%d-%d", bp->b_blkno, 33234535Skarels bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 33334535Skarels } 33434711Skarels if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 33534711Skarels #ifdef tahoe 33634711Skarels sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ 33734711Skarels #endif 33834535Skarels sn += lp->d_partitions[part].p_offset; 33934637Skarels (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 34034637Skarels sn / lp->d_secpercyl); 34134637Skarels sn %= lp->d_secpercyl; 34235703Stef (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 34334535Skarels } 34434535Skarels } 345