1 /* $NetBSD: subr_disk.c,v 1.21 1996/10/17 16:31:56 perry Exp $ */ 2 3 /* 4 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 42 */ 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/malloc.h> 48 #include <sys/buf.h> 49 #include <sys/syslog.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/disk.h> 53 54 /* 55 * A global list of all disks attached to the system. May grow or 56 * shrink over time. 57 */ 58 struct disklist_head disklist; /* TAILQ_HEAD */ 59 int disk_count; /* number of drives in global disklist */ 60 61 /* 62 * Seek sort for disks. We depend on the driver which calls us using b_resid 63 * as the current cylinder number. 64 * 65 * The argument ap structure holds a b_actf activity chain pointer on which we 66 * keep two queues, sorted in ascending cylinder order. The first queue holds 67 * those requests which are positioned after the current cylinder (in the first 68 * request); the second holds requests which came in after their cylinder number 69 * was passed. Thus we implement a one way scan, retracting after reaching the 70 * end of the drive to the first request on the second queue, at which time it 71 * becomes the first queue. 72 * 73 * A one-way scan is natural because of the way UNIX read-ahead blocks are 74 * allocated. 75 */ 76 77 void 78 disksort(ap, bp) 79 register struct buf *ap, *bp; 80 { 81 register struct buf *bq; 82 83 /* If the queue is empty, then it's easy. */ 84 if (ap->b_actf == NULL) { 85 bp->b_actf = NULL; 86 ap->b_actf = bp; 87 return; 88 } 89 90 /* 91 * If we lie after the first (currently active) request, then we 92 * must locate the second request list and add ourselves to it. 93 */ 94 bq = ap->b_actf; 95 if (bp->b_cylinder < bq->b_cylinder) { 96 while (bq->b_actf) { 97 /* 98 * Check for an ``inversion'' in the normally ascending 99 * cylinder numbers, indicating the start of the second 100 * request list. 101 */ 102 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 103 /* 104 * Search the second request list for the first 105 * request at a larger cylinder number. We go 106 * before that; if there is no such request, we 107 * go at end. 108 */ 109 do { 110 if (bp->b_cylinder < 111 bq->b_actf->b_cylinder) 112 goto insert; 113 if (bp->b_cylinder == 114 bq->b_actf->b_cylinder && 115 bp->b_blkno < bq->b_actf->b_blkno) 116 goto insert; 117 bq = bq->b_actf; 118 } while (bq->b_actf); 119 goto insert; /* after last */ 120 } 121 bq = bq->b_actf; 122 } 123 /* 124 * No inversions... we will go after the last, and 125 * be the first request in the second request list. 126 */ 127 goto insert; 128 } 129 /* 130 * Request is at/after the current request... 131 * sort in the first request list. 132 */ 133 while (bq->b_actf) { 134 /* 135 * We want to go after the current request if there is an 136 * inversion after it (i.e. it is the end of the first 137 * request list), or if the next request is a larger cylinder 138 * than our request. 139 */ 140 if (bq->b_actf->b_cylinder < bq->b_cylinder || 141 bp->b_cylinder < bq->b_actf->b_cylinder || 142 (bp->b_cylinder == bq->b_actf->b_cylinder && 143 bp->b_blkno < bq->b_actf->b_blkno)) 144 goto insert; 145 bq = bq->b_actf; 146 } 147 /* 148 * Neither a second list nor a larger request... we go at the end of 149 * the first list, which is the same as the end of the whole schebang. 150 */ 151 insert: bp->b_actf = bq->b_actf; 152 bq->b_actf = bp; 153 } 154 155 /* encoding of disk minor numbers, should be elsewhere... */ 156 #define dkunit(dev) (minor(dev) >> 3) 157 #define dkpart(dev) (minor(dev) & 07) 158 #define dkminor(unit, part) (((unit) << 3) | (part)) 159 160 /* 161 * Compute checksum for disk label. 162 */ 163 u_int 164 dkcksum(lp) 165 register struct disklabel *lp; 166 { 167 register u_short *start, *end; 168 register u_short sum = 0; 169 170 start = (u_short *)lp; 171 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 172 while (start < end) 173 sum ^= *start++; 174 return (sum); 175 } 176 177 /* 178 * Disk error is the preface to plaintive error messages 179 * about failing disk transfers. It prints messages of the form 180 181 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 182 183 * if the offset of the error in the transfer and a disk label 184 * are both available. blkdone should be -1 if the position of the error 185 * is unknown; the disklabel pointer may be null from drivers that have not 186 * been converted to use them. The message is printed with printf 187 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 188 * The message should be completed (with at least a newline) with printf 189 * or addlog, respectively. There is no trailing space. 190 */ 191 void 192 diskerr(bp, dname, what, pri, blkdone, lp) 193 register struct buf *bp; 194 char *dname, *what; 195 int pri, blkdone; 196 register struct disklabel *lp; 197 { 198 int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); 199 register void (*pr) __P((const char *, ...)); 200 char partname = 'a' + part; 201 int sn; 202 203 if (pri != LOG_PRINTF) { 204 static const char fmt[] = ""; 205 log(pri, fmt); 206 pr = addlog; 207 } else 208 pr = printf; 209 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 210 bp->b_flags & B_READ ? "read" : "writ"); 211 sn = bp->b_blkno; 212 if (bp->b_bcount <= DEV_BSIZE) 213 (*pr)("%d", sn); 214 else { 215 if (blkdone >= 0) { 216 sn += blkdone; 217 (*pr)("%d of ", sn); 218 } 219 (*pr)("%d-%d", bp->b_blkno, 220 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 221 } 222 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 223 sn += lp->d_partitions[part].p_offset; 224 (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 225 sn / lp->d_secpercyl); 226 sn %= lp->d_secpercyl; 227 (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 228 } 229 } 230 231 /* 232 * Initialize the disklist. Called by main() before autoconfiguration. 233 */ 234 void 235 disk_init() 236 { 237 238 TAILQ_INIT(&disklist); 239 disk_count = 0; 240 } 241 242 /* 243 * Searches the disklist for the disk corresponding to the 244 * name provided. 245 */ 246 struct disk * 247 disk_find(name) 248 char *name; 249 { 250 struct disk *diskp; 251 252 if ((name == NULL) || (disk_count <= 0)) 253 return (NULL); 254 255 for (diskp = disklist.tqh_first; diskp != NULL; 256 diskp = diskp->dk_link.tqe_next) 257 if (strcmp(diskp->dk_name, name) == 0) 258 return (diskp); 259 260 return (NULL); 261 } 262 263 /* 264 * Attach a disk. 265 */ 266 void 267 disk_attach(diskp) 268 struct disk *diskp; 269 { 270 int s; 271 272 /* 273 * Allocate and initialize the disklabel structures. Note that 274 * it's not safe to sleep here, since we're probably going to be 275 * called during autoconfiguration. 276 */ 277 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 278 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 279 M_NOWAIT); 280 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 281 panic("disk_attach: can't allocate storage for disklabel"); 282 283 bzero(diskp->dk_label, sizeof(struct disklabel)); 284 bzero(diskp->dk_cpulabel, sizeof(struct cpu_disklabel)); 285 286 /* 287 * Set the attached timestamp. 288 */ 289 s = splclock(); 290 diskp->dk_attachtime = mono_time; 291 splx(s); 292 293 /* 294 * Link into the disklist. 295 */ 296 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 297 ++disk_count; 298 } 299 300 /* 301 * Detach a disk. 302 */ 303 void 304 disk_detach(diskp) 305 struct disk *diskp; 306 { 307 308 /* 309 * Free the space used by the disklabel structures. 310 */ 311 free(diskp->dk_label, M_DEVBUF); 312 free(diskp->dk_cpulabel, M_DEVBUF); 313 314 /* 315 * Remove from the disklist. 316 */ 317 TAILQ_REMOVE(&disklist, diskp, dk_link); 318 if (--disk_count < 0) 319 panic("disk_detach: disk_count < 0"); 320 } 321 322 /* 323 * Increment a disk's busy counter. If the counter is going from 324 * 0 to 1, set the timestamp. 325 */ 326 void 327 disk_busy(diskp) 328 struct disk *diskp; 329 { 330 int s; 331 332 /* 333 * XXX We'd like to use something as accurate as microtime(), 334 * but that doesn't depend on the system TOD clock. 335 */ 336 if (diskp->dk_busy++ == 0) { 337 s = splclock(); 338 diskp->dk_timestamp = mono_time; 339 splx(s); 340 } 341 } 342 343 /* 344 * Decrement a disk's busy counter, increment the byte count, total busy 345 * time, and reset the timestamp. 346 */ 347 void 348 disk_unbusy(diskp, bcount) 349 struct disk *diskp; 350 long bcount; 351 { 352 int s; 353 struct timeval dv_time, diff_time; 354 355 if (diskp->dk_busy-- == 0) 356 panic("disk_unbusy: %s: dk_busy < 0", diskp->dk_name); 357 358 s = splclock(); 359 dv_time = mono_time; 360 splx(s); 361 362 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 363 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 364 365 diskp->dk_timestamp = dv_time; 366 if (bcount > 0) { 367 diskp->dk_bytes += bcount; 368 diskp->dk_xfer++; 369 } 370 } 371 372 /* 373 * Reset the metrics counters on the given disk. Note that we cannot 374 * reset the busy counter, as it may case a panic in disk_unbusy(). 375 * We also must avoid playing with the timestamp information, as it 376 * may skew any pending transfer results. 377 */ 378 void 379 disk_resetstat(diskp) 380 struct disk *diskp; 381 { 382 int s = splbio(), t; 383 384 diskp->dk_xfer = 0; 385 diskp->dk_bytes = 0; 386 387 t = splclock(); 388 diskp->dk_attachtime = mono_time; 389 splx(t); 390 391 timerclear(&diskp->dk_time); 392 393 splx(s); 394 } 395