1 /* $NetBSD: subr_disk.c,v 1.69 2005/05/29 22:24:15 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1988, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.69 2005/05/29 22:24:15 christos Exp $"); 78 79 #include "opt_compat_netbsd.h" 80 81 #include <sys/param.h> 82 #include <sys/kernel.h> 83 #include <sys/malloc.h> 84 #include <sys/buf.h> 85 #include <sys/bufq.h> 86 #include <sys/syslog.h> 87 #include <sys/disklabel.h> 88 #include <sys/disk.h> 89 #include <sys/sysctl.h> 90 #include <lib/libkern/libkern.h> 91 92 /* 93 * A global list of all disks attached to the system. May grow or 94 * shrink over time. 95 */ 96 struct disklist_head disklist = TAILQ_HEAD_INITIALIZER(disklist); 97 int disk_count; /* number of drives in global disklist */ 98 struct simplelock disklist_slock = SIMPLELOCK_INITIALIZER; 99 100 int bufq_disk_default_strat = _BUFQ_DEFAULT; 101 102 BUFQ_DEFINE(dummy, 0, NULL); /* so that bufq_strats won't be empty */ 103 104 /* 105 * Compute checksum for disk label. 106 */ 107 u_int 108 dkcksum(struct disklabel *lp) 109 { 110 u_short *start, *end; 111 u_short sum = 0; 112 113 start = (u_short *)lp; 114 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 115 while (start < end) 116 sum ^= *start++; 117 return (sum); 118 } 119 120 /* 121 * Disk error is the preface to plaintive error messages 122 * about failing disk transfers. It prints messages of the form 123 124 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 125 126 * if the offset of the error in the transfer and a disk label 127 * are both available. blkdone should be -1 if the position of the error 128 * is unknown; the disklabel pointer may be null from drivers that have not 129 * been converted to use them. The message is printed with printf 130 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 131 * The message should be completed (with at least a newline) with printf 132 * or addlog, respectively. There is no trailing space. 133 */ 134 #ifndef PRIdaddr 135 #define PRIdaddr PRId64 136 #endif 137 void 138 diskerr(const struct buf *bp, const char *dname, const char *what, int pri, 139 int blkdone, const struct disklabel *lp) 140 { 141 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 142 void (*pr)(const char *, ...); 143 char partname = 'a' + part; 144 daddr_t sn; 145 146 if (/*CONSTCOND*/0) 147 /* Compiler will error this is the format is wrong... */ 148 printf("%" PRIdaddr, bp->b_blkno); 149 150 if (pri != LOG_PRINTF) { 151 static const char fmt[] = ""; 152 log(pri, fmt); 153 pr = addlog; 154 } else 155 pr = printf; 156 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 157 bp->b_flags & B_READ ? "read" : "writ"); 158 sn = bp->b_blkno; 159 if (bp->b_bcount <= DEV_BSIZE) 160 (*pr)("%" PRIdaddr, sn); 161 else { 162 if (blkdone >= 0) { 163 sn += blkdone; 164 (*pr)("%" PRIdaddr " of ", sn); 165 } 166 (*pr)("%" PRIdaddr "-%" PRIdaddr "", bp->b_blkno, 167 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 168 } 169 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 170 sn += lp->d_partitions[part].p_offset; 171 (*pr)(" (%s%d bn %" PRIdaddr "; cn %" PRIdaddr "", 172 dname, unit, sn, sn / lp->d_secpercyl); 173 sn %= lp->d_secpercyl; 174 (*pr)(" tn %" PRIdaddr " sn %" PRIdaddr ")", 175 sn / lp->d_nsectors, sn % lp->d_nsectors); 176 } 177 } 178 179 /* 180 * Searches the disklist for the disk corresponding to the 181 * name provided. 182 */ 183 struct disk * 184 disk_find(char *name) 185 { 186 struct disk *diskp; 187 188 if ((name == NULL) || (disk_count <= 0)) 189 return (NULL); 190 191 simple_lock(&disklist_slock); 192 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 193 diskp = TAILQ_NEXT(diskp, dk_link)) 194 if (strcmp(diskp->dk_name, name) == 0) { 195 simple_unlock(&disklist_slock); 196 return (diskp); 197 } 198 simple_unlock(&disklist_slock); 199 200 return (NULL); 201 } 202 203 /* 204 * Attach a disk. 205 */ 206 void 207 disk_attach(struct disk *diskp) 208 { 209 int s; 210 211 /* 212 * Allocate and initialize the disklabel structures. Note that 213 * it's not safe to sleep here, since we're probably going to be 214 * called during autoconfiguration. 215 */ 216 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 217 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 218 M_NOWAIT); 219 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 220 panic("disk_attach: can't allocate storage for disklabel"); 221 222 memset(diskp->dk_label, 0, sizeof(struct disklabel)); 223 memset(diskp->dk_cpulabel, 0, sizeof(struct cpu_disklabel)); 224 225 /* 226 * Initialize the wedge-related locks and other fields. 227 */ 228 lockinit(&diskp->dk_rawlock, PRIBIO, "dkrawlk", 0, 0); 229 lockinit(&diskp->dk_openlock, PRIBIO, "dkoplk", 0, 0); 230 LIST_INIT(&diskp->dk_wedges); 231 diskp->dk_nwedges = 0; 232 233 /* 234 * Set the attached timestamp. 235 */ 236 s = splclock(); 237 diskp->dk_attachtime = mono_time; 238 splx(s); 239 240 /* 241 * Link into the disklist. 242 */ 243 simple_lock(&disklist_slock); 244 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 245 disk_count++; 246 simple_unlock(&disklist_slock); 247 } 248 249 /* 250 * Detach a disk. 251 */ 252 void 253 disk_detach(struct disk *diskp) 254 { 255 256 (void) lockmgr(&diskp->dk_openlock, LK_DRAIN, NULL); 257 258 /* 259 * Remove from the disklist. 260 */ 261 if (disk_count == 0) 262 panic("disk_detach: disk_count == 0"); 263 simple_lock(&disklist_slock); 264 TAILQ_REMOVE(&disklist, diskp, dk_link); 265 disk_count--; 266 simple_unlock(&disklist_slock); 267 268 /* 269 * Free the space used by the disklabel structures. 270 */ 271 free(diskp->dk_label, M_DEVBUF); 272 free(diskp->dk_cpulabel, M_DEVBUF); 273 } 274 275 /* 276 * Increment a disk's busy counter. If the counter is going from 277 * 0 to 1, set the timestamp. 278 */ 279 void 280 disk_busy(struct disk *diskp) 281 { 282 int s; 283 284 /* 285 * XXX We'd like to use something as accurate as microtime(), 286 * but that doesn't depend on the system TOD clock. 287 */ 288 if (diskp->dk_busy++ == 0) { 289 s = splclock(); 290 diskp->dk_timestamp = mono_time; 291 splx(s); 292 } 293 } 294 295 /* 296 * Decrement a disk's busy counter, increment the byte count, total busy 297 * time, and reset the timestamp. 298 */ 299 void 300 disk_unbusy(struct disk *diskp, long bcount, int read) 301 { 302 int s; 303 struct timeval dv_time, diff_time; 304 305 if (diskp->dk_busy-- == 0) { 306 printf("%s: dk_busy < 0\n", diskp->dk_name); 307 panic("disk_unbusy"); 308 } 309 310 s = splclock(); 311 dv_time = mono_time; 312 splx(s); 313 314 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 315 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 316 317 diskp->dk_timestamp = dv_time; 318 if (bcount > 0) { 319 if (read) { 320 diskp->dk_rbytes += bcount; 321 diskp->dk_rxfer++; 322 } else { 323 diskp->dk_wbytes += bcount; 324 diskp->dk_wxfer++; 325 } 326 } 327 } 328 329 /* 330 * Reset the metrics counters on the given disk. Note that we cannot 331 * reset the busy counter, as it may case a panic in disk_unbusy(). 332 * We also must avoid playing with the timestamp information, as it 333 * may skew any pending transfer results. 334 */ 335 void 336 disk_resetstat(struct disk *diskp) 337 { 338 int s = splbio(), t; 339 340 diskp->dk_rxfer = 0; 341 diskp->dk_rbytes = 0; 342 diskp->dk_wxfer = 0; 343 diskp->dk_wbytes = 0; 344 345 t = splclock(); 346 diskp->dk_attachtime = mono_time; 347 splx(t); 348 349 timerclear(&diskp->dk_time); 350 351 splx(s); 352 } 353 354 int 355 sysctl_hw_disknames(SYSCTLFN_ARGS) 356 { 357 char bf[DK_DISKNAMELEN + 1]; 358 char *where = oldp; 359 struct disk *diskp; 360 size_t needed, left, slen; 361 int error, first; 362 363 if (newp != NULL) 364 return (EPERM); 365 if (namelen != 0) 366 return (EINVAL); 367 368 first = 1; 369 error = 0; 370 needed = 0; 371 left = *oldlenp; 372 373 simple_lock(&disklist_slock); 374 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 375 diskp = TAILQ_NEXT(diskp, dk_link)) { 376 if (where == NULL) 377 needed += strlen(diskp->dk_name) + 1; 378 else { 379 memset(bf, 0, sizeof(bf)); 380 if (first) { 381 strncpy(bf, diskp->dk_name, sizeof(bf)); 382 first = 0; 383 } else { 384 bf[0] = ' '; 385 strncpy(bf + 1, diskp->dk_name, 386 sizeof(bf) - 1); 387 } 388 bf[DK_DISKNAMELEN] = '\0'; 389 slen = strlen(bf); 390 if (left < slen + 1) 391 break; 392 /* +1 to copy out the trailing NUL byte */ 393 error = copyout(bf, where, slen + 1); 394 if (error) 395 break; 396 where += slen; 397 needed += slen; 398 left -= slen; 399 } 400 } 401 simple_unlock(&disklist_slock); 402 *oldlenp = needed; 403 return (error); 404 } 405 406 int 407 sysctl_hw_diskstats(SYSCTLFN_ARGS) 408 { 409 struct disk_sysctl sdisk; 410 struct disk *diskp; 411 char *where = oldp; 412 size_t tocopy, left; 413 int error; 414 415 if (newp != NULL) 416 return (EPERM); 417 418 /* 419 * The original hw.diskstats call was broken and did not require 420 * the userland to pass in it's size of struct disk_sysctl. This 421 * was fixed after NetBSD 1.6 was released, and any applications 422 * that do not pass in the size are given an error only, unless 423 * we care about 1.6 compatibility. 424 */ 425 if (namelen == 0) 426 #ifdef COMPAT_16 427 tocopy = offsetof(struct disk_sysctl, dk_rxfer); 428 #else 429 return (EINVAL); 430 #endif 431 else 432 tocopy = name[0]; 433 434 if (where == NULL) { 435 *oldlenp = disk_count * tocopy; 436 return (0); 437 } 438 439 error = 0; 440 left = *oldlenp; 441 memset(&sdisk, 0, sizeof(sdisk)); 442 *oldlenp = 0; 443 444 simple_lock(&disklist_slock); 445 TAILQ_FOREACH(diskp, &disklist, dk_link) { 446 if (left < tocopy) 447 break; 448 strncpy(sdisk.dk_name, diskp->dk_name, sizeof(sdisk.dk_name)); 449 sdisk.dk_xfer = diskp->dk_rxfer + diskp->dk_wxfer; 450 sdisk.dk_rxfer = diskp->dk_rxfer; 451 sdisk.dk_wxfer = diskp->dk_wxfer; 452 sdisk.dk_seek = diskp->dk_seek; 453 sdisk.dk_bytes = diskp->dk_rbytes + diskp->dk_wbytes; 454 sdisk.dk_rbytes = diskp->dk_rbytes; 455 sdisk.dk_wbytes = diskp->dk_wbytes; 456 sdisk.dk_attachtime_sec = diskp->dk_attachtime.tv_sec; 457 sdisk.dk_attachtime_usec = diskp->dk_attachtime.tv_usec; 458 sdisk.dk_timestamp_sec = diskp->dk_timestamp.tv_sec; 459 sdisk.dk_timestamp_usec = diskp->dk_timestamp.tv_usec; 460 sdisk.dk_time_sec = diskp->dk_time.tv_sec; 461 sdisk.dk_time_usec = diskp->dk_time.tv_usec; 462 sdisk.dk_busy = diskp->dk_busy; 463 464 error = copyout(&sdisk, where, min(tocopy, sizeof(sdisk))); 465 if (error) 466 break; 467 where += tocopy; 468 *oldlenp += tocopy; 469 left -= tocopy; 470 } 471 simple_unlock(&disklist_slock); 472 return (error); 473 } 474 475 /* 476 * Create a device buffer queue. 477 */ 478 void 479 bufq_alloc(struct bufq_state *bufq, int flags) 480 { 481 __link_set_decl(bufq_strats, const struct bufq_strat); 482 int methodid; 483 const struct bufq_strat *bsp; 484 const struct bufq_strat * const *it; 485 486 bufq->bq_flags = flags; 487 methodid = flags & BUFQ_METHOD_MASK; 488 489 switch (flags & BUFQ_SORT_MASK) { 490 case BUFQ_SORT_RAWBLOCK: 491 case BUFQ_SORT_CYLINDER: 492 break; 493 case 0: 494 if (methodid == BUFQ_FCFS) 495 break; 496 /* FALLTHROUGH */ 497 default: 498 panic("bufq_alloc: sort out of range"); 499 } 500 501 /* 502 * select strategy. 503 * if a strategy specified by flags is found, use it. 504 * otherwise, select one with the largest id number. XXX 505 */ 506 bsp = NULL; 507 __link_set_foreach(it, bufq_strats) { 508 if ((*it) == &bufq_strat_dummy) 509 continue; 510 if (methodid == (*it)->bs_id) { 511 bsp = *it; 512 break; 513 } 514 if (bsp == NULL || (*it)->bs_id > bsp->bs_id) 515 bsp = *it; 516 } 517 518 KASSERT(bsp != NULL); 519 #ifdef DEBUG 520 if (bsp->bs_id != methodid && methodid != _BUFQ_DEFAULT) 521 printf("bufq_alloc: method 0x%04x is not available.\n", 522 methodid); 523 #endif 524 #ifdef BUFQ_DEBUG 525 /* XXX aprint? */ 526 printf("bufq_alloc: using %s\n", bsp->bs_name); 527 #endif 528 (*bsp->bs_initfn)(bufq); 529 } 530 531 /* 532 * Drain a device buffer queue. 533 */ 534 void 535 bufq_drain(struct bufq_state *bufq) 536 { 537 struct buf *bp; 538 539 while ((bp = BUFQ_GET(bufq)) != NULL) { 540 bp->b_error = EIO; 541 bp->b_flags |= B_ERROR; 542 bp->b_resid = bp->b_bcount; 543 biodone(bp); 544 } 545 } 546 547 /* 548 * Destroy a device buffer queue. 549 */ 550 void 551 bufq_free(struct bufq_state *bufq) 552 { 553 554 KASSERT(bufq->bq_private != NULL); 555 KASSERT(BUFQ_PEEK(bufq) == NULL); 556 557 FREE(bufq->bq_private, M_DEVBUF); 558 bufq->bq_get = NULL; 559 bufq->bq_put = NULL; 560 } 561 562 /* 563 * Bounds checking against the media size, used for the raw partition. 564 * The sector size passed in should currently always be DEV_BSIZE, 565 * and the media size the size of the device in DEV_BSIZE sectors. 566 */ 567 int 568 bounds_check_with_mediasize(struct buf *bp, int secsize, u_int64_t mediasize) 569 { 570 int64_t sz; 571 572 sz = howmany(bp->b_bcount, secsize); 573 574 if (bp->b_blkno + sz > mediasize) { 575 sz = mediasize - bp->b_blkno; 576 if (sz == 0) { 577 /* If exactly at end of disk, return EOF. */ 578 bp->b_resid = bp->b_bcount; 579 goto done; 580 } 581 if (sz < 0) { 582 /* If past end of disk, return EINVAL. */ 583 bp->b_error = EINVAL; 584 goto bad; 585 } 586 /* Otherwise, truncate request. */ 587 bp->b_bcount = sz << DEV_BSHIFT; 588 } 589 590 return 1; 591 592 bad: 593 bp->b_flags |= B_ERROR; 594 done: 595 return 0; 596 } 597