/* $NetBSD: subr_disk.c,v 1.70 2005/08/20 12:00:01 yamt Exp $ */

/*-
 * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.70 2005/08/20 12:00:01 yamt Exp $"); 78 79 #include "opt_compat_netbsd.h" 80 81 #include <sys/param.h> 82 #include <sys/kernel.h> 83 #include <sys/malloc.h> 84 #include <sys/buf.h> 85 #include <sys/bufq.h> 86 #include <sys/syslog.h> 87 #include <sys/disklabel.h> 88 #include <sys/disk.h> 89 #include <sys/sysctl.h> 90 #include <lib/libkern/libkern.h> 91 92 /* 93 * A global list of all disks attached to the system. May grow or 94 * shrink over time. 95 */ 96 struct disklist_head disklist = TAILQ_HEAD_INITIALIZER(disklist); 97 int disk_count; /* number of drives in global disklist */ 98 struct simplelock disklist_slock = SIMPLELOCK_INITIALIZER; 99 100 int bufq_disk_default_strat = _BUFQ_DEFAULT; 101 102 BUFQ_DEFINE(dummy, 0, NULL); /* so that bufq_strats won't be empty */ 103 104 /* 105 * Compute checksum for disk label. 106 */ 107 u_int 108 dkcksum(struct disklabel *lp) 109 { 110 u_short *start, *end; 111 u_short sum = 0; 112 113 start = (u_short *)lp; 114 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 115 while (start < end) 116 sum ^= *start++; 117 return (sum); 118 } 119 120 /* 121 * Disk error is the preface to plaintive error messages 122 * about failing disk transfers. 
It prints messages of the form 123 124 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 125 126 * if the offset of the error in the transfer and a disk label 127 * are both available. blkdone should be -1 if the position of the error 128 * is unknown; the disklabel pointer may be null from drivers that have not 129 * been converted to use them. The message is printed with printf 130 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 131 * The message should be completed (with at least a newline) with printf 132 * or addlog, respectively. There is no trailing space. 133 */ 134 #ifndef PRIdaddr 135 #define PRIdaddr PRId64 136 #endif 137 void 138 diskerr(const struct buf *bp, const char *dname, const char *what, int pri, 139 int blkdone, const struct disklabel *lp) 140 { 141 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 142 void (*pr)(const char *, ...); 143 char partname = 'a' + part; 144 daddr_t sn; 145 146 if (/*CONSTCOND*/0) 147 /* Compiler will error this is the format is wrong... */ 148 printf("%" PRIdaddr, bp->b_blkno); 149 150 if (pri != LOG_PRINTF) { 151 static const char fmt[] = ""; 152 log(pri, fmt); 153 pr = addlog; 154 } else 155 pr = printf; 156 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 157 bp->b_flags & B_READ ? 
"read" : "writ"); 158 sn = bp->b_blkno; 159 if (bp->b_bcount <= DEV_BSIZE) 160 (*pr)("%" PRIdaddr, sn); 161 else { 162 if (blkdone >= 0) { 163 sn += blkdone; 164 (*pr)("%" PRIdaddr " of ", sn); 165 } 166 (*pr)("%" PRIdaddr "-%" PRIdaddr "", bp->b_blkno, 167 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 168 } 169 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 170 sn += lp->d_partitions[part].p_offset; 171 (*pr)(" (%s%d bn %" PRIdaddr "; cn %" PRIdaddr "", 172 dname, unit, sn, sn / lp->d_secpercyl); 173 sn %= lp->d_secpercyl; 174 (*pr)(" tn %" PRIdaddr " sn %" PRIdaddr ")", 175 sn / lp->d_nsectors, sn % lp->d_nsectors); 176 } 177 } 178 179 /* 180 * Searches the disklist for the disk corresponding to the 181 * name provided. 182 */ 183 struct disk * 184 disk_find(char *name) 185 { 186 struct disk *diskp; 187 188 if ((name == NULL) || (disk_count <= 0)) 189 return (NULL); 190 191 simple_lock(&disklist_slock); 192 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 193 diskp = TAILQ_NEXT(diskp, dk_link)) 194 if (strcmp(diskp->dk_name, name) == 0) { 195 simple_unlock(&disklist_slock); 196 return (diskp); 197 } 198 simple_unlock(&disklist_slock); 199 200 return (NULL); 201 } 202 203 static void 204 disk_init0(struct disk *diskp) 205 { 206 207 /* 208 * Initialize the wedge-related locks and other fields. 209 */ 210 lockinit(&diskp->dk_rawlock, PRIBIO, "dkrawlk", 0, 0); 211 lockinit(&diskp->dk_openlock, PRIBIO, "dkoplk", 0, 0); 212 LIST_INIT(&diskp->dk_wedges); 213 diskp->dk_nwedges = 0; 214 } 215 216 static void 217 disk_attach0(struct disk *diskp) 218 { 219 int s; 220 221 /* 222 * Allocate and initialize the disklabel structures. Note that 223 * it's not safe to sleep here, since we're probably going to be 224 * called during autoconfiguration. 
225 */ 226 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 227 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 228 M_NOWAIT); 229 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 230 panic("disk_attach: can't allocate storage for disklabel"); 231 232 memset(diskp->dk_label, 0, sizeof(struct disklabel)); 233 memset(diskp->dk_cpulabel, 0, sizeof(struct cpu_disklabel)); 234 235 /* 236 * Set the attached timestamp. 237 */ 238 s = splclock(); 239 diskp->dk_attachtime = mono_time; 240 splx(s); 241 242 /* 243 * Link into the disklist. 244 */ 245 simple_lock(&disklist_slock); 246 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 247 disk_count++; 248 simple_unlock(&disklist_slock); 249 } 250 251 static void 252 disk_detach0(struct disk *diskp) 253 { 254 255 /* 256 * Remove from the disklist. 257 */ 258 if (disk_count == 0) 259 panic("disk_detach: disk_count == 0"); 260 simple_lock(&disklist_slock); 261 TAILQ_REMOVE(&disklist, diskp, dk_link); 262 disk_count--; 263 simple_unlock(&disklist_slock); 264 265 /* 266 * Free the space used by the disklabel structures. 267 */ 268 free(diskp->dk_label, M_DEVBUF); 269 free(diskp->dk_cpulabel, M_DEVBUF); 270 } 271 272 /* 273 * Attach a disk. 274 */ 275 void 276 disk_attach(struct disk *diskp) 277 { 278 279 disk_init0(diskp); 280 disk_attach0(diskp); 281 } 282 283 /* 284 * Detach a disk. 285 */ 286 void 287 disk_detach(struct disk *diskp) 288 { 289 290 (void) lockmgr(&diskp->dk_openlock, LK_DRAIN, NULL); 291 disk_detach0(diskp); 292 } 293 294 /* 295 * Initialize a pseudo disk. 296 */ 297 void 298 pseudo_disk_init(struct disk *diskp) 299 { 300 301 disk_init0(diskp); 302 } 303 304 /* 305 * Attach a pseudo disk. 306 */ 307 void 308 pseudo_disk_attach(struct disk *diskp) 309 { 310 311 disk_attach0(diskp); 312 } 313 314 /* 315 * Detach a pseudo disk. 
316 */ 317 void 318 pseudo_disk_detach(struct disk *diskp) 319 { 320 321 disk_detach0(diskp); 322 } 323 324 325 /* 326 * Increment a disk's busy counter. If the counter is going from 327 * 0 to 1, set the timestamp. 328 */ 329 void 330 disk_busy(struct disk *diskp) 331 { 332 int s; 333 334 /* 335 * XXX We'd like to use something as accurate as microtime(), 336 * but that doesn't depend on the system TOD clock. 337 */ 338 if (diskp->dk_busy++ == 0) { 339 s = splclock(); 340 diskp->dk_timestamp = mono_time; 341 splx(s); 342 } 343 } 344 345 /* 346 * Decrement a disk's busy counter, increment the byte count, total busy 347 * time, and reset the timestamp. 348 */ 349 void 350 disk_unbusy(struct disk *diskp, long bcount, int read) 351 { 352 int s; 353 struct timeval dv_time, diff_time; 354 355 if (diskp->dk_busy-- == 0) { 356 printf("%s: dk_busy < 0\n", diskp->dk_name); 357 panic("disk_unbusy"); 358 } 359 360 s = splclock(); 361 dv_time = mono_time; 362 splx(s); 363 364 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 365 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 366 367 diskp->dk_timestamp = dv_time; 368 if (bcount > 0) { 369 if (read) { 370 diskp->dk_rbytes += bcount; 371 diskp->dk_rxfer++; 372 } else { 373 diskp->dk_wbytes += bcount; 374 diskp->dk_wxfer++; 375 } 376 } 377 } 378 379 /* 380 * Reset the metrics counters on the given disk. Note that we cannot 381 * reset the busy counter, as it may case a panic in disk_unbusy(). 382 * We also must avoid playing with the timestamp information, as it 383 * may skew any pending transfer results. 
384 */ 385 void 386 disk_resetstat(struct disk *diskp) 387 { 388 int s = splbio(), t; 389 390 diskp->dk_rxfer = 0; 391 diskp->dk_rbytes = 0; 392 diskp->dk_wxfer = 0; 393 diskp->dk_wbytes = 0; 394 395 t = splclock(); 396 diskp->dk_attachtime = mono_time; 397 splx(t); 398 399 timerclear(&diskp->dk_time); 400 401 splx(s); 402 } 403 404 int 405 sysctl_hw_disknames(SYSCTLFN_ARGS) 406 { 407 char bf[DK_DISKNAMELEN + 1]; 408 char *where = oldp; 409 struct disk *diskp; 410 size_t needed, left, slen; 411 int error, first; 412 413 if (newp != NULL) 414 return (EPERM); 415 if (namelen != 0) 416 return (EINVAL); 417 418 first = 1; 419 error = 0; 420 needed = 0; 421 left = *oldlenp; 422 423 simple_lock(&disklist_slock); 424 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 425 diskp = TAILQ_NEXT(diskp, dk_link)) { 426 if (where == NULL) 427 needed += strlen(diskp->dk_name) + 1; 428 else { 429 memset(bf, 0, sizeof(bf)); 430 if (first) { 431 strncpy(bf, diskp->dk_name, sizeof(bf)); 432 first = 0; 433 } else { 434 bf[0] = ' '; 435 strncpy(bf + 1, diskp->dk_name, 436 sizeof(bf) - 1); 437 } 438 bf[DK_DISKNAMELEN] = '\0'; 439 slen = strlen(bf); 440 if (left < slen + 1) 441 break; 442 /* +1 to copy out the trailing NUL byte */ 443 error = copyout(bf, where, slen + 1); 444 if (error) 445 break; 446 where += slen; 447 needed += slen; 448 left -= slen; 449 } 450 } 451 simple_unlock(&disklist_slock); 452 *oldlenp = needed; 453 return (error); 454 } 455 456 int 457 sysctl_hw_diskstats(SYSCTLFN_ARGS) 458 { 459 struct disk_sysctl sdisk; 460 struct disk *diskp; 461 char *where = oldp; 462 size_t tocopy, left; 463 int error; 464 465 if (newp != NULL) 466 return (EPERM); 467 468 /* 469 * The original hw.diskstats call was broken and did not require 470 * the userland to pass in it's size of struct disk_sysctl. This 471 * was fixed after NetBSD 1.6 was released, and any applications 472 * that do not pass in the size are given an error only, unless 473 * we care about 1.6 compatibility. 
474 */ 475 if (namelen == 0) 476 #ifdef COMPAT_16 477 tocopy = offsetof(struct disk_sysctl, dk_rxfer); 478 #else 479 return (EINVAL); 480 #endif 481 else 482 tocopy = name[0]; 483 484 if (where == NULL) { 485 *oldlenp = disk_count * tocopy; 486 return (0); 487 } 488 489 error = 0; 490 left = *oldlenp; 491 memset(&sdisk, 0, sizeof(sdisk)); 492 *oldlenp = 0; 493 494 simple_lock(&disklist_slock); 495 TAILQ_FOREACH(diskp, &disklist, dk_link) { 496 if (left < tocopy) 497 break; 498 strncpy(sdisk.dk_name, diskp->dk_name, sizeof(sdisk.dk_name)); 499 sdisk.dk_xfer = diskp->dk_rxfer + diskp->dk_wxfer; 500 sdisk.dk_rxfer = diskp->dk_rxfer; 501 sdisk.dk_wxfer = diskp->dk_wxfer; 502 sdisk.dk_seek = diskp->dk_seek; 503 sdisk.dk_bytes = diskp->dk_rbytes + diskp->dk_wbytes; 504 sdisk.dk_rbytes = diskp->dk_rbytes; 505 sdisk.dk_wbytes = diskp->dk_wbytes; 506 sdisk.dk_attachtime_sec = diskp->dk_attachtime.tv_sec; 507 sdisk.dk_attachtime_usec = diskp->dk_attachtime.tv_usec; 508 sdisk.dk_timestamp_sec = diskp->dk_timestamp.tv_sec; 509 sdisk.dk_timestamp_usec = diskp->dk_timestamp.tv_usec; 510 sdisk.dk_time_sec = diskp->dk_time.tv_sec; 511 sdisk.dk_time_usec = diskp->dk_time.tv_usec; 512 sdisk.dk_busy = diskp->dk_busy; 513 514 error = copyout(&sdisk, where, min(tocopy, sizeof(sdisk))); 515 if (error) 516 break; 517 where += tocopy; 518 *oldlenp += tocopy; 519 left -= tocopy; 520 } 521 simple_unlock(&disklist_slock); 522 return (error); 523 } 524 525 /* 526 * Create a device buffer queue. 
527 */ 528 void 529 bufq_alloc(struct bufq_state *bufq, int flags) 530 { 531 __link_set_decl(bufq_strats, const struct bufq_strat); 532 int methodid; 533 const struct bufq_strat *bsp; 534 const struct bufq_strat * const *it; 535 536 bufq->bq_flags = flags; 537 methodid = flags & BUFQ_METHOD_MASK; 538 539 switch (flags & BUFQ_SORT_MASK) { 540 case BUFQ_SORT_RAWBLOCK: 541 case BUFQ_SORT_CYLINDER: 542 break; 543 case 0: 544 if (methodid == BUFQ_FCFS) 545 break; 546 /* FALLTHROUGH */ 547 default: 548 panic("bufq_alloc: sort out of range"); 549 } 550 551 /* 552 * select strategy. 553 * if a strategy specified by flags is found, use it. 554 * otherwise, select one with the largest id number. XXX 555 */ 556 bsp = NULL; 557 __link_set_foreach(it, bufq_strats) { 558 if ((*it) == &bufq_strat_dummy) 559 continue; 560 if (methodid == (*it)->bs_id) { 561 bsp = *it; 562 break; 563 } 564 if (bsp == NULL || (*it)->bs_id > bsp->bs_id) 565 bsp = *it; 566 } 567 568 KASSERT(bsp != NULL); 569 #ifdef DEBUG 570 if (bsp->bs_id != methodid && methodid != _BUFQ_DEFAULT) 571 printf("bufq_alloc: method 0x%04x is not available.\n", 572 methodid); 573 #endif 574 #ifdef BUFQ_DEBUG 575 /* XXX aprint? */ 576 printf("bufq_alloc: using %s\n", bsp->bs_name); 577 #endif 578 (*bsp->bs_initfn)(bufq); 579 } 580 581 /* 582 * Drain a device buffer queue. 583 */ 584 void 585 bufq_drain(struct bufq_state *bufq) 586 { 587 struct buf *bp; 588 589 while ((bp = BUFQ_GET(bufq)) != NULL) { 590 bp->b_error = EIO; 591 bp->b_flags |= B_ERROR; 592 bp->b_resid = bp->b_bcount; 593 biodone(bp); 594 } 595 } 596 597 /* 598 * Destroy a device buffer queue. 599 */ 600 void 601 bufq_free(struct bufq_state *bufq) 602 { 603 604 KASSERT(bufq->bq_private != NULL); 605 KASSERT(BUFQ_PEEK(bufq) == NULL); 606 607 FREE(bufq->bq_private, M_DEVBUF); 608 bufq->bq_get = NULL; 609 bufq->bq_put = NULL; 610 } 611 612 /* 613 * Bounds checking against the media size, used for the raw partition. 
614 * The sector size passed in should currently always be DEV_BSIZE, 615 * and the media size the size of the device in DEV_BSIZE sectors. 616 */ 617 int 618 bounds_check_with_mediasize(struct buf *bp, int secsize, u_int64_t mediasize) 619 { 620 int64_t sz; 621 622 sz = howmany(bp->b_bcount, secsize); 623 624 if (bp->b_blkno + sz > mediasize) { 625 sz = mediasize - bp->b_blkno; 626 if (sz == 0) { 627 /* If exactly at end of disk, return EOF. */ 628 bp->b_resid = bp->b_bcount; 629 goto done; 630 } 631 if (sz < 0) { 632 /* If past end of disk, return EINVAL. */ 633 bp->b_error = EINVAL; 634 goto bad; 635 } 636 /* Otherwise, truncate request. */ 637 bp->b_bcount = sz << DEV_BSHIFT; 638 } 639 640 return 1; 641 642 bad: 643 bp->b_flags |= B_ERROR; 644 done: 645 return 0; 646 } 647