1 /* $NetBSD: subr_disk.c,v 1.73 2005/12/26 18:45:27 perry Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1988, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.73 2005/12/26 18:45:27 perry Exp $"); 78 79 #include "opt_compat_netbsd.h" 80 81 #include <sys/param.h> 82 #include <sys/kernel.h> 83 #include <sys/malloc.h> 84 #include <sys/buf.h> 85 #include <sys/syslog.h> 86 #include <sys/disklabel.h> 87 #include <sys/disk.h> 88 #include <sys/sysctl.h> 89 #include <lib/libkern/libkern.h> 90 91 /* 92 * A global list of all disks attached to the system. May grow or 93 * shrink over time. 94 */ 95 struct disklist_head disklist = TAILQ_HEAD_INITIALIZER(disklist); 96 int disk_count; /* number of drives in global disklist */ 97 struct simplelock disklist_slock = SIMPLELOCK_INITIALIZER; 98 99 /* 100 * Compute checksum for disk label. 101 */ 102 u_int 103 dkcksum(struct disklabel *lp) 104 { 105 u_short *start, *end; 106 u_short sum = 0; 107 108 start = (u_short *)lp; 109 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 110 while (start < end) 111 sum ^= *start++; 112 return (sum); 113 } 114 115 /* 116 * Disk error is the preface to plaintive error messages 117 * about failing disk transfers. It prints messages of the form 118 119 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 120 121 * if the offset of the error in the transfer and a disk label 122 * are both available. blkdone should be -1 if the position of the error 123 * is unknown; the disklabel pointer may be null from drivers that have not 124 * been converted to use them. The message is printed with printf 125 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 126 * The message should be completed (with at least a newline) with printf 127 * or addlog, respectively. There is no trailing space. 128 */ 129 #ifndef PRIdaddr 130 #define PRIdaddr PRId64 131 #endif 132 void 133 diskerr(const struct buf *bp, const char *dname, const char *what, int pri, 134 int blkdone, const struct disklabel *lp) 135 { 136 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 137 void (*pr)(const char *, ...); 138 char partname = 'a' + part; 139 daddr_t sn; 140 141 if (/*CONSTCOND*/0) 142 /* Compiler will error this is the format is wrong... */ 143 printf("%" PRIdaddr, bp->b_blkno); 144 145 if (pri != LOG_PRINTF) { 146 static const char fmt[] = ""; 147 log(pri, fmt); 148 pr = addlog; 149 } else 150 pr = printf; 151 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 152 bp->b_flags & B_READ ? "read" : "writ"); 153 sn = bp->b_blkno; 154 if (bp->b_bcount <= DEV_BSIZE) 155 (*pr)("%" PRIdaddr, sn); 156 else { 157 if (blkdone >= 0) { 158 sn += blkdone; 159 (*pr)("%" PRIdaddr " of ", sn); 160 } 161 (*pr)("%" PRIdaddr "-%" PRIdaddr "", bp->b_blkno, 162 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 163 } 164 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 165 sn += lp->d_partitions[part].p_offset; 166 (*pr)(" (%s%d bn %" PRIdaddr "; cn %" PRIdaddr "", 167 dname, unit, sn, sn / lp->d_secpercyl); 168 sn %= lp->d_secpercyl; 169 (*pr)(" tn %" PRIdaddr " sn %" PRIdaddr ")", 170 sn / lp->d_nsectors, sn % lp->d_nsectors); 171 } 172 } 173 174 /* 175 * Searches the disklist for the disk corresponding to the 176 * name provided. 177 */ 178 struct disk * 179 disk_find(char *name) 180 { 181 struct disk *diskp; 182 183 if ((name == NULL) || (disk_count <= 0)) 184 return (NULL); 185 186 simple_lock(&disklist_slock); 187 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 188 diskp = TAILQ_NEXT(diskp, dk_link)) 189 if (strcmp(diskp->dk_name, name) == 0) { 190 simple_unlock(&disklist_slock); 191 return (diskp); 192 } 193 simple_unlock(&disklist_slock); 194 195 return (NULL); 196 } 197 198 static void 199 disk_init0(struct disk *diskp) 200 { 201 202 /* 203 * Initialize the wedge-related locks and other fields. 204 */ 205 lockinit(&diskp->dk_rawlock, PRIBIO, "dkrawlk", 0, 0); 206 lockinit(&diskp->dk_openlock, PRIBIO, "dkoplk", 0, 0); 207 LIST_INIT(&diskp->dk_wedges); 208 diskp->dk_nwedges = 0; 209 } 210 211 static void 212 disk_attach0(struct disk *diskp) 213 { 214 int s; 215 216 /* 217 * Allocate and initialize the disklabel structures. Note that 218 * it's not safe to sleep here, since we're probably going to be 219 * called during autoconfiguration. 220 */ 221 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 222 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 223 M_NOWAIT); 224 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 225 panic("disk_attach: can't allocate storage for disklabel"); 226 227 memset(diskp->dk_label, 0, sizeof(struct disklabel)); 228 memset(diskp->dk_cpulabel, 0, sizeof(struct cpu_disklabel)); 229 230 /* 231 * Set the attached timestamp. 232 */ 233 s = splclock(); 234 diskp->dk_attachtime = mono_time; 235 splx(s); 236 237 /* 238 * Link into the disklist. 239 */ 240 simple_lock(&disklist_slock); 241 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 242 disk_count++; 243 simple_unlock(&disklist_slock); 244 } 245 246 static void 247 disk_detach0(struct disk *diskp) 248 { 249 250 /* 251 * Remove from the disklist. 252 */ 253 if (disk_count == 0) 254 panic("disk_detach: disk_count == 0"); 255 simple_lock(&disklist_slock); 256 TAILQ_REMOVE(&disklist, diskp, dk_link); 257 disk_count--; 258 simple_unlock(&disklist_slock); 259 260 /* 261 * Free the space used by the disklabel structures. 262 */ 263 free(diskp->dk_label, M_DEVBUF); 264 free(diskp->dk_cpulabel, M_DEVBUF); 265 } 266 267 /* 268 * Attach a disk. 269 */ 270 void 271 disk_attach(struct disk *diskp) 272 { 273 274 disk_init0(diskp); 275 disk_attach0(diskp); 276 } 277 278 /* 279 * Detach a disk. 280 */ 281 void 282 disk_detach(struct disk *diskp) 283 { 284 285 (void) lockmgr(&diskp->dk_openlock, LK_DRAIN, NULL); 286 disk_detach0(diskp); 287 } 288 289 /* 290 * Initialize a pseudo disk. 291 */ 292 void 293 pseudo_disk_init(struct disk *diskp) 294 { 295 296 disk_init0(diskp); 297 } 298 299 /* 300 * Attach a pseudo disk. 301 */ 302 void 303 pseudo_disk_attach(struct disk *diskp) 304 { 305 306 disk_attach0(diskp); 307 } 308 309 /* 310 * Detach a pseudo disk. 311 */ 312 void 313 pseudo_disk_detach(struct disk *diskp) 314 { 315 316 disk_detach0(diskp); 317 } 318 319 320 /* 321 * Increment a disk's busy counter. If the counter is going from 322 * 0 to 1, set the timestamp. 323 */ 324 void 325 disk_busy(struct disk *diskp) 326 { 327 int s; 328 329 /* 330 * XXX We'd like to use something as accurate as microtime(), 331 * but that doesn't depend on the system TOD clock. 332 */ 333 if (diskp->dk_busy++ == 0) { 334 s = splclock(); 335 diskp->dk_timestamp = mono_time; 336 splx(s); 337 } 338 } 339 340 /* 341 * Decrement a disk's busy counter, increment the byte count, total busy 342 * time, and reset the timestamp. 343 */ 344 void 345 disk_unbusy(struct disk *diskp, long bcount, int read) 346 { 347 int s; 348 struct timeval dv_time, diff_time; 349 350 if (diskp->dk_busy-- == 0) { 351 printf("%s: dk_busy < 0\n", diskp->dk_name); 352 panic("disk_unbusy"); 353 } 354 355 s = splclock(); 356 dv_time = mono_time; 357 splx(s); 358 359 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 360 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 361 362 diskp->dk_timestamp = dv_time; 363 if (bcount > 0) { 364 if (read) { 365 diskp->dk_rbytes += bcount; 366 diskp->dk_rxfer++; 367 } else { 368 diskp->dk_wbytes += bcount; 369 diskp->dk_wxfer++; 370 } 371 } 372 } 373 374 /* 375 * Reset the metrics counters on the given disk. Note that we cannot 376 * reset the busy counter, as it may case a panic in disk_unbusy(). 377 * We also must avoid playing with the timestamp information, as it 378 * may skew any pending transfer results. 379 */ 380 void 381 disk_resetstat(struct disk *diskp) 382 { 383 int s = splbio(), t; 384 385 diskp->dk_rxfer = 0; 386 diskp->dk_rbytes = 0; 387 diskp->dk_wxfer = 0; 388 diskp->dk_wbytes = 0; 389 390 t = splclock(); 391 diskp->dk_attachtime = mono_time; 392 splx(t); 393 394 timerclear(&diskp->dk_time); 395 396 splx(s); 397 } 398 399 int 400 sysctl_hw_disknames(SYSCTLFN_ARGS) 401 { 402 char bf[DK_DISKNAMELEN + 1]; 403 char *where = oldp; 404 struct disk *diskp; 405 size_t needed, left, slen; 406 int error, first; 407 408 if (newp != NULL) 409 return (EPERM); 410 if (namelen != 0) 411 return (EINVAL); 412 413 first = 1; 414 error = 0; 415 needed = 0; 416 left = *oldlenp; 417 418 simple_lock(&disklist_slock); 419 for (diskp = TAILQ_FIRST(&disklist); diskp != NULL; 420 diskp = TAILQ_NEXT(diskp, dk_link)) { 421 if (where == NULL) 422 needed += strlen(diskp->dk_name) + 1; 423 else { 424 memset(bf, 0, sizeof(bf)); 425 if (first) { 426 strncpy(bf, diskp->dk_name, sizeof(bf)); 427 first = 0; 428 } else { 429 bf[0] = ' '; 430 strncpy(bf + 1, diskp->dk_name, 431 sizeof(bf) - 1); 432 } 433 bf[DK_DISKNAMELEN] = '\0'; 434 slen = strlen(bf); 435 if (left < slen + 1) 436 break; 437 /* +1 to copy out the trailing NUL byte */ 438 error = copyout(bf, where, slen + 1); 439 if (error) 440 break; 441 where += slen; 442 needed += slen; 443 left -= slen; 444 } 445 } 446 simple_unlock(&disklist_slock); 447 *oldlenp = needed; 448 return (error); 449 } 450 451 int 452 sysctl_hw_diskstats(SYSCTLFN_ARGS) 453 { 454 struct disk_sysctl sdisk; 455 struct disk *diskp; 456 char *where = oldp; 457 size_t tocopy, left; 458 int error; 459 460 if (newp != NULL) 461 return (EPERM); 462 463 /* 464 * The original hw.diskstats call was broken and did not require 465 * the userland to pass in it's size of struct disk_sysctl. This 466 * was fixed after NetBSD 1.6 was released, and any applications 467 * that do not pass in the size are given an error only, unless 468 * we care about 1.6 compatibility. 469 */ 470 if (namelen == 0) 471 #ifdef COMPAT_16 472 tocopy = offsetof(struct disk_sysctl, dk_rxfer); 473 #else 474 return (EINVAL); 475 #endif 476 else 477 tocopy = name[0]; 478 479 if (where == NULL) { 480 *oldlenp = disk_count * tocopy; 481 return (0); 482 } 483 484 error = 0; 485 left = *oldlenp; 486 memset(&sdisk, 0, sizeof(sdisk)); 487 *oldlenp = 0; 488 489 simple_lock(&disklist_slock); 490 TAILQ_FOREACH(diskp, &disklist, dk_link) { 491 if (left < tocopy) 492 break; 493 strncpy(sdisk.dk_name, diskp->dk_name, sizeof(sdisk.dk_name)); 494 sdisk.dk_xfer = diskp->dk_rxfer + diskp->dk_wxfer; 495 sdisk.dk_rxfer = diskp->dk_rxfer; 496 sdisk.dk_wxfer = diskp->dk_wxfer; 497 sdisk.dk_seek = diskp->dk_seek; 498 sdisk.dk_bytes = diskp->dk_rbytes + diskp->dk_wbytes; 499 sdisk.dk_rbytes = diskp->dk_rbytes; 500 sdisk.dk_wbytes = diskp->dk_wbytes; 501 sdisk.dk_attachtime_sec = diskp->dk_attachtime.tv_sec; 502 sdisk.dk_attachtime_usec = diskp->dk_attachtime.tv_usec; 503 sdisk.dk_timestamp_sec = diskp->dk_timestamp.tv_sec; 504 sdisk.dk_timestamp_usec = diskp->dk_timestamp.tv_usec; 505 sdisk.dk_time_sec = diskp->dk_time.tv_sec; 506 sdisk.dk_time_usec = diskp->dk_time.tv_usec; 507 sdisk.dk_busy = diskp->dk_busy; 508 509 error = copyout(&sdisk, where, min(tocopy, sizeof(sdisk))); 510 if (error) 511 break; 512 where += tocopy; 513 *oldlenp += tocopy; 514 left -= tocopy; 515 } 516 simple_unlock(&disklist_slock); 517 return (error); 518 } 519 520 /* 521 * Bounds checking against the media size, used for the raw partition. 522 * The sector size passed in should currently always be DEV_BSIZE, 523 * and the media size the size of the device in DEV_BSIZE sectors. 524 */ 525 int 526 bounds_check_with_mediasize(struct buf *bp, int secsize, uint64_t mediasize) 527 { 528 int64_t sz; 529 530 sz = howmany(bp->b_bcount, secsize); 531 532 if (bp->b_blkno + sz > mediasize) { 533 sz = mediasize - bp->b_blkno; 534 if (sz == 0) { 535 /* If exactly at end of disk, return EOF. */ 536 bp->b_resid = bp->b_bcount; 537 goto done; 538 } 539 if (sz < 0) { 540 /* If past end of disk, return EINVAL. */ 541 bp->b_error = EINVAL; 542 goto bad; 543 } 544 /* Otherwise, truncate request. */ 545 bp->b_bcount = sz << DEV_BSHIFT; 546 } 547 548 return 1; 549 550 bad: 551 bp->b_flags |= B_ERROR; 552 done: 553 return 0; 554 } 555