1 /* $OpenBSD: subr_disk.c,v 1.82 2008/08/25 11:27:00 krw Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 #include <sys/proc.h> 59 #include <uvm/uvm_extern.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/rndvar.h> 67 #include <dev/cons.h> 68 69 /* 70 * A global list of all disks attached to the system. May grow or 71 * shrink over time. 72 */ 73 struct disklist_head disklist; /* TAILQ_HEAD */ 74 int disk_count; /* number of drives in global disklist */ 75 int disk_change; /* set if a disk has been attached/detached 76 * since last we looked at this variable. This 77 * is reset by hw_sysctl() 78 */ 79 80 /* 81 * Seek sort for disks. We depend on the driver which calls us using b_resid 82 * as the current cylinder number. 83 * 84 * The argument ap structure holds a b_actf activity chain pointer on which we 85 * keep two queues, sorted in ascending cylinder order. The first queue holds 86 * those requests which are positioned after the current cylinder (in the first 87 * request); the second holds requests which came in after their cylinder number 88 * was passed. Thus we implement a one way scan, retracting after reaching the 89 * end of the drive to the first request on the second queue, at which time it 90 * becomes the first queue. 91 * 92 * A one-way scan is natural because of the way UNIX read-ahead blocks are 93 * allocated. 94 */ 95 96 void 97 disksort(struct buf *ap, struct buf *bp) 98 { 99 struct buf *bq; 100 101 /* If the queue is empty, then it's easy. */ 102 if (ap->b_actf == NULL) { 103 bp->b_actf = NULL; 104 ap->b_actf = bp; 105 return; 106 } 107 108 /* 109 * If we lie after the first (currently active) request, then we 110 * must locate the second request list and add ourselves to it. 111 */ 112 bq = ap->b_actf; 113 if (bp->b_cylinder < bq->b_cylinder) { 114 while (bq->b_actf) { 115 /* 116 * Check for an ``inversion'' in the normally ascending 117 * cylinder numbers, indicating the start of the second 118 * request list. 119 */ 120 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 121 /* 122 * Search the second request list for the first 123 * request at a larger cylinder number. We go 124 * before that; if there is no such request, we 125 * go at end. 126 */ 127 do { 128 if (bp->b_cylinder < 129 bq->b_actf->b_cylinder) 130 goto insert; 131 if (bp->b_cylinder == 132 bq->b_actf->b_cylinder && 133 bp->b_blkno < bq->b_actf->b_blkno) 134 goto insert; 135 bq = bq->b_actf; 136 } while (bq->b_actf); 137 goto insert; /* after last */ 138 } 139 bq = bq->b_actf; 140 } 141 /* 142 * No inversions... we will go after the last, and 143 * be the first request in the second request list. 144 */ 145 goto insert; 146 } 147 /* 148 * Request is at/after the current request... 149 * sort in the first request list. 150 */ 151 while (bq->b_actf) { 152 /* 153 * We want to go after the current request if there is an 154 * inversion after it (i.e. it is the end of the first 155 * request list), or if the next request is a larger cylinder 156 * than our request. 157 */ 158 if (bq->b_actf->b_cylinder < bq->b_cylinder || 159 bp->b_cylinder < bq->b_actf->b_cylinder || 160 (bp->b_cylinder == bq->b_actf->b_cylinder && 161 bp->b_blkno < bq->b_actf->b_blkno)) 162 goto insert; 163 bq = bq->b_actf; 164 } 165 /* 166 * Neither a second list nor a larger request... we go at the end of 167 * the first list, which is the same as the end of the whole schebang. 168 */ 169 insert: bp->b_actf = bq->b_actf; 170 bq->b_actf = bp; 171 } 172 173 /* 174 * Compute checksum for disk label. 175 */ 176 u_int 177 dkcksum(struct disklabel *lp) 178 { 179 u_int16_t *start, *end; 180 u_int16_t sum = 0; 181 182 start = (u_int16_t *)lp; 183 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 184 while (start < end) 185 sum ^= *start++; 186 return (sum); 187 } 188 189 char * 190 initdisklabel(struct disklabel *lp) 191 { 192 int i; 193 194 /* minimal requirements for archetypal disk label */ 195 if (lp->d_secsize < DEV_BSIZE) 196 lp->d_secsize = DEV_BSIZE; 197 if (DL_GETDSIZE(lp) == 0) 198 DL_SETDSIZE(lp, MAXDISKSIZE); 199 if (lp->d_secpercyl == 0) 200 return ("invalid geometry"); 201 lp->d_npartitions = RAW_PART + 1; 202 for (i = 0; i < RAW_PART; i++) { 203 DL_SETPSIZE(&lp->d_partitions[i], 0); 204 DL_SETPOFFSET(&lp->d_partitions[i], 0); 205 } 206 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 207 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 208 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 209 lp->d_version = 1; 210 lp->d_bbsize = 8192; 211 lp->d_sbsize = 64*1024; /* XXX ? */ 212 return (NULL); 213 } 214 215 /* 216 * Check an incoming block to make sure it is a disklabel, convert it to 217 * a newer version if needed, etc etc. 218 */ 219 char * 220 checkdisklabel(void *rlp, struct disklabel *lp) 221 { 222 struct disklabel *dlp = rlp; 223 struct __partitionv0 *v0pp; 224 struct partition *pp; 225 daddr64_t disksize; 226 char *msg = NULL; 227 int i; 228 229 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 230 msg = "no disk label"; 231 else if (dlp->d_npartitions > MAXPARTITIONS) 232 msg = "unreasonable partition count"; 233 else if (dkcksum(dlp) != 0) 234 msg = "disk label corrupted"; 235 236 if (msg) { 237 u_int16_t *start, *end, sum = 0; 238 239 /* If it is byte-swapped, attempt to convert it */ 240 if (swap32(dlp->d_magic) != DISKMAGIC || 241 swap32(dlp->d_magic2) != DISKMAGIC || 242 swap16(dlp->d_npartitions) > MAXPARTITIONS) 243 return (msg); 244 245 /* 246 * Need a byte-swap aware dkcksum varient 247 * inlined, because dkcksum uses a sub-field 248 */ 249 start = (u_int16_t *)dlp; 250 end = (u_int16_t *)&dlp->d_partitions[ 251 swap16(dlp->d_npartitions)]; 252 while (start < end) 253 sum ^= *start++; 254 if (sum != 0) 255 return (msg); 256 257 dlp->d_magic = swap32(dlp->d_magic); 258 dlp->d_type = swap16(dlp->d_type); 259 dlp->d_subtype = swap16(dlp->d_subtype); 260 261 /* d_typename and d_packname are strings */ 262 263 dlp->d_secsize = swap32(dlp->d_secsize); 264 dlp->d_nsectors = swap32(dlp->d_nsectors); 265 dlp->d_ntracks = swap32(dlp->d_ntracks); 266 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 267 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 268 dlp->d_secperunit = swap32(dlp->d_secperunit); 269 270 dlp->d_sparespertrack = swap16(dlp->d_sparespertrack); 271 dlp->d_sparespercyl = swap16(dlp->d_sparespercyl); 272 273 dlp->d_acylinders = swap32(dlp->d_acylinders); 274 275 dlp->d_rpm = swap16(dlp->d_rpm); 276 dlp->d_interleave = swap16(dlp->d_interleave); 277 dlp->d_trackskew = swap16(dlp->d_trackskew); 278 dlp->d_cylskew = swap16(dlp->d_cylskew); 279 dlp->d_headswitch = swap32(dlp->d_headswitch); 280 dlp->d_trkseek = swap32(dlp->d_trkseek); 281 dlp->d_flags = swap32(dlp->d_flags); 282 283 for (i = 0; i < NDDATA; i++) 284 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 285 286 dlp->d_secperunith = swap16(dlp->d_secperunith); 287 dlp->d_version = swap16(dlp->d_version); 288 289 for (i = 0; i < NSPARE; i++) 290 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 291 292 dlp->d_magic2 = swap32(dlp->d_magic2); 293 dlp->d_checksum = swap16(dlp->d_checksum); 294 295 dlp->d_npartitions = swap16(dlp->d_npartitions); 296 dlp->d_bbsize = swap32(dlp->d_bbsize); 297 dlp->d_sbsize = swap32(dlp->d_sbsize); 298 299 for (i = 0; i < MAXPARTITIONS; i++) { 300 pp = &dlp->d_partitions[i]; 301 pp->p_size = swap32(pp->p_size); 302 pp->p_offset = swap32(pp->p_offset); 303 if (dlp->d_version == 0) { 304 v0pp = (struct __partitionv0 *)pp; 305 v0pp->p_fsize = swap32(v0pp->p_fsize); 306 } else { 307 pp->p_offseth = swap16(pp->p_offseth); 308 pp->p_sizeh = swap16(pp->p_sizeh); 309 } 310 pp->p_cpg = swap16(pp->p_cpg); 311 } 312 313 dlp->d_checksum = 0; 314 dlp->d_checksum = dkcksum(dlp); 315 msg = NULL; 316 } 317 318 /* XXX should verify lots of other fields and whine a lot */ 319 320 if (msg) 321 return (msg); 322 323 /* Initial passed in lp contains the real disk size. */ 324 disksize = DL_GETDSIZE(lp); 325 326 if (lp != dlp) 327 *lp = *dlp; 328 329 if (lp->d_version == 0) { 330 lp->d_version = 1; 331 lp->d_secperunith = 0; 332 333 v0pp = (struct __partitionv0 *)lp->d_partitions; 334 pp = lp->d_partitions; 335 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 336 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 337 p_fsize, v0pp->p_frag); 338 pp->p_offseth = 0; 339 pp->p_sizeh = 0; 340 } 341 } 342 343 #ifdef DEBUG 344 if (DL_GETDSIZE(lp) != disksize) 345 printf("on-disk disklabel has incorrect disksize (%lld)\n", 346 DL_GETDSIZE(lp)); 347 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 348 printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n", 349 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 350 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 351 printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n", 352 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 353 #endif 354 DL_SETDSIZE(lp, disksize); 355 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 356 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 357 358 lp->d_checksum = 0; 359 lp->d_checksum = dkcksum(lp); 360 return (msg); 361 } 362 363 /* 364 * If dos partition table requested, attempt to load it and 365 * find disklabel inside a DOS partition. Return buffer 366 * for use in signalling errors if requested. 367 * 368 * We would like to check if each MBR has a valid BOOT_MAGIC, but 369 * we cannot because it doesn't always exist. So.. we assume the 370 * MBR is valid. 371 */ 372 char * 373 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 374 struct disklabel *lp, int *partoffp, int spoofonly) 375 { 376 struct dos_partition dp[NDOSPART], *dp2; 377 u_int32_t extoff = 0; 378 daddr64_t part_blkno = DOSBBSECTOR; 379 int dospartoff = 0, i, ourpart = -1; 380 int wander = 1, n = 0, loop = 0; 381 int offset; 382 383 if (lp->d_secpercyl == 0) 384 return ("invalid label, d_secpercyl == 0"); 385 if (lp->d_secsize == 0) 386 return ("invalid label, d_secsize == 0"); 387 388 /* do DOS partitions in the process of getting disklabel? */ 389 390 /* 391 * Read dos partition table, follow extended partitions. 392 * Map the partitions to disklabel entries i-p 393 */ 394 while (wander && n < 8 && loop < 8) { 395 loop++; 396 wander = 0; 397 if (part_blkno < extoff) 398 part_blkno = extoff; 399 400 /* read boot record */ 401 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp); 402 offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF; 403 bp->b_bcount = lp->d_secsize; 404 bp->b_flags = B_BUSY | B_READ | B_RAW; 405 (*strat)(bp); 406 if (biowait(bp)) { 407 /*wrong*/ if (partoffp) 408 /*wrong*/ *partoffp = -1; 409 return ("dos partition I/O error"); 410 } 411 412 bcopy(bp->b_data + offset, dp, sizeof(dp)); 413 414 if (ourpart == -1) { 415 /* Search for our MBR partition */ 416 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 417 i++, dp2++) 418 if (letoh32(dp2->dp_size) && 419 dp2->dp_typ == DOSPTYP_OPENBSD) 420 ourpart = i; 421 if (ourpart == -1) 422 goto donot; 423 /* 424 * This is our MBR partition. need sector 425 * address for SCSI/IDE, cylinder for 426 * ESDI/ST506/RLL 427 */ 428 dp2 = &dp[ourpart]; 429 dospartoff = letoh32(dp2->dp_start) + part_blkno; 430 431 /* found our OpenBSD partition, finish up */ 432 if (partoffp) 433 goto notfat; 434 435 if (lp->d_ntracks == 0) 436 lp->d_ntracks = dp2->dp_ehd + 1; 437 if (lp->d_nsectors == 0) 438 lp->d_nsectors = DPSECT(dp2->dp_esect); 439 if (lp->d_secpercyl == 0) 440 lp->d_secpercyl = lp->d_ntracks * 441 lp->d_nsectors; 442 } 443 donot: 444 /* 445 * In case the disklabel read below fails, we want to 446 * provide a fake label in i-p. 447 */ 448 for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) { 449 struct partition *pp = &lp->d_partitions[8+n]; 450 u_int8_t fstype; 451 452 if (dp2->dp_typ == DOSPTYP_OPENBSD) 453 continue; 454 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 455 continue; 456 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 457 continue; 458 if (letoh32(dp2->dp_size) == 0) 459 continue; 460 461 switch (dp2->dp_typ) { 462 case DOSPTYP_UNUSED: 463 fstype = FS_UNUSED; 464 n++; 465 break; 466 467 case DOSPTYP_LINUX: 468 fstype = FS_EXT2FS; 469 n++; 470 break; 471 472 case DOSPTYP_NTFS: 473 fstype = FS_NTFS; 474 n++; 475 break; 476 477 case DOSPTYP_FAT12: 478 case DOSPTYP_FAT16S: 479 case DOSPTYP_FAT16B: 480 case DOSPTYP_FAT16L: 481 case DOSPTYP_FAT32: 482 case DOSPTYP_FAT32L: 483 fstype = FS_MSDOS; 484 n++; 485 break; 486 case DOSPTYP_EXTEND: 487 case DOSPTYP_EXTENDL: 488 part_blkno = letoh32(dp2->dp_start) + extoff; 489 if (!extoff) { 490 extoff = letoh32(dp2->dp_start); 491 part_blkno = 0; 492 } 493 wander = 1; 494 break; 495 default: 496 fstype = FS_OTHER; 497 n++; 498 break; 499 } 500 501 /* 502 * Don't set fstype/offset/size when wandering or just 503 * looking for the offset of the OpenBSD partition. It 504 * would invalidate the disklabel checksum! 505 */ 506 if (wander || partoffp) 507 continue; 508 509 pp->p_fstype = fstype; 510 if (letoh32(dp2->dp_start)) 511 DL_SETPOFFSET(pp, 512 letoh32(dp2->dp_start) + part_blkno); 513 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 514 } 515 } 516 if (partoffp) 517 /* dospartoff has been set and we must not modify *lp. */ 518 goto notfat; 519 520 lp->d_npartitions = MAXPARTITIONS; 521 522 if (n == 0 && part_blkno == DOSBBSECTOR) { 523 u_int16_t fattest; 524 525 /* Check for a short jump instruction. */ 526 fattest = ((bp->b_data[0] << 8) & 0xff00) | 527 (bp->b_data[2] & 0xff); 528 if (fattest != 0xeb90 && fattest != 0xe900) 529 goto notfat; 530 531 /* Check for a valid bytes per sector value. */ 532 fattest = ((bp->b_data[12] << 8) & 0xff00) | 533 (bp->b_data[11] & 0xff); 534 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 535 goto notfat; 536 537 /* Check the end of sector marker. */ 538 fattest = ((bp->b_data[510] << 8) & 0xff00) | 539 (bp->b_data[511] & 0xff); 540 if (fattest != 0x55aa) 541 goto notfat; 542 543 /* Looks like a FAT filesystem. Spoof 'i'. */ 544 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 545 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 546 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 547 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 548 } 549 notfat: 550 551 /* record the OpenBSD partition's placement for the caller */ 552 if (partoffp) 553 *partoffp = dospartoff; 554 555 /* don't read the on-disk label if we are in spoofed-only mode */ 556 if (spoofonly) 557 return (NULL); 558 559 bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) * 560 DL_BLKSPERSEC(lp); 561 offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR); 562 bp->b_bcount = lp->d_secsize; 563 bp->b_flags = B_BUSY | B_READ | B_RAW; 564 (*strat)(bp); 565 if (biowait(bp)) 566 return ("disk label I/O error"); 567 568 /* sub-MBR disklabels are always at a LABELOFFSET of 0 */ 569 return checkdisklabel(bp->b_data + offset, lp); 570 } 571 572 /* 573 * Check new disk label for sensibility 574 * before setting it. 575 */ 576 int 577 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 578 { 579 int i; 580 struct partition *opp, *npp; 581 582 /* sanity clause */ 583 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 584 (nlp->d_secsize % DEV_BSIZE) != 0) 585 return (EINVAL); 586 587 /* special case to allow disklabel to be invalidated */ 588 if (nlp->d_magic == 0xffffffff) { 589 *olp = *nlp; 590 return (0); 591 } 592 593 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 594 dkcksum(nlp) != 0) 595 return (EINVAL); 596 597 /* XXX missing check if other dos partitions will be overwritten */ 598 599 while (openmask != 0) { 600 i = ffs(openmask) - 1; 601 openmask &= ~(1 << i); 602 if (nlp->d_npartitions <= i) 603 return (EBUSY); 604 opp = &olp->d_partitions[i]; 605 npp = &nlp->d_partitions[i]; 606 if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 607 DL_GETPSIZE(npp) < DL_GETPSIZE(opp)) 608 return (EBUSY); 609 /* 610 * Copy internally-set partition information 611 * if new label doesn't include it. XXX 612 */ 613 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 614 npp->p_fstype = opp->p_fstype; 615 npp->p_fragblock = opp->p_fragblock; 616 npp->p_cpg = opp->p_cpg; 617 } 618 } 619 nlp->d_checksum = 0; 620 nlp->d_checksum = dkcksum(nlp); 621 *olp = *nlp; 622 return (0); 623 } 624 625 /* 626 * Determine the size of the transfer, and make sure it is within the 627 * boundaries of the partition. Adjust transfer if needed, and signal errors or 628 * early completion. 629 */ 630 int 631 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 632 { 633 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 634 daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE); 635 636 /* avoid division by zero */ 637 if (lp->d_secpercyl == 0) 638 goto bad; 639 640 if (bp->b_blkno < 0 || sz < 0) 641 panic("bounds_check_with_label %lld %lld\n", bp->b_blkno, sz); 642 643 /* beyond partition? */ 644 if (bp->b_blkno + sz > DL_SECTOBLK(lp, DL_GETPSIZE(p))) { 645 sz = DL_SECTOBLK(lp, DL_GETPSIZE(p)) - bp->b_blkno; 646 if (sz == 0) { 647 /* If exactly at end of disk, return EOF. */ 648 bp->b_resid = bp->b_bcount; 649 return (-1); 650 } 651 if (sz < 0) 652 /* If past end of disk, return EINVAL. */ 653 goto bad; 654 655 /* Otherwise, truncate request. */ 656 bp->b_bcount = sz << DEV_BSHIFT; 657 } 658 659 /* calculate cylinder for disksort to order transfers with */ 660 bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) / 661 DL_SECTOBLK(lp, lp->d_secpercyl); 662 return (1); 663 664 bad: 665 bp->b_error = EINVAL; 666 bp->b_flags |= B_ERROR; 667 return (-1); 668 } 669 670 /* 671 * Disk error is the preface to plaintive error messages 672 * about failing disk transfers. It prints messages of the form 673 674 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 675 676 * if the offset of the error in the transfer and a disk label 677 * are both available. blkdone should be -1 if the position of the error 678 * is unknown; the disklabel pointer may be null from drivers that have not 679 * been converted to use them. The message is printed with printf 680 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 681 * The message should be completed (with at least a newline) with printf 682 * or addlog, respectively. There is no trailing space. 683 */ 684 void 685 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 686 struct disklabel *lp) 687 { 688 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 689 int (*pr)(const char *, ...); 690 char partname = 'a' + part; 691 daddr64_t sn; 692 693 if (pri != LOG_PRINTF) { 694 static const char fmt[] = ""; 695 log(pri, fmt); 696 pr = addlog; 697 } else 698 pr = printf; 699 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 700 bp->b_flags & B_READ ? "read" : "writ"); 701 sn = bp->b_blkno; 702 if (bp->b_bcount <= DEV_BSIZE) 703 (*pr)("%lld", sn); 704 else { 705 if (blkdone >= 0) { 706 sn += blkdone; 707 (*pr)("%lld of ", sn); 708 } 709 (*pr)("%lld-%lld", bp->b_blkno, 710 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 711 } 712 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 713 sn += DL_GETPOFFSET(&lp->d_partitions[part]); 714 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn, 715 sn / lp->d_secpercyl); 716 sn %= lp->d_secpercyl; 717 (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors, 718 sn % lp->d_nsectors); 719 } 720 } 721 722 /* 723 * Initialize the disklist. Called by main() before autoconfiguration. 724 */ 725 void 726 disk_init(void) 727 { 728 729 TAILQ_INIT(&disklist); 730 disk_count = disk_change = 0; 731 } 732 733 int 734 disk_construct(struct disk *diskp, char *lockname) 735 { 736 rw_init(&diskp->dk_lock, lockname); 737 mtx_init(&diskp->dk_mtx, IPL_BIO); 738 739 diskp->dk_flags |= DKF_CONSTRUCTED; 740 741 return (0); 742 } 743 744 /* 745 * Attach a disk. 746 */ 747 void 748 disk_attach(struct disk *diskp) 749 { 750 751 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 752 disk_construct(diskp, diskp->dk_name); 753 754 /* 755 * Allocate and initialize the disklabel structures. Note that 756 * it's not safe to sleep here, since we're probably going to be 757 * called during autoconfiguration. 758 */ 759 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 760 M_NOWAIT|M_ZERO); 761 if (diskp->dk_label == NULL) 762 panic("disk_attach: can't allocate storage for disklabel"); 763 764 /* 765 * Set the attached timestamp. 766 */ 767 microuptime(&diskp->dk_attachtime); 768 769 /* 770 * Link into the disklist. 771 */ 772 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 773 ++disk_count; 774 disk_change = 1; 775 } 776 777 /* 778 * Detach a disk. 779 */ 780 void 781 disk_detach(struct disk *diskp) 782 { 783 784 /* 785 * Free the space used by the disklabel structures. 786 */ 787 free(diskp->dk_label, M_DEVBUF); 788 789 /* 790 * Remove from the disklist. 791 */ 792 TAILQ_REMOVE(&disklist, diskp, dk_link); 793 disk_change = 1; 794 if (--disk_count < 0) 795 panic("disk_detach: disk_count < 0"); 796 } 797 798 /* 799 * Increment a disk's busy counter. If the counter is going from 800 * 0 to 1, set the timestamp. 801 */ 802 void 803 disk_busy(struct disk *diskp) 804 { 805 806 /* 807 * XXX We'd like to use something as accurate as microtime(), 808 * but that doesn't depend on the system TOD clock. 809 */ 810 mtx_enter(&diskp->dk_mtx); 811 if (diskp->dk_busy++ == 0) 812 microuptime(&diskp->dk_timestamp); 813 mtx_leave(&diskp->dk_mtx); 814 } 815 816 /* 817 * Decrement a disk's busy counter, increment the byte count, total busy 818 * time, and reset the timestamp. 819 */ 820 void 821 disk_unbusy(struct disk *diskp, long bcount, int read) 822 { 823 struct timeval dv_time, diff_time; 824 825 mtx_enter(&diskp->dk_mtx); 826 827 if (diskp->dk_busy-- == 0) 828 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 829 830 microuptime(&dv_time); 831 832 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 833 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 834 835 diskp->dk_timestamp = dv_time; 836 if (bcount > 0) { 837 if (read) { 838 diskp->dk_rbytes += bcount; 839 diskp->dk_rxfer++; 840 } else { 841 diskp->dk_wbytes += bcount; 842 diskp->dk_wxfer++; 843 } 844 } else 845 diskp->dk_seek++; 846 847 mtx_leave(&diskp->dk_mtx); 848 849 add_disk_randomness(bcount ^ diff_time.tv_usec); 850 } 851 852 int 853 disk_lock(struct disk *dk) 854 { 855 int error; 856 857 error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR); 858 859 return (error); 860 } 861 862 void 863 disk_unlock(struct disk *dk) 864 { 865 rw_exit(&dk->dk_lock); 866 } 867 868 int 869 dk_mountroot(void) 870 { 871 dev_t rawdev, rrootdev; 872 int part = DISKPART(rootdev); 873 int (*mountrootfn)(void); 874 struct disklabel dl; 875 int error; 876 877 rrootdev = blktochr(rootdev); 878 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 879 #ifdef DEBUG 880 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 881 rrootdev, rawdev); 882 #endif 883 884 /* 885 * open device, ioctl for the disklabel, and close it. 886 */ 887 error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD, 888 S_IFCHR, curproc); 889 if (error) 890 panic("cannot open disk, 0x%x/0x%x, error %d", 891 rootdev, rrootdev, error); 892 error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO, 893 (caddr_t)&dl, FREAD, curproc); 894 if (error) 895 panic("cannot read disk label, 0x%x/0x%x, error %d", 896 rootdev, rrootdev, error); 897 (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD, 898 S_IFCHR, curproc); 899 900 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 901 panic("root filesystem has size 0"); 902 switch (dl.d_partitions[part].p_fstype) { 903 #ifdef EXT2FS 904 case FS_EXT2FS: 905 { 906 extern int ext2fs_mountroot(void); 907 mountrootfn = ext2fs_mountroot; 908 } 909 break; 910 #endif 911 #ifdef FFS 912 case FS_BSDFFS: 913 { 914 extern int ffs_mountroot(void); 915 mountrootfn = ffs_mountroot; 916 } 917 break; 918 #endif 919 #ifdef CD9660 920 case FS_ISO9660: 921 { 922 extern int cd9660_mountroot(void); 923 mountrootfn = cd9660_mountroot; 924 } 925 break; 926 #endif 927 default: 928 #ifdef FFS 929 { 930 extern int ffs_mountroot(void); 931 932 printf("filesystem type %d not known.. assuming ffs\n", 933 dl.d_partitions[part].p_fstype); 934 mountrootfn = ffs_mountroot; 935 } 936 #else 937 panic("disk 0x%x/0x%x filesystem type %d not known", 938 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 939 #endif 940 } 941 return (*mountrootfn)(); 942 } 943 944 struct bufq * 945 bufq_default_alloc(void) 946 { 947 struct bufq_default *bq; 948 949 bq = malloc(sizeof(*bq), M_DEVBUF, M_NOWAIT|M_ZERO); 950 if (bq == NULL) 951 panic("bufq_default_alloc: no memory"); 952 953 bq->bufq.bufq_free = bufq_default_free; 954 bq->bufq.bufq_add = bufq_default_add; 955 bq->bufq.bufq_get = bufq_default_get; 956 957 return ((struct bufq *)bq); 958 } 959 960 void 961 bufq_default_free(struct bufq *bq) 962 { 963 free(bq, M_DEVBUF); 964 } 965 966 void 967 bufq_default_add(struct bufq *bq, struct buf *bp) 968 { 969 struct bufq_default *bufq = (struct bufq_default *)bq; 970 struct proc *p = bp->b_proc; 971 struct buf *head; 972 973 if (p == NULL || p->p_nice < NZERO) 974 head = &bufq->bufq_head[0]; 975 else if (p->p_nice == NZERO) 976 head = &bufq->bufq_head[1]; 977 else 978 head = &bufq->bufq_head[2]; 979 980 disksort(head, bp); 981 } 982 983 struct buf * 984 bufq_default_get(struct bufq *bq) 985 { 986 struct bufq_default *bufq = (struct bufq_default *)bq; 987 struct buf *bp, *head; 988 int i; 989 990 for (i = 0; i < 3; i++) { 991 head = &bufq->bufq_head[i]; 992 if ((bp = head->b_actf)) 993 break; 994 } 995 if (bp == NULL) 996 return (NULL); 997 head->b_actf = bp->b_actf; 998 return (bp); 999 } 1000 1001 struct device * 1002 getdisk(char *str, int len, int defpart, dev_t *devp) 1003 { 1004 struct device *dv; 1005 1006 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1007 printf("use one of: exit"); 1008 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1009 if (dv->dv_class == DV_DISK) 1010 printf(" %s[a-p]", dv->dv_xname); 1011 #if defined(NFSCLIENT) 1012 if (dv->dv_class == DV_IFNET) 1013 printf(" %s", dv->dv_xname); 1014 #endif 1015 } 1016 printf("\n"); 1017 } 1018 return (dv); 1019 } 1020 1021 struct device * 1022 parsedisk(char *str, int len, int defpart, dev_t *devp) 1023 { 1024 struct device *dv; 1025 int majdev, part = defpart; 1026 char c; 1027 1028 if (len == 0) 1029 return (NULL); 1030 c = str[len-1]; 1031 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1032 part = c - 'a'; 1033 len -= 1; 1034 } 1035 1036 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1037 if (dv->dv_class == DV_DISK && 1038 strncmp(str, dv->dv_xname, len) == 0 && 1039 dv->dv_xname[len] == '\0') { 1040 majdev = findblkmajor(dv); 1041 if (majdev < 0) 1042 panic("parsedisk"); 1043 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1044 break; 1045 } 1046 #if defined(NFSCLIENT) 1047 if (dv->dv_class == DV_IFNET && 1048 strncmp(str, dv->dv_xname, len) == 0 && 1049 dv->dv_xname[len] == '\0') { 1050 *devp = NODEV; 1051 break; 1052 } 1053 #endif 1054 } 1055 1056 return (dv); 1057 } 1058 1059 void 1060 setroot(struct device *bootdv, int part, int exitflags) 1061 { 1062 int majdev, unit, len, s; 1063 struct swdevt *swp; 1064 struct device *rootdv, *dv; 1065 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1066 struct ifnet *ifp = NULL; 1067 char buf[128]; 1068 #if defined(NFSCLIENT) 1069 extern char *nfsbootdevname; 1070 #endif 1071 1072 /* 1073 * If `swap generic' and we couldn't determine boot device, 1074 * ask the user. 1075 */ 1076 if (mountroot == NULL && bootdv == NULL) 1077 boothowto |= RB_ASKNAME; 1078 if (boothowto & RB_ASKNAME) { 1079 while (1) { 1080 printf("root device"); 1081 if (bootdv != NULL) { 1082 printf(" (default %s", bootdv->dv_xname); 1083 if (bootdv->dv_class == DV_DISK) 1084 printf("%c", 'a' + part); 1085 printf(")"); 1086 } 1087 printf(": "); 1088 s = splhigh(); 1089 cnpollc(TRUE); 1090 len = getsn(buf, sizeof(buf)); 1091 cnpollc(FALSE); 1092 splx(s); 1093 if (strcmp(buf, "exit") == 0) 1094 boot(exitflags); 1095 if (len == 0 && bootdv != NULL) { 1096 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1097 len = strlen(buf); 1098 } 1099 if (len > 0 && buf[len - 1] == '*') { 1100 buf[--len] = '\0'; 1101 dv = getdisk(buf, len, part, &nrootdev); 1102 if (dv != NULL) { 1103 rootdv = dv; 1104 nswapdev = nrootdev; 1105 goto gotswap; 1106 } 1107 } 1108 dv = getdisk(buf, len, part, &nrootdev); 1109 if (dv != NULL) { 1110 rootdv = dv; 1111 break; 1112 } 1113 } 1114 1115 if (rootdv->dv_class == DV_IFNET) 1116 goto gotswap; 1117 1118 /* try to build swap device out of new root device */ 1119 while (1) { 1120 printf("swap device"); 1121 if (rootdv != NULL) 1122 printf(" (default %s%s)", rootdv->dv_xname, 1123 rootdv->dv_class == DV_DISK ? "b" : ""); 1124 printf(": "); 1125 s = splhigh(); 1126 cnpollc(TRUE); 1127 len = getsn(buf, sizeof(buf)); 1128 cnpollc(FALSE); 1129 splx(s); 1130 if (strcmp(buf, "exit") == 0) 1131 boot(exitflags); 1132 if (len == 0 && rootdv != NULL) { 1133 switch (rootdv->dv_class) { 1134 case DV_IFNET: 1135 nswapdev = NODEV; 1136 break; 1137 case DV_DISK: 1138 nswapdev = MAKEDISKDEV(major(nrootdev), 1139 DISKUNIT(nrootdev), 1); 1140 if (nswapdev == nrootdev) 1141 continue; 1142 break; 1143 default: 1144 break; 1145 } 1146 break; 1147 } 1148 dv = getdisk(buf, len, 1, &nswapdev); 1149 if (dv) { 1150 if (dv->dv_class == DV_IFNET) 1151 nswapdev = NODEV; 1152 if (nswapdev == nrootdev) 1153 continue; 1154 break; 1155 } 1156 } 1157 gotswap: 1158 rootdev = nrootdev; 1159 dumpdev = nswapdev; 1160 swdevt[0].sw_dev = nswapdev; 1161 swdevt[1].sw_dev = NODEV; 1162 #if defined(NFSCLIENT) 1163 } else if (mountroot == nfs_mountroot) { 1164 rootdv = bootdv; 1165 rootdev = dumpdev = swapdev = NODEV; 1166 #endif 1167 } else if (mountroot == NULL && rootdev == NODEV) { 1168 /* 1169 * `swap generic' 1170 */ 1171 rootdv = bootdv; 1172 majdev = findblkmajor(rootdv); 1173 if (majdev >= 0) { 1174 /* 1175 * Root and swap are on the disk. 1176 * Assume swap is on partition b. 1177 */ 1178 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1179 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1180 } else { 1181 /* 1182 * Root and swap are on a net. 1183 */ 1184 nswapdev = NODEV; 1185 } 1186 dumpdev = nswapdev; 1187 swdevt[0].sw_dev = nswapdev; 1188 /* swdevt[1].sw_dev = NODEV; */ 1189 } else { 1190 /* Completely pre-configured, but we want rootdv .. */ 1191 majdev = major(rootdev); 1192 if (findblkname(majdev) == NULL) 1193 return; 1194 unit = DISKUNIT(rootdev); 1195 part = DISKPART(rootdev); 1196 snprintf(buf, sizeof buf, "%s%d%c", 1197 findblkname(majdev), unit, 'a' + part); 1198 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1199 if (rootdv == NULL) 1200 panic("root device (%s) not found", buf); 1201 } 1202 1203 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1204 ifp = ifunit(rootdv->dv_xname); 1205 else if (bootdv && bootdv->dv_class == DV_IFNET) 1206 ifp = ifunit(bootdv->dv_xname); 1207 1208 if (ifp) 1209 if_addgroup(ifp, "netboot"); 1210 1211 switch (rootdv->dv_class) { 1212 #if defined(NFSCLIENT) 1213 case DV_IFNET: 1214 mountroot = nfs_mountroot; 1215 nfsbootdevname = rootdv->dv_xname; 1216 return; 1217 #endif 1218 case DV_DISK: 1219 mountroot = dk_mountroot; 1220 part = DISKPART(rootdev); 1221 break; 1222 default: 1223 printf("can't figure root, hope your kernel is right\n"); 1224 return; 1225 } 1226 1227 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1228 1229 /* 1230 * Make the swap partition on the root drive the primary swap. 1231 */ 1232 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1233 if (major(rootdev) == major(swp->sw_dev) && 1234 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1235 temp = swdevt[0].sw_dev; 1236 swdevt[0].sw_dev = swp->sw_dev; 1237 swp->sw_dev = temp; 1238 break; 1239 } 1240 } 1241 if (swp->sw_dev != NODEV) { 1242 /* 1243 * If dumpdev was the same as the old primary swap device, 1244 * move it to the new primary swap device. 1245 */ 1246 if (temp == dumpdev) 1247 dumpdev = swdevt[0].sw_dev; 1248 } 1249 if (swdevt[0].sw_dev != NODEV) 1250 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1251 DISKUNIT(swdevt[0].sw_dev), 1252 'a' + DISKPART(swdevt[0].sw_dev)); 1253 if (dumpdev != NODEV) 1254 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1255 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1256 printf("\n"); 1257 } 1258 1259 extern struct nam2blk nam2blk[]; 1260 1261 int 1262 findblkmajor(struct device *dv) 1263 { 1264 char buf[16], *p; 1265 int i; 1266 1267 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1268 return (-1); 1269 for (p = buf; *p; p++) 1270 if (*p >= '0' && *p <= '9') 1271 *p = '\0'; 1272 1273 for (i = 0; nam2blk[i].name; i++) 1274 if (!strcmp(buf, nam2blk[i].name)) 1275 return (nam2blk[i].maj); 1276 return (-1); 1277 } 1278 1279 char * 1280 findblkname(int maj) 1281 { 1282 int i; 1283 1284 for (i = 0; nam2blk[i].name; i++) 1285 if (nam2blk[i].maj == maj) 1286 return (nam2blk[i].name); 1287 return (NULL); 1288 } 1289