1 /* $OpenBSD: subr_disk.c,v 1.85 2009/03/28 14:58:10 dlg Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 #include <sys/proc.h> 59 #include <uvm/uvm_extern.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 64 #include <net/if.h> 65 66 #include <dev/rndvar.h> 67 #include <dev/cons.h> 68 69 /* 70 * A global list of all disks attached to the system. May grow or 71 * shrink over time. 72 */ 73 struct disklist_head disklist; /* TAILQ_HEAD */ 74 int disk_count; /* number of drives in global disklist */ 75 int disk_change; /* set if a disk has been attached/detached 76 * since last we looked at this variable. This 77 * is reset by hw_sysctl() 78 */ 79 80 /* 81 * Seek sort for disks. We depend on the driver which calls us using b_resid 82 * as the current cylinder number. 83 * 84 * The argument ap structure holds a b_actf activity chain pointer on which we 85 * keep two queues, sorted in ascending cylinder order. The first queue holds 86 * those requests which are positioned after the current cylinder (in the first 87 * request); the second holds requests which came in after their cylinder number 88 * was passed. Thus we implement a one way scan, retracting after reaching the 89 * end of the drive to the first request on the second queue, at which time it 90 * becomes the first queue. 91 * 92 * A one-way scan is natural because of the way UNIX read-ahead blocks are 93 * allocated. 94 */ 95 96 void 97 disksort(struct buf *ap, struct buf *bp) 98 { 99 struct buf *bq; 100 101 /* If the queue is empty, then it's easy. */ 102 if (ap->b_actf == NULL) { 103 bp->b_actf = NULL; 104 ap->b_actf = bp; 105 return; 106 } 107 108 /* 109 * If we lie after the first (currently active) request, then we 110 * must locate the second request list and add ourselves to it. 111 */ 112 bq = ap->b_actf; 113 if (bp->b_cylinder < bq->b_cylinder) { 114 while (bq->b_actf) { 115 /* 116 * Check for an ``inversion'' in the normally ascending 117 * cylinder numbers, indicating the start of the second 118 * request list. 119 */ 120 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 121 /* 122 * Search the second request list for the first 123 * request at a larger cylinder number. We go 124 * before that; if there is no such request, we 125 * go at end. 126 */ 127 do { 128 if (bp->b_cylinder < 129 bq->b_actf->b_cylinder) 130 goto insert; 131 if (bp->b_cylinder == 132 bq->b_actf->b_cylinder && 133 bp->b_blkno < bq->b_actf->b_blkno) 134 goto insert; 135 bq = bq->b_actf; 136 } while (bq->b_actf); 137 goto insert; /* after last */ 138 } 139 bq = bq->b_actf; 140 } 141 /* 142 * No inversions... we will go after the last, and 143 * be the first request in the second request list. 144 */ 145 goto insert; 146 } 147 /* 148 * Request is at/after the current request... 149 * sort in the first request list. 150 */ 151 while (bq->b_actf) { 152 /* 153 * We want to go after the current request if there is an 154 * inversion after it (i.e. it is the end of the first 155 * request list), or if the next request is a larger cylinder 156 * than our request. 157 */ 158 if (bq->b_actf->b_cylinder < bq->b_cylinder || 159 bp->b_cylinder < bq->b_actf->b_cylinder || 160 (bp->b_cylinder == bq->b_actf->b_cylinder && 161 bp->b_blkno < bq->b_actf->b_blkno)) 162 goto insert; 163 bq = bq->b_actf; 164 } 165 /* 166 * Neither a second list nor a larger request... we go at the end of 167 * the first list, which is the same as the end of the whole schebang. 168 */ 169 insert: bp->b_actf = bq->b_actf; 170 bq->b_actf = bp; 171 } 172 173 /* 174 * Compute checksum for disk label. 175 */ 176 u_int 177 dkcksum(struct disklabel *lp) 178 { 179 u_int16_t *start, *end; 180 u_int16_t sum = 0; 181 182 start = (u_int16_t *)lp; 183 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 184 while (start < end) 185 sum ^= *start++; 186 return (sum); 187 } 188 189 char * 190 initdisklabel(struct disklabel *lp) 191 { 192 int i; 193 194 /* minimal requirements for archetypal disk label */ 195 if (lp->d_secsize < DEV_BSIZE) 196 lp->d_secsize = DEV_BSIZE; 197 if (DL_GETDSIZE(lp) == 0) 198 DL_SETDSIZE(lp, MAXDISKSIZE); 199 if (lp->d_secpercyl == 0) 200 return ("invalid geometry"); 201 lp->d_npartitions = RAW_PART + 1; 202 for (i = 0; i < RAW_PART; i++) { 203 DL_SETPSIZE(&lp->d_partitions[i], 0); 204 DL_SETPOFFSET(&lp->d_partitions[i], 0); 205 } 206 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 207 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 208 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 209 lp->d_version = 1; 210 lp->d_bbsize = 8192; 211 lp->d_sbsize = 64*1024; /* XXX ? */ 212 return (NULL); 213 } 214 215 /* 216 * Check an incoming block to make sure it is a disklabel, convert it to 217 * a newer version if needed, etc etc. 218 */ 219 char * 220 checkdisklabel(void *rlp, struct disklabel *lp) 221 { 222 struct disklabel *dlp = rlp; 223 struct __partitionv0 *v0pp; 224 struct partition *pp; 225 daddr64_t disksize; 226 char *msg = NULL; 227 int i; 228 229 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 230 msg = "no disk label"; 231 else if (dlp->d_npartitions > MAXPARTITIONS) 232 msg = "invalid label, partition count > MAXPARTITIONS"; 233 else if (dlp->d_secpercyl == 0) 234 msg = "invalid label, d_secpercyl == 0"; 235 else if (dlp->d_secsize == 0) 236 msg = "invalid label, d_secsize == 0"; 237 else if (dkcksum(dlp) != 0) 238 msg = "invalid label, incorrect checksum"; 239 240 if (msg) { 241 u_int16_t *start, *end, sum = 0; 242 243 /* If it is byte-swapped, attempt to convert it */ 244 if (swap32(dlp->d_magic) != DISKMAGIC || 245 swap32(dlp->d_magic2) != DISKMAGIC || 246 swap16(dlp->d_npartitions) > MAXPARTITIONS) 247 return (msg); 248 249 /* 250 * Need a byte-swap aware dkcksum varient 251 * inlined, because dkcksum uses a sub-field 252 */ 253 start = (u_int16_t *)dlp; 254 end = (u_int16_t *)&dlp->d_partitions[ 255 swap16(dlp->d_npartitions)]; 256 while (start < end) 257 sum ^= *start++; 258 if (sum != 0) 259 return (msg); 260 261 dlp->d_magic = swap32(dlp->d_magic); 262 dlp->d_type = swap16(dlp->d_type); 263 dlp->d_subtype = swap16(dlp->d_subtype); 264 265 /* d_typename and d_packname are strings */ 266 267 dlp->d_secsize = swap32(dlp->d_secsize); 268 dlp->d_nsectors = swap32(dlp->d_nsectors); 269 dlp->d_ntracks = swap32(dlp->d_ntracks); 270 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 271 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 272 dlp->d_secperunit = swap32(dlp->d_secperunit); 273 274 dlp->d_sparespertrack = swap16(dlp->d_sparespertrack); 275 dlp->d_sparespercyl = swap16(dlp->d_sparespercyl); 276 277 dlp->d_acylinders = swap32(dlp->d_acylinders); 278 279 dlp->d_rpm = swap16(dlp->d_rpm); 280 dlp->d_interleave = swap16(dlp->d_interleave); 281 dlp->d_trackskew = swap16(dlp->d_trackskew); 282 dlp->d_cylskew = swap16(dlp->d_cylskew); 283 dlp->d_headswitch = swap32(dlp->d_headswitch); 284 dlp->d_trkseek = swap32(dlp->d_trkseek); 285 dlp->d_flags = swap32(dlp->d_flags); 286 287 for (i = 0; i < NDDATA; i++) 288 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 289 290 dlp->d_secperunith = swap16(dlp->d_secperunith); 291 dlp->d_version = swap16(dlp->d_version); 292 293 for (i = 0; i < NSPARE; i++) 294 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 295 296 dlp->d_magic2 = swap32(dlp->d_magic2); 297 dlp->d_checksum = swap16(dlp->d_checksum); 298 299 dlp->d_npartitions = swap16(dlp->d_npartitions); 300 dlp->d_bbsize = swap32(dlp->d_bbsize); 301 dlp->d_sbsize = swap32(dlp->d_sbsize); 302 303 for (i = 0; i < MAXPARTITIONS; i++) { 304 pp = &dlp->d_partitions[i]; 305 pp->p_size = swap32(pp->p_size); 306 pp->p_offset = swap32(pp->p_offset); 307 if (dlp->d_version == 0) { 308 v0pp = (struct __partitionv0 *)pp; 309 v0pp->p_fsize = swap32(v0pp->p_fsize); 310 } else { 311 pp->p_offseth = swap16(pp->p_offseth); 312 pp->p_sizeh = swap16(pp->p_sizeh); 313 } 314 pp->p_cpg = swap16(pp->p_cpg); 315 } 316 317 dlp->d_checksum = 0; 318 dlp->d_checksum = dkcksum(dlp); 319 msg = NULL; 320 } 321 322 /* XXX should verify lots of other fields and whine a lot */ 323 324 if (msg) 325 return (msg); 326 327 /* Initial passed in lp contains the real disk size. */ 328 disksize = DL_GETDSIZE(lp); 329 330 if (lp != dlp) 331 *lp = *dlp; 332 333 if (lp->d_version == 0) { 334 lp->d_version = 1; 335 lp->d_secperunith = 0; 336 337 v0pp = (struct __partitionv0 *)lp->d_partitions; 338 pp = lp->d_partitions; 339 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 340 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 341 p_fsize, v0pp->p_frag); 342 pp->p_offseth = 0; 343 pp->p_sizeh = 0; 344 } 345 } 346 347 #ifdef DEBUG 348 if (DL_GETDSIZE(lp) != disksize) 349 printf("on-disk disklabel has incorrect disksize (%lld)\n", 350 DL_GETDSIZE(lp)); 351 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 352 printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n", 353 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 354 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 355 printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n", 356 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 357 #endif 358 DL_SETDSIZE(lp, disksize); 359 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 360 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 361 362 lp->d_checksum = 0; 363 lp->d_checksum = dkcksum(lp); 364 return (msg); 365 } 366 367 /* 368 * If dos partition table requested, attempt to load it and 369 * find disklabel inside a DOS partition. Return buffer 370 * for use in signalling errors if requested. 371 * 372 * We would like to check if each MBR has a valid BOOT_MAGIC, but 373 * we cannot because it doesn't always exist. So.. we assume the 374 * MBR is valid. 375 */ 376 char * 377 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 378 struct disklabel *lp, int *partoffp, int spoofonly) 379 { 380 struct dos_partition dp[NDOSPART], *dp2; 381 u_int32_t extoff = 0; 382 daddr64_t part_blkno = DOSBBSECTOR; 383 int dospartoff = 0, i, ourpart = -1; 384 int wander = 1, n = 0, loop = 0; 385 int offset; 386 387 if (lp->d_secpercyl == 0) 388 return ("invalid label, d_secpercyl == 0"); 389 if (lp->d_secsize == 0) 390 return ("invalid label, d_secsize == 0"); 391 392 /* do DOS partitions in the process of getting disklabel? */ 393 394 /* 395 * Read dos partition table, follow extended partitions. 396 * Map the partitions to disklabel entries i-p 397 */ 398 while (wander && n < 8 && loop < 8) { 399 loop++; 400 wander = 0; 401 if (part_blkno < extoff) 402 part_blkno = extoff; 403 404 /* read boot record */ 405 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp); 406 offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF; 407 bp->b_bcount = lp->d_secsize; 408 bp->b_flags = B_BUSY | B_READ | B_RAW; 409 (*strat)(bp); 410 if (biowait(bp)) { 411 /*wrong*/ if (partoffp) 412 /*wrong*/ *partoffp = -1; 413 return ("dos partition I/O error"); 414 } 415 416 bcopy(bp->b_data + offset, dp, sizeof(dp)); 417 418 if (ourpart == -1) { 419 /* Search for our MBR partition */ 420 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 421 i++, dp2++) 422 if (letoh32(dp2->dp_size) && 423 dp2->dp_typ == DOSPTYP_OPENBSD) 424 ourpart = i; 425 if (ourpart == -1) 426 goto donot; 427 /* 428 * This is our MBR partition. need sector 429 * address for SCSI/IDE, cylinder for 430 * ESDI/ST506/RLL 431 */ 432 dp2 = &dp[ourpart]; 433 dospartoff = letoh32(dp2->dp_start) + part_blkno; 434 435 /* found our OpenBSD partition, finish up */ 436 if (partoffp) 437 goto notfat; 438 439 if (lp->d_ntracks == 0) 440 lp->d_ntracks = dp2->dp_ehd + 1; 441 if (lp->d_nsectors == 0) 442 lp->d_nsectors = DPSECT(dp2->dp_esect); 443 if (lp->d_secpercyl == 0) 444 lp->d_secpercyl = lp->d_ntracks * 445 lp->d_nsectors; 446 } 447 donot: 448 /* 449 * In case the disklabel read below fails, we want to 450 * provide a fake label in i-p. 451 */ 452 for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) { 453 struct partition *pp = &lp->d_partitions[8+n]; 454 u_int8_t fstype; 455 456 if (dp2->dp_typ == DOSPTYP_OPENBSD) 457 continue; 458 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 459 continue; 460 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 461 continue; 462 if (letoh32(dp2->dp_size) == 0) 463 continue; 464 465 switch (dp2->dp_typ) { 466 case DOSPTYP_UNUSED: 467 fstype = FS_UNUSED; 468 n++; 469 break; 470 471 case DOSPTYP_LINUX: 472 fstype = FS_EXT2FS; 473 n++; 474 break; 475 476 case DOSPTYP_NTFS: 477 fstype = FS_NTFS; 478 n++; 479 break; 480 481 case DOSPTYP_FAT12: 482 case DOSPTYP_FAT16S: 483 case DOSPTYP_FAT16B: 484 case DOSPTYP_FAT16L: 485 case DOSPTYP_FAT32: 486 case DOSPTYP_FAT32L: 487 fstype = FS_MSDOS; 488 n++; 489 break; 490 case DOSPTYP_EXTEND: 491 case DOSPTYP_EXTENDL: 492 part_blkno = letoh32(dp2->dp_start) + extoff; 493 if (!extoff) { 494 extoff = letoh32(dp2->dp_start); 495 part_blkno = 0; 496 } 497 wander = 1; 498 break; 499 default: 500 fstype = FS_OTHER; 501 n++; 502 break; 503 } 504 505 /* 506 * Don't set fstype/offset/size when wandering or just 507 * looking for the offset of the OpenBSD partition. It 508 * would invalidate the disklabel checksum! 509 */ 510 if (wander || partoffp) 511 continue; 512 513 pp->p_fstype = fstype; 514 if (letoh32(dp2->dp_start)) 515 DL_SETPOFFSET(pp, 516 letoh32(dp2->dp_start) + part_blkno); 517 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 518 } 519 } 520 if (partoffp) 521 /* dospartoff has been set and we must not modify *lp. */ 522 goto notfat; 523 524 lp->d_npartitions = MAXPARTITIONS; 525 526 if (n == 0 && part_blkno == DOSBBSECTOR) { 527 u_int16_t fattest; 528 529 /* Check for a valid initial jmp instruction. */ 530 switch ((u_int8_t)bp->b_data[0]) { 531 case 0xeb: 532 /* 533 * Two-byte jmp instruction. The 2nd byte is the number 534 * of bytes to jmp and the 3rd byte must be a NOP. 535 */ 536 if ((u_int8_t)bp->b_data[2] != 0x90) 537 goto notfat; 538 break; 539 case 0xe9: 540 /* 541 * Three-byte jmp instruction. The next two bytes are a 542 * little-endian 16 bit value. 543 */ 544 break; 545 default: 546 goto notfat; 547 break; 548 } 549 550 /* Check for a valid bytes per sector value. */ 551 fattest = ((bp->b_data[12] << 8) & 0xff00) | 552 (bp->b_data[11] & 0xff); 553 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 554 goto notfat; 555 556 /* Check the end of sector marker. */ 557 fattest = ((bp->b_data[510] << 8) & 0xff00) | 558 (bp->b_data[511] & 0xff); 559 if (fattest != 0x55aa) 560 goto notfat; 561 562 /* Looks like a FAT filesystem. Spoof 'i'. */ 563 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 564 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 565 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 566 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 567 } 568 notfat: 569 570 /* record the OpenBSD partition's placement for the caller */ 571 if (partoffp) 572 *partoffp = dospartoff; 573 574 /* don't read the on-disk label if we are in spoofed-only mode */ 575 if (spoofonly) 576 return (NULL); 577 578 bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) * 579 DL_BLKSPERSEC(lp); 580 offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR); 581 bp->b_bcount = lp->d_secsize; 582 bp->b_flags = B_BUSY | B_READ | B_RAW; 583 (*strat)(bp); 584 if (biowait(bp)) 585 return ("disk label I/O error"); 586 587 /* sub-MBR disklabels are always at a LABELOFFSET of 0 */ 588 return checkdisklabel(bp->b_data + offset, lp); 589 } 590 591 /* 592 * Check new disk label for sensibility 593 * before setting it. 594 */ 595 int 596 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 597 { 598 int i; 599 struct partition *opp, *npp; 600 601 /* sanity clause */ 602 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 603 (nlp->d_secsize % DEV_BSIZE) != 0) 604 return (EINVAL); 605 606 /* special case to allow disklabel to be invalidated */ 607 if (nlp->d_magic == 0xffffffff) { 608 *olp = *nlp; 609 return (0); 610 } 611 612 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 613 dkcksum(nlp) != 0) 614 return (EINVAL); 615 616 /* XXX missing check if other dos partitions will be overwritten */ 617 618 while (openmask != 0) { 619 i = ffs(openmask) - 1; 620 openmask &= ~(1 << i); 621 if (nlp->d_npartitions <= i) 622 return (EBUSY); 623 opp = &olp->d_partitions[i]; 624 npp = &nlp->d_partitions[i]; 625 if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 626 DL_GETPSIZE(npp) < DL_GETPSIZE(opp)) 627 return (EBUSY); 628 /* 629 * Copy internally-set partition information 630 * if new label doesn't include it. XXX 631 */ 632 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 633 npp->p_fstype = opp->p_fstype; 634 npp->p_fragblock = opp->p_fragblock; 635 npp->p_cpg = opp->p_cpg; 636 } 637 } 638 nlp->d_checksum = 0; 639 nlp->d_checksum = dkcksum(nlp); 640 *olp = *nlp; 641 return (0); 642 } 643 644 /* 645 * Determine the size of the transfer, and make sure it is within the 646 * boundaries of the partition. Adjust transfer if needed, and signal errors or 647 * early completion. 648 */ 649 int 650 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 651 { 652 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 653 daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE); 654 655 /* avoid division by zero */ 656 if (lp->d_secpercyl == 0) 657 goto bad; 658 659 if (bp->b_blkno < 0 || sz < 0) 660 panic("bounds_check_with_label %lld %lld\n", bp->b_blkno, sz); 661 662 /* beyond partition? */ 663 if (bp->b_blkno + sz > DL_SECTOBLK(lp, DL_GETPSIZE(p))) { 664 sz = DL_SECTOBLK(lp, DL_GETPSIZE(p)) - bp->b_blkno; 665 if (sz == 0) { 666 /* If exactly at end of disk, return EOF. */ 667 bp->b_resid = bp->b_bcount; 668 return (-1); 669 } 670 if (sz < 0) 671 /* If past end of disk, return EINVAL. */ 672 goto bad; 673 674 /* Otherwise, truncate request. */ 675 bp->b_bcount = sz << DEV_BSHIFT; 676 } 677 678 /* calculate cylinder for disksort to order transfers with */ 679 bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) / 680 DL_SECTOBLK(lp, lp->d_secpercyl); 681 return (1); 682 683 bad: 684 bp->b_error = EINVAL; 685 bp->b_flags |= B_ERROR; 686 return (-1); 687 } 688 689 /* 690 * Disk error is the preface to plaintive error messages 691 * about failing disk transfers. It prints messages of the form 692 693 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 694 695 * if the offset of the error in the transfer and a disk label 696 * are both available. blkdone should be -1 if the position of the error 697 * is unknown; the disklabel pointer may be null from drivers that have not 698 * been converted to use them. The message is printed with printf 699 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 700 * The message should be completed (with at least a newline) with printf 701 * or addlog, respectively. There is no trailing space. 702 */ 703 void 704 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 705 struct disklabel *lp) 706 { 707 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 708 int (*pr)(const char *, ...); 709 char partname = 'a' + part; 710 daddr64_t sn; 711 712 if (pri != LOG_PRINTF) { 713 static const char fmt[] = ""; 714 log(pri, fmt); 715 pr = addlog; 716 } else 717 pr = printf; 718 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 719 bp->b_flags & B_READ ? "read" : "writ"); 720 sn = bp->b_blkno; 721 if (bp->b_bcount <= DEV_BSIZE) 722 (*pr)("%lld", sn); 723 else { 724 if (blkdone >= 0) { 725 sn += blkdone; 726 (*pr)("%lld of ", sn); 727 } 728 (*pr)("%lld-%lld", bp->b_blkno, 729 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 730 } 731 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 732 sn += DL_GETPOFFSET(&lp->d_partitions[part]); 733 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn, 734 sn / lp->d_secpercyl); 735 sn %= lp->d_secpercyl; 736 (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors, 737 sn % lp->d_nsectors); 738 } 739 } 740 741 /* 742 * Initialize the disklist. Called by main() before autoconfiguration. 743 */ 744 void 745 disk_init(void) 746 { 747 748 TAILQ_INIT(&disklist); 749 disk_count = disk_change = 0; 750 } 751 752 int 753 disk_construct(struct disk *diskp, char *lockname) 754 { 755 rw_init(&diskp->dk_lock, "dklk"); 756 mtx_init(&diskp->dk_mtx, IPL_BIO); 757 758 diskp->dk_flags |= DKF_CONSTRUCTED; 759 760 return (0); 761 } 762 763 /* 764 * Attach a disk. 765 */ 766 void 767 disk_attach(struct disk *diskp) 768 { 769 770 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 771 disk_construct(diskp, diskp->dk_name); 772 773 /* 774 * Allocate and initialize the disklabel structures. Note that 775 * it's not safe to sleep here, since we're probably going to be 776 * called during autoconfiguration. 777 */ 778 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 779 M_NOWAIT|M_ZERO); 780 if (diskp->dk_label == NULL) 781 panic("disk_attach: can't allocate storage for disklabel"); 782 783 /* 784 * Set the attached timestamp. 785 */ 786 microuptime(&diskp->dk_attachtime); 787 788 /* 789 * Link into the disklist. 790 */ 791 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 792 ++disk_count; 793 disk_change = 1; 794 } 795 796 /* 797 * Detach a disk. 798 */ 799 void 800 disk_detach(struct disk *diskp) 801 { 802 803 /* 804 * Free the space used by the disklabel structures. 805 */ 806 free(diskp->dk_label, M_DEVBUF); 807 808 /* 809 * Remove from the disklist. 810 */ 811 TAILQ_REMOVE(&disklist, diskp, dk_link); 812 disk_change = 1; 813 if (--disk_count < 0) 814 panic("disk_detach: disk_count < 0"); 815 } 816 817 /* 818 * Increment a disk's busy counter. If the counter is going from 819 * 0 to 1, set the timestamp. 820 */ 821 void 822 disk_busy(struct disk *diskp) 823 { 824 825 /* 826 * XXX We'd like to use something as accurate as microtime(), 827 * but that doesn't depend on the system TOD clock. 828 */ 829 mtx_enter(&diskp->dk_mtx); 830 if (diskp->dk_busy++ == 0) 831 microuptime(&diskp->dk_timestamp); 832 mtx_leave(&diskp->dk_mtx); 833 } 834 835 /* 836 * Decrement a disk's busy counter, increment the byte count, total busy 837 * time, and reset the timestamp. 838 */ 839 void 840 disk_unbusy(struct disk *diskp, long bcount, int read) 841 { 842 struct timeval dv_time, diff_time; 843 844 mtx_enter(&diskp->dk_mtx); 845 846 if (diskp->dk_busy-- == 0) 847 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 848 849 microuptime(&dv_time); 850 851 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 852 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 853 854 diskp->dk_timestamp = dv_time; 855 if (bcount > 0) { 856 if (read) { 857 diskp->dk_rbytes += bcount; 858 diskp->dk_rxfer++; 859 } else { 860 diskp->dk_wbytes += bcount; 861 diskp->dk_wxfer++; 862 } 863 } else 864 diskp->dk_seek++; 865 866 mtx_leave(&diskp->dk_mtx); 867 868 add_disk_randomness(bcount ^ diff_time.tv_usec); 869 } 870 871 int 872 disk_lock(struct disk *dk) 873 { 874 int error; 875 876 error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR); 877 878 return (error); 879 } 880 881 void 882 disk_unlock(struct disk *dk) 883 { 884 rw_exit(&dk->dk_lock); 885 } 886 887 int 888 dk_mountroot(void) 889 { 890 dev_t rawdev, rrootdev; 891 int part = DISKPART(rootdev); 892 int (*mountrootfn)(void); 893 struct disklabel dl; 894 int error; 895 896 rrootdev = blktochr(rootdev); 897 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 898 #ifdef DEBUG 899 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 900 rrootdev, rawdev); 901 #endif 902 903 /* 904 * open device, ioctl for the disklabel, and close it. 905 */ 906 error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD, 907 S_IFCHR, curproc); 908 if (error) 909 panic("cannot open disk, 0x%x/0x%x, error %d", 910 rootdev, rrootdev, error); 911 error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO, 912 (caddr_t)&dl, FREAD, curproc); 913 if (error) 914 panic("cannot read disk label, 0x%x/0x%x, error %d", 915 rootdev, rrootdev, error); 916 (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD, 917 S_IFCHR, curproc); 918 919 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 920 panic("root filesystem has size 0"); 921 switch (dl.d_partitions[part].p_fstype) { 922 #ifdef EXT2FS 923 case FS_EXT2FS: 924 { 925 extern int ext2fs_mountroot(void); 926 mountrootfn = ext2fs_mountroot; 927 } 928 break; 929 #endif 930 #ifdef FFS 931 case FS_BSDFFS: 932 { 933 extern int ffs_mountroot(void); 934 mountrootfn = ffs_mountroot; 935 } 936 break; 937 #endif 938 #ifdef CD9660 939 case FS_ISO9660: 940 { 941 extern int cd9660_mountroot(void); 942 mountrootfn = cd9660_mountroot; 943 } 944 break; 945 #endif 946 default: 947 #ifdef FFS 948 { 949 extern int ffs_mountroot(void); 950 951 printf("filesystem type %d not known.. assuming ffs\n", 952 dl.d_partitions[part].p_fstype); 953 mountrootfn = ffs_mountroot; 954 } 955 #else 956 panic("disk 0x%x/0x%x filesystem type %d not known", 957 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 958 #endif 959 } 960 return (*mountrootfn)(); 961 } 962 963 struct bufq * 964 bufq_default_alloc(void) 965 { 966 struct bufq_default *bq; 967 968 bq = malloc(sizeof(*bq), M_DEVBUF, M_NOWAIT|M_ZERO); 969 if (bq == NULL) 970 panic("bufq_default_alloc: no memory"); 971 972 bq->bufq.bufq_free = bufq_default_free; 973 bq->bufq.bufq_add = bufq_default_add; 974 bq->bufq.bufq_get = bufq_default_get; 975 976 return ((struct bufq *)bq); 977 } 978 979 void 980 bufq_default_free(struct bufq *bq) 981 { 982 free(bq, M_DEVBUF); 983 } 984 985 void 986 bufq_default_add(struct bufq *bq, struct buf *bp) 987 { 988 struct bufq_default *bufq = (struct bufq_default *)bq; 989 struct proc *p = bp->b_proc; 990 struct buf *head; 991 992 if (p == NULL || p->p_nice < NZERO) 993 head = &bufq->bufq_head[0]; 994 else if (p->p_nice == NZERO) 995 head = &bufq->bufq_head[1]; 996 else 997 head = &bufq->bufq_head[2]; 998 999 disksort(head, bp); 1000 } 1001 1002 struct buf * 1003 bufq_default_get(struct bufq *bq) 1004 { 1005 struct bufq_default *bufq = (struct bufq_default *)bq; 1006 struct buf *bp, *head; 1007 int i; 1008 1009 for (i = 0; i < 3; i++) { 1010 head = &bufq->bufq_head[i]; 1011 if ((bp = head->b_actf)) 1012 break; 1013 } 1014 if (bp == NULL) 1015 return (NULL); 1016 head->b_actf = bp->b_actf; 1017 return (bp); 1018 } 1019 1020 struct device * 1021 getdisk(char *str, int len, int defpart, dev_t *devp) 1022 { 1023 struct device *dv; 1024 1025 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 1026 printf("use one of: exit"); 1027 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1028 if (dv->dv_class == DV_DISK) 1029 printf(" %s[a-p]", dv->dv_xname); 1030 #if defined(NFSCLIENT) 1031 if (dv->dv_class == DV_IFNET) 1032 printf(" %s", dv->dv_xname); 1033 #endif 1034 } 1035 printf("\n"); 1036 } 1037 return (dv); 1038 } 1039 1040 struct device * 1041 parsedisk(char *str, int len, int defpart, dev_t *devp) 1042 { 1043 struct device *dv; 1044 int majdev, part = defpart; 1045 char c; 1046 1047 if (len == 0) 1048 return (NULL); 1049 c = str[len-1]; 1050 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1051 part = c - 'a'; 1052 len -= 1; 1053 } 1054 1055 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1056 if (dv->dv_class == DV_DISK && 1057 strncmp(str, dv->dv_xname, len) == 0 && 1058 dv->dv_xname[len] == '\0') { 1059 majdev = findblkmajor(dv); 1060 if (majdev < 0) 1061 panic("parsedisk"); 1062 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1063 break; 1064 } 1065 #if defined(NFSCLIENT) 1066 if (dv->dv_class == DV_IFNET && 1067 strncmp(str, dv->dv_xname, len) == 0 && 1068 dv->dv_xname[len] == '\0') { 1069 *devp = NODEV; 1070 break; 1071 } 1072 #endif 1073 } 1074 1075 return (dv); 1076 } 1077 1078 void 1079 setroot(struct device *bootdv, int part, int exitflags) 1080 { 1081 int majdev, unit, len, s; 1082 struct swdevt *swp; 1083 struct device *rootdv, *dv; 1084 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1085 struct ifnet *ifp = NULL; 1086 char buf[128]; 1087 #if defined(NFSCLIENT) 1088 extern char *nfsbootdevname; 1089 #endif 1090 1091 /* 1092 * If `swap generic' and we couldn't determine boot device, 1093 * ask the user. 1094 */ 1095 if (mountroot == NULL && bootdv == NULL) 1096 boothowto |= RB_ASKNAME; 1097 if (boothowto & RB_ASKNAME) { 1098 while (1) { 1099 printf("root device"); 1100 if (bootdv != NULL) { 1101 printf(" (default %s", bootdv->dv_xname); 1102 if (bootdv->dv_class == DV_DISK) 1103 printf("%c", 'a' + part); 1104 printf(")"); 1105 } 1106 printf(": "); 1107 s = splhigh(); 1108 cnpollc(TRUE); 1109 len = getsn(buf, sizeof(buf)); 1110 cnpollc(FALSE); 1111 splx(s); 1112 if (strcmp(buf, "exit") == 0) 1113 boot(exitflags); 1114 if (len == 0 && bootdv != NULL) { 1115 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1116 len = strlen(buf); 1117 } 1118 if (len > 0 && buf[len - 1] == '*') { 1119 buf[--len] = '\0'; 1120 dv = getdisk(buf, len, part, &nrootdev); 1121 if (dv != NULL) { 1122 rootdv = dv; 1123 nswapdev = nrootdev; 1124 goto gotswap; 1125 } 1126 } 1127 dv = getdisk(buf, len, part, &nrootdev); 1128 if (dv != NULL) { 1129 rootdv = dv; 1130 break; 1131 } 1132 } 1133 1134 if (rootdv->dv_class == DV_IFNET) 1135 goto gotswap; 1136 1137 /* try to build swap device out of new root device */ 1138 while (1) { 1139 printf("swap device"); 1140 if (rootdv != NULL) 1141 printf(" (default %s%s)", rootdv->dv_xname, 1142 rootdv->dv_class == DV_DISK ? "b" : ""); 1143 printf(": "); 1144 s = splhigh(); 1145 cnpollc(TRUE); 1146 len = getsn(buf, sizeof(buf)); 1147 cnpollc(FALSE); 1148 splx(s); 1149 if (strcmp(buf, "exit") == 0) 1150 boot(exitflags); 1151 if (len == 0 && rootdv != NULL) { 1152 switch (rootdv->dv_class) { 1153 case DV_IFNET: 1154 nswapdev = NODEV; 1155 break; 1156 case DV_DISK: 1157 nswapdev = MAKEDISKDEV(major(nrootdev), 1158 DISKUNIT(nrootdev), 1); 1159 if (nswapdev == nrootdev) 1160 continue; 1161 break; 1162 default: 1163 break; 1164 } 1165 break; 1166 } 1167 dv = getdisk(buf, len, 1, &nswapdev); 1168 if (dv) { 1169 if (dv->dv_class == DV_IFNET) 1170 nswapdev = NODEV; 1171 if (nswapdev == nrootdev) 1172 continue; 1173 break; 1174 } 1175 } 1176 gotswap: 1177 rootdev = nrootdev; 1178 dumpdev = nswapdev; 1179 swdevt[0].sw_dev = nswapdev; 1180 swdevt[1].sw_dev = NODEV; 1181 #if defined(NFSCLIENT) 1182 } else if (mountroot == nfs_mountroot) { 1183 rootdv = bootdv; 1184 rootdev = dumpdev = swapdev = NODEV; 1185 #endif 1186 } else if (mountroot == NULL && rootdev == NODEV) { 1187 /* 1188 * `swap generic' 1189 */ 1190 rootdv = bootdv; 1191 majdev = findblkmajor(rootdv); 1192 if (majdev >= 0) { 1193 /* 1194 * Root and swap are on the disk. 1195 * Assume swap is on partition b. 1196 */ 1197 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1198 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1199 } else { 1200 /* 1201 * Root and swap are on a net. 1202 */ 1203 nswapdev = NODEV; 1204 } 1205 dumpdev = nswapdev; 1206 swdevt[0].sw_dev = nswapdev; 1207 /* swdevt[1].sw_dev = NODEV; */ 1208 } else { 1209 /* Completely pre-configured, but we want rootdv .. */ 1210 majdev = major(rootdev); 1211 if (findblkname(majdev) == NULL) 1212 return; 1213 unit = DISKUNIT(rootdev); 1214 part = DISKPART(rootdev); 1215 snprintf(buf, sizeof buf, "%s%d%c", 1216 findblkname(majdev), unit, 'a' + part); 1217 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1218 if (rootdv == NULL) 1219 panic("root device (%s) not found", buf); 1220 } 1221 1222 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1223 ifp = ifunit(rootdv->dv_xname); 1224 else if (bootdv && bootdv->dv_class == DV_IFNET) 1225 ifp = ifunit(bootdv->dv_xname); 1226 1227 if (ifp) 1228 if_addgroup(ifp, "netboot"); 1229 1230 switch (rootdv->dv_class) { 1231 #if defined(NFSCLIENT) 1232 case DV_IFNET: 1233 mountroot = nfs_mountroot; 1234 nfsbootdevname = rootdv->dv_xname; 1235 return; 1236 #endif 1237 case DV_DISK: 1238 mountroot = dk_mountroot; 1239 part = DISKPART(rootdev); 1240 break; 1241 default: 1242 printf("can't figure root, hope your kernel is right\n"); 1243 return; 1244 } 1245 1246 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1247 1248 /* 1249 * Make the swap partition on the root drive the primary swap. 1250 */ 1251 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1252 if (major(rootdev) == major(swp->sw_dev) && 1253 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1254 temp = swdevt[0].sw_dev; 1255 swdevt[0].sw_dev = swp->sw_dev; 1256 swp->sw_dev = temp; 1257 break; 1258 } 1259 } 1260 if (swp->sw_dev != NODEV) { 1261 /* 1262 * If dumpdev was the same as the old primary swap device, 1263 * move it to the new primary swap device. 1264 */ 1265 if (temp == dumpdev) 1266 dumpdev = swdevt[0].sw_dev; 1267 } 1268 if (swdevt[0].sw_dev != NODEV) 1269 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1270 DISKUNIT(swdevt[0].sw_dev), 1271 'a' + DISKPART(swdevt[0].sw_dev)); 1272 if (dumpdev != NODEV) 1273 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1274 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1275 printf("\n"); 1276 } 1277 1278 extern struct nam2blk nam2blk[]; 1279 1280 int 1281 findblkmajor(struct device *dv) 1282 { 1283 char buf[16], *p; 1284 int i; 1285 1286 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1287 return (-1); 1288 for (p = buf; *p; p++) 1289 if (*p >= '0' && *p <= '9') 1290 *p = '\0'; 1291 1292 for (i = 0; nam2blk[i].name; i++) 1293 if (!strcmp(buf, nam2blk[i].name)) 1294 return (nam2blk[i].maj); 1295 return (-1); 1296 } 1297 1298 char * 1299 findblkname(int maj) 1300 { 1301 int i; 1302 1303 for (i = 0; nam2blk[i].name; i++) 1304 if (nam2blk[i].maj == maj) 1305 return (nam2blk[i].name); 1306 return (NULL); 1307 } 1308